Remove Special Chars - Python Code Snippet


This Python class cleans strings by removing or replacing special characters. Use it to clean properties and values. The sample targets ASCII output; you can adapt it to support UTF-8.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re

import logging
# Module-level logger; Cleaner.clean reports cleaning failures through it.
LOG = logging.getLogger(__name__)

class Cleaner:
    """Turn arbitrary values into compact, ASCII-only strings.

    Cleaning happens in four steps:

    1. apostrophes and parentheses are removed;
    2. the separators and punctuation listed in ``_clean_underscore`` (and
       any remaining non-ASCII character) are replaced by ``_``;
    3. runs of underscores are collapsed into one;
    4. leading and trailing underscores are stripped.

    Characters that appear in neither pattern (for example ``,``, ``;``,
    ``@`` or ``#``) are left untouched.
    """

    # Replaced by an underscore: common separators/punctuation, plus
    # anything outside the 7-bit ASCII range.
    _clean_underscore = re.compile(
        r'(/|:| |\.|\+|-|!|\^|&|\[|\]|\{|\}|<|>|~|\||"|\?|=|[^\x00-\x7F])')
    # Dropped outright: apostrophes and parentheses.
    _clean_remove = re.compile(r"[\'\(\)]")
    # Runs of underscores, collapsed to a single one.
    _compacter = re.compile('_+')

    @classmethod
    def clean(cls, s):
        """Return a cleaned, ASCII-only version of ``s``.

        Args:
            s: The value to clean. Text has its non-ASCII characters
                dropped; byte strings have each non-ASCII byte replaced by
                an underscore; any other object is converted with ``str()``
                first.

        Returns:
            The cleaned string, or the literal ``'_empty_'`` when nothing
            is left after cleaning.

        Raises:
            Exception: Any error raised while converting or cleaning ``s``
                is logged with its traceback and then re-raised unchanged.
        """
        try:
            # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on
            # Python 3, so this branch handles text on both.
            if isinstance(s, type(u'')):
                s = s.encode('ascii', 'ignore')
            # On Python 3 ``str(bytes)`` would yield "b'...'"; decode
            # instead. Latin-1 maps every byte to one character, so each
            # non-ASCII byte is later replaced by an underscore, exactly as
            # a Python 2 byte string would be.
            if bytes is not str and isinstance(s, bytes):
                s = s.decode('latin-1')
            ret = str(s)
            ret = cls._clean_remove.sub('', ret)
            ret = cls._clean_underscore.sub('_', ret)
            ret = cls._compacter.sub('_', ret)
            ret = ret.strip('_')
            return ret or '_empty_'
        except Exception:
            # Lazy %-formatting: if rendering ``s`` itself fails, logging
            # handles it instead of masking the original exception.
            LOG.exception("problem cleaning `%s`", s)
            raise

