Remove Special Chars - Python Code Snippet

 

This Python class cleans strings by removing or replacing special characters. Use it to clean property names and values. This sample targets ASCII encoding; you can amend it to support UTF-8.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import re

import logging
# Module-level logger, named after this module via __name__.
LOG = logging.getLogger(__name__)

class Cleaner:
    """Normalise arbitrary values into ASCII-only, underscore-separated strings.

    The cleaning rules are:

    * single quotes and parentheses are removed;
    * separators/punctuation (``/ : space . + - ! ^ & [ ] { } < > ~ | " ? =``)
      are replaced by ``_``;
    * non-ASCII characters in text input are dropped, while non-ASCII bytes
      in byte-string input are replaced by ``_``;
    * runs of ``_`` are collapsed and leading/trailing ``_`` are stripped.

    The class works on both Python 2 and Python 3.
    """

    # Characters replaced by an underscore.  Written as a raw string so the
    # regex escapes are not parsed as (invalid) Python string escapes.
    _clean_underscore = re.compile(
        r'(/|:| |\.|\+|-|!|\^|&|\[|\]|\{|\}|<|>|~|\||"|\?|=|[^\x00-\x7F])')
    # Characters removed outright.
    _clean_remove = re.compile(r"[\'\(\)]")
    # Collapses consecutive underscores into a single one.
    _compacter = re.compile('_+')
    # Text type on either major version: ``unicode`` on Python 2, ``str`` on 3.
    _text_type = type(u'')

    @classmethod
    def clean(cls, s):
        """Return a cleaned, ASCII-only version of ``s``.

        Args:
            s: The value to clean.  Text and byte strings are handled
                directly; any other object is converted with ``str()``.

        Returns:
            The cleaned native ``str``, or the literal ``'_empty_'`` when
            nothing is left after cleaning.

        Raises:
            Exception: Any error raised while converting or cleaning ``s`` is
                logged and then re-raised unchanged.
        """
        try:
            value = s
            if isinstance(value, cls._text_type):
                # Text input: silently drop anything that is not ASCII.
                value = value.encode('ascii', 'ignore')
            if isinstance(value, bytes) and bytes is not str:
                # Python 3 only: turn bytes back into text.  latin-1 maps each
                # byte to one character, so non-ASCII bytes survive to be
                # replaced by '_' below (as they are for Python 2 byte strings).
                value = value.decode('latin-1')
            ret = str(value)
            ret = cls._clean_remove.sub('', ret)
            ret = cls._clean_underscore.sub('_', ret)
            ret = cls._compacter.sub('_', ret)
            ret = ret.strip('_')
            return ret or '_empty_'
        except Exception:
            # Lazy %-formatting: the message is only built if it is emitted and
            # cannot itself fail on non-ASCII input the way str.format() can.
            LOG.error("problem cleaning `%s`", s, exc_info=True)
            raise

Was this article helpful?
1 out of 1 found this helpful