This Python class cleans strings by removing or replacing special characters. Use it to clean property names and values. This sample targets ASCII output; you can amend it to support UTF-8.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import logging
# Module-level logger named after this module; Cleaner.clean reports failures through it.
LOG = logging.getLogger(__name__)
class Cleaner:
    """Turn arbitrary values into ASCII strings free of special characters.

    The cleaning rules, applied in order by :meth:`clean`:

    1. quotes and parentheses (``'``, ``(``, ``)``) are removed;
    2. a fixed set of punctuation, the space character and any remaining
       non-ASCII character are each replaced by an underscore;
    3. runs of underscores are collapsed into one;
    4. leading and trailing underscores are stripped.
    """

    # Characters that become "_": / : space . + - ! ^ & [ ] { } < > ~ | " ? =
    # plus anything outside the 7-bit ASCII range.  Raw string so that the
    # backslash escapes reach the regex engine instead of being (invalid)
    # Python string escapes.
    _clean_underscore = re.compile(
        r'(/|:| |\.|\+|-|!|\^|&|\[|\]|\{|\}|<|>|~|\||"|\?|=|[^\x00-\x7F])'
    )
    # Characters that are dropped outright.
    _clean_remove = re.compile(r"[\'\(\)]")
    # Collapses consecutive underscores produced by the substitution above.
    _compacter = re.compile('_+')

    # The text type of the running interpreter: ``unicode`` on Python 2,
    # ``str`` on Python 3 (where the name ``unicode`` does not exist).
    try:
        _text_type = unicode  # noqa: F821 - Python 2 only
    except NameError:
        _text_type = str

    @classmethod
    def clean(cls, s):
        """Return a cleaned, ASCII-only string representation of ``s``.

        Args:
            s: The value to clean.  Text has its non-ASCII characters
                dropped; Python 3 ``bytes`` have each non-ASCII byte replaced
                by an underscore; any other object is converted with
                ``str()`` first.

        Returns:
            The cleaned native ``str``, or the literal ``'_empty_'`` when
            nothing is left after cleaning.

        Raises:
            Exception: Any error raised while cleaning is logged through
                ``LOG`` and then re-raised unchanged.
        """
        try:
            raw = s
            if isinstance(raw, cls._text_type):
                # Drop (rather than replace) characters that have no ASCII
                # representation.
                raw = raw.encode('ascii', 'ignore')
            if bytes is not str and isinstance(raw, bytes):
                # Python 3 only: str(bytes) would yield "b'...'".  latin-1
                # maps every byte to exactly one code point, so non-ASCII
                # bytes survive as single characters and are turned into
                # underscores by the regex below.
                ret = raw.decode('latin-1')
            else:
                ret = str(raw)
            ret = cls._clean_remove.sub('', ret)
            ret = cls._clean_underscore.sub('_', ret)
            ret = cls._compacter.sub('_', ret)
            ret = ret.strip('_')
            return ret or '_empty_'
        except Exception:
            # Lazy %-formatting: building the message can no longer fail on
            # its own and hide the original error.
            LOG.error("problem cleaning `%s`", s, exc_info=True)
            raise