import re
import six
from weblib.py3k_support import *
RE_SPECIAL_ENTITY = re.compile(b'&#(1[2-6][0-9]);')
[docs]def make_str(value, encoding='utf-8', errors='strict'):
"""
Normalize unicode/byte string to byte string.
"""
if isinstance(value, unicode):
# Convert to string (py2.x) or bytes (py3.x)
value = value.encode(encoding, errors=errors)
elif isinstance(value, str):
pass
else:
value = str(value)
return value
[docs]def make_unicode(value, encoding='utf-8', errors='strict'):
"""
Normalize unicode/byte string to unicode string.
"""
if not isinstance(value, unicode):
# Convert to unicode (py2.x and py3.x)
value = value.decode(encoding, errors=errors)
return value
def special_entity_handler(match):
num = int(match.group(1))
if 128 <= num <= 160:
try:
num = unichr(num).encode('utf-8')
return smart_str('&#%d;' % ord(num.decode('cp1252')[1]))
except UnicodeDecodeError:
return match.group(0)
else:
return match.group(0)
def fix_special_entities(body):
return RE_SPECIAL_ENTITY.sub(special_entity_handler, body)
def decode_list(values, encoding='utf-8'):
if not isinstance(values, list):
raise TypeError('unsupported values type: %s' % type(values))
return [smart_unicode(value, encoding) for value in values]
def decode_dict(values, encoding='utf-8'):
if not isinstance(values, dict):
raise TypeError('unsupported values type: %s' % type(values))
return dict(decode_pairs(values.items(), encoding))
def decode_pairs(pairs, encoding='utf-8'):
def decode(value):
if isinstance(value, six.binary_type):
return smart_unicode(value, encoding)
else:
return value
return [(decode(pair[0]), decode(pair[1])) for pair in pairs]
# Backward compatibility
smart_str = make_str
smart_unicode = make_unicode