python安全转编码
在对string进行decode时,有时会遇到含有非法字节的字串,导致解码失败(抛出UnicodeDecodeError),所以写了个安全解码函数:遇到无法解码的字节就直接丢弃
第一个是利用exception中的信息写的,第二个是最初用土办法写的
简单测了下,前者较后者大概有10%性能提高
def conv(s, decoding='gbk', encoding=''):
    """Decode the byte string *s*, silently dropping undecodable bytes.

    Args:
        s: Byte string to decode.
        decoding: Name of the codec used to decode *s*.
        encoding: Optional target codec. When non-empty, the decoded text is
            re-encoded with it and a byte string is returned instead.

    Returns:
        The decoded text, or its re-encoding when *encoding* is given.

    Raises:
        LookupError: If *decoding* or *encoding* names an unknown codec.
        UnicodeEncodeError: If the decoded text cannot be represented in
            *encoding*.
    """
    # The 'ignore' error handler skips every undecodable byte range the codec
    # reports and keeps going, which is what the previous "cut out
    # s[e.start:e.end] and decode again from the beginning" loop achieved,
    # but in a single pass and without catching unrelated exceptions.
    ustr = s.decode(decoding, 'ignore')
    if encoding:
        return ustr.encode(encoding)
    return ustr
def conv(s, decoding='gbk', encoding=''):
    """Decode the byte string *s* byte by byte, dropping undecodable bytes.

    The scanner treats every byte >= 0x80 as the lead byte of a two-byte
    character and tries to decode it together with the following byte. It is
    therefore only suited to encodings whose characters are at most two
    bytes long (such as the default GBK); longer sequences are discarded.

    Args:
        s: Byte string to decode.
        decoding: Name of the codec used to decode *s*.
        encoding: Optional target codec. When non-empty, the decoded text is
            re-encoded with it and a byte string is returned instead.

    Returns:
        The decoded text, or its re-encoding when *encoding* is given.

    Raises:
        LookupError: If *decoding* or *encoding* names an unknown codec.
        UnicodeEncodeError: If the decoded text cannot be represented in
            *encoding*.
    """
    pieces = []
    lead = None  # high byte still waiting for its trail byte
    i = 0
    size = len(s)
    while i < size:
        # Slice instead of indexing so `cur` is a length-1 byte string on
        # both Python 2 and Python 3.
        cur = s[i:i + 1]
        if lead is not None:
            try:
                pair = (lead + cur).decode(decoding)
            except UnicodeDecodeError:
                # Only the lead byte is bad: drop it and look at `cur` again
                # on the next iteration, so a valid byte that follows a stray
                # high byte is not lost.
                lead = None
                continue
            pieces.append(pair)
            lead = None
        elif ord(cur) >= 0x80:
            # 0x80 itself is not ASCII either; treating it as a lead byte
            # keeps it away from the single-byte decode below.
            lead = cur
        else:
            try:
                pieces.append(cur.decode(decoding))
            except UnicodeDecodeError:
                # Keep the "never fail on bad input" contract for codecs
                # that cannot decode a lone byte.
                pass
        i += 1
    # A lead byte still pending here has no trail byte and is dropped.
    text = u''.join(pieces)
    if encoding:
        return text.encode(encoding)
    return text
0 Comments:
Post a Comment