Friday, April 11, 2008

python安全转编码

在对 string 进行 decode 时，有时会遇到含有非法字节的字串，导致解码失败，所以写了两个“安全解码”函数。
第一个是利用 exception 中的信息（出错字节的起止位置）写的，第二个是最初用土办法逐字节处理写的。
简单测了一下，前者较后者大概有 10% 的性能提高。
def conv(s, decoding='gbk', encoding=''):
    """Decode a byte string, silently dropping any undecodable bytes.

    The string is decoded strictly; whenever the codec reports an error,
    the offending byte range (taken from the exception's ``start`` and
    ``end`` attributes) is cut out and decoding is retried from scratch.

    Args:
        s: The byte string to decode.
        decoding: Name of the codec used to decode ``s``.
        encoding: If non-empty, the decoded text is re-encoded with this
            codec and the resulting byte string is returned.

    Returns:
        The decoded unicode text, or that text encoded with ``encoding``
        when ``encoding`` is non-empty.

    Raises:
        LookupError: If ``decoding`` or ``encoding`` names an unknown codec.
        UnicodeEncodeError: If the text cannot be represented in
            ``encoding``.
    """
    while True:
        try:
            text = s.decode(decoding)
        except UnicodeDecodeError as err:
            # Only decode errors carry start/end offsets; anything else
            # (e.g. an unknown codec name) must propagate to the caller.
            s = s[:err.start] + s[err.end:]
        else:
            break
    if encoding:
        return text.encode(encoding)
    return text

def conv(s, decoding='gbk', encoding=''):
    """Decode a double-byte-encoded byte string byte by byte, skipping junk.

    Bytes below 0x80 are decoded on their own. A byte of 0x80 or above is
    treated as the lead byte of a two-byte character and is decoded
    together with the byte that follows it. If that pair cannot be
    decoded, only the lead byte is discarded and the following byte is
    examined again, so valid data after a stray byte is not lost. A lead
    byte with nothing after it is dropped.

    NOTE: this scheme only understands one- and two-byte sequences, so it
    is meant for encodings such as GBK; it will not handle encodings with
    longer sequences (e.g. UTF-8) correctly.

    Args:
        s: The byte string to decode.
        decoding: Name of the codec used to decode ``s``.
        encoding: If non-empty, the decoded text is re-encoded with this
            codec and the resulting byte string is returned.

    Returns:
        The decoded unicode text, or that text encoded with ``encoding``
        when ``encoding`` is non-empty.
    """
    pieces = []
    lead = None  # pending lead byte of a two-byte character, if any
    i = 0
    while i < len(s):
        # Slice instead of indexing so we get a length-1 byte string on
        # both Python 2 and Python 3.
        byte = s[i:i + 1]
        if lead is not None:
            pair = lead + byte
            lead = None
            try:
                pieces.append(pair.decode(decoding))
            except UnicodeDecodeError:
                # Drop just the bad lead byte; do not advance, so the
                # current byte is re-examined on its own.
                continue
            i += 1
        elif ord(byte) >= 0x80:
            # 0x80 itself is not a valid single byte either, so it has to
            # go through the guarded pair path as well.
            lead = byte
            i += 1
        else:
            try:
                pieces.append(byte.decode(decoding))
            except UnicodeDecodeError:
                pass  # not decodable in this codec: skip it
            i += 1
    text = u''.join(pieces)
    if encoding:
        return text.encode(encoding)
    return text

0 Comments: