编程知识 cdmana.com

HtmlUnicode编码转Gbk

在使用luasocket调用webservice接口的时候,返回了如下字符:
 
<fieldInfo><fieldChName>&#x662F;&#x5426;&#x6210;&#x529F;</fieldChName>
 
字符串中出现了大量的&#x....;格式的字符。
 
 
要解决这种编码的格式,分以下几步:
1、将&#x....;(HTML十六进制字符引用)转化成\uXXXX形式的unicode转义
-- Rewrite HTML hexadecimal character references (e.g. &#x662F;) as \uXXXX
-- escapes. Only hex digits are accepted (%x, not %w), and the code point is
-- zero-padded to four digits because the \u decoder in step 2 always consumes
-- exactly four. References that do not fit in four hex digits (above U+FFFF)
-- are left untouched: returning nil from the callback keeps the original text.
resbody = string.gsub(resbody, "&#x(%x+);", function(hex)
    -- Cap the digit count so an absurdly long reference cannot overflow.
    if #hex > 8 then
        return nil
    end
    local code = tonumber(hex, 16)
    if code and code <= 0xFFFF then
        return string.format("\\u%04X", code)
    end
    return nil
end)
2、转化成utf-8编码:定义下面的函数,然后调用 resbody = unicode_to_utf8(resbody)
--- Decode `\uXXXX` escapes in a string and return the result as UTF-8.
--
-- Every `\u` followed by exactly four hexadecimal digits is replaced by the
-- UTF-8 encoding of that code point. A high surrogate immediately followed by
-- a low surrogate (`\uD83D\uDE00`) is combined into one code point and emitted
-- as a four-byte sequence. Any other byte is treated as a code point in the
-- range 0-255 and encoded accordingly, so bytes >= 0x80 become two-byte
-- sequences. A `\u` that is not followed by four hex digits is copied through
-- literally instead of raising an error.
--
-- The result is a plain Lua string; no trailing NUL byte is appended.
-- Only arithmetic is used, so no bit library (bit32 / bit) is required.
--
-- @param convertStr string to decode; any non-string value is returned as is
-- @return the decoded UTF-8 string, or `convertStr` itself if not a string
local function unicode_to_utf8(convertStr)
    if type(convertStr) ~= "string" then
        return convertStr
    end

    local floor, char = math.floor, string.char
    local match, byte = string.match, string.byte
    local escape_pattern = "^\\u(%x%x%x%x)"
    local BACKSLASH = 92

    -- Collect pieces in a table and join once: repeated `..` is O(n^2).
    local pieces, count = {}, 0
    local len = #convertStr
    local i = 1

    while i <= len do
        local current = byte(convertStr, i)
        local unicode

        -- The leading "^" anchors the match at position i.
        local hex = current == BACKSLASH and match(convertStr, escape_pattern, i)
        if hex then
            unicode = tonumber(hex, 16)
            i = i + 6

            -- Join a surrogate pair into a single supplementary code point.
            if unicode >= 0xD800 and unicode <= 0xDBFF then
                local low_hex = match(convertStr, escape_pattern, i)
                local low = low_hex and tonumber(low_hex, 16)
                if low and low >= 0xDC00 and low <= 0xDFFF then
                    unicode = 0x10000 + (unicode - 0xD800) * 0x400 + (low - 0xDC00)
                    i = i + 6
                end
            end
        else
            unicode = current
            i = i + 1
        end

        count = count + 1
        if unicode <= 0x7F then
            pieces[count] = char(unicode)
        elseif unicode <= 0x7FF then
            pieces[count] = char(
                0xC0 + floor(unicode / 0x40),
                0x80 + unicode % 0x40)
        elseif unicode <= 0xFFFF then
            pieces[count] = char(
                0xE0 + floor(unicode / 0x1000),
                0x80 + floor(unicode / 0x40) % 0x40,
                0x80 + unicode % 0x40)
        else
            pieces[count] = char(
                0xF0 + floor(unicode / 0x40000),
                0x80 + floor(unicode / 0x1000) % 0x40,
                0x80 + floor(unicode / 0x40) % 0x40,
                0x80 + unicode % 0x40)
        end
    end

    return table.concat(pieces)
end
3、将utf-8编码转化成gbk, 使用iconv开源库进行转换
-- Re-encode the response body from UTF-8 to GBK via iconv_convert, the
-- iconv-based helper this article relies on (defined elsewhere).
local from_charset, to_charset = "utf-8", "gbk"
local xmlbody = iconv_convert(resbody, from_charset, to_charset)

版权声明
本文为[零落年华]所创,转载请带上原文链接,感谢
https://my.oschina.net/u/3312209/blog/4713227

Scroll to Top