最近做OWA开发,遇到很多俄文邮件,这些邮件经我们程序处理后全部显示为乱码,分析接收的数据类似下面这种类型: Elena%3CBR%3E%3CB%3ESubject%3A%3C%2FB%3E+%D0%94%D0%BE%D0%B3%D0%BE%D0%B2%D0%BE%D1%80+%0A%D1%86%D0%B5%D1%81%D1%81%D0%B8%D0%B8%3CBR%3E%3C%2FFONT%3E%3CBR%3E%3C%2FDIV%3E%0A%3CDIV%3E%3C%2FDIV%3E%0A%3CDIV+class%3DSection1%3E%0A%3CP+class 按道理说,Exchange服务器本身能够处理多语言,但是把这些数据传给Exchange后却显示乱码,没办法只能自己想办法。
看上边的那段数据,全是%号,跟URL编码很类似。用.NET C#验证一下就知道了,很简单,一句话搞定:string text = HttpUtility.UrlDecode(data, Encoding.UTF8);
解析的结果是这样:Elena<BR><B>Subject:</B> Договор цессии<BR></FONT><BR></DIV><DIV></DIV><DIV class="Section1"><P class
首先:Decode URL编码:
//From Codeguru.
// Returns the numeric value (0-15) of a hexadecimal digit, or -1 when `c`
// is not one of 0-9, a-f, A-F.
static int UriDecodeHexValue(unsigned char c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}

// Decodes percent-encoded ("%XX") sequences in `sSrc` and returns the
// resulting byte string.
//
// - Every '%' followed by two hexadecimal digits is replaced by the byte
//   those digits encode.
// - A '%' that is NOT followed by two hexadecimal digits (including a '%'
//   within the last two characters of the input) is copied through
//   unchanged, as are all other characters.
// - '+' is NOT translated to a space; callers handling form-encoded data
//   must do that themselves.
//
// The result is raw bytes: for UTF-8 payloads it still has to be converted
// to a wide string afterwards.
std::string UriDecode(const std::string & sSrc)
{
    const std::string::size_type len = sSrc.size();

    std::string sResult;
    sResult.reserve(len);  // decoding never makes the text longer

    std::string::size_type i = 0;
    while (i < len) {
        // Only attempt a decode when two more characters are available.
        if (sSrc[i] == '%' && len - i >= 3) {
            const int hi = UriDecodeHexValue(static_cast<unsigned char>(sSrc[i + 1]));
            const int lo = UriDecodeHexValue(static_cast<unsigned char>(sSrc[i + 2]));
            if (hi >= 0 && lo >= 0) {
                sResult.push_back(static_cast<char>((hi << 4) | lo));
                i += 3;
                continue;
            }
        }

        // Ordinary character, or a '%' that does not start a valid escape.
        sResult.push_back(sSrc[i]);
        ++i;
    }

    return sResult;
}
然后,解码UTF8:
// Converts the NUL-terminated UTF-8 string `src` to a wide string.
//
// Parameters:
//   src - UTF-8 encoded, NUL-terminated input; may be null.
//   t   - receives the converted text (overwritten). Left untouched when
//         `src` is null.
// Returns:
//   A copy of the converted text, or an empty string when `src` is null.
//
// Decoding rules:
//   - 1- to 4-byte sequences are decoded to their Unicode code point.
//   - Code points above U+FFFF are stored as a UTF-16 surrogate pair when
//     wchar_t is 16 bits wide, and as a single element otherwise.
//   - Malformed input (stray continuation byte, invalid lead byte,
//     truncated sequence, overlong form, encoded surrogate, value above
//     U+10FFFF) yields U+FFFD and decoding resumes at the next unread byte.
//     No byte beyond the terminating NUL is ever read.
std::wstring UTF2Uni(const char* src, std::wstring &t)
{
    if (!src) {
        return L"";
    }

    const wchar_t kReplacement = static_cast<wchar_t>(0xFFFD);

    const unsigned char* p = reinterpret_cast<const unsigned char*>(src);
    const unsigned char* const end = p + strlen(src);

    std::wstring out;
    out.reserve(static_cast<std::wstring::size_type>(end - p));

    while (p < end) {
        const unsigned char lead = *p++;

        unsigned long cp = 0;     // code point being assembled
        unsigned long minCp = 0;  // smallest value legal for this length
        int extra = 0;            // continuation bytes expected

        if (lead < 0x80) {                 // 0xxx-xxxx
            cp = lead;
        } else if ((lead & 0xE0) == 0xC0) { // 110x-xxxx 10xx-xxxx
            cp = lead & 0x1Fu;
            extra = 1;
            minCp = 0x80uL;
        } else if ((lead & 0xF0) == 0xE0) { // 1110-xxxx + 2 continuation bytes
            cp = lead & 0x0Fu;
            extra = 2;
            minCp = 0x800uL;
        } else if ((lead & 0xF8) == 0xF0) { // 1111-0xxx + 3 continuation bytes
            cp = lead & 0x07u;
            extra = 3;
            minCp = 0x10000uL;
        } else {
            // Stray continuation byte or a lead byte UTF-8 does not define.
            out.push_back(kReplacement);
            continue;
        }

        // Consume only bytes that really are continuation bytes (10xx-xxxx),
        // so a truncated sequence never swallows the following character.
        int consumed = 0;
        while (consumed < extra && p + consumed < end &&
               (p[consumed] & 0xC0) == 0x80) {
            cp = (cp << 6) | (p[consumed] & 0x3FuL);
            ++consumed;
        }
        p += consumed;

        const bool isSurrogate = (cp >= 0xD800uL && cp <= 0xDFFFuL);
        const bool wellFormed = (consumed == extra) && (cp >= minCp) &&
                                (cp <= 0x10FFFFuL) && !isSurrogate;
        if (!wellFormed) {
            out.push_back(kReplacement);
            continue;
        }

        if (sizeof(wchar_t) >= 4 || cp <= 0xFFFFuL) {
            out.push_back(static_cast<wchar_t>(cp));
        } else {
            // 16-bit wchar_t: encode as a UTF-16 surrogate pair.
            cp -= 0x10000uL;
            out.push_back(static_cast<wchar_t>(0xD800uL + (cp >> 10)));
            out.push_back(static_cast<wchar_t>(0xDC00uL + (cp & 0x3FFuL)));
        }
    }

    t = out;
    return t;
}