当前位置：文江博客话题详情

JavaScript-如何检测发往后台的文本的编码格式？

发布于 2016-10-23 02:33:54 字数 178 浏览 1372 评论 3

问题是这样的：
页面的编码格式是utf-8，后台需要接受gbk格式，所以前端在发送前使用iconv对utf-8做了转换。

但是碰到一个问题，如果用户修改了前端页面的编码，比如改为gb2312，然后发起请求，程序会将gb2312文本当做utf8转为gbk发给后台，然后就会是一串乱码。

如何解决这个问题

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

浮生未歇 2017-09-16 17:24:12

有两种方式：
1.服务端探测请求参数的编码，你可以使用Firefox的字符集探测算法，探测字节流的编码格式，有Java版本的也有C++版本的，Java版本导入的jar包名称为jchardet.jar，使用方法如下：

import java.io.IOException;
import java.io.InputStream;

import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;

/**
* 与编码相关的工具类
* @author Administrator
*
*/
public class CharsetUtil {

private static boolean found = false;

private static String encoding = "";
/**
*
* @param stream
* @param languageHint
* 语言提示区域代码 eg：1 : Japanese; 2 : Chinese; 3 : Simplified Chinese;*
* 4 : Traditional Chinese; 5 : Korean; 6 : Dont know (default)
* @return
*/
public static String getStreamEncoding(InputStream stream , int languageHint){

nsDetector detector = new nsDetector(languageHint);
detector.Init(new nsICharsetDetectionObserver(){
public void Notify(String charset) {
found = true;
encoding = charset;
}
});
byte[] buf = new byte[1024];
int len;
boolean done = false;
boolean isAscii = true;

try {
while((len=(stream.read(buf))) != -1 ){
if(isAscii)
isAscii = detector.isAscii(buf, len);
if(!isAscii && !done)
done = detector.DoIt(buf, len, false);
if(done) break;
}
detector.DataEnd();
if(isAscii){
encoding="ASCII";
found = true;
}
if(!found){
String[] probable = detector.getProbableCharsets();
if(probable != null && probable.length >= 1){
encoding = probable[0];
}
else{
return "gb2312";
}
}

} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

return encoding ;
};
}

2.客户端保证，确保浏览器改变不了查看的编码，使用js达到这种效果，不过一旦客户端禁止了js就无效了。
参见：http://www.yslow.net/show.php?tid=104

回复收藏 0

归属感 2017-07-18 07:43:27

通过javascript控制，保证发送前使用encodeURIComponent或encodeURI对中文进行编码，两个函数的区别可google

回复收藏 0

想挽留 2016-10-26 08:04:59

具体得看你使用的语言，比如在java中可以这样来做：

/**
 * Byte-range checks for the double-byte Chinese encodings GB2312, GBK and BIG5.
 *
 * <p>Each check only tests whether a lead/trail byte pair falls inside the
 * encoding's code range. The ranges overlap, so one pair can pass several
 * checks; a {@code true} result means "possible", not "certain".
 */
public class CnCharsetChecker {
    /* Support for Chinese(GB2312) characters */
    // #define isgb2312head(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xf7)
    // #define isgb2312tail(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xfe)
    /**
     * Tests whether a byte pair lies in the GB2312 double-byte range.
     *
     * @param head lead byte (0xa1..0xf7 accepted)
     * @param tail trail byte (0xa1..0xfe accepted)
     * @return {@code true} if both bytes are in range
     */
    public static boolean isGB2312(byte head, byte tail) {
        // Java bytes are signed; mask to compare as unsigned 0..255 values.
        int iHead = head & 0xff;
        int iTail = tail & 0xff;
        return iHead >= 0xa1 && iHead <= 0xf7
                && iTail >= 0xa1 && iTail <= 0xfe;
    }

    /* Support for Chinese(GBK) characters */
    // #define isgbkhead(c) (0x81<=(uchar)(c) && (uchar)(c)<=0xfe)
    // #define isgbktail(c) ((0x40<=(uchar)(c) && (uchar)(c)<=0x7e)
    // || (0x80<=(uchar)(c) && (uchar)(c)<=0xfe))
    /**
     * Tests whether a byte pair lies in the GBK double-byte range.
     *
     * @param head lead byte (0x81..0xfe accepted)
     * @param tail trail byte (0x40..0x7e or 0x80..0xfe accepted; 0x7f is excluded)
     * @return {@code true} if both bytes are in range
     */
    public static boolean isGBK(byte head, byte tail) {
        int iHead = head & 0xff;
        int iTail = tail & 0xff;
        return iHead >= 0x81 && iHead <= 0xfe
                && ((iTail >= 0x40 && iTail <= 0x7e)
                        || (iTail >= 0x80 && iTail <= 0xfe));
    }

    /* Support for Chinese(BIG5) characters */
    // #define isbig5head(c) (0xa1<=(uchar)(c) && (uchar)(c)<=0xf9)
    // #define isbig5tail(c) ((0x40<=(uchar)(c) && (uchar)(c)<=0x7e)
    // || (0xa1<=(uchar)(c) && (uchar)(c)<=0xfe))
    /**
     * Tests whether a byte pair lies in the BIG5 double-byte range.
     *
     * @param head lead byte (0xa1..0xf9 accepted)
     * @param tail trail byte (0x40..0x7e or 0xa1..0xfe accepted)
     * @return {@code true} if both bytes are in range
     */
    public static boolean isBIG5(byte head, byte tail) {
        int iHead = head & 0xff;
        int iTail = tail & 0xff;
        return iHead >= 0xa1 && iHead <= 0xf9
                && ((iTail >= 0x40 && iTail <= 0x7e)
                        || (iTail >= 0xa1 && iTail <= 0xfe));
    }

    /**
     * Demo: encodes three sample characters as GBK and prints which range
     * checks their two bytes pass.
     *
     * <p>The bytes are produced with an explicit GBK charset. The no-argument
     * {@code getBytes()} uses the platform default, and on a UTF-8 default the
     * first two bytes would be UTF-8 bytes, making the output meaningless.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        java.nio.charset.Charset gbk = java.nio.charset.Charset.forName("GBK");
        // Samples are written as Unicode escapes (U+7231, U+611B, U+7A32) so the
        // demo does not depend on the encoding javac assumes for this file.
        String[] samples = {"\u7231", "\u611b", "\u7a32"};
        for (String sample : samples) {
            byte[] sChars = sample.getBytes(gbk);
            System.out.println(describe(sample, sChars[0], sChars[1]));
        }
    }

    /** Builds one report line with the three range-check results for a byte pair. */
    private static String describe(String text, byte head, byte tail) {
        return text + " is "
                + isGB2312(head, tail) + " for GB2312;"
                + isGBK(head, tail) + " for GBK,"
                + isBIG5(head, tail) + " for BIG5";
    }
}