相关文章
- 【转】彻底搞明白 GB2312、GBK 、GB18030和UTF-8
1.ASCII、GB2312、GBK、GB18030 以及 UTF-8 的关系
2.编写代码
- 引入依赖:hutool工具类
<dependency><groupId>cn.hutool</groupId><artifactId>hutool-all</artifactId><version>5.8.11</version></dependency>
- 编写java代码
package top.lishuoboy.test;import cn.hutool.core.convert.Convert;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.text.StrBuilder;
import cn.hutool.core.text.StrPool;
import cn.hutool.core.util.*;import java.nio.ByteOrder;
import java.nio.charset.Charset;public class CharsetPrintTest {private static final String SPACE = " ";private static final String BASE_DIR = "D:/111/charset/";private static final Charset GB2312 = Charset.forName("GB2312");private static final Charset GBK = CharsetUtil.CHARSET_GBK;private static final Charset GB18030 = Charset.forName("GB18030");private static final Charset UTF_8 = CharsetUtil.CHARSET_UTF_8;public static void main(String[] args) {FileUtil.del(BASE_DIR);printAllChar(GB2312);printAllChar(GBK);printAllChar(GB18030);printAllChar(UTF_8);}/** 输出指定编码的所有字符 */public static void printAllChar(Charset charset) {StrBuilder sb = new StrBuilder();for (int i = 1; i < (1 << 16); i++) { // 默认只输出前2个字节,否则太大byte[] bytes = ByteUtil.intToBytes(i, ByteOrder.BIG_ENDIAN);byte[] bytesNew = remove0(bytes);// 不同编码输出字节不一致if (GB2312.equals(charset) || GBK.equals(charset)) { // GB2312、GBK占1、2个字节if (i > (1 << 16)) continue;} else if (GB18030.equals(charset)) { // GB18030占1、2、4个字节if (i > (1 << 16) && i < (1 << 24)) continue;} else if (UTF_8.equals(charset)) { // UTF_8占1、2、3、4个字节if (i > (1 << 24)) continue;} else {System.err.println("不支持的字符编码");break;}// 每16个换行if (i % 16 == 1) {sb.append(StrPool.CRLF + Convert.toHex(bytes) + SPACE);}String str = new String(bytesNew, charset);str = replace(str, i);sb.append(str + SPACE);}FileUtil.appendString(sb.toString(), BASE_DIR + charset + ".log", charset);}/** 将字节数组中前面的0干掉(知道第一个不是0的结束) */private static byte[] remove0(byte[] bytes) {for (int i = 0; i < bytes.length; i++) {if (bytes[i] != 0) {byte[] bytesNew = new byte[bytes.length - i];ArrayUtil.copy(bytes, i, bytesNew, 0, bytes.length - i);return bytesNew;}}return bytes;}/** 【BCompare比对专用】替换莫名其妙的字符,方便BCompare对比,可以不用 */private static String replace(String str, int i) {String charStr = RandomUtil.BASE_CHAR_NUMBER + RandomUtil.BASE_CHAR.toUpperCase() + ":;=<>?�\u007F";if (i > (1 << 7 - 1)) {str = StrUtil.replaceChars(str, charStr, "");}return str;}
}
3.对比
3.1. GB2312与GBK
- 1
- 2
3.2. GBK与GB18030(只比对前2个字节)
-
1
-
2
-
3
-
1
-
2