/*
 * Purpose: help identify the encoding of a text file.
 *
 * The first three bytes of a file indicate its encoding:
 *   ANSI:               no signature (file content starts at the first byte)
 *   Unicode:            first two bytes are FF FE
 *   Unicode big endian: first two bytes are FE FF
 *   UTF-8:              first two bytes are EF BB, third byte is BF
 *
 * This program only dumps those leading bytes in hexadecimal; comparing
 * them against the table above is left to the reader.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read the first n bytes of a file and print each byte's value in
 * hexadecimal, tab-separated, on one line prefixed with the file name.
 *
 * fileName: path of the file to inspect.
 * n:        number of leading bytes requested; values <= 0 print no bytes.
 *
 * If the file holds fewer than n bytes, only the bytes actually read are
 * printed. On open or allocation failure a message is printed and the
 * function returns without further output.
 */
void readNBytes(const char *fileName, int n)
{
    /* Clamp so a negative n cannot turn into a huge size_t. */
    size_t wanted = n > 0 ? (size_t)n : 0;
    unsigned char *buf;
    size_t got;
    size_t i;
    FILE *fp;

    /* Binary mode: the raw signature bytes must not go through any
     * text-mode translation. */
    fp = fopen(fileName, "rb");
    if (fp == NULL) {
        printf("open file [%s] failed.\n", fileName);
        return;
    }

    /* Allocate only after the open succeeded, so the failure path above
     * has nothing to release. Ask for at least one byte because
     * malloc(0) may legitimately return NULL. */
    buf = malloc(wanted > 0 ? wanted : 1);
    if (buf == NULL) {
        printf("allocate %d bytes failed.\n", n);
        fclose(fp);
        return;
    }

    /* fread reports how many bytes were really read; never print past it. */
    got = fread(buf, sizeof *buf, wanted, fp);
    fclose(fp);

    printf("%s:\t", fileName);
    for (i = 0; i < got; i++) {
        printf("%x\t", buf[i]);
    }
    printf("\n");

    free(buf);
}

int main(void)
{
    /* Every test file contains the same text: 你what123456 */
    char fileName[][50] = { "ansi.txt", "unicode.txt", "ubigendian.txt", "utf8.txt" };
    size_t count = sizeof fileName / sizeof fileName[0];
    size_t i;

    for (i = 0; i < count; i++) {
        readNBytes(fileName[i], 3);
    }
    return 0;
}
每个测试文件中的内容都是:你what123456
运行结果为:
ansi.txt: c4 e3 77
unicode.txt: ff fe 60
ubigendian.txt: fe ff 4f
utf8.txt: ef bb bf
分类: C/C++, Linux_C/C++