【视频/图像数据格式】基本视频/图像数据格式

基本视频/图像数据格式

1.概述
2.视频图像数据格式
- 2.1 yuv420p
- 2.2 yuv422p
- 2.3 yuv444p
- 2.4 RGB格式
- 2.5 BMP格式
3.格式转换
- 3.1 RGB24转换为YUV420P
4.视频图像评价指标
- 4.1 MSE
- 4.2 PSNR

参考：

雷霄骅博士博客：

http://t.csdnimg.cn/kl2jL
http://t.csdnimg.cn/pMLLE
http://t.csdnimg.cn/FjtOK
http://t.csdnimg.cn/K95yN

ffmpeg-7.0源码中的tests/utils.c文件

1.概述

视频/图像数据格式作为视频/图像处理的对象，是整个视频图像处理体系的最基本单元，有必要熟悉其存储格式。主要视频图像数据格式有yuv420p，yuv422p，yuv444p，bmp和rgb24等，其中视频编码器的主要输入数据格式就是yuv420p，因为这种格式存储数据量很小，易于存储。

视频图像处理体系大致可以分为几层：

协议层（http、rtmp、file…）
封装层（mkv，mp4，flv，mpegts，avi…）
编解码层（h264，h265，mpeg2…）
像素层（yuv420p，yuv422p，yuv444p，rgb24…）

这里仅关注像素层（视频图像的主要组成部分）的数据格式。

2.视频图像数据格式

2.1 yuv420p

最常见的视频编码数据格式，yuv420p格式中，p的含义是planar，即平面存储。存储时分为三个分量Y、U和V，即在内存当中存储时先存储Y，后存储U，最后V。如果是420p格式，U和V分量的数据大小均为Y分量的1/4，这是因为U和V分量的width和height均为Y分量的1/2。
在内存中，YUV420P的存储方式为：Y0 Y1 Y2 … U0 U1 U2 … V0 V1 V2 …，并且Y的长度分别为U和V分量的4倍。如果不是p格式，可能存储的方式是interleave，即交叉式存储，这里不讨论。YUV420p格式的数据，其读取和写入数据的方式为

/**
 * Split planar YUV420 frames into three separate plane files.
 *
 * Each frame in the input holds w*h luma bytes followed by w*h/4 U bytes and
 * w*h/4 V bytes. The planes are appended to "output_420_y.y",
 * "output_420_u.y" and "output_420_v.y" in the current directory.
 *
 * @param url  Path of the input .yuv file.
 * @param w    Frame width in pixels (expected to be even).
 * @param h    Frame height in pixels (expected to be even).
 * @param num  Number of frames to process.
 * @return 0 on success; -1 on invalid arguments, on a file that cannot be
 *         opened, on allocation failure, or when a frame cannot be fully
 *         read or written.
 */
int video_yuv420_split(char *url, int w, int h, int num)
{
    FILE *fp = NULL;
    FILE *fp_y = NULL;
    FILE *fp_u = NULL;
    FILE *fp_v = NULL;
    unsigned char *pic = NULL;
    size_t y_size;
    size_t c_size;
    size_t frame_size;
    int ret = -1;

    if (url == NULL || w <= 0 || h <= 0) {
        return -1;
    }
    /* Reject sizes for which the y_size * 5 / 4 offset below would wrap. */
    if ((size_t)w > ((size_t)-1 / 5) / (size_t)h) {
        return -1;
    }

    y_size = (size_t)w * (size_t)h;
    c_size = y_size / 4;
    /* 3/2 = 1 (Y) + 1/4 (U) + 1/4 (V) */
    frame_size = y_size * 3 / 2;

    /* Open the input first so a bad path does not create empty outputs. */
    fp = fopen(url, "rb");
    if (fp == NULL) {
        goto cleanup;
    }
    fp_y = fopen("output_420_y.y", "wb");
    fp_u = fopen("output_420_u.y", "wb");
    fp_v = fopen("output_420_v.y", "wb");
    if (fp_y == NULL || fp_u == NULL || fp_v == NULL) {
        goto cleanup;
    }

    pic = malloc(frame_size);
    if (pic == NULL) {
        goto cleanup;
    }

    for (int i = 0; i < num; i++) {
        /* A short read means the file holds fewer than num whole frames. */
        if (fread(pic, 1, frame_size, fp) != frame_size) {
            goto cleanup;
        }
        if (fwrite(pic, 1, y_size, fp_y) != y_size ||
            fwrite(pic + y_size, 1, c_size, fp_u) != c_size ||
            fwrite(pic + y_size * 5 / 4, 1, c_size, fp_v) != c_size) {
            goto cleanup;
        }
    }
    ret = 0;

cleanup:
    free(pic);
    if (fp != NULL) {
        fclose(fp);
    }
    /* fclose flushes buffered output, so a failure there is a write error. */
    if (fp_y != NULL && fclose(fp_y) != 0) {
        ret = -1;
    }
    if (fp_u != NULL && fclose(fp_u) != 0) {
        ret = -1;
    }
    if (fp_v != NULL && fclose(fp_v) != 0) {
        ret = -1;
    }
    return ret;
}

2.2 yuv422p

与yuv420p类似，区别在于U和V分量的比例不同。yuv422p格式当中，仍然是先存储Y分量，后存储U分量，最后是V分量。但是U和V分量的大小分别是Y分量的一半（U和V分量的width为Y分量的1/2，height与Y分量相同），例如 Y0 Y1 Y2 Y3 U0 U1 V0 V1。读写方式和yuv420p的区别在于

	/* YUV422P frame: w*h luma bytes, then w*h/2 U bytes, then w*h/2 V bytes. */
	unsigned char *pic = malloc(w * h * 2);
	for (int i = 0; i < num; i++) {
		/* Read one whole frame from the .yuv file; stop on a short read. */
		if (fread(pic, 1, w * h * 2, fp) != (size_t)(w * h * 2)) {
			break;
		}
		fwrite(pic, 1, w * h, fp_y);                     /* Y plane */
		fwrite(pic + w * h, 1, w * h / 2, fp_u);         /* U plane */
		fwrite(pic + w * h * 3 / 2, 1, w * h / 2, fp_v); /* V plane */
	}

2.3 yuv444p

yuv444p格式中，YUV三个分量的大小相同，例如 Y0 Y1 Y2 Y3 U0 U1 U2 U3 V0 V1 V2 V3。读写方式为

	/* YUV444P frame: three planes of w*h bytes each, in Y, U, V order. */
	unsigned char *pic = malloc(w * h * 3);
	for (int i = 0; i < num; i++) {
		/* Read one whole frame from the .yuv file; stop on a short read. */
		if (fread(pic, 1, w * h * 3, fp) != (size_t)(w * h * 3)) {
			break;
		}
		fwrite(pic, 1, w * h, fp_y);             /* Y plane */
		fwrite(pic + w * h, 1, w * h, fp_u);     /* U plane */
		fwrite(pic + w * h * 2, 1, w * h, fp_v); /* V plane */
	}

2.4 RGB格式

对于后缀为.rgb格式的文件，其存储数据的方式与yuv不同。在YUV格式当中，YUV三个通道是分别进行存储，而RGB格式是三个通道交替进行存储，例如 r0 g0 b0 r1 g1 b1 r2 g2 b2…，因此其读写数据的方式也不同。

	/* RGB24 frame: w*h interleaved pixels, 3 bytes each (r g b r g b ...). */
	unsigned char *pic = malloc(w * h * 3);
	for (int i = 0; i < num; i++) {
		/* Read one whole frame from the .rgb file; stop on a short read. */
		if (fread(pic, 1, w * h * 3, fp) != (size_t)(w * h * 3)) {
			break;
		}
		/* De-interleave: send every third byte to its own output file. */
		for (int j = 0; j < w * h * 3; j = j + 3) {
			fwrite(pic + j, 1, 1, fp_y);     /* R component */
			fwrite(pic + j + 1, 1, 1, fp_u); /* G component */
			fwrite(pic + j + 2, 1, 1, fp_v); /* B component */
		}
	}

常见的8种颜色RGB数值为

颜色	RGB
白	(255, 255, 255)
黄	(255, 255, 0)
青	(0, 255, 255)
绿	( 0, 255, 0)
品红	(255, 0, 255)
红	(255, 0, 0)
蓝	(0, 0, 255)
黑	(0, 0, 0)

另外，灰色的RGB数值为 (128, 128, 128)

2.5 BMP格式

BMP格式是对RGB进行封装得到的格式，能够使用普通的图片浏览器打开。对RGB格式进行封装得到BMP格式的方式如下：

/* Store the low 16 bits of value at dst in little-endian byte order. */
static void bmp_put_le16(unsigned char *dst, unsigned int value)
{
    dst[0] = (unsigned char)(value & 0xFFu);
    dst[1] = (unsigned char)((value >> 8) & 0xFFu);
}

/* Store the low 32 bits of value at dst in little-endian byte order. */
static void bmp_put_le32(unsigned char *dst, unsigned long value)
{
    for (int i = 0; i < 4; i++) {
        dst[i] = (unsigned char)((value >> (8 * i)) & 0xFFul);
    }
}

/**
 * Convert a raw RGB24 file to a 24-bit BMP file.
 *
 * The input must hold width*height pixels stored as R,G,B byte triplets,
 * top row first. The output is a 54-byte BMP header followed by the pixel
 * rows in B,G,R order, each row zero-padded to a multiple of 4 bytes.
 *
 * @param rgb24path  Location of the input RGB24 file.
 * @param width      Width of the image in pixels.
 * @param height     Height of the image in pixels.
 * @param bmppath    Location of the output BMP file.
 * @return 0 on success, -1 on invalid arguments or any I/O or allocation
 *         failure.
 */
int video_rgb24_to_bmp(const char *rgb24path, int width, int height, const char *bmppath)
{
    /* File header without the leading 2-byte "BM" signature. */
    typedef struct {
        long imageSize;
        long blank;
        long startPosition;
    } BmpHead;

    typedef struct {
        long Length;
        long width;
        long height;
        unsigned short colorPlane;
        unsigned short bitColor;
        long zipFormat;
        long realSize;
        long xPels;
        long yPels;
        long colorUse;
        long colorImportant;
    } InfoHead;

    /* On-disk sizes; sizeof() of the structs above is platform-dependent. */
    enum { FILE_HEADER_SIZE = 14, INFO_HEADER_SIZE = 40, HEADER_SIZE = 54 };

    BmpHead m_BMPHeader = { 0 };
    InfoHead m_BMPInfoHeader = { 0 };
    unsigned char header[HEADER_SIZE] = { 0 };
    unsigned char *row = NULL;
    FILE *fp_rgb24 = NULL;
    FILE *fp_bmp = NULL;
    unsigned long max_stride;
    size_t src_stride;
    size_t bmp_stride;
    int ret = -1;

    if (rgb24path == NULL || bmppath == NULL || width <= 0 || height <= 0) {
        printf("Error: Invalid arguments.\n");
        return -1;
    }

    /* The size fields are 32-bit, so the whole file must fit in 2^31 - 1. */
    max_stride = (0x7FFFFFFFul - HEADER_SIZE) / (unsigned long)height;
    if (max_stride < 4 || (unsigned long)width > (max_stride - 3) / 3) {
        printf("Error: Image is too large for a BMP file.\n");
        return -1;
    }

    src_stride = (size_t)width * 3;
    /* BMP pixel rows are padded to a multiple of 4 bytes. */
    bmp_stride = (src_stride + 3) & ~(size_t)3;

    if ((fp_rgb24 = fopen(rgb24path, "rb")) == NULL) {
        printf("Error: Cannot open input RGB24 file.\n");
        goto cleanup;
    }
    if ((fp_bmp = fopen(bmppath, "wb")) == NULL) {
        printf("Error: Cannot open output BMP file.\n");
        goto cleanup;
    }

    /* calloc keeps the padding bytes at the end of the row zeroed. */
    row = calloc(1, bmp_stride);
    if (row == NULL) {
        printf("Error: Out of memory.\n");
        goto cleanup;
    }

    m_BMPHeader.imageSize = (long)(bmp_stride * (size_t)height) + HEADER_SIZE;
    m_BMPHeader.startPosition = HEADER_SIZE;

    m_BMPInfoHeader.Length = INFO_HEADER_SIZE;
    m_BMPInfoHeader.width = width;
    /* A negative height marks a top-down bitmap, matching the row order of
     * the RGB24 input (BMP rows are otherwise stored bottom to top). */
    m_BMPInfoHeader.height = -(long)height;
    m_BMPInfoHeader.colorPlane = 1;
    m_BMPInfoHeader.bitColor = 24;
    m_BMPInfoHeader.realSize = (long)(bmp_stride * (size_t)height);

    /* Serialize field by field so the result does not depend on the host's
     * struct padding, sizeof(long) or byte order. */
    header[0] = 'B';
    header[1] = 'M';
    bmp_put_le32(header + 2, (unsigned long)m_BMPHeader.imageSize);
    bmp_put_le32(header + 6, (unsigned long)m_BMPHeader.blank);
    bmp_put_le32(header + 10, (unsigned long)m_BMPHeader.startPosition);
    bmp_put_le32(header + FILE_HEADER_SIZE + 0, (unsigned long)m_BMPInfoHeader.Length);
    bmp_put_le32(header + FILE_HEADER_SIZE + 4, (unsigned long)m_BMPInfoHeader.width);
    bmp_put_le32(header + FILE_HEADER_SIZE + 8, (unsigned long)m_BMPInfoHeader.height);
    bmp_put_le16(header + FILE_HEADER_SIZE + 12, m_BMPInfoHeader.colorPlane);
    bmp_put_le16(header + FILE_HEADER_SIZE + 14, m_BMPInfoHeader.bitColor);
    bmp_put_le32(header + FILE_HEADER_SIZE + 16, (unsigned long)m_BMPInfoHeader.zipFormat);
    bmp_put_le32(header + FILE_HEADER_SIZE + 20, (unsigned long)m_BMPInfoHeader.realSize);
    bmp_put_le32(header + FILE_HEADER_SIZE + 24, (unsigned long)m_BMPInfoHeader.xPels);
    bmp_put_le32(header + FILE_HEADER_SIZE + 28, (unsigned long)m_BMPInfoHeader.yPels);
    bmp_put_le32(header + FILE_HEADER_SIZE + 32, (unsigned long)m_BMPInfoHeader.colorUse);
    bmp_put_le32(header + FILE_HEADER_SIZE + 36, (unsigned long)m_BMPInfoHeader.colorImportant);

    if (fwrite(header, 1, sizeof(header), fp_bmp) != sizeof(header)) {
        printf("Error: Cannot write BMP header.\n");
        goto cleanup;
    }

    for (int j = 0; j < height; j++) {
        if (fread(row, 1, src_stride, fp_rgb24) != src_stride) {
            printf("Error: Cannot read RGB24 data.\n");
            goto cleanup;
        }
        /* BMP stores each pixel as B,G,R: swap the R and B components. */
        for (int i = 0; i < width; i++) {
            unsigned char temp = row[i * 3 + 2];
            row[i * 3 + 2] = row[i * 3 + 0];
            row[i * 3 + 0] = temp;
        }
        if (fwrite(row, 1, bmp_stride, fp_bmp) != bmp_stride) {
            printf("Error: Cannot write BMP data.\n");
            goto cleanup;
        }
    }
    ret = 0;

cleanup:
    free(row);
    if (fp_rgb24 != NULL) {
        fclose(fp_rgb24);
    }
    /* fclose flushes buffered output, so a failure there is a write error. */
    if (fp_bmp != NULL && fclose(fp_bmp) != 0) {
        ret = -1;
    }
    if (ret == 0) {
        printf("Finish generate %s!\n", bmppath);
    }
    return ret;
}

在这里，代码执行的任务包括：

写入BMP文件的头部信息（文件头和信息头）
将RGB格式的文件修改为BGR。这是因为BMP存储时使用的是小端存储（Little Endian），存储时的顺序为B、G、R

BMP文件是由BITMAPFILEHEADER、BITMAPINFOHEADER、RGB像素数据共3个部分构成，如下所示。其中，BITMAPFILEHEADER对应上述的BmpHead，BITMAPINFOHEADER对应上述的InfoHead。

/*
 * On-disk layout of the two BMP headers. Multi-byte fields are little-endian
 * in the file. The 4-byte fields use int/unsigned int, which assumes a
 * platform where int is 32 bits wide; long is avoided because it is 8 bytes
 * on LP64 systems.
 */

/* The file header is 14 bytes on disk; 2-byte packing prevents the compiler
 * from inserting padding after bfType. */
#pragma pack(push, 2)
typedef struct tagBITMAPFILEHEADER
{
    unsigned short int bfType;        /* file type, must be "BM" */
    unsigned int       bfSize;        /* size of the file in bytes */
    unsigned short int bfReserverd1;  /* reserved, must be 0 (Win32 name: bfReserved1) */
    unsigned short int bfReserverd2;  /* reserved, must be 0 (Win32 name: bfReserved2) */
    unsigned int       bfbfOffBits;   /* offset in bytes from the file header to the pixel data (Win32 name: bfOffBits) */
} BITMAPFILEHEADER;
#pragma pack(pop)

/* The info header is 40 bytes on disk. */
typedef struct tagBITMAPINFOHEADER
{
    int       biSize;           /* size of this structure in bytes */
    int       biWidth;          /* image width in pixels */
    int       biHeight;         /* image height in pixels */
    short int biPlanes;         /* number of planes for the target device, must be 1 */
    short int biBitcount;       /* color depth: bits per pixel */
    int       biCompression;    /* compression type; a 4-byte field in the file format */
    int       biSizeImage;      /* size of the pixel data in bytes */
    int       biXPelsPermeter;  /* horizontal resolution, pixels per meter */
    int       biYPelsPermeter;  /* vertical resolution, pixels per meter */
    int       biClrUsed;        /* number of color-table entries actually used */
    int       biClrImportant;   /* number of colors important for display */
} BITMAPINFOHEADER;

3.格式转换

3.1 RGB24转换为YUV420P

RGB24转换YUV420p的公式为：

Y = 0.299 * R + 0.587 * G + 0.114 * B
U = -0.147 * R - 0.289 * G + 0.436 * B
V = 0.615 * R - 0.515 * G - 0.100 * B

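代码参考ffmpeg-7.0当中的tests/utils.c，这个文件中的写法与雷霄骅博士的写法有所不同，或许更好理解。需要注意的是，下面的代码在计算Cb/Cr时使用的是另一组系数（0.16874、0.33126、0.50000、0.41869、0.08131），并且会给色度分量加上128的偏移，因此与上面的公式并不完全相同。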

#define SCALEBITS 8
#define ONE_HALF  (1 << (SCALEBITS - 1))
/* Convert a real coefficient to fixed point with SCALEBITS fractional bits,
 * i.e. scale it by 1 << SCALEBITS (256) and round to nearest. */
#define FIX(x)    ((int) ((x) * (1 << SCALEBITS) + 0.5))

/* Print the current errno message and exit(1) when expr is true. */
#define err_if(expr) do {                                              \
    if (expr) {                                                        \
        fprintf(stderr, "%s\n", strerror(errno));                      \
        exit(1);                                                       \
    }                                                                  \
} while (0)

/* Fixed-point luma of one RGB pixel, rounded to nearest. */
static int rgb_to_luma(int r, int g, int b)
{
    return (FIX(0.29900) * r + FIX(0.58700) * g +
            FIX(0.11400) * b + ONE_HALF) >> SCALEBITS;
}

/**
 * Convert a packed RGB24 image to planar YUV420.
 *
 * One luma sample is produced per pixel. One Cb and one Cr sample are
 * produced per 2x2 pixel block, computed from the sum of the block's four
 * RGB values.
 *
 * @param lum     Output luma (Y) plane, width * height bytes.
 * @param cb      Output Cb (U) plane, (width / 2) * (height / 2) bytes.
 * @param cr      Output Cr (V) plane, (width / 2) * (height / 2) bytes.
 * @param src     Input image, width * height pixels of 3 bytes (R, G, B).
 * @param width   Image width in pixels; must be even.
 * @param height  Image height in pixels; must be even.
 */
static void rgb24_to_yuv420p(unsigned char *lum, unsigned char *cb,
                             unsigned char *cr, const unsigned char *src,
                             int width, int height)
{
    const size_t lum_stride = (size_t)width;     /* bytes per luma row */
    const size_t rgb_stride = (size_t)width * 3; /* bytes per RGB row */

    /* Walk the image in 2x2 blocks because the chroma planes have half the
     * width and half the height of the luma plane. */
    for (int y = 0; y < height; y += 2) {
        for (int x = 0; x < width; x += 2) {
            int r_sum = 0;
            int g_sum = 0;
            int b_sum = 0;

            for (int dy = 0; dy < 2; dy++) {
                const unsigned char *rgb_row = src + (size_t)(y + dy) * rgb_stride;
                unsigned char *lum_row = lum + (size_t)(y + dy) * lum_stride;

                for (int dx = 0; dx < 2; dx++) {
                    const unsigned char *px = rgb_row + (size_t)(x + dx) * 3;
                    const int r = px[0];
                    const int g = px[1];
                    const int b = px[2];

                    r_sum += r;
                    g_sum += g;
                    b_sum += b;
                    lum_row[x + dx] = (unsigned char)rgb_to_luma(r, g, b);
                }
            }

            /* The sums cover four pixels, so the extra shift by 2 divides by
             * four; 128 is the chroma offset. */
            *cb++ = (unsigned char)(((-FIX(0.16874) * r_sum - FIX(0.33126) * g_sum +
                                      FIX(0.50000) * b_sum + 4 * ONE_HALF - 1)
                                     >> (SCALEBITS + 2)) + 128);
            *cr++ = (unsigned char)(((FIX(0.50000) * r_sum - FIX(0.41869) * g_sum -
                                      FIX(0.08131) * b_sum + 4 * ONE_HALF - 1)
                                     >> (SCALEBITS + 2)) + 128);
        }
    }
}

/**
 * Convert an RGB24 image to YUV420 and write it out.
 *
 * With a filename, the output is a binary PGM ("P5") of size w x (h * 3 / 2):
 * the luma plane followed by h / 2 rows, each holding w / 2 Cb bytes and then
 * w / 2 Cr bytes. With a NULL filename, the raw Y, Cb and Cr planes are
 * written one after another to stdout, without a header.
 *
 * Any allocation, open or write failure prints the errno message and exits
 * the process with status 1.
 *
 * @param filename  Output path, or NULL to write to stdout.
 * @param w         Image width in pixels; must be positive and even.
 * @param h         Image height in pixels; must be positive and even.
 * @param rgb_tab   Input image, w * h pixels of 3 bytes (R, G, B).
 */
static void pgmyuv_save(const char *filename, int w, int h,
                        const unsigned char *rgb_tab)
{
    const size_t luma_size = (size_t)w * (size_t)h;
    const size_t chroma_size = luma_size / 4;
    const size_t chroma_row = (size_t)(w / 2);
    const int chroma_rows = h / 2;
    unsigned char *lum_tab = malloc(luma_size);
    unsigned char *cb_tab = malloc(chroma_size);
    unsigned char *cr_tab = malloc(chroma_size);
    const unsigned char *cb;
    const unsigned char *cr;
    FILE *f;

    err_if(!lum_tab || !cb_tab || !cr_tab);

    rgb24_to_yuv420p(lum_tab, cb_tab, cr_tab, rgb_tab, w, h);

    if (filename) {
        f = fopen(filename, "wb");
        err_if(!f);
        fprintf(f, "P5\n%d %d\n%d\n", w, h * 3 / 2, 255);
    } else {
        f = stdout;
    }

    err_if(fwrite(lum_tab, 1, luma_size, f) != luma_size); /* Y plane */

    cb = cb_tab;
    cr = cr_tab;

    if (filename) {
        /* Each output row holds one Cb row followed by one Cr row. */
        for (int i = 0; i < chroma_rows; i++) {
            err_if(fwrite(cb, 1, chroma_row, f) != chroma_row);
            err_if(fwrite(cr, 1, chroma_row, f) != chroma_row);
            cb += chroma_row;
            cr += chroma_row;
        }
        fclose(f);
    } else {
        /* Planar output: the whole Cb plane, then the whole Cr plane. */
        for (int i = 0; i < chroma_rows; i++) {
            err_if(fwrite(cb, 1, chroma_row, f) != chroma_row);
            cb += chroma_row;
        }
        for (int i = 0; i < chroma_rows; i++) {
            err_if(fwrite(cr, 1, chroma_row, f) != chroma_row);
            cr += chroma_row;
        }
    }

    free(lum_tab);
    free(cb_tab);
    free(cr_tab);
}

4.视频图像评价指标

4.1 MSE

MSE全称为Mean Square Error，表示均方误差，其计算方式为

int width = WIDTH;
int height = HEIGHT;
double mse = 0.0;
for(int j = 0; j < width * height; j++){mse += pow((double)(src[j] - dst[j]), 2);
}
mse = mse / (width * height);

4.2 PSNR

PSNR的计算是在MSE计算的基础之上获得的，计算方式为

/* PSNR in dB from the MSE, using 255 as the peak sample value. */
const double peak_squared = 255.0 * 255.0;
double psnr = 10 * log10(peak_squared / mse);

PSNR描述了两幅图片的差异程度，单位是dB，dB越大，表示两幅图像越接近，否则差异越大。在视频编码标准中，PSNR是衡量编码工具的重要指标，通常与Bitrate结合起来，来评判编码算法的优劣。

CSDN：https://blog.csdn.net/weixin_42877471
Github：https://github.com/DoFulangChen