可优化语句执行

概述
CopyOneRowTo函数
- ScalarVector类
- CopySendString 函数
- FixedRowOut 函数
- CopySendInt32 函数
- CopySendData 函数
- appendBinaryStringInfo 函数

声明：本文的部分内容参考了他人的文章。在编写过程中，我们尊重他人的知识产权和学术成果，力求遵循合理使用原则，并在适用的情况下注明引用来源。
本文主要参考了 openGauss 1.1.0 的开源代码。

概述

本文主要围绕列存储进行学习。

CopyOneRowTo函数

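CopyOneRowTo 函数的作用是在执行 COPY TO 时输出一行数据（row）。它接收该行各列的值数组（values）和空值标记数组（nulls），根据 cstate 中指定的格式（二进制、CSV、文本或固定列宽）逐列调用对应的输出函数，把结果追加到输出缓冲区中，最后通过 cstate->writelineFunc 把整行发送出去。需要注意的是，该函数本身并不直接在 ScalarVector 之间复制数据：values/nulls 由调用方提供（例如在处理批次数据时，从各列的 ScalarVector 中逐行取出），CopyOneRowTo 只负责把这一行按目标格式编码并输出。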
CopyOneRowTo函数源码如下：（src/gausskernel/optimizer/commands/copy.cpp）

/*
 * Emit one row during CopyTo().
 *
 * cstate   - COPY state: selects the output format and supplies the attribute list,
 *            the per-column output functions and the per-row memory context.
 * tupleOid - OID of the tuple; only emitted when cstate->oids is set.
 * values   - column values, indexed by (attribute number - 1).
 * nulls    - NULL flags, indexed by (attribute number - 1).
 */
static void CopyOneRowTo(CopyState cstate, Oid tupleOid, Datum* values, const bool* nulls)
{
    bool need_delim = false;                          /* true once a delimiter is due before the next field */
    FmgrInfo* out_functions = cstate->out_functions;  /* per-column output functions */
    MemoryContext oldcontext;                         /* context to restore on exit */
    ListCell* cur = NULL;                             /* cursor over the attribute list */
    char* string = NULL;                              /* text form of the value being emitted */

    /* Reset the per-row context and run the whole row inside it. */
    MemoryContextReset(cstate->rowcontext);
    oldcontext = MemoryContextSwitchTo(cstate->rowcontext);

    if (IS_BINARY(cstate)) {
        /* Binary per-tuple header: the number of attributes being sent. */
        CopySendInt16(cstate, list_length(cstate->attnumlist));

        /* Send the OID if wanted: a length word followed by the value. */
        if (cstate->oids) {
            /* Assumes Oid is the same size as int32. */
            CopySendInt32(cstate, sizeof(int32));
            CopySendInt32(cstate, tupleOid);
        }
    } else if (cstate->oids) {
        /* Text format OID; assumes digits need no quoting or encoding conversion. */
        string = DatumGetCString(DirectFunctionCall1(oidout, ObjectIdGetDatum(tupleOid)));
        CopySendString(cstate, string);
        need_delim = true;
    }

    if (IS_FIXED(cstate)) {
        /* Fixed-width format has its own row writer. */
        FixedRowOut(cstate, values, nulls);
    } else {
        foreach (cur, cstate->attnumlist) {
            int attnum = lfirst_int(cur);      /* 1-based attribute number */
            Datum value = values[attnum - 1];
            bool isnull = nulls[attnum - 1];

            if (cstate->fileformat == FORMAT_CSV || cstate->fileformat == FORMAT_TEXT) {
                /* CSV/text: separate this field from the previous one. */
                if (need_delim) {
                    CopySendString(cstate, cstate->delim);
                }
                need_delim = true;
            }

            if (isnull) {
                switch (cstate->fileformat) {
                    case FORMAT_CSV:
                    case FORMAT_TEXT:
                        CopySendString(cstate, cstate->null_print_client);
                        break;
                    case FORMAT_BINARY:
                        /* A length word of -1 stands for NULL. */
                        CopySendInt32(cstate, -1);
                        break;
                    default:
                        ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Invalid file format")));
                }
            } else {
                if (!IS_BINARY(cstate)) {
                    /* Non-binary: convert the value to text, then apply format-specific output. */
                    string = OutputFunctionCall(&out_functions[attnum - 1], value);
                    switch (cstate->fileformat) {
                        case FORMAT_CSV:
                            CopyAttributeOutCSV(cstate,
                                string,
                                cstate->force_quote_flags[attnum - 1],
                                list_length(cstate->attnumlist) == 1);
                            break;
                        case FORMAT_TEXT:
                            CopyAttributeOutText(cstate, string);
                            break;
                        default:
                            ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("Invalid file format")));
                    }
                } else {
                    /* Binary: send the payload length, then the payload without its varlena header. */
                    bytea* outputbytes = NULL;

                    outputbytes = SendFunctionCall(&out_functions[attnum - 1], value);
                    CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
                    CopySendData(cstate, VARDATA(outputbytes), VARSIZE(outputbytes) - VARHDRSZ);
                }
            }
        }
    }

    /* Hand the finished row to the line writer, then restore the caller's memory context. */
    cstate->writelineFunc(cstate);
    (void)MemoryContextSwitchTo(oldcontext);
}

ScalarVector类

ScalarVector 类是一种数据结构，用于存储单一数据列的向量化数据。在数据库系统中，数据通常以表格的形式存储，每个列都包含一组数据。ScalarVector 类的作用是为了优化这些列数据的处理，提高数据访问和计算的效率。
ScalarVector 类的源码如下：（路径：src/include/vecexecutor/vectorbatch.h）

// the core data structure for a column
class ScalarVector : public BaseObject {friend class VectorBatch;public:// number of values.int m_rows;// type desciption information for this scalar value.ScalarDesc m_desc;// this value means that the value in the scalarvector is always the samebool m_const;// flags in the scalar value array.uint8* m_flag;// a company buffer for store the data if the data type is not plain.VarBuf* m_buf;// the value array.ScalarValue* m_vals;public:// decode a variable length data.// null value judgement should be outside of this function.FORCE_INLINEstatic Datum Decode(ScalarValue val){return val;}// convert a datum to scalar valuestatic ScalarValue DatumToScalar(Datum datumVal, Oid datumType, bool isNull);template <Oid datumType>static ScalarValue DatumToScalarT(Datum datumVal, bool isNull);public:// constructor/deconstructor.ScalarVector();~ScalarVector();// init the ScalarVector.//void init(MemoryContext cxt, ScalarDesc desc);// used in tsdb. init with another ScalarVector object.//void init(MemoryContext cxt, ScalarVector* vec, const int batchSize);// serialize the Scalar vector//void Serialize(StringInfo buf);// serialize the Scalar vector of the particular index//void Serialize(StringInfo buf, int idx);// Deserialize the vector//char* Deserialize(char* msg, size_t len);// Add a variable length data// this var may be from// cstring, fixed length(> 8) data type, or pg traditional header-contain variable lengthDatum AddVar(Datum data, int index);// Add a header-contain variableDatum AddVarWithHeader(Datum data);// Add a variable without header on a special position. The original variable will be// transfered in together with the length of the content. 
And inside the funtion, the header// of the ScalarValue will be added before the actual content according to the data type.Datum AddBPCharWithoutHeader(const char* data, int maxLen, int len, int aindex);Datum AddVarCharWithoutHeader(const char* data, int len, int aindex);// Add a short decimal without header on a special position. The value of decimal// will be transfered in by int64 format together with the scale of it. And inside the function,// the header will be added and the value will be converted into PG format. Here we only support// short decimal which can be stored using int64.Datum AddShortNumericWithoutHeader(int64 value, uint8 scale, int aindex);Datum AddBigNumericWithoutHeader(int128 value, uint8 scale, int aindex);char* AddVars(const char* src, int length);// add a normal header-contain valDatum AddHeaderVar(Datum data, int index);// add a cstring type valDatum AddCStringVar(Datum data, int index);// add a fixed length valtemplate <Size len>Datum AddFixLenVar(Datum data, int index);// copy a vectorvoid copy(ScalarVector* vector, int start_idx, int endIdx);void copy(ScalarVector* vector);void copyDeep(ScalarVector* vector, int start_idx, int endIdx);void copyNth(ScalarVector* vector, int Nth);void copy(ScalarVector* vector, const bool* pSel);// convert a cstring to Scalar value.static Datum DatumCstringToScalar(Datum data, Size len);// convert a fixed len datatype to Scalar Valuestatic Datum DatumFixLenToScalar(Datum data, Size len);FORCE_INLINEbool IsNull(int i){Assert(i >= 0 && i < m_rows);return ((m_flag[i] & V_NULL_MASK) == V_NULL_MASK);}FORCE_INLINEvoid SetNull(int i){Assert(i >= 0 && i < BatchMaxSize);m_flag[i] |= V_NULL_MASK;}FORCE_INLINEvoid SetAllNull(){for (int i = 0; i < m_rows; i++) {SetNull(i);}}private:// init some function pointer.void BindingFp();Datum (ScalarVector::*m_addVar)(Datum data, int index);
};

CopySendString 函数

CopySendString 函数的作用是将一个以 null 结尾的字符串追加到 CopyState 结构中的前端消息缓冲区（fe_msgbuf）中，以便之后将这些数据发送给客户端。它先用 strlen 计算字符串长度，再调用 appendBinaryStringInfo 函数将字符串数据追加到消息缓冲区中。CopySendString 函数源码如下：（src/gausskernel/optimizer/commands/copy.cpp）

/*
 * CopySendString
 *
 * Append a null-terminated string to cstate->fe_msgbuf. The terminating
 * null itself is not counted in the appended length.
 */
void CopySendString(CopyState cstate, const char* str)
{
    const size_t textLen = strlen(str);

    appendBinaryStringInfo(cstate->fe_msgbuf, str, textLen);
}

appendBinaryStringInfo 函数用于向StringInfo结构中追加任意二进制数据。首先，它会检查 StringInfo 结构是否为空。然后，根据需要分配更多空间以容纳要追加的数据。接下来，使用 memcpy_s 函数将数据追加到 StringInfo 结构的末尾，并更新长度信息。最后，会在字符串的末尾添加一个 null 字符，即使对于二进制数据来说，这个 null 字符可能没有实际用处。appendBinaryStringInfo 函数源码如下：（src/common/backend/lib/stringinfo.cpp）

/*
 * appendBinaryStringInfo
 *
 * Append arbitrary binary data to a StringInfo, allocating more space
 * if necessary.
 *
 * str     - destination buffer; must not be NULL.
 * data    - bytes to append.
 * datalen - number of bytes to append.
 */
void appendBinaryStringInfo(StringInfo str, const char* data, int datalen)
{
    Assert(str != NULL);

    /* Make more room if needed. */
    enlargeStringInfo(str, datalen);

    /* OK, append the data; the destination size passed is maxlen minus the current length. */
    errno_t rc = memcpy_s(str->data + str->len, (size_t)(str->maxlen - str->len), data, (size_t)datalen);
    securec_check(rc, "\0", "\0");
    str->len += datalen;

    /*
     * Keep a trailing null in place, even though it's probably useless for
     * binary data. (Some callers are dealing with text but call this because
     * their input isn't null-terminated.)
     */
    str->data[str->len] = '\0';
}

DatumGetCString 是一个宏，作用是将 Datum 类型的数据转换为C字符串。源码如下：（路径：src/include/postgres.h）

/*
 * DatumGetCString
 *		Returns C string (null-terminated string) value of a datum.
 *
 * Note: C string is not a full-fledged Postgres type at present,
 * but type input functions use this conversion for their inputs.
 */
#define DatumGetCString(X) ((char*)DatumGetPointer(X))

FixedRowOut 函数

FixedRowOut 函数是在固定列宽格式下将一行数据输出。它首先根据格式信息扩展输出缓冲区，然后遍历每个字段进行处理，根据字段的值和是否为null，调用相应的处理函数输出数据或null值。源码如下：（路径：src/gausskernel/optimizer/commands/formatter.cpp）

/*
 * FixedRowOut
 *
 * Emit one row in fixed-width format.
 *
 * cstate - COPY state; cstate->formatter is used as a FixFormatter and
 *          cstate->out_functions holds the per-column output functions.
 * values - column values, indexed by (attribute number - 1).
 * nulls  - NULL flags, indexed by (attribute number - 1).
 */
void FixedRowOut(CopyState cstate, Datum* values, const bool* nulls)
{
    FmgrInfo* out_functions = cstate->out_functions;
    FixFormatter* formatter = (FixFormatter*)cstate->formatter;
    FieldDesc* descs = formatter->fieldDesc;
    char* string = NULL;

    /* Make room for a whole line up front. */
    enlargeStringInfo(cstate->fe_msgbuf, formatter->lineSize);

    for (int i = 0; i < formatter->nfield; i++) {
        int attnum = descs[i].attnum;      /* 1-based attribute number of this field */
        Datum value = values[attnum - 1];
        bool isnull = nulls[attnum - 1];

        if (isnull) {
            /* NULL: emit the field's configured null string. */
            AttributeOutFixed<false>(cstate, descs[i].nullString, descs + i);
        } else {
            /* Non-NULL: convert to text with the column's output function, then emit. */
            string = OutputFunctionCall(&out_functions[attnum - 1], value);
            Assert(string != NULL);
            AttributeOutFixed<false>(cstate, string, descs + i);
        }
    }
}

CopySendInt32 函数

CopySendInt32 函数用于将一个 int32 类型的值以网络字节序发送出去。它首先将传入的 int32 值转换为网络字节序，并将结果存储在 buf 中，然后通过调用 CopySendData 函数将 buf 中的数据发送出去。函数源码如下：（路径：src/gausskernel/optimizer/commands/copy.cpp）

/*
 * These functions do apply some data conversion.
 */

/*
 * CopySendInt32 sends an int32 in network byte order.
 */
static void CopySendInt32(CopyState cstate, int32 val)
{
    uint32 buf;

    /* Convert to network byte order, then send the raw bytes. */
    buf = htonl((uint32)val);
    CopySendData(cstate, &buf, sizeof(buf));
}

CopySendData 函数

下面源码开头的注释说明了一组发送数据的函数（CopySendData、CopySendString、CopySendChar 和 CopySendEndOfRow），此处只列出其中 CopySendData 的实现。这些函数会将指定的数据追加到 cstate->fe_msgbuf 中，其中 cstate 是 CopyState 结构体的指针，表示数据拷贝的状态。它们分别用于发送二进制数据、以 null 结尾的字符串、单个字符以及在每行数据末尾执行适当的操作。这些函数并不会对数据进行任何转换，只是简单地将数据追加到消息缓冲区中。

/* ----------
 * CopySendData sends output data to the destination (file or frontend)
 * CopySendString does the same for null-terminated strings
 * CopySendChar does the same for single characters
 * CopySendEndOfRow does the appropriate thing at end of each data row
 *	(data is not actually flushed except by CopySendEndOfRow)
 *
 * NB: no data conversion is applied by these functions
 * ----------
 */
static void CopySendData(CopyState cstate, const void* databuf, int datasize)
{
    /* Append datasize bytes to the pending message buffer. */
    appendBinaryStringInfo(cstate->fe_msgbuf, (const char*)databuf, datasize);
}

appendBinaryStringInfo 函数

appendBinaryStringInfo 函数接受一个 StringInfo 结构体指针 str，一个 const char* 类型的数据指针 data，以及一个整数 datalen，表示数据的长度。函数会首先确保 str 不为空，然后根据需要分配更多空间，将指定长度的数据复制到 str 的数据缓冲区中，然后更新已追加数据的长度，并在数据末尾添加一个 null 字符，以保证字符串的正确终止。这个函数通常用于将二进制数据添加到 StringInfo 结构体中，StringInfo 是一个动态字符串结构体，它的大小可以根据需要自动增长。源码如下：（路径：src/common/backend/lib/stringinfo.cpp）

/*
 * appendBinaryStringInfo
 *
 * Append arbitrary binary data to a StringInfo, allocating more space
 * if necessary.
 *
 * str     - destination buffer; must not be NULL.
 * data    - bytes to append.
 * datalen - number of bytes to append.
 */
void appendBinaryStringInfo(StringInfo str, const char* data, int datalen)
{
    Assert(str != NULL);

    /* Make more room if needed. */
    enlargeStringInfo(str, datalen);

    /* OK, append the data; the destination size passed is maxlen minus the current length. */
    errno_t rc = memcpy_s(str->data + str->len, (size_t)(str->maxlen - str->len), data, (size_t)datalen);
    securec_check(rc, "\0", "\0");
    str->len += datalen;

    /*
     * Keep a trailing null in place, even though it's probably useless for
     * binary data. (Some callers are dealing with text but call this because
     * their input isn't null-terminated.)
     */
    str->data[str->len] = '\0';
}