// Split a large file (the test file here is 5.2 GB) into chunks of a given size, then concatenate the chunks back into the original file.
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>
#include <system_error>
#include <vector>

#include <boost/timer.hpp> // timing utility
// Suffix appended to the source file's stem to name the chunk directory: <stem>_chunk
#define FILE_SUFFIX "_chunk"
// printf-style pattern for the name of each chunk file: <stem>_chunk_<index>.bin
#define CHUNK_NAME "%s_chunk_%d.bin"

// Formats the file name of chunk number `index` from CHUNK_NAME, sizing the result to fit.
static std::string makeChunkName(const std::string& stem, std::size_t index)
{
    const int chunkIndex = static_cast<int>(index);
    const int length = std::snprintf(nullptr, 0, CHUNK_NAME, stem.c_str(), chunkIndex);
    if (length <= 0) {
        return std::string();
    }
    std::string name(static_cast<std::size_t>(length), '\0');
    // snprintf writes `length` characters plus a terminating NUL; the NUL lands on the
    // string's own terminator, so nothing is written past the string's storage.
    std::snprintf(&name[0], name.size() + 1, CHUNK_NAME, stem.c_str(), chunkIndex);
    return name;
}

/// <summary>
/// Splits a file into consecutive chunks of at most chunkSize bytes each.
/// The chunks are written to a directory named "&lt;stem&gt;_chunk" next to the source file,
/// as "&lt;stem&gt;_chunk_0.bin", "&lt;stem&gt;_chunk_1.bin", ... An empty source file produces no chunks.
/// </summary>
/// <param name="inputFile">Path of the file to split.</param>
/// <param name="chunkSize">Maximum size of each chunk in bytes, e.g. 500 * 1024 * 1024 for 500 MB chunks. Must be greater than zero.</param>
/// <param name="inputFiles">Receives the path of every chunk that was written completely, appended in order. Existing entries are kept.</param>
/// <returns>true when the whole file was split; false if chunkSize is zero, the source cannot be opened or read, the chunk directory cannot be created, or a chunk cannot be written.</returns>
bool splitFile(const std::string& inputFile, size_t chunkSize, std::vector<std::string>& inputFiles)
{
    if (chunkSize == 0) {
        // A zero chunk size would otherwise divide by zero when counting chunks.
        std::cout << "Error: chunk size must be greater than zero" << std::endl;
        return false;
    }

    std::ifstream inputFileStream(inputFile, std::ios::binary | std::ios::ate);
    if (!inputFileStream.is_open()) {
        std::cout << "Error opening file: " << inputFile << std::endl;
        return false;
    }

    // The stream was opened at the end, so the current position is the file size.
    const std::streamoff endPosition = inputFileStream.tellg();
    if (endPosition < 0) {
        std::cout << "Error reading file: " << inputFile << std::endl;
        return false;
    }
    const std::uintmax_t fileSize = static_cast<std::uintmax_t>(endPosition);
    inputFileStream.seekg(0, std::ios::beg);

    const std::filesystem::path sourcePath(inputFile);
    const std::string inputFileName = sourcePath.filename().stem().string();
    const std::filesystem::path chunkDir = sourcePath.parent_path() / (inputFileName + FILE_SUFFIX);

    // The error_code overload reports failure through the return value instead of throwing;
    // an already existing directory is not an error.
    std::error_code dirError;
    std::filesystem::create_directories(chunkDir, dirError);
    if (dirError) {
        std::cout << "Error creating directory: " << chunkDir.string() << std::endl;
        return false;
    }

    // Copy through a bounded buffer so memory use does not grow with the chunk size.
    constexpr std::size_t kCopyBufferSize = 4 * 1024 * 1024;
    std::vector<char> buffer(std::min<std::size_t>(chunkSize, kCopyBufferSize));

    std::uintmax_t remainingInFile = fileSize;
    for (std::size_t i = 0; remainingInFile > 0; ++i) {
        const std::uintmax_t chunkSizeActual = std::min<std::uintmax_t>(chunkSize, remainingInFile);
        const std::string chunkFileName = (chunkDir / makeChunkName(inputFileName, i)).string();

        std::ofstream chunkFile(chunkFileName, std::ios::binary);
        if (!chunkFile.is_open()) {
            std::cout << "Error creating chunk file: " << chunkFileName << std::endl;
            return false;
        }

        // The source is read sequentially, so each chunk starts where the previous one ended.
        std::uintmax_t remainingInChunk = chunkSizeActual;
        while (remainingInChunk > 0) {
            const std::streamsize wanted =
                static_cast<std::streamsize>(std::min<std::uintmax_t>(buffer.size(), remainingInChunk));
            inputFileStream.read(buffer.data(), wanted);
            if (inputFileStream.gcount() != wanted) {
                std::cout << "Error reading file: " << inputFile << std::endl;
                return false;
            }
            chunkFile.write(buffer.data(), wanted);
            if (!chunkFile) {
                std::cout << "Error writing chunk file: " << chunkFileName << std::endl;
                return false;
            }
            remainingInChunk -= static_cast<std::uintmax_t>(wanted);
        }

        // Close explicitly so that a failed final flush is detected.
        chunkFile.close();
        if (!chunkFile) {
            std::cout << "Error writing chunk file: " << chunkFileName << std::endl;
            return false;
        }

        // Record the chunk only once it is completely on disk.
        inputFiles.push_back(chunkFileName);
        std::cout << "save split: " << chunkFileName << std::endl;
        remainingInFile -= chunkSizeActual;
    }

    return true;
}
/// <summary>
/// Concatenates the given files, in order, into a single output file.
/// </summary>
/// <param name="outputFileName">Path of the merged file; it is created, or truncated if it already exists.</param>
/// <param name="inputFiles">Paths of the files to append, in the order they should appear in the output.</param>
/// <returns>true when every input file was appended; false if the output or an input cannot be opened, or a write fails.</returns>
bool mergeFiles(const std::string& outputFileName, const std::vector<std::string>& inputFiles)
{
    std::ofstream outputFile(outputFileName, std::ios::binary);
    if (!outputFile.is_open()) {
        std::cout << "Error creating output file: " << outputFileName << std::endl;
        return false;
    }

    for (const auto& inputFile : inputFiles) {
        std::ifstream inputFileStream(inputFile, std::ios::binary);
        if (!inputFileStream.is_open()) {
            std::cout << "Error opening input file: " << inputFile << std::endl;
            return false;
        }

        // operator<<(streambuf*) sets failbit on the destination when it inserts no characters,
        // so streaming an empty file would make every later write fail silently.
        // Only stream inputs that actually contain data.
        if (inputFileStream.peek() != std::ifstream::traits_type::eof()) {
            outputFile << inputFileStream.rdbuf();
        }
        if (!outputFile) {
            std::cout << "Error writing output file: " << outputFileName << std::endl;
            return false;
        }
        std::cout << "merge: " << inputFile << std::endl;
    }

    // Close explicitly so that a failed final flush is detected.
    outputFile.close();
    if (!outputFile) {
        std::cout << "Error writing output file: " << outputFileName << std::endl;
        return false;
    }
    return true;
}

// splitFile is defined earlier in this file; it is redeclared here so that this section
// can be read and compiled on its own.
bool splitFile(const std::string& inputFile, size_t chunkSize, std::vector<std::string>& inputFiles);

/// <summary>
/// Demo driver: splits ./split_file.rar into 500 MB chunks, merges the chunks back into
/// ./merged_file.rar, and prints the elapsed wall-clock seconds of each step.
/// </summary>
/// <returns>0 when both steps succeed, 1 otherwise.</returns>
int main()
{
    const std::string inputFile = "./split_file.rar";       // the file used for testing is about 5.2 GB
    const std::string mergedFileName = "./merged_file.rar";
    const size_t chunkSize = 500 * 1024 * 1024;             // 500 MB

    // steady_clock measures wall-clock time, which is what matters for an I/O-bound job.
    using Clock = std::chrono::steady_clock;
    const auto secondsSince = [](Clock::time_point begin) {
        return std::chrono::duration<double>(Clock::now() - begin).count();
    };

    Clock::time_point start = Clock::now();
    std::vector<std::string> inputFiles;
    const bool sp = splitFile(inputFile, chunkSize, inputFiles);
    if (sp) {
        std::cout << "splitFile successed" << std::endl;
    } else {
        std::cout << "splitFile failed" << std::endl;
    }
    std::cout << secondsSince(start) << std::endl; // seconds
    if (!sp) {
        // Merging an incomplete set of chunks would only produce a truncated file.
        return 1;
    }

    start = Clock::now(); // restart timing for the merge step
    const bool me = mergeFiles(mergedFileName, inputFiles);
    if (me) {
        std::cout << "mergeFiles successed" << std::endl;
    } else {
        std::cout << "mergeFiles failed" << std::endl;
    }
    std::cout << secondsSince(start) << std::endl; // seconds

    return me ? 0 : 1;
}