最近编写NTFS文件实时搜索工具, 类似 Everything 这样, 速度快还小巧, 于是花了约3周进行研究, 总结下学习过程中一些经验
实现分3部分完成
一. 解析NTFS 主文件表(MFT)
这一步是获取文件数据的唯一迅速且可靠的来源
NTFS_MFT_Parse.h
#pragma once
#include "NTFS_Base.h"
#include <functional>#define NTFS_MFT_PARSE_FILE_BUF_COUNT (1024 * 16) // MFT块分析文件项缓冲数量// 文件名处理回调
using NtfsFilenameCb = std::function<bool(const NTFS_VOLUME_INFO& volInfo, PNTFS_FILE_RECORD_HEADER pHeader, FILE_REFERENCE record, const MFT_FILE_INFO_LIST& mftInfoList)>;// NTFS 主文件表 解析类
// FlameCyclone 2024.12.11
class NTFS_MFT_Parse:public NTFS_Base
{
public:NTFS_MFT_Parse();~NTFS_MFT_Parse();//// @brief: 解析所有 NTFS 卷文件记录// @param: volFileList 卷文件列表// @param: dwDriveIndexMask 驱动器索引掩码(位组合: C: 0x01 D: 0x02 E: 0x04 F: 0x08...)// @ret: bool 操作是否成功static bool ParseRecord(DWORD dwDriveIndexMask = 0xFFFFFFFF,NtfsFilenameCb cb = nullptr);private:// 解析卷static bool _ParseVolume(const NTFS_VOLUME_INFO& volInfo, NtfsFilenameCb cb);// 解析主文件表static bool _ParseMasterFileTable(HANDLE hFile, PNTFS_BOOT_RECORD pBootRecord,PNTFS_FILE_RECORD_HEADER pFileHeaderStart, PNTFS_ATTRIBUTE_HEADER pAttrHeaderStart,const NTFS_VOLUME_INFO& volInfo,NtfsFilenameCb cb);// 获取文件记录static bool _GetFileRecord(HANDLE hFile,FILE_REFERENCE RecordNumber,PNTFS_FILE_RECORD_HEADER pOutFileHeader);// 解析文件名static bool _ParseFileRecord_20_AttributeList(HANDLE hFile,PNTFS_BOOT_RECORD pBootRecord,PNTFS_FILE_RECORD_HEADER pFileHeaderStart,PNTFS_ATTRIBUTE_HEADER pAttrHeaderStart,MFT_FILE_INFO_LIST& fileInfoList,std::vector<FILE_REFERENCE>& vRes);// 解析文件名static bool _ParseFileRecord_30_GetFilename(HANDLE hFile,PNTFS_BOOT_RECORD pBootRecord,PNTFS_FILE_RECORD_HEADER pFileHeaderStart,PNTFS_ATTRIBUTE_HEADER pAttrHeaderStart,MFT_FILE_INFO_LIST& fileInfoList);// 解析文件名static bool _ParseFileRecord_30_Filename(HANDLE hFile,PNTFS_BOOT_RECORD pBootRecord,PNTFS_FILE_RECORD_HEADER pFileHeaderStart,PNTFS_ATTRIBUTE_HEADER pAttrHeaderStart,MFT_FILE_INFO_LIST& fileInfoList);// 解析索引根static bool _ParseFileRecord_90_IndexRoot(HANDLE hFile,PNTFS_BOOT_RECORD pBootRecord,PNTFS_FILE_RECORD_HEADER pFileHeaderStart,PNTFS_ATTRIBUTE_HEADER pAttrHeaderStart,MFT_FILE_INFO_LIST& fileInfoList);// 解析索引根static bool _ParseFileRecord_A0_IndexAllocation(HANDLE hFile,PNTFS_BOOT_RECORD pBootRecord,PNTFS_FILE_RECORD_HEADER pFileHeaderStart,PNTFS_ATTRIBUTE_HEADER pAttrHeaderStart,MFT_FILE_INFO_LIST& fileInfoList);// 解析文件记录static bool _ParseFileRecordAttributes(HANDLE hFile,PNTFS_BOOT_RECORD pBootRecord,PNTFS_FILE_RECORD_HEADER pFileHeaderStart, PNTFS_ATTRIBUTE_HEADER pAttrHeaderStart, MFT_FILE_INFO_LIST& fileInfoList);// 解析主文件表数据static bool _ParseMasterFileTableData(HANDLE hFile,PNTFS_BOOT_RECORD pBootRecord,PNTFS_FILE_RECORD_HEADER pFileHeaderStart,PNTFS_ATTRIBUTE_HEADER pAttrHeaderStart,const NTFS_VOLUME_INFO& volInfo,NtfsFilenameCb cb);// 设置文件偏移static bool SetFileOffset(_In_ HANDLE hFile,_In_ uint64_t liDistanceToMove,_Out_opt_ uint64_t* lpNewFilePointer,_In_ DWORD dwMoveMethod = FILE_BEGIN);
};
二. 监控 USN 日志
为了动态监控每个文件的新建, 删除, 更名
NTFS_USN_Parse.h
#include "NTFS_USN_Parse.h"
#include <thread>NTFS_USN_Parse::NTFS_USN_Parse()
{}NTFS_USN_Parse::~NTFS_USN_Parse()
{}bool NTFS_USN_Parse::_CreateJournal(HANDLE hFile,DWORDLONG MaximumSize,DWORDLONG AllocationDelta
)
{// https://learn.microsoft.com/zh-cn/windows/win32/api/winioctl/ns-winioctl-create_usn_journal_dataCREATE_USN_JOURNAL_DATA usnCreate = { 0 };DWORD dwBytes = 0;usnCreate.MaximumSize = MaximumSize; // NTFS 文件系统为更改日志分配的目标最大大小(以字节为单位)usnCreate.AllocationDelta = AllocationDelta; // 添加到末尾并从更改日志开头删除的内存分配的大小if (!::DeviceIoControl(hFile,FSCTL_CREATE_USN_JOURNAL,&usnCreate, sizeof(usnCreate),NULL, 0,&dwBytes,NULL)){return false;}return true;
}bool NTFS_USN_Parse::_QueryJournal(HANDLE hFile,PUSN_JOURNAL_DATA_V2 pUsnJournalData
)
{DWORD dwBytes = 0;if (!::DeviceIoControl(hFile,FSCTL_QUERY_USN_JOURNAL,NULL,0,pUsnJournalData,sizeof(USN_JOURNAL_DATA_V2),&dwBytes,NULL)){return false;}return true;
}bool NTFS_USN_Parse::_ReadJournal(HANDLE hFile,PREAD_USN_JOURNAL_DATA pUsnData,LPVOID lpBuf,DWORD dwBufsize,LPDWORD dwBytes
)
{if ( !::DeviceIoControl(hFile,FSCTL_READ_USN_JOURNAL,pUsnData, sizeof(READ_USN_JOURNAL_DATA),lpBuf,dwBufsize,dwBytes,NULL) ){return false;}return true;
}void NTFS_USN_Parse::_ParseJournalV2(HANDLE hFile, const NTFS_VOLUME_INFO& volInfo, PUSN_RECORD_V2 pUsnRecord,NTFS_USN_INFO& usnInfo
)
{usnInfo.strFileName = WStrToTStr(std::wstring(pUsnRecord->FileName, pUsnRecord->FileNameLength / sizeof(wchar_t)));usnInfo.Reason.Data = pUsnRecord->Reason;usnInfo.UpdateSequenceNumber = pUsnRecord->Usn;usnInfo.ReferenceNumber = *(PFILE_REFERENCE)&pUsnRecord->FileReferenceNumber;usnInfo.ParentReferenceNumber = *(PFILE_REFERENCE)&pUsnRecord->ParentFileReferenceNumber;usnInfo.uDriveIndex = volInfo.uDriveIndex;usnInfo.FileAttributes = pUsnRecord->FileAttributes;
}void NTFS_USN_Parse::_ParseJournalV3(HANDLE hFile, const NTFS_VOLUME_INFO& volInfo, PUSN_RECORD_V3 pUsnRecord,NTFS_USN_INFO& usnInfo
)
{usnInfo.strFileName = WStrToTStr(std::wstring(pUsnRecord->FileName, pUsnRecord->FileNameLength / sizeof(wchar_t)));usnInfo.Reason.Data = pUsnRecord->Reason;usnInfo.UpdateSequenceNumber = pUsnRecord->Usn;usnInfo.ReferenceNumber = *(PFILE_REFERENCE)&pUsnRecord->FileReferenceNumber;usnInfo.ParentReferenceNumber = *(PFILE_REFERENCE)&pUsnRecord->ParentFileReferenceNumber;usnInfo.uDriveIndex = volInfo.uDriveIndex;usnInfo.FileAttributes = pUsnRecord->FileAttributes;
}bool NTFS_USN_Parse::_MonitorJournal(const NTFS_VOLUME_INFO& volInfo,DWORD dwReasonMask,UsnNotifyCb usnCb
)
{uint8_t* pBufData = nullptr;HANDLE hFile = INVALID_HANDLE_VALUE;USN usnStart = 0;DWORD dwRetBytes = 0;DWORD dwBytes = 0;DWORD dwError = 0;bool fResult = false;bool fAbort = false;do{// 打开卷hFile = OpenVolume(volInfo);if (INVALID_HANDLE_VALUE == hFile){break;}// 查询日志USN_JOURNAL_DATA_V2 usnData = { 0 };usnData.Flags = FLAG_USN_TRACK_MODIFIED_RANGES_ENABLE;usnData.FirstUsn = usnStart;if (!_QueryJournal(hFile, &usnData)){dwError = ::GetLastError();if (!(ERROR_JOURNAL_DELETE_IN_PROGRESS == dwError || ERROR_JOURNAL_NOT_ACTIVE == dwError)){break;}}// 日志未活动则创建日志if (ERROR_JOURNAL_NOT_ACTIVE == ::GetLastError()){if (!_CreateJournal(hFile, NTFS_USN_MAX_SIZE, NTFS_USN_ALLOCATION_DELTA_SIZE)){break;}usnData.Flags = FLAG_USN_TRACK_MODIFIED_RANGES_ENABLE;usnData.FirstUsn = usnStart;if (!_QueryJournal(hFile, &usnData)){dwError = ::GetLastError();if (!(ERROR_JOURNAL_DELETE_IN_PROGRESS == dwError || ERROR_JOURNAL_NOT_ACTIVE == dwError)){break;}}}usnStart = usnData.NextUsn;// 分配解析缓冲pBufData = new (std::nothrow) uint8_t[usnData.MaximumSize];if (!pBufData){break;}NTFS_USN_INFO usnInfoEmpty;while (true){// 查询日志if (!_QueryJournal(hFile, &usnData)){dwError = ::GetLastError();if (!(ERROR_JOURNAL_DELETE_IN_PROGRESS == dwError || ERROR_JOURNAL_NOT_ACTIVE == dwError)){fAbort = true;break;}}// 读取日志READ_USN_JOURNAL_DATA usnRead = { 0, 0xFFFFFFFF, FALSE, 0, 0 };usnRead.MinMajorVersion = usnData.MinSupportedMajorVersion;usnRead.MaxMajorVersion = usnData.MaxSupportedMajorVersion;usnRead.UsnJournalID = usnData.UsnJournalID;usnRead.StartUsn = usnStart;usnRead.ReasonMask = dwReasonMask;if (!_ReadJournal(hFile, &usnRead, pBufData, (DWORD)usnData.MaximumSize, &dwBytes)){dwError = ::GetLastError();if (!(ERROR_JOURNAL_DELETE_IN_PROGRESS == dwError || ERROR_JOURNAL_NOT_ACTIVE == dwError)){fAbort = true;break;}}dwRetBytes = dwBytes - sizeof(USN);// 未查询到数据if (!dwRetBytes){// 空闲时也回调一下if (!usnCb(false, usnInfoEmpty)){fAbort = true;break;}usnStart = usnData.NextUsn;::Sleep(NTFS_USN_UPDATE_TIME_INTERVAL);continue;}// 解析查询结果PUSN_RECORD_COMMON_HEADER pUsnHeader = (PUSN_RECORD_COMMON_HEADER)(pBufData + sizeof(USN));if (2 == pUsnHeader->MajorVersion){PUSN_RECORD_V2 pUsnRecord = (PUSN_RECORD_V2)(pBufData + sizeof(USN));NTFS_USN_INFO usnInfo;while (dwRetBytes > 0){_ParseJournalV2(hFile, volInfo, pUsnRecord, usnInfo);if (NTFSFileNameType::e05_Root == usnInfo.ParentReferenceNumber.RecordNumber || usnInfo.ParentReferenceNumber.RecordNumber >= NTFSFileNameType::e16_Unuse_Start){if (usnCb){if (!usnCb(true, usnInfo)){fAbort = true;break;}}}dwRetBytes -= pUsnRecord->RecordLength;pUsnRecord = (PUSN_RECORD_V2)(((PCHAR)pUsnRecord) + pUsnRecord->RecordLength);}}if (3 == pUsnHeader->MajorVersion){PUSN_RECORD_V3 pUsnRecord = (PUSN_RECORD_V3)(pBufData + sizeof(USN));NTFS_USN_INFO usnInfo;while (dwRetBytes > 0){_ParseJournalV3(hFile, volInfo, pUsnRecord, usnInfo);if (NTFSFileNameType::e05_Root == usnInfo.ParentReferenceNumber.RecordNumber ||usnInfo.ParentReferenceNumber.RecordNumber >= NTFSFileNameType::e16_Unuse_Start){if (usnCb){if (!usnCb(true, usnInfo)){fAbort = true;break;}}}dwRetBytes -= pUsnRecord->RecordLength;pUsnRecord = (PUSN_RECORD_V3)(((PCHAR)pUsnRecord) + pUsnRecord->RecordLength);}}// 空闲时也回调一下if (!usnCb(false, usnInfoEmpty)){fAbort = true;break;}// 下次解析起始日志号usnStart = usnData.NextUsn;::Sleep(NTFS_USN_UPDATE_TIME_INTERVAL);}if (fAbort){break;}fResult = true;} while (false);if (INVALID_HANDLE_VALUE != hFile){::CloseHandle(hFile);}if (pBufData){delete[] pBufData;}return fResult;
}bool NTFS_USN_Parse::MonitorJournal(DWORD dwDriveIndexMask/* = 0xFFFFFFFF*/,DWORD dwReasonMask/* = 0xFFFFFFFF*/,UsnNotifyCb usnCb/* = nullptr*/
)
{std::vector<NTFS_VOLUME_INFO> vecDriveList = GetVolumeInfoList();std::vector<std::thread> vTask;for (auto& item : vecDriveList){if ((0x01 << item.uDriveIndex) & dwDriveIndexMask){vTask.emplace_back(std::thread([&item, dwReasonMask, &usnCb]() {_MonitorJournal(item, dwReasonMask, usnCb);}));}}for (auto& item : vTask){if (item.joinable()){item.join();}}return true;
}
三. 数据库查询
采用Sqlite3进行数据库操作
NTFS_Search.h
#pragma once
#include "NTFS_Utils/NTFS_MFT_Parse.h"
#include "NTFS_Utils/NTFS_USN_Parse.h"
#include "Sqlite3/CSqlite3.h"
#include <thread>
#include <mutex>
#include <vector>#define SQL_UPDATE_TIME_INTERVAL (1000) // 数据库更新间隔时间// 计数
#define SQL_QUERY_COUNT R"(
SELECT count(*) AS count FROM file_list WHERE path NOT NULL
)"// 更新根路径
#define SQL_QUERY_UPDATE_ROOT_PATH R"(
WITH RECURSIVE sub_tree(id, parent_id, name, path) AS (SELECT id, parent_id, name, name AS pathFROM file_listWHERE parent_id = 0UNION ALLSELECT c.id, c.parent_id, c.name, p.path || '\' || c.nameFROM file_list cINNER JOIN sub_tree p ON c.parent_id = p.id
)
UPDATE file_list
SET path = (
SELECT path FROM sub_tree
WHERE sub_tree.id = file_list.id AND sub_tree.parent_id = file_list.parent_id AND sub_tree.name = file_list.name
);
)"// 删除索引
#define SQL_QUERY_DROP_INDEX R"(
DROP INDEX IF EXISTS idx_file_list_id ON file_list;
DROP INDEX IF EXISTS idx_file_list_parent_id ON file_list;
--DROP INDEX IF EXISTS idx_file_list_name ON file_list;
--DROP INDEX IF EXISTS idx_file_list_path ON file_list;
--DROP INDEX IF EXISTS idx_file_list_id_parent_id_name_path ON file_list;
)"// 创建索引
#define SQL_QUERY_CREATE_INDEX R"(
CREATE INDEX IF NOT EXISTS idx_file_list_id ON file_list(id);
CREATE INDEX IF NOT EXISTS idx_file_list_parent_id ON file_list(parent_id);
--CREATE INDEX IF NOT EXISTS idx_file_list_name ON file_list(name);
--CREATE INDEX IF NOT EXISTS idx_file_list_path ON file_list(path);
--CREATE INDEX IF NOT EXISTS idx_file_list_id_parent_id_name_path ON file_list(id, parent_id, name, path);
)"// 更新子路径
#define SQL_QUERY_UPDATE_CHILD_PATH R"(
WITH RECURSIVE sub_tree(id, parent_id, name, path) AS (SELECT id, parent_id, name, path AS pathFROM file_listWHERE id = %llu AND parent_id = %lluUNION ALLSELECT c.id, c.parent_id, c.name, p.path || '\' || c.nameFROM file_list cINNER JOIN sub_tree p ON c.parent_id = p.id
)
UPDATE file_list
SET path = st.path
FROM sub_tree st
WHERE file_list.id = st.id AND file_list.parent_id = st.parent_id AND file_list.name = st.name;
)"// 更新文件路径
#define SQL_QUERY_UPDATE_FILE_PATH R"(
WITH RECURSIVE path_cte(id, parent_id, name, path) AS (SELECT id, parent_id, name, nameFROM file_listWHERE id = %llu AND parent_id = %llu AND name = "%s"UNION ALLSELECT f.id, f.parent_id, f.name, f.name || '\' ||p.path FROM file_list fINNER JOIN path_cte p ON (f.id = p.parent_id)
)UPDATE file_list
SET path = (SELECT path FROM path_cte WHERE parent_id = 0)
WHERE id = %llu AND parent_id = %llu AND name = "%s";
)"// 按文件名查找
#define SQL_QUERY_SEARCH_NAME R"(
SELECT path FROM file_list WHERE name like "%s" ESCAPE '\' AND path NOT NULL ORDER BY path LIMIT %lld
)"// 按路径查找
#define SQL_QUERY_SEARCH_PATH R"(
SELECT path FROM file_list WHERE path like "%s" ESCAPE '\' AND path NOT NULL ORDER BY path LIMIT %lld
)"// 搜索全部
#define SQL_QUERY_SEARCH_ALL R"(
SELECT path FROM file_list WHERE path NOT NULL ORDER BY path LIMIT %lld
)"//删除表
#define SQL_QUERY_DELETE_TABLE R"(
DROP TABLE IF EXISTS file_list;
)"//创建表
#define SQL_QUERY_CREATE_TABLE R"(
CREATE TABLE IF NOT EXISTS file_list (id INTEGER NOT NULL,parent_id INT,name TEXT,attr INT,path TEXT,PRIMARY KEY(id, parent_id, name)
);
)"//建表更新数据
#define SQL_QUERY_REPLACE_PREPQRE R"(
REPLACE INTO file_list (id, parent_id, name, attr, path) VALUES (?, ?, ?, ?, ?);
)"// 替换记录
#define SQL_QUERY_REPLACE R"(
REPLACE INTO file_list (id, parent_id, name, attr) VALUES (%llu, %llu, "%s", %d);
)"// 删除记录
#define SQL_QUERY_DELETE R"(
DELETE FROM file_list where id = %llu AND parent_id = %llu AND name = "%s";
)"// 数据库使用的文件ID
typedef union _SQL_FILE_ID
{uint64_t data;struct {uint64_t ReferenceNumber : 32; // 文件引用号uint64_t dwDriveIndex : 32; // 驱动器号};}SQL_FILE_ID;bool operator < (const _SQL_FILE_ID& a, const _SQL_FILE_ID& b);class NTFS_Search
{
public:NTFS_Search();~NTFS_Search();//// @brief: 初始化// @param: dwDriveIndexMask 驱动器索引掩码(位组合: C: 0x01 D: 0x02 E: 0x04 F: 0x08...)// @param: strDbPath 数据库文件路径// @param: fRebuildDb 是否重建数据库// @ret: bool 操作是否成功bool Initialize(DWORD dwDriveIndexMask = 0xFFFFFFFF,const _tstring& strDbPath = _T(""), bool fRebuildDb = true);//// @brief: 初始化// @param: strDriveList 驱动器列表// @param: strDbPath 数据库文件路径// @param: fRebuildDb 是否重建数据库// @ret: bool 操作是否成功bool Initialize(const _tstring& strDriveList = _T("ABCDEFGHIJKLMNOPQRSTUVWXYZ"),const _tstring& strDbPath = _T(""),bool fRebuildDb = true);//// @brief: 反初始化// @ret: void 操作是否成功void Uninitialize();//// @brief: 搜索// @param: strKeyWord 关键字// @param: fileList 输出文件路径列表// @param: nLimit 限制结果数量// @ret: bool 操作是否成功bool Search(const _tstring& strKeyWord, std::vector<_tstring>& fileList,int64_t nLimit = -1);//// @brief: 获取数据库文件总数// @ret: size_t 数据库文件总数size_t GetCount();//// @brief: 获取驱动器索引掩码// @ret: DWORD 驱动器索引掩码 DWORD GetDriveListMask(const _tstring& strDriveList);private:bool _BuildDb(DWORD dwDriveIndexMask,const _tstring& strDbPath,bool fRebuildDb);bool _BuildRecord(DWORD dwDriveIndexMask);bool _BuildFile(const NTFS_VOL_FILE_MAP& volFileList);bool _BuildDrive(DWORD dwDriveIndexMask);bool _StartMonitor(DWORD dwDriveIndexMask);void _Sleep(int dwMilliseconds);int _UpdateFilePath(SQL_FILE_ID fileID, SQL_FILE_ID parentID, _tstring strFilename);int _UpdateChildPath(SQL_FILE_ID fileID, SQL_FILE_ID parentID);private:std::vector<NTFS_USN_INFO> m_usnInfoList; // 日志变化列表std::mutex m_mutex; // 互斥锁_tstring m_strDbPath; // 数据库文件路径CSqlite3 m_sql3; // 数据库std::thread m_monTask; // 监控任务std::thread m_sqlTask; // 数据库任务bool m_fInit; // 初始化状态bool m_fQuit; // 退出状态bool m_fDbReady; // 数据库准备状态
};
对比:
性能上重建数据库耗时是everything的 2倍, 不过也差不多了, 以后慢慢优化
搜索速度比不上everything, 但是也算是秒速了