背景:看书的时候经常遇到英文pdf,没有合适的翻译软件可以快速翻译全书。这里提供一个解决方案。
Step 1
- 打开英文pdf
- CTRL+A全选文字
- CTRL+C复制
- 打开记事本
- CTRL+V粘贴
- 保存为data.txt
Step 2
写一个C++脚本
// ToolPdf2Html.cpp : 此文件包含 "main" 函数。程序执行将在此处开始并结束。
//#include <iostream>
#include <cerrno>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
#ifdef _WIN32
#include <windows.h>//pdf->txt->html (edge translate)
#else
#include <sys/stat.h>
#endif
// Reads the whole file `filename` as raw bytes into `data`.
// If the file cannot be opened, `data` is left untouched. On success `data`
// is resized to the number of bytes actually read (empty for an empty file).
static void ReadFileContentsByte(std::string filename, std::vector<char>& data)
{
    std::ifstream fin(filename, std::ios::in | std::ios::binary);
    if (!fin.is_open())
    {
        return;
    }

    // Determine the file size by seeking to the end, then rewind.
    fin.seekg(0, std::ios::end);
    const std::streamoff size = fin.tellg();
    fin.seekg(0, std::ios::beg);
    if (size <= 0)
    {
        data.clear();
        return;
    }

    // One bulk read. Reading byte-by-byte into a `char` and comparing with EOF
    // would mistake a 0xFF data byte for end-of-file.
    data.resize(static_cast<std::size_t>(size));
    fin.read(data.data(), static_cast<std::streamsize>(size));
    data.resize(static_cast<std::size_t>(fin.gcount()));
}

// Appends every line of `filename` to `data`, without line terminators.
// If the file cannot be opened, `data` is left untouched.
static void ReadFileContentsLines(std::string filename, std::vector<std::string>& data)
{
    std::ifstream fin(filename, std::ios::in | std::ios::binary);
    if (!fin.is_open())
    {
        return;
    }

    std::string line;
    while (std::getline(fin, line))
    {
        // The stream is opened in binary mode, so CRLF line endings leave a
        // trailing '\r' after getline() strips the '\n'. Drop it.
        if (!line.empty() && line.back() == '\r')
        {
            line.pop_back();
        }
        data.push_back(line);
    }
}

// Writes `data` to `filename`, replacing any existing content.
// Prints a message and returns without writing if the file cannot be opened.
static void WriteFileContentsByte(std::string filename, const std::string& data)
{
    std::ofstream fout(filename, std::ios::out);
    if (!fout.is_open())
    {
        std::cout << "no open file " << filename << std::endl;
        return;
    }
    fout << data;
}

// Returns the decimal text representation of `value`.
static std::string Number2Stri(int value)
{
    return std::to_string(value);
}

// Returns `text` with the characters that are significant in HTML markup
// ('&', '<', '>') replaced by entities, so that source text such as
// "#include <vector>" is displayed literally instead of being parsed as a tag.
static std::string EscapeHtml(const std::string& text)
{
    std::string escaped;
    escaped.reserve(text.size());
    for (const char ch : text)
    {
        switch (ch)
        {
        case '&': escaped += "&amp;"; break;
        case '<': escaped += "&lt;"; break;
        case '>': escaped += "&gt;"; break;
        default: escaped += ch; break;
        }
    }
    return escaped;
}

// Makes sure the "output" directory exists in the current working directory,
// creating it when missing. Returns false if it neither exists nor could be
// created.
static bool EnsureOutputDirectory()
{
#ifdef _WIN32
    // Explicit wide-character API so the code does not depend on whether the
    // UNICODE macro is defined for the project.
    WIN32_FIND_DATAW findData;
    HANDLE hFind = FindFirstFileW(L"output\\*", &findData);
    if (hFind != INVALID_HANDLE_VALUE)
    {
        FindClose(hFind);
        std::cout << "文件夹存在" << std::endl;
        return true;
    }

    std::cout << "文件夹不存在" << std::endl;
    if (CreateDirectoryW(L"output", nullptr))
    {
        std::cout << "文件夹创建成功" << std::endl;
        return true;
    }
    std::cout << "文件夹创建失败,错误代码:" << GetLastError() << std::endl;
    return false;
#else
    struct stat info;
    if (::stat("output", &info) == 0 && S_ISDIR(info.st_mode))
    {
        std::cout << "文件夹存在" << std::endl;
        return true;
    }

    std::cout << "文件夹不存在" << std::endl;
    if (::mkdir("output", 0755) == 0)
    {
        std::cout << "文件夹创建成功" << std::endl;
        return true;
    }
    std::cout << "文件夹创建失败,错误代码:" << errno << std::endl;
    return false;
#endif
}

// Entry point: pdf -> txt -> html (for in-browser translation).
//
// Reads a text file (argv[1], default "data.txt") line by line and splits it
// into HTML pages of 50 lines each, written to output/index1.html,
// output/index2.html, ... Every page ends with its page number plus
// "Previous"/"Next" links to the neighbouring pages.
//
// Returns 0 on success, 1 if no input could be read or the output directory
// could not be created.
int main(int argc, char *argv[])
{
    std::cout << "Hello World!\n";

    std::string filepath = "data.txt";
    if (argc > 1)
    {
        filepath = argv[1];
    }
    std::cout << "filepath=" << filepath.c_str() << std::endl;

    std::vector<std::string> data;
    ReadFileContentsLines(filepath, data);
    std::cout << "data.size=" << data.size() << std::endl;
    if (data.empty())
    {
        // Covers both a missing/unreadable file and an empty one.
        std::cout << "no lines read from " << filepath << std::endl;
        return 1;
    }

    if (!EnsureOutputDirectory())
    {
        return 1;
    }

    const std::string htmlHead =
        "<!DOCTYPE html>\n"
        "<html>\n"
        "<head>\n"
        "<meta charset = \"utf-8\">\n"
        "<title>The C++ Programming Language</title>\n"
        "</head>\n"
        "<body>\n";
    const std::string htmlEnd = "</body> </html>";

    // Split into one HTML page per `singleLen` input lines.
    const int singleLen = 50;
    const int len = static_cast<int>(data.size());
    // Round up so that a trailing partial page is written as well.
    const int pageCount = (len + singleLen - 1) / singleLen;

    for (int index = 1; index <= pageCount; ++index)
    {
        const int begin = (index - 1) * singleLen;
        int end = begin + singleLen;
        if (end > len)
        {
            end = len;
        }

        std::string output = htmlHead;
        for (int i = begin; i < end; ++i)
        {
            output += EscapeHtml(data[i]);
            output += "<br/>";
        }

        // Navigation footer: [Previous] <page number> [Next]
        output += "<br/><br/>";
        if (index > 1)
        {
            output += "<a href=\"index" + Number2Stri(index - 1) + ".html\">Previous</a> ";
        }
        output += Number2Stri(index);
        if (index < pageCount)
        {
            output += " <a href=\"index" + Number2Stri(index + 1) + ".html\">Next</a>";
        }
        output += htmlEnd;

        const std::string filep = "output/index" + Number2Stri(index) + ".html";
        std::cout << "write to " << filep.c_str() << std::endl;
        WriteFileContentsByte(filep, output);
    }

    std::cout << "finish." << std::endl;
    return 0;
}
- 脚本会读入data.txt
- 按行处理,每50行生成一个html
- 命令行运行脚本
ToolPdf2Html.exe data.txt
- 在output文件夹下生成一堆html
Step 3
用微软的Edge浏览器打开html,浏览器自动翻译英文
通过页面底部的 Previous(上一页)和 Next(下一页)链接翻页