NLPCC 出版部分相关源码记录

目录

Download

Unzip

Author

Title

Affiliation

Check number of tex

Zip

Rename

Delete


Download

import requests
from bs4 import BeautifulSoup# 登录网站并获取登录后的 session
def login(username, password):login_url = 'https://example.com/login'session = requests.session()login_data = {'username': username,'password': password,# 其他登录参数}response = session.post(login_url, data=login_data)if response.status_code == 200:print("登录成功!")return sessionelse:print("登录失败!")return None# 获取文件列表页面中的文件链接
def get_file_links(session, file_list_url):response = session.get(file_list_url)soup = BeautifulSoup(response.text, 'html.parser')file_links = []# 使用 BeautifulSoup 解析文件列表页面,获取文件链接# 例如:file_links = soup.find_all('a', class_='file-link')return file_links# 批量下载文件
def download_files(session, file_links, download_path):for link in file_links:file_url = link['href']file_name = link.text.strip()response = session.get(file_url, stream=True)if response.status_code == 200:# 保存文件到本地with open(f"{download_path}/{file_name}", 'wb') as file:for chunk in response.iter_content(chunk_size=8192):file.write(chunk)print(f"{file_name} 下载成功!")else:print(f"{file_name} 下载失败!")def main():username = 'your_username'password = 'your_password'file_list_url = 'https://example.com/files'  # 文件列表页面的 URLdownload_path = 'downloaded_files'  # 本地下载路径# 登录网站并获取登录后的 sessionsession = login(username, password)if session:# 获取文件列表页面中的文件链接file_links = get_file_links(session, file_list_url)if file_links:# 批量下载文件download_files(session, file_links, download_path)else:print("未找到文件链接!")else:print("登录失败,请检查用户名和密码!")# if __name__ == "__main__":
#     main()
import requests
from bs4 import BeautifulSoupdef login(username, password):login_url = 'https://softconf.com/nlpcc/Main-2023/login/scmd.cgi?scmd=login'session = requests.session()login_data = {"username": username,"password": password}response = session.post(login_url, data=login_data)# print(response.text)if response.status_code == 200:print("登录成功!")return sessionelse:print("登录失败!")return None
username, passwd = "用户名", "密码"
session = login(username, passwd)
import reids = {214,215,220,221,222,225,229,233,235,238,239,241,246,250,251,252,254,256,258,260,264,271,285,292,299,301,306,307,308,}
file_list_url = "https://softconf.com/nlpcc/Main-2023/pub/scmd.cgi?scmd=manager&ToPage=monitorFinalSubmissions&FromPage=Main"
response = session.get(file_list_url)
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table', id='t1')
links = table.find_all('a')
all_urls = [link.get('href') for link in links]
urls = []
for i in range(len(all_urls)):if all_urls[i] and all_urls[i].startswith('scmd.cgi?scmd=submitPaperCustom'):if (m := re.search(r"passcode=(\d+)X-.+", all_urls[i])) is not None:# print(m.group(1))if int(m.group(1)) in ids:urls.append((int(m.group(1)), "https://softconf.com/nlpcc/Main-2023/pub/"+all_urls[i]))
print(len(urls)==len(ids))
print(urls)
import time
import os
from tqdm.auto import tqdmdef download_files(session, urls:dict, paper_id:int):for file_name, file_url in urls.items():response = session.get(file_url, stream=True)save_dir = f"./downloads/{paper_id}/"os.makedirs(save_dir, exist_ok=True)if response.status_code == 200:# 保存文件到本地with open(f"{save_dir}/{file_name}", 'wb') as file:for chunk in response.iter_content(chunk_size=8192):file.write(chunk)# print(f"{paper_id}_{file_name} 下载成功!")else:print(f"{paper_id}_{file_name} 下载失败!")for paper_id, url in tqdm(urls):response = session.get(url)soup = BeautifulSoup(response.text, 'html.parser')links = soup.find_all('a')urls_ = map(lambda link: link.get('href') if link else "", links)pdf_url = [link.get('href') for link in links if link.get('href') and link.get('href').endswith("fieldid=Final_Manuscript")][0]zip_url = [link.get('href') for link in links if link.get('href') and link.get('href').endswith("fieldid=Source_File")][0]copyright_url = [link.get('href') for link in links if link.get('href') and link.get('href').endswith("fieldid=CopyRight_Springer")][0]downloads_urls = {"Final_Manuscript.pdf": pdf_url, "Source_File.zip":zip_url, "CopyRight.pdf":copyright_url}downloads_urls = {"CopyRight.pdf":copyright_url}# print(downloads_urls)try:download_files(session, downloads_urls, paper_id)except:pass# breaktime.sleep(2)

Unzip

import zipfile
import os
import pathlibdef unzip_file(zip_filepath, dest_path):with zipfile.ZipFile(zip_filepath, 'r') as zip_ref:zip_ref.extractall(dest_path)# 使用方法
root_dir = pathlib.Path("./downloads/")
for directory in root_dir.iterdir():try:unzip_file(directory/"Source_File.zip", directory/"Source_File")except Exception as e:print(e)print(directory)# break
import pathlibroot_dir = pathlib.Path("./downloads/")
for directory in root_dir.iterdir():path = directory/"Source_File"path_true = pathlib.Path(path)dir_outputs_tex_true = path_true/"outputs_tex"dir_outputs_tex_true.mkdir(exist_ok=True)if (path/"submission.tex").exists():dir_outputs_tex = pathlib.PurePosixPath("outputs_tex")path_tex = pathlib.PurePosixPath("submission.tex")path_aux = dir_outputs_tex/"submission.aux"! cd {path_true} & pdflatex -output-directory={dir_outputs_tex} -synctex=0 -interaction=nonstopmode -file-line-error {path_tex}! cd {path_true} & bibtex {path_aux}! cd {path_true} & pdflatex -output-directory={dir_outputs_tex} -synctex=0 -interaction=nonstopmode -file-line-error {path_tex}! cd {path_true} & pdflatex -output-directory={dir_outputs_tex} -synctex=0 -interaction=nonstopmode -file-line-error {path_tex}else:print(directory)
def compile2pdf(directory):directory = pathlib.Path(directory)path = directory/"Source_File"path_true = pathlib.Path(path)dir_outputs_tex_true = path_true/"outputs_tex"dir_outputs_tex_true.mkdir(exist_ok=True)if (path/"submission.tex").exists():dir_outputs_tex = pathlib.PurePosixPath("outputs_tex")path_tex = pathlib.PurePosixPath("submission.tex")path_aux = dir_outputs_tex/"submission.aux"! cd {path_true} & pdflatex -output-directory={dir_outputs_tex} -synctex=0 -interaction=nonstopmode -file-line-error {path_tex}! cd {path_true} & bibtex {path_aux}! cd {path_true} & pdflatex -output-directory={dir_outputs_tex} -synctex=0 -interaction=nonstopmode -file-line-error {path_tex}! cd {path_true} & pdflatex -output-directory={dir_outputs_tex} -synctex=0 -interaction=nonstopmode -file-line-error {path_tex}else:print(directory)compile2pdf("downloads/306")
def is_same_file(file1, file2):with open(file1, 'rb') as f1, open(file2, 'rb') as f2:return f1.read() == f2.read()import PyPDF2from PyPDF2 import PdfReaderdef extract_text_from_pdf(file_path):with open(file_path, 'rb') as file:pdf = PdfReader(file)text = ""for page in range(len(pdf.pages)):text += pdf.pages[page].extract_text()return text, len(pdf.pages)def compare_pdfs(file_path1, file_path2):text1, n_1 = extract_text_from_pdf(file_path1)text2, n_2 = extract_text_from_pdf(file_path2)return text1 == text2, n_1, n_2root_dir = pathlib.Path("./downloads/")
for directory in root_dir.iterdir():camera_ready = directory/"Final_Manuscript.pdf"compiled = directory/"Source_File"/"outputs_tex"/"submission.pdf"try: ok, n1, n2 = compare_pdfs(camera_ready, compiled)if not ok:print(f"Not same: {directory}")print(n1, n2, sep='    ')except Exception as e:print(e)print(f"Fail to compare: {directory}")print("=========================================================================")

Author

import redef extract_author(tex_file_path):with open(tex_file_path, 'r', encoding='utf-8') as tex_file:tex_content = tex_file.read()# Use regular expression to find the \author partpattern = r"^\\author{\s*(.*?)\s*}\s+\%"matches = re.search(pattern, tex_content, re.DOTALL|re.MULTILINE)if matches:return matches.group(1)else:return ""tex_file_path = "downloads\\215\\Source_File\\submission.tex"  # Replace with the path to your .tex file
author = extract_author(tex_file_path)authors = []
root_dir = pathlib.Path("./downloads/")
for directory in root_dir.iterdir():tex_file_path = directory/"Source_File"/"submission.tex"print(f"------{directory}---------")if tex_file_path.exists():author = extract_author(tex_file_path)# author = re.sub(r"\\.*", "", author)# author = re.sub(r"[^\w\s]", "", author)# author = re.sub(r"\s*?\n\s*", ",", author)# author = author[:-1] if author.endswith(',') else author# author = re.sub(r'(?<=,)(?=[^,]*$)', 'and ', author)  #将最后一个逗号换成 `and`# # author = re.sub(r',(?=[^,]*$)', ' and ', author)  #将最后一个逗号换成 `and`authors.append(author)print(author)else:print(f"Fail to open tex: {tex_file_path}")authors.append("")print('====================================================================')
import pandas as pd# 将列表转换为DataFrame
df = pd.DataFrame(authors, columns=["author"])# 保存DataFrame到Excel文件
file_path = "./author.xlsx"
df.to_excel(file_path, index=False)

Title

import redef extract_title(tex_file_path):with open(tex_file_path, 'r', encoding='utf-8') as tex_file:tex_content = tex_file.read()# Use regular expression to find the \author partpattern = r"^\\title{\s*(.*?)\s*}\s+\%"matches = re.search(pattern, tex_content, re.DOTALL|re.MULTILINE)if matches:return matches.group(1)else:return ""# tex_file_path = "downloads\\215\\Source_File\\submission.tex"  # Replace with the path to your .tex file
# author = extract_author(tex_file_path)authors = []
root_dir = pathlib.Path("./downloads/")
for directory in root_dir.iterdir():tex_file_path = directory/"Source_File"/"submission.tex"print(f"------{directory}---------")if tex_file_path.exists():author = extract_title(tex_file_path)author = re.sub(r"\s*\\\\\s*", " ", author)author = re.sub(r"\\.*", "", author)authors.append(author)print(author)else:print(f"Fail to open tex: {tex_file_path}")authors.append("")print('====================================================================')
import pandas as pd# 将列表转换为DataFrame
df = pd.DataFrame(authors, columns=["title"])# 保存DataFrame到Excel文件
file_path = "./title.xlsx"
df.to_excel(file_path, index=False)

Affiliation

import redef extract_affiliation(tex_file_path):with open(tex_file_path, 'r', encoding='utf-8') as tex_file:tex_content = tex_file.read()# Use regular expression to find the \author partpattern = r"^\\institute{\s*(.*?)\s*}\s+\%"matches = re.search(pattern, tex_content, re.DOTALL|re.MULTILINE)if matches:return matches.group(1)else:return ""# tex_file_path = "downloads\\215\\Source_File\\submission.tex"  # Replace with the path to your .tex file
# author = extract_author(tex_file_path)authors = []
root_dir = pathlib.Path("./downloads/")
i = 2
for directory in root_dir.iterdir():tex_file_path = directory/"Source_File"/"submission.tex"print(f"------{i} {directory}---------")i += 1if tex_file_path.exists():author = extract_affiliation(tex_file_path)# author = re.sub(r"\s*\\\\\s*", " ", author)# author = re.sub(r"\\.*", "", author)authors.append(author)print(author)else:print(f"Fail to open tex: {tex_file_path}")authors.append("")print('====================================================================')
import pandas as pd# 将列表转换为DataFrame
df = pd.DataFrame(authors, columns=["affiliation"])# 保存DataFrame到Excel文件
file_path = "./affiliation.xlsx"
df.to_excel(file_path, index=False)

Check number of tex

import pathlib
root_dir = pathlib.Path("./downloads/")def num_tex(dirctory: pathlib.Path):num = 0for d in dirctory.iterdir():num += (d.suffix=='.tex')return numfor d in root_dir.iterdir():src = d/"Source_File"if num_tex(src)>1:print(d)

Zip

import os
import zipfiledef zip_directory(directory_path, zip_path):"""压缩目录到zip文件:param directory_path: 要压缩的目录路径:param zip_path: zip文件保存路径"""with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:for root, _, files in os.walk(directory_path):for file in files:file_path = os.path.join(root, file)zipf.write(file_path, os.path.relpath(file_path, directory_path))# # 示例用法
# directory_to_compress = '/path/to/source_directory'
# zip_file_path = '/path/to/destination.zip'
# zip_directory(directory_to_compress, zip_file_path)

Rename

import pathlib
root_dir = pathlib.Path("./downloads/")for d in list(root_dir.iterdir()):src = d/"Source_File"zip_directory(src, src.parent/"source.zip")submi = d/"Final_Manuscript.pdf"submi.rename(submi.with_name("submission.pdf"))cprt = d/"CopyRight.pdf"cprt.rename(cprt.rename(cprt.with_name("copyright.pdf")))

Delete

import pathlib
import shutil
import os
root_dir = pathlib.Path("./downloads/")for d in list(root_dir.iterdir()):src = d/"Source_File.zip"os.remove(src)

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/news/28341.shtml

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

《论文阅读》通过生成会话模型的迁移学习会话中的情感识别

《论文阅读》通过生成会话模型的迁移学习会话中的情感识别 前言简介模型结构Source TaskTarget Task损失函数前言 你是否也对于理解论文存在困惑? 你是否也像我之前搜索论文解读,得到只是中文翻译的解读后感到失望? 小白如何从零读懂论文?和我一起来探索吧! 今天为大家…

Qt6之QListWidget——Qt仿ToDesk侧边栏(1)

一、 QLitWidget概述 注意&#xff1a;本文不是简单翻译Qt文档或者接口函数&#xff0c;而侧重于无代码Qt设计器下演示使用。 QListWidget也称列表框类&#xff0c;它提供了一个类似于QListView提供的列表视图&#xff0c;但是它具有一个用于添加和删除项的经典的基于项的接口…

JavaScript算法【入门】

作者&#xff1a;20岁爱吃必胜客&#xff08;坤制作人&#xff09;&#xff0c;近十年开发经验, 跨域学习者&#xff0c;目前于海外某世界知名高校就读计算机相关专业。荣誉&#xff1a;阿里云博客专家认证、腾讯开发者社区优质创作者&#xff0c;在CTF省赛校赛多次取得好成绩。…

Jmeter(四) - 从入门到精通 - 创建网络测试计划(详解教程)

1.简介 在本节中&#xff0c;您将学习如何创建基本的 测试计划来测试网站。您将创建五个用户&#xff0c;这些用户将请求发送到JMeter网站上的两个页面。另外&#xff0c;您将告诉用户两次运行测试。因此&#xff0c;请求总数为&#xff08;5个用户&#xff09;x&#xff08;2…

[保研/考研机试] 约瑟夫问题No.2 C++实现

题目要求&#xff1a; 输入、输出样例&#xff1a; 源代码&#xff1a; #include<iostream> #include<queue> #include<vector> using namespace std;//例题5.2 约瑟夫问题No.2 int main() {int n, p, m;while (cin >> n >> p >> m) {//如…

Whisper.cpp 编译使用

Whisper.cpp 编译使用 whisper.cpp 是牛人 ggerganov 对 openai 的 whisper 语音识别模型用 C 重新实现的项目&#xff0c;开源在 github 上&#xff0c;具有轻量、性能高&#xff0c;实用性强等特点。这篇文章主要记录在 windows 平台&#xff0c;如何使用该模型在本地端进行…

rust持续学习 get_or_insert_with

通常使用一个值 if(xnull)xsome_valid_value 忽然今天看见一段代码 pub fn get_id() -> u64 { let mut res struct.data.borrow_mut(); *res.get_or_insert_with(||{let mut xx ...... some logiclet id xx.id; id}); }感觉这个名字蛮奇怪的 insert 然后翻了一下代码&a…

docker安装MinIO

简介 Minio 是一个面向对象的简单高性能存储服务。使用 Go 语言编写&#xff0c;性能高、具有跨平台性。 Minio 官网为&#xff1a;https://min.io &#xff0c;有一个中文站点&#xff0c;单内容更新不是很及时&#xff0c;建议从原始官网学习。 本文采用 Docker 安装&…

Adb发送特定广播给App和App获取权限的命令

最近在做Autostart&#xff0c;但是没有bench R1环境 目前在模拟器上调试&#xff0c;需要调试自定义的广播和获取悬浮窗权限&#xff08;因为这个app需要在开机未启动app的情况启动服务区获取传感器信号然后全局弹窗&#xff09;。 需要先adb root adb remount 1.发送广播给…

vscode自动添加注释说明

1. 安装vscode 双击安装程序,默认安装即可(如:VSCodeSetup-x64-1.70.2.exe) 2. 安装doxygen文档生成插件 1> 打开vscode软件,点击左侧插件管理菜单 2> 点击右上角’…‘按钮,选择’Install from VSIX’(联网状态可以直接搜索doxygen下载安装) 3> 选择doxygen离线安装…

【深度学习】【风格迁移】Zero-shot Image-to-Image Translation

论文&#xff1a;https://arxiv.org/abs/2302.03027 代码&#xff1a;https://github.com/pix2pixzero/pix2pix-zero/tree/main 文章目录 Abstract1. Introduction相关工作3. Method Abstract 大规模文本到图像生成模型展示了它们合成多样且高质量图像的显著能力。然而&#x…

Linux项目部署

目录 一JAVAWeb环境的部署【安装JDK&#xff0c;MySQL数据库&#xff0c;Tomcat】 二.手工部署SpringBoot项目&#xff08;写的最好的&#xff09; 1.在IDEA中开发SpringBoot项目并打成jar包--点击右侧的Maven执行package命令 2.将jar包上传到Linux服务器 3.执行以下命令&a…

mysql的高级查询语句

目录 一、本文前言 二、高效查询方式 1&#xff09;指定指字段进行查看 2&#xff09;对字段进行去重查看 3&#xff09;where条件查询 4&#xff09;and 和 or 进行逻辑关系的增加 5&#xff09;查询取值列表中的数据 6&#xff09;between的引用 7&#xff09;like…

NAT及其实验(eNSP,细致易懂)

目录 NAT产生背景 NAT概述NAT&#xff08;Network Address Translation&#xff09;&#xff0c;网络地址转换 NAT工作规则 标准NAT技术 NAPT[网络地址端口转换[Port-->传输层-端口编号]] Easy IP——最简单的PAT NAT Server 静态NAT实验 动态NAT实验 NAPT实验 N…

Redis基础 (三十八)

提示&#xff1a;文章写完后&#xff0c;目录可以自动生成&#xff0c;如何生成可参考右边的帮助文档 目录 前言 一、概述 1.1 NoSQL 1.2 Redis 二、安装 2.1 安装方式 &#xff1a; 三、目录结构 3.1 rpm -ql redis 3.2 /etc/redis.conf 主配置文件 3.3 /var/lib/redis …

相机可见区域,使用鼠标拖拽模型

知识点 向量射线检测坐标转换 思路 使用射线检测获取射线检测点与模型对象之间的偏移量 &#xff08;世界空间&#xff09;使用相机的坐标转换获取检测点与鼠标位置之间的偏移量 &#xff08;屏幕空间&#xff09;拖拽时&#xff0c;更新模型位置 代码示例 using UnityEng…

【BMC】OpenBMC开发基础2:修改原有程序

修改原有程序 通常情况下我们会需要修改OpenBMC原有的程序来适配我们的项目&#xff0c;本节将介绍一般的流程。 为此首先我们需要了解devtool这个工具&#xff0c;注意它不是前端开发用的那个devtool&#xff0c;而是由OE&#xff08;或者Yocto&#xff1f;&#xff09;提供…

Android 实现 RecyclerView下拉刷新,SwipeRefreshLayout上拉加载

上拉、下拉的效果图如下&#xff1a; 使用步骤 1、在清单文件中添加依赖 implementation ‘com.android.support:recyclerview-v7:27.1.1’ implementation “androidx.swiperefreshlayout:swiperefreshlayout:1.0.0” 2、main布局 <LinearLayout xmlns:android"http…

Codeforces Round 891 (Div. 3)

Array ColoringArray Coloring 题目大意 题目要求判断是否可以将数组元素分为两种颜色&#xff0c;使得两种颜色元素的和具有相同的奇偶性&#xff0c;并且每种颜色至少有一个元素被着色。 思路分析 可以通过统计数组中奇数和偶数的个数来判断是否满足条件。分析可知&#x…

适用HarmonyOS 3.1版本及以上的应用及服务开发工具 DevEco Studio 3.1.1 Release 安装

文章目录 安装步骤1.下载安装包2.安装成功后&#xff0c;初次运行studio2.1 配置node与ohpm的环境2.2安装sdk2.3等待安装结束 3.创建项目3.1 点击Create Project3.2 选择一个空项目3.3 项目配置3.4 Finish、等待依赖下载完毕3.5 项目创建完成 tip 提示4.配置运行环境4.1 真机运…