下载HF AutoTrain 模型的配置文件
一.在huggingface上创建AutoTrain项目
二.通过HF用户名和autotrain项目名,拼接以下url,下载模型列表(json格式)到指定目录
# Fetch the AutoTrain model-choice list (JSON) for every task type into
# model_names/.  Output file name = task name with ":" and "-" replaced
# by "_", plus a ".txt" extension (e.g. llm:sft -> llm_sft.txt).
mkdir model_names
cd model_names
for task in \
    llm:sft llm:orpo llm:generic llm:dpo llm:reward \
    text-classification text-regression seq2seq token-classification \
    dreambooth image-classification image-object-detection
do
    out="${task//:/_}"      # llm:sft -> llm_sft
    out="${out//-/_}"       # text-classification -> text_classification
    wget "https://username-projectname.hf.space/ui/model_choices/${task}" -O "${out}.txt"
done
三.解析上面的json文件、去重、批量下载模型配置文件(权重以外的文件)
from huggingface_hub import snapshot_download
from pathlib import Path
import os
import glob
import json
# Step 3: parse the downloaded model-list JSON files, de-duplicate the repo
# ids, and fetch each model's configuration files (everything but the weights).
import tqdm


def download_model(repo_id):
    """Download the non-weight files (configs, tokenizer files, README) of
    ``repo_id`` into ./models/<repo_id>.

    Repos whose target directory already contains any ``*.json`` file are
    assumed to have been fetched by a previous run and are skipped.
    """
    models_path = Path.cwd().joinpath("models", repo_id)
    models_path.mkdir(parents=True, exist_ok=True)
    # Resume support: any existing *.json marks this repo as already done.
    if any(models_path.glob("*.json")):
        return
    snapshot_download(
        repo_id=repo_id,
        # Config/tokenizer/readme only — weight files are excluded on purpose.
        allow_patterns=["*.json", "tokenizer*", "README.md"],
        local_dir=models_path,
        resume_download=True,  # NOTE: deprecated (and a no-op) in recent huggingface_hub
        # Read the token from the environment instead of hard-coding a secret.
        token=os.environ.get("HF_TOKEN"),
    )


def load_meta_info():
    """Return the collection of repo ids to download.

    On the first run, parse every ``model_names/*.txt`` JSON file, de-duplicate
    the repo ids, record which task types each repo appears under, and cache
    the result in ``meta.txt`` (one ``repo_id,task1;task2`` line per repo).
    Subsequent runs read ``meta.txt`` directly so the list is stable.
    """
    file_path = "meta.txt"
    if os.path.exists(file_path):
        # Cached run: only the repo id (first comma-separated field) matters.
        with open(file_path, "r") as f:
            return [line.strip().split(",")[0] for line in f]

    repo_ids = set()
    repo_id_model_type_map = dict()
    for file in sorted(glob.glob("model_names/*.txt")):
        # File stem encodes the task type, e.g. llm_sft.txt -> "llm_sft".
        model_type = os.path.basename(file).split(".")[0]
        with open(file, "r") as f:
            for item in json.loads(f.read().strip()):
                repo_id = item["id"]
                repo_ids.add(repo_id)
                repo_id_model_type_map.setdefault(repo_id, set()).add(model_type)

    with open(file_path, "w") as f:
        for repo_id in repo_ids:
            # Sort the task types so the cache file is deterministic, and join
            # with ";" so the ","-split on re-read stays unambiguous (the
            # original wrote the raw set repr, which embeds ", ").
            model_types = ";".join(sorted(repo_id_model_type_map[repo_id]))
            f.write(f"{repo_id},{model_types}\n")
    return repo_ids


for repo_id in tqdm.tqdm(load_meta_info()):
    print(repo_id)
    # Skip repos known to fail (reason not recorded here — presumably gated
    # or license-restricted; confirm before removing this exclusion).
    if repo_id in ["Corcelio/mobius", "briaai/BRIA-2.3"]:
        continue
    download_model(repo_id)