SmolVLM2: The Smollest Video Model Ever (Part 6)

Continuing Fine-Tuning

The code for fine-tuning on video is as follows:

# Fine-tune SmolVLM2 on a video-captioning task
# Imports
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import torch
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from transformers import AutoProcessor, BitsAndBytesConfig, AutoModelForImageTextToText
from datasets import load_dataset
from transformers import TrainingArguments, Trainer
from torch.nn.utils.rnn import pad_sequence

# Configure the SOCKS proxy (note: socks5 proxy URLs require PySocks,
# e.g. pip install "requests[socks]")
os.environ['http_proxy'] = 'http://127.0.0.1:1080'
os.environ['https_proxy'] = 'socks5://127.0.0.1:1080'
os.environ['socks_proxy'] = 'socks5://127.0.0.1:1080'

# Choose whether to use LoRA / QLoRA, and which model to fine-tune
USE_LORA = False
USE_QLORA = False
SMOL = False
model_id = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" if SMOL else "/mnt/share/toky/LLMs/SmolVLM2-2.2B-Instruct"
processor = AutoProcessor.from_pretrained(model_id)

if torch.cuda.is_available():
    print("CUDA is available!")
else:
    print("CUDA is not available. Please check your CUDA installation.")

# Load the model according to USE_QLORA / USE_LORA. With QLoRA or LoRA, the
# adapter (and, for QLoRA, quantization) parameters are configured first;
# otherwise the model is loaded directly and the vision tower is frozen.
# Finally, print the GPU memory the model occupies.
if USE_QLORA or USE_LORA:
    lora_config = LoraConfig(
        r=8,
        lora_alpha=8,
        lora_dropout=0.1,
        target_modules=['down_proj', 'o_proj', 'k_proj', 'q_proj', 'gate_proj', 'up_proj', 'v_proj'],
        use_dora=False if USE_QLORA else True,  # disable DoRA when quantizing with QLoRA
        init_lora_weights="gaussian"
    )
    lora_config.inference_mode = False
    if USE_QLORA:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16
        )
    model = AutoModelForImageTextToText.from_pretrained(
        model_id,
        quantization_config=bnb_config if USE_QLORA else None,
        _attn_implementation="flash_attention_2",
        device_map="auto"
    )
    model.add_adapter(lora_config)
    model.enable_adapters()
    model = prepare_model_for_kbit_training(model)
    model = get_peft_model(model, lora_config)
    print(model.get_nb_trainable_parameters())
else:
    model = AutoModelForImageTextToText.from_pretrained(
        model_id,
        torch_dtype=torch.bfloat16,
        _attn_implementation="flash_attention_2",
    )
    model = model.to("cuda")  # make sure the model is on the GPU
    # To fine-tune only the LLM, freeze the vision tower
    for param in model.model.vision_model.parameters():
        param.requires_grad = False

peak_mem = torch.cuda.max_memory_allocated()
print(f"The model as is is holding: {peak_mem / 1024 ** 3:.2f} of GPU RAM")

# Load the dataset and split its train set 50/50 into train and test
ds = load_dataset("/mnt/share/toky/Datasets/TIGER-Lab/VideoFeedback", name='real')
split_ds = ds["train"].train_test_split(test_size=0.5)
train_ds = split_ds["train"]
del split_ds, ds

# Print one example, then look up the ID of the <image> token
print(f"prompt:  {train_ds[0]['text prompt']}, video: {train_ds[0]['video link']}")
image_token_id = processor.tokenizer.additional_special_tokens_ids[
    processor.tokenizer.additional_special_tokens.index("<image>")
]

def collate_fn(examples):
    """Collate samples into one batch: build chat messages per sample, tokenize,
    pad input IDs / attention masks / labels, pad the video pixel values, and
    return everything as a single dict."""
    instances = []
    for example in examples:
        prompt = example["text prompt"]
        user_content = [{"type": "text", "text": "Caption the video."}]
        user_content.append({"type": "video", "path": example["video link"]})
        messages = [
            {"role": "user", "content": user_content},
            {"role": "assistant", "content": [{"type": "text", "text": f"{prompt}"}]}
        ]
        instance = processor.apply_chat_template(
            messages, add_generation_prompt=False,
            tokenize=True, return_dict=True, return_tensors="pt"
        ).to("cuda").to(model.dtype)
        instances.append(instance)

    input_ids = pad_sequence(
        [inst["input_ids"].squeeze(0) for inst in instances],
        batch_first=True,
        padding_value=processor.tokenizer.pad_token_id
    ).to("cuda")
    attention_mask = pad_sequence(
        [inst["attention_mask"].squeeze(0) for inst in instances],
        batch_first=True,
        padding_value=0
    ).to("cuda")
    labels = pad_sequence(
        [inst["input_ids"].squeeze(0).clone() for inst in instances],
        batch_first=True,
        padding_value=-100
    ).to("cuda")
    labels[labels == image_token_id] = -100  # never compute loss on image tokens
    out = {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

    # Pad the video pixel values to a common (frames, H, W)
    pvs = [inst["pixel_values"].squeeze(0) for inst in instances if "pixel_values" in inst]
    if pvs:
        max_frames = max(pv.shape[0] for pv in pvs)
        max_h = max(pv.shape[-2] for pv in pvs)
        max_w = max(pv.shape[-1] for pv in pvs)
    else:
        max_h = max_w = processor.video_size['longest_edge']
        max_frames = 1

    padded_pixel_values_list = []
    for ex in instances:
        pv = ex.get("pixel_values", None)
        if pv is None:
            # text-only sample: pad with an all-zero video
            padded_pv = torch.zeros((max_frames, 3, max_h, max_w), dtype=torch.float32, device="cuda")
        else:
            pv = pv.squeeze(0)
            f, c, h, w = pv.shape
            padded_pv = torch.zeros(
                (max_frames, c, max_h, max_w),
                dtype=pv.dtype,
                device=pv.device
            )
            padded_pv[:f, :, :h, :w] = pv
        padded_pixel_values_list.append(padded_pv)
    out["pixel_values"] = torch.stack(padded_pixel_values_list, dim=0).to("cuda")
    return out

# Training setup
model_name = model_id.split("/")[-1]
training_args = TrainingArguments(
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    warmup_steps=50,
    learning_rate=1e-4,
    weight_decay=0.01,
    logging_steps=25,
    save_strategy="steps",
    save_steps=250,
    save_total_limit=1,
    optim="paged_adamw_8bit",  # for 8-bit, keep paged_adamw_8bit, else adamw_hf
    bf16=True,
    output_dir=f"./{model_name}-video-feedback",
    hub_model_id=f"{model_name}-video-feedback",
    remove_unused_columns=False,
    report_to="tensorboard",
    dataloader_pin_memory=False
)

# Initialize the Trainer and train
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=train_ds,
)
trainer.train()

# Push the trained model to the Hugging Face Hub
trainer.push_to_hub()

# Test example
messages = [{
    "role": "user",
    "content": [
        {"type": "text", "text": "Caption the video."},
        {"type": "video", "path": "https://huggingface.co/datasets/hexuan21/VideoFeedback-videos-mp4/resolve/main/p/p000304.mp4"}
    ]
}]
inputs = processor.apply_chat_template(
    messages, add_generation_prompt=True,
    tokenize=True, return_dict=True, return_tensors="pt"
).to("cuda").to(model.dtype)
generated_ids = model.generate(**inputs, do_sample=False, max_new_tokens=64)
generated_texts = processor.batch_decode(
    generated_ids,
    skip_special_tokens=True,
)
print(generated_texts[0])

I spent ages configuring a proxy in the server terminal, but after running for a while it still errors out; the network is probably unstable:

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  2.00it/s]
The model as is is holding: 4.20 of GPU RAM
prompt:  It is an interior of a furniture store, with tables and chairs arranged in a showroom setting., video: https://huggingface.co/datasets/hexuan21/VideoFeedback-videos-mp4/resolve/main/p/p110224.mp4
2%|▎         | 25/1000 [07:15<3:40:58, 13.60s/it] {'loss': 2.6556, 'grad_norm': 5.4375, 'learning_rate': 4.8e-05, 'epoch': 0.03}
5%|▌         | 50/1000 [13:40<6:01:04, 22.80s/it] {'loss': 0.3066, 'grad_norm': 1.9296875, 'learning_rate': 9.8e-05, 'epoch': 0.05}
8%|▊         | 75/1000 [20:59<3:27:40, 13.47s/it] {'loss': 0.273, 'grad_norm': 1.8515625, 'learning_rate': 9.747368421052632e-05, 'epoch': 0.07}
8%|▊         | 82/1000 [22:01<2:26:58,  9.61s/it]
Traceback (most recent call last):
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/urllib3/connectionpool.py", line 464, in _make_request
    self._validate_conn(conn)
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/urllib3/connectionpool.py", line 1093, in _validate_conn
    conn.connect()
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/urllib3/connection.py", line 741, in connect
    sock_and_verified = _ssl_wrap_socket_and_match_hostname(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/urllib3/connection.py", line 920, in _ssl_wrap_socket_and_match_hostname
    ssl_sock = ssl_wrap_socket(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/urllib3/util/ssl_.py", line 460, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(sock, context, tls_in_tls, server_hostname)
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/urllib3/util/ssl_.py", line 504, in _ssl_wrap_socket_impl
    return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/ssl.py", line 513, in wrap_socket
    return self.sslsocket_class._create(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/ssl.py", line 1104, in _create
    self.do_handshake()
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/ssl.py", line 1375, in do_handshake
    self._sslobj.do_handshake()
ssl.SSLEOFError: [SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1007)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/urllib3/connectionpool.py", line 787, in urlopen
    response = self._make_request(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/urllib3/connectionpool.py", line 488, in _make_request
    raise new_e
urllib3.exceptions.SSLError: [SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1007)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/requests/adapters.py", line 667, in send
    resp = conn.urlopen(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/urllib3/connectionpool.py", line 841, in urlopen
    retries = retries.increment(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/urllib3/util/retry.py", line 519, in increment
    raise MaxRetryError(_pool, url, reason) from reason  # type: ignore[arg-type]
urllib3.exceptions.MaxRetryError: SOCKSHTTPSConnectionPool(host='cdn-lfs-us-1.hf.co', port=443): Max retries exceeded with url: /repos/b8/3e/b83ea2bda2d9360fb510c71dcb4ba6823a277e191b811453f0920bbed536814a/4686c53313c509ea510071b69a6413ad6373ec276437df8a1f31210e2712210a?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27p108061.mp4%3B+filename%3D%22p108061.mp4%22%3B&response-content-type=video%2Fmp4&Expires=1745570852&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0NTU3MDg1Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zL2I4LzNlL2I4M2VhMmJkYTJkOTM2MGZiNTEwYzcxZGNiNGJhNjgyM2EyNzdlMTkxYjgxMTQ1M2YwOTIwYmJlZDUzNjgxNGEvNDY4NmM1MzMxM2M1MDllYTUxMDA3MWI2OWE2NDEzYWQ2MzczZWMyNzY0MzdkZjhhMWYzMTIxMGUyNzEyMjEwYT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=uL3PDB2RUzJoXXskLGjquF7ADrZTbHuOomk4eGVBJQfi0SqM9vNErxHc8IcvbtEIvAY1Wmmz9etfXu0WbI-EPcVekP3mFfqlOExQsI4tMpAgzsz45Wc0YdMBr6bXt730~LtujvQYcl4gpxOL2ufefcEG0soGA5PvGnLPskweGEfPJQmNJ5niOrQj1AFwwtdHH0fvLuzLklYWpZs22w5qBiB0rMHQQ-zWJbHQsMbB1KDwN48FVILPC5OIHKWkqJRNjb5MhwSMLo~HvapYC18zd3An9iWoeHb~vJyYBVZc~z-0g6FPUMrJNdRp1aDosNp3h5DLsmr0XpQ0SmHYc2B4ug__&Key-Pair-Id=K24J24Z295AEI9 (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1007)')))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/mnt/share/toky/Projects/SmolVLM2_Test/Smol_VLM_Video_FT.py", line 196, in <module>
    trainer.train()
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/transformers/trainer.py", line 2245, in train
    return inner_training_loop(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/transformers/trainer.py", line 2514, in _inner_training_loop
    batch_samples, num_items_in_batch = self.get_batch_samples(epoch_iterator, num_batches, args.device)
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/transformers/trainer.py", line 5243, in get_batch_samples
    batch_samples.append(next(epoch_iterator))
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/accelerate/data_loader.py", line 561, in __iter__
    next_batch = next(dataloader_iter)
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 701, in __next__
    data = self._next_data()
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 757, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
    return self.collate_fn(data)
  File "/mnt/share/toky/Projects/SmolVLM2_Test/Smol_VLM_Video_FT.py", line 106, in collate_fn
    instance = processor.apply_chat_template(messages, add_generation_prompt=False,
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/transformers/models/smolvlm/processing_smolvlm.py", line 451, in apply_chat_template
    return super().apply_chat_template(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/transformers/processing_utils.py", line 1394, in apply_chat_template
    video, metadata = load_video(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/transformers/image_utils.py", line 847, in load_video
    file_obj = BytesIO(requests.get(video).content)
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/requests/api.py", line 73, in get
    return request("get", url, params=params, **kwargs)
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/requests/api.py", line 59, in request
    return session.request(method=method, url=url, **kwargs)
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/requests/sessions.py", line 589, in request
    resp = self.send(prep, **send_kwargs)
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/requests/sessions.py", line 724, in send
    history = [resp for resp in gen]
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/requests/sessions.py", line 724, in <listcomp>
    history = [resp for resp in gen]
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/requests/sessions.py", line 265, in resolve_redirects
    resp = self.send(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/requests/sessions.py", line 703, in send
    r = adapter.send(request, **kwargs)
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/requests/adapters.py", line 698, in send
    raise SSLError(e, request=request)
requests.exceptions.SSLError: SOCKSHTTPSConnectionPool(host='cdn-lfs-us-1.hf.co', port=443): Max retries exceeded with url: /repos/b8/3e/b83ea2bda2d9360fb510c71dcb4ba6823a277e191b811453f0920bbed536814a/4686c53313c509ea510071b69a6413ad6373ec276437df8a1f31210e2712210a?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27p108061.mp4%3B+filename%3D%22p108061.mp4%22%3B&response-content-type=video%2Fmp4&Expires=1745570852&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0NTU3MDg1Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zL2I4LzNlL2I4M2VhMmJkYTJkOTM2MGZiNTEwYzcxZGNiNGJhNjgyM2EyNzdlMTkxYjgxMTQ1M2YwOTIwYmJlZDUzNjgxNGEvNDY4NmM1MzMxM2M1MDllYTUxMDA3MWI2OWE2NDEzYWQ2MzczZWMyNzY0MzdkZjhhMWYzMTIxMGUyNzEyMjEwYT9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPSoifV19&Signature=uL3PDB2RUzJoXXskLGjquF7ADrZTbHuOomk4eGVBJQfi0SqM9vNErxHc8IcvbtEIvAY1Wmmz9etfXu0WbI-EPcVekP3mFfqlOExQsI4tMpAgzsz45Wc0YdMBr6bXt730~LtujvQYcl4gpxOL2ufefcEG0soGA5PvGnLPskweGEfPJQmNJ5niOrQj1AFwwtdHH0fvLuzLklYWpZs22w5qBiB0rMHQQ-zWJbHQsMbB1KDwN48FVILPC5OIHKWkqJRNjb5MhwSMLo~HvapYC18zd3An9iWoeHb~vJyYBVZc~z-0g6FPUMrJNdRp1aDosNp3h5DLsmr0XpQ0SmHYc2B4ug__&Key-Pair-Id=K24J24Z295AEI9 (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1007)')))
8%|▊         | 82/1000 [22:04<4:07:05, 16.15s/it]
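The crash happens because collate_fn streams each mp4 from the Hub CDN at batch time, so a single proxy hiccup kills the whole run. One workaround is to download every video into a local cache with retries before training starts, then point "video link" at the local file so the collator never touches the network. A minimal sketch of that idea (the cache directory, retry counts, and helper name are my own choices, not part of the original script):

import os
import time
import requests

CACHE_DIR = "/mnt/share/toky/Datasets/video_cache"  # hypothetical cache location

def cached_video_path(url, retries=5, backoff=10):
    """Download url into CACHE_DIR once, retrying on network errors; return the local path."""
    os.makedirs(CACHE_DIR, exist_ok=True)
    local_path = os.path.join(CACHE_DIR, url.rsplit("/", 1)[-1])
    if os.path.exists(local_path):
        return local_path
    for attempt in range(retries):
        try:
            resp = requests.get(url, timeout=60)
            resp.raise_for_status()
            with open(local_path, "wb") as f:
                f.write(resp.content)
            return local_path
        except requests.exceptions.RequestException:
            time.sleep(backoff * (attempt + 1))  # wait a little longer before each retry
    raise RuntimeError(f"failed to fetch {url} after {retries} attempts")

# Rewrite the column once, before training, so collate_fn only ever sees local files
train_ds = train_ds.map(lambda ex: {"video link": cached_video_path(ex["video link"])})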

Dataset Issues

The dataset used in the code is TIGER-Lab/VideoFeedback.

Examples

Each record carries the fields id, images, text prompt, video link, visual quality, temporal consistency, dynamic degree, text-to-video alignment, factual consistency, and conversations. Three sample records:

id: 6000476

images: [ "6000476_00.jpg", "6000476_01.jpg", "6000476_02.jpg", "6000476_03.jpg", "6000476_04.jpg", "6000476_05.jpg", "6000476_06.jpg", "6000476_07.jpg", "6000476_08.jpg", "6000476_09.jpg", "6000476_10.jpg", "6000476_11.jpg", "6000476_12.jpg", "6000476_13.jpg", "6000476_14.jpg", "6000476_15.jpg" ]

text prompt: An eagle sitting on top of a lush cactus devouring a snake, all framed by a backdrop of mountains. is a companion hybrid of the Valley of Mexico

video link: https://huggingface.co/datasets/hexuan21/VideoFeedback-videos-mp4/resolve/main/6/6000476.mp4

visual quality: 1
temporal consistency: 1
dynamic degree: 3
text-to-video alignment: 3
factual consistency: 1

conversations: [ { "from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"An eagle sitting on top of a lush cactus devouring a snake, all framed by a backdrop of mountains. is a companion hybrid of the Valley of Mexico \",\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n" }, { "from": "gpt", "value": "visual quality: 1\n temporal consistency: 1\n dynamic degree: 3\n text-to-video alignment: 3\n factual consistency: 1\n" } ]

id: 1002546

images: [ "1002546_00.jpg", "1002546_01.jpg", "1002546_02.jpg", "1002546_03.jpg", "1002546_04.jpg", "1002546_05.jpg", "1002546_06.jpg", "1002546_07.jpg", "1002546_08.jpg", "1002546_09.jpg", "1002546_10.jpg", "1002546_11.jpg", "1002546_12.jpg", "1002546_13.jpg", "1002546_14.jpg", "1002546_15.jpg" ]

text prompt: a carpet of flowers of various colors under the morning sun

video link: https://huggingface.co/datasets/hexuan21/VideoFeedback-videos-mp4/resolve/main/1/1002546.mp4

visual quality: 1
temporal consistency: 1
dynamic degree: 3
text-to-video alignment: 1
factual consistency: 1

conversations: [ { "from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos, \nplease watch the following frames of a given video and see the text prompt for generating the video, \nthen give scores from 5 different dimensions:\n(1) visual quality: the quality of the video in terms of clearness, resolution, brightness, and color\n(2) temporal consistency, the consistency of objects or humans in video\n(3) dynamic degree, the degree of dynamic changes\n(4) text-to-video alignment, the alignment between the text prompt and the video content\n(5) factual consistency, the consistency of the video content with the common-sense and factual knowledge\n\nFor each dimension, output a number from [1,2,3,4], \nin which '1' means 'Bad', '2' means 'Average', '3' means 'Good', \n'4' means 'Real' or 'Perfect' (the video is like a real video)\nHere is an output example:\nvisual quality: 4\ntemporal consistency: 4\ndynamic degree: 3\ntext-to-video alignment: 1\nfactual consistency: 2\n\nFor this video, the text prompt is \"a carpet of flowers of various colors under the morning sun \",\nall the frames of video are as follows: \n\n<image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> <image> \n" }, { "from": "gpt", "value": "visual quality: 1\n temporal consistency: 1\n dynamic degree: 3\n text-to-video alignment: 1\n factual consistency: 1\n" } ]

id: 2000505

images: [ "2000505_00.jpg", "2000505_01.jpg", "2000505_02.jpg", "2000505_03.jpg", "2000505_04.jpg", "2000505_05.jpg", "2000505_06.jpg", "2000505_07.jpg", "2000505_08.jpg", "2000505_09.jpg", "2000505_10.jpg", "2000505_11.jpg", "2000505_12.jpg", "2000505_13.jpg", "2000505_14.jpg", "2000505_15.jpg" ]

text prompt: rayman eating ice cream, globox stretching

video link: https://huggingface.co/datasets/hexuan21/VideoFeedback-videos-mp4/resolve/main/2/2000505.mp4

visual quality: 2
temporal consistency: 2
dynamic degree: 2
text-to-video alignment: 3
factual consistency: 1

conversations: [ { "from": "human", "value": "\nSuppose you are an expert in judging and evaluating the quality of AI-generated videos,
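To inspect these records locally, a minimal sketch (assuming the local directory mirrors the Hub layout used by the fine-tuning script):

from datasets import load_dataset

# Load the 'real' config from the local mirror of TIGER-Lab/VideoFeedback
ds = load_dataset("/mnt/share/toky/Datasets/TIGER-Lab/VideoFeedback", name="real")
row = ds["train"][0]
# Print the prompt, video URL, and the five per-dimension scores
for key in ["id", "text prompt", "video link", "visual quality",
            "temporal consistency", "dynamic degree",
            "text-to-video alignment", "factual consistency"]:
    print(f"{key}: {row[key]}")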

Local Dataset Layout and Correspondence

In the local copy at /mnt/share/toky/Datasets/TIGER-Lab/VideoFeedback/annotated/:

The test-xxx.parquet here packages the dataset's prompts and other text fields, but it is stored in binary form, so you cannot read it by opening it directly.

It corresponds to the 80 folders inside frames_real_test.

In effect, the data_real.json in the sibling folder was converted into test-xxx.parquet:

/mnt/share/toky/Datasets/TIGER-Lab/VideoFeedback/annotated/test-xxx.parquet

The ids inside /mnt/share/toky/Datasets/TIGER-Lab/VideoFeedback/annotated/train-00000-of-00001.parquet can all be found in TIGER-Lab\VideoFeedback\train\data_real.json.

The code originally reads from the real folder, yet the data actually used for fine-tuning appears to come from annotated. Since I copied real wholesale into annotated, it is no surprise that the ids in train-00000-of-00001.parquet point to TIGER-Lab\VideoFeedback\train\data_real.json. As I recall, the annotated folder on Hugging Face holds far more data, over 30 GB.

To summarize (the sketch after the list verifies this mapping):

  • /mnt/share/toky/Datasets/TIGER-Lab/VideoFeedback/real/test-00000-of-00001.parquet maps to /mnt/share/toky/Datasets/TIGER-Lab/VideoFeedback/test/data_real.json
  • /mnt/share/toky/Datasets/TIGER-Lab/VideoFeedback/real/train-00000-of-00001.parquet maps to /mnt/share/toky/Datasets/TIGER-Lab/VideoFeedback/train/data_real.json
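The mapping can be checked by reading the ids from both sides and diffing them. A minimal sketch (pandas is my choice here; it also assumes data_real.json is a JSON list of records that each carry an "id" field):

import json
import pandas as pd

# ids recorded in the parquet shard
parquet_ids = set(pd.read_parquet(
    "/mnt/share/toky/Datasets/TIGER-Lab/VideoFeedback/real/train-00000-of-00001.parquet"
)["id"])

# ids recorded in the original JSON annotation file
with open("/mnt/share/toky/Datasets/TIGER-Lab/VideoFeedback/train/data_real.json") as f:
    json_ids = {rec["id"] for rec in json.load(f)}

print("ids only in parquet:", parquet_ids - json_ids)  # both should print empty sets
print("ids only in json:   ", json_ids - parquet_ids)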

Building My Own Dataset ✓

Some prompts I used along the way:

The requirements were as follows:

(1) Zip the four folders under /mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback/test/ (frames_annotated, frames_real, videos_annotated, videos_real). Compress frames_real and videos_real into frames_real_test.zip and videos_real_test.zip, and place them under /mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback/real/.

(2) Convert the two JSON files "data_real.json" and "data_annotated.json" to Parquet: "data_real.json" becomes "test-00000-of-00001.parquet" under /mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback/real/, and "data_annotated.json" becomes "test-00000-of-00001.parquet" under /mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback/annotated/.

(3) Zip the four folders under /mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback/train/ (frames_annotated, frames_real, videos_annotated, videos_real). Compress frames_real and videos_real into frames_real_train.zip and videos_real_train.zip, and place them under /mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback/real/.

(4) Convert the two JSON files "data_real.json" and "data_annotated.json" to Parquet: "data_real.json" becomes "train-00000-of-00001.parquet" under /mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback/real/, and "data_annotated.json" becomes "train-00000-of-00001.parquet" under /mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback/annotated/.

(5) Also remember: the frames_annotated and videos_annotated folders under test and train get compressed into frames_real_annotated.zip and videos_real_annotated.zip and placed under /mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback/annotated/.

Script path:

/mnt/share/toky/Projects/GenSurgery/toky_process_dataset/process_parquet.py
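For reference, a minimal sketch of the conversion logic such a script might contain (the real process_parquet.py may differ; to_parquet needs pyarrow or fastparquet installed, and the annotated halves follow the same pattern):

import json
import shutil
import pandas as pd

base = "/mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback"

for split in ["test", "train"]:
    # Steps (1)/(3): zip the frame/video folders,
    # e.g. test/frames_real -> real/frames_real_test.zip
    for folder in ["frames_real", "videos_real"]:
        shutil.make_archive(f"{base}/real/{folder}_{split}", "zip", f"{base}/{split}/{folder}")
    # Steps (2)/(4): convert the JSON annotations into a single parquet shard
    with open(f"{base}/{split}/data_real.json") as f:
        records = json.load(f)
    pd.DataFrame(records).to_parquet(f"{base}/real/{split}-00000-of-00001.parquet")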

But loading still reported a mismatch; it turns out the README.md also carries configuration metadata.

The README that ships with the dataset downloaded from Hugging Face contains front matter like this:

---
language:
- en
license: apache-2.0
size_categories:
- 10K<n<100K
task_categories:
- video-classification
pretty_name: VideoFeedback
tags:
- video
dataset_info:
- config_name: annotated
  features:
  - name: id
    dtype: string
  - name: images
    sequence: string
  - name: text prompt
    dtype: string
  - name: video link
    dtype: string
  - name: visual quality
    dtype: int64
  - name: temporal consistency
    dtype: int64
  - name: dynamic degree
    dtype: int64
  - name: text-to-video alignment
    dtype: int64
  - name: factual consistency
    dtype: int64
  - name: conversations
    list:
    - name: from
      dtype: string
    - name: value
      dtype: string
  splits:
  - name: test
    num_bytes: 1348268
    num_examples: 680
  - name: train
    num_bytes: 65281005
    num_examples: 32901
  download_size: 45128599
  dataset_size: 66629273
- config_name: real
  features:
  - name: id
    dtype: string
  - name: images
    sequence: string
  - name: text prompt
    dtype: string
  - name: video link
    dtype: string
  - name: visual quality
    dtype: int64
  - name: temporal consistency
    dtype: int64
  - name: dynamic degree
    dtype: int64
  - name: text-to-video alignment
    dtype: int64
  - name: factual consistency
    dtype: int64
  - name: conversations
    list:
    - name: from
      dtype: string
    - name: value
      dtype: string
  splits:
  - name: train
    num_bytes: 8072782
    num_examples: 4000
  - name: test
    num_bytes: 162240
    num_examples: 80
  download_size: 3963450
  dataset_size: 8235022
configs:
- config_name: annotated
  data_files:
  - split: train
    path: annotated/train-*
  - split: test
    path: annotated/test-*
- config_name: real
  data_files:
  - split: test
    path: real/test-*
  - split: train
    path: real/train-*
---

Based on the error message I edited the byte sizes and example counts under "- config_name: real"; that particular error went away, but then:

Traceback (most recent call last):
  File "/mnt/share/toky/Projects/SmolVLM2_Test/Smol_VLM_Video_FT.py", line 79, in <module>
    ds = load_dataset("/mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback", name="real", split="test")
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/datasets/load.py", line 2062, in load_dataset
    builder_instance = load_dataset_builder(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/datasets/load.py", line 1782, in load_dataset_builder
    dataset_module = dataset_module_factory(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/datasets/load.py", line 1519, in dataset_module_factory
    ).get_module()
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/datasets/load.py", line 836, in get_module
    builder_configs, default_config_name = create_builder_configs_from_metadata_configs(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/datasets/load.py", line 649, in create_builder_configs_from_metadata_configs
    builder_config_cls(
  File "<string>", line 12, in __init__
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/datasets/packaged_modules/parquet/parquet.py", line 26, in __post_init__
    super().__post_init__()
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/datasets/builder.py", line 125, in __post_init__
    if invalid_char in self.name:
TypeError: argument of type 'NoneType' is not iterable

Error: TypeError: argument of type 'NoneType' is not iterable

    def __post_init__(self):
        # The config name is used to name the cache directory.
        for invalid_char in INVALID_WINDOWS_CHARACTERS_IN_PATH:
            if invalid_char in self.name:
                raise InvalidConfigName(
                    f"Bad characters from black list '{INVALID_WINDOWS_CHARACTERS_IN_PATH}' found in '{self.name}'. "
                    f"They could create issues when creating a directory for this config on Windows filesystem."
                )

Fix:

The config_name had been changed at some point without my noticing, leaving self.name as None; a bit silly, I spent ages hunting this bug. With that fixed, the next error appeared:

Traceback (most recent call last):
  File "/mnt/share/toky/Projects/SmolVLM2_Test/Smol_VLM_Video_FT.py", line 80, in <module>
    ds = load_dataset("/mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback", name="real")
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/datasets/load.py", line 2084, in load_dataset
    builder_instance.download_and_prepare(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/datasets/builder.py", line 925, in download_and_prepare
    self._download_and_prepare(
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/datasets/builder.py", line 1019, in _download_and_prepare
    verify_splits(self.info.splits, split_dict)
  File "/mnt/share/toky/CondaEnvs/LM/lib/python3.10/site-packages/datasets/utils/info_utils.py", line 77, in verify_splits
    raise NonMatchingSplitsSizesError(str(bad_splits))
datasets.exceptions.NonMatchingSplitsSizesError: [{'expected': SplitInfo(name='train', num_bytes=72025, num_examples=20, shard_lengths=None, dataset_name=None), 'recorded': SplitInfo(name='train', num_bytes=14634, num_examples=7, shard_lengths=None, dataset_name='cholec80_video_feedback')}, {'expected': SplitInfo(name='test', num_bytes=215668, num_examples=60, shard_lengths=None, dataset_name=None), 'recorded': SplitInfo(name='test', num_bytes=6289, num_examples=3, shard_lengths=None, dataset_name='cholec80_video_feedback')}]

Fix:

Edit README.md again, this time updating the split num_bytes and num_examples to match the actual data.
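An alternative to hand-maintaining these numbers is to skip split verification when loading. In recent versions of datasets this is the verification_mode argument (older versions use ignore_verifications=True instead):

from datasets import load_dataset

# "no_checks" skips the recorded-vs-actual split size comparison entirely
ds = load_dataset(
    "/mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback",
    name="real",
    verification_mode="no_checks",
)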

Pushing the Model to the Hugging Face Hub

I ran into problems when pushing the trained model to the Hugging Face Hub.

print("开始训练")  # "start training"
trainer.train()
# Push the trained model to the Hugging Face Hub
print("将训练好的模型推送到Hugging Face Hub")  # "pushing the trained model to the Hub"
trainer.push_to_hub()
print("开始测试")  # "start testing"

Here I hit network problems again. ==> Once the network issue is resolved, you can log in to Hugging Face from within the code and then publish the model, as shown below:
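A minimal login sketch (reading the token from an HF_TOKEN environment variable is my own convention; any write-scoped token works):

import os
from huggingface_hub import login

# Authenticate before trainer.push_to_hub(); expects a write token in HF_TOKEN
login(token=os.environ["HF_TOKEN"])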

Fine-tuning passed!

/mnt/share/toky/CondaEnvs/LM/bin/python /mnt/share/toky/Projects/SmolVLM2_Test/Smol_VLM_Video_FT.py 
CUDA is available!
You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.94it/s]
The model as is is holding: 4.20 of GPU RAM
prompt:  The current surgical phase is CalotTriangleDissection, Grasper, Hook tool exists., video: /mnt/share/toky/Datasets/Toky_Generate/Cholec80VideoFeedback/train/videos_real/t04_0000000.mp4
开始训练
100%|██████████| 2/2 [00:16<00:00,  8.18s/it]
{'train_runtime': 16.3717, 'train_samples_per_second': 0.183, 'train_steps_per_second': 0.122, 'train_loss': 5.189296722412109, 'epoch': 1.0}
将训练好的模型推送到Hugging Face Hub
model.safetensors:   0%|          | 0.00/4.49G [00:00<?, ?B/s]
events.out.tfevents.1745563185.YAN-Machine.3092909.0:   0%|          | 0.00/8.48k [00:00<?, ?B/s]
events.out.tfevents.1745563339.YAN-Machine.3093802.0:   0%|          | 0.00/8.48k [00:00<?, ?B/s]
events.out.tfevents.1745564448.YAN-Machine.3101448.0:   0%|          | 0.00/8.48k [00:00<?, ?B/s]
Upload 25 LFS files:   0%|          | 0/25 [00:00<?, ?it/s]
events.out.tfevents.1745564612.YAN-Machine.3103220.0:   0%|          | 0.00/8.49k [00:00<?, ?B/s]
events.out.tfevents.1745563185.YAN-Machine.3092909.0: 100%|██████████| 8.48k/8.48k [00:06<00:00, 1.37kB/s]
events.out.tfevents.1745564448.YAN-Machine.3101448.0: 100%|██████████| 8.48k/8.48k [00:06<00:00, 1.37kB/s]
events.out.tfevents.1745564612.YAN-Machine.3103220.0: 100%|██████████| 8.49k/8.49k [00:06<00:00, 1.35kB/s]
events.out.tfevents.1745563185.YAN-Machine.3092909.0: 100%|██████████| 8.48k/8.48k [00:06<00:00, 1.29kB/s]
events.out.tfevents.1745564448.YAN-Machine.3101448.0: 100%|██████████| 8.48k/8.48k [00:06<00:00, 1.28kB/s]
events.out.tfevents.1745564612.YAN-Machine.3103220.0: 100%|██████████| 8.49k/8.49k [00:06<00:00, 1.28kB/s]
events.out.tfevents.1745563339.YAN-Machine.3093802.0: 100%|██████████| 8.48k/8.48k [00:06<00:00, 1.26kB/s]
model.safetensors:   1%|          | 47.0M/4.49G [00:12<12:43, 5.82MB/s]
events.out.tfevents.1745565384.YAN-Machine.3118167.0:   0%|          | 0.00/8.48k [00:00<?, ?B/s]
events.out.tfevents.1745565415.YAN-Machine.3119073.0:   0%|          | 0.00/8.48k [00:00<?, ?B/s]
events.out.tfevents.1745565621.YAN-Machine.3121507.0:   0%|          | 0.00/8.48k [00:00<?, ?B/s]
events.out.tfevents.1745565791.YAN-Machine.3125459.0: 100%|██████████| 8.48k/8.48k [00:00<00:00, 20.8kB/s]
events.out.tfevents.1745565384.YAN-Machine.3118167.0: 100%|██████████| 8.48k/8.48k [00:00<00:00, 12.9kB/s]
events.out.tfevents.1745565415.YAN-Machine.3119073.0: 100%|██████████| 8.48k/8.48k [00:00<00:00, 13.5kB/s]
events.out.tfevents.1745565621.YAN-Machine.3121507.0: 100%|██████████| 8.48k/8.48k [00:00<00:00, 8.54kB/s]
events.out.tfevents.1745565809.YAN-Machine.3126720.0: 100%|██████████| 8.48k/8.48k [00:00<00:00, 23.0kB/s]
events.out.tfevents.1745565929.YAN-Machine.3132618.0: 100%|██████████| 9.11k/9.11k [00:00<00:00, 24.5kB/s]
events.out.tfevents.1745567917.YAN-Machine.225857.0:   0%|          | 0.00/8.48k [00:00<?, ?B/s]
events.out.tfevents.1745567569.YAN-Machine.4178102.0: 100%|██████████| 8.48k/8.48k [00:00<00:00, 21.3kB/s]
events.out.tfevents.1745567917.YAN-Machine.225857.0: 100%|██████████| 8.48k/8.48k [00:00<00:00, 19.7kB/s]
events.out.tfevents.1745568389.YAN-Machine.544916.0: 100%|██████████| 9.31k/9.31k [00:00<00:00, 23.9kB/s]
events.out.tfevents.1745570989.YAN-Machine.2231864.0: 100%|██████████| 8.48k/8.48k [00:00<00:00, 22.5kB/s]
events.out.tfevents.1745571185.YAN-Machine.2416517.0:   0%|          | 0.00/12.3k [00:00<?, ?B/s]
events.out.tfevents.1745571108.YAN-Machine.2336285.0: 100%|██████████| 8.48k/8.48k [00:00<00:00, 21.4kB/s]
events.out.tfevents.1745571185.YAN-Machine.2416517.0: 100%|██████████| 12.3k/12.3k [00:00<00:00, 33.9kB/s]
events.out.tfevents.1745673540.YAN-Machine.3385759.0: 100%|██████████| 8.48k/8.48k [00:00<00:00, 23.4kB/s]
events.out.tfevents.1745722006.YAN-Machine.1838916.0: 100%|██████████| 8.48k/8.48k [00:00<00:00, 23.5kB/s]
events.out.tfevents.1745724618.YAN-Machine.1883932.0: 100%|██████████| 8.48k/8.48k [00:00<00:00, 21.4kB/s]
events.out.tfevents.1745725130.YAN-Machine.1897854.0:   0%|          | 0.00/8.83k [00:00<?, ?B/s]
events.out.tfevents.1745725239.YAN-Machine.1900454.0:   0%|          | 0.00/8.83k [00:00<?, ?B/s]
events.out.tfevents.1745725400.YAN-Machine.1904120.0:   0%|          | 0.00/8.83k [00:00<?, ?B/s]
events.out.tfevents.1745725130.YAN-Machine.1897854.0: 100%|██████████| 8.83k/8.83k [00:00<00:00, 24.1kB/s]
events.out.tfevents.1745725239.YAN-Machine.1900454.0: 100%|██████████| 8.83k/8.83k [00:00<00:00, 24.0kB/s]
events.out.tfevents.1745725400.YAN-Machine.1904120.0: 100%|██████████| 8.83k/8.83k [00:00<00:00, 23.6kB/s]
events.out.tfevents.1745725837.YAN-Machine.1913951.0: 100%|██████████| 8.83k/8.83k [00:00<00:00, 22.1kB/s]
training_args.bin: 100%|██████████| 5.37k/5.37k [00:00<00:00, 14.8kB/s]
model.safetensors: 100%|██████████| 4.49G/4.49G [20:29<00:00, 3.65MB/s]
Upload 25 LFS files: 100%|██████████| 25/25 [20:31<00:00, 49.25s/it]
开始测试
User: Caption the video. You are provided the following series of three frames from a 0:00:03 [H:MM:SS] video.
Frame from 00:00:
Frame from 00:01:
Frame from 00:02:
Assistant: A woman in a white shirt is standing at a podium with a microphone, speaking into it. She is holding a microphone in her hand.
