一起学大模型 - 一起动笔练习prompt的用法

文章目录

前言
一、代码演示
二、代码解析
- 1. 导入所需的库和模块：
- 2. 设置日志记录和初始化模型：
- 3. 定义一个函数用于清理GPU内存：
- 4. 定义一个继承自LLM基类的QianWenChatLLM类，并实现对话生成的逻辑：
- 5. 示例代码的主体部分：
三、运行结果
- 在这里插入图片描述
总结

前言

在之前的文章里，我们学习了 LangChain 的 prompt 接口，但光学不练是不够的。
让我们一起动手练习 LangChain prompt 的用法，并更合理地组织它。prompt 的组织方式没有固定的规范：可以使用不同的前缀来标注用户、AI、历史记录或已知信息，这些都是可以灵活调整的。只要格式明确，大模型就可以正确识别。

一、代码演示

import logging
import os
from typing import List, Optional

import torch
from langchain.chains import LLMChain
from langchain.llms.base import LLM
from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from modelscope import AutoModelForCausalLM, AutoTokenizer
from modelscope import GenerationConfig

from configs import log_verbose

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Load the tokenizer and the chat model from the local checkpoint directory.
# trust_remote_code=True lets the loader run the model code shipped with the checkpoint.
tokenizer = AutoTokenizer.from_pretrained("I:/aimodels/Qwen/Qwen-1_8B-Chat", trust_remote_code=True)
# device_map="cuda" places the model on the GPU; .eval() switches it to inference mode.
model = AutoModelForCausalLM.from_pretrained("I:/aimodels/Qwen/Qwen-1_8B-Chat", device_map="cuda", trust_remote_code=True).eval()
# Attach the generation settings stored alongside the checkpoint to the loaded model.
model.generation_config = GenerationConfig.from_pretrained("I:/aimodels/Qwen/Qwen-1_8B-Chat", trust_remote_code=True)


def torch_gc():
    """Release cached accelerator memory held by torch (best effort).

    On CUDA, empties the caching allocator and collects IPC memory. On Apple
    MPS, tries ``torch.mps.empty_cache`` and logs an error if that fails.
    Any other failure is deliberately ignored so that cleanup can never
    break the caller.
    """
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
        elif torch.backends.mps.is_available():
            try:
                # Imported lazily: the import itself is what may fail here.
                from torch.mps import empty_cache
                empty_cache()
            except Exception as e:
                msg = ("如果您使用的是 macOS 建议将 pytorch 版本升级至 2.0.0 或更高版本，"
                       "以支持及时清理 torch 产生的内存占用。")
                # Attach the traceback only when verbose logging is enabled.
                logger.error(f'{e.__class__.__name__}: {msg}',
                             exc_info=e if log_verbose else None)
    except Exception:
        # Cleanup is optional; swallow errors rather than interrupt inference.
        ...


# wrap the qwen model with langchain LLM base class
class QianWenChatLLM(LLM):
    """LangChain ``LLM`` adapter backed by the module-level Qwen chat model.

    Every call is answered by ``model.chat`` using the module-level
    ``tokenizer``; no conversation history is carried between calls.
    """

    # Explicit annotations so the attributes are declared as typed fields.
    # NOTE(review): LangChain's LLM base is a pydantic model and pydantic v2
    # rejects un-annotated class attributes - confirm against the installed version.
    # NOTE: these values are not forwarded to model.chat() in _call below.
    max_length: int = 10000
    temperature: float = 0.01
    top_p: float = 0.9

    def __init__(self):
        super().__init__()

    @property
    def _llm_type(self) -> str:
        """Identifier string reported for this LLM implementation."""
        return "ChatLLM"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Send ``prompt`` to the Qwen model and return its reply.

        Args:
            prompt: Fully rendered prompt text.
            stop: Accepted for interface compatibility; not used here.

        Returns:
            The response text produced by ``model.chat``.
        """
        print(prompt)
        # history=None: each call starts a fresh conversation.
        response, _history = model.chat(tokenizer, prompt, history=None)
        # Free cached accelerator memory after every generation.
        torch_gc()
        return response


if __name__ == '__main__':
    import datetime

    qwllm = QianWenChatLLM()
    print('@@@ qianwen LLM created')

    # Configure the wrapper before chatting with it.
    qwllm.temperature = 0.01
    qwllm.top_p = 0.9
    qwllm.max_length = 10000

    # Example 1: an idiom-chain game seeded with one human/AI exchange;
    # the final human turn is filled in from the "input" variable.
    chat_prompt = ChatPromptTemplate.from_messages([
        ("human", "我们来玩成语接龙，我先来，生龙活虎"),
        ("ai", "虎头虎脑"),
        ("human", "{input}"),
    ])
    chain = LLMChain(prompt=chat_prompt, llm=qwllm, verbose=True)
    print(chain({"input": "恼羞成怒"}))

    # Example 2: a single message (adjacent literals are concatenated) that tags
    # the instruction, the known information and the question separately.
    chat_prompt2 = ChatPromptTemplate.from_messages([
        '<指令>这里是我通过工具获取的当前信息。请你根据这些信息进行提取并有调理，简洁的回答问题。如果无法从中得到答案，请说 “根据已知信息无法回答该问题”，答案请使用中文。 </指令>\n'
        '<已知信息>{context}</已知信息>\n'
        '<问题>{question}</问题>\n'
    ])

    # Current time formatted as year-month-day hour:minute:second.
    now = datetime.datetime.now()
    now_time = now.strftime("%Y-%m-%d %H:%M:%S")
    chain2 = LLMChain(prompt=chat_prompt2, llm=qwllm, verbose=True)
    print(chain2({"context": "当前的时间是" + now_time, "question": "请问现在几点了？"}))

二、代码解析

这段代码将名为"Qwen"的预训练语言模型封装成 LangChain 的 LLM，并用它进行对话生成。以下是代码的解释：

1. 导入所需的库和模块：

import os
import torch
from typing import List, Optional
from langchain.chains import LLMChain
from langchain.llms.base import LLM
from langchain_core.prompts import HumanMessagePromptTemplate, ChatPromptTemplate
from modelscope import AutoModelForCausalLM, AutoTokenizer
from modelscope import GenerationConfig
import logging
import torch
from configs import log_verbose

2. 设置日志记录和初始化模型：

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Local checkpoint directory shared by the tokenizer, the model weights
# and the generation config.
model_dir = "I:/aimodels/Qwen/Qwen-1_8B-Chat"

# Tokenizer and model of the pretrained checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    device_map="cuda",
    trust_remote_code=True,
)
# Switch to inference mode.
model = model.eval()
model.generation_config = GenerationConfig.from_pretrained(model_dir, trust_remote_code=True)

3. 定义一个函数用于清理GPU内存：

def torch_gc():
    """Release cached accelerator memory held by torch (best effort).

    On CUDA, empties the caching allocator and collects IPC memory. On Apple
    MPS, tries ``torch.mps.empty_cache`` and logs an error if that fails.
    Any other failure is deliberately ignored so that cleanup can never
    break the caller.
    """
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.ipc_collect()
        elif torch.backends.mps.is_available():
            try:
                # Imported lazily: the import itself is what may fail here.
                from torch.mps import empty_cache
                empty_cache()
            except Exception as e:
                msg = "如果您使用的是 macOS 建议将 pytorch 版本升级至 2.0.0 或更高版本，以支持及时清理 torch 产生的内存占用。"
                # Attach the traceback only when verbose logging is enabled.
                logger.error(f'{e.__class__.__name__}: {msg}', exc_info=e if log_verbose else None)
    except Exception:
        # Cleanup is optional; swallow errors rather than interrupt inference.
        ...

4. 定义一个继承自LLM基类的QianWenChatLLM类，并实现对话生成的逻辑：

class QianWenChatLLM(LLM):
    """LangChain ``LLM`` adapter backed by the module-level Qwen chat model.

    Every call is answered by ``model.chat`` using the module-level
    ``tokenizer``; no conversation history is carried between calls.
    """

    # Explicit annotations so the attributes are declared as typed fields.
    # NOTE(review): LangChain's LLM base is a pydantic model and pydantic v2
    # rejects un-annotated class attributes - confirm against the installed version.
    # NOTE: these values are not forwarded to model.chat() in _call below.
    max_length: int = 10000
    temperature: float = 0.01
    top_p: float = 0.9

    def __init__(self):
        super().__init__()

    @property
    def _llm_type(self) -> str:
        """Identifier string reported for this LLM implementation."""
        return "ChatLLM"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Send ``prompt`` to the Qwen model and return its reply.

        Args:
            prompt: Fully rendered prompt text.
            stop: Accepted for interface compatibility; not used here.

        Returns:
            The response text produced by ``model.chat``.
        """
        print(prompt)
        # history=None: each call starts a fresh conversation.
        response, _history = model.chat(tokenizer, prompt, history=None)
        # Free cached accelerator memory after every generation.
        torch_gc()
        return response

5. 示例代码的主体部分：

if __name__ == '__main__':
    import datetime

    qwllm = QianWenChatLLM()
    print('@@@ qianwen LLM created')

    # Configure the wrapper before chatting with it.
    qwllm.temperature = 0.01
    qwllm.top_p = 0.9
    qwllm.max_length = 10000

    # Example 1: an idiom-chain game seeded with one human/AI exchange;
    # the final human turn is filled in from the "input" variable.
    chat_prompt = ChatPromptTemplate.from_messages([
        ("human", "我们来玩成语接龙，我先来，生龙活虎"),
        ("ai", "虎头虎脑"),
        ("human", "{input}"),
    ])
    chain = LLMChain(prompt=chat_prompt, llm=qwllm, verbose=True)
    print(chain({"input": "恼羞成怒"}))

    # Example 2: a single message (adjacent literals are concatenated) that tags
    # the instruction, the known information and the question separately.
    chat_prompt2 = ChatPromptTemplate.from_messages([
        '<指令>这里是我通过工具获取的当前信息。请你根据这些信息进行提取并有调理，简洁的回答问题。如果无法从中得到答案，请说 “根据已知信息无法回答该问题”，答案请使用中文。 </指令>\n'
        '<已知信息>{context}</已知信息>\n'
        '<问题>{question}</问题>\n'
    ])

    # Current time formatted as year-month-day hour:minute:second.
    now = datetime.datetime.now()
    now_time = now.strftime("%Y-%m-%d %H:%M:%S")
    chain2 = LLMChain(prompt=chat_prompt2, llm=qwllm, verbose=True)
    print(chain2({"context": "当前的时间是" + now_time, "question": "请问现在几点了？"}))