1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
| import os import time import pandas as pd from langchain.callbacks.manager import CallbackManager from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from langchain.chains import LLMChain from langchain.llms import LlamaCpp from langchain.prompts import PromptTemplate from tqdm import tqdm
# Lower bound, in seconds, on the wall-clock time spent per processed news
# item; the main loop sleeps for the remainder when an item finishes faster.
MIN_INTERVAL = 1.5

# Prompt placeholder name -> column name in the news table.
keys = {
    'title': 'title',
    'abstract': 'abs',
    'category': 'cat',
    'subcategory': 'subcat',
}

# Working directory at launch; the data and output paths are built from it.
current_path = os.getcwd()
# Load the tab-separated news table (first row is the header).
# The path components are passed separately so os.path.join actually does the
# joining instead of receiving one pre-concatenated string.
news_df = pd.read_csv(
    filepath_or_buffer=os.path.join(current_path, 'data', 'news.tsv'),
    sep='\t',
    header=0,
)

# Build a list of (nid, fields) pairs, where `fields` maps each prompt
# placeholder name in `keys` to the value of the corresponding column.
news_list = []
# iterrows() is a generator without a length, so give tqdm the row count to
# get a real progress bar.
for _, row in tqdm(news_df.iterrows(), total=len(news_df)):
    fields = {}
    for placeholder, column in keys.items():
        value = row[column]
        # Empty cells are read as NaN by pandas; substitute an empty string so
        # the prompt never contains the literal text "nan".
        fields[placeholder] = '' if pd.isna(value) else value
    news_list.append((row['nid'], fields))
# Instruction prompt for the title-enhancement task. The {title}, {abstract},
# {category} and {subcategory} placeholders are substituted through the
# PromptTemplate that is built from this string further down.
# NOTE(review): the text is sent to the model verbatim, so its wording and
# layout are deliberately left untouched here.
prompt_template = """You are asked to act as a news title enhancer. I will provide you a piece of news, with its original title, category, subcategory, and abstract (if exists). The news format is as below:
[title] {title} [abstract] {abstract} [category] {category} [subcategory] {subcategory}
where title, abstract, category, and subcategory in the brace will be filled with content. You can only response a rephrased news title which should be clear, complete, objective and neutral. You can expand the title according to the above requirements. You are not allowed to response any other words for any explanation. Your response format should be:
[newtitle]
where [newtitle] should be filled with the enhanced title. Now, your role of news title enhancer formally begins. Any other information should not disturb your role."""
# Log file that receives one "<nid>\t<enhanced title>" line per processed news
# item; it is also read back at start-up to skip items that are already done.
save_path = current_path + '/output/news_summarizer.log'
# Callback manager with a handler that streams the model output to stdout.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Location of the GGUF model file. The default is the original developer
# path; set the LLAMA_MODEL_PATH environment variable to run the script on
# another machine without editing the source.
model_path = os.environ.get(
    'LLAMA_MODEL_PATH',
    "/Users/liuqiang/Desktop/code/llm/models/gguf/qwen1.5-72b-chat-q5_k_m.gguf",
)

llm = LlamaCpp(
    model_path=model_path,
    temperature=0.8,
    top_p=0.8,
    n_ctx=6000,
    callback_manager=callback_manager,
    verbose=True,
)

# Usage examples for PromptTemplate together with a chain:
#
# (1) Passing a single string
#     prompt = PromptTemplate(
#         input_variables=["product"],
#         template="What is a good name for a company that makes {product}?",
#     )
#
#     chain = LLMChain(llm=llm, prompt=prompt)
#     # Run the chain only specifying the input variable.
#     print(chain.run("colorful socks"))
#
# (2) Passing a dictionary
#     prompt = PromptTemplate(
#         input_variables=["company", "product"],
#         template="What is a good name for {company} that makes {product}?",
#     )
#     chain = LLMChain(llm=llm, prompt=prompt)
#     print(chain.run({
#         'company': "ABC Startup",
#         'product': "colorful socks"
#     }))

# Bind the title-enhancement prompt to the model. The four input variables
# match the placeholders used inside prompt_template.
prompt = PromptTemplate(
    input_variables=["title", "abstract", "category", "subcategory"],
    template=prompt_template,
)
chain = LLMChain(llm=llm, prompt=prompt)
# Ids of news items that already have a record in the log, so an interrupted
# run can be resumed without redoing finished items.
exist_set = set()
try:
    with open(save_path, 'r') as f:
        # A record is "<nid>\t<enhanced title>"; only lines starting with 'N'
        # are treated as records (presumably every nid starts with 'N' --
        # verify against the data), and the id is the first tab field.
        exist_set.update(
            line.split('\t')[0] for line in f if line.startswith('N')
        )
except FileNotFoundError:
    # First run: there is no log yet. Make sure its directory exists so the
    # appends performed by the main loop do not fail for every item.
    os.makedirs(os.path.dirname(save_path) or '.', exist_ok=True)
# Generate an enhanced title for every news item that is not in the log yet
# and append it to the log as "<nid>\t<enhanced title>".
for nid, content in tqdm(news_list):
    # Already handled by a previous run.
    if nid in exist_set:
        continue

    start_time = time.time()
    try:
        enhanced = chain.run(
            title=content['title'],
            abstract=content['abstract'],
            category=content['category'],
            subcategory=content['subcategory'],
        )
        # Collapse every run of whitespace (including embedded newlines and
        # tabs) into a single space: the log is one tab-separated record per
        # line, and the resume logic reads it back line by line.
        enhanced = ' '.join(enhanced.split())
        # Re-open in append mode for each item so finished records are on
        # disk even if the run is interrupted later.
        with open(save_path, 'a') as f:
            f.write(f'{nid}\t{enhanced}\n')
    except Exception as e:
        # Best effort: report the failing item and move on to the next one;
        # it will be retried on the next run because it is not in the log.
        print(f'{nid}: {e}')

    # Keep each processed item at least MIN_INTERVAL seconds long.
    elapsed = time.time() - start_time
    if elapsed < MIN_INTERVAL:
        time.sleep(MIN_INTERVAL - elapsed)
|