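# NOTE: the next six lines are web-page header residue captured with this file, not code.
# iShare's picture
# Update main.py
# 0474d21
# raw
# history blame
# 4.09 kB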
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from langchain.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
#https://python.langchain.com/docs/use_cases/summarization
from langchain import HuggingFaceHub
from huggingface_hub import InferenceClient
from bs4 import BeautifulSoup
import requests
import sys
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the environment,
# then read the Hugging Face credentials and the model repository id.
load_dotenv()
hf_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
repo_id = os.getenv('repo_id')

# Hosted LLM client. The generation settings were marked "for StarChat" by
# the original author.
# NOTE(review): eos_token_id=49155 is presumably the model's end-of-turn
# token id -- confirm against the tokenizer of the configured repo_id.
llm = HuggingFaceHub(
    repo_id=repo_id,
    huggingfacehub_api_token=hf_token,
    model_kwargs=dict(
        min_length=512,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.01,
        top_k=50,
        top_p=0.95,
        eos_token_id=49155,
    ),
)

# Summarization chain shared by all requests; chain_type="stuff" was the
# alternative tried previously.
# See https://python.langchain.com/docs/use_cases/summarization
chain = load_summarize_chain(llm, chain_type="refine")

# Start-up progress marker (message text kept exactly as emitted before).
print("定义处理多余的Context文本的函数")
def remove_context(text: str) -> str:
    """Strip a ``Context:`` section from model output.

    If *text* contains the marker ``'Context:'``, everything up to and
    including the first blank line (two consecutive newlines) that follows
    the marker is dropped and the remainder is returned.

    The text is returned unchanged when the marker is absent, or when no
    blank line follows it (there is then no way to tell where the context
    section ends).

    Args:
        text: Raw text produced by the language model.

    Returns:
        The text with the context section removed, or *text* itself.
    """
    marker_pos = text.find('Context:')
    if marker_pos == -1:
        return text

    # Search from the marker onward so that a blank line occurring *before*
    # 'Context:' is not mistaken for the end of the context section.
    end_of_context = text.find('\n\n', marker_pos)
    if end_of_context == -1:
        # Without this guard the slice below would start at index 1
        # (-1 + 2) and silently drop the first character.
        return text

    # '+ 2' skips the two newline characters themselves.
    return text[end_of_context + 2:]
# Start-up progress marker (message text kept exactly as emitted before).
print("处理多余的Context文本函数定义结束")

# Application object on which the route decorators below register handlers.
app = FastAPI()
class ChatRequest(BaseModel):
    # Request body accepted by POST /api/chat.
    # Documented with plain comments rather than a docstring so the schema
    # generated from this model stays exactly as before.

    # Address of the web page to load and summarize.
    target_url: str
# This class definition appears to be required: the endpoint below declares it as the type of its request body.
#@app.post('/')
#async def home_api(request: Request):
# data = await request.json()
# target_url = data['target_url']
# print(target_url)
# return {"Message": "FastAPI Home API Deploy Success on HF"}
def _clean_summary(raw_summary):
    """Remove prompt echoes and chat-template residue from raw LLM output."""
    text = remove_context(raw_summary)
    # Keep only what precedes the first end-of-turn token, then collapse
    # blank lines into single newlines.
    text = text.split('<|end|>')[0].strip().replace('\n\n', '\n')
    # Drop any role tokens that leaked into the answer.
    for role_token in ('<|user|>', '<|system|>', '<|assistant|>'):
        text = text.replace(role_token, '')
    # Cut trailing sections the model sometimes appends after the summary.
    for marker in ('Unhelpful Answer:', 'Note:'):
        text = text.split(marker)[0].strip()
    return text


@app.post('/api/chat')
async def chat(chat_request: ChatRequest):
    """Summarize the web page at ``chat_request.target_url``.

    The page is loaded with ``WebBaseLoader``, summarized by the module-level
    ``chain``, and the cleaned summary is returned as
    ``{"response": <summary>}``.

    Args:
        chat_request: Parsed request body carrying ``target_url``.

    Returns:
        A ``JSONResponse`` with the summary under the ``response`` key.

    Raises:
        HTTPException: Status 400 when loading or summarizing fails for any
            reason. The status code and message are kept unchanged for
            existing clients, although the cause is not always a bad URL.
    """
    # Imported locally so this handler does not depend on a change to the
    # file's import block.
    from fastapi.concurrency import run_in_threadpool

    # NOTE(review): target_url comes straight from the client and is fetched
    # server-side; consider restricting schemes/hosts (SSRF). Not enforced
    # here to avoid changing behavior.
    target_url = chat_request.target_url
    print(target_url)
    try:
        loader = WebBaseLoader(target_url)
        # Both calls are blocking (HTTP fetch, remote LLM inference). Run
        # them in a worker thread so the event loop stays free to serve
        # other requests in the meantime.
        docs = await run_in_threadpool(loader.load)
        result = await run_in_threadpool(chain.run, docs)
        print(result)
        print()
        # Per the original author's note, chain.run() did not always yield a
        # plain str, which broke JSON serialization -- hence the str() call.
        final_result = _clean_summary(str(result))
        print("AI Summarization: " + final_result)
        return JSONResponse({'response': final_result})
    except Exception as e:
        err_msg = "Wrong URL or URL not parsable."
        # Surface the real cause in the server log; the client still receives
        # the same generic message as before.
        print(f"{err_msg} Cause: {e!r}")
        raise HTTPException(status_code=400, detail=err_msg) from e