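"""Gradio app for book summarization: upload .pdf or .txt books, preview the
token count and estimated API cost with tiktoken, then have gpt-4-turbo report
title, author, chapter names, characters, and a detailed summary per book."""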
import gradio as gr
from openai import OpenAI
import tiktoken
from os import getenv as os_getenv
from json import loads as json_loads
from pathlib import Path
import fitz  # PyMuPDF
MODEL = 'gpt-4-turbo'
PRICE_PER_M = 10.00   # USD per million input tokens
LIMIT = 120000        # keep headroom below the context window for the answer

api_key = os_getenv("OPENAI_APIKEY")
client = OpenAI(api_key=api_key)
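# Build the instruction prompt: warn the model about front matter, indexes,
# and page headers/footers, list the requested output fields, then append
# the concatenated book text.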
def get_prompt(books):
    prompt = (
        "Read the following books.\n"
        "Each book may have some pages at the beginning with data about the book, "
        "an index, a table of contents, etc. "
        "Pages may have a header and/or a footer. Consider that any of these may be present.\n"
        "Please answer, for each book, everything below in the suggested format, "
        "in the language of the book:\n"
        "**Title**: ...\n"
        "**Author**: ...\n"
        "**Chapter Names**: ...\n"
        "**Characters**: \n"
        "**Detailed Summary of the whole book**: \n"
    )
    prompt += f"{books}\n"
    return prompt
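# Streaming handler for gr.ChatInterface. The first turn is special: files_ready()
# seeds the chatbot state with the generated prompt, so the user submits an empty
# first message and the stored prompt is sent in its place.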
def chat(message, history, files):
    history_openai_format = []
    if len(history) == 0:
        raise gr.Error("Please upload a book first")
    if len(history) == 1:
        # The only history entry is the generated prompt; the user's first
        # message must be empty, and the prompt is sent instead.
        if message:
            raise gr.Error("First message must be empty")
        message = history[0][0]
    else:
        for human, assistant in history:
            if human:
                history_openai_format.append({"role": "user", "content": human})
            if assistant:
                history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})
    response = client.chat.completions.create(
        model=MODEL,
        messages=history_openai_format,
        temperature=1.0,
        stream=True)
    partial_message = ""
    for chunk in response:
        if chunk.choices[0].delta.content is not None:
            partial_message = partial_message + chunk.choices[0].delta.content
            yield partial_message
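# Extract plain text from an uploaded file. PDFs are read page by page with
# PyMuPDF, with page markers inserted so the model can recognize repeating
# headers and footers.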
def get_text(filename):
    answer = ""
    suffix = Path(filename).suffix
    if suffix == ".pdf":
        for i, page in enumerate(fitz.open(filename)):
            answer += f"\n### Page #{i+1}\n{page.get_text()}\n"
    elif suffix == ".txt":
        answer = Path(filename).read_text(encoding="utf-8")
    return answer
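# Upload callback: concatenate all documents, build the prompt, count tokens,
# estimate cost, and enforce the context limit. Returns values for the Tokens
# and Cost textboxes plus the seeded chatbot state ([[prompt, None]]).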
def files_ready(filenames):
    encoder = tiktoken.encoding_for_model(MODEL)
    books = ''
    for i, name in enumerate(filenames):
        books += f"\n## Document #{i+1}\nName: {Path(name).name}\n"
        books += get_text(name)
    prompt = get_prompt(books)
    tokens = len(encoder.encode(prompt))
    cost = tokens * PRICE_PER_M / 1000000 * 2  # x2 is a generous allowance for the answer
    if tokens > LIMIT:
        raise gr.Error(f"Book is too long. It's {tokens} tokens long and can't be more than {LIMIT}.")
    return tokens, f"${cost:.2f}", [[prompt, None]]
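# Change callback: show placeholders while a fresh upload is being processed,
# or reset the counters when the file list is cleared.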
def files_changed(filenames):
    if filenames:
        return "-", "-"
    else:
        return 0, "$0"
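# UI: file uploader with token/cost readouts in the top row, chat interface below.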
with gr.Blocks(title="Book summarization and more") as demo:
    with gr.Row():
        # Note: get_text() only extracts .pdf and .txt; .doc/.docx are accepted
        # by the uploader but yield no text.
        files = gr.Files(file_types=[".txt", ".doc", ".docx", ".pdf"])
        with gr.Column():
            tokens = gr.Text("0", label="Tokens")
            cost = gr.Text("0", label="Cost")
    chat_ui = gr.ChatInterface(
        fn=chat,
        title="Summarization and more",
        additional_inputs=[files],
        multimodal=False)
    other = gr.Button(interactive=False)  # placeholder, intentionally disabled
    files.upload(files_ready, [files], [tokens, cost, chat_ui.chatbot_state])
    files.change(files_changed, files, [tokens, cost])
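# Optional login: APP_USERS holds a JSON list of [username, password] pairs,
# e.g. '[["alice", "s3cret"]]' (hypothetical credentials). When unset,
# json_loads("null") yields None and the app launches without auth.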
auth = os_getenv("APP_USERS", "null")
auth = json_loads(auth)
demo.launch(auth=auth)