"""Gradio app: upload books (PDF/TXT) and chat with an OpenAI model about them.

Uploading files builds a single prompt containing the text of every book and
seeds the chat state with it; sending an empty first message then submits that
prompt to the model, and later messages continue the conversation.
"""

from json import loads as json_loads
from os import getenv as os_getenv
from pathlib import Path

import fitz
import gradio as gr
import tiktoken
from openai import OpenAI

MODEL = 'gpt-4-turbo'
PRICE_PER_M = 10.00
LIMIT = 120000  # some space for answer

api_key = os_getenv("OPENAI_APIKEY")
client = OpenAI(api_key=api_key)


def get_prompt(books, question=None):
    """Return the summarization prompt followed by the text of *books*.

    Args:
        books: Concatenated text of all uploaded documents.
        question: Currently unused; kept for interface compatibility.

    Returns:
        The instruction header, then ``books``, then a trailing newline.
    """
    prompt = (
        "Read the following books.\n"
        "Each book may have some pages at the beggining with data about the book, an index, or table of content, etc. "
        "Pages may have a header and/or a footer. Consider all this maybe present."
        "Please answer, for each book, all below in the suggested format, in the language of the book:\n"
        "**Title**: ...\n"
        "**Author**: ...\n"
        "**Chapter Names**: ...\n"
        "**Characters**: \n"
        "**Detailed Summary of the whole book**: \n"
    )
    prompt += f"{books}\n"
    return prompt


def chat(message, history, files):
    """Stream the model's reply for the current chat turn.

    Args:
        message: Text typed by the user. Must be empty on the first turn,
            because the first turn submits the prompt stored in ``history``.
        history: List of ``[user, assistant]`` pairs. ``files_ready`` seeds it
            with a single ``[prompt, None]`` entry.
        files: Value of the file-upload component. Unused here, but it is
            passed in because the component is registered as an additional
            input of the chat interface.

    Yields:
        The reply accumulated so far, growing as chunks arrive.

    Raises:
        gr.Error: If no book has been uploaded yet, or if the first message
            is not empty.
    """
    history_openai_format = []
    # `not history` also covers a missing (None) state, not just an empty list.
    if not history:
        raise gr.Error("Primero hay que subir un libro")
    if len(history) == 1:
        if message:
            raise gr.Error("First message must be empty")
        # First turn: the stored book prompt is the message sent to the model.
        message = history[0][0]
    else:
        for human, assistant in history:
            if human:
                history_openai_format.append({"role": "user", "content": human})
            if assistant:
                history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=MODEL,
        messages=history_openai_format,
        temperature=1.0,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        # Skip chunks that carry no choices instead of failing on index 0.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta is not None:
            partial_message += delta
            yield partial_message


def get_text(filename):
    """Return the text content of *filename*.

    PDF files are rendered page by page, each page preceded by a
    ``### Page #N`` heading. TXT files are returned as-is. The extension is
    matched case-insensitively. Any other extension yields an empty string.
    """
    path = Path(filename)
    suffix = path.suffix.lower()
    if suffix == ".pdf":
        # The context manager closes the document once every page is read.
        with fitz.open(filename) as document:
            return "".join(
                f"\n### Page #{number}\n{page.get_text()}\n"
                for number, page in enumerate(document, start=1)
            )
    if suffix == ".txt":
        # read_text() closes the file; the platform default encoding is kept.
        return path.read_text()
    return ""


def files_ready(filenames):
    """Build the prompt for the uploaded files and estimate its cost.

    Args:
        filenames: Paths of the uploaded files.

    Returns:
        A tuple ``(tokens, cost, state)``: the prompt's token count, the
        estimated cost as a ``$``-prefixed string, and the initial chat
        state ``[[prompt, None]]``.

    Raises:
        gr.Error: If the prompt is longer than ``LIMIT`` tokens.
    """
    encoder = tiktoken.encoding_for_model(MODEL)
    books = "".join(
        f"\n## Document #{number}\nName: {Path(name).name}\n{get_text(name)}"
        for number, name in enumerate(filenames, start=1)
    )
    prompt = get_prompt(books)
    tokens = len(encoder.encode(prompt))
    if tokens > LIMIT:
        raise gr.Error(f"Book is too long. It's {tokens} tokens long and can't be more than {LIMIT}.")
    cost = tokens * PRICE_PER_M / 1000000 * 2  # * 2 is too much for an answer
    return tokens, f"${cost}", [[prompt, None]]


def files_changed(filenames):
    """Return placeholder values for the token and cost fields.

    Returns ``("-", "-")`` while files are present and ``(0, "$0")`` when the
    file list is empty.
    """
    if filenames:
        return "-", "-"
    return 0, "$0"


with gr.Blocks(title="Book summarization and more") as demo:
    with gr.Row():
        files = gr.Files(file_types=["txt", "doc", "docx", "pdf"])
        with gr.Column():
            tokens = gr.Text("0", label="Tokens")
            cost = gr.Text("0", label="Cost")
    # NOTE: from here on the module-level name `chat` refers to the
    # ChatInterface, not to the callback function defined above.
    chat = gr.ChatInterface(
        fn=chat,
        title="Summarization and more",
        additional_inputs=[files],
        multimodal=False,
    )
    other = gr.Button(interactive=False)
    files.upload(files_ready, [files], [tokens, cost, chat.chatbot_state])
    files.change(files_changed, files, [tokens, cost])

# APP_USERS holds JSON; the default "null" decodes to None.
auth = os_getenv("APP_USERS", "null")
auth = json_loads(auth)
demo.launch(auth=auth)