"""Gradio app that summarizes uploaded books with an OpenAI chat model.

Flow, as wired at the bottom of this file:

1. The user uploads one or more files; ``files_ready`` extracts their text,
   counts tokens and seeds the chatbot state with a single
   ``[prompt, None]`` entry.
2. The user submits an *empty* first message; ``chat`` sends the seeded
   prompt to the model and streams the answer back.
3. Later messages are sent together with the accumulated conversation.
"""
from json import loads as json_loads
from os import getenv as os_getenv
from pathlib import Path

import fitz
import gradio as gr
import tiktoken
from openai import OpenAI

# Single source of truth for the model used for both chatting and token counting.
MODEL_NAME = "gpt-4-turbo"

api_key = os_getenv("OPENAI_APIKEY")
client = OpenAI(api_key=api_key)


def get_prompt(books, question=None):
    """Build the initial summarization prompt for the given book text.

    Args:
        books: Concatenated text of all uploaded documents.
        question: Accepted for backward compatibility; not used.

    Returns:
        The instruction text followed by ``books`` and a trailing newline.
    """
    prompt = (
        "Read the following books.\n"
        "Each book may have some pages at the beggining with data about the book, an index, or table of content, etc. "
        "Pages may have a header and/or a footer. Consider all this maybe present."
        "Please answer all below in the suggested format, in the language of the book:\n"
        "**Title**: ...\n"
        "**Author**: ...\n"
        "**Chapter Names**: ...\n"
        "**Characters**: \n"
        "**Detailed Summary**: \n"
    )
    prompt += f"{books}\n"
    return prompt


def chat(message, history, files):
    """Stream the model's reply for a chat turn.

    Args:
        message: Text typed by the user. Must be empty on the first turn,
            where the seeded prompt in ``history`` is sent instead.
        history: List of ``[user, assistant]`` pairs kept by the chat UI.
        files: Value of the upload component (additional input); not used.

    Yields:
        The reply accumulated so far, growing with every streamed chunk.

    Raises:
        gr.Error: If no book was uploaded yet (empty history), or if the
            first message after an upload is not empty.
    """
    if not history:
        raise gr.Error("Primero hay que subir un libro")

    messages = []
    if len(history) == 1:
        # Only the seeded [prompt, None] entry exists: send the prompt itself.
        if message:
            raise gr.Error("First message must be empty")
        message = history[0][0]
    else:
        # Skip empty slots: the seeded entry has no assistant reply and the
        # first real turn has an empty user message.
        for human, assistant in history:
            if human:
                messages.append({"role": "user", "content": human})
            if assistant:
                messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=1.0,
        stream=True,
    )

    partial_message = ""
    for chunk in response:
        # Guard against chunks without choices instead of raising IndexError.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            partial_message += piece
            yield partial_message


def get_text(filename):
    """Extract plain text from a ``.pdf`` or ``.txt`` file.

    The extension is matched case-insensitively. PDF pages are each prefixed
    with a ``### Page #N`` heading. Any other extension yields an empty
    string.

    Args:
        filename: Path of the file to read.

    Returns:
        The extracted text, or ``""`` for unsupported extensions.
    """
    suffix = Path(filename).suffix.lower()
    if suffix == ".pdf":
        # Context manager closes the document even if extraction fails.
        with fitz.open(filename) as document:
            return "".join(
                f"\n### Page #{number}\n{page.get_text()}\n"
                for number, page in enumerate(document, start=1)
            )
    if suffix == ".txt":
        # Explicit encoding; undecodable bytes are replaced rather than
        # aborting the whole upload.
        with open(filename, encoding="utf-8", errors="replace") as handle:
            return handle.read()
    return ""


def files_ready(filenames):
    """Turn uploaded files into a token count and a seeded chat history.

    Args:
        filenames: Paths of the uploaded files.

    Returns:
        A tuple ``(token_count, history)`` where ``token_count`` is the
        number of tokens in the concatenated document text (the prompt
        wrapper is not counted) and ``history`` is ``[[prompt, None]]``.
    """
    encoder = tiktoken.encoding_for_model(MODEL_NAME)
    parts = []
    for number, name in enumerate(filenames, start=1):
        parts.append(f"\n## Document #{number}\nName: {Path(name).name}\n")
        parts.append(get_text(name))
    answer = "".join(parts)
    # disallowed_special=() makes special-token look-alikes in the book text
    # (e.g. "<|endoftext|>") encode as ordinary text instead of raising.
    token_count = len(encoder.encode(answer, disallowed_special=()))
    return token_count, [[get_prompt(answer), None]]


def files_changed(filenames):
    """Reset the token counter when the file list becomes empty.

    Args:
        filenames: Current value of the upload component.

    Returns:
        ``0`` when there are no files; otherwise a no-op update so the count
        written by ``files_ready`` is not blanked out.
    """
    if not filenames:
        return 0
    return gr.update()


with gr.Blocks(title="Book summarization and more") as demo:
    with gr.Row():
        # Extensions need a leading dot; bare names are treated as MIME
        # categories. NOTE(review): .doc/.docx are accepted here but
        # get_text extracts no text from them.
        files = gr.Files(file_types=[".txt", ".doc", ".docx", ".pdf"])
        tokens = gr.Text("0", label="Tokens")
    # NOTE: this rebinds the module-level name ``chat`` from the handler
    # function to the ChatInterface; ``fn=chat`` is evaluated first.
    chat = gr.ChatInterface(
        fn=chat,
        title="Summarization and more",
        additional_inputs=[files],
        multimodal=False,
    )
    other = gr.Button(interactive=False)
    files.upload(files_ready, [files], [tokens, chat.chatbot_state])
    files.change(files_changed, files, tokens)

# APP_USERS holds JSON credentials; the default "null" decodes to None,
# which launches without authentication.
auth = os_getenv("APP_USERS", "null")
auth = json_loads(auth)
demo.launch(auth=auth)