Spaces:
Sleeping
Sleeping
File size: 3,173 Bytes
45cec28 0932564 ba4f426 0932564 ba4f426 b10341b 45cec28 ba4f426 0932564 ba4f426 0da928c 0932564 6a7b15a 0932564 45cec28 ba4f426 45cec28 6a7b15a ba4f426 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import gradio as gr
from openai import OpenAI
import tiktoken
from os import getenv as os_getenv
from json import loads as json_loads
from pathlib import Path
import fitz
# OpenAI API key, read from the OPENAI_APIKEY environment variable
# (os.getenv yields None when the variable is not set).
api_key = os_getenv("OPENAI_APIKEY")
# Module-wide client; chat() sends every completion request through it.
client = OpenAI(api_key=api_key)
def get_prompt(books, question = None):
    """Build the initial summarization prompt for the uploaded books.

    Args:
        books: Text of the book(s) to analyse; it is appended verbatim after
            the instructions.
        question: Accepted for backward compatibility; currently unused.

    Returns:
        The fixed instruction text, followed by ``books`` and a trailing
        newline.
    """
    instructions = (
        "Read the following books.\n"
        "Each book may have some pages at the beginning with data about the "
        "book, an index, or table of content, etc. "
        # The newline ends this paragraph; without a separator the sentence
        # ran straight into the next one ("...present.Please answer...").
        "Pages may have a header and/or a footer. "
        "Consider all this maybe present.\n"
        "Please answer all below in the suggested format, "
        "in the language of the book:\n"
        "**Title**: ...\n"
        "**Author**: ...\n"
        "**Chapter Names**: ...\n"
        "**Characters**: \n"
        "**Detailed Summary**: \n"
    )
    return f"{instructions}{books}\n"
def chat(message, history, files):
    """Stream the model's reply for the current conversation.

    Generator used as the ChatInterface callback.

    Args:
        message: Text typed by the user for this turn.
        history: Sequence of ``[user, assistant]`` pairs for earlier turns.
        files: Extra input supplied by the interface; not read here.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries
        text.

    Raises:
        gr.Error: If ``history`` is empty (message is Spanish for "a book
            must be uploaded first"), or if ``history`` has exactly one
            entry and ``message`` is non-empty.
    """
    turns = len(history)
    if turns == 0:
        raise gr.Error("Primero hay que subir un libro")

    messages = []
    if turns == 1:
        # With a single history entry, its user text is sent as the request
        # and the typed message has to be blank.
        if message:
            raise gr.Error("First message must be empty")
        message = history[0][0]
    else:
        # Replay earlier turns, leaving out empty/None sides of each pair.
        for user_text, bot_text in history:
            if user_text:
                messages.append({"role": "user", "content": user_text})
            if bot_text:
                messages.append({"role": "assistant", "content": bot_text})
    messages.append({"role": "user", "content": message})

    stream = client.chat.completions.create(
        model='gpt-4-turbo',
        messages=messages,
        temperature=1.0,
        stream=True,
    )

    reply = ""
    for chunk in stream:
        piece = chunk.choices[0].delta.content
        if piece is None:
            continue
        reply += piece
        # NOTE(review): yielding only for chunks that carry text; nesting was
        # reconstructed from a listing without indentation - confirm.
        yield reply
def get_text(filename):
    """Extract the plain text of an uploaded document.

    Args:
        filename: Path of the file; its suffix (compared case-insensitively)
            selects how the file is read.

    Returns:
        For ``.pdf``: the text of every page, each preceded by a
        ``### Page #N`` heading (N starts at 1).
        For ``.txt``: the file contents decoded as UTF-8.
        For any other suffix: an empty string.
    """
    # Lower-case the suffix so "BOOK.PDF" is handled like "book.pdf".
    suffix = Path(filename).suffix.lower()

    if suffix == ".pdf":
        # The context manager closes the document even if extraction of a
        # page raises.
        with fitz.open(filename) as document:
            return "".join(
                f"\n### Page #{number}\n{page.get_text()}\n"
                for number, page in enumerate(document, start=1)
            )

    if suffix == ".txt":
        # Explicit encoding keeps the result independent of the host locale;
        # `with` releases the file handle deterministically.
        with open(filename, encoding="utf-8") as handle:
            return handle.read()

    return ""
def files_ready(filenames):
    """Turn freshly uploaded files into a token count and a seeded chat history.

    Args:
        filenames: Iterable of paths of the uploaded files.

    Returns:
        A ``(token_count, history)`` pair. ``token_count`` is the number of
        gpt-4-turbo tokens in the concatenated document text (the
        instruction text added by ``get_prompt`` is not counted).
        ``history`` is a single ``[prompt, None]`` entry, i.e. a user turn
        with no assistant reply yet.
    """
    encoder = tiktoken.encoding_for_model('gpt-4-turbo')

    # Collect the pieces and join once instead of growing a string with +=.
    sections = []
    for number, name in enumerate(filenames, start=1):
        sections.append(f"\n## Document #{number}\nName: {Path(name).name}\n")
        sections.append(get_text(name))
    corpus = "".join(sections)

    return len(encoder.encode(corpus)), [[get_prompt(corpus), None]]
def files_changed(filenames):
    """Reset the token counter once the file list becomes empty.

    Args:
        filenames: Current value of the file picker (may be None or empty).

    Returns:
        ``0`` when there are no files. Otherwise a no-op ``gr.update()`` so
        the bound output keeps its current value.
    """
    if not filenames:
        return 0
    # Falling off the end would return None, which Gradio writes to the
    # output component as its new value and can blank the count that the
    # upload handler produced. An empty update leaves the component as is.
    return gr.update()
# Page layout and event wiring.
# NOTE(review): widget nesting was reconstructed from a listing without
# indentation (picker + counter in one row, the rest below) - confirm.
with gr.Blocks(title="Book summarization and more") as demo:
    with gr.Row():
        # Gradio expects extensions with a leading dot; bare words such as
        # "txt" are interpreted as file-type categories, not extensions.
        # get_text() only extracts text from .pdf and .txt; .doc/.docx stay
        # selectable as before but contribute no text.
        files = gr.Files(file_types=[".txt", ".doc", ".docx", ".pdf"])
        tokens = gr.Text("0", label="Tokens")
    # From here on the module-level name `chat` refers to the interface;
    # the callback function has already been captured through `fn=chat`.
    chat = gr.ChatInterface(
        fn=chat,
        title="Summarization and more",
        additional_inputs=[files],
        multimodal=False,
    )
    other = gr.Button(interactive=False)
    # Upload: compute the token count and seed the chat state with the prompt.
    files.upload(files_ready, [files], [tokens, chat.chatbot_state])
    # Any change of the picker: reset the counter when no files remain.
    files.change(files_changed, files, tokens)

# APP_USERS holds the credentials as JSON; when the variable is unset the
# "null" default decodes to None, which is what launch() then receives.
auth = json_loads(os_getenv("APP_USERS", "null"))
demo.launch(auth=auth)
|