Spaces:

gerasdf
/

summarizer

Sleeping

summarizer / app.py

gera

enough for today.

4a99335 10 months ago

3.66 kB

	import gradio as gr
	from openai import OpenAI
	import tiktoken
	from os import getenv as os_getenv
	from json import loads as json_loads
	from pathlib import Path
	import fitz

	# Name of the chat model that `chat` requests from the OpenAI API.
	MODEL = 'gpt-4-turbo'
	# Price per million tokens; `files_ready` uses it for the cost estimate,
	# which is displayed with a "$" prefix.
	PRICE_PER_M = 10.00
	# Largest prompt, in tokens, that `files_ready` accepts.
	LIMIT = 120000 # some space for answer

	# The API key comes from the OPENAI_APIKEY environment variable; os_getenv
	# returns None when the variable is not set.
	api_key = os_getenv("OPENAI_APIKEY")
	# Single OpenAI client, built at import time and used by `chat`.
	client = OpenAI(api_key=api_key)

	def get_prompt(books, question=None):
	    """Build the summarization prompt for the uploaded books.

	    Args:
	        books: Text of all uploaded documents, already concatenated.
	        question: Currently unused; kept so existing callers keep working.

	    Returns:
	        The fixed instructions followed by ``books`` and a trailing newline.
	    """
	    instructions = (
	        "Read the following books.\n"
	        "Each book may have some pages at the beginning with data about the book, "
	        "an index, or table of content, etc. "
	        # The trailing space keeps this sentence from running into the next one.
	        "Pages may have a header and/or a footer. Consider all this may be present. "
	        "Please answer, for each book, all below in the suggested format, "
	        "in the language of the book:\n"
	        "Title: ...\n"
	        "Author: ...\n"
	        "Chapter Names: ...\n"
	        "Characters: \n"
	        "Detailed Summary of the whole book: \n"
	    )
	    return f"{instructions}{books}\n"

	def chat(message, history, files):
	    """Stream the model's reply for the current conversation.

	    Args:
	        message: Text typed by the user; must be empty on the first turn.
	        history: Sequence of ``[user, assistant]`` pairs. It must already
	            hold at least one entry; with exactly one entry, that entry's
	            user text is sent in place of ``message``.
	        files: Not used by this function.

	    Yields:
	        The reply accumulated so far, once per streamed chunk that carries
	        content.

	    Raises:
	        gr.Error: If ``history`` is empty, or if ``message`` is non-empty
	            while ``history`` holds a single entry.
	    """
	    turn_count = len(history)
	    if turn_count == 0:
	        # User-facing text (Spanish): "A book must be uploaded first".
	        raise gr.Error("Primero hay que subir un libro")

	    conversation = []
	    if turn_count == 1:
	        if message:
	            raise gr.Error("First message must be empty")
	        # First turn: the stored first user entry becomes the request.
	        message = history[0][0]
	    else:
	        # Later turns: replay every non-empty side of each past exchange.
	        for human, assistant in history:
	            for role, text in (("user", human), ("assistant", assistant)):
	                if text:
	                    conversation.append({"role": role, "content": text})
	    conversation.append({"role": "user", "content": message})

	    stream = client.chat.completions.create(
	        model=MODEL,
	        messages=conversation,
	        temperature=1.0,
	        stream=True,
	    )

	    reply = ""
	    for chunk in stream:
	        piece = chunk.choices[0].delta.content
	        if piece is not None:
	            reply += piece
	            yield reply

	def get_text(filename):
	    """Extract the plain text of an uploaded document.

	    Args:
	        filename: Path to a ``.pdf`` or ``.txt`` file. The extension is
	            matched case-insensitively.

	    Returns:
	        For a PDF, the text of every page, each preceded by a
	        ``### Page #N`` heading (1-based). For a text file, its contents.
	        An empty string for any other extension.
	    """
	    suffix = Path(filename).suffix.lower()
	    if suffix == ".pdf":
	        # Close the document once its pages are read instead of leaking it.
	        with fitz.open(filename) as document:
	            return "".join(
	                f"\n### Page #{number}\n{page.get_text()}\n"
	                for number, page in enumerate(document, start=1)
	            )
	    if suffix == ".txt":
	        # Explicit encoding so the result does not depend on the host locale;
	        # undecodable bytes are replaced rather than aborting the upload.
	        with open(filename, encoding="utf-8", errors="replace") as handle:
	            return handle.read()
	    return ""

	def files_ready(filenames):
	    """Build the prompt for the uploaded files and estimate its size and cost.

	    Args:
	        filenames: Paths of the uploaded documents.

	    Returns:
	        A ``(tokens, cost, history)`` tuple: the prompt's token count, the
	        estimated cost as a ``$``-prefixed string with four decimals, and a
	        one-entry chat history ``[[prompt, None]]``.

	    Raises:
	        gr.Error: If the prompt is longer than ``LIMIT`` tokens.
	    """
	    sections = [
	        f"\n## Document #{number}\nName: {Path(name).name}\n{get_text(name)}"
	        for number, name in enumerate(filenames, start=1)
	    ]
	    prompt = get_prompt("".join(sections))

	    # Count tokens with the encoding of the model that is actually queried,
	    # so the estimate follows MODEL if it is ever changed.
	    encoder = tiktoken.encoding_for_model(MODEL)
	    tokens = len(encoder.encode(prompt))
	    if tokens > LIMIT:
	        raise gr.Error(f"Book is too long. It's {tokens} tokens long and can't be more than {LIMIT}.")

	    # The prompt cost is doubled as an allowance for the answer; the original
	    # author notes that doubling overestimates it.
	    cost = tokens * PRICE_PER_M / 1000000 * 2
	    # Fixed precision avoids float noise such as 0.30000000000000004.
	    return tokens, f"${cost:.4f}", [[prompt, None]]

	def files_changed(filenames):
	    """Return the token and cost display values for a changed file selection.

	    Yields ``("-", "-")`` placeholders while files are selected, and
	    ``(0, "$0")`` once the selection is empty.
	    """
	    pending = ("-", "-")
	    cleared = (0, "$0")
	    return pending if filenames else cleared

	# Page layout: a file picker next to the token/cost read-outs, with the chat
	# interface below. NOTE(review): the Row/Column nesting is reconstructed from
	# flattened source lines; confirm it against the intended layout.
	with gr.Blocks(title="Book summarization and more") as demo:
	    with gr.Row():
	        # Gradio expects extensions written with a leading dot. Only the
	        # formats get_text can read are offered: doc/docx would be accepted
	        # but turned into an empty book.
	        files = gr.Files(file_types=[".txt", ".pdf"])
	        with gr.Column():
	            tokens = gr.Text("0", label="Tokens")
	            cost = gr.Text("0", label="Cost")

	    # `fn=chat` is evaluated before the name `chat` is rebound to the
	    # interface object, so the streaming function defined above is what runs.
	    chat = gr.ChatInterface(
	        fn=chat,
	        title="Summarization and more",
	        additional_inputs=[files],
	        multimodal=False)

	    other = gr.Button(interactive=False)
	    # After an upload: fill in the estimates and seed the chat state with the
	    # generated prompt as its first entry.
	    files.upload(files_ready, [files], [tokens, cost, chat.chatbot_state])
	    # On any change of the selection: reset the two read-outs.
	    files.change(files_changed, files, [tokens, cost])


	# Credentials come from the APP_USERS environment variable as JSON. When it
	# is unset, the "null" default parses to None, which is passed to launch().
	auth = json_loads(os_getenv("APP_USERS", "null"))

	demo.launch(auth=auth)