File size: 3,416 Bytes
f1691d8
 
 
026783f
0dd7ae7
162dd8b
 
 
 
f1691d8
0dd7ae7
f1691d8
d100ccb
a1758a8
8639815
f6a07f3
f1691d8
 
 
162dd8b
026783f
 
 
 
 
 
 
162dd8b
 
 
026783f
241247a
026783f
 
241247a
026783f
b9c9dac
ec98a9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1691d8
241247a
f1691d8
 
b9c9dac
f1691d8
241247a
 
 
 
 
 
 
162dd8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8639815
162dd8b
 
b9c9dac
162dd8b
f1691d8
 
 
 
 
162dd8b
 
 
 
 
 
 
f1691d8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import PyPDF2
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain_core.documents import Document
from pathlib import Path
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=4096,
    temperature=0.5,
    do_sample=False,
)
llm_engine_hf = ChatHuggingFace(llm=llm)

def read_pdf(file_path):
    pdf_reader = PyPDF2.PdfReader(file_path)
    text = ""
    for page in range(len(pdf_reader.pages)):
        text += pdf_reader.pages[page].extract_text()
    return text
    
def summarize(file, n_words):
    # Read the content of the uploaded file
    file_path = file.name
    if file_path.endswith('.pdf'):
        text = read_pdf(file_path)
    else:
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()
            
    template = '''
    You are a commentator. Your task is to write a report on an essay.
When presented with the essay, come up with interesting questions to ask, and answer each question.
Afterward, combine all the information and write a report in the markdown format.

# Essay:
{TEXT}

# Instructions:
## Summarize:
In clear and concise language, summarize the key points and themes presented in the essay.

## Interesting Questions:
Generate three distinct and thought-provoking questions that can be asked about the content of the essay. For each question:
- After "Q: ", describe the problem
- After "A: ", provide a detailed explanation of the problem addressed in the question.
- Enclose the ultimate answer in <>.

## Write a report
Using the essay summary and the answers to the interesting questions, create a comprehensive report in Markdown format.
    '''
    
    prompt = PromptTemplate(
        template=template,
        input_variables=['TEXT']    
    )
    
    summary = ""
    while len(summary.split()) < 100:
        formatted_prompt = prompt.format(TEXT=text)
        output_summary = llm_engine_hf.invoke(formatted_prompt)
        summary = output_summary.content
    return summary

def download_summary(output_text):
    if output_text:
        file_path = Path('summary.txt')
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(output_text)
        return file_path
    else:
        return None
def create_download_file(summary_text):
    file_path = download_summary(summary_text)
    return str(file_path) if file_path else None

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Document Summarizer")

    with gr.Row():
        with gr.Column():
            file = gr.File(label="Submit a file")
        
        with gr.Column():
            output_text = gr.Textbox(label="Summary", lines=20)

    submit_button = gr.Button("Summarize")
    submit_button.click(summarize, inputs=[file], outputs=output_text)

    def generate_file():
        summary_text = output_text
        file_path = download_summary(summary_text)
        return file_path

    download_button = gr.Button("Download Summary")
    download_button.click(
        fn=create_download_file,
        inputs=[output_text],
        outputs=gr.File()
    )
# Run the Gradio app
demo.launch(share=True)