anitap committed on
Commit
152eb59
1 Parent(s): 1cc31b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -29
app.py CHANGED
@@ -1,24 +1,16 @@
1
  import fitz
2
  import gradio as gr
3
- import requests
4
- import io
5
  import re
6
- import os
7
- from PIL import Image
8
  from transformers import pipeline
9
 
10
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
11
  qa_model = pipeline("question-answering", model="deepset/bert-large-uncased-whole-word-masking-squad2")
12
 
13
- os.environ["HUGGINGFACE_HUB_TOKEN"] = "ctp-hw"
14
- my_key = os.environ["HUGGINGFACE_HUB_TOKEN"]
15
-
16
  def extract_text_from_pdf(pdf_file):
17
  with fitz.open(pdf_file) as pdf:
18
  text = ""
19
  for page in pdf:
20
  text += page.get_text("text")
21
-
22
  text = re.sub(r'\s+', ' ', text).strip()
23
  return text
24
 
@@ -30,39 +22,23 @@ def summarize(text):
30
  summary += summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]['summary_text'] + " "
31
  else:
32
  summary = summarizer(text, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
33
-
34
  return summary
35
 
36
  def answer_question(text, question):
37
  response = qa_model(question=question, context=text)
38
- answer = response['answer']
39
  return answer
40
 
41
- def query(payload):
42
- API_URL = "https://api-inference.huggingface.co/models/Shakker-Labs/FLUX.1-dev-LoRA-AntiBlur"
43
- headers = {"Authorization": f"Bearer {my_key}"}
44
- response = requests.post(API_URL, headers=headers, json=payload)
45
- return response.content
46
-
47
  def summarize_and_qa(pdf_file, question):
48
  text = extract_text_from_pdf(pdf_file)
49
  summary = summarize(text)
50
  answer = answer_question(text, question)
51
- image_bytes = query({"inputs": summary})
52
- if image_bytes:
53
- try:
54
- image = Image.open(io.BytesIO(image_bytes))
55
- except Exception as e:
56
- return summary, answer, None
57
- else:
58
- image = None
59
-
60
- return summary, answer, image
61
 
62
  gr.Interface(
63
  fn=summarize_and_qa,
64
  inputs=["file", "text"],
65
- outputs=["textbox", "textbox", "image"],
66
  title="Understand your PDF Better",
67
- description="Upload a PDF to get a summary. You can ask any question regardging the content of the PDF. It will also generate a picture related to your PDF."
68
- ).launch(debug=True, share=True)
 
1
  import fitz
2
  import gradio as gr
 
 
3
  import re
 
 
4
  from transformers import pipeline
5
 
6
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
7
  qa_model = pipeline("question-answering", model="deepset/bert-large-uncased-whole-word-masking-squad2")
8
 
 
 
 
9
  def extract_text_from_pdf(pdf_file):
10
  with fitz.open(pdf_file) as pdf:
11
  text = ""
12
  for page in pdf:
13
  text += page.get_text("text")
 
14
  text = re.sub(r'\s+', ' ', text).strip()
15
  return text
16
 
 
22
  summary += summarizer(chunk, max_length=150, min_length=50, do_sample=False)[0]['summary_text'] + " "
23
  else:
24
  summary = summarizer(text, max_length=150, min_length=50, do_sample=False)[0]['summary_text']
 
25
  return summary
26
 
27
  def answer_question(text, question):
28
  response = qa_model(question=question, context=text)
29
+ answer = response['answer']
30
  return answer
31
 
 
 
 
 
 
 
32
  def summarize_and_qa(pdf_file, question):
33
  text = extract_text_from_pdf(pdf_file)
34
  summary = summarize(text)
35
  answer = answer_question(text, question)
36
+ return summary, answer
 
 
 
 
 
 
 
 
 
37
 
38
  gr.Interface(
39
  fn=summarize_and_qa,
40
  inputs=["file", "text"],
41
+ outputs=["textbox", "textbox"],
42
  title="Understand your PDF Better",
43
+ description="Upload a PDF to get a summary. You can ask any question regarding the content of the PDF."
44
+ ).launch(debug=True, share=True)