Ankur Goyal committed on
Commit
bc12901
1 Parent(s): 9c27f12

Initial Commit

Browse files
Files changed (4) hide show
  1. .gitignore +4 -0
  2. README.md +5 -6
  3. app.py +50 -0
  4. requirements.txt +3 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ venv
2
+ *.swo
3
+ *.swp
4
+ *.pyc
README.md CHANGED
@@ -1,13 +1,12 @@
1
  ---
2
- title: Docquery
3
- emoji: 🌖
4
- colorFrom: blue
5
- colorTo: gray
6
  sdk: streamlit
7
  sdk_version: 1.10.0
8
  app_file: app.py
9
- pinned: false
10
- license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: DocQuery
3
+ emoji: 🦉
4
+ colorFrom: gray
5
+ colorTo: pink
6
  sdk: streamlit
7
  sdk_version: 1.10.0
8
  app_file: app.py
9
+ pinned: true
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
4
+
5
+ import streamlit as st
6
+
7
+ import torch
8
+ from docquery.pipeline import get_pipeline
9
+ from docquery.document import load_bytes
10
+
11
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the pipeline once at module level so the code below can reuse it.
pipeline = get_pipeline(device=device)
13
+
14
+
15
def process_document(file, question):
    """Run the module-level ``pipeline`` on an uploaded document.

    Args:
        file: File-like object exposing a ``name`` attribute
            (presumably a Streamlit upload -- confirm against callers).
        question: Question text forwarded to the pipeline.

    Returns:
        Whatever ``pipeline`` returns for ``question`` and the
        document's ``context``.
    """
    # The original called ``load_document``, which is never imported or
    # defined in this module (NameError at call time). Use ``load_bytes``,
    # the loader this module imports, with the same arguments the script
    # body passes to it.
    document = load_bytes(file, file.name)
    return pipeline(question=question, **document.context)
19
+
20
+
21
def ensure_list(x):
    """Return ``x`` unchanged if it is a list, else wrap it in a one-item list."""
    return x if isinstance(x, list) else [x]
26
+
27
+
28
# Page header and the two user inputs: a document upload and a question.
st.title("DocQuery: Query Documents Using NLP")
file = st.file_uploader("Upload a PDF or Image document")
question = st.text_input("QUESTION", "")

document = None

if file is not None:
    # Two columns: the document preview goes in the first, answers in the second.
    col1, col2 = st.columns(2)

    document = load_bytes(file, file.name)
    col1.image(document.preview, use_column_width=True)

# Only query the pipeline once a document is loaded and a question is typed.
if document is not None and question:
    predictions = pipeline(question=question, **document.context)

    col2.header("Probabilities")
    for prediction in ensure_list(predictions):
        answer = prediction["answer"]
        # Score is shown as a percentage rounded to one decimal place.
        percent = round(prediction["score"] * 100, 1)
        col2.subheader(f"{answer}: {percent}%")


# Bare string expressions: kept as-is so they are rendered on the page.
"DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."

"[Github Repo](https://github.com/impira/docquery)"
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ torch
2
+ git+https://github.com/huggingface/transformers.git
3
+ docquery