lifan0127 committed on
Commit
26ef710
•
1 Parent(s): 7adc1fa

First commit

Browse files
Files changed (8) hide show
  1. .gitignore +160 -0
  2. README.md +19 -5
  3. app.py +180 -0
  4. assets/zotero-logo.png +0 -0
  5. functions.py +330 -0
  6. models.py +71 -0
  7. requirements.txt +5 -0
  8. style.css +17 -0
.gitignore ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Zotero Qa
3
- emoji: πŸ‘
4
- colorFrom: red
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 3.20.1
8
  app_file: app.py
@@ -10,4 +10,18 @@ pinned: false
10
  license: mit
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Zotero QA
3
+ emoji: 📖
4
+ colorFrom: indigo
5
+ colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 3.20.1
8
  app_file: app.py
 
10
  license: mit
11
  ---
12
 
13
+ # Zotero QA
14
+
15
+ Fan Li ([@FanLi_RnD](https://twitter.com/FanLi_RnD)) - https://apex974.com/articles/literature-reviews-with-paper-qa-and-zotero
16
+
17
+ This tool allows you to ask questions based on your Zotero library. It was built upon [Paper QA](https://github.com/whitead/paper-qa), [LangChain AI](https://github.com/hwchase17/langchain) and [pyZotero](https://github.com/urschrei/pyzotero).
18
+
19
+ You are required to provide your own [OpenAI API key](https://platform.openai.com/overview).
20
+
21
+ You also need a Zotero API key and your user ID (or shared group ID):
22
+
23
+ - To create a Zotero API key, visit https://www.zotero.org/settings/keys/new
24
+ - To access your own library, select "User".
25
+ - To access a shared group, select "Group".
26
+ - Personal User ID can be found at https://www.zotero.org/settings/keys.
27
+ - Group ID is part of the group URL (e.g. 4952526).
app.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from pathlib import Path
5
+ from functions import (
6
+ reset_answer,
7
+ fetch_collections,
8
+ select_collection,
9
+ reset_collection,
10
+ handle_submit,
11
+ )
12
+ from models import (
13
+ Icons,
14
+ Message,
15
+ Messages,
16
+ )
17
+
18
# Load OPENAI_API_KEY / ZOTERO_API_KEY / ZOTERO_LIBRARY_ID from a local .env
# file (if any) so they can pre-fill the form below.
load_dotenv()

css_style = Path('./style.css').read_text()

with gr.Blocks(css=css_style) as demo:
    # State holders shared between the event handlers wired up below.
    zot = gr.State(None)              # Zotero client returned by fetch_collections
    zot_collections = gr.State([])    # raw collection records from Zotero
    data = gr.State([])               # not referenced by any handler in this script
    messages = gr.State(Messages([
        Message(
            Icons.INFO, "Please provide all the required OpenAI and Zotero information in the left panel."),
    ]))

    with gr.Row():
        gr.Markdown("""
        <h2>
            <img src='file/assets/zotero-logo.png' alt="Zotero Logo" style="width: 3.2rem; display: inline; margin-right: 10px;" />
            Zotero Q&A
        </h2>

        Fan Li ([@FanLi_RnD](https://twitter.com/FanLi_RnD)) - https://apex974.com/articles/literature-reviews-with-paper-qa-and-zotero

        This tool allows you to ask questions based on your Zotero library. It was built upon [Paper QA](https://github.com/whitead/paper-qa), [LangChain AI](https://github.com/hwchase17/langchain) and [pyZotero](https://github.com/urschrei/pyzotero).
        """)

    with gr.Row():
        # Left panel: credentials and collection picker.
        with gr.Column(scale=1):
            # NOTE(review): the environment values below become the initial
            # values of the form fields, i.e. they are sent to whoever opens
            # the page. Confirm this is only intended for local use.
            # NOTE(review): this key is not passed as an input to any handler
            # in this script; presumably the QA backend reads OPENAI_API_KEY
            # from the environment instead - confirm.
            openai_api_key = gr.Textbox(
                label="OpenAI API Key", type="password", value=os.getenv('OPENAI_API_KEY'))

            gr.HTML()

            zot_api_key = gr.Textbox(
                label="Zotero API Key", type="password", value=os.getenv('ZOTERO_API_KEY'))
            zot_library_type = gr.Radio(
                choices=["User", "Group"], label="Zotero Library Type", value="User", elem_id="zotero-library-type")
            zot_library_id = gr.Textbox(
                label="Zotero User/Group ID", value=os.getenv('ZOTERO_LIBRARY_ID'))

            # Hidden until fetch_collections fills in the choices.
            zot_selected_col = gr.Radio(
                [], label="Zotero Collection", elem_id="zotero-collection", visible=False)
            zot_msg = gr.HTML("""
            <div style="padding: 1rem; background-color: #fffbe7; font-size: 0.8rem;">
                <ul style="margin-bottom: 0;">
                    <li>Click <a href="https://www.zotero.org/settings/keys/new" target="_blank">here</a> to create Zotero API key.</li>
                    <li>To access your own library, select "User".</li>
                    <li>To access a shared group, select "Group".</li>
                    <li>Personal User ID can be found <a href="https://www.zotero.org/settings/keys" target="_blank">here</a>.</li>
                    <li style="margin-bottom: 0;">Group ID is part of the group URL (e.g. 4952526).</li>
                </ul>
            </div>
            """, visible=True)
            zot_fetch_col_btn = gr.Button('Fetch Collections')

        # Right panel: question box, progress messages and the answer.
        with gr.Column(scale=3):

            # Read-only until select_collection enables it.
            question = gr.Textbox(
                placeholder="You have to select a Zotero collection to proceed", label="Question", interactive=False, value="What predictive models are used in materials discovery?")

            gr.HTML()

            with gr.Box():
                with gr.Accordion("Messages"):
                    # gr.HTML expects markup text, so render the Messages
                    # object explicitly instead of handing over the object.
                    msg_board = gr.HTML(str(messages.value))

            answer = gr.HTML(None, elem_id="answer")

    # Changing the OpenAI key only clears a previously shown answer.
    openai_api_key.change(reset_answer, inputs=[], outputs=[
        answer], show_progress=False)

    # Any change to the Zotero connection details invalidates the fetched
    # collections, so the picker, button, question box and answer are reset.
    zot_api_key.change(
        reset_collection,
        inputs=[],
        outputs=[zot_selected_col, zot_fetch_col_btn, question, answer],
        show_progress=False,
    )
    zot_library_type.change(
        reset_collection,
        inputs=[],
        outputs=[zot_selected_col, zot_fetch_col_btn, question, answer],
        show_progress=False,
    )
    zot_library_id.change(
        reset_collection,
        inputs=[],
        outputs=[zot_selected_col, zot_fetch_col_btn, question, answer],
        show_progress=False
    )

    zot_fetch_col_btn.click(
        fn=fetch_collections,
        inputs=[zot_library_id, zot_library_type, zot_api_key, messages],
        outputs=[zot, zot_collections, zot_selected_col,
                 zot_fetch_col_btn, zot_msg, messages, msg_board],
        show_progress=False,
    )
    zot_selected_col.change(
        fn=select_collection,
        inputs=[zot_selected_col, messages],
        outputs=[question, messages, msg_board, answer],
        show_progress=False
    )

    # handle_submit is a generator: every yield refreshes the message board.
    question.submit(
        fn=handle_submit,
        inputs=[zot, zot_selected_col, zot_collections, question, messages],
        outputs=[messages, msg_board, answer],
        show_progress=False
    )
129
+
130
+ # with gr.Accordion("See Docs:", open=False):
131
+ # dataset = gr.Dataframe(
132
+ # headers=["filepath", "citation string", "key"],
133
+ # datatype=["str", "str", "str"],
134
+ # col_count=(3, "fixed"),
135
+ # interactive=False,
136
+ # label="Documents and Citations",
137
+ # overflow_row_behaviour='paginate',
138
+ # max_rows=5
139
+ # )
140
+ # buildb = gr.Textbox("⚠️Waiting for documents and key...",
141
+ # label="msg_board", interactive=False, show_label=True,
142
+ # max_lines=1)
143
+ # stats = gr.Dataframe(headers=['Docs', 'Chunks'],
144
+ # datatype=['number', 'number'],
145
+ # col_count=(2, "fixed"),
146
+ # interactive=False,
147
+ # label="Doc Stats")
148
+ # openai_api_key.change(validate_dataset, inputs=[
149
+ # dataset, openai_api_key], outputs=[buildb])
150
+ # dataset.change(validate_dataset, inputs=[
151
+ # dataset, openai_api_key], outputs=[buildb])
152
+ # uploaded_files.change(request_pathname, inputs=[
153
+ # uploaded_files, data, openai_api_key], outputs=[stats, data, dataset, buildb])
154
+ # download.click(fn=download_repo, inputs=[
155
+ # gh_repo, data, openai_api_key], outputs=[stats, data, dataset, buildb])
156
+ # query = gr.Textbox(
157
+ # placeholder="Enter your question here...", label="Question")
158
+ # with gr.Row():
159
+ # length = gr.Slider(25, 200, value=100, step=5,
160
+ # label='Words in answer')
161
+ # marg = gr.Checkbox(True, label='Max marginal relevance')
162
+ # k = gr.Slider(1, 20, value=10, step=1,
163
+ # label='Chunks to examine')
164
+ # sources = gr.Slider(1, 10, value=5, step=1,
165
+ # label='Contexts to include')
166
+
167
+ # ask = gr.Button("Ask Question")
168
+ # answer = gr.Markdown(label="Answer")
169
+ # with gr.Accordion("Context", open=True):
170
+ # context = gr.Markdown(label="Context")
171
+
172
+ # with gr.Accordion("Raw Text", open=False):
173
+ # passages = gr.Markdown(label="Passages")
174
+ # ask.click(fn=do_ask, inputs=[query, buildb,
175
+ # openai_api_key, dataset,
176
+ # length, marg, k, sources,
177
+ # docs], outputs=[answer, context, passages, docs, stats])
178
+
179
+ demo.queue(concurrency_count=10, api_open=False)
180
+ demo.launch()
assets/zotero-logo.png ADDED
functions.py ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import re
3
+ import requests
4
+ import tempfile
5
+ import time
6
+ from pyzotero import zotero
7
+ from paperqa import Docs
8
+ from lxml import html
9
+ from models import Icons, Message, Messages
10
+
11
+
12
def reset_answer():
    """Return a Gradio update that empties the answer HTML panel."""
    cleared_answer = gr.HTML.update(value=None)
    return cleared_answer
14
+
15
+
16
def fetch_collections(id, type, key, messages):
    """Connect to a Zotero library and list its top-level collections.

    Args:
        id: Zotero user or group ID as entered in the form; converted with
            ``int()``.
        type: Library type label ("User" or "Group"); lower-cased before it
            is handed to pyzotero.
        key: Zotero API key.
        messages: The ``Messages`` log; a status or error entry is appended.

    Returns:
        A 7-tuple: the Zotero client, the raw collection records, an update
        for the collection radio, an update for the fetch button, an update
        for the help box, the ``messages`` object and an update for the
        message board. On failure the client is ``None``, the collection list
        is empty and the fetch button stays visible.
    """
    try:
        # The client is built inside the ``try`` so that a blank or
        # non-numeric ID (``int()`` raising) is reported on the message board
        # like any other failure instead of escaping as an unhandled error.
        zot = zotero.Zotero(int(id), type.lower(), key)
        collections = zot.collections_top()
        # "<name> (<item count>)" is the label shown in the radio group;
        # handle_submit rebuilds the same label to find the selected record.
        collection_names = [
            f"{x['data']['name']} ({x['meta']['numItems']})" for x in collections]
        messages.append(
            Message(Icons.INFO, "Please select a Zotero collection to proceed."))
        return (
            zot,
            collections,
            gr.Radio.update(choices=collection_names,
                            visible=True, interactive=True),
            gr.Button.update(visible=False),
            gr.HTML.update(visible=False),
            messages,
            gr.HTML.update(value=str(messages)),
        )
    except Exception as e:
        messages.append(
            Message(Icons.ERR, f"Error occurred when fetching Zotero collection: {e}"))
        print({'messages': str(messages)})
        # NOTE(review): the bare ``None`` entries are sent as new values for
        # the collection radio and the help box - confirm that clearing them
        # on failure is intended.
        return (
            None,
            [],
            None,
            gr.Button.update(visible=True),
            None,
            messages,
            gr.HTML.update(value=str(messages)),
        )
47
+
48
+
49
def select_collection(collection, messages):
    """React to a change of the selected Zotero collection.

    Args:
        collection: Radio label in the form "<name> (<count>)", or ``None``
            when nothing is selected.
        messages: The ``Messages`` log; replaced by a single confirmation
            entry when a collection is selected.

    Returns:
        A 4-tuple matching the outputs wired in app.py: question textbox,
        ``messages``, message board and answer panel.
    """
    if collection is None:
        # One value per wired output. Returning fewer values than there are
        # outputs makes Gradio reject the result, so the answer panel gets an
        # explicit no-op update.
        return None, messages, gr.HTML.update(), gr.HTML.update()
    # Drop the trailing " (<count>)" that fetch_collections adds to the label.
    collection_name = re.sub(r'\s\(\d+\)$', '', collection)
    messages.set([Message(
        Icons.OK, f"Selected collection: <span style='font-weight: bold'>{collection_name}</span>. Please type your question and hit \"Enter\".")])
    return (
        gr.Text.update(
            placeholder="Please type your question and hit \"Enter\".", interactive=True),
        messages,
        gr.HTML.update(value=str(messages)),
        gr.HTML.update(value=None)
    )
62
+
63
+
64
def search_attachments(id, type, key, collection, queries=(), limit=10):
    """Search one Zotero collection for PDF attachments matching the queries.

    Args:
        id: Zotero user or group ID; converted with ``int()``.
        type: Library type label; lower-cased before use.
        key: Zotero API key.
        collection: Collection record; only its ``'key'`` entry is read.
        queries: Iterable of search strings, one Zotero search per entry.
        limit: Maximum hits per search and maximum attachments returned.

    Returns:
        ``(parents, attachments, message)``: the set of parent item keys, the
        de-duplicated PDF attachment records and an HTML status line. On any
        error both collections are empty lists and the message describes the
        error.
    """
    try:
        zot = zotero.Zotero(int(id), type.lower(), key)
        searches = [zot.collection_items(
            collection['key'],
            q=q,
            limit=limit,
            itemType='attachment',
            qmode='everything'
        ) for q in queries]
        # Keyed by item key so an attachment matched by several queries is
        # kept once. ``.get`` treats a record without a content type as
        # "not a PDF" instead of failing the whole search.
        pdf_by_key = {
            item['key']: item
            for search in searches
            for item in search
            if item['data'].get('contentType') == 'application/pdf'
        }
        attachments = list(pdf_by_key.values())[:limit]

        parents = {a['data']['parentItem'] for a in attachments}

        if attachments:
            message = f"<div>✅ Found {len(attachments)} PDF {'attachments' if len(attachments) > 1 else 'attachment'} from {len(parents)} {'articles' if len(parents) > 1 else 'article'}.</div>"
        else:
            message = "<div>❔ No results. Make sure to index your PDF attachments in Zotero.</div>"
        return parents, attachments, message

    except Exception as e:
        message = f"<div>⚠️ Error occurred when searching in Zotero: {e}</div>"
        return [], [], message
86
+
87
+
88
def download_attachment(id, type, key, attachment):
    """Return the content of a Zotero attachment.

    Args:
        id: Zotero user or group ID; converted with ``int()``.
        type: Library type label; lower-cased before use.
        key: Zotero API key.
        attachment: Attachment record; ``data.linkMode`` selects the source.

    Returns:
        The attachment content: the result of ``zot.file`` for
        ``imported_file``, or the downloaded response body for
        ``imported_url``.

    Raises:
        ValueError: If the link mode is neither ``imported_file`` nor
            ``imported_url``.
        requests.RequestException: If the URL download times out or the
            server answers with an HTTP error status.
    """
    zot = zotero.Zotero(int(id), type.lower(), key)
    link_mode = attachment['data']['linkMode']

    if link_mode == 'imported_file':
        return zot.file(attachment['key'])
    if link_mode == 'imported_url':
        # A timeout keeps an unresponsive host from blocking the request
        # forever; raise_for_status stops an HTTP error page from being
        # returned as if it were the PDF.
        res = requests.get(attachment['data']['url'], timeout=30)
        res.raise_for_status()
        return res.content
    raise ValueError(
        f'Unsupported link mode: {link_mode} for {attachment["key"]}.')
100
+
101
+
102
def reset_collection():
    """Reset the collection picker after the Zotero connection details change.

    Returns:
        A 4-tuple of updates in the order wired in app.py: hide and empty the
        collection radio, show the fetch button again, lock the question box
        with its initial placeholder, and clear the answer panel.
    """
    return (
        gr.Radio.update(choices=[], visible=False),
        # This slot is the "Fetch Collections" button, so it gets a Button
        # update, matching how fetch_collections toggles the same component.
        gr.Button.update(visible=True),
        gr.Text.update(
            placeholder="You have to select a Zotero collection to proceed", interactive=False),
        gr.HTML.update(value=None)
    )
110
+
111
+
112
def _status(messages, answer=None):
    """Build the (messages, message-board update, answer) tuple sent to the UI."""
    return messages, gr.HTML.update(value=str(messages)), answer


def _attachment_anchor(attachment):
    """Return an HTML link to the attachment's Zotero page, labelled with its title."""
    return f"<a href='{attachment['links']['alternate']['href']}' target='_blank'>{attachment['data']['title']}</a>"


def handle_submit(zot, collection_name, collections, question, messages):
    """Answer a question from the PDFs of the selected Zotero collection.

    This is a generator: every step appends to ``messages`` and yields a
    3-tuple ``(messages, message-board update, answer)`` so the UI can show
    progress. The answer slot is ``None`` until the final yield.

    Steps: derive keyword searches from the question, search the collection
    for PDF attachments (at most 10), fetch a bibliography entry for each,
    download and index the PDFs, then generate the answer.

    Args:
        zot: Zotero client created by ``fetch_collections``.
        collection_name: Selected radio label, "<name> (<count>)".
        collections: Raw collection records the label is matched against.
        question: The user's question.
        messages: The ``Messages`` log; reset at the start of each run.
    """
    # Drop the trailing " (<count>)" from the label for display.
    collection_name_only = re.sub(r'\s\(\d+\)$', '', collection_name)
    messages.set([Message(
        Icons.OK, f"Selected collection: <span style='font-weight: bold'>{collection_name_only}</span>.")])
    yield _status(messages)

    docs = Docs()

    # Generate search queries from the question by Paper QA
    try:
        question_prompt = 'A "keyword search" is a list of no more than 3 words, which separated by whitespace only and with no boolean operators (e.g. "dog canine puppy"). Avoid adding any new words not in the question unless they are synonyms to the existing words.'
        queries = [x.strip('"').lower() for x in
                   docs.generate_search_query(question + '\n' + question_prompt)]
        query_str = ", ".join(
            [f"<span style='font-weight: bold;'>{q}</span>" for q in queries])
        messages.append(
            Message(Icons.WAIT, f"Searching your Zotero collection for {query_str}."))
        yield _status(messages)
    except Exception as e:
        messages.append(
            Message(Icons.ERR, f"Error occurred when generating search queries: {e}"))
        yield _status(messages)
        return

    # Search for attachments in Zotero
    try:
        # Rebuild the radio label for each record to find the selected one.
        collection = [
            x for x in collections if f"{x['data']['name']} ({x['meta']['numItems']})" == collection_name][0]
        searches = [zot.collection_items(
            collection['key'],
            q=q,
            limit=10,
            itemType='attachment',
            qmode='everything'
        ) for q in queries]
        # Keyed by item key so an attachment matched by several queries is
        # kept once.
        pdf_by_key = {
            item['key']: item
            for search in searches
            for item in search
            if item['data']['contentType'] == 'application/pdf'
        }
        attachments = list(pdf_by_key.values())[:10]

        parent_ids = {a['data']['parentItem'] for a in attachments}
        if len(attachments) > 0:
            messages.append(Message(
                Icons.SUCCESS, f"Found {len(attachments)} PDF {'attachments' if len(attachments) > 1 else 'attachment'} from {len(parent_ids)} {'articles' if len(parent_ids) > 1 else 'article'}."))
            yield _status(messages)
        else:
            messages.append(Message(
                Icons.ERR, "No results. Make sure to index your PDF attachments in Zotero and try rephrasing your question."))
            yield _status(messages)
            return

    except Exception as e:
        messages.append(
            Message(Icons.ERR, f"Error occurred when searching in Zotero: {e}"))
        yield _status(messages)
        return

    # Compile citation metadata
    citation_dict = {}   # attachment key -> citation HTML
    bib_by_parent = {}   # parent item key -> citation HTML (one fetch per parent)
    messages.append(
        Message(Icons.WAIT, "Fetching attachment bibliography information."))
    yield _status(messages)
    for attachment in attachments:
        parent_id = attachment["data"]["parentItem"]
        try:
            if parent_id in bib_by_parent:
                citation_dict[attachment["key"]] = bib_by_parent[parent_id]
            else:
                parent = zot.item(
                    parent_id, content="bib", style="nature")[0]
                citation_text = html.fragment_fromstring(
                    parent).xpath("normalize-space(div[2])")
                bib = (
                    f"\n{citation_text}\n"
                    f'<a href="{attachment["links"]["alternate"]["href"]}" target="_blank" class="zotero-link">Open in Zotero</a>\n'
                )
                bib_by_parent[parent_id] = bib
                citation_dict[attachment["key"]] = bib
        except Exception as e:
            messages.append(Message(
                Icons.WARN, f"Failed to retrieve bibliography for PDF attachment {_attachment_anchor(attachment)}: {e}"))
            yield _status(messages)

    # Index attachments
    available_attachments = 0
    for attachment in attachments:
        try:
            link_mode = attachment['data']['linkMode']

            if link_mode in ['imported_file', 'imported_url']:
                if link_mode == 'imported_file':
                    attachment_content = zot.file(attachment['key'])
                else:
                    # Bounded wait, and no indexing of an HTTP error page as
                    # if it were the PDF.
                    response = requests.get(
                        attachment['data']['url'], timeout=30)
                    response.raise_for_status()
                    attachment_content = response.content
                # The temporary file is only needed while docs.add runs;
                # closing it right away removes it instead of leaving it to
                # garbage collection.
                with tempfile.NamedTemporaryFile(suffix=".pdf") as temp_file:
                    temp_file.write(attachment_content)
                    temp_file.flush()
                    docs.add(temp_file.name, citation_dict[attachment["key"]])
                messages.append(Message(
                    Icons.INDEX, f"Indexed PDF attachment: {_attachment_anchor(attachment)}."))
                available_attachments += 1
            else:
                messages.append(Message(
                    Icons.WARN, f"Unable to access linked PDF attachment {_attachment_anchor(attachment)}: The file is not in Zotero online storage."))
            yield _status(messages)
        except Exception as e:
            messages.append(Message(
                Icons.WARN, f"Failed to retrieve PDF attachment {_attachment_anchor(attachment)}: {e}"))
            yield _status(messages)

    # Build vector index
    if available_attachments == 0:
        messages.append(Message(
            Icons.ERR, "No answer. Unable to access any PDF attachments from your Zotero online storage or public URLs."))
        yield _status(messages)
        return
    # Singular/plural follows the number of PDFs that were actually indexed.
    attachment_word = 'attachment' if available_attachments == 1 else 'attachments'
    if docs._faiss_index is None:
        messages.append(Message(
            Icons.WAIT, f"Building vector index based on {available_attachments} available PDF {attachment_word}"))
        yield _status(messages)
        docs._build_faiss_index()

    # Synthesize response
    messages.append(Message(
        Icons.WAIT, f"Creating answer. This will loop through all available PDF {attachment_word} and may take {'a few' if available_attachments > 2 else 'a couple of'} minutes."))
    yield _status(messages)
    try:
        start_time = time.time()
        total_time = 0
        for answer in docs.query_gen(question):
            end_time = time.time()
            time_dif = end_time - start_time
            # Post a heartbeat at most once every 5 seconds of elapsed time.
            if time_dif > 5:
                start_time = end_time
                total_time += time_dif
                messages.append(Message(
                    Icons.INFO, f"Still in progress: {total_time:.1f} seconds"))
                yield _status(messages)
        # ``answer`` is the last item produced by query_gen.
        answer_text = '\n'.join(
            [f"<div>{x}</div>" for x in answer.answer.split('\n')])
        # Each reference entry is expected to start with "<n>."; that prefix
        # is dropped because the <ol> numbers the items itself. Blank entries
        # are skipped and entries without a period are kept whole.
        reference_items = []
        for entry in answer.references.split('\n\n'):
            if not entry.strip():
                continue
            _, dot, rest = entry.partition('.')
            reference_items.append(f"<li>{rest if dot else entry}</li>")
        references = '\n'.join(reference_items)
        formatted_answer = '\n'.join([
            f"<div>{answer_text}</div>",
            "",
            '<h4 style="font-size: 1rem;">References:</h4>',
            "<ol>",
            references,
            "</ol>",
            "",
            f"<div>Tokens Used: {answer.tokens} Cost: ${answer.tokens/1000 * 0.002:.2f}</div>",
        ])
        messages.append(Message(
            Icons.OK, "Answer created."))
        yield _status(messages, gr.HTML.update(value=formatted_answer))
    except Exception as e:
        messages.append(Message(
            Icons.ERR, f"Error occurred when creating answer: {e}"))
        yield _status(messages)
models.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from enum import Enum
3
+
4
class Icons(Enum):
    """Emoji prefixes for status messages.

    ``str(member)`` yields the emoji itself, so a member can be dropped
    straight into an f-string.
    """

    def __str__(self):
        return str(self.value)

    DOC = "📄"
    ERR = "❌"
    INDEX = "🗄️"
    INFO = "ℹ️"
    OK = "👌"
    SUCCESS = "✅"
    WAIT = "⌛"
    WARN = "⚠️"
15
+
16
class Message():
    """A single status line: an icon followed by its text."""

    def __init__(self, icon, content):
        self.icon, self.content = icon, content

    def __str__(self):
        # Rendered as "<icon> <content>".
        return "{} {}".format(self.icon, self.content)
23
+
24
class Messages():
    """An ordered log of messages that renders itself as an HTML panel."""

    def __init__(self, messages=None):
        # ``None`` stands for "start empty". A literal ``[]`` default would
        # be one list shared by every instance created without an argument.
        self.messages = [] if messages is None else messages

    def __str__(self):
        # One <div> per message inside a single styled container.
        rows = "".join([f"<div>{x}</div>" for x in self.messages])
        return (
            '\n<div class="messages" style="padding: 1rem; background-color: #fffbe7; font-size: 0.8rem;">\n'
            f'{rows}\n'
            '</div>\n'
        )

    def append(self, new_message):
        """Add one message to the end of the log."""
        self.messages.append(new_message)

    def set(self, messages):
        """Replace the whole log with the given list."""
        self.messages = messages
40
+
41
+ # class Message():
42
+
43
+ # def standing_by(self):
44
+ # return "<div>πŸ‘Œ Standing by...</div>"
45
+
46
+ # def not_ready(self):
47
+ # return """
48
+ # <div style="padding: 1rem; background-color: #fffbe7; font-size: 0.8rem;">
49
+ # You have to select a Zotero collection to proceed.
50
+ # </div>
51
+ # """
52
+
53
+ # def openai_api_key(self):
54
+ # return """
55
+ # <div style="padding: 1rem; background-color: #fcd7da; font-size: 0.8rem;">
56
+ # OpenAI API key is either missing or incorrect.
57
+ # </div>
58
+ # """
59
+
60
+ # def use_queries(queries):
61
+ # query_str = ", ".join([f"<span style="font-weight: bold;">{q}</span>" for q in queries])
62
+ # return f"<div>Search your Zotero collection for {query_str}"
63
+
64
+
65
+
66
+ # def update_status(messages):
67
+ # return gr.HTML.update(f"""
68
+ # <div class="messages" style="padding: 1rem; background-color: #fffbe7; font-size: 0.8rem;">
69
+ # {("").join(messages)}
70
+ # </div>
71
+ # """)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ paper-qa
2
+ gradio
3
+ requests
4
+ python-dotenv
5
+ lxml
style.css ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #zotero-library-type label {
2
+ width: 48.5%;
3
+ }
4
+
5
+ #zotero-collection label {
6
+ width: 100%;
7
+ display: block;
8
+ }
9
+
10
+ .zotero-link {
11
+ font-size: 0.75rem;
12
+ color: #2d7ea9;
13
+ }
14
+
15
+ #answer .generating{
16
+ display: none;
17
+ }