hysts HF staff committed on
Commit
c637b4a
·
1 Parent(s): 0476415
Files changed (9) hide show
  1. .pre-commit-config.yaml +10 -14
  2. .vscode/extensions.json +8 -0
  3. .vscode/settings.json +7 -12
  4. README.md +1 -1
  5. app.py +1 -1
  6. papers.py +21 -19
  7. pyproject.toml +41 -6
  8. requirements.txt +124 -141
  9. uv.lock +0 -0
.pre-commit-config.yaml CHANGED
@@ -1,6 +1,6 @@
1
  repos:
2
  - repo: https://github.com/pre-commit/pre-commit-hooks
3
- rev: v4.6.0
4
  hooks:
5
  - id: check-executables-have-shebangs
6
  - id: check-json
@@ -18,13 +18,15 @@ repos:
18
  hooks:
19
  - id: docformatter
20
  args: ["--in-place"]
21
- - repo: https://github.com/pycqa/isort
22
- rev: 5.13.2
23
  hooks:
24
- - id: isort
25
- args: ["--profile", "black"]
 
 
26
  - repo: https://github.com/pre-commit/mirrors-mypy
27
- rev: v1.9.0
28
  hooks:
29
  - id: mypy
30
  args: ["--ignore-missing-imports"]
@@ -35,14 +37,8 @@ repos:
35
  "types-PyYAML",
36
  "types-pytz",
37
  ]
38
- - repo: https://github.com/psf/black
39
- rev: 24.4.0
40
- hooks:
41
- - id: black
42
- language_version: python3.10
43
- args: ["--line-length", "119"]
44
  - repo: https://github.com/kynan/nbstripout
45
- rev: 0.7.1
46
  hooks:
47
  - id: nbstripout
48
  args:
@@ -51,7 +47,7 @@ repos:
51
  "metadata.interpreter metadata.kernelspec cell.metadata.pycharm",
52
  ]
53
  - repo: https://github.com/nbQA-dev/nbQA
54
- rev: 1.8.5
55
  hooks:
56
  - id: nbqa-black
57
  - id: nbqa-pyupgrade
 
1
  repos:
2
  - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v5.0.0
4
  hooks:
5
  - id: check-executables-have-shebangs
6
  - id: check-json
 
18
  hooks:
19
  - id: docformatter
20
  args: ["--in-place"]
21
+ - repo: https://github.com/astral-sh/ruff-pre-commit
22
+ rev: v0.8.4
23
  hooks:
24
+ - id: ruff
25
+ args: ["--fix"]
26
+ - id: ruff-format
27
+ args: ["--line-length", "119"]
28
  - repo: https://github.com/pre-commit/mirrors-mypy
29
+ rev: v1.14.0
30
  hooks:
31
  - id: mypy
32
  args: ["--ignore-missing-imports"]
 
37
  "types-PyYAML",
38
  "types-pytz",
39
  ]
 
 
 
 
 
 
40
  - repo: https://github.com/kynan/nbstripout
41
+ rev: 0.8.1
42
  hooks:
43
  - id: nbstripout
44
  args:
 
47
  "metadata.interpreter metadata.kernelspec cell.metadata.pycharm",
48
  ]
49
  - repo: https://github.com/nbQA-dev/nbQA
50
+ rev: 1.9.1
51
  hooks:
52
  - id: nbqa-black
53
  - id: nbqa-pyupgrade
.vscode/extensions.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "recommendations": [
3
+ "ms-python.python",
4
+ "charliermarsh.ruff",
5
+ "streetsidesoftware.code-spell-checker",
6
+ "tamasfe.even-better-toml"
7
+ ]
8
+ }
.vscode/settings.json CHANGED
@@ -2,25 +2,20 @@
2
  "editor.formatOnSave": true,
3
  "files.insertFinalNewline": false,
4
  "[python]": {
5
- "editor.defaultFormatter": "ms-python.black-formatter",
6
  "editor.formatOnType": true,
7
  "editor.codeActionsOnSave": {
 
8
  "source.organizeImports": "explicit"
9
  }
10
  },
11
  "[jupyter]": {
12
  "files.insertFinalNewline": false
13
  },
14
- "black-formatter.args": [
15
- "--line-length=119"
16
- ],
17
- "isort.args": ["--profile", "black"],
18
- "flake8.args": [
19
- "--max-line-length=119"
20
- ],
21
- "ruff.lint.args": [
22
- "--line-length=119"
23
- ],
24
  "notebook.output.scrolling": true,
25
- "notebook.formatOnCellExecution": true
 
 
 
 
26
  }
 
2
  "editor.formatOnSave": true,
3
  "files.insertFinalNewline": false,
4
  "[python]": {
5
+ "editor.defaultFormatter": "charliermarsh.ruff",
6
  "editor.formatOnType": true,
7
  "editor.codeActionsOnSave": {
8
+ "source.fixAll.ruff": "explicit",
9
  "source.organizeImports": "explicit"
10
  }
11
  },
12
  "[jupyter]": {
13
  "files.insertFinalNewline": false
14
  },
 
 
 
 
 
 
 
 
 
 
15
  "notebook.output.scrolling": true,
16
+ "notebook.formatOnCellExecution": true,
17
+ "notebook.formatOnSave.enabled": true,
18
+ "notebook.codeActionsOnSave": {
19
+ "source.organizeImports": "explicit"
20
+ }
21
  }
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 📊
4
  colorFrom: pink
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.5.0
8
  app_file: app.py
9
  pinned: true
10
  license: mit
 
4
  colorFrom: pink
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 5.9.1
8
  app_file: app.py
9
  pinned: true
10
  license: mit
app.py CHANGED
@@ -24,7 +24,7 @@ paper_list = PaperList(get_df())
24
 
25
 
26
  def update_paper_list() -> None:
27
- global paper_list
28
  paper_list = PaperList(get_df())
29
 
30
 
 
24
 
25
 
26
  def update_paper_list() -> None:
27
+ global paper_list # noqa: PLW0603
28
  paper_list = PaperList(get_df())
29
 
30
 
papers.py CHANGED
@@ -23,7 +23,7 @@ abstract_retriever.search("LLM")
23
 
24
 
25
  def update_abstract_index() -> None:
26
- global abstract_retriever
27
 
28
  api.snapshot_download(
29
  repo_id=INDEX_REPO_ID,
@@ -40,10 +40,13 @@ scheduler.start()
40
 
41
 
42
  def get_df() -> pd.DataFrame:
43
- df = pd.merge(
44
- left=datasets.load_dataset("hysts-bot-data/daily-papers", split="train").to_pandas(),
45
- right=datasets.load_dataset("hysts-bot-data/daily-papers-stats", split="train").to_pandas(),
46
- on="arxiv_id",
 
 
 
47
  )
48
  df = df[::-1].reset_index(drop=True)
49
  df["date"] = df["date"].dt.strftime("%Y-%m-%d")
@@ -91,26 +94,26 @@ class Prettifier:
91
 
92
 
93
  class PaperList:
94
- COLUMN_INFO = [
95
- ["date", "markdown"],
96
- ["paper_page", "markdown"],
97
- ["title", "str"],
98
- ["github", "markdown"],
99
- ["👍", "number"],
100
- ["💬", "number"],
101
- ]
102
-
103
- def __init__(self, df: pd.DataFrame):
104
  self.df_raw = df
105
  self._prettifier = Prettifier()
106
  self.df_prettified = self._prettifier(df).loc[:, self.column_names]
107
 
108
  @property
109
- def column_names(self):
110
  return list(map(operator.itemgetter(0), self.COLUMN_INFO))
111
 
112
  @property
113
- def column_datatype(self):
114
  return list(map(operator.itemgetter(1), self.COLUMN_INFO))
115
 
116
  def search(
@@ -147,5 +150,4 @@ class PaperList:
147
  found_ids.append(arxiv_id)
148
  df = df[df["arxiv_id"].isin(found_ids)].set_index("arxiv_id").reindex(index=found_ids).reset_index()
149
 
150
- df_prettified = self._prettifier(df).loc[:, self.column_names]
151
- return df_prettified
 
23
 
24
 
25
  def update_abstract_index() -> None:
26
+ global abstract_retriever # noqa: PLW0603
27
 
28
  api.snapshot_download(
29
  repo_id=INDEX_REPO_ID,
 
40
 
41
 
42
  def get_df() -> pd.DataFrame:
43
+ df = (
44
+ datasets.load_dataset("hysts-bot-data/daily-papers", split="train")
45
+ .to_pandas()
46
+ .merge(
47
+ datasets.load_dataset("hysts-bot-data/daily-papers-stats", split="train").to_pandas(),
48
+ on="arxiv_id",
49
+ )
50
  )
51
  df = df[::-1].reset_index(drop=True)
52
  df["date"] = df["date"].dt.strftime("%Y-%m-%d")
 
94
 
95
 
96
  class PaperList:
97
+ COLUMN_INFO = (
98
+ ("date", "markdown"),
99
+ ("paper_page", "markdown"),
100
+ ("title", "str"),
101
+ ("github", "markdown"),
102
+ ("👍", "number"),
103
+ ("💬", "number"),
104
+ )
105
+
106
+ def __init__(self, df: pd.DataFrame) -> None:
107
  self.df_raw = df
108
  self._prettifier = Prettifier()
109
  self.df_prettified = self._prettifier(df).loc[:, self.column_names]
110
 
111
  @property
112
+ def column_names(self) -> list[str]:
113
  return list(map(operator.itemgetter(0), self.COLUMN_INFO))
114
 
115
  @property
116
+ def column_datatype(self) -> list[str]:
117
  return list(map(operator.itemgetter(1), self.COLUMN_INFO))
118
 
119
  def search(
 
150
  found_ids.append(arxiv_id)
151
  df = df[df["arxiv_id"].isin(found_ids)].set_index("arxiv_id").reindex(index=found_ids).reset_index()
152
 
153
+ return self._prettifier(df).loc[:, self.column_names]
 
pyproject.toml CHANGED
@@ -5,13 +5,48 @@ description = ""
5
  readme = "README.md"
6
  requires-python = ">=3.10"
7
  dependencies = [
8
- "apscheduler>=3.10.4",
9
- "datasets>=2.21.0",
10
  "gradio-calendar>=0.0.6",
11
- "gradio>=5.5.0",
12
  "hf-transfer>=0.1.8",
13
- "pandas>=2.2.2",
14
  "ragatouille>=0.0.8.post4",
15
- "setuptools>=74.1.2",
16
- "tqdm>=4.66.5",
17
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  readme = "README.md"
6
  requires-python = ">=3.10"
7
  dependencies = [
8
+ "apscheduler>=3.11.0",
9
+ "datasets>=3.2.0",
10
  "gradio-calendar>=0.0.6",
11
+ "gradio>=5.9.1",
12
  "hf-transfer>=0.1.8",
13
+ "pandas>=2.2.3",
14
  "ragatouille>=0.0.8.post4",
15
+ "setuptools>=75.6.0",
 
16
  ]
17
+
18
+ [tool.ruff]
19
+ line-length = 119
20
+
21
+ [tool.ruff.lint]
22
+ select = ["ALL"]
23
+ ignore = [
24
+ "COM812", # missing-trailing-comma
25
+ "D203", # one-blank-line-before-class
26
+ "D213", # multi-line-summary-second-line
27
+ "E501", # line-too-long
28
+ "SIM117", # multiple-with-statements
29
+ ]
30
+ extend-ignore = [
31
+ "D100", # undocumented-public-module
32
+ "D101", # undocumented-public-class
33
+ "D102", # undocumented-public-method
34
+ "D103", # undocumented-public-function
35
+ "D104", # undocumented-public-package
36
+ "D105", # undocumented-magic-method
37
+ "D107", # undocumented-public-init
38
+ "EM101", # raw-string-in-exception
39
+ "FBT001", # boolean-type-hint-positional-argument
40
+ "FBT002", # boolean-default-value-positional-argument
41
+ "PD901", # pandas-df-variable-name
42
+ "PGH003", # blanket-type-ignore
43
+ "PLR0913", # too-many-arguments
44
+ "PLR0915", # too-many-statements
45
+ "TRY003", # raise-vanilla-args
46
+ ]
47
+ unfixable = [
48
+ "F401", # unused-import
49
+ ]
50
+
51
+ [tool.ruff.format]
52
+ docstring-code-format = true
requirements.txt CHANGED
@@ -2,108 +2,104 @@
2
  # uv pip compile pyproject.toml -o requirements.txt
3
  aiofiles==23.2.1
4
  # via gradio
5
- aiohappyeyeballs==2.4.0
6
  # via aiohttp
7
- aiohttp==3.10.5
8
  # via
9
  # datasets
10
  # fsspec
11
  # langchain
12
  # llama-index-core
13
- # llama-index-legacy
14
- aiosignal==1.3.1
15
  # via aiohttp
16
  annotated-types==0.7.0
17
  # via pydantic
18
- anyio==4.4.0
19
  # via
20
  # gradio
21
  # httpx
22
  # openai
23
  # starlette
24
- apscheduler==3.10.4
25
  # via daily-papers (pyproject.toml)
26
  async-timeout==4.0.3
27
  # via
28
  # aiohttp
29
  # langchain
30
- attrs==24.2.0
31
  # via aiohttp
32
  beautifulsoup4==4.12.3
33
  # via llama-index-readers-file
34
- bitarray==2.9.2
35
  # via colbert-ai
36
- blinker==1.8.2
37
  # via flask
38
  catalogue==2.0.10
39
  # via srsly
40
- certifi==2024.8.30
41
  # via
42
  # httpcore
43
  # httpx
 
44
  # requests
45
- charset-normalizer==3.3.2
46
  # via requests
47
- click==8.1.7
48
  # via
49
  # flask
 
50
  # nltk
51
  # typer
52
  # uvicorn
53
  colbert-ai==0.2.19
54
  # via ragatouille
55
  dataclasses-json==0.6.7
56
- # via
57
- # llama-index-core
58
- # llama-index-legacy
59
- datasets==2.21.0
60
  # via
61
  # daily-papers (pyproject.toml)
62
  # colbert-ai
63
- deprecated==1.2.14
64
- # via
65
- # llama-index-core
66
- # llama-index-legacy
67
  dill==0.3.8
68
  # via
69
  # datasets
70
  # multiprocess
71
  dirtyjson==1.0.8
72
- # via
73
- # llama-index-core
74
- # llama-index-legacy
75
  distro==1.9.0
76
  # via openai
77
  exceptiongroup==1.2.2
78
  # via anyio
79
- faiss-cpu==1.8.0.post1
80
  # via ragatouille
81
  fast-pytorch-kmeans==0.2.0.1
82
  # via ragatouille
83
- fastapi==0.115.4
84
  # via gradio
85
- ffmpy==0.4.0
86
  # via gradio
87
- filelock==3.16.0
88
  # via
89
  # datasets
90
  # huggingface-hub
91
  # torch
92
  # transformers
93
  # triton
94
- flask==3.0.3
 
 
95
  # via colbert-ai
96
- frozenlist==1.4.1
97
  # via
98
  # aiohttp
99
  # aiosignal
100
- fsspec==2024.6.1
101
  # via
102
  # datasets
103
  # gradio-client
104
  # huggingface-hub
105
  # llama-index-core
106
- # llama-index-legacy
107
  # torch
108
  git-python==1.0.3
109
  # via colbert-ai
@@ -111,15 +107,15 @@ gitdb==4.0.11
111
  # via gitpython
112
  gitpython==3.1.43
113
  # via git-python
114
- gradio==5.5.0
115
  # via
116
  # daily-papers (pyproject.toml)
117
  # gradio-calendar
118
  gradio-calendar==0.0.6
119
  # via daily-papers (pyproject.toml)
120
- gradio-client==1.4.2
121
  # via gradio
122
- greenlet==3.1.0
123
  # via sqlalchemy
124
  h11==0.14.0
125
  # via
@@ -127,19 +123,18 @@ h11==0.14.0
127
  # uvicorn
128
  hf-transfer==0.1.8
129
  # via daily-papers (pyproject.toml)
130
- httpcore==1.0.5
131
  # via httpx
132
- httpx==0.27.2
133
  # via
134
  # gradio
135
  # gradio-client
136
  # langsmith
137
  # llama-cloud
138
  # llama-index-core
139
- # llama-index-legacy
140
  # openai
141
  # safehttpx
142
- huggingface-hub==0.25.2
143
  # via
144
  # datasets
145
  # gradio
@@ -147,7 +142,7 @@ huggingface-hub==0.25.2
147
  # sentence-transformers
148
  # tokenizers
149
  # transformers
150
- idna==3.8
151
  # via
152
  # anyio
153
  # httpx
@@ -155,12 +150,12 @@ idna==3.8
155
  # yarl
156
  itsdangerous==2.2.0
157
  # via flask
158
- jinja2==3.1.4
159
  # via
160
  # flask
161
  # gradio
162
  # torch
163
- jiter==0.5.0
164
  # via openai
165
  joblib==1.4.2
166
  # via
@@ -170,31 +165,30 @@ jsonpatch==1.33
170
  # via langchain-core
171
  jsonpointer==3.0.0
172
  # via jsonpatch
173
- langchain==0.2.16
174
  # via ragatouille
175
- langchain-core==0.2.39
176
  # via
177
  # langchain
178
  # langchain-text-splitters
179
  # ragatouille
180
- langchain-text-splitters==0.2.4
181
  # via langchain
182
- langsmith==0.1.117
183
  # via
184
  # langchain
185
  # langchain-core
186
- llama-cloud==0.0.17
187
  # via llama-index-indices-managed-llama-cloud
188
- llama-index==0.11.8
189
  # via ragatouille
190
- llama-index-agent-openai==0.3.1
191
  # via
192
  # llama-index
193
- # llama-index-llms-openai
194
  # llama-index-program-openai
195
- llama-index-cli==0.3.1
196
  # via llama-index
197
- llama-index-core==0.11.8
198
  # via
199
  # llama-index
200
  # llama-index-agent-openai
@@ -208,15 +202,13 @@ llama-index-core==0.11.8
208
  # llama-index-readers-file
209
  # llama-index-readers-llama-parse
210
  # llama-parse
211
- llama-index-embeddings-openai==0.2.4
212
  # via
213
  # llama-index
214
  # llama-index-cli
215
- llama-index-indices-managed-llama-cloud==0.3.0
216
- # via llama-index
217
- llama-index-legacy==0.9.48.post3
218
  # via llama-index
219
- llama-index-llms-openai==0.2.3
220
  # via
221
  # llama-index
222
  # llama-index-agent-openai
@@ -224,19 +216,19 @@ llama-index-llms-openai==0.2.3
224
  # llama-index-multi-modal-llms-openai
225
  # llama-index-program-openai
226
  # llama-index-question-gen-openai
227
- llama-index-multi-modal-llms-openai==0.2.0
228
  # via llama-index
229
- llama-index-program-openai==0.2.0
230
  # via
231
  # llama-index
232
  # llama-index-question-gen-openai
233
- llama-index-question-gen-openai==0.2.0
234
  # via llama-index
235
- llama-index-readers-file==0.2.1
236
  # via llama-index
237
- llama-index-readers-llama-parse==0.3.0
238
  # via llama-index
239
- llama-parse==0.5.5
240
  # via llama-index-readers-llama-parse
241
  markdown-it-py==3.0.0
242
  # via rich
@@ -245,7 +237,7 @@ markupsafe==2.1.5
245
  # gradio
246
  # jinja2
247
  # werkzeug
248
- marshmallow==3.22.0
249
  # via dataclasses-json
250
  mdurl==0.1.2
251
  # via markdown-it-py
@@ -260,21 +252,17 @@ multiprocess==0.70.16
260
  mypy-extensions==1.0.0
261
  # via typing-inspect
262
  nest-asyncio==1.6.0
 
 
263
  # via
264
  # llama-index-core
265
- # llama-index-legacy
266
- networkx==3.3
267
- # via
268
- # llama-index-core
269
- # llama-index-legacy
270
  # torch
271
- ninja==1.11.1.1
272
  # via colbert-ai
273
  nltk==3.9.1
274
  # via
275
  # llama-index
276
  # llama-index-core
277
- # llama-index-legacy
278
  numpy==1.26.4
279
  # via
280
  # datasets
@@ -283,59 +271,59 @@ numpy==1.26.4
283
  # gradio
284
  # langchain
285
  # llama-index-core
286
- # llama-index-legacy
287
  # onnx
288
  # pandas
289
- # pyarrow
290
  # scikit-learn
291
  # scipy
292
  # sentence-transformers
293
  # transformers
294
  # voyager
295
- nvidia-cublas-cu12==12.1.3.1
296
  # via
297
  # nvidia-cudnn-cu12
298
  # nvidia-cusolver-cu12
299
  # torch
300
- nvidia-cuda-cupti-cu12==12.1.105
301
  # via torch
302
- nvidia-cuda-nvrtc-cu12==12.1.105
303
  # via torch
304
- nvidia-cuda-runtime-cu12==12.1.105
305
  # via torch
306
  nvidia-cudnn-cu12==9.1.0.70
307
  # via torch
308
- nvidia-cufft-cu12==11.0.2.54
309
  # via torch
310
- nvidia-curand-cu12==10.3.2.106
311
  # via torch
312
- nvidia-cusolver-cu12==11.4.5.107
313
  # via torch
314
- nvidia-cusparse-cu12==12.1.0.106
315
  # via
316
  # nvidia-cusolver-cu12
317
  # torch
318
- nvidia-nccl-cu12==2.20.5
 
 
319
  # via torch
320
- nvidia-nvjitlink-cu12==12.6.68
321
  # via
322
  # nvidia-cusolver-cu12
323
  # nvidia-cusparse-cu12
324
- nvidia-nvtx-cu12==12.1.105
 
325
  # via torch
326
- onnx==1.16.2
327
  # via ragatouille
328
- openai==1.44.1
329
  # via
330
  # llama-index-agent-openai
331
  # llama-index-embeddings-openai
332
- # llama-index-legacy
333
  # llama-index-llms-openai
334
- orjson==3.10.7
335
  # via
336
  # gradio
337
  # langsmith
338
- packaging==24.1
339
  # via
340
  # datasets
341
  # faiss-cpu
@@ -345,23 +333,26 @@ packaging==24.1
345
  # langchain-core
346
  # marshmallow
347
  # transformers
348
- pandas==2.2.2
349
  # via
350
  # daily-papers (pyproject.toml)
351
  # datasets
352
  # gradio
353
- # llama-index-legacy
354
  # llama-index-readers-file
355
- pillow==10.4.0
356
  # via
357
  # gradio
358
  # llama-index-core
359
  # sentence-transformers
360
- protobuf==5.28.0
 
 
 
 
361
  # via onnx
362
- pyarrow==17.0.0
363
  # via datasets
364
- pydantic==2.9.1
365
  # via
366
  # fastapi
367
  # gradio
@@ -370,27 +361,26 @@ pydantic==2.9.1
370
  # langsmith
371
  # llama-cloud
372
  # llama-index-core
 
373
  # openai
374
- pydantic-core==2.23.3
375
  # via pydantic
376
  pydub==0.25.1
377
  # via gradio
378
  pygments==2.18.0
379
  # via rich
380
- pynvml==11.5.3
381
  # via fast-pytorch-kmeans
382
- pypdf==4.3.1
383
  # via llama-index-readers-file
384
  python-dateutil==2.9.0.post0
385
  # via pandas
386
  python-dotenv==1.0.1
387
  # via colbert-ai
388
- python-multipart==0.0.12
389
  # via gradio
390
  pytz==2024.2
391
- # via
392
- # apscheduler
393
- # pandas
394
  pyyaml==6.0.2
395
  # via
396
  # datasets
@@ -402,7 +392,7 @@ pyyaml==6.0.2
402
  # transformers
403
  ragatouille==0.0.8.post4
404
  # via daily-papers (pyproject.toml)
405
- regex==2024.7.24
406
  # via
407
  # nltk
408
  # tiktoken
@@ -414,18 +404,20 @@ requests==2.32.3
414
  # langchain
415
  # langsmith
416
  # llama-index-core
417
- # llama-index-legacy
418
  # tiktoken
419
  # transformers
420
- rich==13.8.1
 
 
421
  # via typer
422
- ruff==0.6.4
423
  # via gradio
424
- safehttpx==0.1.1
425
  # via gradio
426
  safetensors==0.4.5
427
  # via transformers
428
- scikit-learn==1.5.1
429
  # via sentence-transformers
430
  scipy==1.14.1
431
  # via
@@ -436,62 +428,54 @@ semantic-version==2.10.0
436
  # via gradio
437
  sentence-transformers==2.7.0
438
  # via ragatouille
439
- setuptools==74.1.2
440
  # via daily-papers (pyproject.toml)
441
  shellingham==1.5.4
442
  # via typer
443
- six==1.16.0
444
- # via
445
- # apscheduler
446
- # python-dateutil
447
  smmap==5.0.1
448
  # via gitdb
449
  sniffio==1.3.1
450
  # via
451
  # anyio
452
- # httpx
453
  # openai
454
  soupsieve==2.6
455
  # via beautifulsoup4
456
- sqlalchemy==2.0.34
457
  # via
458
  # langchain
459
  # llama-index-core
460
- # llama-index-legacy
461
  srsly==2.4.8
462
  # via ragatouille
463
- starlette==0.41.2
464
  # via
465
  # fastapi
466
  # gradio
467
  striprtf==0.0.26
468
  # via llama-index-readers-file
469
- sympy==1.13.2
470
  # via torch
471
- tenacity==8.5.0
472
  # via
473
  # langchain
474
  # langchain-core
475
  # llama-index-core
476
- # llama-index-legacy
477
  threadpoolctl==3.5.0
478
  # via scikit-learn
479
- tiktoken==0.7.0
480
- # via
481
- # llama-index-core
482
- # llama-index-legacy
483
- tokenizers==0.19.1
484
  # via transformers
485
- tomlkit==0.12.0
486
  # via gradio
487
- torch==2.4.1
488
  # via
489
  # fast-pytorch-kmeans
490
  # ragatouille
491
  # sentence-transformers
492
- tqdm==4.66.5
493
  # via
494
- # daily-papers (pyproject.toml)
495
  # colbert-ai
496
  # datasets
497
  # huggingface-hub
@@ -500,14 +484,14 @@ tqdm==4.66.5
500
  # openai
501
  # sentence-transformers
502
  # transformers
503
- transformers==4.44.2
504
  # via
505
  # colbert-ai
506
  # ragatouille
507
  # sentence-transformers
508
- triton==3.0.0
509
  # via torch
510
- typer==0.12.5
511
  # via gradio
512
  typing-extensions==4.12.2
513
  # via
@@ -518,12 +502,12 @@ typing-extensions==4.12.2
518
  # huggingface-hub
519
  # langchain-core
520
  # llama-index-core
521
- # llama-index-legacy
522
  # multidict
523
  # openai
524
  # pydantic
525
  # pydantic-core
526
  # pypdf
 
527
  # sqlalchemy
528
  # torch
529
  # typer
@@ -533,28 +517,27 @@ typing-inspect==0.9.0
533
  # via
534
  # dataclasses-json
535
  # llama-index-core
536
- # llama-index-legacy
537
- tzdata==2024.1
538
  # via pandas
539
  tzlocal==5.2
540
  # via apscheduler
541
  ujson==5.10.0
542
  # via colbert-ai
543
- urllib3==2.2.2
544
  # via requests
545
- uvicorn==0.30.6
546
  # via gradio
547
- voyager==2.0.9
548
  # via ragatouille
549
- websockets==12.0
550
  # via gradio-client
551
- werkzeug==3.0.4
552
  # via flask
553
- wrapt==1.16.0
554
  # via
555
  # deprecated
556
  # llama-index-core
557
  xxhash==3.5.0
558
  # via datasets
559
- yarl==1.11.1
560
  # via aiohttp
 
2
  # uv pip compile pyproject.toml -o requirements.txt
3
  aiofiles==23.2.1
4
  # via gradio
5
+ aiohappyeyeballs==2.4.4
6
  # via aiohttp
7
+ aiohttp==3.11.11
8
  # via
9
  # datasets
10
  # fsspec
11
  # langchain
12
  # llama-index-core
13
+ aiosignal==1.3.2
 
14
  # via aiohttp
15
  annotated-types==0.7.0
16
  # via pydantic
17
+ anyio==4.7.0
18
  # via
19
  # gradio
20
  # httpx
21
  # openai
22
  # starlette
23
+ apscheduler==3.11.0
24
  # via daily-papers (pyproject.toml)
25
  async-timeout==4.0.3
26
  # via
27
  # aiohttp
28
  # langchain
29
+ attrs==24.3.0
30
  # via aiohttp
31
  beautifulsoup4==4.12.3
32
  # via llama-index-readers-file
33
+ bitarray==3.0.0
34
  # via colbert-ai
35
+ blinker==1.9.0
36
  # via flask
37
  catalogue==2.0.10
38
  # via srsly
39
+ certifi==2024.12.14
40
  # via
41
  # httpcore
42
  # httpx
43
+ # llama-cloud
44
  # requests
45
+ charset-normalizer==3.4.1
46
  # via requests
47
+ click==8.1.8
48
  # via
49
  # flask
50
+ # llama-parse
51
  # nltk
52
  # typer
53
  # uvicorn
54
  colbert-ai==0.2.19
55
  # via ragatouille
56
  dataclasses-json==0.6.7
57
+ # via llama-index-core
58
+ datasets==3.2.0
 
 
59
  # via
60
  # daily-papers (pyproject.toml)
61
  # colbert-ai
62
+ deprecated==1.2.15
63
+ # via llama-index-core
 
 
64
  dill==0.3.8
65
  # via
66
  # datasets
67
  # multiprocess
68
  dirtyjson==1.0.8
69
+ # via llama-index-core
 
 
70
  distro==1.9.0
71
  # via openai
72
  exceptiongroup==1.2.2
73
  # via anyio
74
+ faiss-cpu==1.9.0.post1
75
  # via ragatouille
76
  fast-pytorch-kmeans==0.2.0.1
77
  # via ragatouille
78
+ fastapi==0.115.6
79
  # via gradio
80
+ ffmpy==0.5.0
81
  # via gradio
82
+ filelock==3.16.1
83
  # via
84
  # datasets
85
  # huggingface-hub
86
  # torch
87
  # transformers
88
  # triton
89
+ filetype==1.2.0
90
+ # via llama-index-core
91
+ flask==3.1.0
92
  # via colbert-ai
93
+ frozenlist==1.5.0
94
  # via
95
  # aiohttp
96
  # aiosignal
97
+ fsspec==2024.9.0
98
  # via
99
  # datasets
100
  # gradio-client
101
  # huggingface-hub
102
  # llama-index-core
 
103
  # torch
104
  git-python==1.0.3
105
  # via colbert-ai
 
107
  # via gitpython
108
  gitpython==3.1.43
109
  # via git-python
110
+ gradio==5.9.1
111
  # via
112
  # daily-papers (pyproject.toml)
113
  # gradio-calendar
114
  gradio-calendar==0.0.6
115
  # via daily-papers (pyproject.toml)
116
+ gradio-client==1.5.2
117
  # via gradio
118
+ greenlet==3.1.1
119
  # via sqlalchemy
120
  h11==0.14.0
121
  # via
 
123
  # uvicorn
124
  hf-transfer==0.1.8
125
  # via daily-papers (pyproject.toml)
126
+ httpcore==1.0.7
127
  # via httpx
128
+ httpx==0.28.1
129
  # via
130
  # gradio
131
  # gradio-client
132
  # langsmith
133
  # llama-cloud
134
  # llama-index-core
 
135
  # openai
136
  # safehttpx
137
+ huggingface-hub==0.27.0
138
  # via
139
  # datasets
140
  # gradio
 
142
  # sentence-transformers
143
  # tokenizers
144
  # transformers
145
+ idna==3.10
146
  # via
147
  # anyio
148
  # httpx
 
150
  # yarl
151
  itsdangerous==2.2.0
152
  # via flask
153
+ jinja2==3.1.5
154
  # via
155
  # flask
156
  # gradio
157
  # torch
158
+ jiter==0.8.2
159
  # via openai
160
  joblib==1.4.2
161
  # via
 
165
  # via langchain-core
166
  jsonpointer==3.0.0
167
  # via jsonpatch
168
+ langchain==0.3.13
169
  # via ragatouille
170
+ langchain-core==0.3.28
171
  # via
172
  # langchain
173
  # langchain-text-splitters
174
  # ragatouille
175
+ langchain-text-splitters==0.3.4
176
  # via langchain
177
+ langsmith==0.2.7
178
  # via
179
  # langchain
180
  # langchain-core
181
+ llama-cloud==0.1.7
182
  # via llama-index-indices-managed-llama-cloud
183
+ llama-index==0.12.8
184
  # via ragatouille
185
+ llama-index-agent-openai==0.4.1
186
  # via
187
  # llama-index
 
188
  # llama-index-program-openai
189
+ llama-index-cli==0.4.0
190
  # via llama-index
191
+ llama-index-core==0.12.9
192
  # via
193
  # llama-index
194
  # llama-index-agent-openai
 
202
  # llama-index-readers-file
203
  # llama-index-readers-llama-parse
204
  # llama-parse
205
+ llama-index-embeddings-openai==0.3.1
206
  # via
207
  # llama-index
208
  # llama-index-cli
209
+ llama-index-indices-managed-llama-cloud==0.6.3
 
 
210
  # via llama-index
211
+ llama-index-llms-openai==0.3.12
212
  # via
213
  # llama-index
214
  # llama-index-agent-openai
 
216
  # llama-index-multi-modal-llms-openai
217
  # llama-index-program-openai
218
  # llama-index-question-gen-openai
219
+ llama-index-multi-modal-llms-openai==0.4.1
220
  # via llama-index
221
+ llama-index-program-openai==0.3.1
222
  # via
223
  # llama-index
224
  # llama-index-question-gen-openai
225
+ llama-index-question-gen-openai==0.3.0
226
  # via llama-index
227
+ llama-index-readers-file==0.4.1
228
  # via llama-index
229
+ llama-index-readers-llama-parse==0.4.0
230
  # via llama-index
231
+ llama-parse==0.5.19
232
  # via llama-index-readers-llama-parse
233
  markdown-it-py==3.0.0
234
  # via rich
 
237
  # gradio
238
  # jinja2
239
  # werkzeug
240
+ marshmallow==3.23.2
241
  # via dataclasses-json
242
  mdurl==0.1.2
243
  # via markdown-it-py
 
252
  mypy-extensions==1.0.0
253
  # via typing-inspect
254
  nest-asyncio==1.6.0
255
+ # via llama-index-core
256
+ networkx==3.4.2
257
  # via
258
  # llama-index-core
 
 
 
 
 
259
  # torch
260
+ ninja==1.11.1.3
261
  # via colbert-ai
262
  nltk==3.9.1
263
  # via
264
  # llama-index
265
  # llama-index-core
 
266
  numpy==1.26.4
267
  # via
268
  # datasets
 
271
  # gradio
272
  # langchain
273
  # llama-index-core
 
274
  # onnx
275
  # pandas
 
276
  # scikit-learn
277
  # scipy
278
  # sentence-transformers
279
  # transformers
280
  # voyager
281
+ nvidia-cublas-cu12==12.4.5.8
282
  # via
283
  # nvidia-cudnn-cu12
284
  # nvidia-cusolver-cu12
285
  # torch
286
+ nvidia-cuda-cupti-cu12==12.4.127
287
  # via torch
288
+ nvidia-cuda-nvrtc-cu12==12.4.127
289
  # via torch
290
+ nvidia-cuda-runtime-cu12==12.4.127
291
  # via torch
292
  nvidia-cudnn-cu12==9.1.0.70
293
  # via torch
294
+ nvidia-cufft-cu12==11.2.1.3
295
  # via torch
296
+ nvidia-curand-cu12==10.3.5.147
297
  # via torch
298
+ nvidia-cusolver-cu12==11.6.1.9
299
  # via torch
300
+ nvidia-cusparse-cu12==12.3.1.170
301
  # via
302
  # nvidia-cusolver-cu12
303
  # torch
304
+ nvidia-ml-py==12.560.30
305
+ # via pynvml
306
+ nvidia-nccl-cu12==2.21.5
307
  # via torch
308
+ nvidia-nvjitlink-cu12==12.4.127
309
  # via
310
  # nvidia-cusolver-cu12
311
  # nvidia-cusparse-cu12
312
+ # torch
313
+ nvidia-nvtx-cu12==12.4.127
314
  # via torch
315
+ onnx==1.17.0
316
  # via ragatouille
317
+ openai==1.58.1
318
  # via
319
  # llama-index-agent-openai
320
  # llama-index-embeddings-openai
 
321
  # llama-index-llms-openai
322
+ orjson==3.10.13
323
  # via
324
  # gradio
325
  # langsmith
326
+ packaging==24.2
327
  # via
328
  # datasets
329
  # faiss-cpu
 
333
  # langchain-core
334
  # marshmallow
335
  # transformers
336
+ pandas==2.2.3
337
  # via
338
  # daily-papers (pyproject.toml)
339
  # datasets
340
  # gradio
 
341
  # llama-index-readers-file
342
+ pillow==11.0.0
343
  # via
344
  # gradio
345
  # llama-index-core
346
  # sentence-transformers
347
+ propcache==0.2.1
348
+ # via
349
+ # aiohttp
350
+ # yarl
351
+ protobuf==5.29.2
352
  # via onnx
353
+ pyarrow==18.1.0
354
  # via datasets
355
+ pydantic==2.10.4
356
  # via
357
  # fastapi
358
  # gradio
 
361
  # langsmith
362
  # llama-cloud
363
  # llama-index-core
364
+ # llama-parse
365
  # openai
366
+ pydantic-core==2.27.2
367
  # via pydantic
368
  pydub==0.25.1
369
  # via gradio
370
  pygments==2.18.0
371
  # via rich
372
+ pynvml==12.0.0
373
  # via fast-pytorch-kmeans
374
+ pypdf==5.1.0
375
  # via llama-index-readers-file
376
  python-dateutil==2.9.0.post0
377
  # via pandas
378
  python-dotenv==1.0.1
379
  # via colbert-ai
380
+ python-multipart==0.0.20
381
  # via gradio
382
  pytz==2024.2
383
+ # via pandas
 
 
384
  pyyaml==6.0.2
385
  # via
386
  # datasets
 
392
  # transformers
393
  ragatouille==0.0.8.post4
394
  # via daily-papers (pyproject.toml)
395
+ regex==2024.11.6
396
  # via
397
  # nltk
398
  # tiktoken
 
404
  # langchain
405
  # langsmith
406
  # llama-index-core
407
+ # requests-toolbelt
408
  # tiktoken
409
  # transformers
410
+ requests-toolbelt==1.0.0
411
+ # via langsmith
412
+ rich==13.9.4
413
  # via typer
414
+ ruff==0.8.4
415
  # via gradio
416
+ safehttpx==0.1.6
417
  # via gradio
418
  safetensors==0.4.5
419
  # via transformers
420
+ scikit-learn==1.6.0
421
  # via sentence-transformers
422
  scipy==1.14.1
423
  # via
 
428
  # via gradio
429
  sentence-transformers==2.7.0
430
  # via ragatouille
431
+ setuptools==75.6.0
432
  # via daily-papers (pyproject.toml)
433
  shellingham==1.5.4
434
  # via typer
435
+ six==1.17.0
436
+ # via python-dateutil
 
 
437
  smmap==5.0.1
438
  # via gitdb
439
  sniffio==1.3.1
440
  # via
441
  # anyio
 
442
  # openai
443
  soupsieve==2.6
444
  # via beautifulsoup4
445
+ sqlalchemy==2.0.36
446
  # via
447
  # langchain
448
  # llama-index-core
 
449
  srsly==2.4.8
450
  # via ragatouille
451
+ starlette==0.41.3
452
  # via
453
  # fastapi
454
  # gradio
455
  striprtf==0.0.26
456
  # via llama-index-readers-file
457
+ sympy==1.13.1
458
  # via torch
459
+ tenacity==9.0.0
460
  # via
461
  # langchain
462
  # langchain-core
463
  # llama-index-core
 
464
  threadpoolctl==3.5.0
465
  # via scikit-learn
466
+ tiktoken==0.8.0
467
+ # via llama-index-core
468
+ tokenizers==0.21.0
 
 
469
  # via transformers
470
+ tomlkit==0.13.2
471
  # via gradio
472
+ torch==2.5.1
473
  # via
474
  # fast-pytorch-kmeans
475
  # ragatouille
476
  # sentence-transformers
477
+ tqdm==4.67.1
478
  # via
 
479
  # colbert-ai
480
  # datasets
481
  # huggingface-hub
 
484
  # openai
485
  # sentence-transformers
486
  # transformers
487
+ transformers==4.47.1
488
  # via
489
  # colbert-ai
490
  # ragatouille
491
  # sentence-transformers
492
+ triton==3.1.0
493
  # via torch
494
+ typer==0.15.1
495
  # via gradio
496
  typing-extensions==4.12.2
497
  # via
 
502
  # huggingface-hub
503
  # langchain-core
504
  # llama-index-core
 
505
  # multidict
506
  # openai
507
  # pydantic
508
  # pydantic-core
509
  # pypdf
510
+ # rich
511
  # sqlalchemy
512
  # torch
513
  # typer
 
517
  # via
518
  # dataclasses-json
519
  # llama-index-core
520
+ tzdata==2024.2
 
521
  # via pandas
522
  tzlocal==5.2
523
  # via apscheduler
524
  ujson==5.10.0
525
  # via colbert-ai
526
+ urllib3==2.3.0
527
  # via requests
528
+ uvicorn==0.34.0
529
  # via gradio
530
+ voyager==2.1.0
531
  # via ragatouille
532
+ websockets==14.1
533
  # via gradio-client
534
+ werkzeug==3.1.3
535
  # via flask
536
+ wrapt==1.17.0
537
  # via
538
  # deprecated
539
  # llama-index-core
540
  xxhash==3.5.0
541
  # via datasets
542
+ yarl==1.18.3
543
  # via aiohttp
uv.lock CHANGED
The diff for this file is too large to render. See raw diff