Add LLM Developer source to project
Browse files
data/scraping_scripts/add_context_to_nodes.py
CHANGED
@@ -162,13 +162,6 @@ async def process(
|
|
162 |
|
163 |
results: List[TextNode] = await tqdm.gather(*tasks, desc="Processing chunks")
|
164 |
|
165 |
-
# results: List[TextNode] = []
|
166 |
-
# # Add tqdm progress bar with semaphore limit
|
167 |
-
# for task in tqdm(
|
168 |
-
# asyncio.as_completed(tasks), total=len(tasks), desc="Processing chunks"
|
169 |
-
# ):
|
170 |
-
# result = await task
|
171 |
-
# results.append(result)
|
172 |
# pdb.set_trace()
|
173 |
|
174 |
return results
|
|
|
162 |
|
163 |
results: List[TextNode] = await tqdm.gather(*tasks, desc="Processing chunks")
|
164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
# pdb.set_trace()
|
166 |
|
167 |
return results
|
data/scraping_scripts/process_md_files.py
CHANGED
@@ -416,6 +416,18 @@ SOURCE_CONFIGS = {
|
|
416 |
"included_root_files": [],
|
417 |
"url_extension": "",
|
418 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
419 |
}
|
420 |
|
421 |
|
|
|
416 |
"included_root_files": [],
|
417 |
"url_extension": "",
|
418 |
},
|
419 |
+
"llm_developer": {
|
420 |
+
"base_url": "",
|
421 |
+
"input_directory": "data/llm_developer",
|
422 |
+
"output_file": "data/llm_developer_data.jsonl", # From Beginner to Advanced LLM Developer
|
423 |
+
"source_name": "llm_developer",
|
424 |
+
"use_include_list": False,
|
425 |
+
"included_dirs": [],
|
426 |
+
"excluded_dirs": [],
|
427 |
+
"excluded_root_files": [],
|
428 |
+
"included_root_files": [],
|
429 |
+
"url_extension": "",
|
430 |
+
},
|
431 |
}
|
432 |
|
433 |
|
scripts/main.py
CHANGED
@@ -129,6 +129,7 @@ def generate_completion(
|
|
129 |
"OpenAI Cookbooks": "openai_cookbooks",
|
130 |
"Towards AI Blog": "tai_blog",
|
131 |
"8 Hour Primer": "8-hour_primer",
|
|
|
132 |
}
|
133 |
|
134 |
for source in sources:
|
@@ -245,6 +246,7 @@ sources = gr.CheckboxGroup(
|
|
245 |
"OpenAI Cookbooks",
|
246 |
"Towards AI Blog",
|
247 |
"8 Hour Primer",
|
|
|
248 |
# "All Sources",
|
249 |
],
|
250 |
interactive=True,
|
|
|
129 |
"OpenAI Cookbooks": "openai_cookbooks",
|
130 |
"Towards AI Blog": "tai_blog",
|
131 |
"8 Hour Primer": "8-hour_primer",
|
132 |
+
"Advanced LLM Developer": "llm_developer",
|
133 |
}
|
134 |
|
135 |
for source in sources:
|
|
|
246 |
"OpenAI Cookbooks",
|
247 |
"Towards AI Blog",
|
248 |
"8 Hour Primer",
|
249 |
+
"Advanced LLM Developer",
|
250 |
# "All Sources",
|
251 |
],
|
252 |
interactive=True,
|
scripts/setup.py
CHANGED
@@ -178,6 +178,7 @@ AVAILABLE_SOURCES_UI = [
|
|
178 |
"OpenAI Cookbooks",
|
179 |
"Towards AI Blog",
|
180 |
"8 Hour Primer",
|
|
|
181 |
# "All Sources",
|
182 |
]
|
183 |
|
@@ -190,6 +191,7 @@ AVAILABLE_SOURCES = [
|
|
190 |
"openai_cookbooks",
|
191 |
"tai_blog",
|
192 |
"8-hour_primer",
|
|
|
193 |
# "all_sources",
|
194 |
]
|
195 |
|
|
|
178 |
"OpenAI Cookbooks",
|
179 |
"Towards AI Blog",
|
180 |
"8 Hour Primer",
|
181 |
+
"Advanced LLM Developer",
|
182 |
# "All Sources",
|
183 |
]
|
184 |
|
|
|
191 |
"openai_cookbooks",
|
192 |
"tai_blog",
|
193 |
"8-hour_primer",
|
194 |
+
"llm_developer",
|
195 |
# "all_sources",
|
196 |
]
|
197 |
|