omarsol committed on
Commit
d96c047
·
1 Parent(s): 50ca230

Add LLM Developer source to project

Browse files
data/scraping_scripts/add_context_to_nodes.py CHANGED
@@ -162,13 +162,6 @@ async def process(
162
 
163
  results: List[TextNode] = await tqdm.gather(*tasks, desc="Processing chunks")
164
 
165
- # results: List[TextNode] = []
166
- # # Add tqdm progress bar with semaphore limit
167
- # for task in tqdm(
168
- # asyncio.as_completed(tasks), total=len(tasks), desc="Processing chunks"
169
- # ):
170
- # result = await task
171
- # results.append(result)
172
  # pdb.set_trace()
173
 
174
  return results
 
162
 
163
  results: List[TextNode] = await tqdm.gather(*tasks, desc="Processing chunks")
164
 
 
 
 
 
 
 
 
165
  # pdb.set_trace()
166
 
167
  return results
data/scraping_scripts/process_md_files.py CHANGED
@@ -416,6 +416,18 @@ SOURCE_CONFIGS = {
416
  "included_root_files": [],
417
  "url_extension": "",
418
  },
 
 
 
 
 
 
 
 
 
 
 
 
419
  }
420
 
421
 
 
416
  "included_root_files": [],
417
  "url_extension": "",
418
  },
419
+ "llm_developer": {
420
+ "base_url": "",
421
+ "input_directory": "data/llm_developer",
422
+ "output_file": "data/llm_developer_data.jsonl", # From Beginner to Advanced LLM Developer
423
+ "source_name": "llm_developer",
424
+ "use_include_list": False,
425
+ "included_dirs": [],
426
+ "excluded_dirs": [],
427
+ "excluded_root_files": [],
428
+ "included_root_files": [],
429
+ "url_extension": "",
430
+ },
431
  }
432
 
433
 
scripts/main.py CHANGED
@@ -129,6 +129,7 @@ def generate_completion(
129
  "OpenAI Cookbooks": "openai_cookbooks",
130
  "Towards AI Blog": "tai_blog",
131
  "8 Hour Primer": "8-hour_primer",
 
132
  }
133
 
134
  for source in sources:
@@ -245,6 +246,7 @@ sources = gr.CheckboxGroup(
245
  "OpenAI Cookbooks",
246
  "Towards AI Blog",
247
  "8 Hour Primer",
 
248
  # "All Sources",
249
  ],
250
  interactive=True,
 
129
  "OpenAI Cookbooks": "openai_cookbooks",
130
  "Towards AI Blog": "tai_blog",
131
  "8 Hour Primer": "8-hour_primer",
132
+ "Advanced LLM Developer": "llm_developer",
133
  }
134
 
135
  for source in sources:
 
246
  "OpenAI Cookbooks",
247
  "Towards AI Blog",
248
  "8 Hour Primer",
249
+ "Advanced LLM Developer",
250
  # "All Sources",
251
  ],
252
  interactive=True,
scripts/setup.py CHANGED
@@ -178,6 +178,7 @@ AVAILABLE_SOURCES_UI = [
178
  "OpenAI Cookbooks",
179
  "Towards AI Blog",
180
  "8 Hour Primer",
 
181
  # "All Sources",
182
  ]
183
 
@@ -190,6 +191,7 @@ AVAILABLE_SOURCES = [
190
  "openai_cookbooks",
191
  "tai_blog",
192
  "8-hour_primer",
 
193
  # "all_sources",
194
  ]
195
 
 
178
  "OpenAI Cookbooks",
179
  "Towards AI Blog",
180
  "8 Hour Primer",
181
+ "Advanced LLM Developer",
182
  # "All Sources",
183
  ]
184
 
 
191
  "openai_cookbooks",
192
  "tai_blog",
193
  "8-hour_primer",
194
+ "llm_developer",
195
  # "all_sources",
196
  ]
197