Spaces:
Sleeping
Sleeping
adityaiiitr
committed on
Update main.py
Browse files
main.py
CHANGED
@@ -214,7 +214,7 @@ async def scrape_and_crawl(
|
|
214 |
combined_text = "\n".join(text_group) # Combine all the texts in this group
|
215 |
|
216 |
logger.info(f"Structuring data for group {i // group_size + 1} with {len(text_group)} links.")
|
217 |
-
prompt = f"Convert the following unstructured text into a well-written and comprehensive structured form with titles and content. --- {combined_text} ---"
|
218 |
|
219 |
# Generate structured content using Google Generative AI
|
220 |
try:
|
|
|
214 |
combined_text = "\n".join(text_group) # Combine all the texts in this group
|
215 |
|
216 |
logger.info(f"Structuring data for group {i // group_size + 1} with {len(text_group)} links.")
|
217 |
+
prompt = f"Convert the following unstructured text into a well-written and comprehensive structured form with titles and content containing all relevant data. The response should be a detailed paragraph mentioning everything about just the college named '{topic_title}' and not of any other college, ensuring no important information is missed. Include details such as connectivity, placement, nearby colleges, infrastructure, courses, branches, students, festivals, clubs, reviews, Q&A, and any other college-related parameters available in the text. Provide the response text with no formatting! --- \n{combined_text} ---. Use only the text between the '---' markers as input source text. If information is not available about any specific thing don't mention its heading. Also try not to include table of contents and remove the repetitive information. Also avoid taking comparisons from other colleges and alwasys stick to the college and just write about its different parameters. Also remove any unnecessary things like any social media link of the website. Also do not include FAQs and similar colleges column. Try to include only the parameters of the same college."
|
218 |
|
219 |
# Generate structured content using Google Generative AI
|
220 |
try:
|