Spaces:
Sleeping
Sleeping
import pandas as pd # for data manipulation (pip install pandas) | |
from langchain.chat_models import ChatOpenAI | |
from langchain.chains import create_extraction_chain | |
from langchain.chat_models import ChatOpenAI | |
from langchain.prompts import ChatPromptTemplate | |
import gradio as gr | |
import os | |
import collections | |
# Schema handed to the extraction chains: each extracted record is an object
# with two string fields, and both of them are listed as required.
_RECORD_FIELDS = ("keyword", "category")
schema = {
    "properties": {field: {"type": "string"} for field in _RECORD_FIELDS},
    "required": list(_RECORD_FIELDS),
}
# Prompt templates.
# Both variants open with the same system role and close with the same
# formatting tip; they differ only in whether a category list is interpolated.
_SYSTEM_MESSAGE = ("system", "You are an expert marketing researcher")
_FORMAT_TIP = (
    "human",
    "Tip: Make sure to answer in the correct format and DO NOT leave keywords without category and DO NOT skip keywords. Please categorize all the keywords that I give you, each keyword must have just one and only one category.",
)

# Variant with categories: expects `prompt_input`, `categories` and `keywords`.
prompt = ChatPromptTemplate.from_messages(
    [
        _SYSTEM_MESSAGE,
        (
            "human",
            "{prompt_input}.\n"
            "Here you have the categories splitted by coma: {categories}.\n"
            "and Here you have the keywords splitted by coma: {keywords}.",
        ),
        _FORMAT_TIP,
    ]
)

# Variant without categories: expects only `prompt_input` and `keywords`.
prompt_no_cat = ChatPromptTemplate.from_messages(
    [
        _SYSTEM_MESSAGE,
        (
            "human",
            "{prompt_input}.\n"
            "and Here you have the keywords splitted by coma: {keywords}.",
        ),
        _FORMAT_TIP,
    ]
)
# Chat model shared by both chains. It runs at temperature 0 and takes its API
# key from the `OpenAI_APIKEY` environment variable.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0,
    openai_api_key=os.getenv("OpenAI_APIKEY"),
)

# One extraction chain per prompt variant (with / without a category list),
# built in the same order as before and sharing the schema and the model.
chain, chain_no_cat = (
    create_extraction_chain(schema, llm, template, verbose=1)
    for template in (prompt, prompt_no_cat)
)
def _read_first_column(uploaded_file):
    """Return the first column of an uploaded CSV/Excel file as a list of strings.

    The file is parsed as CSV first and as an Excel workbook if that fails.
    Empty cells are dropped and every remaining value is converted to ``str``
    so the result can always be joined into a prompt.
    """
    path = uploaded_file.name
    try:
        frame = pd.read_csv(path)
    except Exception:
        # Deliberate best-effort fallback: anything that does not parse as CSV
        # is retried as Excel. Unlike a bare `except:`, this lets
        # KeyboardInterrupt / SystemExit propagate.
        frame = pd.read_excel(path)
    if frame.shape[1] == 0:
        raise ValueError("The uploaded file {!r} has no columns".format(path))
    return frame.iloc[:, 0].dropna().astype(str).tolist()


def _extract_in_batches(extraction_chain, keywords, batch_size, base_inputs):
    """Run ``extraction_chain`` over ``keywords`` in slices of ``batch_size``.

    A batch whose chain call raises is reported on stdout and skipped, so one
    failing request does not discard the results of the other batches.
    """
    results = []
    for start in range(0, len(keywords), batch_size):
        # Slicing past the end of a list is safe, so no guard is needed here.
        batch = keywords[start:start + batch_size]
        inputs = dict(base_inputs, keywords=','.join(batch))
        try:
            extracted = extraction_chain.run(inputs)
        except Exception as error:
            print('this batch did not worked from {} to {}'.format(start, start + batch_size))
            print(error)
            continue
        results += extracted
    return results


def run_chain(input_prompt, keywords_file, categories_file=None, batch_size=50):
    """Categorize the uploaded keywords with the LLM and save the result as CSV.

    Args:
        input_prompt: Instruction text placed at the start of the human message.
        keywords_file: Uploaded file object (its ``.name`` is the path on disk);
            the first column of the CSV/Excel file holds the keywords.
        categories_file: Optional uploaded file whose first column holds the
            allowed categories. When ``None`` the prompt variant without
            categories is used.
        batch_size: Number of keywords sent per request when categories are
            given. Without categories all keywords are sent in one request and
            this value is ignored (unchanged from the previous behaviour).

    Returns:
        A tuple ``(results, 'themes_results.csv')`` where ``results`` is the
        concatenation of what each successful chain call returned and the
        second item is the path of the CSV written by ``results_to_csv``.

    Raises:
        ValueError: If no keywords file was supplied, if an uploaded file has
            no columns, or if ``batch_size`` is not positive while categories
            are used (it previously caused an endless loop).
    """
    if keywords_file is None:
        raise ValueError("A keywords file is required")

    keywords = _read_first_column(keywords_file)
    base_inputs = {'prompt_input': input_prompt}

    if categories_file is not None:
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        # The category list is identical for every batch, so build it once.
        base_inputs['categories'] = ','.join(_read_first_column(categories_file))
        extraction_chain = chain
    else:
        extraction_chain = chain_no_cat
        # Single request with every keyword; max() keeps the step valid when
        # the keyword list is empty.
        batch_size = max(len(keywords), 1)

    results = _extract_in_batches(extraction_chain, keywords, batch_size, base_inputs)
    # Written once after all batches, so the file exists even with no keywords.
    results_to_csv(results)
    return results, 'themes_results.csv'
def results_to_csv(results, path='themes_results.csv'):
    """Write the extraction results to a CSV file, one row per result.

    Args:
        results: Iterable of dicts (e.g. ``{"keyword": ..., "category": ...}``).
            Each distinct key becomes a column, in order of first appearance.
        path: Destination file; defaults to ``'themes_results.csv'`` in the
            current working directory, the location used before this
            parameter existed.

    Building the frame directly from the list of records lets pandas pad a
    record that lacks a key with an empty cell. The previous per-key lists
    ended up with different lengths in that case and raised ``ValueError``.
    """
    pd.DataFrame(list(results)).to_csv(path, index=False)
# Gradio UI: an editable instruction prompt, two file uploads (keywords and
# optional categories), two buttons that call `run_chain` with or without the
# categories file, and outputs for the raw results and the generated CSV.
with gr.Blocks() as demo:
    # Default instruction text, editable by the user.
    # NOTE(review): the last sentence ("If the categories are not given ")
    # looks truncated; it is kept unchanged because it is sent to the model.
    prompt_input = gr.Text(
        "I need your help to analyze and categorize the provided list of keywords\n"
        "into the appropriate categories.\n"
        "The goal is to understand information demand on search engines within this industry. Each keyword represents a search and it should have a relation with the category.\n"
        "Extract each keyword and assign the best category among the given categories. Return every keyword with the relative category in pairs.\n"
        "If the categories are not given "
    )

    gr.Markdown("Upload CSV or xlsx with keywords: Just a csv with all the keywords in one column. Should have a header")
    # Extensions need the leading dot, as the categories uploader already had.
    keywords_file = gr.File(file_types=['.csv', '.xlsx'], label='keywords')

    gr.Markdown("Upload CSV or xlsx with categories: Just a csv with all the categories in one column. Should have a header")
    categories_file = gr.File(file_types=['.csv', '.xlsx'], label='categories')

    btn = gr.Button(value="Run with categories")
    btn2 = gr.Button(value="Run without categories")

    txt_3 = gr.Textbox(value="", label="Output")
    output_file = gr.File(
        label="Output File",
        file_count="single",
        file_types=["", ".", ".csv", ".xls", ".xlsx"],
    )

    # With categories: all three inputs are passed to run_chain.
    btn.click(run_chain, inputs=[prompt_input, keywords_file, categories_file], outputs=[txt_3, output_file])
    # Without categories: categories_file is omitted, so run_chain falls back
    # to its default of None and uses the prompt variant without categories.
    btn2.click(run_chain, inputs=[prompt_input, keywords_file], outputs=[txt_3, output_file])

demo.launch()