# fchoquette-ebay's picture
# feat: add auto tagging logic
# ec962e4 unverified
# raw
# history blame
# 2.2 kB
import gradio as gr
from sentence_transformers import SentenceTransformer, util
# Similarity cut-off: a phrase only counts as an answer when its cosine
# similarity to the question, rounded to two decimals, is strictly greater
# than this value (see extract()).
threshold = 0.65
# Number of consecutive words in each sliding-window phrase that extract()
# asks generate_phrases() to build from the description.
sentence_length = 6
# Questions asked of every description. Each string is used verbatim both as
# the query text that gets embedded and as the label of its output text box,
# so editing a string changes matching behaviour as well as the UI.
questions = [
"Is it new or used", "Are there any wear & tear", "Does it come with dust bag, receipt & original box",
"Are there any scratches, marks", "Are there any fading, stains, discolorization",
"Is this item customized, repainted or has hardware been replaced", "Is it special edition", "Is there any odour",
"Are there multiple items or extra add-ons in this listing?",
"Is there a date code or serial number present on the item?"
]
# Sentence-embedding model shared by all requests; instantiated once when the
# module is loaded and used to encode both the phrases and the questions.
model = SentenceTransformer("all-MiniLM-L6-v2")
def generate_phrases(desc: str, length: int):
    """Split *desc* into overlapping phrases of *length* consecutive words.

    The text is split on whitespace and a window of ``length`` words is slid
    across it one word at a time; every window is re-joined with single
    spaces.

    Args:
        desc: Free-form text to split.
        length: Number of words per phrase; must be at least 1.

    Returns:
        The phrases in reading order. When ``desc`` contains fewer than
        ``length`` words the result is a single phrase holding all of them
        (an empty string for blank input), so the list is never empty.

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    # A non-positive window would otherwise yield a list of empty strings.
    if length < 1:
        raise ValueError(f"length must be at least 1, got {length}")

    words = desc.split()
    if len(words) < length:
        # Too short for even one full window: return the text as one phrase.
        return [' '.join(words)]

    window_count = len(words) - length + 1
    return [' '.join(words[start:start + length]) for start in range(window_count)]
def extract(description: str):
    """Pick, for every entry in ``questions``, the best-matching phrase.

    The description is cut into overlapping phrases of ``sentence_length``
    words, the phrases and each question are embedded with ``model``, and the
    phrase with the highest cosine similarity to the question is selected.

    Args:
        description: Free-form item description. ``None`` is treated as
            empty text.

    Returns:
        A list with one string per question, in the order of ``questions``:
        the selected phrase, or ``'No answer'`` when no phrase has a
        similarity that, rounded to two decimals, exceeds ``threshold``.
    """
    # Treat a missing description as empty text instead of failing on
    # ``None.split()`` inside generate_phrases().
    sentences = generate_phrases(description or "", sentence_length)
    sentences_embedding = model.encode(sentences)

    answers = []
    for question in questions:
        query_embedding = model.encode(question)
        # Row 0 holds the similarity of this question to every phrase.
        similarities = util.cos_sim(query_embedding, sentences_embedding)

        best_phrase = None
        best_score = None
        for phrase, similarity in zip(sentences, similarities[0]):
            score = similarity.item()
            # The cut-off is applied to the two-decimal rounded score.
            if round(score, 2) <= threshold:
                continue
            # ``>=`` keeps the later phrase when two scores tie.
            if best_score is None or score >= best_score:
                best_phrase = phrase
                best_score = score

        # Compare against None explicitly: an empty phrase is still a match.
        answers.append(best_phrase if best_phrase is not None else 'No answer')
    return answers
def map_question_to_text(question):
    """Create the text component that displays the answer to *question*.

    The question string itself is used as the component's label.
    """
    answer_box = gr.Text(label=question)
    return answer_box
# Wire the UI: one description box in, one labelled text box out per question
# (same order as the answers returned by extract), then start the app.
demo = gr.Interface(
    fn=extract,
    inputs=gr.Textbox(label="Description"),
    outputs=[map_question_to_text(question) for question in questions],
)
demo.launch()