Spaces:

ebay-ginza
/

ginza_auto_tagging

Sleeping

App Files Files

fchoquette-ebay committed on Jun 7

Commit

ec962e4

•

1 Parent(s): b244c44

feat: add auto tagging logic

Browse files

Files changed (3) hide show

.gitignore +2 -0
app.py +70 -0
requirements.txt +87 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ .venv
2	+ .idea

app.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import gradio as gr
+from sentence_transformers import SentenceTransformer, util
# A phrase is accepted as an answer only when its cosine similarity to the
# question, rounded to two decimals, is strictly greater than this value.
threshold = 0.65

# Number of whitespace-separated words in each candidate phrase window.
sentence_length = 6

# Questions answered from the listing description. Each string is embedded
# for matching and also used as the label of an output field, so the wording
# is kept verbatim.
questions = [
    "Is it  new or used",
    "Are there any wear & tear",
    "Does it come with dust bag, receipt & original box",
    "Are there any scratches, marks",
    "Are there any fading, stains, discolorization",
    "Is this item customized, repainted or has hardware been replaced",
    "Is it special edition",
    "Is there any odour",
    "Are there multiple items or extra add-ons in this listing?",
    "Is there a date code or serial number present on the item?",
]

# Sentence-embedding model, instantiated once at import time and shared by
# every call that needs to encode text.
model = SentenceTransformer("all-MiniLM-L6-v2")
def generate_phrases(desc: str, length: int) -> list[str]:
    """Split *desc* into overlapping phrases of *length* consecutive words.

    The description is tokenised on whitespace and a window of ``length``
    words is slid across it one word at a time, so a description with
    ``n >= length`` words yields ``n - length + 1`` phrases.

    Args:
        desc: Free-text description to split.
        length: Number of words per phrase; must be at least 1.

    Returns:
        The phrases in order of appearance, each with its words joined by a
        single space. When the description has fewer than ``length`` words
        (including none at all) the result is a single-element list holding
        the whole description re-joined with single spaces.

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    if length < 1:
        # A non-positive window has no meaningful phrases; fail loudly
        # instead of returning a list of empty strings.
        raise ValueError(f"length must be a positive integer, got {length}")

    words = desc.split()
    if len(words) < length:
        # Too short for even one full window: fall back to the whole text.
        return [' '.join(words)]

    window_count = len(words) - length + 1
    return [
        ' '.join(words[start:start + length])
        for start in range(window_count)
    ]
def extract(description: str) -> list[str]:
    """Pick, for every configured question, the best-matching phrase.

    The description is cut into word windows with ``generate_phrases``, the
    windows and the questions are embedded with the shared ``model``, and
    each question is matched to the window with the highest cosine
    similarity.

    Args:
        description: Free-text item description. ``None`` is treated as an
            empty description.

    Returns:
        One string per entry of ``questions``, in the same order: the
        best-scoring phrase, or ``'No answer'`` when no phrase has a
        similarity that, rounded to two decimals, exceeds ``threshold``.
    """
    # Guard against a missing description so the split does not fail.
    sentences = generate_phrases(description or "", sentence_length)
    sentences_embedding = model.encode(sentences)

    # Encode every question in one batched call rather than one model call
    # per question, then score all (question, phrase) pairs at once.
    questions_embedding = model.encode(questions)
    similarity_matrix = util.cos_sim(questions_embedding, sentences_embedding)

    answers = []
    for question_scores in similarity_matrix:
        best_phrase = None
        best_score = None
        for phrase, score in zip(sentences, question_scores.tolist()):
            # The threshold is applied to the score rounded to two decimals.
            if round(score, 2) <= threshold:
                continue
            # ">=" keeps the later phrase when two phrases score equally.
            if best_score is None or score >= best_score:
                best_phrase, best_score = phrase, score
        # Compare against None explicitly: an empty phrase is a valid match.
        answers.append(best_phrase if best_phrase is not None else 'No answer')
    return answers
def map_question_to_text(question):
    """Return a Gradio text component whose label is *question*."""
    text_component = gr.Text(label=question)
    return text_component
# Wire the UI: one free-text input for the description and one labelled text
# output per question, filled in order from the list returned by extract().
demo = gr.Interface(
    fn=extract,
    inputs=gr.Textbox(label="Description"),
    outputs=[map_question_to_text(question) for question in questions],
)
# Start the app as soon as the module is executed.
demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,87 @@

+aiofiles==23.2.1
+altair==5.3.0
+annotated-types==0.7.0
+anyio==4.4.0
+attrs==23.2.0
+certifi==2024.6.2
+charset-normalizer==3.3.2
+click==8.1.7
+contourpy==1.2.1
+cycler==0.12.1
+dnspython==2.6.1
+email_validator==2.1.1
+exceptiongroup==1.2.1
+fastapi==0.111.0
+fastapi-cli==0.0.4
+ffmpy==0.3.2
+filelock==3.14.0
+fonttools==4.53.0
+fsspec==2024.6.0
+gradio==4.36.0
+gradio_client==1.0.1
+h11==0.14.0
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+huggingface-hub==0.23.3
+idna==3.7
+importlib_resources==6.4.0
+Jinja2==3.1.4
+joblib==1.4.2
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.0
+mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.2.1
+numpy==1.26.4
+orjson==3.10.3
+packaging==24.0
+pandas==2.2.2
+pillow==10.3.0
+pydantic==2.7.3
+pydantic_core==2.18.4
+pydub==0.25.1
+Pygments==2.18.0
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.35.1
+regex==2024.5.15
+requests==2.32.3
+rich==13.7.1
+rpds-py==0.18.1
+ruff==0.4.8
+safetensors==0.4.3
+scikit-learn==1.5.0
+scipy==1.13.1
+semantic-version==2.10.0
+sentence-transformers==3.0.1
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+starlette==0.37.2
+sympy==1.12.1
+threadpoolctl==3.5.0
+tokenizers==0.19.1
+tomlkit==0.12.0
+toolz==0.12.1
+torch==2.3.1
+tqdm==4.66.4
+transformers==4.41.2
+typer==0.12.3
+typing_extensions==4.12.1
+tzdata==2024.1
+ujson==5.10.0
+urllib3==2.2.1
+uvicorn==0.30.1
+uvloop==0.19.0
+watchfiles==0.22.0
+websockets==11.0.3
+zipp==3.19.2