Kazuki Yoda committed · f5b8cbf
Parent(s): 97f91f5

Implement the logic to get predefined answer

Files changed:
- app.py (+12 -0)
- predefined.py (+100 -0)
app.py
CHANGED
@@ -1,7 +1,11 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 
+from predefined import get_predefined_answer_for_closest_predefined_question
+
 """
+Copied and modified from HuggingFace Gradio default ChatInterface space
+
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
@@ -15,6 +19,14 @@ def respond(
     temperature,
     top_p,
 ):
+    ### Modified from here ###
+    predefined_answer = get_predefined_answer_for_closest_predefined_question(message)
+
+    if predefined_answer:
+        yield predefined_answer
+        return
+    ### Modified until here ###
+
     messages = [{"role": "system", "content": system_message}]
 
     for val in history:
predefined.py
ADDED
@@ -0,0 +1,100 @@
"""This entire file was solely written by the applicant, Kazuki Yoda."""

import json
from typing import Optional

# # For Debugging only
# from scipy.spatial import distance_matrix
# from sklearn.metrics.pairwise import cosine_similarity

from huggingface_hub import InferenceClient

zero_shot_classification_client = InferenceClient("facebook/bart-large-mnli")


def load_predefined_questions_to_answers_as_dict(path="predefined.json") -> dict[str, str]:
    """Load the predefined question-answer pairs as a dict of
    key: question (str) -> value: answer (str)."""

    with open(path) as file:
        data = json.load(file)

    if "questions" not in data:
        raise ValueError("`questions` key is expected but missing.")

    question_to_answer = dict()

    for item in data.get("questions"):
        question = item.get("question")
        answer = item.get("answer")

        # Skip the item if either the "question" or "answer" key is missing
        if question and answer:
            question_to_answer[question] = answer

    return question_to_answer


def get_embeddings(texts: list[str]):
    client = InferenceClient("efederici/sentence-bert-base")

    return [client.feature_extraction(text) for text in texts]


def get_predefined_answer_for_closest_predefined_question(
    question: str,
    cutoff=0.5,  # Minimum classification score to use the predefined answer
) -> Optional[str]:
    """Return the predefined answer whose question best matches `question`,
    or None when no predefined question scores above `cutoff`."""

    question_to_answer = load_predefined_questions_to_answers_as_dict()
    labels = list(question_to_answer.keys())

    zero_shot_classification_result = zero_shot_classification_client.zero_shot_classification(
        text=question,
        labels=labels,
        multi_label=True,
    )
    max_score_result = max(zero_shot_classification_result,
                           key=lambda x: x.score)

    if max_score_result.score > cutoff:
        closest_predefined_question = max_score_result.label
        return question_to_answer[closest_predefined_question]
    else:
        # Fall back to the normal LLM response
        return None
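

# A hypothetical illustration (not from this commit) of what the zero-shot call
# above returns: one result object per predefined question, each carrying the
# candidate label and its score, e.g. roughly
#   [(label="What is Thoughtful AI?", score=0.87),
#    (label="How does PHIL work?", score=0.12), ...]
# max(..., key=lambda x: x.score) keeps the best-scoring predefined question,
# and only a score above `cutoff` is trusted; anything lower falls through
# to the normal LLM response in app.py.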


if __name__ == "__main__":
    """Run some print-debug checks. Not executed from the Gradio app."""

    question_to_answer = load_predefined_questions_to_answers_as_dict()
    print(question_to_answer)

    additional_questions = [
        "What does EVA do?",
        "How does PHIL work?",
        "Thoughtful AI",
        ### Irrelevant but confusing questions ###
        "Who is the CEO of Thoughtful AI?",
        "How much does Thoughtful AI pay for its ML engineers?",
        "What's Evangelion (EVA)?",
    ]
    predefined_questions = list(question_to_answer.keys())
    questions = predefined_questions + additional_questions

    embeddings = get_embeddings(questions)

    for embedding in embeddings:
        print(embedding.shape)

    # For DEBUG, check the embeddings
    # print(distance_matrix(embeddings, embeddings[:len(predefined_questions)]))
    # print(cosine_similarity(embeddings, embeddings[:len(predefined_questions)]))

    for question in questions:
        # Note: the function returns the predefined *answer* (or None),
        # not the closest question itself, so name the result accordingly.
        predefined_answer = get_predefined_answer_for_closest_predefined_question(question)
        print(f"question: {question}")
        print(f"predefined_answer: {predefined_answer}")
        print()
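The commented-out distance_matrix / cosine_similarity imports and the otherwise-unused get_embeddings helper suggest an embedding-based matcher was also being explored. A minimal sketch of that alternative, assuming each feature_extraction result is a 1-D sentence vector as the debug prints imply (the function name and cutoff value are illustrative, not from the commit):

import numpy as np

def get_predefined_answer_by_embedding(question: str,
                                       cutoff: float = 0.5) -> Optional[str]:
    question_to_answer = load_predefined_questions_to_answers_as_dict()
    predefined_questions = list(question_to_answer.keys())

    # Embed the user question together with all predefined questions.
    vectors = np.asarray(get_embeddings([question] + predefined_questions))
    query, candidates = vectors[0], vectors[1:]

    # Cosine similarity between the query and each predefined question.
    scores = candidates @ query / (
        np.linalg.norm(candidates, axis=1) * np.linalg.norm(query))

    best = int(np.argmax(scores))
    if scores[best] > cutoff:
        return question_to_answer[predefined_questions[best]]
    return None  # Fall back to the normal LLM response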