rchrdgwr committed on
Commit
8370241
·
1 Parent(s): 6d01d7e

Fix evaluation score

Browse files
__pycache__/classes.cpython-311.pyc CHANGED
Binary files a/__pycache__/classes.cpython-311.pyc and b/__pycache__/classes.cpython-311.pyc differ
 
__pycache__/utils_evaluate.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils_evaluate.cpython-311.pyc and b/__pycache__/utils_evaluate.cpython-311.pyc differ
 
__pycache__/utils_evaluate_objections.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils_evaluate_objections.cpython-311.pyc and b/__pycache__/utils_evaluate_objections.cpython-311.pyc differ
 
__pycache__/utils_opportunity_review.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils_opportunity_review.cpython-311.pyc and b/__pycache__/utils_opportunity_review.cpython-311.pyc differ
 
__pycache__/utils_output.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils_output.cpython-311.pyc and b/__pycache__/utils_output.cpython-311.pyc differ
 
__pycache__/utils_prep.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils_prep.cpython-311.pyc and b/__pycache__/utils_prep.cpython-311.pyc differ
 
__pycache__/utils_prompt.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils_prompt.cpython-311.pyc and b/__pycache__/utils_prompt.cpython-311.pyc differ
 
classes.py CHANGED
@@ -41,7 +41,7 @@ class SessionState:
41
  self.do_opportunity_analysis = True
42
  self.do_customer_research = True
43
  self.do_objections = False
44
- self.add_objections_to_analysis = False
45
  self.ask_objections = True
46
  self.use_objection_cache = True
47
  self.do_ragas_evaluation = False
 
41
  self.do_opportunity_analysis = True
42
  self.do_customer_research = True
43
  self.do_objections = False
44
+ self.add_objections_to_analysis = True
45
  self.ask_objections = True
46
  self.use_objection_cache = True
47
  self.do_ragas_evaluation = False
utils_evaluate.py CHANGED
@@ -10,10 +10,11 @@ from ragas.metrics import (
10
  from rouge_score import rouge_scorer
11
  from sentence_transformers import SentenceTransformer, util
12
 
13
- from utils_evaluate_objections import generate_objection_scores
14
 
15
 
16
- def evaluate_objections(session):
 
17
 
18
  for response in session.responses:
19
  question = response.get("question", "")
@@ -24,8 +25,10 @@ def evaluate_objections(session):
24
  q_and_a = {
25
  "objection": question,
26
  "answer": answer
27
- }
28
- score = generate_objection_scores(q_and_a)
 
 
29
  response["evaluation_score"] = score
30
 
31
 
 
10
  from rouge_score import rouge_scorer
11
  from sentence_transformers import SentenceTransformer, util
12
 
13
+ from utils_evaluate_objections import generate_objection_score
14
 
15
 
16
+ async def evaluate_objections(session):
17
+ print("evaluate_objections()")
18
 
19
  for response in session.responses:
20
  question = response.get("question", "")
 
25
  q_and_a = {
26
  "objection": question,
27
  "answer": answer
28
+ }
29
+ print(q_and_a)
30
+ score = await generate_objection_score(q_and_a)
31
+ print(score)
32
  response["evaluation_score"] = score
33
 
34
 
utils_evaluate_objections.py CHANGED
@@ -1,18 +1,22 @@
1
- from ragas.metrics.base import MetricWithLLM, SingleTurnMetric
2
- from ragas.prompt.pydantic_prompt import PydanticPrompt
3
- from pydantic import BaseModel, Field
4
  import pandas as pd
5
- from typing import List, Tuple
6
- from datetime import datetime
7
  import sys
 
 
8
  from dataclasses import dataclass, field
 
 
 
 
 
9
  from ragas.metrics.base import MetricType
10
- from ragas.messages import AIMessage, HumanMessage, ToolMessage, ToolCall
11
- from ragas import SingleTurnSample, MultiTurnSample
12
- import typing as t
13
- import asyncio
14
- import dotenv
15
- import os
16
  # Load environment variables from .env file
17
  dotenv.load_dotenv()
18
 
@@ -92,10 +96,8 @@ class SatisfyRate(MetricWithLLM, SingleTurnMetric):
92
  )
93
  return int(prompt_response.satisfy)
94
 
95
- async def generate_objection_scores(question_answer):
96
- from langchain_openai import ChatOpenAI
97
- from ragas.llms.base import LangchainLLMWrapper
98
- import pandas as pd
99
  # user_response= pd.read_csv(file_path)
100
  openai_model = LangchainLLMWrapper(ChatOpenAI(model_name="gpt-4o", api_key=OPENAI_API_KEY))
101
  scorer = SatisfyRate(llm=openai_model)
@@ -104,6 +106,7 @@ async def generate_objection_scores(question_answer):
104
 
105
  #(user_response['objection'][num], user_response['response'][num])
106
  satisfy_0_1 = await scorer.single_turn_ascore(sample)
 
107
 
108
  print (question_answer['objection'], question_answer['answer'], satisfy_0_1)
109
  # Implement your logic to generate a response based on the user's input
 
1
+ import asyncio
2
+ import dotenv
3
+ import os
4
  import pandas as pd
 
 
5
  import sys
6
+ import typing as t
7
+
8
  from dataclasses import dataclass, field
9
+ from datetime import datetime
10
+ from langchain_openai import ChatOpenAI
11
+ from pydantic import BaseModel, Field
12
+ from ragas import SingleTurnSample
13
+ from ragas.llms.base import LangchainLLMWrapper
14
  from ragas.metrics.base import MetricType
15
+ from ragas.metrics.base import MetricWithLLM, SingleTurnMetric
16
+ from ragas.prompt.pydantic_prompt import PydanticPrompt
17
+ from typing import List, Tuple
18
+
19
+
 
20
  # Load environment variables from .env file
21
  dotenv.load_dotenv()
22
 
 
96
  )
97
  return int(prompt_response.satisfy)
98
 
99
+ async def generate_objection_score(question_answer):
100
+ print("generate_objection_scores()")
 
 
101
  # user_response= pd.read_csv(file_path)
102
  openai_model = LangchainLLMWrapper(ChatOpenAI(model_name="gpt-4o", api_key=OPENAI_API_KEY))
103
  scorer = SatisfyRate(llm=openai_model)
 
106
 
107
  #(user_response['objection'][num], user_response['response'][num])
108
  satisfy_0_1 = await scorer.single_turn_ascore(sample)
109
+ print(satisfy_0_1)
110
 
111
  print (question_answer['objection'], question_answer['answer'], satisfy_0_1)
112
  # Implement your logic to generate a response based on the user's input
utils_output.py CHANGED
@@ -60,11 +60,13 @@ def format_datetime(dt):
60
  async def display_evaluation_results(cl, session_state):
61
  out_text = "*Preparing evaluation results ...*"
62
  await cl.Message(content=out_text).send()
63
-
 
 
64
  if session_state.do_evaluation:
65
  evaluate_answers(session_state)
66
  elif session_state.add_objections_to_analysis:
67
- evaluate_objections(session_state)
68
  await asyncio.sleep(1)
69
 
70
  output = f"**Session Summary**"
@@ -82,9 +84,9 @@ async def display_evaluation_results(cl, session_state):
82
  averages = results_df[columns_to_average].mean()
83
 
84
  await cl.Message(content="**Overall Summary (By SalesBuddy)**").send()
85
- output = f"**SalesBuddy Score:** {session_state.responses[-1]['overall_score']} \n"
86
  output = output + f"**SalesBuddy Evaluation:** {session_state.responses[-1]['overall_evaluation']} \n"
87
- output = output + f"**SalesBuddy Final Mood Score:** {session_state.responses[-1]['mood_score']} \n"
88
  await cl.Message(content=output).send()
89
 
90
  if session_state.do_ragas_evaluation:
@@ -101,7 +103,7 @@ async def display_evaluation_results(cl, session_state):
101
  **Question:** {resp.get('question', 'N/A')}
102
  **Answer:** {resp.get('response', 'N/A')}
103
  **SalesBuddy Evaluation:** {resp.get('response_evaluation', 'N/A')}
104
- **Evaluation Score:** {resp.get('response_score', 'N/A')}
105
  """
106
  if session_state.do_ragas_evaluation:
107
  scores = session_state.scores[index]
 
60
  async def display_evaluation_results(cl, session_state):
61
  out_text = "*Preparing evaluation results ...*"
62
  await cl.Message(content=out_text).send()
63
+ print("Checking evaluation and objection flags")
64
+ print(session_state.do_evaluation)
65
+ print(session_state.add_objections_to_analysis)
66
  if session_state.do_evaluation:
67
  evaluate_answers(session_state)
68
  elif session_state.add_objections_to_analysis:
69
+ await evaluate_objections(session_state)
70
  await asyncio.sleep(1)
71
 
72
  output = f"**Session Summary**"
 
84
  averages = results_df[columns_to_average].mean()
85
 
86
  await cl.Message(content="**Overall Summary (By SalesBuddy)**").send()
87
+ output = f"**SalesBuddy Score (1-10):** {session_state.responses[-1]['overall_score']} \n"
88
  output = output + f"**SalesBuddy Evaluation:** {session_state.responses[-1]['overall_evaluation']} \n"
89
+ output = output + f"**SalesBuddy Final Mood Score (1-10):** {session_state.responses[-1]['mood_score']} \n"
90
  await cl.Message(content=output).send()
91
 
92
  if session_state.do_ragas_evaluation:
 
103
  **Question:** {resp.get('question', 'N/A')}
104
  **Answer:** {resp.get('response', 'N/A')}
105
  **SalesBuddy Evaluation:** {resp.get('response_evaluation', 'N/A')}
106
+ **Evaluation Score:** {resp.get('evaluation_score', 'N/A')}
107
  """
108
  if session_state.do_ragas_evaluation:
109
  scores = session_state.scores[index]
utils_prompt.py CHANGED
@@ -103,6 +103,7 @@ def get_system_template_openai_short():
103
  You are playing a role in a conversation with a sales representative.
104
  Your name is in the 'Name:' section.
105
  They can use your first name, full name or address you with a title and last name.
 
106
  Your name does not need to match exactly what they say.
107
  Be chatty and conversational and friendly.
108
  Your compnay information is in the 'Company:' section.
@@ -116,10 +117,12 @@ def get_system_template_openai_short():
116
  You can make conversation but you must follow the command.
117
  If a previous question and answer are provided, you must evaluate the rep's answer.
118
  You will perform evaluation based on how well and thoroughly the rep answered the previous question.
 
119
  If asked to provide a conclusion, you must consider all of the rep's answers to your questions.
120
  These are provided in the 'All questions and answers:' section.
121
  You will ALWAYS provide your response in valid JSON format
122
  Remember all string values must be enclosed in double quotes.
 
123
  You will include with the following fields in JSON format:
124
  - Continue: Yes or No depending on if you want to continue the conversation based on the reps answer to your question.
125
  - Ask Follow Up: Yes or No depending on if you want to ask a follow up question.
 
103
  You are playing a role in a conversation with a sales representative.
104
  Your name is in the 'Name:' section.
105
  They can use your first name, full name or address you with a title and last name.
106
+ If they get your name wrong, you can correct them once.
107
  Your name does not need to match exactly what they say.
108
  Be chatty and conversational and friendly.
109
  Your compnay information is in the 'Company:' section.
 
117
  You can make conversation but you must follow the command.
118
  If a previous question and answer are provided, you must evaluate the rep's answer.
119
  You will perform evaluation based on how well and thoroughly the rep answered the previous question.
120
+ If the reps answer does not make sense or is not clear, set the score to a 1.
121
  If asked to provide a conclusion, you must consider all of the rep's answers to your questions.
122
  These are provided in the 'All questions and answers:' section.
123
  You will ALWAYS provide your response in valid JSON format
124
  Remember all string values must be enclosed in double quotes.
125
+ Remember do not include a question in your response.
126
  You will include with the following fields in JSON format:
127
  - Continue: Yes or No depending on if you want to continue the conversation based on the reps answer to your question.
128
  - Ask Follow Up: Yes or No depending on if you want to ask a follow up question.