Adr740 committed on
Commit
57a9174
·
verified ·
1 Parent(s): ba12e94

Update get_similar_profiles.py

Browse files
Files changed (1) hide show
  1. get_similar_profiles.py +21 -1
get_similar_profiles.py CHANGED
@@ -17,9 +17,30 @@ def _get_embedding(text, model="text-embedding-3-large"):
17
  None
18
  return client.embeddings.create(input = [text], model=model).data[0].embedding
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def get_similar_profiles(profile, prefix, k=20):
22
  query = prefix + profile + cohort_data[profile]
 
23
  df = pd.read_csv("embedded_cohort.csv")
24
  embedding_query = _get_embedding(query, model="text-embedding-3-large")
25
  df['similarity'] = df.embeddings.apply(lambda x: cosine_similarity(eval(x), embedding_query))
@@ -38,5 +59,4 @@ def get_similar_profiles(profile, prefix, k=20):
38
  for result in results:
39
  final_md += "### " + result.replace("\n","\n\n")
40
 
41
- # breakpoint()
42
  return final_md
 
17
  None
18
  return client.embeddings.create(input = [text], model=model).data[0].embedding
19
 
20
def build_complementary(profile, model="gpt-4"):
    """Ask the chat model to describe a profile complementary to *profile*.

    Sends one chat-completion request through the module-level ``client``
    (defined elsewhere in this file) made of a fixed system instruction and
    a user message that embeds ``profile``.

    Args:
        profile: Text describing the profile to complement; it is
            interpolated verbatim into the user message.
        model: Chat model identifier passed to the API. Defaults to
            ``"gpt-4"``, the value previously hard-coded, and mirrors the
            ``model`` parameter exposed by ``_get_embedding``.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    # Prompt strings are part of runtime behavior; keep them unchanged.
    system_prompt = "Follow the Entrepreneur First Edge method to list out as bullet points the main complementary characteristic of the proposed profiles. We aim at building the teams that are most likely to success"
    user_prompt = f"PROFILE: \n\n{profile}\n\nCOMPLEMENTARY PROFILE:"

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": system_prompt,
            },
            {
                "role": "user",
                "content": user_prompt,
            },
        ],
        temperature=1,
        max_tokens=1110,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    # Only the first returned choice is used.
    return response.choices[0].message.content
40
 
41
  def get_similar_profiles(profile, prefix, k=20):
42
  query = prefix + profile + cohort_data[profile]
43
+ complementary = build_complementary(query)
44
  df = pd.read_csv("embedded_cohort.csv")
45
  embedding_query = _get_embedding(query, model="text-embedding-3-large")
46
  df['similarity'] = df.embeddings.apply(lambda x: cosine_similarity(eval(x), embedding_query))
 
59
  for result in results:
60
  final_md += "### " + result.replace("\n","\n\n")
61
 
 
62
  return final_md