Spaces:
Sleeping
Sleeping
Update get_similar_profiles.py
Browse files- get_similar_profiles.py +21 -1
get_similar_profiles.py
CHANGED
@@ -17,9 +17,30 @@ def _get_embedding(text, model="text-embedding-3-large"):
|
|
17 |
None
|
18 |
return client.embeddings.create(input = [text], model=model).data[0].embedding
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
def get_similar_profiles(profile, prefix, k=20):
|
22 |
query = prefix + profile + cohort_data[profile]
|
|
|
23 |
df = pd.read_csv("embedded_cohort.csv")
|
24 |
embedding_query = _get_embedding(query, model="text-embedding-3-large")
|
25 |
df['similarity'] = df.embeddings.apply(lambda x: cosine_similarity(eval(x), embedding_query))
|
@@ -38,5 +59,4 @@ def get_similar_profiles(profile, prefix, k=20):
|
|
38 |
for result in results:
|
39 |
final_md += "### " + result.replace("\n","\n\n")
|
40 |
|
41 |
-
# breakpoint()
|
42 |
return final_md
|
|
|
17 |
None
|
18 |
return client.embeddings.create(input = [text], model=model).data[0].embedding
|
19 |
|
20 |
+
def build_complementary(profile):
    """Ask the chat model for the characteristics complementary to *profile*.

    Sends a fixed system instruction together with the given profile text
    to the chat-completions endpoint of the module-level ``client`` and
    returns the message content of the first choice in the response.

    Args:
        profile: Text describing the profile; it is interpolated verbatim
            into the user message.

    Returns:
        The ``content`` of the first choice's message, as returned by the
        client.
    """
    # Fixed instruction that frames the task for the model.
    system_message = {
        "role": "system",
        "content": "Follow the Entrepreneur First Edge method to list out as bullet points the main complementary characteristic of the proposed profiles. We aim at building the teams that are most likely to success",
    }
    # The trailing "COMPLEMENTARY PROFILE:" label prompts the model to continue
    # from that point.
    user_message = {
        "role": "user",
        "content": f"PROFILE: \n\n{profile}\n\nCOMPLEMENTARY PROFILE:",
    }

    response = client.chat.completions.create(
        model="gpt-4",
        messages=[system_message, user_message],
        temperature=1,
        max_tokens=1110,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    # Only the first returned choice is used.
    first_choice = response.choices[0]
    return first_choice.message.content
|
40 |
|
41 |
def get_similar_profiles(profile, prefix, k=20):
|
42 |
query = prefix + profile + cohort_data[profile]
|
43 |
+
complementary = build_complementary(query)
|
44 |
df = pd.read_csv("embedded_cohort.csv")
|
45 |
embedding_query = _get_embedding(query, model="text-embedding-3-large")
|
46 |
df['similarity'] = df.embeddings.apply(lambda x: cosine_similarity(eval(x), embedding_query))
|
|
|
59 |
for result in results:
|
60 |
final_md += "### " + result.replace("\n","\n\n")
|
61 |
|
|
|
62 |
return final_md
|