Spaces:

Adr740
/

EF-AI-Co-Finder

Sleeping

App Files Files Community

EF-AI-Co-Finder / get_complementary_profiles.py

Adr740

Rename get_similar_profiles.py to get_complementary_profiles.py

3dd7bab verified 9 months ago

raw

history blame

1.97 kB

	import numpy as np
	import pandas as pd
	from cohort_members import cohort_data
	from tech_stuff import api_key

	from openai import OpenAI

	# Module-level OpenAI client, built once with the project's API key and
	# shared by the embedding and chat-completion helpers defined below.
	client = OpenAI(api_key=api_key)

	def cosine_similarity(a, b):
	    """Return the cosine similarity between vectors ``a`` and ``b``.

	    Args:
	        a: First vector (any array-like accepted by ``np.dot``).
	        b: Second vector, with the same length as ``a``.

	    Returns:
	        ``dot(a, b) / (norm(a) * norm(b))``. When either vector has zero
	        norm the ratio is undefined, so ``0.0`` is returned instead of
	        ``nan``; this keeps similarity-based sorting well defined.
	    """
	    denominator = np.linalg.norm(a) * np.linalg.norm(b)
	    if denominator == 0:
	        # Avoid a division by zero (nan + RuntimeWarning) for null vectors.
	        return 0.0
	    return np.dot(a, b) / denominator

	def _get_embedding(text, model="text-embedding-3-large"):
	    """Return the embedding of ``text`` from the OpenAI embeddings endpoint.

	    Args:
	        text: Text to embed. Newlines are replaced by spaces before the
	            request is sent.
	        model: Name of the embedding model to query.

	    Returns:
	        The ``embedding`` field of the first item in the API response.
	    """
	    try:
	        text = text.replace("\n", " ")
	    except (AttributeError, TypeError):
	        # Best-effort normalisation: a value without a compatible
	        # ``replace`` method is forwarded to the API unchanged. Only these
	        # two errors are swallowed so real failures are not hidden.
	        pass
	    return client.embeddings.create(input=[text], model=model).data[0].embedding

	def build_complementary(profile, model="gpt-4"):
	    """Ask the chat model to describe a profile complementary to ``profile``.

	    Args:
	        profile: Text of the profile, inserted into the user message.
	        model: Name of the chat-completion model to query. Defaults to
	            ``"gpt-4"``, the value that was previously hard-coded.

	    Returns:
	        The message content of the first choice in the chat completion.
	    """
	    response = client.chat.completions.create(
	        model=model,
	        messages=[
	            {
	                "role": "system",
	                "content": "Follow the Entrepreneur First Edge method to list out as bullet points the main complementary characteristic of the proposed profiles. We aim at building the teams that are most likely to success while making sure profiles don't overlap too much",
	            },
	            {
	                "role": "user",
	                "content": f"PROFILE: \n\n{profile}\n\nCOMPLEMENTARY PROFILE:",
	            },
	        ],
	        temperature=1,
	        max_tokens=1110,
	        top_p=1,
	        frequency_penalty=0,
	        presence_penalty=0,
	    )
	    return response.choices[0].message.content

	def get_similar_profiles(profile, prefix, k=20):
	    """Rank cohort rows by embedding similarity to ``profile`` as Markdown.

	    The query text is ``prefix + profile + cohort_data[profile]``. It is
	    embedded and compared (cosine similarity) with the embeddings stored
	    in ``embedded_cohort.csv``; the ``k`` best rows are kept, and rows
	    whose first 20 characters match the profile's own entry are dropped.

	    Args:
	        profile: Key looked up in ``cohort_data``; also the start of the
	            query text.
	        prefix: Text prepended to the query before embedding.
	        k: Number of top-ranked rows kept before the profile's own entry
	            is filtered out (coerced with ``int``).

	    Returns:
	        A Markdown string in which every kept result is prefixed with
	        ``"### "`` and has each newline doubled.

	    Raises:
	        Whatever ``cohort_data[profile]`` raises for an unknown profile
	        (``KeyError`` if ``cohort_data`` is a dict).
	    """
	    own_entry = profile + cohort_data[profile]
	    query = prefix + own_entry

	    # NOTE(review): the generated complementary description is not used by
	    # the ranking below, which embeds ``query`` itself. The call is kept so
	    # the function's external behaviour is unchanged -- confirm whether the
	    # complementary text should be embedded instead, or the call removed.
	    build_complementary(query)

	    df = pd.read_csv("embedded_cohort.csv")
	    embedding_query = _get_embedding(query, model="text-embedding-3-large")

	    # SECURITY: ``eval`` executes whatever the CSV cell contains. This is
	    # only acceptable while the CSV is a trusted, locally generated file;
	    # prefer ``ast.literal_eval`` or ``json.loads`` if that ever changes.
	    df['similarity'] = df.embeddings.apply(
	        lambda x: cosine_similarity(eval(x), embedding_query)
	    )
	    df = df.sort_values('similarity', ascending=False).head(int(k))
	    raw_results = df["Name"] + df["Description"]

	    # Drop the queried profile itself, recognised by its first 20 characters.
	    own_prefix = own_entry[:20]
	    results = [
	        result for result in raw_results.to_list()
	        if result[:20] != own_prefix
	    ]

	    return "".join("### " + result.replace("\n", "\n\n") for result in results)