|
""" |
|
Util functions for openai api |
|
""" |
|
import json |
|
import os |
|
from thefuzz import process |
|
import openai |
|
from utils import search_document, search_document_annoy |
|
|
|
|
|
def get_lab_member_info(name: str):
    """Return the record of the lab member whose name best matches ``name``.

    Member records are read from ``database/original_documents/members.json``
    under the current working directory. That file is read as a mapping from
    a group key to a list of member dicts, each carrying a ``'name'`` entry.
    ``name`` is fuzzy-matched against every member name with
    ``process.extractOne``.

    Args:
        name: Possibly inexact name of the member to look up.

    Returns:
        A JSON string holding the matched member's full record, or ``"{}"``
        when no candidate could be selected (e.g. the database is empty).
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/members.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_members_info = json.load(fin)

    # Index members by name. ``setdefault`` keeps the first record for a
    # duplicated name, and the dict preserves insertion order so that ties in
    # the fuzzy score are resolved the same way on every run.
    members_by_name = {}
    for members in all_members_info.values():
        for member in members:
            members_by_name.setdefault(member['name'], member)

    best = process.extractOne(name, list(members_by_name))
    if best is None:
        # Nothing to match against.
        return json.dumps({})

    # best[0] is one of the candidate names verbatim, so look the record up
    # exactly instead of by substring (which could hit a different member
    # whose name merely contains the best match).
    return json.dumps(members_by_name[best[0]])
|
|
|
|
|
def get_lab_member_detailed_info(name: str, detailed_info: str):
    """Return one specific field of the best-matching lab member.

    Member records are read from ``database/original_documents/members.json``
    under the current working directory (a mapping from a group key to a list
    of member dicts with a ``'name'`` entry). ``name`` is fuzzy-matched
    against every member name with ``process.extractOne``.

    The field is selected from keywords found in ``detailed_info``
    (case-insensitive):

    * contains ``"link"`` or ``"homepage"`` -> the member's ``'links'``
    * contains ``"photo"`` or ``"pic"``     -> the member's ``'photo'``
    * anything else                         -> the member's ``'description'``

    Args:
        name: Possibly inexact name of the member to look up.
        detailed_info: Free-text description of the wanted detail.

    Returns:
        A JSON string of the selected field, or ``"{}"`` when no candidate
        could be selected (e.g. the database is empty).

    Raises:
        KeyError: If the matched record lacks the selected field.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/members.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_members_info = json.load(fin)

    # Index members by name; first record wins for duplicated names and the
    # insertion order keeps fuzzy-score ties deterministic.
    members_by_name = {}
    for members in all_members_info.values():
        for member in members:
            members_by_name.setdefault(member['name'], member)

    best = process.extractOne(name, list(members_by_name))
    if best is None:
        # Nothing to match against.
        return json.dumps({})

    # Exact lookup: best[0] is one of the candidate names verbatim.
    member = members_by_name[best[0]]

    wanted = detailed_info.lower()
    if "link" in wanted or "homepage" in wanted:
        return json.dumps(member['links'])
    # "pic" is a substring of "picture", so one test covers both spellings.
    if "photo" in wanted or "pic" in wanted:
        return json.dumps(member['photo'])
    return json.dumps(member["description"])
|
|
|
|
|
def get_publication_by_year(year: str):
    """Return every publication recorded for the given year.

    Publications are read from
    ``database/original_documents/publications.json`` under the current
    working directory. The file is read as a list of publication dicts whose
    ``'year'`` entry is compared against ``int(year)``.

    Args:
        year: The year to filter on; must be convertible with ``int()``.

    Returns:
        A JSON array (as a string) with all matching publication records in
        file order. ``"[]"`` is returned when nothing matches or when
        ``year`` is not a valid integer.
    """
    # Convert once up front; an unparsable year simply has no publications.
    try:
        target_year = int(year)
    except (TypeError, ValueError):
        return json.dumps([])

    database_addr = os.path.join(os.getcwd(), 'database/original_documents/publications.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_pub_info = json.load(fin)

    # Collect the records in a list: merging them into a single dict would
    # let each publication overwrite the previous one's fields.
    matches = [pub for pub in all_pub_info if pub['year'] == target_year]
    return json.dumps(matches)
|
|
|
|
|
def get_pub_info(name: str):
    """Return the first publication whose title contains ``name``.

    Publications are read from
    ``database/original_documents/publications.json`` under the current
    working directory. The file is read as a list of publication dicts with
    a ``'title'`` entry. Matching is a case-insensitive substring test on the
    title, after stripping surrounding whitespace from ``name``.

    Args:
        name: Full or partial publication title.

    Returns:
        A JSON string of the first matching publication record, or ``"{}"``
        when nothing matches or ``name`` is blank.
    """
    query = name.strip().lower()
    if not query:
        # An empty string is a substring of every title and would always
        # "match" the first publication; treat it as no match instead.
        return json.dumps({})

    database_addr = os.path.join(os.getcwd(), 'database/original_documents/publications.json')
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_pub_info = json.load(fin)

    for pub in all_pub_info:
        if query in pub['title'].lower():
            return json.dumps(pub)
    return json.dumps({})
|
|
|
|
|
def get_pub_by_name(name: str):
    """Return every publication authored by the best match for ``name``.

    Publications are read from
    ``database/original_documents/publications.json`` under the current
    working directory. The file is read as a list of publication dicts whose
    ``'authors'`` entry is an iterable of author-name strings. ``name`` is
    fuzzy-matched against all author names with ``process.extractOne``; a
    publication is selected when any of its authors contains the best match
    (case-insensitive substring test).

    Args:
        name: Possibly inexact author name.

    Returns:
        A JSON array (as a string) with all matching publication records in
        file order, or ``"[]"`` when there are no authors to match against.
    """
    database_addr = os.path.join(os.getcwd(), 'database/original_documents/publications.json')
    # The file is loaded once and reused for both the candidate list and the
    # final filtering.
    with open(database_addr, 'r', encoding='utf-8') as fin:
        all_pub_info = json.load(fin)

    # Unique author names in first-seen order, so fuzzy-score ties are
    # resolved the same way on every run.
    authors = list(dict.fromkeys(
        author for pub in all_pub_info for author in pub['authors']
    ))

    best = process.extractOne(name, authors)
    if best is None:
        # No authors at all, hence nothing to report.
        return json.dumps([])

    needle = best[0].lower()
    # Collect records in a list: merging them into one dict would let each
    # publication overwrite the previous one. ``any`` also ensures a
    # publication is listed once even if several of its authors match.
    matches = [
        pub for pub in all_pub_info
        if any(needle in author.lower() for author in pub['authors'])
    ]
    return json.dumps(matches)
|
|
|
|
|
def semantic_search(query: str):
    """Embed ``query`` and look it up through ``search_document``.

    The query is embedded with ``openai.Embedding.create`` using the
    ``text-embedding-ada-002`` model, and the first embedding of the
    response is passed on to ``search_document``.

    Args:
        query: Text to search for.

    Returns:
        Whatever ``search_document`` returns for the query embedding.
    """
    query_embedding = openai.Embedding.create(
        input=query,
        model="text-embedding-ada-002",
    )['data'][0]['embedding']
    # NOTE(review): the second argument is presumably the number of documents
    # to retrieve -- confirm against utils.search_document.
    return search_document(query_embedding, 3)
|
|
|
|
|
def search_downloads(input_title: str):
    """Return the download entry whose title best matches ``input_title``.

    Entries are read from
    ``database/original_documents/parsed_downloads.json`` under the current
    working directory. The file is read as a mapping from a title to its
    entry. ``input_title`` is fuzzy-matched against the titles with
    ``process.extractOne``.

    Args:
        input_title: Possibly inexact title of the download.

    Returns:
        A JSON string of the best-matching entry, or ``"{}"`` when the file
        holds no entries.
    """
    download_fn = os.path.join(os.getcwd(), 'database/original_documents/parsed_downloads.json')
    with open(download_fn, 'r', encoding='utf-8') as fin:
        all_download_info = json.load(fin)

    # A list of the keys (file order) keeps fuzzy-score ties deterministic.
    best = process.extractOne(input_title, list(all_download_info))
    if best is None:
        # Nothing to match against.
        return json.dumps({})

    # best[0] is one of the titles verbatim: return exactly that entry rather
    # than merging every entry whose title happens to contain it, which would
    # let their fields overwrite each other.
    return json.dumps(all_download_info[best[0]])
|
|
|
|
|
def get_member_list_by_edu_status(criterion: str):
    """Return the member list stored for one education-status criterion.

    The data is read from
    ``database/original_documents/members/<criterion>.json`` under the
    current working directory and re-serialised unchanged.

    Args:
        criterion: Name of the criterion; it is used as the JSON file's base
            name and therefore must not contain path components.

    Returns:
        A JSON string with the file's content. ``"[]"`` is returned when no
        file exists for ``criterion`` or when ``criterion`` contains path
        separators.
    """
    file_name = f'{criterion}.json'
    # The criterion is interpolated into a file path, so refuse anything that
    # is not a plain file name (e.g. "../secrets") to stay inside the
    # members directory.
    if os.path.basename(file_name) != file_name:
        return json.dumps([])

    member_json = os.path.join(os.getcwd(), 'database/original_documents/members', file_name)
    try:
        with open(member_json, 'r', encoding='utf-8') as fin:
            member_info_list = json.load(fin)
    except FileNotFoundError:
        # Unknown criterion: report an empty result instead of crashing.
        return json.dumps([])

    return json.dumps(member_info_list)
|
|