promtai / main.py
Nasma's picture
Upload 2 files
acbd5a0 verified
raw
history blame
12.2 kB
# main.py
from fastapi import FastAPI
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.schema import Document
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# ASGI application object; the route decorator further down attaches to it.
app = FastAPI()

# The Google API key must come from the environment. A key literal must never
# be committed to source control: anyone who can read the file can use it, and
# rotating it would require a code change. Fail fast with a clear message
# instead of silently falling back to an embedded credential.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before starting the application."
    )

# Embedding client shared by the indexing code and the search endpoint.
# NOTE(review): presumably the client picks the key up from GOOGLE_API_KEY -
# confirm against the langchain-google-genai documentation.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
# Candidate profiles used as the search corpus. page_content is left empty for
# every record; all searchable information lives in the metadata dict, whose
# "position" and "home_town" values are what the pre-filter matches against.
docs = [
    Document(
        page_content="",
        metadata={
            "firstname": "Pathum",
            "lastname": "Lakshan",
            "gender": "Male",
            "skills": "Spring Boot, Node.js, NestJS, Java, JavaScript, MongoDB, MySQL, Docker, AWS, GCP, Apache Kafka, Redis, REST APIs, CI/CD, JWT, OAuth2, GitHub, Agile Methodologies, Software Architecture, Cybersecurity, DevOps, Web Development",
            "industry": "Software Development, IT",
            "position": "Associate Software Engineer, Software Developer",
            "home_town": "Colombo, Sri Lanka",
            "home_address": "Colombo, Sri Lanka",
            "experience": "2 years",
            "education": "B.Sc. (Hons) Computer Science and Software Engineering",
            "years_of_experience": 2,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Prasanna",
            "lastname": "Ileperuma",
            "gender": "Male",
            "skills": "Project Management, Problem Solving, Computer Literacy, Creative Design, Adaptability, Communication, Operating Analytical Instruments",
            "industry": "Laboratory, Chemical",
            "position": "Intern Biyagama Water Treatment Plant, Research Assistant",
            "home_town": "Atabage",
            "home_address": "245/c, Anugurumulla Lower Division, Wattahena, Atabage, 20500",
            "experience": "2 years",
            "education": "B.Sc. (Hons) in Chemistry",
            "years_of_experience": 1,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Indika",
            "lastname": "Madushankha",
            "gender": "Male",
            "skills": "Quality Control, Pharmaceutical Instrumentation, Stability Analysis, Root Cause Analysis, GMP Certification, ISO 9001:2015 Certification, Method Development, Validation, HPLC, GC, UV, FT-IR, Dissolution Tester, Karl Fisher Potentiometer, Analytical Method Development, Problem-Solving, Communication Skills, Networking",
            "industry": "Pharmaceutical, Quality Control",
            "position": "Senior Quality Executive",
            "home_town": "Kadawatha, Sri Lanka",
            "home_address": "288, Dalupitiya, Kadawatha, Sri Lanka",
            "experience": "7 years",
            "education": "B.Sc. (Hons) in Chemistry (Second Class), University of Jaffna; MBA (Ongoing), University of Kelaniya",
            "years_of_experience": 7,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Chamadhi",
            "lastname": "Atapattu Arachchi",
            "gender": "Male",
            "skills": "Molecular Biology Techniques, Microbiological Analysis, Laboratory Management, Data Analysis, Documentation and Record Keeping, Team Collaboration, Problem Solving, Communication Skills, Adaptability",
            "industry": "Biotechnology, Microbiology",
            "position": "Quality Assurance and Laboratory Officer - Junior Executive, Laboratory Trainee, Lab Assistant",
            "home_town": "Malabe, Sri Lanka",
            "home_address": "",  # Not specified in the CV
            "experience": "Approx. 2 years (based on internships and work experience)",
            "education": "B.Sc. Special (Hons) in Biotechnology, Sri Lanka Institute of Information Technology",
            "years_of_experience": 2,  # Based on provided details of roles and internships
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Mohamed Naeem",
            "lastname": "A. Mubarak",
            "gender": "Male",
            "skills": "Laboratory Management, Analytical Chemistry, Calibration, Operation and Maintenance of High-end Analytical Instruments, ISO/IEC 17025:2017 Accreditation, Technical Assessment, Quality System Consulting, AMV Training, Research and Development, Problem-solving, Team Leadership, Strategic Planning, Time Management, Technical Reporting",
            "industry": "Analytical Chemistry, Laboratory Management",
            "position": "Laboratory Director, Principal Research Scientist, Chartered Chemist, Technical Assessor",
            "home_town": "Colombo, Sri Lanka",
            "home_address": "No: 69/2, Raja Mawatha, Ratmalana, Sri Lanka",
            "experience": "25+ years",
            "education": "B.Sc. Special (Hons) in Analytical Chemistry, University of Ruhuna; MSc in Integrated Water Resources Management, UNESCO-IHE Institute for Water Education",
            "years_of_experience": 25,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Virantha",
            "lastname": "Dasanayake",
            "gender": "Male",
            "skills": "Angular, HTML, CSS, Typescript, Data Analytics, Bootstrap, PrimeNG, Flutter, Node.js, Sails.js, C#, ASP.NET Core, JIRA, Azure DevOps, Git, GitHub, GitLab, Bitbucket, Google Cloud Platform, Figma, AdobeXD, MySQL, PostgreSQL, Google Tag Manager, Google Analytics, Firebase",
            "industry": "Software Engineering",
            "position": "Senior Software Engineer",
            "home_town": "Gampaha",
            "home_address": "87/D/2, Flower Terrace, Kehelbaddara, Gampaha",
            "experience": "Senior Software Engineer at LB Finance (Feb 2023 - Present), Software Engineer at Electrily (Sep 2021 - Feb 2023), Electrical Engineer Intern at KIK Lanka (Sep 2019 - Dec 2019)",
            "education": "BSc. Electrical Engineering Honours Degree, University of Moratuwa (2017 - 2021), G.C.E Advanced Level, Bandaranayake College (2012 - 2014)",
            "years_of_experience": 3,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Geesara",
            "lastname": "Siriwardhana",
            "gender": "Female",
            "skills": "JAVA, Spring Boot, SpringMVC, Google Cloud Platform, JavaScript, jQuery, MySQL, Git, JPA, ScrumMaster, Agile/JIRA, Jenkins, CI/CD, Windows, Linux, SonarQube, Docker, Kubernetes, Microservices",
            "industry": "Software Engineering",
            "position": "Technical Specialist",
            "home_town": "Colombo",
            "home_address": "",
            "experience": "Technical Specialist at LOLC Technologies Services Limited (October 2023 - Present), Senior Software Engineer at LOLC Technologies Services Limited (May 2022 - September 2023), Software Engineer at LOLC Technologies Services Limited (September 2021 - April 2022), Software Engineer Trainee at LOLC Technologies Services Limited (September 2017 - September 2021)",
            # The year range previously contained a mis-decoded dash sequence.
            "education": "Bachelor of Engineering (BEng) Honors in Software Engineering, University of Westminster, Sri Lanka (2017–2021)",
            "years_of_experience": 6,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Irosh",
            "lastname": "Rupasinghe",
            # Capitalised to match the spelling used by every other record.
            "gender": "Male",
            "skills": "Java Programming, Problem-solving, Time Management, Communication, Performance Optimization, Scalability Optimization, Agile Methodologies, Spring, Spring Boot, Hibernate, WSO2 Integration Platforms, REST, SOAP, JSON, XML, XSD, XPath, XSLT, NodeJS, Angular, Typescript, Salesforce Development, MongoDB, MySQL, H2, DB2, Oracle, MSSQL, CI/CD Processes, Team Collaboration Platforms",
            "industry": "Software Development and Technology",
            "position": "Senior Software Engineer",
            "home_town": "Colombo",
            "home_address": "",
            "experience": "Senior Software Engineer at ICP Techno LLC (08/2023 – Present), Tech Lead at Jetwing Travels (11/2018 – 07/2023), Senior Engineer-Technology at Virtusa (01/2017 – 08/2018), Engineer-Technology at Virtusa (07/2015 – 01/2017), Associate Engineer-Technology at Virtusa (11/2014 – 07/2015)",
            "education": "MSc Data Science (Reading), Cardiff Metropolitan University - UK (08/2024 – Ongoing), BEng in Software Engineering, IIC University of Technology, Cambodia (08/2018 – 08/2021), BSc in Information Technology (Specialized in Software Engineering), Java Institute, Sri Lanka (08/2011 – 10/2014)",
            "years_of_experience": 9,
        },
    ),
]
# Each profile is embedded from the string form of its metadata dict, since
# that is where the profile text is stored.
metadata = [str(profile.metadata) for profile in docs]

# One embedding vector per profile, in the same order as `docs`; the ranking
# code below relies on that positional alignment.
doc_vectors = embeddings.embed_documents(metadata)

# Diagnostic output: how many vectors came back and how long each one is.
print(f"Number of documents: {len(doc_vectors)}")
print(f"Size of each embedding vector: {len(doc_vectors[0])}")
import re
from sklearn.metrics.pairwise import cosine_similarity
# Canonicalise a raw query string before it is parsed and embedded.
def normalize_query(query):
    """Return ``query`` lower-cased with surrounding whitespace removed."""
    lowered = query.lower()
    return lowered.strip()
# Pattern for queries shaped like
# 'Need a|an <position> home town from <hometown>'.
# Compiled once at import time. It accepts either article and any run of
# whitespace between the fixed words, so every query the stricter
# single-space, "a"-only form matched is still matched with the same result.
_QUERY_PATTERN = re.compile(
    r"need\s+an?\s+(.+?)\s+home\s+town\s+from\s+(.+)",
    re.IGNORECASE,
)


# Function to extract role and location from the query
def extract_query_components(query: str):
    """
    Extract the requested role and home town from a user query.

    The query is expected to look like
    'Need a [position] home town from [hometown]' ('an' is accepted as well,
    matching is case-insensitive and tolerant of extra whitespace).

    Args:
        query: The user query text.

    Returns:
        A ``(role, location)`` tuple of whitespace-stripped strings when the
        query matches the expected form, otherwise ``(None, None)``.
    """
    match = _QUERY_PATTERN.search(query)
    if match is None:
        return None, None
    role, location = (part.strip() for part in match.groups())
    return role, location
# Narrow the candidate list using the role and location parsed from the query.
def pre_filter_docs(normalized_query, docs):
    """
    Keep only the documents whose metadata matches the query's role and town.

    The role must occur (case-insensitively) inside the document's
    "position" metadata and the location inside its "home_town" metadata.
    When the query yields no role or no location, ``docs`` is returned
    unchanged.
    """
    role, location = extract_query_components(normalized_query)
    if not (role and location):
        return docs  # Nothing to filter on; hand back the full list.

    wanted_role = role.lower()
    wanted_town = location.lower()

    matches = []
    for candidate in docs:
        position = candidate.metadata.get("position", "").lower()
        if wanted_role not in position:
            continue
        home_town = candidate.metadata.get("home_town", "").lower()
        if wanted_town in home_town:
            matches.append(candidate)
    return matches
# Example usage
# These statements sit at module top level, so they run whenever the module
# is loaded: a sample query is normalised, embedded, pre-filtered and ranked.
query = "Need a software Engineer home town from Gampaha"
normalized_query = normalize_query(query)
query_vector = embeddings.embed_query(normalized_query)
filtered_docs = pre_filter_docs(normalized_query, docs)

if not filtered_docs:
    print("No relevant documents found.")
else:
    # Keep the vectors whose document survived the pre-filter; iterating in
    # `docs` order keeps them aligned with `filtered_docs`.
    filtered_doc_vectors = [
        vector
        for vector, candidate in zip(doc_vectors, docs)
        if candidate in filtered_docs
    ]
    # Row 0 holds the similarity of the single query vector to each document.
    similarities = cosine_similarity([query_vector], filtered_doc_vectors)[0]
    # Highest similarity first.
    ranked_docs = sorted(
        zip(similarities, filtered_docs),
        key=lambda pair: pair[0],
        reverse=True,
    )
    print("Top Matches:")
    for score, doc in ranked_docs[:3]:
        print(f"Score: {score:.4f}, Content: {doc.metadata}")
@app.get("/search")
def search(query: str, top_k: int = 3):
    """
    Return the best-matching candidate profiles for a free-text query.

    Args:
        query: The user query, e.g.
            'Need a software Engineer home town from Gampaha'.
        top_k: Maximum number of matches to return. Defaults to 3; values
            below zero are treated as zero.

    Returns:
        A list of ``{"score": float, "content": metadata}`` dicts ordered by
        descending cosine similarity, or ``{"message": ...}`` when no
        document survives the pre-filter.
    """
    query = normalize_query(query)
    filtered_docs = pre_filter_docs(query, docs)
    if not filtered_docs:
        return {"message": "No relevant documents found."}

    # Embed the query only once there is something to rank, so a query that
    # matches nothing does not trigger an embedding request.
    query_vector = embeddings.embed_query(query)

    # Keep the vectors whose document survived the pre-filter; iterating in
    # `docs` order keeps them aligned with `filtered_docs`.
    filtered_doc_vectors = [
        vector
        for vector, candidate in zip(doc_vectors, docs)
        if candidate in filtered_docs
    ]
    # Row 0 holds the similarity of the single query vector to each document.
    similarities = cosine_similarity([query_vector], filtered_doc_vectors)[0]
    ranked_docs = sorted(
        zip(similarities, filtered_docs),
        key=lambda pair: pair[0],
        reverse=True,
    )
    # float() yields a plain Python number for the response body regardless
    # of the NumPy scalar type the similarity array holds.
    return [
        {"score": float(score), "content": doc.metadata}
        for score, doc in ranked_docs[: max(top_k, 0)]
    ]