|
|
|
from fastapi import FastAPI
|
|
import os
|
|
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
|
from langchain.schema import Document
|
|
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
|
|
|
# ASGI application object; the /search route is registered on it.
app = FastAPI()

# The Google API key must be supplied through the environment. Credentials
# must never be committed to source control: anyone with read access to the
# file can use them.
# NOTE(review): a key was previously hard-coded here as a fallback; treat it
# as compromised and revoke/rotate it.
if not os.environ.get("GOOGLE_API_KEY"):
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it in the environment before "
        "starting the application."
    )

# Embedding client used for both the candidate documents and user queries.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
|
|
|
|
|
|
|
|
# In-memory candidate profiles. `page_content` is intentionally left empty:
# the profile lives entirely in `metadata`, whose string form is what gets
# embedded further down in this file.
#
# Date ranges use an ASCII hyphen throughout. Several strings previously
# contained a mis-encoded dash (rendered as "β"), which was embedded verbatim
# as noise; those have been repaired to match the other records.
docs = [
    Document(
        page_content="",
        metadata={
            "firstname": "Pathum",
            "lastname": "Lakshan",
            "gender": "Male",
            "skills": "Spring Boot, Node.js, NestJS, Java, JavaScript, MongoDB, MySQL, Docker, AWS, GCP, Apache Kafka, Redis, REST APIs, CI/CD, JWT, OAuth2, GitHub, Agile Methodologies, Software Architecture, Cybersecurity, DevOps, Web Development",
            "industry": "Software Development, IT",
            "position": "Associate Software Engineer, Software Developer",
            "home_town": "Colombo, Sri Lanka",
            "home_address": "Colombo, Sri Lanka",
            "experience": "2 years",
            "education": "B.Sc. (Hons) Computer Science and Software Engineering",
            "years_of_experience": 2,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Prasanna",
            "lastname": "Ileperuma",
            "gender": "Male",
            "skills": "Project Management, Problem Solving, Computer Literacy, Creative Design, Adaptability, Communication, Operating Analytical Instruments",
            "industry": "Laboratory, Chemical",
            "position": "Intern Biyagama Water Treatment Plant, Research Assistant",
            "home_town": "Atabage",
            "home_address": "245/c, Anugurumulla Lower Division, Wattahena, Atabage, 20500",
            # NOTE(review): "experience" says 2 years but
            # "years_of_experience" is 1 - confirm which value is correct.
            "experience": "2 years",
            "education": "B.Sc. (Hons) in Chemistry",
            "years_of_experience": 1,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Indika",
            "lastname": "Madushankha",
            "gender": "Male",
            "skills": "Quality Control, Pharmaceutical Instrumentation, Stability Analysis, Root Cause Analysis, GMP Certification, ISO 9001:2015 Certification, Method Development, Validation, HPLC, GC, UV, FT-IR, Dissolution Tester, Karl Fisher Potentiometer, Analytical Method Development, Problem-Solving, Communication Skills, Networking",
            "industry": "Pharmaceutical, Quality Control",
            "position": "Senior Quality Executive",
            "home_town": "Kadawatha, Sri Lanka",
            "home_address": "288, Dalupitiya, Kadawatha, Sri Lanka",
            "experience": "7 years",
            "education": "B.Sc. (Hons) in Chemistry (Second Class), University of Jaffna; MBA (Ongoing), University of Kelaniya",
            "years_of_experience": 7,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Chamadhi",
            "lastname": "Atapattu Arachchi",
            "gender": "Male",
            "skills": "Molecular Biology Techniques, Microbiological Analysis, Laboratory Management, Data Analysis, Documentation and Record Keeping, Team Collaboration, Problem Solving, Communication Skills, Adaptability",
            "industry": "Biotechnology, Microbiology",
            "position": "Quality Assurance and Laboratory Officer - Junior Executive, Laboratory Trainee, Lab Assistant",
            "home_town": "Malabe, Sri Lanka",
            "home_address": "",
            "experience": "Approx. 2 years (based on internships and work experience)",
            "education": "B.Sc. Special (Hons) in Biotechnology, Sri Lanka Institute of Information Technology",
            "years_of_experience": 2,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Mohamed Naeem",
            "lastname": "A. Mubarak",
            "gender": "Male",
            "skills": "Laboratory Management, Analytical Chemistry, Calibration, Operation and Maintenance of High-end Analytical Instruments, ISO/IEC 17025:2017 Accreditation, Technical Assessment, Quality System Consulting, AMV Training, Research and Development, Problem-solving, Team Leadership, Strategic Planning, Time Management, Technical Reporting",
            "industry": "Analytical Chemistry, Laboratory Management",
            "position": "Laboratory Director, Principal Research Scientist, Chartered Chemist, Technical Assessor",
            "home_town": "Colombo, Sri Lanka",
            "home_address": "No: 69/2, Raja Mawatha, Ratmalana, Sri Lanka",
            "experience": "25+ years",
            "education": "B.Sc. Special (Hons) in Analytical Chemistry, University of Ruhuna; MSc in Integrated Water Resources Management, UNESCO-IHE Institute for Water Education",
            "years_of_experience": 25,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Virantha",
            "lastname": "Dasanayake",
            "gender": "Male",
            "skills": "Angular, HTML, CSS, Typescript, Data Analytics, Bootstrap, PrimeNG, Flutter, Node.js, Sails.js, C#, ASP.NET Core, JIRA, Azure DevOps, Git, GitHub, GitLab, Bitbucket, Google Cloud Platform, Figma, AdobeXD, MySQL, PostgreSQL, Google Tag Manager, Google Analytics, Firebase",
            "industry": "Software Engineering",
            "position": "Senior Software Engineer",
            "home_town": "Gampaha",
            "home_address": "87/D/2, Flower Terrace, Kehelbaddara, Gampaha",
            "experience": "Senior Software Engineer at LB Finance (Feb 2023 - Present), Software Engineer at Electrily (Sep 2021 - Feb 2023), Electrical Engineer Intern at KIK Lanka (Sep 2019 - Dec 2019)",
            "education": "BSc. Electrical Engineering Honours Degree, University of Moratuwa (2017 - 2021), G.C.E Advanced Level, Bandaranayake College (2012 - 2014)",
            "years_of_experience": 3,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Geesara",
            "lastname": "Siriwardhana",
            "gender": "Female",
            "skills": "JAVA, Spring Boot, SpringMVC, Google Cloud Platform, JavaScript, jQuery, MySQL, Git, JPA, ScrumMaster, Agile/JIRA, Jenkins, CI/CD, Windows, Linux, SonarQube, Docker, Kubernetes, Microservices",
            "industry": "Software Engineering",
            "position": "Technical Specialist",
            "home_town": "Colombo",
            "home_address": "",
            "experience": "Technical Specialist at LOLC Technologies Services Limited (October 2023 - Present), Senior Software Engineer at LOLC Technologies Services Limited (May 2022 - September 2023), Software Engineer at LOLC Technologies Services Limited (September 2021 - April 2022), Software Engineer Trainee at LOLC Technologies Services Limited (September 2017 - September 2021)",
            "education": "Bachelor of Engineering (BEng) Honors in Software Engineering, University of Westminster, Sri Lanka (2017-2021)",
            "years_of_experience": 6,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Irosh",
            "lastname": "Rupasinghe",
            # NOTE(review): lower-case "male" differs from the "Male"/"Female"
            # casing used by every other record; left untouched here.
            "gender": "male",
            "skills": "Java Programming, Problem-solving, Time Management, Communication, Performance Optimization, Scalability Optimization, Agile Methodologies, Spring, Spring Boot, Hibernate, WSO2 Integration Platforms, REST, SOAP, JSON, XML, XSD, XPath, XSLT, NodeJS, Angular, Typescript, Salesforce Development, MongoDB, MySQL, H2, DB2, Oracle, MSSQL, CI/CD Processes, Team Collaboration Platforms",
            "industry": "Software Development and Technology",
            "position": "Senior Software Engineer",
            "home_town": "Colombo",
            "home_address": "",
            "experience": "Senior Software Engineer at ICP Techno LLC (08/2023 - Present), Tech Lead at Jetwing Travels (11/2018 - 07/2023), Senior Engineer-Technology at Virtusa (01/2017 - 08/2018), Engineer-Technology at Virtusa (07/2015 - 01/2017), Associate Engineer-Technology at Virtusa (11/2014 - 07/2015)",
            "education": "MSc Data Science (Reading), Cardiff Metropolitan University - UK (08/2024 - Ongoing), BEng in Software Engineering, IIC University of Technology, Cambodia (08/2018 - 08/2021), BSc in Information Technology (Specialized in Software Engineering), Java Institute, Sri Lanka (08/2011 - 10/2014)",
            "years_of_experience": 9,
        },
    ),
]
|
|
|
|
|
|
# Text that actually gets embedded: the string form of each candidate's
# metadata dict (page_content is empty for every record).
metadata = list(map(str, (doc.metadata for doc in docs)))

# One embedding per metadata string. The rest of the file zips this list with
# `docs`, so the two are assumed to stay in the same order.
doc_vectors = embeddings.embed_documents(metadata)

# Report how many vectors came back and their dimensionality.
print(
    f"Number of documents: {len(doc_vectors)}",
    f"Size of each embedding vector: {len(doc_vectors[0])}",
    sep="\n",
)
|
|
|
|
import re
|
|
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
def normalize_query(query):
    """Return *query* lower-cased, with surrounding whitespace removed."""
    lowered = query.lower()
    return lowered.strip()
|
|
|
|
|
|
def extract_query_components(query):
    """
    Pull the requested position and home town out of a free-text query.

    The query must contain the phrase
    'Need a [position] home town from [hometown]' (matched
    case-insensitively). Returns a ``(role, location)`` tuple with
    surrounding whitespace stripped from both parts, or ``(None, None)``
    when the query does not follow that shape.
    """
    found = re.search(r"Need a (.+?) home town from (.+)", query, re.IGNORECASE)
    if found is None:
        return None, None

    position_text, town_text = found.groups()
    return position_text.strip(), town_text.strip()
|
|
|
|
|
|
def pre_filter_docs(normalized_query, docs):
    """
    Narrow *docs* to those matching the role and location named in the query.

    The role and location are parsed from *normalized_query*. If either part
    is missing, *docs* is returned unchanged. Otherwise a new list is
    returned containing the documents whose ``position`` metadata contains
    the role and whose ``home_town`` metadata contains the location, both
    compared case-insensitively as substrings.
    """
    role, location = extract_query_components(normalized_query)
    if not (role and location):
        return docs

    # Lower-case the search terms once instead of once per document.
    wanted_role = role.lower()
    wanted_town = location.lower()

    matches = []
    for candidate in docs:
        info = candidate.metadata
        if wanted_role not in info.get("position", "").lower():
            continue
        if wanted_town not in info.get("home_town", "").lower():
            continue
        matches.append(candidate)
    return matches
|
|
|
|
|
|
# Demo search. It is guarded so that it only runs when the file is executed
# directly as a script; importing the module to serve `app` no longer spends
# an embedding API call or prints demo output on every start-up.
if __name__ == "__main__":
    query = "Need a software Engineer home town from Gampaha"
    normalized_query = normalize_query(query)

    # Cheap keyword filter first: candidates whose position and home town
    # match the query (or every candidate if the query cannot be parsed).
    filtered_docs = pre_filter_docs(normalized_query, docs)

    if filtered_docs:
        # Embed the query only when there is something to rank against.
        query_vector = embeddings.embed_query(normalized_query)

        # Pick the stored vectors of the surviving documents. The filter
        # hands back the same Document objects, so identity lookup selects
        # them in O(n) without comparing whole documents for equality.
        kept_ids = {id(doc) for doc in filtered_docs}
        filtered_doc_vectors = [
            doc_vector
            for doc_vector, doc in zip(doc_vectors, docs)
            if id(doc) in kept_ids
        ]

        # Similarity of the query against each surviving document.
        similarities = cosine_similarity([query_vector], filtered_doc_vectors)[0]

        # Highest similarity first.
        ranked_docs = sorted(
            zip(similarities, filtered_docs),
            key=lambda pair: pair[0],
            reverse=True,
        )

        print("Top Matches:")
        for score, doc in ranked_docs[:3]:
            print(f"Score: {score:.4f}, Content: {doc.metadata}")
    else:
        print("No relevant documents found.")
|
|
|
|
|
|
@app.get("/search")
def search(query: str):
    """
    Return up to three candidates that best match *query*.

    The query is normalized, the candidates are pre-filtered by position and
    home town, and the survivors are ranked by cosine similarity between the
    query embedding and their stored embeddings.

    Returns a list of ``{"score": float, "content": metadata}`` dicts ordered
    from most to least similar, or ``{"message": "No relevant documents
    found."}`` when the pre-filter leaves nothing to rank.
    """
    query = normalize_query(query)
    filtered_docs = pre_filter_docs(query, docs)

    # Bail out before calling the embedding service when nothing survived.
    if not filtered_docs:
        return {"message": "No relevant documents found."}

    query_vector = embeddings.embed_query(query)

    # The filter returns the same Document objects held in `docs`, so an
    # identity lookup picks their vectors in O(n) instead of comparing whole
    # documents for equality once per candidate.
    kept_ids = {id(doc) for doc in filtered_docs}
    filtered_doc_vectors = [
        doc_vector
        for doc_vector, doc in zip(doc_vectors, docs)
        if id(doc) in kept_ids
    ]

    similarities = cosine_similarity([query_vector], filtered_doc_vectors)[0]
    ranked_docs = sorted(
        zip(similarities, filtered_docs),
        key=lambda pair: pair[0],
        reverse=True,
    )

    # Convert each score to a plain float so the JSON response does not
    # depend on how the similarity library's scalar type is serialized.
    return [
        {"score": float(score), "content": doc.metadata}
        for score, doc in ranked_docs[:3]
    ]
|
|
|
|
|
|
|
|
|
|
|