# GitMatch_AI / app.py
# (Hugging Face Space page header preserved as a comment — not executable code.
#  Origin: anantdayanithi's Space; "Rename main.py to app.py", commit ec684c9,
#  raw / history / blame view, 13.7 kB.)
import streamlit as st
import requests
import time
import json
import google.generativeai as genai
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI
from dotenv import load_dotenv
# Configure page
st.set_page_config(
page_title="GitHub Repository Analyzer",
page_icon="πŸ”",
layout="wide"
)
# Add custom CSS
st.markdown("""
<style>
.metric-card {
background-color: #f0f2f6;
padding: 20px;
border-radius: 10px;
margin: 10px 0;
}
.repo-card {
background-color: white;
padding: 20px;
border-radius: 10px;
margin: 10px 0;
border: 1px solid #e6e6e6;
}
.stTextArea textarea {
height: 200px;
}
</style>
""", unsafe_allow_html=True)
# Initialize session state
if 'analysis_complete' not in st.session_state:
st.session_state.analysis_complete = False
if 'github_token' not in st.session_state:
st.session_state.github_token = ""
if 'gemini_key' not in st.session_state:
st.session_state.gemini_key = ""
def initialize_api(github_token, gemini_key):
"""Initialize API configurations"""
try:
headers = {"Authorization": f"token {github_token}", "Accept": "application/vnd.github.v3+json"}
# Configure Gemini API
genai.configure(api_key=gemini_key)
llm = GoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.2, google_api_key=gemini_key)
# Test the configuration
test_response = llm.invoke("Test")
return headers, llm
except Exception as e:
st.error(f"Error initializing APIs: {str(e)}")
st.error("Please ensure your API keys are correct and try again.")
return None, None
def get_github_repos(username, headers):
    """Fetch repositories from a user's GitHub profile.

    Args:
        username: GitHub account name.
        headers: auth headers built by initialize_api().

    Returns:
        List of repository dicts from the GitHub API, or [] on any failure
        (an error is shown in the UI instead of raising).
    """
    url = f"https://api.github.com/users/{username}/repos"
    try:
        # timeout prevents the app from hanging forever on a stalled connection;
        # per_page=100 fetches more than the API's default 30 repositories.
        response = requests.get(url, headers=headers, params={"per_page": 100}, timeout=30)
    except requests.RequestException as exc:
        # Network errors previously propagated and crashed the Streamlit run.
        st.error(f"Failed to fetch repositories: {exc}")
        return []
    if response.status_code == 200:
        return response.json()
    st.error(f"Failed to fetch repositories. Status code: {response.status_code}")
    return []
def get_repo_details(username, repo_name, headers):
    """Fetch README text, recent commit messages, top-level files, and languages.

    Args:
        username: GitHub account that owns the repository.
        repo_name: repository name.
        headers: auth headers built by initialize_api().

    Returns:
        (readme_content, commit_messages, file_structure, languages_used);
        each element degrades independently to "" / [] when its request fails.
    """
    readme_url = f"https://api.github.com/repos/{username}/{repo_name}/readme"
    commits_url = f"https://api.github.com/repos/{username}/{repo_name}/commits"
    contents_url = f"https://api.github.com/repos/{username}/{repo_name}/contents"
    languages_url = f"https://api.github.com/repos/{username}/{repo_name}/languages"

    readme_content = ""
    commit_messages = []
    file_structure = []
    languages_used = []

    with st.spinner(f"Fetching details for {repo_name}..."):
        # Fetch README metadata, then its raw content. Timeouts on every
        # request prevent a stalled connection from hanging the whole app.
        readme_response = requests.get(readme_url, headers=headers, timeout=30)
        if readme_response.status_code == 200:
            download = requests.get(readme_response.json()['download_url'], timeout=30)
            # The raw-content fetch was previously unchecked; a failed download
            # would have silently stored an error page as the README.
            if download.status_code == 200:
                readme_content = download.text

        # Fetch latest 5 commits.
        commit_response = requests.get(commits_url, headers=headers, timeout=30)
        if commit_response.status_code == 200:
            commit_messages = [commit['commit']['message'] for commit in commit_response.json()[:5]]

        # Fetch top-level file structure.
        content_response = requests.get(contents_url, headers=headers, timeout=30)
        if content_response.status_code == 200:
            file_structure = [file['name'] for file in content_response.json()]

        # Fetch languages used.
        lang_response = requests.get(languages_url, headers=headers, timeout=30)
        if lang_response.status_code == 200:
            languages_used = list(lang_response.json().keys())

    return readme_content, commit_messages, file_structure, languages_used
def analyze_repo_and_jd_match(readme, file_structure, commits, languages, jd, llm):
    """Use Gemini AI to analyze a repository and score its match against a JD.

    Args:
        readme: README text of the repository.
        file_structure: list of top-level file names.
        commits: list of recent commit messages.
        languages: list of language names used in the repo.
        jd: job description text.
        llm: initialized GoogleGenerativeAI instance.

    Returns:
        dict parsed from the model's JSON response; on any failure, a
        zeroed-out fallback dict with the same keys.
    """
    prompt_template = PromptTemplate(
        input_variables=["readme", "files", "commits", "languages", "jd"],
        template="""
You are an AI technical recruiter. Analyze the following GitHub project details and job description:
Job Description:
{jd}
Repository Details:
README: {readme}
File Structure: {files}
Commit Messages: {commits}
Languages: {languages}
Provide output as structured JSON:
{{
"languages": ["list of languages"],
"tech_stack": ["list of frameworks & libraries"],
"algorithms": ["list of key algorithms used"],
"complexity": "low/medium/high",
"commit_activity": "active/moderate/inactive",
"jd_match_score": "1-100",
"jd_match_reasons": ["list of reasons why this repository matches or doesn't match the JD"]
}}
""",
    )
    try:
        response = llm.invoke(prompt_template.format(
            readme=readme,
            files=", ".join(file_structure),
            commits=", ".join(commits),
            languages=", ".join(languages),
            jd=jd
        ))
        # The model may wrap the JSON in prose/markdown; extract the outermost
        # {...} span. Guard explicitly against a response with no braces —
        # previously find() == -1 would slice garbage before json.loads.
        json_start = response.find("{")
        json_end = response.rfind("}") + 1
        if json_start == -1 or json_end == 0:
            raise ValueError("No JSON object found in model response")
        json_data = json.loads(response[json_start:json_end].strip())
        return json_data
    except Exception as e:
        st.error(f"Error analyzing repository: {e}")
        # Fallback keeps downstream scoring code working with safe defaults.
        return {
            "languages": [],
            "tech_stack": [],
            "algorithms": [],
            "complexity": "unknown",
            "commit_activity": "unknown",
            "jd_match_score": 0,
            "jd_match_reasons": []
        }
def calculate_repo_score(analysis_data):
    """Calculate a 0-100 score for a repository from its analysis and JD match.

    The base score (max 100) rewards language breadth, tech stack, algorithms,
    complexity, and commit activity; the final score blends it 60/40 with the
    model-reported JD match score.

    Args:
        analysis_data: dict produced by analyze_repo_and_jd_match(). Because
            it ultimately comes from LLM-generated JSON, keys may be missing
            or malformed, so every lookup here is defensive.

    Returns:
        int score in [0, 100].
    """
    base_score = 0
    # Score based on number of languages (max 10 points).
    base_score += min(len(analysis_data.get('languages') or []) * 2, 10)
    # Score based on tech stack (max 15 points).
    base_score += min(len(analysis_data.get('tech_stack') or []) * 3, 15)
    # Score based on algorithms (max 15 points).
    base_score += min(len(analysis_data.get('algorithms') or []) * 3, 15)
    # Score based on complexity (max 30 points); str() guards non-string values.
    complexity_scores = {"low": 10, "medium": 20, "high": 30, "unknown": 0}
    base_score += complexity_scores.get(str(analysis_data.get('complexity', 'unknown')).lower(), 0)
    # Score based on commit activity (max 30 points).
    activity_scores = {"inactive": 10, "moderate": 20, "active": 30, "unknown": 0}
    base_score += activity_scores.get(str(analysis_data.get('commit_activity', 'unknown')).lower(), 0)
    # JD match score may arrive as an int, a numeric string, or garbage text;
    # the original float() call crashed on non-numeric values.
    try:
        jd_match_score = float(analysis_data.get('jd_match_score', 0))
    except (TypeError, ValueError):
        jd_match_score = 0.0
    # Clamp to the documented 1-100 range in case the model over/under-shoots.
    jd_match_score = max(0.0, min(jd_match_score, 100.0))
    # Final score is a weighted average: 60% intrinsic quality, 40% JD relevance.
    final_score = (base_score * 0.6) + (jd_match_score * 0.4)
    return round(final_score)
def evaluate_candidate(total_score, num_repos):
    """Map the average repository score to a suitability label.

    Args:
        total_score: sum of all per-repository scores.
        num_repos: number of repositories analyzed.

    Returns:
        One of four suitability labels, or an explanatory message when no
        repositories were available to evaluate.
    """
    if num_repos == 0:
        return "Unable to evaluate - no repositories found"
    average = total_score / num_repos
    # Walk the cutoffs from strongest to weakest; first match wins.
    for cutoff, label in ((75, "Highly Suitable"),
                          (50, "Moderately Suitable"),
                          (25, "Potentially Suitable")):
        if average >= cutoff:
            return label
    return "Not Suitable"
def display_repo_analysis(repo_name, analysis_data, repo_score):
    """Render one repository's analysis as an expandable Streamlit card.

    Args:
        repo_name: repository name shown in the expander title.
        analysis_data: analysis dict from analyze_repo_and_jd_match().
        repo_score: combined 0-100 score from calculate_repo_score().
    """
    # Pre-compute the display strings so the layout code stays flat.
    languages_text = ", ".join(analysis_data['languages'])
    tech_text = ", ".join(analysis_data['tech_stack']) if analysis_data['tech_stack'] else "None detected"
    algo_text = ", ".join(analysis_data['algorithms']) if analysis_data['algorithms'] else "None detected"

    with st.expander(f"📁 {repo_name} - Score: {repo_score}/100", expanded=True):
        left_col, right_col = st.columns(2)
        with left_col:
            st.markdown("### 🛠 Technical Details")
            st.write("**Languages:**", languages_text)
            st.write("**Tech Stack:**", tech_text)
            st.write("**Algorithms:**", algo_text)
        with right_col:
            st.markdown("### 📊 Metrics")
            st.write("**Complexity:**", analysis_data['complexity'].capitalize())
            st.write("**Commit Activity:**", analysis_data['commit_activity'].capitalize())
            st.write("**JD Match Score:**", f"{analysis_data.get('jd_match_score', 0)}/100")
        st.progress(repo_score / 100)
        if analysis_data.get('jd_match_reasons'):
            st.markdown("### 🎯 JD Match Analysis")
            for match_reason in analysis_data['jd_match_reasons']:
                st.write(f"- {match_reason}")
def analyze_github_repos(username, headers, llm, jd):
    """Analyze all of a user's GitHub repositories against a job description.

    Args:
        username: GitHub account to analyze.
        headers: auth headers built by initialize_api().
        llm: initialized GoogleGenerativeAI instance.
        jd: job description text.

    Returns:
        (results, total_score) where results is a list of
        (repo_name, analysis_dict, score) tuples. Always a 2-tuple so the
        caller can unpack safely.
    """
    repos = get_github_repos(username, headers)
    if not repos:
        st.error("No repositories found or failed to fetch repositories.")
        # BUG FIX: the original returned a bare [], but the caller unpacks
        # two values (results, total_score) — that raised ValueError.
        return [], 0
    results = []
    total_score = 0
    progress_bar = st.progress(0)
    for idx, repo in enumerate(repos):
        repo_name = repo['name']
        with st.spinner(f"Analyzing {repo_name}..."):
            readme, commits, file_structure, languages = get_repo_details(username, repo_name, headers)
            analysis_data = analyze_repo_and_jd_match(readme, file_structure, commits, languages, jd, llm)
            repo_score = calculate_repo_score(analysis_data)
            total_score += repo_score
            results.append((repo_name, analysis_data, repo_score))
            progress_bar.progress((idx + 1) / len(repos))
            # Brief pause between repos to stay friendly to API rate limits.
            time.sleep(1)
    progress_bar.empty()
    return results, total_score
def main():
    """Streamlit entry point: collect API keys, a job description, and a
    GitHub username, then run the per-repository analysis and render results."""
    st.title("🔍 GitHub Repository Analyzer")
    st.markdown("""
    This tool analyzes GitHub repositories to evaluate technical capabilities and project quality.
    Please provide the required information below to begin the analysis.
    """)
    # API Keys input (masked fields, pre-filled from session state on rerun)
    with st.expander("🔑 API Configuration", expanded=True):
        col1, col2 = st.columns(2)
        with col1:
            github_token = st.text_input("GitHub Token", type="password",
                                         value=st.session_state.get('github_token', ''))
        with col2:
            gemini_key = st.text_input("Google Gemini API Key", type="password",
                                       value=st.session_state.get('gemini_key', ''))
    # Job Description input
    st.subheader("📝 Job Description")
    jd = st.text_area("Paste the job description here", height=200)
    # GitHub username input
    username = st.text_input("👤 Enter GitHub Username")
    # Save API keys to session state so they survive Streamlit reruns
    if github_token:
        st.session_state.github_token = github_token
    if gemini_key:
        st.session_state.gemini_key = gemini_key
    # Run only when the button is clicked AND every input is non-empty
    if st.button("Analyze Repositories") and username and jd and github_token and gemini_key:
        headers, llm = initialize_api(github_token, gemini_key)
        if headers and llm:
            with st.spinner("Analyzing repositories..."):
                repo_analysis, total_score = analyze_github_repos(username, headers, llm, jd)
                if repo_analysis:
                    num_repos = len(repo_analysis)
                    # Display overall summary metrics across all repositories
                    st.header("📊 Analysis Summary")
                    col1, col2, col3 = st.columns(3)
                    with col1:
                        st.metric("Total Repositories", num_repos)
                    with col2:
                        avg_score = round(total_score / num_repos if num_repos > 0 else 0)
                        st.metric("Average Repository Score", f"{avg_score}/100")
                    with col3:
                        suitability = evaluate_candidate(total_score, num_repos)
                        st.metric("Candidate Suitability", suitability)
                    # Display individual repository analysis, best score first
                    st.header("📁 Repository Details")
                    sorted_analysis = sorted(repo_analysis, key=lambda x: x[2], reverse=True)
                    for repo_name, analysis_data, repo_score in sorted_analysis:
                        display_repo_analysis(repo_name, analysis_data, repo_score)
                    # Export option
                    # NOTE(review): a nested st.button inside the "Analyze" branch
                    # likely never shows its download — clicking it reruns the script
                    # and the outer button reads False again. Consider caching results
                    # in st.session_state; confirm intended behavior.
                    if st.button("Export Analysis"):
                        export_data = {
                            "username": username,
                            "total_repos": num_repos,
                            "average_score": avg_score,
                            "suitability": suitability,
                            "repositories": [
                                {
                                    "name": repo_name,
                                    "score": repo_score,
                                    "analysis": analysis_data
                                }
                                for repo_name, analysis_data, repo_score in sorted_analysis
                            ]
                        }
                        st.download_button(
                            "Download Analysis Report",
                            data=json.dumps(export_data, indent=2),
                            file_name=f"github_analysis_{username}.json",
                            mime="application/json"
                        )
                else:
                    st.error("No repositories found or analysis failed.")
        else:
            st.error("Failed to initialize APIs. Please check your API keys and try again.")
# Entry guard: run the app when executed directly (e.g. `streamlit run app.py`).
if __name__ == "__main__":
    main()