import streamlit as st
import requests
import time
import json
import google.generativeai as genai
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import GoogleGenerativeAI
from dotenv import load_dotenv

# Load environment variables from a local .env file, if present
load_dotenv()

# Configure page
st.set_page_config(
    page_title="GitHub Repository Analyzer",
    page_icon="🔍",
    layout="wide"
)

# Add custom CSS
st.markdown("""
""", unsafe_allow_html=True)

# Initialize session state
if 'analysis_complete' not in st.session_state:
    st.session_state.analysis_complete = False
if 'github_token' not in st.session_state:
    st.session_state.github_token = ""
if 'gemini_key' not in st.session_state:
    st.session_state.gemini_key = ""

def initialize_api(github_token, gemini_key):
    """Initialize API configurations."""
    try:
        headers = {
            "Authorization": f"token {github_token}",
            "Accept": "application/vnd.github.v3+json"
        }

        # Configure Gemini API
        genai.configure(api_key=gemini_key)
        llm = GoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.2, google_api_key=gemini_key)

        # Test the configuration with a trivial prompt
        llm.invoke("Test")

        return headers, llm
    except Exception as e:
        st.error(f"Error initializing APIs: {str(e)}")
        st.error("Please ensure your API keys are correct and try again.")
        return None, None

def get_github_repos(username, headers):
    """Fetch repositories from a user's GitHub profile."""
    url = f"https://api.github.com/users/{username}/repos"
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        return response.json()
    else:
        st.error(f"Failed to fetch repositories. Status code: {response.status_code}")
        return []

def get_repo_details(username, repo_name, headers):
    """Fetch README, latest commits, file structure, and languages for a repo."""
    readme_url = f"https://api.github.com/repos/{username}/{repo_name}/readme"
    commits_url = f"https://api.github.com/repos/{username}/{repo_name}/commits"
    contents_url = f"https://api.github.com/repos/{username}/{repo_name}/contents"
    languages_url = f"https://api.github.com/repos/{username}/{repo_name}/languages"

    readme_content = ""
    commit_messages = []
    file_structure = []
    languages_used = []

    with st.spinner(f"Fetching details for {repo_name}..."):
        # Fetch README
        readme_response = requests.get(readme_url, headers=headers)
        if readme_response.status_code == 200:
            readme_content = requests.get(readme_response.json()['download_url']).text

        # Fetch latest 5 commits
        commit_response = requests.get(commits_url, headers=headers)
        if commit_response.status_code == 200:
            commit_messages = [commit['commit']['message'] for commit in commit_response.json()[:5]]

        # Fetch top-level file structure
        content_response = requests.get(contents_url, headers=headers)
        if content_response.status_code == 200:
            file_structure = [file['name'] for file in content_response.json()]

        # Fetch languages used
        lang_response = requests.get(languages_url, headers=headers)
        if lang_response.status_code == 200:
            languages_used = list(lang_response.json().keys())

    return readme_content, commit_messages, file_structure, languages_used
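# Note: get_github_repos above only returns the first page of results
# (GitHub's REST API defaults to 30 repositories per page). The sketch
# below is one way to walk all pages; the function name
# get_all_github_repos is our own addition, but the per_page/page query
# parameters are standard GitHub REST API v3 pagination.
def get_all_github_repos(username, headers, per_page=100):
    """Fetch every public repository for a user by paging through the API."""
    repos = []
    page = 1
    while True:
        url = f"https://api.github.com/users/{username}/repos"
        response = requests.get(url, headers=headers,
                                params={"per_page": per_page, "page": page})
        if response.status_code != 200:
            break
        batch = response.json()
        repos.extend(batch)
        if len(batch) < per_page:  # a short page means we've reached the end
            break
        page += 1
    return repos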
def analyze_repo_and_jd_match(readme, file_structure, commits, languages, jd, llm):
    """Use Gemini AI to analyze repository and match with JD."""
    prompt_template = PromptTemplate(
        input_variables=["readme", "files", "commits", "languages", "jd"],
        template="""
You are an AI technical recruiter.
Analyze the following GitHub project details and job description:

Job Description:
{jd}

Repository Details:
README: {readme}
File Structure: {files}
Commit Messages: {commits}
Languages: {languages}

Provide output as structured JSON:
{{
    "languages": ["list of languages"],
    "tech_stack": ["list of frameworks & libraries"],
    "algorithms": ["list of key algorithms used"],
    "complexity": "low/medium/high",
    "commit_activity": "active/moderate/inactive",
    "jd_match_score": "1-100",
    "jd_match_reasons": ["list of reasons why this repository matches or doesn't match the JD"]
}}
"""
    )

    try:
        response = llm.invoke(prompt_template.format(
            readme=readme,
            files=", ".join(file_structure),
            commits=", ".join(commits),
            languages=", ".join(languages),
            jd=jd
        ))

        json_start = response.find("{")
        json_end = response.rfind("}") + 1
        json_data = json.loads(response[json_start:json_end].strip())
        return json_data
    except Exception as e:
        st.error(f"Error analyzing repository: {e}")
        return {
            "languages": [],
            "tech_stack": [],
            "algorithms": [],
            "complexity": "unknown",
            "commit_activity": "unknown",
            "jd_match_score": 0,
            "jd_match_reasons": []
        }

def calculate_repo_score(analysis_data):
    """Calculate a score for a repository based on its analysis and JD match."""
    base_score = 0

    # Score based on number of languages (max 10 points)
    base_score += min(len(analysis_data['languages']) * 2, 10)

    # Score based on tech stack (max 15 points)
    base_score += min(len(analysis_data['tech_stack']) * 3, 15)

    # Score based on algorithms (max 15 points)
    base_score += min(len(analysis_data['algorithms']) * 3, 15)

    # Score based on complexity (max 30 points)
    complexity_scores = {"low": 10, "medium": 20, "high": 30, "unknown": 0}
    base_score += complexity_scores.get(analysis_data['complexity'].lower(), 0)

    # Score based on commit activity (max 30 points)
    activity_scores = {"inactive": 10, "moderate": 20, "active": 30, "unknown": 0}
    base_score += activity_scores.get(analysis_data['commit_activity'].lower(), 0)

    # Include JD match score in final calculation
    jd_match_score = float(analysis_data.get('jd_match_score', 0))

    # Final score is weighted average of base score and JD match score
    final_score = (base_score * 0.6) + (jd_match_score * 0.4)

    return round(final_score)

def evaluate_candidate(total_score, num_repos):
    """Evaluate candidate suitability based on average repository score."""
    if num_repos == 0:
        return "Unable to evaluate - no repositories found"

    avg_score = total_score / num_repos
    if avg_score >= 75:
        return "Highly Suitable"
    elif avg_score >= 50:
        return "Moderately Suitable"
    elif avg_score >= 25:
        return "Potentially Suitable"
    else:
        return "Not Suitable"
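# Worked example of the scoring above, using hypothetical analysis data
# (not real LLM output): 3 languages -> 6 pts, 4 tech-stack entries ->
# 12 pts, 2 algorithms -> 6 pts, "high" complexity -> 30 pts, "active"
# commits -> 30 pts, for a base score of 84. With a JD match score of 70,
# the final score is round(84 * 0.6 + 70 * 0.4) = round(78.4) = 78.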
st.markdown("### 🎯 JD Match Analysis") for reason in analysis_data['jd_match_reasons']: st.write(f"- {reason}") def analyze_github_repos(username, headers, llm, jd): """Analyze GitHub projects and generate summaries.""" repos = get_github_repos(username, headers) if not repos: st.error("No repositories found or failed to fetch repositories.") return [] results = [] total_score = 0 progress_bar = st.progress(0) for idx, repo in enumerate(repos): repo_name = repo['name'] with st.spinner(f"Analyzing {repo_name}..."): readme, commits, file_structure, languages = get_repo_details(username, repo_name, headers) analysis_data = analyze_repo_and_jd_match(readme, file_structure, commits, languages, jd, llm) repo_score = calculate_repo_score(analysis_data) total_score += repo_score results.append((repo_name, analysis_data, repo_score)) progress_bar.progress((idx + 1) / len(repos)) time.sleep(1) progress_bar.empty() return results, total_score def main(): st.title("🔍 GitHub Repository Analyzer") st.markdown(""" This tool analyzes GitHub repositories to evaluate technical capabilities and project quality. Please provide the required information below to begin the analysis. """) # API Keys input with st.expander("🔑 API Configuration", expanded=True): col1, col2 = st.columns(2) with col1: github_token = st.text_input("GitHub Token", type="password", value=st.session_state.get('github_token', '')) with col2: gemini_key = st.text_input("Google Gemini API Key", type="password", value=st.session_state.get('gemini_key', '')) # Job Description input st.subheader("📝 Job Description") jd = st.text_area("Paste the job description here", height=200) # GitHub username input username = st.text_input("👤 Enter GitHub Username") # Save API keys to session state if github_token: st.session_state.github_token = github_token if gemini_key: st.session_state.gemini_key = gemini_key if st.button("Analyze Repositories") and username and jd and github_token and gemini_key: headers, llm = initialize_api(github_token, gemini_key) if headers and llm: with st.spinner("Analyzing repositories..."): repo_analysis, total_score = analyze_github_repos(username, headers, llm, jd) if repo_analysis: num_repos = len(repo_analysis) # Display overall summary st.header("📊 Analysis Summary") col1, col2, col3 = st.columns(3) with col1: st.metric("Total Repositories", num_repos) with col2: avg_score = round(total_score / num_repos if num_repos > 0 else 0) st.metric("Average Repository Score", f"{avg_score}/100") with col3: suitability = evaluate_candidate(total_score, num_repos) st.metric("Candidate Suitability", suitability) # Display individual repository analysis st.header("📁 Repository Details") sorted_analysis = sorted(repo_analysis, key=lambda x: x[2], reverse=True) for repo_name, analysis_data, repo_score in sorted_analysis: display_repo_analysis(repo_name, analysis_data, repo_score) # Export option if st.button("Export Analysis"): export_data = { "username": username, "total_repos": num_repos, "average_score": avg_score, "suitability": suitability, "repositories": [ { "name": repo_name, "score": repo_score, "analysis": analysis_data } for repo_name, analysis_data, repo_score in sorted_analysis ] } st.download_button( "Download Analysis Report", data=json.dumps(export_data, indent=2), file_name=f"github_analysis_{username}.json", mime="application/json" ) else: st.error("No repositories found or analysis failed.") else: st.error("Failed to initialize APIs. Please check your API keys and try again.") if __name__ == "__main__": main()