siddhartharya committed on
Commit
2f50c94
1 Parent(s): ebcf536

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -49
app.py CHANGED
@@ -1,11 +1,11 @@
1
- import gradio as gr
2
  import requests
 
3
  import os
4
- from bs4 import BeautifulSoup # For scraping company and role info
5
 
6
  # Load API keys securely from environment variables
7
  proxycurl_api_key = os.getenv("PROXYCURL_API_KEY") # Proxycurl API key
8
  groq_api_key = os.getenv("GROQ_CLOUD_API_KEY") # Groq Cloud API key
 
9
 
10
  class EmailAgent:
11
  def __init__(self, linkedin_url, company_name, role, word_limit, user_name, email, phone, linkedin):
@@ -23,7 +23,7 @@ class EmailAgent:
23
  self.company_info = None
24
  self.role_description = None
25
 
26
- # Reason: Decide what information is needed and if we need to take additional steps
27
  def reason_about_data(self):
28
  print("Reasoning: Deciding what data we need...")
29
  if not self.linkedin_url:
@@ -56,55 +56,29 @@ class EmailAgent:
56
  self.skills = ["Adaptable", "Hardworking"]
57
  self.experiences = ["Worked across various industries"]
58
 
59
- # Action: Fetch company information via Proxycurl or use defaults
60
def fetch_company_info(self):
    """Populate ``self.company_info`` from the Proxycurl company endpoint.

    When ``self.company_name`` is empty, or the lookup fails for any reason
    (network error, timeout, non-200 status, unparsable body), a generic
    placeholder sentence is stored instead, so this method never raises for
    request-level failures.

    Side effects:
        Sets ``self.company_info`` and prints progress/error messages.
    """
    default_info = "A leading company in its field."

    if not self.company_name:
        print("Action: No company name provided, using default company info.")
        self.company_info = default_info
        return

    print(f"Action: Fetching company info for {self.company_name}.")
    headers = {"Authorization": f"Bearer {proxycurl_api_key}"}
    url = "https://nubela.co/proxycurl/api/v2/linkedin/company"

    data = None
    try:
        # Pass the name through ``params`` so spaces, '&', '#', etc. are
        # URL-encoded instead of corrupting the query string; the timeout
        # keeps a stalled connection from blocking the caller forever.
        response = requests.get(
            url,
            headers=headers,
            params={"company_name": self.company_name},
            timeout=30,
        )
        if response.status_code == 200:
            data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not valid JSON: fall through to
        # the same default used for a non-200 status.
        data = None

    if isinstance(data, dict):
        self.company_info = data.get("description", "No detailed company info available.")
    else:
        print(f"Error: Unable to fetch company info for {self.company_name}. Using default info.")
        self.company_info = default_info
75
 
76
- # Action: Scrape the company's website for role-specific information or use defaults
77
def scrape_role_from_website(self):
    """Look for a role description on the company's guessed careers page.

    Requests ``https://<company>.com/careers`` and searches the returned HTML
    for the first text node that contains ``self.role`` (case-insensitive).
    On a hit the text is stored in ``self.role_description``.

    Returns:
        bool: True when a matching text node was found and stored; False when
        the company name or role is missing, the page could not be fetched,
        nothing matched, or any error occurred while scraping.
    """
    print(f"Action: Scraping role description from the company's website for {self.role}.")
    if not self.company_name:
        print("Error: No company name or URL provided for scraping.")
        return False

    if not self.role:
        # An empty role is a substring of every text node, so the search
        # would "succeed" with an arbitrary piece of page text.
        print("Error: No role provided for scraping.")
        return False

    # The address is only a guess derived from the company name; drop
    # characters that cannot appear in a host label (spaces, punctuation).
    host_label = "".join(
        ch
        for ch in str(self.company_name).lower()
        if ch.isascii() and (ch.isalnum() or ch == "-")
    ).strip("-")
    if not host_label:
        print("Error: No company name or URL provided for scraping.")
        return False

    wanted = str(self.role).lower()

    # The broad catch is deliberate: this step is best-effort and the caller
    # falls back to a default role description whenever False is returned.
    try:
        response = requests.get(f"https://{host_label}.com/careers", timeout=15)
        if response.status_code != 200:
            print(f"Error: Unable to reach company's website at {self.company_name}.com.")
            return False

        soup = BeautifulSoup(response.text, 'html.parser')
        role_descriptions = soup.find_all(
            string=lambda text: text is not None and wanted in text.lower()
        )
        if not role_descriptions:
            print(f"No specific role description found on the website for {self.role}.")
            return False

        # Store a plain, trimmed string rather than the parser's node object
        # so the value does not keep the whole parse tree alive.
        self.role_description = str(role_descriptions[0]).strip()
        print(f"Found role description: {self.role_description}")
        return True
    except Exception as e:
        print(f"Error during scraping: {e}")
        return False
102
-
103
- # Action: Use default logic for role description if no role is available
104
def use_default_role_description(self):
    """Store a generic role description built from the role and company name.

    Side effects:
        Prints a progress message and overwrites ``self.role_description``.
    """
    fallback = f"The role of {self.role} at {self.company_name} involves leadership and management."
    print(f"Action: Using default logic for the role of {self.role}.")
    self.role_description = fallback
107
-
108
  # Reflection: Check if we have enough data to generate the email
109
  def reflect_on_data(self):
110
  print("Reflection: Do we have enough data?")
@@ -116,7 +90,7 @@ class EmailAgent:
116
  def generate_email(self):
117
  print("Action: Generating the email with the gathered information.")
118
 
119
- # Updated and fully dynamic LLM prompt
120
  prompt = f"""
121
  Write a professional email applying for the {self.role} position at {self.company_name}.
122
 
@@ -161,10 +135,7 @@ class EmailAgent:
161
  def run(self):
162
  self.reason_about_data() # Reasoning step
163
  self.fetch_linkedin_data() # Fetch LinkedIn data
164
- self.fetch_company_info() # Fetch company data
165
- # Scrape the company's website for role-specific information or use defaults
166
- if not self.scrape_role_from_website():
167
- self.use_default_role_description()
168
  # Reflect on whether the data is sufficient
169
  if self.reflect_on_data():
170
  return self.generate_email() # Final action: generate email
 
 
1
  import requests
2
+ import gradio as gr
3
  import os
 
4
 
5
  # Load API keys securely from environment variables
6
  proxycurl_api_key = os.getenv("PROXYCURL_API_KEY") # Proxycurl API key
7
  groq_api_key = os.getenv("GROQ_CLOUD_API_KEY") # Groq Cloud API key
8
+ firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY") # Firecrawl API key
9
 
10
  class EmailAgent:
11
  def __init__(self, linkedin_url, company_name, role, word_limit, user_name, email, phone, linkedin):
 
23
  self.company_info = None
24
  self.role_description = None
25
 
26
+ # Reason: Decide what information is needed
27
  def reason_about_data(self):
28
  print("Reasoning: Deciding what data we need...")
29
  if not self.linkedin_url:
 
56
  self.skills = ["Adaptable", "Hardworking"]
57
  self.experiences = ["Worked across various industries"]
58
 
59
+ # Action: Fetch company information via Firecrawl API
60
def fetch_company_info_with_firecrawl(self):
    """Populate ``self.company_info`` by scraping the company's site via Firecrawl.

    The site address is guessed as ``https://<company>.com``. When
    ``self.company_name`` is empty, no usable host can be derived from it, or
    the Firecrawl call fails (network error, timeout, non-200 status,
    unparsable body), a generic placeholder sentence is stored instead, so
    this method never raises for request-level failures.

    Side effects:
        Sets ``self.company_info`` and prints progress/error messages.
    """
    default_info = "A leading company in its field."

    if not self.company_name:
        print("Action: No company name provided, using default company info.")
        self.company_info = default_info
        return

    print(f"Action: Fetching company info for {self.company_name} using Firecrawl.")

    # Drop characters that cannot appear in a host label so names such as
    # "Acme Corp" do not produce an invalid URL.
    host_label = "".join(
        ch
        for ch in str(self.company_name).lower()
        if ch.isascii() and (ch.isalnum() or ch == "-")
    ).strip("-")
    if not host_label:
        print("Error: Unable to fetch company info via Firecrawl. Using default info.")
        self.company_info = default_info
        return

    headers = {"Authorization": f"Bearer {firecrawl_api_key}"}
    firecrawl_url = "https://api.firecrawl.dev/v1/scrape"
    # NOTE(review): the v1 scrape body is documented as ``url`` plus options
    # such as ``formats``; the previous ``patterns`` key is not part of that
    # schema — confirm against the current Firecrawl API reference.
    payload = {
        "url": f"https://{host_label}.com",
        "formats": ["markdown"],
    }

    body = None
    try:
        # The timeout keeps a stalled scrape from blocking the caller forever.
        response = requests.post(firecrawl_url, json=payload, headers=headers, timeout=30)
        if response.status_code == 200:
            body = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not valid JSON: fall through to
        # the same default used for a non-200 status.
        body = None

    if not isinstance(body, dict):
        print("Error: Unable to fetch company info via Firecrawl. Using default info.")
        self.company_info = default_info
        return

    # NOTE(review): v1 responses are expected to nest page metadata under
    # ``data.metadata``; the top-level ``description`` lookup is kept as a
    # fallback for the shape the earlier code assumed.
    page = body.get("data")
    metadata = page.get("metadata") if isinstance(page, dict) else None
    description = metadata.get("description") if isinstance(metadata, dict) else None
    if not description:
        description = body.get("description")

    self.company_info = description or "No detailed company info available."
    print(f"Company info fetched: {self.company_info}")
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  # Reflection: Check if we have enough data to generate the email
83
  def reflect_on_data(self):
84
  print("Reflection: Do we have enough data?")
 
90
  def generate_email(self):
91
  print("Action: Generating the email with the gathered information.")
92
 
93
+ # Dynamic LLM prompt
94
  prompt = f"""
95
  Write a professional email applying for the {self.role} position at {self.company_name}.
96
 
 
135
  def run(self):
136
  self.reason_about_data() # Reasoning step
137
  self.fetch_linkedin_data() # Fetch LinkedIn data
138
+ self.fetch_company_info_with_firecrawl() # Fetch company data using Firecrawl
 
 
 
139
  # Reflect on whether the data is sufficient
140
  if self.reflect_on_data():
141
  return self.generate_email() # Final action: generate email