sh / app.py
srinuksv's picture
Create app.py
22571b0 verified
raw
history blame
4.4 kB
import html
import re
import time
from urllib.parse import quote_plus

import pandas as pd
from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
app = FastAPI()
# Serve static files
app.mount("/static", StaticFiles(directory="static"), name="static")
def scrape_upwork_data(search_query, num_jobs, page):
    """Scrape one page of Upwork job-search results with headless Chrome.

    Args:
        search_query: Raw search text; URL-encoded before being placed in the
            query string.
        num_jobs: Maximum number of listings to return (previously this
            parameter was accepted but ignored).
        page: 1-based results-page number to fetch.

    Returns:
        List of dicts with keys: title, date, link, description, job_type,
        experience_level, budget.
    """
    options = Options()
    options.add_argument("--headless")  # Run in headless mode for faster scraping
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service, options=options)
    job_listings = []
    try:
        # quote_plus keeps spaces/&/# in the user's query from corrupting the
        # URL; the remaining filter parameters are fixed site values.
        url = (
            'https://www.upwork.com/nx/search/jobs?amount=500-&hourly_rate=25-'
            '&location=Americas,Europe,Australia%20and%20New%20Zealand,Canada,'
            'India,Switzerland,United%20States&per_page=50'
            f'&q={quote_plus(search_query)}&sort=recency&t=0,1&page={page}'
        )
        driver.get(url)
        time.sleep(5)  # Crude wait for client-side rendering; TODO: prefer WebDriverWait
        jobs = driver.find_elements(By.CSS_SELECTOR, 'article[data-test="JobTile"]')
        for job in jobs:
            # Honor the caller's cap instead of silently returning everything.
            if len(job_listings) >= num_jobs:
                break
            try:
                # NOTE(review): 'pubilshed' is misspelled — presumably it
                # mirrors Upwork's own markup; verify before "fixing" it.
                posted_date = job.find_element(By.CSS_SELECTOR, 'small[data-test="job-pubilshed-date"]').text.strip()
                title_element = job.find_element(By.CSS_SELECTOR, 'h2.job-tile-title > a')
                title = title_element.text.strip()
                link = title_element.get_attribute('href')
                description = job.find_element(By.CSS_SELECTOR, 'div[data-test="JobTileDetails"] > div > div > p').text.strip()
                job_info = job.find_element(By.CSS_SELECTOR, 'ul.job-tile-info-list')
                job_type = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="job-type-label"]').text.strip()
                experience_level = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="experience-level"]').text.strip()
                # Check for budget (fixed price or hourly). Was a bare
                # `except:`; narrowed so real errors are not swallowed here.
                try:
                    budget = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="is-fixed-price"]').text.strip()
                except NoSuchElementException:
                    budget = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="duration-label"]').text.strip()
                job_listings.append({
                    'title': title,
                    'date': posted_date,
                    'link': link,
                    'description': description,
                    'job_type': job_type,
                    'experience_level': experience_level,
                    'budget': budget
                })
            except Exception as e:
                # Best-effort per listing: one malformed tile must not abort
                # the whole page scrape.
                print(f'Error parsing job listing: {e}')
    finally:
        driver.quit()  # Always release the browser, even on navigation errors.
    return job_listings
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the landing page containing the job-search form.

    The form issues GET /jobs?query=...&num_jobs=... on submit.
    """
    landing_page = """
    <html>
        <head>
            <title>Upwork Job Listings</title>
        </head>
        <body>
            <h1>Welcome to Upwork Job Scraper</h1>
            <form action="/jobs" method="get">
                <input type="text" name="query" placeholder="Search Query" required>
                <input type="number" name="num_jobs" value="50" min="1" max="100" required>
                <button type="submit">Search Jobs</button>
            </form>
        </body>
    </html>
    """
    return landing_page
@app.get("/jobs", response_class=HTMLResponse)
async def get_jobs(query: str, num_jobs: int = 50):
    """Scrape Upwork for *query* and render the listings as an HTML page.

    Args:
        query: Search text forwarded to the Upwork job search.
        num_jobs: Maximum number of listings to render (previously accepted
            but ignored).

    Returns:
        HTMLResponse containing one <div> per job.
    """
    jobs = []
    for page in range(1, 3):  # Change to however many pages you want to scrape
        jobs.extend(scrape_upwork_data(query, num_jobs, page))
        if len(jobs) >= num_jobs:
            break  # Stop early once enough listings are collected.
    jobs = jobs[:num_jobs]
    # Every scraped field is third-party text; html.escape prevents it from
    # being injected verbatim into our markup (stored XSS). Fragments are
    # joined once instead of repeated `+=` string concatenation.
    parts = ["<h2>Job Listings</h2>"]
    for job in jobs:
        parts.append(f"""
        <div>
            <h3><a href="{html.escape(job['link'])}">{html.escape(job['title'])}</a></h3>
            <p>Posted Date: {html.escape(job['date'])}</p>
            <p>Type: {html.escape(job['job_type'])}</p>
            <p>Experience Level: {html.escape(job['experience_level'])}</p>
            <p>Budget: {html.escape(job['budget'])}</p>
            <p>Description: {html.escape(job['description'])}</p>
        </div>
        <hr>
        """)
    return HTMLResponse(content="".join(parts))
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=7860)