import asyncio
import html
import logging
import time
from urllib.parse import quote

from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager


# ASGI application object; the route handlers below register themselves on it.
app = FastAPI()

# Configure the root logger so INFO-and-above records are emitted.
logging.basicConfig(level=logging.INFO)

# Serve files from the "static" directory (relative to the working directory)
# under the /static URL prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")


def scrape_upwork_data(search_query, num_jobs, page):
    """Scrape one page of Upwork job-search results with headless Chrome.

    Args:
        search_query: Free-text search terms. Percent-encoded before being
            placed in the ``q`` parameter, so characters such as ``&``, ``#``
            or spaces cannot corrupt the rest of the query string.
        num_jobs: Total number of jobs the caller wants. Not used here (each
            call loads a single page requested with ``per_page=50``); kept so
            existing callers keep working.
        page: Results page number forwarded to Upwork's ``page`` parameter.

    Returns:
        A list of dicts with the keys ``title``, ``date``, ``link``,
        ``description``, ``job_type``, ``experience_level`` and ``budget``.
        Tiles that cannot be parsed are logged and skipped.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Build the URL before launching Chrome so a bad argument cannot leave a
    # browser process behind.
    url = (
        'https://www.upwork.com/nx/search/jobs?amount=500-&hourly_rate=25-'
        '&location=Americas,Europe,Australia%20and%20New%20Zealand,Canada,'
        'India,Switzerland,United%20States&per_page=50'
        f'&q={quote(str(search_query), safe="")}'
        f'&sort=recency&t=0,1&page={page}'
    )

    driver = webdriver.Chrome(options=options)
    job_listings = []
    try:
        driver.get(url)
        driver.set_window_size(1080, 720)
        # Fixed pause before querying the DOM; presumably gives the page's
        # scripts time to render the job tiles.
        time.sleep(5)

        tiles = driver.find_elements(By.CSS_SELECTOR, 'article[data-test="JobTile"]')
        for tile in tiles:
            try:
                job_listings.append(_parse_job_tile(tile))
            except Exception as e:
                # Best effort: one malformed tile must not discard the page.
                logging.error('Error parsing job listing: %s', e)
    finally:
        # Always shut the browser down, even if loading the page failed.
        driver.quit()

    return job_listings


def _parse_job_tile(job):
    """Extract the displayed fields of a single job tile element as a dict."""
    # NOTE(review): "pubilshed" is kept exactly as written because the
    # selector must match the attribute in the scraped markup; confirm
    # against the live page before "correcting" the spelling.
    posted_date = job.find_element(
        By.CSS_SELECTOR, 'small[data-test="job-pubilshed-date"]'
    ).text.strip()

    title_element = job.find_element(By.CSS_SELECTOR, 'h2.job-tile-title > a')
    title = title_element.text.strip()
    link = title_element.get_attribute('href')

    description = job.find_element(
        By.CSS_SELECTOR, 'div[data-test="JobTileDetails"] > div > div > p'
    ).text.strip()

    job_info = job.find_element(By.CSS_SELECTOR, 'ul.job-tile-info-list')
    job_type = job_info.find_element(
        By.CSS_SELECTOR, 'li[data-test="job-type-label"]'
    ).text.strip()
    experience_level = job_info.find_element(
        By.CSS_SELECTOR, 'li[data-test="experience-level"]'
    ).text.strip()

    # The fixed-price item is optional. find_elements returns an empty list
    # when it is absent, so no exception (and no error log) is needed for the
    # ordinary case of a tile without one.
    fixed_price = job_info.find_elements(
        By.CSS_SELECTOR, 'li[data-test="is-fixed-price"]'
    )
    if fixed_price:
        budget = fixed_price[0].text.strip()
    else:
        # Fall back to the duration label, as the original code did.
        budget = job_info.find_element(
            By.CSS_SELECTOR, 'li[data-test="duration-label"]'
        ).text.strip()

    return {
        'title': title,
        'date': posted_date,
        'link': link,
        'description': description,
        'job_type': job_type,
        'experience_level': experience_level,
        'budget': budget,
    }


@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the landing page with the job-search form.

    The form submits the ``query`` and ``num_jobs`` fields to ``/jobs`` using
    the GET method.

    Returns:
        The page markup as a string; the route's ``response_class`` makes it
        an HTML response.
    """
    # The doctype keeps browsers out of quirks mode.
    return """<!DOCTYPE html>
<html>
    <head>
        <title>Upwork Job Listings</title>
    </head>
    <body>
        <h1>Welcome to Upwork Job Scraper</h1>
        <form action="/jobs" method="get">
            <input type="text" name="query" placeholder="Search Query" required>
            <input type="number" name="num_jobs" value="50" min="1" max="100" required>
            <button type="submit">Search Jobs</button>
        </form>
    </body>
</html>
"""


@app.get("/jobs", response_class=HTMLResponse)
async def get_jobs(query: str, num_jobs: int = 50):
    """Scrape Upwork for ``query`` and render the listings as HTML.

    Args:
        query: Search terms forwarded to ``scrape_upwork_data``.
        num_jobs: Maximum number of listings to show. Results are fetched in
            pages of 50, so the page count is rounded up and the combined
            list is then trimmed back to ``num_jobs``. Values of zero or less
            produce an empty listing.

    Returns:
        An ``HTMLResponse`` containing a heading followed by one ``<div>``
        per job listing.
    """
    # Matches the per_page=50 value in the search URL used by the scraper.
    per_page = 50
    # Ceiling division: 1-50 -> 1 page, 51-100 -> 2 pages. Floor division
    # here would scrape nothing at all for requests below 50.
    page_count = -(-num_jobs // per_page) if num_jobs > 0 else 0
    # NOTE(review): num_jobs has no upper bound here (the form's max=100 is
    # only enforced client-side); each page launches a browser.

    loop = asyncio.get_running_loop()
    jobs = []
    for page in range(1, page_count + 1):
        # The scraper blocks (Selenium calls plus a fixed sleep), so run it
        # in the default executor instead of stalling the event loop.
        job_listings = await loop.run_in_executor(
            None, scrape_upwork_data, query, num_jobs, page
        )
        jobs.extend(job_listings)
    jobs = jobs[:max(num_jobs, 0)]

    parts = ["<h2>Job Listings</h2>"]
    for job in jobs:
        # Everything here was scraped from a third-party page, so escape it
        # before embedding it in markup (including the href attribute).
        fields = {key: html.escape(str(value or "")) for key, value in job.items()}
        parts.append(
            "\n<div>\n"
            f'<h3><a href="{fields["link"]}">{fields["title"]}</a></h3>\n'
            f'<p>Posted Date: {fields["date"]}</p>\n'
            f'<p>Type: {fields["job_type"]}</p>\n'
            f'<p>Experience Level: {fields["experience_level"]}</p>\n'
            f'<p>Budget: {fields["budget"]}</p>\n'
            f'<p>Description: {fields["description"]}</p>\n'
            "</div>\n"
            "<hr>\n"
        )

    return HTMLResponse(content="".join(parts))


if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run directly.
    import uvicorn

    # Serve the app on all network interfaces, port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)