File size: 4,679 Bytes
22571b0
8106f26
22571b0
 
 
 
 
8106f26
22571b0
8106f26
22571b0
 
 
8106f26
 
 
22571b0
 
 
 
8106f26
adb6ea7
 
 
 
8106f26
 
adb6ea7
22571b0
 
 
 
adb6ea7
22571b0
adb6ea7
22571b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8106f26
 
22571b0
 
 
 
 
 
 
 
 
 
 
 
 
8106f26
22571b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8106f26
22571b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import html
import logging
import time

from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# ASGI application instance served by uvicorn (see the __main__ guard below).
app = FastAPI()

# Configure logging
logging.basicConfig(level=logging.INFO)

# Serve static files
# NOTE(review): assumes a "static" directory exists next to this file —
# StaticFiles checks the directory at mount time; confirm in deployment.
app.mount("/static", StaticFiles(directory="static"), name="static")

def scrape_upwork_data(search_query, num_jobs, page):
    """Scrape one results page of Upwork job listings with headless Chrome.

    Args:
        search_query: Text placed in the ``q=`` parameter of the search URL.
        num_jobs: Kept for interface compatibility; the page size is fixed at
            50 by the URL (``per_page=50``), so this value is not used here —
            callers control volume by requesting more pages.
        page: 1-based results-page number to fetch.

    Returns:
        A list of dicts with keys: title, date, link, description, job_type,
        experience_level, budget. A listing that fails to parse is logged and
        skipped rather than aborting the whole page.
    """
    # Headless Chrome options (local driver — this is not a remote WebDriver).
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    driver = webdriver.Chrome(options=options)
    job_listings = []
    try:
        url = f'https://www.upwork.com/nx/search/jobs?amount=500-&hourly_rate=25-&location=Americas,Europe,Australia%20and%20New%20Zealand,Canada,India,Switzerland,United%20States&per_page=50&q={search_query}&sort=recency&t=0,1&page={page}'
        driver.get(url)
        driver.set_window_size(1080, 720)
        time.sleep(5)  # crude fixed wait for the JS-rendered results to appear
        jobs = driver.find_elements(By.CSS_SELECTOR, 'article[data-test="JobTile"]')

        for job in jobs:
            try:
                # NOTE(review): 'job-pubilshed-date' looks misspelled but must
                # match the site's actual data-test attribute — verify before
                # "fixing" the selector.
                posted_date = job.find_element(By.CSS_SELECTOR, 'small[data-test="job-pubilshed-date"]').text.strip()
                title_element = job.find_element(By.CSS_SELECTOR, 'h2.job-tile-title > a')
                title = title_element.text.strip()
                link = title_element.get_attribute('href')
                description = job.find_element(By.CSS_SELECTOR, 'div[data-test="JobTileDetails"] > div > div > p').text.strip()

                job_info = job.find_element(By.CSS_SELECTOR, 'ul.job-tile-info-list')
                job_type = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="job-type-label"]').text.strip()
                experience_level = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="experience-level"]').text.strip()

                # Fixed-price listings expose a budget element; hourly listings
                # expose a duration label instead. The fallback is the expected
                # path for hourly jobs, so it is not logged as an error.
                try:
                    budget = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="is-fixed-price"]').text.strip()
                except Exception:
                    budget = job_info.find_element(By.CSS_SELECTOR, 'li[data-test="duration-label"]').text.strip()

                job_listings.append({
                    'title': title,
                    'date': posted_date,
                    'link': link,
                    'description': description,
                    'job_type': job_type,
                    'experience_level': experience_level,
                    'budget': budget
                })

            except Exception as e:
                logging.error(f'Error parsing job listing: {e}')

    finally:
        # Always tear down the browser, even if page load or parsing raised.
        driver.quit()

    return job_listings

@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve the landing page: a search form that submits a GET to /jobs.

    The form's `query` and `num_jobs` fields map directly onto the
    parameters of the /jobs endpoint below.
    """
    return """
    <html>
        <head>
            <title>Upwork Job Listings</title>
        </head>
        <body>
            <h1>Welcome to Upwork Job Scraper</h1>
            <form action="/jobs" method="get">
                <input type="text" name="query" placeholder="Search Query" required>
                <input type="number" name="num_jobs" value="50" min="1" max="100" required>
                <button type="submit">Search Jobs</button>
            </form>
        </body>
    </html>
    """

@app.get("/jobs", response_class=HTMLResponse)
async def get_jobs(query: str, num_jobs: int = 50):
    """Scrape Upwork for `query` and render the listings as an HTML page.

    Args:
        query: Search terms forwarded to the Upwork job search.
        num_jobs: Desired number of listings; each scraped page holds up to 50.

    Returns:
        HTMLResponse containing one <div> per job listing.
    """
    # Round pages UP: the original floor division produced range(1, 1) — i.e.
    # zero pages — whenever num_jobs < 50.
    num_pages = max(1, -(-num_jobs // 50))
    jobs = []
    for page in range(1, num_pages + 1):
        jobs.extend(scrape_upwork_data(query, num_jobs, page))
    # A page can return more results than requested; cap at num_jobs.
    jobs = jobs[:num_jobs]

    # Escape every scraped field before interpolating into markup — the data
    # comes from an external site and must not inject HTML into our response.
    parts = ["<h2>Job Listings</h2>"]
    for job in jobs:
        parts.append(f"""
        <div>
            <h3><a href="{html.escape(job['link'], quote=True)}">{html.escape(job['title'])}</a></h3>
            <p>Posted Date: {html.escape(job['date'])}</p>
            <p>Type: {html.escape(job['job_type'])}</p>
            <p>Experience Level: {html.escape(job['experience_level'])}</p>
            <p>Budget: {html.escape(job['budget'])}</p>
            <p>Description: {html.escape(job['description'])}</p>
        </div>
        <hr>
        """)
    # Join once instead of quadratic string +=.
    return HTMLResponse(content="".join(parts))

# Script entry point: serve the app on all interfaces at port 7860.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)