from urllib.parse import quote_plus

from bs4 import BeautifulSoup
from scrapy import Request
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager


def start_requests(self):
    """Prompt for a search query, scrape Google results with headless
    Chrome, and yield a Scrapy ``Request`` for each extracted result URL.

    Reads:
        self.max_scrapes: maximum number of URLs to collect.
        self.logger: spider logger for info/error reporting.

    Yields:
        Request: one per extracted URL, dispatched to ``self.parse``.
    """
    query = input("Enter your search query: ")
    # quote_plus() so spaces and special characters survive as a valid
    # query-string value instead of producing a malformed URL.
    google_search_url = f"https://www.google.com/search?q={quote_plus(query)}"

    # Set up Selenium with a headless Chrome instance.
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # Run in headless mode
    options.add_argument('--disable-gpu')
    options.add_argument('--no-sandbox')
    driver = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()), options=options
    )
    try:
        driver.get(google_search_url)
        page_source = driver.page_source
    finally:
        # Always release the browser process, even if the page load raises;
        # otherwise each failed run leaks a headless Chrome instance.
        driver.quit()

    soup = BeautifulSoup(page_source, 'html.parser')

    urls = []
    for link in soup.find_all('a', href=True):
        href = link['href']
        # Google wraps organic result links as /url?q=<target>&<tracking>.
        if href.startswith('/url?q='):
            url = href.split('/url?q=')[1].split('&')[0]
            if not url.startswith('http'):
                continue  # skip relative/internal Google links
            urls.append(url)
            if len(urls) == self.max_scrapes:
                break

    if not urls:
        self.logger.error("No URLs extracted from Google search results.")
        return

    self.logger.info(f"Extracted URLs: {urls}")
    for url in urls:
        yield Request(url, callback=self.parse)