khulnasoft committed on
Commit
9a3880d
·
verified ·
1 Parent(s): 2249885

Create spidder.py

Browse files
Files changed (1) hide show
  1. spidder.py +21 -0
spidder.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import requests
from bs4 import BeautifulSoup

# Minimal scraper: fetch a single page, collect the stripped text of every
# <div class="example-class"> element, and write one entry per line to data.txt.

# Specify the URL
url = "https://example.com"

# Send a request to the website. An explicit timeout keeps the script from
# blocking indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)
# Fail loudly on 4xx/5xx responses instead of scraping an error page.
response.raise_for_status()

# Parse the HTML content from the raw response bytes
soup = BeautifulSoup(response.content, "html.parser")

# Extract the data you need: the whitespace-stripped text of each matching div
data = [
    item.text.strip()
    for item in soup.find_all("div", class_="example-class")
]

# Save the data to a file for further processing. The encoding is pinned so
# non-ASCII page text is written the same way regardless of platform default.
with open("data.txt", "w", encoding="utf-8") as file:
    file.writelines(f"{entry}\n" for entry in data)