Container committed on
Commit
efebe44
·
verified ·
1 Parent(s): 0d8732a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -16
app.py CHANGED
@@ -8,24 +8,21 @@ from urllib.parse import unquote, urlparse
8
 
9
  app = FastAPI()
10
 
 
11
  def convert_cookies_to_dict(cookies):
12
- cookies = dict([l.split("=", 1) for l in cookies.split("; ")])
13
- return cookies
 
14
 
 
15
  def get_root_domain(url):
16
- # 解析URL
17
  parsed_url = urlparse(url)
18
- # 获取域名部分
19
  domain = parsed_url.netloc
20
 
21
- # 分割域名部分以获取根域名
22
- # 假设根域名是域名的最后两个部分
23
  parts = domain.split('.')
24
  if len(parts) > 1:
25
- # 返回根域名部分
26
  return '.'.join(parts[-2:])
27
  else:
28
- # 如果域名部分少于两个部分,返回整个域名
29
  return domain
30
 
31
  @app.get("/")
@@ -60,39 +57,49 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
60
  # 如果输入了cookie
61
  if type(cookie) == str:
62
  header_array.update({"cookie":unquote(cookie)})
63
-
 
64
  options = Options()
 
 
65
  options.add_argument('--headless')
66
-
 
67
  driver = webdriver.Chrome(options=options)
68
 
 
69
  driver.get(target_url)
70
 
 
71
  if 'cookie' in header_array:
72
  cookie_array = convert_cookies_to_dict(header_array['cookie'])
73
  del header_array['cookie']
74
  for key, value in cookie_array.items():
75
  driver.add_cookie({"name": key, "value": value, "domain": f'.{target_domain}', "path": "/", "secure": False})
76
-
 
77
  driver.header_overrides = header_array
78
-
 
79
  driver.get(target_url)
80
 
 
81
  print(driver.page_source)
82
 
 
83
  if wait_time > 0:
84
  time.sleep(wait_time)
85
 
86
- # 获取当前URL
87
  current_url = driver.current_url
88
 
89
- # 获取页面源代码
90
  page_source = driver.page_source
91
 
92
- # 获取cookie
93
  cookies = driver.get_cookies()
94
 
95
- # 是否有跳转过
96
  is_jump = (target_url != current_url)
97
 
98
  data = {
 
8
 
9
  app = FastAPI()
10
 
11
+ # 解析cookie字符串为字典
12
  def convert_cookies_to_dict(cookies):
13
+ cookie_items = cookies.split("; ")
14
+ parsed_cookies = {item.split("=", 1)[0].strip(): item.split("=", 1)[1].strip() if "=" in item else "" for item in cookie_items}
15
+ return parsed_cookies#
16
 
17
+ # 获取域名字符串的根域
18
  def get_root_domain(url):
 
19
  parsed_url = urlparse(url)
 
20
  domain = parsed_url.netloc
21
 
 
 
22
  parts = domain.split('.')
23
  if len(parts) > 1:
 
24
  return '.'.join(parts[-2:])
25
  else:
 
26
  return domain
27
 
28
  @app.get("/")
 
57
  # 如果输入了cookie
58
  if type(cookie) == str:
59
  header_array.update({"cookie":unquote(cookie)})
60
+
61
+ # 初始化浏览器
62
  options = Options()
63
+
64
+ # 设置为无头模式
65
  options.add_argument('--headless')
66
+
67
+ # 实例化
68
  driver = webdriver.Chrome(options=options)
69
 
70
+ # 需要打开网址页面,才能用 driver.add_cookie 进行cookie追加
71
  driver.get(target_url)
72
 
73
+ # 对浏览器追加指定域名的cookie
74
  if 'cookie' in header_array:
75
  cookie_array = convert_cookies_to_dict(header_array['cookie'])
76
  del header_array['cookie']
77
  for key, value in cookie_array.items():
78
  driver.add_cookie({"name": key, "value": value, "domain": f'.{target_domain}', "path": "/", "secure": False})
79
+
80
+ # 覆写下次访问的请求头(没有修改的则保持原样)
81
  driver.header_overrides = header_array
82
+
83
+ # 再次访问网址
84
  driver.get(target_url)
85
 
86
+ # 输出此时访问的网页源码
87
  print(driver.page_source)
88
 
89
+ # 等待多少秒,来预估网页完全的加载完成(执行完内部的所有js,因为部分js可能涉及到请求后的动态处理,或者延时跳转)
90
  if wait_time > 0:
91
  time.sleep(wait_time)
92
 
93
+ # 获取完全加载完成时,页面的URL
94
  current_url = driver.current_url
95
 
96
+ # 获取完全加载完成时,页面的源代码
97
  page_source = driver.page_source
98
 
99
+ # 获取完全加载完成时,页面的cookie
100
  cookies = driver.get_cookies()
101
 
102
+ # 完全加载完成时,页面是否有发生过 301 302 跳转过
103
  is_jump = (target_url != current_url)
104
 
105
  data = {