Container committed on
Commit
ca9d760
·
verified ·
1 Parent(s): 25b053a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -16
app.py CHANGED
@@ -26,7 +26,6 @@ def get_root_domain(url):
26
  else:
27
  return domain
28
 
29
- # 网络抓包内容过滤
30
  def filter_type(_type: str):
31
  types = [
32
  'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
@@ -42,10 +41,10 @@ def main():
42
 
43
  @app.get("/chrome")
44
  def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
45
- # 设置日志性能监听参数
46
  caps = {
47
  "browserName": "chrome",
48
- 'goog:loggingPrefs': {'performance': 'ALL'}
49
  }
50
 
51
  # 必须有目标url
@@ -79,8 +78,6 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
79
 
80
  # 设置为无头模式
81
  options.add_argument('--headless')
82
-
83
- # 开启日志性能监听
84
  for key, value in caps.items():
85
  options.set_capability(key, value)
86
 
@@ -126,9 +123,7 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
126
 
127
  # 完全加载完成时,页面是否有发生过 301 302 跳转过
128
  is_jump = (target_url != current_url)
129
-
130
- network = []
131
- # 从日志性能监听中抓取网络包(Network),并进行过滤
132
  performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
133
  for packet in performance_log:
134
  message = json.loads(packet.get('message')).get('message') # 获取message的数据
@@ -141,10 +136,9 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
141
  url = message.get('params').get('response').get('url') # 获取 该请求 url
142
  try:
143
  resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
144
- request_headers = driver.execute_cdp_cmd('Network.getRequestHeaders', {'requestId': requestId})
145
- network.append({"url":url, "type":packet_type})
146
  print(f'response: {resp}')
147
- print(f'request_headers: {request_headers}')
148
  except WebDriverException: # 忽略异常
149
  pass
150
 
@@ -152,14 +146,11 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
152
  "url": current_url,
153
  "page_source": page_source,
154
  "cookies": cookies,
155
- "is_jump": is_jump,
156
- "network": network,
157
  }
158
 
159
  driver.quit()
160
  return {"code": 200,"data":data}
161
 
162
  if __name__ == '__main__':
163
- uvicorn.run(app='app:app', host="0.0.0.0", port=7860)
164
-
165
-
 
26
  else:
27
  return domain
28
 
 
29
  def filter_type(_type: str):
30
  types = [
31
  'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
 
41
 
42
  @app.get("/chrome")
43
  def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
44
+
45
  caps = {
46
  "browserName": "chrome",
47
+ 'goog:loggingPrefs': {'performance': 'ALL'} # 开启日志性能监听
48
  }
49
 
50
  # 必须有目标url
 
78
 
79
  # 设置为无头模式
80
  options.add_argument('--headless')
 
 
81
  for key, value in caps.items():
82
  options.set_capability(key, value)
83
 
 
123
 
124
  # 完全加载完成时,页面是否有发生过 301 302 跳转过
125
  is_jump = (target_url != current_url)
126
+
 
 
127
  performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
128
  for packet in performance_log:
129
  message = json.loads(packet.get('message')).get('message') # 获取message的数据
 
136
  url = message.get('params').get('response').get('url') # 获取 该请求 url
137
  try:
138
  resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
139
+ print(f'type: {packet_type} url: {url}')
 
140
  print(f'response: {resp}')
141
+ print()
142
  except WebDriverException: # 忽略异常
143
  pass
144
 
 
146
  "url": current_url,
147
  "page_source": page_source,
148
  "cookies": cookies,
149
+ "is_jump": is_jump
 
150
  }
151
 
152
  driver.quit()
153
  return {"code": 200,"data":data}
154
 
155
  if __name__ == '__main__':
156
+ uvicorn.run(app='app:app', host="0.0.0.0", port=7860)