Container committed on
Commit
1497d6f
·
verified ·
1 Parent(s): 0d18c41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -2
app.py CHANGED
@@ -24,14 +24,28 @@ def get_root_domain(url):
24
  return '.'.join(parts[-2:])
25
  else:
26
  return domain
27
-
 
 
 
 
 
 
 
 
 
28
  @app.get("/")
29
  def main():
30
  return {"code": 200,"msg":"Success"}
31
 
32
  @app.get("/chrome")
33
  def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
34
-
 
 
 
 
 
35
  # 必须有目标url
36
  if type(url) == str:
37
  target_url = unquote(url)
@@ -63,6 +77,8 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
63
 
64
  # 设置为无头模式
65
  options.add_argument('--headless')
 
 
66
 
67
  # 实例化
68
  driver = webdriver.Chrome(options=options)
@@ -107,6 +123,24 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
107
  # 完全加载完成时,页面是否有发生过 301 302 跳转过
108
  is_jump = (target_url != current_url)
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  data = {
111
  "url": current_url,
112
  "page_source": page_source,
 
24
  return '.'.join(parts[-2:])
25
  else:
26
  return domain
27
+
28
# MIME types of static assets (scripts, stylesheets, images, raw binaries)
# whose responses are filtered out.
_FILTERED_MIME_TYPES = (
    'application/javascript',
    'application/x-javascript',
    'text/css',
    'image/webp',  # the real MIME type for WebP images
    'webp',  # original bare entry, kept for backward compatibility
    'image/png',
    'image/gif',
    'image/jpeg',
    'image/x-icon',
    'application/octet-stream',
)


def filter_type(_type: str) -> bool:
    """Tell whether a response with MIME type ``_type`` should be kept.

    Args:
        _type: The MIME type string to check. The comparison is an exact,
            case-sensitive match against the filtered list.

    Returns:
        ``False`` when ``_type`` is one of the filtered static-asset types,
        ``True`` for anything else (including ``None`` or unknown types).
    """
    return _type not in _FILTERED_MIME_TYPES
36
+
37
  @app.get("/")
38
  def main():
39
  return {"code": 200,"msg":"Success"}
40
 
41
  @app.get("/chrome")
42
  def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
43
+
44
+ caps = {
45
+ "browserName": "chrome",
46
+ 'goog:loggingPrefs': {'performance': 'ALL'} # 开启日志性能监听
47
+ }
48
+
49
  # 必须有目标url
50
  if type(url) == str:
51
  target_url = unquote(url)
 
77
 
78
  # 设置为无头模式
79
  options.add_argument('--headless')
80
+ for key, value in caps.items():
81
+ options.set_capability(key, value)
82
 
83
  # 实例化
84
  driver = webdriver.Chrome(options=options)
 
123
  # 完全加载完成时,页面是否有发生过 301 302 跳转过
124
  is_jump = (target_url != current_url)
125
 
126
+ performance_log = browser.get_log('performance') # 获取名称为 performance 的日志
127
+ for packet in performance_log:
128
+ message = json.loads(packet.get('message')).get('message') # 获取message的数据
129
+ if message.get('method') != 'Network.responseReceived': # 如果method 不是 responseReceived 类型就不往下执行
130
+ continue
131
+ packet_type = message.get('params').get('response').get('mimeType') # 获取该请求返回的type
132
+ if not filter_type(_type=packet_type): # 过滤type
133
+ continue
134
+ requestId = message.get('params').get('requestId') # 唯一的请求标识符。相当于该请求的身份证
135
+ url = message.get('params').get('response').get('url') # 获取 该请求 url
136
+ try:
137
+ resp = browser.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
138
+ print(f'type: {packet_type} url: {url}')
139
+ print(f'response: {resp}')
140
+ print()
141
+ except WebDriverException: # 忽略异常
142
+ pass
143
+
144
  data = {
145
  "url": current_url,
146
  "page_source": page_source,