Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -24,14 +24,28 @@ def get_root_domain(url):
|
|
24 |
return '.'.join(parts[-2:])
|
25 |
else:
|
26 |
return domain
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
@app.get("/")
def main():
    """Root endpoint: respond with a fixed success payload."""
    payload = {"code": 200, "msg": "Success"}
    return payload
|
31 |
|
32 |
@app.get("/chrome")
|
33 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
35 |
# 必须有目标url
|
36 |
if type(url) == str:
|
37 |
target_url = unquote(url)
|
@@ -63,6 +77,8 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
63 |
|
64 |
# 设置为无头模式
|
65 |
options.add_argument('--headless')
|
|
|
|
|
66 |
|
67 |
# 实例化
|
68 |
driver = webdriver.Chrome(options=options)
|
@@ -107,6 +123,24 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
107 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
108 |
is_jump = (target_url != current_url)
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
data = {
|
111 |
"url": current_url,
|
112 |
"page_source": page_source,
|
|
|
24 |
return '.'.join(parts[-2:])
|
25 |
else:
|
26 |
return domain
|
27 |
+
|
28 |
+
# MIME types of static assets (scripts, stylesheets, images, opaque binary
# blobs) whose responses should be skipped. The bare 'webp' token is kept
# for backward compatibility; 'image/webp' is the registered MIME type for
# WebP images and is what a response's mimeType field would carry.
_SKIPPED_MIME_TYPES = (
    'application/javascript', 'application/x-javascript', 'text/css',
    'webp', 'image/webp', 'image/png', 'image/gif',
    'image/jpeg', 'image/x-icon', 'application/octet-stream',
)


def filter_type(_type: str):
    """Tell whether a response with the given MIME type should be kept.

    Args:
        _type: MIME type string of a response (for example ``'text/html'``).
            ``None`` or any value not in the skip list is treated as
            "keep".

    Returns:
        ``True`` when ``_type`` is not one of the skipped static-asset
        types, ``False`` when it is.
    """
    return _type not in _SKIPPED_MIME_TYPES
|
36 |
+
|
37 |
@app.get("/")
def main():
    """Root endpoint: respond with a fixed success payload."""
    payload = {"code": 200, "msg": "Success"}
    return payload
|
40 |
|
41 |
@app.get("/chrome")
|
42 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
43 |
+
|
44 |
+
caps = {
|
45 |
+
"browserName": "chrome",
|
46 |
+
'goog:loggingPrefs': {'performance': 'ALL'} # 开启日志性能监听
|
47 |
+
}
|
48 |
+
|
49 |
# 必须有目标url
|
50 |
if type(url) == str:
|
51 |
target_url = unquote(url)
|
|
|
77 |
|
78 |
# 设置为无头模式
|
79 |
options.add_argument('--headless')
|
80 |
+
for key, value in caps.items():
|
81 |
+
options.set_capability(key, value)
|
82 |
|
83 |
# 实例化
|
84 |
driver = webdriver.Chrome(options=options)
|
|
|
123 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
124 |
is_jump = (target_url != current_url)
|
125 |
|
126 |
+
performance_log = browser.get_log('performance') # 获取名称为 performance 的日志
|
127 |
+
for packet in performance_log:
|
128 |
+
message = json.loads(packet.get('message')).get('message') # 获取message的数据
|
129 |
+
if message.get('method') != 'Network.responseReceived': # 如果method 不是 responseReceived 类型就不往下执行
|
130 |
+
continue
|
131 |
+
packet_type = message.get('params').get('response').get('mimeType') # 获取该请求返回的type
|
132 |
+
if not filter_type(_type=packet_type): # 过滤type
|
133 |
+
continue
|
134 |
+
requestId = message.get('params').get('requestId') # 唯一的请求标识符。相当于该请求的身份证
|
135 |
+
url = message.get('params').get('response').get('url') # 获取 该请求 url
|
136 |
+
try:
|
137 |
+
resp = browser.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
|
138 |
+
print(f'type: {packet_type} url: {url}')
|
139 |
+
print(f'response: {resp}')
|
140 |
+
print()
|
141 |
+
except WebDriverException: # 忽略异常
|
142 |
+
pass
|
143 |
+
|
144 |
data = {
|
145 |
"url": current_url,
|
146 |
"page_source": page_source,
|