Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -26,7 +26,6 @@ def get_root_domain(url):
|
|
26 |
else:
|
27 |
return domain
|
28 |
|
29 |
-
# 网络抓包内容过滤
|
30 |
def filter_type(_type: str):
|
31 |
types = [
|
32 |
'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
|
@@ -42,10 +41,10 @@ def main():
|
|
42 |
|
43 |
@app.get("/chrome")
|
44 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
45 |
-
|
46 |
caps = {
|
47 |
"browserName": "chrome",
|
48 |
-
'goog:loggingPrefs': {'performance': 'ALL'}
|
49 |
}
|
50 |
|
51 |
# 必须有目标url
|
@@ -79,8 +78,6 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
79 |
|
80 |
# 设置为无头模式
|
81 |
options.add_argument('--headless')
|
82 |
-
|
83 |
-
# 开启日志性能监听
|
84 |
for key, value in caps.items():
|
85 |
options.set_capability(key, value)
|
86 |
|
@@ -126,9 +123,7 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
126 |
|
127 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
128 |
is_jump = (target_url != current_url)
|
129 |
-
|
130 |
-
network = []
|
131 |
-
# 从日志性能监听中抓取网络包(Network),并进行过滤
|
132 |
performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
|
133 |
for packet in performance_log:
|
134 |
message = json.loads(packet.get('message')).get('message') # 获取message的数据
|
@@ -141,10 +136,9 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
141 |
url = message.get('params').get('response').get('url') # 获取 该请求 url
|
142 |
try:
|
143 |
resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
|
144 |
-
|
145 |
-
network.append({"url":url, "type":packet_type})
|
146 |
print(f'response: {resp}')
|
147 |
-
print(
|
148 |
except WebDriverException: # 忽略异常
|
149 |
pass
|
150 |
|
@@ -152,14 +146,11 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
152 |
"url": current_url,
|
153 |
"page_source": page_source,
|
154 |
"cookies": cookies,
|
155 |
-
"is_jump": is_jump
|
156 |
-
"network": network,
|
157 |
}
|
158 |
|
159 |
driver.quit()
|
160 |
return {"code": 200,"data":data}
|
161 |
|
162 |
if __name__ == '__main__':
|
163 |
-
uvicorn.run(app='app:app', host="0.0.0.0", port=7860)
|
164 |
-
|
165 |
-
|
|
|
26 |
else:
|
27 |
return domain
|
28 |
|
|
|
29 |
def filter_type(_type: str):
|
30 |
types = [
|
31 |
'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
|
|
|
41 |
|
42 |
@app.get("/chrome")
|
43 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
44 |
+
|
45 |
caps = {
|
46 |
"browserName": "chrome",
|
47 |
+
'goog:loggingPrefs': {'performance': 'ALL'} # 开启日志性能监听
|
48 |
}
|
49 |
|
50 |
# 必须有目标url
|
|
|
78 |
|
79 |
# 设置为无头模式
|
80 |
options.add_argument('--headless')
|
|
|
|
|
81 |
for key, value in caps.items():
|
82 |
options.set_capability(key, value)
|
83 |
|
|
|
123 |
|
124 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
125 |
is_jump = (target_url != current_url)
|
126 |
+
|
|
|
|
|
127 |
performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
|
128 |
for packet in performance_log:
|
129 |
message = json.loads(packet.get('message')).get('message') # 获取message的数据
|
|
|
136 |
url = message.get('params').get('response').get('url') # 获取 该请求 url
|
137 |
try:
|
138 |
resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId}) # selenium调用 cdp
|
139 |
+
print(f'type: {packet_type} url: {url}')
|
|
|
140 |
print(f'response: {resp}')
|
141 |
+
print()
|
142 |
except WebDriverException: # 忽略异常
|
143 |
pass
|
144 |
|
|
|
146 |
"url": current_url,
|
147 |
"page_source": page_source,
|
148 |
"cookies": cookies,
|
149 |
+
"is_jump": is_jump
|
|
|
150 |
}
|
151 |
|
152 |
driver.quit()
|
153 |
return {"code": 200,"data":data}
|
154 |
|
155 |
if __name__ == '__main__':
|
156 |
+
uvicorn.run(app='app:app', host="0.0.0.0", port=7860)
|
|
|
|