Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
from seleniumwire import webdriver
|
2 |
from selenium.webdriver.chrome.options import Options
|
3 |
-
from selenium.common.exceptions import WebDriverException
|
4 |
from fastapi import FastAPI, Request
|
5 |
import uvicorn
|
6 |
import time
|
@@ -25,29 +24,14 @@ def get_root_domain(url):
|
|
25 |
return '.'.join(parts[-2:])
|
26 |
else:
|
27 |
return domain
|
28 |
-
|
29 |
-
# 网络抓包内容过滤
|
30 |
-
def filter_type(_type: str):
    """Decide whether a captured network response should be kept.

    Responses whose MIME type is one of the ignored static-asset types
    (scripts, stylesheets, images, raw binary streams) are rejected; any
    other value is kept.

    Args:
        _type: MIME type reported for the response, e.g. ``"text/html"``.

    Returns:
        ``True`` if the response should be kept, ``False`` if its type is
        in the ignored list.
    """
    ignored_types = (
        'application/javascript', 'application/x-javascript', 'text/css',
        # The bare 'webp' token is kept for backward compatibility, but the
        # media type registered for WebP images is 'image/webp'; without it
        # WebP responses slipped through while every other image type was
        # dropped.
        'webp', 'image/webp', 'image/png', 'image/gif',
        'image/jpeg', 'image/x-icon', 'application/octet-stream',
    )
    return _type not in ignored_types
|
38 |
-
|
39 |
@app.get("/")
def main():
    """Return a fixed success payload for the root path."""
    payload = {"code": 200, "msg": "Success"}
    return payload
|
42 |
|
43 |
@app.get("/chrome")
|
44 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
45 |
-
|
46 |
-
caps = {
|
47 |
-
"browserName": "chrome",
|
48 |
-
'goog:loggingPrefs': {'performance': 'ALL'}
|
49 |
-
}
|
50 |
-
|
51 |
# 必须有目标url
|
52 |
if type(url) == str:
|
53 |
target_url = unquote(url)
|
@@ -80,20 +64,17 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
80 |
# 设置为无头模式
|
81 |
options.add_argument('--headless')
|
82 |
|
83 |
-
# 开启日志性能监听
|
84 |
-
for key, value in caps.items():
|
85 |
-
options.set_capability(key, value)
|
86 |
-
|
87 |
# 实例化
|
88 |
driver = webdriver.Chrome(options=options)
|
89 |
|
90 |
# 需要打开网址页面,才能用 driver.add_cookie 进行cookie追加
|
91 |
driver.get(target_url)
|
92 |
|
93 |
-
# 清除本次打开网址页面,可能存储在本地的cookie、sessionStorage、localStorage
|
94 |
driver.delete_all_cookies()
|
95 |
driver.execute_script("window.sessionStorage.clear();")
|
96 |
driver.execute_script("window.localStorage.clear();")
|
|
|
97 |
|
98 |
# 对浏览器追加我们传递进来的cookie
|
99 |
if 'cookie' in header_array:
|
@@ -126,34 +107,19 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
126 |
|
127 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
128 |
is_jump = (target_url != current_url)
|
129 |
-
|
130 |
-
network = []
|
131 |
-
performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
|
132 |
-
for packet in performance_log:
|
133 |
-
message = json.loads(packet.get('message')).get('message') # 获取message的数据
|
134 |
-
if message.get('method') != 'Network.responseReceived': # 如果method 不是 responseReceived 类型就不往下执行
|
135 |
-
continue
|
136 |
-
packet_type = message.get('params').get('response').get('mimeType') # 获取该请求返回的type
|
137 |
-
if not filter_type(_type=packet_type): # 过滤type
|
138 |
-
continue
|
139 |
-
requestId = message.get('params').get('requestId') # 唯一的请求标识符。相当于该请求的身份证
|
140 |
-
url = message.get('params').get('response').get('url') # 获取 该请求 url
|
141 |
-
headers = message.get('params').get('response').get('headers') # 获取 该请求 url
|
142 |
-
status = message.get('params').get('response').get('status') # 获取 该请求 url
|
143 |
-
try:
|
144 |
-
network.append({"status":status ,"url":url, "type":packet_type, "headers":headers})
|
145 |
-
# resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId})
|
146 |
-
# print(f'response: {resp}')
|
147 |
-
except WebDriverException: # 忽略异常
|
148 |
-
pass
|
149 |
-
print(driver.requests)
|
150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
data = {
|
152 |
"url": current_url,
|
153 |
"page_source": page_source,
|
154 |
"cookies": cookies,
|
155 |
"is_jump": is_jump,
|
156 |
-
"network": network
|
157 |
}
|
158 |
|
159 |
driver.quit()
|
|
|
1 |
from seleniumwire import webdriver
|
2 |
from selenium.webdriver.chrome.options import Options
|
|
|
3 |
from fastapi import FastAPI, Request
|
4 |
import uvicorn
|
5 |
import time
|
|
|
24 |
return '.'.join(parts[-2:])
|
25 |
else:
|
26 |
return domain
|
27 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
@app.get("/")
def main():
    """Return a fixed success payload for the root path."""
    payload = {"code": 200, "msg": "Success"}
    return payload
|
31 |
|
32 |
@app.get("/chrome")
|
33 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
34 |
+
|
|
|
|
|
|
|
|
|
|
|
35 |
# 必须有目标url
|
36 |
if type(url) == str:
|
37 |
target_url = unquote(url)
|
|
|
64 |
# 设置为无头模式
|
65 |
options.add_argument('--headless')
|
66 |
|
|
|
|
|
|
|
|
|
67 |
# 实例化
|
68 |
driver = webdriver.Chrome(options=options)
|
69 |
|
70 |
# 需要打开网址页面,才能用 driver.add_cookie 进行cookie追加
|
71 |
driver.get(target_url)
|
72 |
|
73 |
+
# 清除本次打开网址页面,可能存储在本地的cookie、sessionStorage、localStorage,并删除因此次访问所产生的 har
|
74 |
driver.delete_all_cookies()
|
75 |
driver.execute_script("window.sessionStorage.clear();")
|
76 |
driver.execute_script("window.localStorage.clear();")
|
77 |
+
del driver.requests
|
78 |
|
79 |
# 对浏览器追加我们传递进来的cookie
|
80 |
if 'cookie' in header_array:
|
|
|
107 |
|
108 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
109 |
is_jump = (target_url != current_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
+
network = []
|
112 |
+
# 遍历输出过程中的har
|
113 |
+
for request in driver.requests:
|
114 |
+
if request.response:
|
115 |
+
network.append({"method":request.response.method, "status":request.response.status_code ,"url":request.url, "headers":request.response.headers})
|
116 |
+
|
117 |
data = {
|
118 |
"url": current_url,
|
119 |
"page_source": page_source,
|
120 |
"cookies": cookies,
|
121 |
"is_jump": is_jump,
|
122 |
+
"network": network
|
123 |
}
|
124 |
|
125 |
driver.quit()
|