Spaces:

AZILS
/

Selenium-Script

Build error

App Files Community

Container committed on Jun 20, 2024

Commit

1497d6f

verified ·

1 Parent(s): 0d18c41

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -2

app.py CHANGED Viewed

@@ -24,14 +24,28 @@ def get_root_domain(url):
         return '.'.join(parts[-2:])
     else:
         return domain
 @app.get("/")
 def main():
     """Root endpoint: always answers with a fixed success payload."""
     return dict(code=200, msg="Success")
 @app.get("/chrome")
 def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
     # 必须有目标url
     if type(url) == str:
         target_url = unquote(url)
@@ -63,6 +77,8 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
     # 设置为无头模式
     options.add_argument('--headless')
     # 实例化
     driver = webdriver.Chrome(options=options)
@@ -107,6 +123,24 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
     # 完全加载完成时，页面是否有发生过 301 302 跳转过
     is_jump = (target_url != current_url)
     data = {
         "url": current_url,
         "page_source": page_source,

         return '.'.join(parts[-2:])
     else:
         return domain
+def filter_type(_type: str):
+    types = [
+        'application/javascript', 'application/x-javascript', 'text/css', 'webp', 'image/png', 'image/gif',
+        'image/jpeg', 'image/x-icon', 'application/octet-stream'
+    ]
+    if _type not in types:
+        return True
+    return False
 @app.get("/")
 def main():
     """Root endpoint: always answers with a fixed success payload."""
     return dict(code=200, msg="Success")
 @app.get("/chrome")
 def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
+    caps = {
+        "browserName": "chrome",
+        'goog:loggingPrefs': {'performance': 'ALL'}  # 开启日志性能监听
+    }
     # 必须有目标url
     if type(url) == str:
         target_url = unquote(url)
     # 设置为无头模式
     options.add_argument('--headless')
+    for key, value in caps.items():
+        options.set_capability(key, value)
     # 实例化
     driver = webdriver.Chrome(options=options)
     # 完全加载完成时，页面是否有发生过 301 302 跳转过
     is_jump = (target_url != current_url)
+    performance_log = browser.get_log('performance')  # 获取名称为 performance 的日志
+    for packet in performance_log:
+        message = json.loads(packet.get('message')).get('message')  # 获取message的数据
+        if message.get('method') != 'Network.responseReceived':  # 如果method 不是 responseReceived 类型就不往下执行
+            continue
+        packet_type = message.get('params').get('response').get('mimeType')  # 获取该请求返回的type
+        if not filter_type(_type=packet_type):  # 过滤type
+            continue
+        requestId = message.get('params').get('requestId')  # 唯一的请求标识符。相当于该请求的身份证
+        url = message.get('params').get('response').get('url')  # 获取 该请求  url
+        try:
+            resp = browser.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId})  # selenium调用 cdp
+            print(f'type: {packet_type} url: {url}')
+            print(f'response: {resp}')
+            print()
+        except WebDriverException:  # 忽略异常
+            pass
     data = {
         "url": current_url,
         "page_source": page_source,