# Spaces: Build error
import ipaddress
import json
import time
from urllib.parse import unquote, urlparse

import uvicorn
from fastapi import FastAPI, Request
from selenium.webdriver.chrome.options import Options
from seleniumwire import webdriver
# Application object; the uvicorn launcher at the bottom of this file refers
# to it by the import string 'app:app'.
app = FastAPI()
def convert_cookies_to_dict(cookies):
    """Parse a ``Cookie``-header style string into a ``{name: value}`` dict.

    Pairs are separated by ``;`` (surrounding whitespace is ignored) and each
    pair is split on its first ``=`` only, so values that themselves contain
    ``=`` (e.g. base64 padding) are preserved intact.

    Fragments that are empty or carry no ``name=`` part are skipped instead
    of raising, so an empty string or a trailing ``;`` yields a clean result.
    If a name occurs more than once, the last occurrence wins.

    Args:
        cookies: Cookie string such as ``"a=1; b=2"``.

    Returns:
        Dict mapping cookie names to their (string) values.
    """
    parsed = {}
    for fragment in cookies.split(";"):
        name, separator, value = fragment.strip().partition("=")
        # No "=" at all, or nothing before it: not a usable name/value pair.
        if not separator or not name:
            continue
        parsed[name] = value
    return parsed
def get_root_domain(url):
    """Return the root domain of *url*, taken as its last two host labels.

    The host is read from the parsed URL's ``hostname`` so that a port or
    ``user:password@`` prefix never ends up in the result, and a trailing
    dot of a fully-qualified name is dropped.

    Special cases:
        * IP-address hosts are returned unchanged (slicing labels off an
          address would produce a meaningless value).
        * Single-label hosts such as ``localhost`` are returned as-is.
        * URLs without a host yield ``""``.

    The "last two labels" rule is a simplification: multi-part public
    suffixes (e.g. ``co.uk``) are not recognised.

    Args:
        url: Absolute URL string.

    Returns:
        The root domain (lower-cased by ``urlparse``), the IP address, or
        ``""`` when the URL has no host.
    """
    host = (urlparse(url).hostname or "").rstrip(".")
    if not host:
        return ""

    try:
        ipaddress.ip_address(host)
    except ValueError:
        pass  # Not an IP literal; treat it as a dotted DNS name below.
    else:
        return host

    labels = host.split(".")
    # For a single-label host the slice simply returns that one label.
    return ".".join(labels[-2:])
def main():
    """Return the fixed success payload: code 200 with message "Success"."""
    # NOTE(review): no route decorator is visible on this function in this
    # view of the file -- confirm how it is exposed through `app`.
    return dict(code=200, msg="Success")
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
    """Load *url* in headless Chrome and report what the browser ended up with.

    Args:
        url: Target URL; it is percent-decoded before use. Required.
        wait: Whole seconds to sleep after the page load, 0 to 30 inclusive.
        header: Optional percent-encoded JSON object of request headers to
            override.
        cookie: Optional percent-encoded cookie string (``"a=1; b=2"``). It
            replaces any ``cookie`` entry supplied through *header*.

    Returns:
        ``{"code": 200, "data": {...}}`` on success, where ``data`` holds the
        final ``url``, the ``page_source``, the browser ``cookies`` and
        ``is_jump`` (whether the final URL differs from the requested one).
        ``{"code": 500, "msg": ...}`` when an argument fails validation.

    Raises:
        Whatever the driver raises while starting or loading the page; the
        browser is shut down in every case once it has been started.
    """
    # A target URL is mandatory.
    if not isinstance(url, str):
        return {"code": 500,"msg":"No target URL"}
    target_url = unquote(url)

    # The waiting time must be between 0 and 30.
    if wait not in range(0, 31):
        return {"code": 500,"msg":"The waiting time must be between 0 and 30"}
    wait_time = wait

    # Headers may be overridden, but only through a JSON object.
    header_array = {}
    if isinstance(header, str):
        try:
            parsed_header = json.loads(unquote(header))
        except (ValueError, RecursionError):
            return {"code": 500,"msg":"The header field is not JSON"}
        # Valid JSON that is not an object (list, number, string, ...) cannot
        # be used as a header mapping.
        if not isinstance(parsed_header, dict):
            return {"code": 500,"msg":"The header field is not JSON"}
        header_array.update(parsed_header)

    # An explicit cookie argument takes precedence over a header-supplied one.
    if isinstance(cookie, str):
        header_array["cookie"] = unquote(cookie)

    options = Options()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        if 'cookie' in header_array:
            # Cookies are written into the browser rather than sent as a
            # header override, so the entry is removed from header_array.
            cookie_array = convert_cookies_to_dict(header_array.pop('cookie'))
            cookie_domain = f'.{get_root_domain(target_url)}'
            # NOTE(review): these cookies are written via document.cookie
            # before any navigation has happened -- confirm the browser
            # accepts them for cookie_domain at this point.
            for key, value in cookie_array.items():
                cookie_string = f'{key}={value}; path=/; domain={cookie_domain}; expires=Thu, 01 Jan 2050 00:00:00 GMT;'
                # Hand the value over as a script argument instead of splicing
                # it into the JavaScript source, so quotes or backslashes in a
                # cookie cannot break (or inject into) the script.
                driver.execute_script('document.cookie = arguments[0];', cookie_string)
            print(driver.execute_script('return document.cookie'))

        # NOTE(review): header_overrides is a selenium-wire attribute --
        # confirm the installed version still supports it.
        driver.header_overrides = header_array
        driver.get(target_url)
        print(driver.page_source)

        if wait_time > 0:
            time.sleep(wait_time)

        # Current URL after the load (and the optional wait).
        current_url = driver.current_url
        data = {
            "url": current_url,
            # Page source as the browser currently holds it.
            "page_source": driver.page_source,
            # Cookies known to the browser session.
            "cookies": driver.get_cookies(),
            # Whether the browser ended up somewhere other than target_url.
            "is_jump": target_url != current_url,
        }
    finally:
        # Always release the browser, including when the load or the cookie
        # handling raised, so no Chrome process is left behind.
        driver.quit()

    return {"code": 200,"data":data}
if __name__ == '__main__':
    # Start the uvicorn server only when this file is executed directly:
    # serve the object addressed by 'app:app' on all interfaces, port 7860.
    server_settings = {"app": 'app:app', "host": "0.0.0.0", "port": 7860}
    uvicorn.run(**server_settings)