# Selenium-Script / app.py
# Container's picture
# Update app.py
# 7f07e98 verified
# raw
# history blame
# 2.98 kB
from seleniumwire import webdriver
from selenium.webdriver.chrome.options import Options
from fastapi import FastAPI, Request
import uvicorn
import time
import json
from urllib.parse import unquote, urlparse
# ASGI application object; the @app.get(...) decorators below register
# their route handlers on it, and uvicorn serves it as "app:app".
app = FastAPI()
def convert_cookies_to_dict(cookies):
    """Parse a ``Cookie`` header string into a ``{name: value}`` dict.

    Pairs are separated by ``;`` (with or without surrounding spaces) and
    each pair is split on its first ``=`` only, so a value that itself
    contains ``=`` is kept intact. Whitespace around names and values is
    trimmed. Empty segments, segments without ``=`` and segments with an
    empty name are skipped instead of raising. When a name occurs more
    than once, the last occurrence wins.

    Args:
        cookies: Raw cookie string such as ``"a=1; b=2"``. May be empty
            or ``None``.

    Returns:
        A dict mapping cookie names to cookie values (both ``str``);
        empty when there is nothing to parse.
    """
    if not cookies:
        return {}

    parsed = {}
    for segment in cookies.split(";"):
        name, separator, value = segment.partition("=")
        name = name.strip()
        # No "=" at all, or nothing before it: not a usable name/value pair.
        if not separator or not name:
            continue
        parsed[name] = value.strip()
    return parsed
def get_root_domain(url):
    """Return the last two labels of the host name found in *url*.

    Only the host is considered: port, user name and password are not
    part of the result. IP-address hosts are returned whole, because
    taking "the last two labels" of an address is meaningless. A single
    trailing-dot FQDN (``example.com.``) is treated like ``example.com``.

    The "last two labels" rule is a heuristic: it does not know about
    multi-label public suffixes such as ``co.uk``.

    Args:
        url: Absolute URL. Without a scheme/``//`` prefix ``urlparse``
            finds no host and an empty string is returned.

    Returns:
        ``"example.com"`` for ``https://www.example.com:8080/x``; the bare
        host when it has a single label or is an IP address; ``""`` when
        the URL carries no host.
    """
    import ipaddress  # local import: keeps this block self-contained

    # .hostname (unlike .netloc) excludes "user:pw@" and ":port", and
    # strips the brackets around IPv6 literals.
    host = (urlparse(url).hostname or "").rstrip(".")

    try:
        ipaddress.ip_address(host)
    except ValueError:
        pass  # not an IP literal -> treat as a dotted DNS name below
    else:
        return host

    labels = host.split(".")
    if len(labels) > 1:
        # Assume the registrable domain is the final two labels.
        return ".".join(labels[-2:])
    # Fewer than two labels (e.g. "localhost" or ""): return the host as is.
    return host
@app.get("/")
def main():
    # Root endpoint: takes no input and always answers with the same fixed
    # success payload.
    # (Documented with a comment rather than a docstring so the route's
    # generated API description stays exactly as it was.)
    payload = dict(code=200, msg="Success")
    return payload
@app.get("/chrome")
def chrome(url: str = None, wait: int = 5, header: str = None, cookie: str = None):
    """Load a page in headless Chrome and report what the browser ended up with.

    Args:
        url: Target URL; percent-decoded before use. Required.
        wait: Seconds to sleep after the page-load call returns. Must be
            between 0 and 30 inclusive.
        header: Optional percent-encoded JSON of request headers, applied
            through the driver's ``header_overrides`` attribute.
        cookie: Optional percent-encoded cookie string (``"a=1; b=2"``).
            Replaces any ``cookie`` entry supplied via ``header``.

    Returns:
        ``{"code": 200, "data": {...}}`` where ``data`` holds the final
        ``url``, the ``page_source``, the browser ``cookies`` and
        ``is_jump`` (whether the final URL differs from the requested
        one); or ``{"code": 500, "msg": ...}`` when an argument is
        rejected.
    """
    # A target URL is mandatory.
    if not isinstance(url, str):
        return {"code": 500, "msg": "No target URL"}
    target_url = unquote(url)

    # The wait time must be between 0 and 30 seconds.
    if wait not in range(0, 31):
        return {"code": 500, "msg": "The waiting time must be between 0 and 30"}
    wait_time = wait

    # Headers may be overridden, but only via a JSON payload.
    header_array = {}
    if isinstance(header, str):
        try:
            header_array.update(json.loads(unquote(header)))
        except (TypeError, ValueError):
            # ValueError: not parseable as JSON; TypeError/ValueError from
            # update(): parsed fine but is not a mapping-like value.
            return {"code": 500, "msg": "The header field is not JSON"}

    # An explicit cookie argument takes precedence over a header-supplied one.
    if isinstance(cookie, str):
        header_array["cookie"] = unquote(cookie)

    options = Options()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        if 'cookie' in header_array:
            # Cookies are installed in the browser instead of being sent
            # as a raw header override, so remove them from the overrides.
            cookie_array = convert_cookies_to_dict(header_array.pop('cookie'))
            cookie_domain = f'.{get_root_domain(target_url)}'
            for key, value in cookie_array.items():
                # Pass the cookie text as a script *argument* rather than
                # splicing it into the JavaScript source, so quotes or
                # backslashes in a name/value cannot break or inject script.
                driver.execute_script(
                    "document.cookie = arguments[0];",
                    f"{key}={value}; path=/; domain={cookie_domain}; "
                    "expires=Thu, 01 Jan 2050 00:00:00 GMT;",
                )
            # NOTE(review): these cookies are written before navigating, i.e.
            # on whatever document the fresh session shows, not on
            # target_url -- confirm they actually reach the target site.

        driver.header_overrides = header_array
        driver.get(target_url)
        if wait_time > 0:
            time.sleep(wait_time)

        # Snapshot the browser state after the wait.
        current_url = driver.current_url
        data = {
            "url": current_url,
            "page_source": driver.page_source,
            "cookies": driver.get_cookies(),
            # Whether the browser ended on a different URL than requested.
            "is_jump": target_url != current_url,
        }
        return {"code": 200, "data": data}
    finally:
        # Always shut the browser down, including when navigation or a
        # script call raises; otherwise each failure leaks a Chrome process.
        driver.quit()
if __name__ == "__main__":
    # Script entry point: hand uvicorn the import string for this module's
    # `app` object and listen on all interfaces, port 7860.
    uvicorn.run("app:app", host="0.0.0.0", port=7860)