# AppleJupyter-test / waifuc/source/anime_pictures.py
# Hosting-page metadata captured with this file: uploader LittleApple-fp16,
# commit message "Upload 88 files", revision 4f8ad24, size 3.96 kB.
import os
from enum import Enum
from typing import Iterator, Tuple, Union, List, Literal
import cloudscraper
from hbutils.system import urlsplit
from pyquery import PyQuery as pq
from .web import WebDataSource
from ..utils import get_requests_session, srequest
class OrderBy(str, Enum):
    """Sort orders accepted by the anime-pictures post search.

    Each member's value is the literal string that ``AnimePicturesSource``
    sends as the ``order_by`` query parameter. Because the enum mixes in
    ``str``, every member also compares equal to its value.
    """

    STAR_DATE = "stars_date"
    DATE = "date"
    # Wire value is "date_r" (presumably reverse date order - confirm with the site).
    DATE_REVERS = "date_r"
    RATING = "rating"
    # Note: the member is called DOWNLOADS but the wire value is "views".
    DOWNLOADS = "views"
    SIZE = "size"
    TAG_COUNT = "tag_num"
class Period(str, Enum):
    """Time windows accepted by the anime-pictures post search.

    Each member's value is the literal code that ``AnimePicturesSource``
    sends as the ``ldate`` query parameter. The codes are NOT in
    chronological order (``PAST_DAY`` is ``"3"`` while ``PAST_WEEK`` is
    ``"1"``), so never derive one from another arithmetically.
    """

    ANYTIME = "0"
    PAST_DAY = "3"
    PAST_WEEK = "1"
    PAST_MONTH = "2"
    PAST_6_MONTHS = "4"
    PAST_YEAR = "5"
    PAST_2_YEARS = "6"
    PAST_3_YEARS = "7"
class AnimePicturesSource(WebDataSource):
    """Web data source that walks the results of an anime-pictures.net tag search.

    Search results are fetched page by page from ``/api/v3/posts``. For every
    post, the HTML post page is also fetched so that the tag list (and, for
    ``select='original'``, the download link) can be scraped from it.
    """

    __root__ = 'https://anime-pictures.net'

    def __init__(self, tags: List[str], tag_mode: Literal['or', 'and'] = 'and',
                 denied_tags: Union[List[str], None] = None,
                 denied_tag_mode: Literal['or', 'and'] = 'or',
                 order_by: Union[OrderBy, str] = OrderBy.RATING,
                 period: Union[Period, str] = Period.ANYTIME,
                 select: Literal['thumbnail', 'preview', 'original'] = 'original',
                 group_name: str = 'anime_pictures', download_silent: bool = True, **kwargs):
        """Configure the search.

        :param tags: Tags to search for.
        :param tag_mode: ``'and'`` joins ``tags`` with ``&&``; any other value
            joins them with ``||``.
        :param denied_tags: Tags to exclude; ``None`` or an empty list means
            no ``denied_tags`` parameter is sent at all.
        :param denied_tag_mode: ``'and'`` joins ``denied_tags`` with ``&&``;
            any other value joins them with ``||``.
        :param order_by: Sort order, as an :class:`OrderBy` member or the
            equivalent raw string (e.g. ``'rating'``).
        :param period: Time window, as a :class:`Period` member or the
            equivalent raw code string (e.g. ``'0'``).
        :param select: Which image URL to yield: ``'thumbnail'``,
            ``'preview'`` or ``'original'``.
        :param group_name: Passed to ``WebDataSource`` and used as the prefix
            of every generated ``filename`` and ``group_id``.
        :param download_silent: Passed through to ``WebDataSource``.
        :param kwargs: Extra query parameters merged into every search
            request; on a key collision they override the built-in ones.
        """
        WebDataSource.__init__(
            self, group_name,
            get_requests_session(session=cloudscraper.create_scraper()),
            download_silent,
        )
        self.tags, self.tag_mode = tags, tag_mode
        self.denied_tags, self.denied_tag_mode = (denied_tags or []), denied_tag_mode
        self.order_by = order_by
        self.period = period
        self.select = select
        self.kwargs = kwargs

    def _params(self, page):
        """Build the query parameters for search-result page ``page``.

        :param page: Page index; sent as a string.
        :return: A new dict of query parameters, with ``self.kwargs`` merged
            on top of the built-in ones.
        """
        search_sep = '&&' if self.tag_mode == 'and' else '||'
        params = {
            # Accept both enum members and plain strings: both enums mix in
            # ``str``, so callers may pass 'rating' instead of OrderBy.RATING.
            'order_by': getattr(self.order_by, 'value', self.order_by),
            'ldate': getattr(self.period, 'value', self.period),
            'lang': 'en',
            'page': str(page),
            'search_tag': search_sep.join(self.tags),
        }

        if self.denied_tags:
            denied_sep = '&&' if self.denied_tag_mode == 'and' else '||'
            params['denied_tags'] = denied_sep.join(self.denied_tags)

        # User-supplied kwargs come last so they win on key collisions.
        return {**params, **self.kwargs}

    def _get_url(self, post, resp):
        """Return the image URL for ``post`` according to ``self.select``.

        :param post: Post record from the search API; its ``'md5'`` key is
            always read.
        :param resp: Response of the post's HTML page; only parsed when
            ``self.select == 'original'``.
        :return: The image URL. For ``'original'`` this is the ``href`` of
            ``#rating a.download_icon`` on the post page.
        :raises ValueError: If ``self.select`` is not one of the supported values.
        """
        md5 = post['md5']
        if self.select == 'thumbnail':
            return f'https://cdn.anime-pictures.net/previews/{md5[:3]}/{md5}_bp.jpg'
        if self.select == 'preview':
            return f'https://cdn.anime-pictures.net/previews/{md5[:3]}/{md5}_cp.jpg'
        if self.select == 'original':
            # NOTE(review): presumably this yields None when the download link
            # is absent from the page; callers do not guard against that - confirm.
            return pq(resp.text)('#rating a.download_icon').attr('href')

        raise ValueError(f'Invalid image selection - {self.select!r}.')

    def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]:
        """Yield ``(post_id, url, meta)`` for every post matching the search.

        Pages are requested starting from index 0 until the API returns an
        empty ``posts`` list. ``meta`` holds the raw post record under
        ``'anime_pictures'``, plus ``'group_id'``, ``'filename'`` and a
        ``'tags'`` mapping in which every scraped tag has weight ``1.0``.

        HTTP error statuses are raised via ``raise_for_status()``.
        """
        page = 0
        while True:
            resp = srequest(self.session, 'GET', f'{self.__root__}/api/v3/posts', params=self._params(page))
            resp.raise_for_status()

            posts = resp.json()['posts']
            if not posts:
                break

            for post in posts:
                # The post page is needed for the tags regardless of ``select``.
                resp_page = srequest(self.session, 'GET', f'{self.__root__}/posts/{post["id"]}?lang=en')
                resp_page.raise_for_status()

                url = self._get_url(post, resp_page)
                # Tag names are normalised by replacing spaces with underscores.
                tags = [item.text().replace(' ', '_') for item in pq(resp_page.text)('ul.tags li > a').items()]
                _, ext_name = os.path.splitext(urlsplit(url).filename)
                filename = f'{self.group_name}_{post["id"]}{ext_name}'
                meta = {
                    'anime_pictures': post,
                    'group_id': f'{self.group_name}_{post["id"]}',
                    'filename': filename,
                    'tags': {key: 1.0 for key in tags}
                }
                yield post['id'], url, meta

            page += 1