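"""
Pixiv data sources built on pixivpy3's AppPixivAPI.

Each source paginates one API endpoint (keyword search, a user's illustrations,
or the rankings), filters the returned works, and yields (id, url, meta)
records for WebDataSource to download.
"""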
import logging
import os
from typing import Iterator, Optional, Union, Tuple, Literal

from hbutils.system import urlsplit
from pixivpy3 import AppPixivAPI

from .web import WebDataSource
from ..utils import get_requests_session

_FILTER = Literal["for_ios", ""]
_TYPE = Literal["illust", "manga", ""]
_RESTRICT = Literal["public", "private", ""]
_CONTENT_TYPE = Literal["illust", "manga", ""]
_MODE = Literal[
    "day",
    "week",
    "month",
    "day_male",
    "day_female",
    "week_original",
    "week_rookie",
    "day_manga",
    "day_r18",
    "day_male_r18",
    "day_female_r18",
    "week_r18",
    "week_r18g",
    "",
]
_SEARCH_TARGET = Literal[
    "partial_match_for_tags", "exact_match_for_tags", "title_and_caption", "keyword", ""
]
_SORT = Literal["date_desc", "date_asc", "popular_desc", ""]
_DURATION = Literal[
    "within_last_day", "within_last_week", "within_last_month", "", None
]
_BOOL = Literal["true", "false"]
_SELECT = Literal['square_medium', 'medium', 'large', 'original']


class BasePixivSource(WebDataSource):
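    """
    Shared logic for the Pixiv sources below: API client setup, optional
    authentication via a refresh token, and conversion of illustration
    payloads into downloadable (id, url, meta) records.

    ``select`` picks the image size to download, ``no_ai`` skips works flagged
    as AI-generated (illust_ai_type == 2), and ``download_silent`` is passed
    through to WebDataSource.
    """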
    def __init__(self, group_name: str = 'pixiv', select: _SELECT = 'large',
                 no_ai: bool = False, refresh_token: Optional[str] = None, download_silent: bool = True):
        self.select = select
        self.no_ai = no_ai
        self.refresh_token = refresh_token
        self.client = AppPixivAPI()
        self.client.requests = get_requests_session(session=self.client.requests)
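        # Pixiv's image CDN requires a Referer header; reuse the app API origin.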
        self.client.requests.headers.update({"Referer": "https://app-api.pixiv.net/"})
        WebDataSource.__init__(self, group_name, self.client.requests, download_silent)

    def _iter_illustration(self) -> Iterator[dict]:
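        """Yield raw illustration dicts from the Pixiv API; implemented by subclasses."""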
        raise NotImplementedError  # pragma: no cover

    def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]:
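        """Authenticate if a refresh token is set, then yield one record per image page."""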
        if self.refresh_token:
            self.client.auth(refresh_token=self.refresh_token)

        for illust in self._iter_illustration():
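            # Keep only plain illustrations; optionally drop works flagged as AI-generated.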
            if illust['type'] != 'illust':
                continue
            if self.no_ai and illust['illust_ai_type'] == 2:
                continue

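            # Single-page works expose the full-size URL under 'meta_single_page';
            # multi-page works list per-page 'image_urls' under 'meta_pages'.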
            if illust['page_count'] == 1:
                if self.select != 'original':
                    urls = [illust['image_urls'][self.select]]
                else:
                    urls = [illust['meta_single_page']['original_image_url']]
            else:
                urls = [page['image_urls'][self.select] for page in illust['meta_pages']]

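            # Emit one record per page, keeping the raw API payload plus stable
            # group/instance identifiers and the target filename in the metadata.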
            for i, url in enumerate(urls):
                _, ext_name = os.path.splitext(urlsplit(url).filename)
                filename = f'{self.group_name}_{illust["id"]}_{i}{ext_name}'
                meta = {
                    'pixiv': illust,
                    'group_id': f'{self.group_name}_{illust["id"]}',
                    'instance_id': f'{self.group_name}_{illust["id"]}_{i}',
                    'filename': filename,
                }
                yield f'{illust["id"]}_{i}', url, meta


class PixivSearchSource(BasePixivSource):
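    """
    Illustrations matched by AppPixivAPI.search_illust for the given word,
    honouring the search target, sort order, duration, date range and filter
    accepted by that endpoint.
    """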
    def __init__(self, word: str, search_target: _SEARCH_TARGET = "partial_match_for_tags",
                 sort: _SORT = "date_desc", duration: _DURATION = None, start_date: Optional[str] = None,
                 end_date: Optional[str] = None, filter: _FILTER = "for_ios", req_auth: bool = True,
                 group_name: str = 'pixiv', select: _SELECT = 'large',
                 no_ai: bool = False, refresh_token: Optional[str] = None, download_silent: bool = True):
        BasePixivSource.__init__(self, group_name, select, no_ai, refresh_token, download_silent)
        self.word = word
        self.search_target = search_target
        self.sort = sort
        self.duration = duration
        self.start_date = start_date
        self.end_date = end_date
        self.filter = filter
        self.req_auth = req_auth

    def _iter_illustration(self) -> Iterator[dict]:
        offset = 0
        while True:
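            # Page through results by offset; stop on an empty page or when the
            # response has no 'illusts' key (typically an error payload).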
            data = self.client.search_illust(self.word, self.search_target, self.sort, self.duration,
                                             self.start_date, self.end_date, self.filter, offset, self.req_auth)
            if 'illusts' not in data:
                logging.warning(f'No illusts found on page (offset: {offset!r}), stopping: {data!r}.')
                break
            illustrations = data['illusts']
            yield from illustrations

            offset += len(illustrations)
            if not illustrations:
                break


class PixivUserSource(BasePixivSource):
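    """Illustrations (or manga) posted by a single user, via AppPixivAPI.user_illusts."""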
    def __init__(self, user_id: Union[int, str], type: _TYPE = "illust",
                 filter: _FILTER = "for_ios", req_auth: bool = True,
                 group_name: str = 'pixiv', select: _SELECT = 'large',
                 no_ai: bool = False, refresh_token: Optional[str] = None, download_silent: bool = True):
        BasePixivSource.__init__(self, group_name, select, no_ai, refresh_token, download_silent)
        self.user_id = user_id
        self.type = type
        self.filter = filter
        self.req_auth = req_auth

    def _iter_illustration(self) -> Iterator[dict]:
        offset = 0
        while True:
            data = self.client.user_illusts(self.user_id, self.type, self.filter, offset, self.req_auth)
            if 'illusts' not in data:
                logging.warning(f'No illusts found on page (offset: {offset!r}), stopping: {data!r}.')
                break
            illustrations = data['illusts']
            yield from illustrations

            offset += len(illustrations)
            if not illustrations:
                break


class PixivRankingSource(BasePixivSource):
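    """Entries of a Pixiv ranking (daily, weekly, etc.), via AppPixivAPI.illust_ranking."""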
    def __init__(self, mode: _MODE = "day", filter: _FILTER = "for_ios",
                 date: Optional[str] = None, req_auth: bool = True,
                 group_name: str = 'pixiv', select: _SELECT = 'large',
                 no_ai: bool = False, refresh_token: Optional[str] = None, download_silent: bool = True):
        BasePixivSource.__init__(self, group_name, select, no_ai, refresh_token, download_silent)
        self.mode = mode
        self.filter = filter
        self.date = date
        self.req_auth = req_auth

    def _iter_illustration(self) -> Iterator[dict]:
        offset = 0
        while True:
            data = self.client.illust_ranking(self.mode, self.filter, self.date, offset, self.req_auth)
            if 'illusts' not in data:
                logging.warning(f'No illusts found on page (offset: {offset!r}), stopping: {data!r}.')
                break
            illustrations = data['illusts']
            yield from illustrations

            offset += len(illustrations)
            if not illustrations:
                break