Spaces:
Runtime error
Runtime error
File size: 9,033 Bytes
4f8ad24 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
import os
import re
from typing import Iterator, Tuple, Union, List, Optional
from hbutils.system import urlsplit
from .web import WebDataSource, NoURL
from ..utils import get_requests_session, srequest
class KonachanLikeSource(WebDataSource):
    """Data source for image boards that expose a Konachan-style JSON post API.

    Posts matching ``tags`` are requested page by page from
    ``{site_url}/post.json``. For every post an image URL is chosen and the
    post is yielded together with a metadata dict.

    Subclasses adapt to other sites by overriding :meth:`_request`,
    :meth:`_get_data_from_raw` or :meth:`_select_url`.
    """

    def __init__(self, site_name: str, site_url: str,
                 tags: List[str], start_page: int = 1, min_size: Optional[int] = 800,
                 group_name: Optional[str] = None, download_silent: bool = True):
        """
        :param site_name: Short site identifier; used as the metadata key for
            the raw post data and as the group name when ``group_name`` is falsy.
        :param site_url: Base URL of the site, without a trailing slash.
        :param tags: Search tags; joined with single spaces into the query.
        :param start_page: Number of the first page to request.
        :param min_size: Minimum width and height an image variant must reach
            to be selected; ``None`` disables size-based selection.
        :param group_name: Group name handed to ``WebDataSource``.
        :param download_silent: Forwarded unchanged to ``WebDataSource``.
        """
        WebDataSource.__init__(self, group_name or site_name, get_requests_session(), download_silent)
        self.site_name = site_name
        self.site_url = site_url
        self.start_page = start_page
        self.min_size = min_size
        self.tags: List[str] = tags

    def _select_url(self, data):
        """Pick the image URL to download for one post.

        When ``min_size`` is set, every ``*_url`` entry that has matching
        ``*_width`` / ``*_height`` entries (plus ``file_url`` paired with the
        plain ``width`` / ``height``) is a candidate. Among candidates whose
        width and height both reach ``min_size``, the one with the smallest
        width wins. Otherwise ``file_url`` is used.

        :param data: Post dict as returned by the site.
        :return: The selected URL.
        :raises NoURL: If nothing qualifies and the post has no ``file_url``.
        """
        if self.min_size is not None:
            candidates = [
                (key, f'{key[:-4]}_width', f'{key[:-4]}_height')
                for key in data
                if key.endswith('_url')
            ]
            # The original file stores its dimensions under the bare
            # 'width' / 'height' keys rather than 'file_width' / 'file_height'.
            candidates.append(('file_url', 'width', 'height'))

            best_url, best_width = None, None
            for url_key, width_key, height_key in candidates:
                if url_key not in data or width_key not in data or height_key not in data:
                    continue
                width, height = data[width_key], data[height_key]
                if width >= self.min_size and height >= self.min_size:
                    # Prefer the smallest variant that is still large enough.
                    if best_url is None or width < best_width:
                        best_url, best_width = data[url_key], width

            if best_url is not None:
                return best_url

        if 'file_url' in data:
            return data['file_url']
        raise NoURL

    def _request(self, page):
        """Request one result page (up to 100 posts) for the configured tags.

        :param page: Page number to fetch.
        :return: The response object produced by ``srequest``.
        """
        query = {
            'tags': ' '.join(self.tags),
            'limit': '100',
            'page': str(page),
        }
        return srequest(self.session, 'GET', f'{self.site_url}/post.json', params=query)

    def _get_data_from_raw(self, raw):
        """Extract the list of posts from the decoded JSON payload.

        The base implementation returns the payload unchanged; subclasses
        override this when the posts are nested inside the payload.
        """
        return raw

    def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]:
        """Iterate over all matching posts, page by page.

        Iteration stops at the first page whose body is blank or whose post
        list is empty. Posts for which no URL can be selected are skipped.

        :return: Iterator of ``(post id, image url, metadata)`` tuples.
        """
        page = self.start_page
        while True:
            resp = self._request(page)
            resp.raise_for_status()

            # response may be simply empty in rule34.xxx and xbooru.com
            if not resp.text.strip():
                break

            page_list = self._get_data_from_raw(resp.json())
            if not page_list:
                break

            for data in page_list:
                try:
                    url = self._select_url(data)
                except NoURL:
                    continue

                _, ext_name = os.path.splitext(urlsplit(url).filename)
                post_key = f'{self.group_name}_{data["id"]}'
                meta = {
                    self.site_name: data,
                    'group_id': post_key,
                    'filename': f'{post_key}{ext_name}',
                    # str.split() drops empty pieces, so an empty tag string or
                    # leading/trailing whitespace cannot create a '' tag.
                    'tags': {tag: 1.0 for tag in data['tags'].split()},
                }
                yield data["id"], url, meta

            page += 1
class YandeSource(KonachanLikeSource):
    """Konachan-style source preconfigured for https://yande.re."""

    def __init__(self, tags: List[str], min_size: Optional[int] = 800,
                 group_name: str = 'yande', download_silent: bool = True):
        """
        :param tags: Search tags for the query.
        :param min_size: Minimum width/height for URL selection, or ``None``.
        :param group_name: Group name forwarded to the parent class.
        :param download_silent: Forwarded unchanged to the parent class.
        """
        KonachanLikeSource.__init__(
            self,
            site_name='yande',
            site_url='https://yande.re',
            tags=tags,
            start_page=1,
            min_size=min_size,
            group_name=group_name,
            download_silent=download_silent,
        )
class KonachanSource(KonachanLikeSource):
    """Konachan-style source preconfigured for https://konachan.com."""

    def __init__(self, tags: List[str], min_size: Optional[int] = 800,
                 group_name: str = 'konachan', download_silent: bool = True):
        """
        :param tags: Search tags for the query.
        :param min_size: Minimum width/height for URL selection, or ``None``.
        :param group_name: Group name forwarded to the parent class.
        :param download_silent: Forwarded unchanged to the parent class.
        """
        KonachanLikeSource.__init__(
            self,
            site_name='konachan',
            site_url='https://konachan.com',
            tags=tags,
            start_page=1,
            min_size=min_size,
            group_name=group_name,
            download_silent=download_silent,
        )
class KonachanNetSource(KonachanLikeSource):
    """Konachan-style source preconfigured for https://konachan.net."""

    def __init__(self, tags: List[str], min_size: Optional[int] = 800,
                 group_name: str = 'konachan_net', download_silent: bool = True):
        """
        :param tags: Search tags for the query.
        :param min_size: Minimum width/height for URL selection, or ``None``.
        :param group_name: Group name forwarded to the parent class.
        :param download_silent: Forwarded unchanged to the parent class.
        """
        KonachanLikeSource.__init__(
            self,
            site_name='konachan_net',
            site_url='https://konachan.net',
            tags=tags,
            start_page=1,
            min_size=min_size,
            group_name=group_name,
            download_silent=download_silent,
        )
class LolibooruSource(KonachanLikeSource):
    """Konachan-style source for https://lolibooru.moe.

    Differs from the parent only in the listing endpoint, which is
    ``/post/index.json`` instead of ``/post.json``.
    """

    def __init__(self, tags: List[str], min_size: Optional[int] = 800,
                 group_name: str = 'lolibooru', download_silent: bool = True):
        """
        :param tags: Search tags for the query.
        :param min_size: Minimum width/height for URL selection, or ``None``.
        :param group_name: Group name forwarded to the parent class.
        :param download_silent: Forwarded unchanged to the parent class.
        """
        KonachanLikeSource.__init__(
            self,
            site_name='lolibooru',
            site_url='https://lolibooru.moe',
            tags=tags,
            start_page=1,
            min_size=min_size,
            group_name=group_name,
            download_silent=download_silent,
        )

    def _request(self, page):
        """Request one result page (up to 100 posts) from ``/post/index.json``.

        :param page: Page number to fetch.
        :return: The response object produced by ``srequest``.
        """
        query = {
            'tags': ' '.join(self.tags),
            'limit': '100',
            'page': str(page),
        }
        endpoint = f'{self.site_url}/post/index.json'
        return srequest(self.session, 'GET', endpoint, params=query)
class Rule34LikeSource(KonachanLikeSource):
    """Source for sites queried through ``index.php?page=dapi``.

    Paging starts at 0 and the page number is sent as the ``pid`` parameter.
    """

    def __init__(self, site_name: str, site_url: str,
                 tags: List[str], min_size: Optional[int] = 800,
                 group_name: Optional[str] = None, download_silent: bool = True):
        """
        :param site_name: Short site identifier forwarded to the parent class.
        :param site_url: Base URL of the site, without a trailing slash.
        :param tags: Search tags for the query.
        :param min_size: Minimum width/height for URL selection, or ``None``.
        :param group_name: Group name forwarded to the parent class.
        :param download_silent: Forwarded unchanged to the parent class.
        """
        KonachanLikeSource.__init__(
            self,
            site_name=site_name,
            site_url=site_url,
            tags=tags,
            start_page=0,
            min_size=min_size,
            group_name=group_name,
            download_silent=download_silent,
        )

    def _request(self, page):
        """Request one result page (up to 100 posts) in JSON form.

        :param page: Page index, sent as ``pid``.
        :return: The response object produced by ``srequest``.
        """
        query = {
            'page': 'dapi',
            's': 'post',
            'q': 'index',
            'tags': ' '.join(self.tags),
            'json': '1',
            'limit': '100',
            'pid': str(page),
        }
        endpoint = f'{self.site_url}/index.php'
        return srequest(self.session, 'GET', endpoint, params=query)
class Rule34Source(Rule34LikeSource):
    """Rule34-style source preconfigured for https://rule34.xxx."""

    def __init__(self, tags: List[str], min_size: Optional[int] = 800,
                 group_name: str = 'rule34', download_silent: bool = True):
        """
        :param tags: Search tags for the query.
        :param min_size: Minimum width/height for URL selection, or ``None``.
        :param group_name: Group name forwarded to the parent class.
        :param download_silent: Forwarded unchanged to the parent class.
        """
        Rule34LikeSource.__init__(
            self,
            site_name='rule34',
            site_url='https://rule34.xxx',
            tags=tags,
            min_size=min_size,
            group_name=group_name,
            download_silent=download_silent,
        )
class HypnoHubSource(Rule34LikeSource):
    """Rule34-style source preconfigured for https://hypnohub.net."""

    def __init__(self, tags: List[str], min_size: Optional[int] = 800,
                 group_name: str = 'hypnohub', download_silent: bool = True):
        """
        :param tags: Search tags for the query.
        :param min_size: Minimum width/height for URL selection, or ``None``.
        :param group_name: Group name forwarded to the parent class.
        :param download_silent: Forwarded unchanged to the parent class.
        """
        Rule34LikeSource.__init__(
            self,
            site_name='hypnohub',
            site_url='https://hypnohub.net',
            tags=tags,
            min_size=min_size,
            group_name=group_name,
            download_silent=download_silent,
        )
class GelbooruSource(Rule34LikeSource):
    """Rule34-style source for https://gelbooru.com.

    The posts are read from the ``post`` entry of the JSON payload.
    """

    def __init__(self, tags: List[str], min_size: Optional[int] = 800,
                 group_name: str = 'gelbooru', download_silent: bool = True):
        """
        :param tags: Search tags for the query.
        :param min_size: Minimum width/height for URL selection, or ``None``.
        :param group_name: Group name forwarded to the parent class.
        :param download_silent: Forwarded unchanged to the parent class.
        """
        Rule34LikeSource.__init__(
            self,
            site_name='gelbooru',
            site_url='https://gelbooru.com',
            tags=tags,
            min_size=min_size,
            group_name=group_name,
            download_silent=download_silent,
        )

    def _get_data_from_raw(self, raw):
        """Return the ``post`` entry of the payload, or ``None`` if absent."""
        if 'post' in raw:
            return raw['post']
        return None
class XbooruLikeSource(Rule34LikeSource):
    """Rule34-style source whose image URLs are built from post fields.

    The URL is assembled from ``img_site_url`` and the post's ``directory``
    and ``image`` fields rather than read from a ``*_url`` entry.
    """

    def __init__(self, site_name: str, site_url: str, img_site_url: str,
                 tags: List[str], min_size: Optional[int] = 800,
                 group_name: Optional[str] = None, download_silent: bool = True):
        """
        :param site_name: Short site identifier forwarded to the parent class.
        :param site_url: Base URL used for API requests.
        :param img_site_url: Base URL used when building image URLs.
        :param tags: Search tags for the query.
        :param min_size: Minimum width/height for URL selection, or ``None``.
        :param group_name: Group name forwarded to the parent class.
        :param download_silent: Forwarded unchanged to the parent class.
        """
        Rule34LikeSource.__init__(
            self,
            site_name=site_name,
            site_url=site_url,
            tags=tags,
            min_size=min_size,
            group_name=group_name,
            download_silent=download_silent,
        )
        self.img_site_url = img_site_url

    def _select_url(self, data):
        """Build the candidate URLs for a post and pick one.

        The full image is always a candidate; the sample image is added when
        the post's ``sample`` field is truthy. With ``min_size`` set, the
        narrowest candidate whose width and height both reach ``min_size``
        is returned. In every other case the full image URL is returned.

        :param data: Post dict as returned by the site.
        :return: The selected URL.
        """
        stem, _ = os.path.splitext(data['image'])
        options = [(
            f'{self.img_site_url}/images/{data["directory"]}/{data["image"]}',
            data['width'],
            data['height'],
        )]
        if data['sample']:
            sample_url = f'{self.img_site_url}/samples/{data["directory"]}/sample_{stem}.jpg?{data["id"]}'
            options.append((sample_url, data['sample_width'], data['sample_height']))

        original_url = options[0][0]
        if self.min_size is None:
            return original_url

        picked_url, picked_width = None, None
        for candidate_url, width, height in options:
            if not (width >= self.min_size and height >= self.min_size):
                continue
            # Keep the narrowest candidate that is still large enough.
            if picked_url is None or width < picked_width:
                picked_url, picked_width = candidate_url, width

        return original_url if picked_url is None else picked_url
class XbooruSource(XbooruLikeSource):
    """Xbooru-style source for https://xbooru.com, images on https://img.xbooru.com."""

    def __init__(self, tags: List[str], min_size: Optional[int] = 800,
                 group_name: str = 'xbooru', download_silent: bool = True):
        """
        :param tags: Search tags for the query.
        :param min_size: Minimum width/height for URL selection, or ``None``.
        :param group_name: Group name forwarded to the parent class.
        :param download_silent: Forwarded unchanged to the parent class.
        """
        XbooruLikeSource.__init__(
            self,
            site_name='xbooru',
            site_url='https://xbooru.com',
            img_site_url='https://img.xbooru.com',
            tags=tags,
            min_size=min_size,
            group_name=group_name,
            download_silent=download_silent,
        )
class SafebooruOrgSource(XbooruLikeSource):
    """Xbooru-style source for https://safebooru.org (API and images on the same host)."""

    def __init__(self, tags: List[str], min_size: Optional[int] = 800,
                 group_name: str = 'safebooru_org', download_silent: bool = True):
        """
        :param tags: Search tags for the query.
        :param min_size: Minimum width/height for URL selection, or ``None``.
        :param group_name: Group name forwarded to the parent class.
        :param download_silent: Forwarded unchanged to the parent class.
        """
        XbooruLikeSource.__init__(
            self,
            site_name='safebooru_org',
            site_url='https://safebooru.org',
            img_site_url='https://safebooru.org',
            tags=tags,
            min_size=min_size,
            group_name=group_name,
            download_silent=download_silent,
        )
class TBIBSource(XbooruLikeSource):
    """Xbooru-style source for https://tbib.org (API and images on the same host)."""

    def __init__(self, tags: List[str], min_size: Optional[int] = 800,
                 group_name: str = 'tbib', download_silent: bool = True):
        """
        :param tags: Search tags for the query.
        :param min_size: Minimum width/height for URL selection, or ``None``.
        :param group_name: Group name forwarded to the parent class.
        :param download_silent: Forwarded unchanged to the parent class.
        """
        XbooruLikeSource.__init__(
            self,
            site_name='tbib',
            site_url='https://tbib.org',
            img_site_url='https://tbib.org',
            tags=tags,
            min_size=min_size,
            group_name=group_name,
            download_silent=download_silent,
        )
|