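"""Fetch image URLs tagged "trap" from the safebooru.org API and download them concurrently."""
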
import random
from concurrent.futures import ThreadPoolExecutor
from itertools import islice
from pathlib import Path
from typing import Union, List
from xml.etree import ElementTree

import requests
from click import BadParameter

from traps.utils import filename_from_url

__all__ = ["get"]

API_URL = "https://safebooru.org/index.php"
MAX_OFFSET = 130  # Do not change.


def _fetch_urls(n: int = 1) -> List[str]:
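    """Return ``n`` image URLs gathered from randomly chosen result pages."""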
    if n > 5000:
        raise BadParameter("you can't download more than 5000 files at a time")
    if n < 1:
        raise BadParameter("you can't download fewer than one file")

    # Shared with the worker threads spawned below; ``list.append`` is
    # atomic under the GIL, though the membership check in fetch() can
    # in principle race.
    used_offsets = []
    urls = []

    def fetch(limit):
        # Draw a page offset no other batch has used, so concurrent
        # batches don't return duplicate posts.
        offset = random.randint(1, MAX_OFFSET)
        while offset in used_offsets:
            offset = random.randint(1, MAX_OFFSET)
        used_offsets.append(offset)
        # Safebooru's Gelbooru-style "dapi" endpoint: ``pid`` selects the
        # result page, ``limit`` caps posts per page.
        params = {
            "page": "dapi",
            "s": "post",
            "q": "index",
            "limit": 100,
            "pid": offset,
            "tags": "trap",
        }
        resp = requests.get(API_URL, params=params)
        posts = ElementTree.fromstring(resp.text).iter("post")
        # Take at most ``limit`` URLs; a page may contain fewer posts.
        return [post.attrib["file_url"] for post in islice(posts, limit)]

    if n >= 100:
        # Fetch full pages of 100 posts in parallel, then fall through to
        # pick up the remainder.
        with ThreadPoolExecutor(max_workers=16) as p:
            for batch in p.map(lambda _: fetch(100), range(n // 100)):
                urls += batch
        n %= 100
    if n > 0:
        urls += fetch(n)
    return urls


def _download(directory: Path, url: str) -> None:
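    """Stream the file at ``url`` into ``directory``."""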
    resp = requests.get(url, stream=True)
    filename = filename_from_url(url)
    with open(directory / filename, "wb") as f:
        # Write in 1 KiB chunks so large images aren't held in memory.
        for part in resp.iter_content(chunk_size=1024):
            f.write(part)


def get(directory: Union[str, Path] = "traps", amount: int = 1) -> None:
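    """Download ``amount`` images into ``directory``, creating it if needed."""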
    if not isinstance(directory, Path):
        directory = Path(directory)
    directory.mkdir(exist_ok=True)

    urls = _fetch_urls(amount)
    with ThreadPoolExecutor(max_workers=16) as p:
        # map() submits every download up front; leaving the with-block
        # waits for completion, but worker exceptions are never retrieved.
        p.map(lambda url: _download(directory, url), urls)
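

# A convenience sketch for running the module directly (an assumption: the
# click import suggests ``get`` is normally invoked through a CLI defined
# elsewhere in the package):
if __name__ == "__main__":
    get(amount=3)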