Initial commit
Photo-based book cataloger with AI identification. Room → Cabinet → Shelf → Book hierarchy; FastAPI + SQLite backend; vanilla JS SPA; OpenAI-compatible plugin system for boundary detection, text recognition, and archive search.
This commit is contained in:
59
src/plugins/archives/rsl.py
Normal file
59
src/plugins/archives/rsl.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""RSL (Russian State Library) AJAX JSON search API plugin (search.rsl.ru)."""
|
||||
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from models import CandidateRecord
|
||||
|
||||
from ..rate_limiter import RateLimiter
|
||||
|
||||
_DOMAIN = "search.rsl.ru"
|
||||
|
||||
|
||||
class RSLPlugin:
    """Archive-search plugin backed by the search.rsl.ru AJAX JSON endpoint.

    ``search`` sends the query to ``/site/ajax-search`` and converts the
    leading records of the response into ``CandidateRecord`` objects.
    """

    category = "archive_searchers"

    def __init__(
        self,
        plugin_id: str,
        name: str,
        rate_limiter: RateLimiter,
        rate_limit_seconds: float,
        auto_queue: bool,
        timeout: int,
        config: dict[str, Any],
    ):
        """Store the plugin settings.

        Args:
            plugin_id: Identifier written into ``CandidateRecord.source``.
            name: Plugin name; stored, not used by this class itself.
            rate_limiter: Limiter whose ``wait_and_record`` is called before
                every request.
            rate_limit_seconds: Interval passed to the limiter for this domain.
            auto_queue: Stored flag; not read by this class itself.
            timeout: Timeout passed to the HTTP request.
            config: Accepted but not used or stored by this plugin.
                NOTE(review): presumably kept so all plugins share one
                constructor signature - confirm against the plugin loader.
        """
        self.plugin_id = plugin_id
        self.name = name
        self._rl = rate_limiter
        self.rate_limit_seconds = rate_limit_seconds
        self.auto_queue = auto_queue
        self.timeout = timeout

    @staticmethod
    def _text(*values: Any) -> str:
        """Return the first truthy value as stripped text, or ``""`` if none.

        List/tuple values are joined with ``", "`` so that a multi-valued
        field does not end up as a Python list repr such as ``"['A', 'B']"``.
        """
        for value in values:
            if not value:
                continue
            if isinstance(value, (list, tuple)):
                parts = [str(item).strip() for item in value if item]
                return ", ".join(part for part in parts if part)
            return str(value).strip()
        return ""

    def search(self, query: str) -> list[CandidateRecord]:
        """Search the RSL catalogue and return up to three candidates.

        Args:
            query: Free-text search string sent as the ``q`` parameter.

        Returns:
            One ``CandidateRecord`` per record among the first three returned
            that has a non-empty title. An empty list when the payload has no
            recognisable record list.

        Raises:
            httpx.HTTPStatusError: The server answered with a non-2xx status.
            httpx.RequestError: The request could not be completed.
            ValueError: The response body is not valid JSON.
        """
        self._rl.wait_and_record(_DOMAIN, self.rate_limit_seconds)
        r = httpx.get(
            "https://search.rsl.ru/site/ajax-search",
            params={"language": "ru", "q": query, "page": 1, "perPage": 5},
            timeout=self.timeout,
            headers={"Accept": "application/json"},
        )
        # Surface HTTP failures instead of reporting them as "no matches".
        r.raise_for_status()

        data: Any = r.json()
        if not isinstance(data, dict):
            return []

        # The key holding the record list is not pinned down; try each in turn.
        records: Any = data.get("records") or data.get("items") or data.get("data") or []
        if not isinstance(records, list):
            return []

        out: list[CandidateRecord] = []
        for rec in records[:3]:
            if not isinstance(rec, dict):
                continue
            title = self._text(rec.get("title"), rec.get("name"))
            if not title:
                continue
            out.append(
                CandidateRecord(
                    source=self.plugin_id,
                    title=title,
                    author=self._text(rec.get("author"), rec.get("authors")),
                    year=self._text(rec.get("year"), rec.get("pubyear")),
                    isbn=self._text(rec.get("isbn")),
                    publisher=self._text(rec.get("publisher")),
                )
            )
        return out
|
||||
Reference in New Issue
Block a user