- log_thread.py: thread-safe ContextVar bridge so executor threads can log
individual LLM calls and archive searches back to the event loop (sketch below)
- ai_log.py: init_thread_logging(), notify_entity_update(); WS now pushes
entity_update messages when book data changes after any plugin or batch run
- batch.py: replace batch_pending.json with a batch_queue SQLite table;
run_batch_consumer() re-reads the queue on every pass so new books can be
added while a batch is running; add_to_queue() deduplicates (sketch below)
- migrate.py: fix _migrate_v1 (clear-on-startup bug); add _migrate_v2 for
batch_queue table
- _client.py / archive.py / identification.py: wrap each LLM API call and
archive search with log_thread start/finish entries
- api.py: POST /api/batch returns {already_running, added}; notify_entity_update
after identify pipeline
- models.default.yaml: strengthen ai_identify confidence-scoring instructions;
warn against placeholder data
- detail-render.js: book log entries show clickable ID + spine thumbnail;
book spine/title images open full-screen popup
- events.js: batch-start handles already_running+added; open-img-popup action
- init.js: entity_update WS handler; image popup close listeners
- overlays.css / index.html: full-screen image popup overlay
- eslint.config.js: add new globals; fix no-redeclare/no-unused-vars for
multi-file global architecture; all lint errors resolved
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
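Below, a minimal sketch of the log_thread.py bridge described above, assuming a queue handoff. init_thread_logging() is named in the message, but the log_event() name, the dict payload, and the asyncio.Queue are illustrative guesses, not the shipped code. The mechanism it demonstrates is real: asyncio.to_thread() copies the current context into the worker thread, so a ContextVar set on the event-loop side is readable there, and loop.call_soon_threadsafe() is the one loop API that may be called from another thread.

import asyncio
import contextvars
from typing import Any, Callable

# Per-context sink that worker threads use to hand log entries back to
# the event loop. The dict payload shape is hypothetical.
_log_sink: contextvars.ContextVar[Callable[[dict[str, Any]], None] | None] = (
    contextvars.ContextVar("_log_sink", default=None)
)


def init_thread_logging(queue: "asyncio.Queue[dict[str, Any]]") -> None:
    """Run on the event loop before dispatching work to executor threads."""
    loop = asyncio.get_running_loop()

    def emit(entry: dict[str, Any]) -> None:
        # Safe from any thread: schedules the put on the loop's own thread.
        loop.call_soon_threadsafe(queue.put_nowait, entry)

    _log_sink.set(emit)


def log_event(entry: dict[str, Any]) -> None:
    """Called from worker threads around each LLM call / archive search."""
    sink = _log_sink.get()
    if sink is not None:
        sink(entry)

Because asyncio.to_thread() runs its target inside a copy of the caller's context, any work dispatched with asyncio.to_thread() after init_thread_logging() sees the sink and can report back without touching the loop directly.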
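In the same spirit, a hedged sketch of the batch_queue table that replaces batch_pending.json: the schema, column names, and process_book() stand-in are assumptions. It illustrates the two behaviors the message names: deduplication in add_to_queue() and re-querying the table on every pass so books added mid-run are picked up.

import sqlite3


def process_book(book_id: str) -> None:
    """Stand-in for the real per-book identify pipeline."""


SCHEMA = """
CREATE TABLE IF NOT EXISTS batch_queue (
    id      INTEGER PRIMARY KEY AUTOINCREMENT,  -- preserves FIFO order
    book_id TEXT NOT NULL UNIQUE                -- UNIQUE makes re-adds no-ops
)
"""


def add_to_queue(conn: sqlite3.Connection, book_id: str) -> bool:
    """True if the book was newly queued, False if it was already there."""
    cur = conn.execute(
        "INSERT OR IGNORE INTO batch_queue (book_id) VALUES (?)", (book_id,)
    )
    conn.commit()
    return cur.rowcount == 1  # OR IGNORE leaves rowcount at 0 on a duplicate


def run_batch_consumer(conn: sqlite3.Connection) -> None:
    """Re-queries on every pass, so rows inserted mid-run are consumed too."""
    while True:
        row = conn.execute(
            "SELECT book_id FROM batch_queue ORDER BY id LIMIT 1"
        ).fetchone()
        if row is None:
            break
        process_book(row[0])
        conn.execute("DELETE FROM batch_queue WHERE book_id = ?", (row[0],))
        conn.commit()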
"""Book identifier plugin — VLM result + archive candidates → ranked identification blocks.
|
|
|
|
Input: raw_text string (from text_recognizer), archive_results (deduplicated candidates),
|
|
images (list of (b64, mime) pairs if is_vlm).
|
|
Output: list of IdentifyBlock dicts ranked by descending confidence score.
|
|
Result stored as books.ai_blocks JSON.
|
|
"""
|
|
|
|
import json
|
|
from typing import Any, TypeGuard
|
|
|
|
from models import AIConfig, CandidateRecord, IdentifyBlock
|
|
|
|
from ._client import AIClient
|
|
|
|
|
|
def _is_str_dict(v: object) -> TypeGuard[dict[str, Any]]:
|
|
return isinstance(v, dict)
|
|
|
|
|
|
def _is_any_list(v: object) -> TypeGuard[list[Any]]:
|
|
return isinstance(v, list)
|
|
|
|
|
|
class BookIdentifierPlugin:
|
|
"""Identifies a book by combining VLM spine text with archive search results."""
|
|
|
|
category = "book_identifiers"
|
|
OUTPUT_FORMAT = (
|
|
'[{"title": "The Master and Margarita", "author": "Mikhail Bulgakov", '
|
|
'"year": "1967", "isbn": "", "publisher": "YMCA Press", '
|
|
'"score": 0.95, "sources": ["rusneb", "openlibrary"]}, '
|
|
'{"title": "Master i Margarita", "author": "M. Bulgakov", '
|
|
'"year": "2005", "isbn": "978-5-17-123456-7", "publisher": "AST", '
|
|
'"score": 0.72, "sources": ["web"]}]'
|
|
)
|
|
|
|
def __init__(
|
|
self,
|
|
plugin_id: str,
|
|
name: str,
|
|
ai_config: AIConfig,
|
|
prompt_text: str,
|
|
auto_queue: bool,
|
|
rate_limit_seconds: float,
|
|
):
|
|
self.plugin_id = plugin_id
|
|
self.name = name
|
|
self.auto_queue = auto_queue
|
|
self.rate_limit_seconds = rate_limit_seconds
|
|
self._client = AIClient(ai_config, self.OUTPUT_FORMAT)
|
|
self._prompt_text = prompt_text
|
|
|
|
def identify(
|
|
self,
|
|
raw_text: str,
|
|
archive_results: list[CandidateRecord],
|
|
images: list[tuple[str, str]],
|
|
) -> list[IdentifyBlock]:
|
|
"""Call the AI model to produce ranked identification blocks.
|
|
|
|
Args:
|
|
raw_text: Verbatim text read from the book spine.
|
|
archive_results: Deduplicated candidates from archive searchers.
|
|
images: (base64, mime_type) pairs; non-empty only when is_vlm is True.
|
|
|
|
Returns:
|
|
List of IdentifyBlock dicts ranked by descending score.
|
|
"""
|
|
archive_json = json.dumps(archive_results, ensure_ascii=False)
|
|
raw = self._client.call(
|
|
self._prompt_text,
|
|
images,
|
|
text_vars={"RAW_TEXT": raw_text, "ARCHIVE_RESULTS": archive_json},
|
|
output_is_list=True,
|
|
)
|
|
blocks: list[IdentifyBlock] = []
|
|
for item in raw:
|
|
if not _is_str_dict(item):
|
|
continue
|
|
sources: list[str] = []
|
|
sources_val = item.get("sources")
|
|
if _is_any_list(sources_val):
|
|
for sv in sources_val:
|
|
if isinstance(sv, str):
|
|
sources.append(sv)
|
|
block = IdentifyBlock(
|
|
title=str(item.get("title") or "").strip(),
|
|
author=str(item.get("author") or "").strip(),
|
|
year=str(item.get("year") or "").strip(),
|
|
isbn=str(item.get("isbn") or "").strip(),
|
|
publisher=str(item.get("publisher") or "").strip(),
|
|
score=float(item.get("score") or 0.0),
|
|
sources=sources,
|
|
)
|
|
blocks.append(block)
|
|
return sorted(blocks, key=lambda b: b.get("score", 0.0), reverse=True)
|
|
|
|
@property
|
|
def model(self) -> str:
|
|
"""AI model name used for identification."""
|
|
return self._client.cfg["model"]
|
|
|
|
@property
|
|
def max_image_px(self) -> int:
|
|
"""Maximum pixel dimension for images passed to the AI model."""
|
|
return self._client.cfg["max_image_px"]
|
|
|
|
@property
|
|
def confidence_threshold(self) -> float:
|
|
"""Minimum score threshold for the top block to set ai_* fields."""
|
|
return self._client.cfg["confidence_threshold"]
|
|
|
|
@property
|
|
def is_vlm(self) -> bool:
|
|
"""True if images should be included in the request."""
|
|
return self._client.cfg["is_vlm"]
|
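A hypothetical usage sketch of the class above. AIConfig's exact shape is not visible in this file, so the dict below assumes it carries at least the four keys the properties read; the {RAW_TEXT}/{ARCHIVE_RESULTS} placeholders in prompt_text are likewise a guess at how AIClient applies text_vars, and the model name is a placeholder.

cfg: AIConfig = {
    "model": "gpt-4o-mini",        # assumed; any VLM-capable model name
    "max_image_px": 1024,
    "confidence_threshold": 0.6,
    "is_vlm": True,
}
plugin = BookIdentifierPlugin(
    plugin_id="ai_identify",
    name="AI identify",
    ai_config=cfg,
    prompt_text=(
        "Identify this book.\nSpine text: {RAW_TEXT}\n"
        "Archive candidates: {ARCHIVE_RESULTS}"
    ),
    auto_queue=True,
    rate_limit_seconds=1.0,
)
blocks = plugin.identify(
    raw_text="МАСТЕР И МАРГАРИТА  БУЛГАКОВ",
    archive_results=[],
    images=[],  # would carry (b64, mime) pairs when is_vlm is True
)
if blocks and blocks[0]["score"] >= plugin.confidence_threshold:
    pass  # top block is confident enough to populate the ai_* fields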