# bookshelf/src/logic/identification.py

"""Book identification logic: status computation, AI result application, plugin runners."""

import json

import db
from db import now
from errors import BookNotFoundError, NoRawTextError
from logic.boundaries import book_spine_source
from logic.images import prep_img_b64
from models import (
    AIIdentifyResult,
    BookIdentifierPlugin,
    BookRow,
    CandidateRecord,
    TextRecognizeResult,
    TextRecognizerPlugin,
)

# Field names that exist on a book both as a user value (``<name>``) and as an
# AI suggestion (``ai_<name>``); compute_status compares the two per field.
AI_FIELDS = ("title", "author", "year", "isbn", "publisher")
# User fields that must all be non-blank before a book can count as approved.
_APPROVED_REQUIRED = ("title", "author", "year")


def compute_status(book: BookRow) -> str:
    """Derive the identification status of a book from its current field values.

    The rules, applied after stripping whitespace and treating ``None`` as empty:

    * no ``ai_title``                                   -> 'unidentified'
    * title/author/year all filled in AND every non-blank
      ``ai_*`` value equals its user counterpart        -> 'user_approved'
    * anything else                                     -> 'ai_identified'

    Args:
        book: The book row to evaluate.

    Returns:
        One of 'unidentified', 'ai_identified', or 'user_approved'.
    """

    def _text(attr: str) -> str:
        # None / missing values collapse to "" so blank checks are uniform.
        return (getattr(book, attr) or "").strip()

    if not _text("ai_title"):
        return "unidentified"

    has_required = all(map(_text, _APPROVED_REQUIRED))

    # A blank AI suggestion never counts as a disagreement.
    ai_disagrees = False
    for name in AI_FIELDS:
        suggestion = _text(f"ai_{name}")
        if suggestion and _text(name) != suggestion:
            ai_disagrees = True
            break

    if has_required and not ai_disagrees:
        return "user_approved"
    return "ai_identified"


def build_query(book: BookRow) -> str:
    """Pick the best available text to use as a search query for a book.

    Sources are tried in order and the first non-empty result wins:

    1. Stored candidates, in list order: "<author> <title>" of the first
       candidate that has at least one of the two fields non-blank.
    2. The AI fields: "<ai_author> <ai_title>" (either part may be missing).
    3. The raw OCR text, stripped.

    Args:
        book: The book row to build a query for.

    Returns:
        Query string, empty if no usable data is available.
    """
    stored: list[dict[str, object]] = json.loads(book.candidates or "[]")
    for entry in stored:
        # Candidate values come from JSON, so coerce to str before stripping.
        author = str(entry.get("author") or "").strip()
        title = str(entry.get("title") or "").strip()
        from_candidate = " ".join(part for part in (author, title) if part)
        if from_candidate:
            return from_candidate

    ai_parts = ((book.ai_author or "").strip(), (book.ai_title or "").strip())
    from_ai = " ".join(part for part in ai_parts if part)
    if from_ai:
        return from_ai

    return (book.raw_text or "").strip()


def save_user_fields(book_id: str, title: str, author: str, year: str, isbn: str, publisher: str, notes: str) -> str:
    """Store the user's edits for a book and refresh its identification status.

    Everything happens in one transaction: the fields are written via
    ``db.set_user_book_fields``, the row is read back, and the status derived
    from the stored values is written with ``db.set_book_status``.

    NOTE(review): the write is expected to also mirror the values into the
    ai_* fields so the edits count as approved; that happens inside
    ``db.set_user_book_fields`` and is not visible from this module.

    Args:
        book_id: ID of the book to update.
        title: User-provided title.
        author: User-provided author.
        year: User-provided year.
        isbn: User-provided ISBN.
        publisher: User-provided publisher.
        notes: User-provided notes.

    Returns:
        Updated identification_status string ('unidentified' when the row
        cannot be read back).
    """
    with db.transaction() as tx:
        db.set_user_book_fields(tx, book_id, title, author, year, isbn, publisher, notes)
        refreshed = db.get_book(tx, book_id)
        if refreshed:
            status = compute_status(refreshed)
        else:
            # Row could not be read back; fall back to the lowest status.
            status = "unidentified"
        db.set_book_status(tx, book_id, status)
    return status


def dismiss_field(book_id: str, field: str, value: str) -> tuple[str, list[CandidateRecord]]:
    """Reject a suggested value for one field of a book.

    Two modes, selected by ``value``:

    * non-empty ``value``: every candidate whose ``field`` equals ``value`` is
      dropped from the stored candidate list; if the current ``ai_<field>``
      also equals ``value`` it is overwritten with the user's own value.
    * empty ``value``: the AI suggestion itself is dismissed, i.e.
      ``ai_<field>`` is overwritten with the user's own value.

    The identification status is recomputed and stored afterwards, all within
    a single transaction.

    Args:
        book_id: ID of the book.
        field: Field name (one of AI_FIELDS).
        value: Candidate value to dismiss, or empty string to dismiss the AI suggestion.

    Returns:
        (identification_status, updated_candidates).

    Raises:
        BookNotFoundError: If book_id does not exist.
    """
    ai_attr = f"ai_{field}"
    with db.transaction() as tx:
        current = db.get_book(tx, book_id)
        if not current:
            raise BookNotFoundError(book_id)

        stored: list[CandidateRecord] = json.loads(current.candidates or "[]")

        # With no explicit value the AI suggestion is always reset; with a
        # value it is reset only when it is the very value being dismissed.
        reset_ai = True
        if value:
            kept = [entry for entry in stored if str(entry.get(field) or "").strip() != value]
            db.set_book_candidates(tx, book_id, json.dumps(kept))
            reset_ai = (getattr(current, ai_attr) or "").strip() == value
        if reset_ai:
            db.set_book_ai_field(tx, book_id, field, str(getattr(current, field) or ""))

        # Re-read so status and the returned candidates reflect what was stored.
        refreshed = db.get_book(tx, book_id)
        if refreshed:
            status = compute_status(refreshed)
        else:
            status = "unidentified"
        db.set_book_status(tx, book_id, status)

        remaining: list[CandidateRecord] = []
        if refreshed:
            remaining = json.loads(refreshed.candidates or "[]")
    return status, remaining


def apply_ai_result(book_id: str, result: AIIdentifyResult, confidence_threshold: float = 0.8) -> None:
    """Record the outcome of an AI identification run on a book.

    The confidence (missing/falsy -> 0.0) and a timestamp are always stored.
    Only when the confidence is not below ``confidence_threshold`` are the
    ai_* fields overwritten with the result (missing values become "") and
    the identification status recomputed.

    Args:
        book_id: ID of the book to update.
        result: AI identification result dict.
        confidence_threshold: Minimum confidence to write ai_* fields (default 0.8).
    """
    score = float(result.get("confidence") or 0)
    with db.transaction() as tx:
        db.set_book_confidence(tx, book_id, score, now())

        too_uncertain = score < confidence_threshold
        if not too_uncertain:
            # Positional order expected by db.set_book_ai_fields.
            suggested = tuple(
                result.get(key) or "" for key in ("title", "author", "year", "isbn", "publisher")
            )
            db.set_book_ai_fields(tx, book_id, *suggested)

            refreshed = db.get_book(tx, book_id)
            if refreshed:
                db.set_book_status(tx, book_id, compute_status(refreshed))


def run_text_recognizer(plugin: TextRecognizerPlugin, book_id: str) -> BookRow:
    """Recognize text from a book spine image and store the result.

    Works in three phases so that no database transaction is held open while
    the (potentially slow) image preparation and plugin call are running:

    1. A short transaction checks the book exists and resolves its spine image.
    2. The image is prepared and handed to the plugin, outside any transaction.
    3. A second transaction re-reads the book, stores ``raw_text`` and merges
       the plugin's candidate into the *current* candidates list. Any earlier
       candidate from the same plugin is replaced; an all-blank candidate is
       not stored.

    Args:
        plugin: The text recognizer plugin to execute.
        book_id: ID of the book to process.

    Returns:
        Updated BookRow after storing the result.

    Raises:
        BookNotFoundError: If book_id does not exist (including when the book
            disappears while recognition is running).
    """
    # Phase 1: look up what is needed for recognition, then release the transaction.
    with db.transaction() as c:
        if not db.get_book(c, book_id):
            raise BookNotFoundError(book_id)
        # NOTE(review): assumes the returned path/crop are plain values that
        # stay usable after the transaction ends - confirm in logic.boundaries.
        spine_path, spine_crop = book_spine_source(c, book_id)

    # Phase 2: slow work (image encoding + recognizer call) with no transaction open.
    b64, mt = prep_img_b64(spine_path, spine_crop, max_px=plugin.max_image_px)
    result: TextRecognizeResult = plugin.recognize(b64, mt)
    raw_text = result.get("raw_text") or ""
    cand: CandidateRecord = {
        "source": plugin.plugin_id,
        "title": (result.get("title") or "").strip(),
        "author": (result.get("author") or "").strip(),
        "year": (result.get("year") or "").strip(),
        "publisher": (result.get("publisher") or "").strip(),
        # Only the four fields above are read from the recognizer result.
        "isbn": "",
    }

    # Phase 3: read-modify-write of the candidates inside one transaction, based
    # on the row as it is now rather than as it was before recognition started.
    with db.transaction() as c:
        book = db.get_book(c, book_id)
        if not book:
            raise BookNotFoundError(book_id)
        existing: list[CandidateRecord] = [
            cd for cd in json.loads(book.candidates or "[]") if cd.get("source") != plugin.plugin_id
        ]
        if any((cand["title"], cand["author"], cand["year"], cand["publisher"])):
            existing.append(cand)
        db.set_book_raw_text(c, book_id, raw_text)
        db.set_book_candidates(c, book_id, json.dumps(existing))
        updated = db.get_book(c, book_id)
        if not updated:
            raise BookNotFoundError(book_id)
        return updated


def run_book_identifier(plugin: BookIdentifierPlugin, book_id: str) -> BookRow:
    """Identify a book using AI and update ai_* fields and candidates.

    Requires raw_text to have been populated by a text recognizer first.

    Steps:

    1. A short transaction loads the book and its raw_text.
    2. The plugin is called outside any transaction, so a slow identification
       call does not keep a database transaction open.
    3. ``apply_ai_result`` stores confidence / ai_* fields / status in its own
       transaction.
    4. A final transaction merges the result into the candidates list. Any
       earlier candidate from the same plugin is replaced; a result whose
       fields are all blank is not stored as a candidate (same rule as
       ``run_text_recognizer``).

    Args:
        plugin: The book identifier plugin to execute.
        book_id: ID of the book to process.

    Returns:
        Updated BookRow after storing the identification result.

    Raises:
        BookNotFoundError: If book_id does not exist.
        NoRawTextError: If the book has no raw_text (text recognizer has not run).
    """
    with db.transaction() as c:
        book = db.get_book(c, book_id)
        if not book:
            raise BookNotFoundError(book_id)
        raw_text = (book.raw_text or "").strip()
        if not raw_text:
            raise NoRawTextError(book_id)

    # The identification call only needs raw_text, so run it with no transaction held.
    result: AIIdentifyResult = plugin.identify(raw_text)

    # apply_ai_result manages its own transaction
    apply_ai_result(book_id, result, plugin.confidence_threshold)

    cand: CandidateRecord = {
        "source": plugin.plugin_id,
        "title": (result.get("title") or "").strip(),
        "author": (result.get("author") or "").strip(),
        "year": (result.get("year") or "").strip(),
        "isbn": (result.get("isbn") or "").strip(),
        "publisher": (result.get("publisher") or "").strip(),
    }
    with db.transaction() as c:
        book = db.get_book(c, book_id)
        if not book:
            raise BookNotFoundError(book_id)
        existing: list[CandidateRecord] = [
            cd for cd in json.loads(book.candidates or "[]") if cd.get("source") != plugin.plugin_id
        ]
        # An entirely blank candidate carries no suggestion, so do not store it.
        if any((cand["title"], cand["author"], cand["year"], cand["isbn"], cand["publisher"])):
            existing.append(cand)
        db.set_book_candidates(c, book_id, json.dumps(existing))
        updated = db.get_book(c, book_id)
        if not updated:
            raise BookNotFoundError(book_id)
        return updated