Files
bookshelf/src/logic/identification.py
night f29678ebf1 Initial commit
Photo-based book cataloger with AI identification.
Room → Cabinet → Shelf → Book hierarchy; FastAPI + SQLite backend;
vanilla JS SPA; OpenAI-compatible plugin system for boundary
detection, text recognition, and archive search.
2026-03-09 14:11:11 +03:00

246 lines
9.0 KiB
Python

"""Book identification logic: status computation, AI result application, plugin runners."""
import json
import db
from db import now
from errors import BookNotFoundError, NoRawTextError
from logic.boundaries import book_spine_source
from logic.images import prep_img_b64
from models import (
AIIdentifyResult,
BookIdentifierPlugin,
BookRow,
CandidateRecord,
TextRecognizeResult,
TextRecognizerPlugin,
)
AI_FIELDS = ("title", "author", "year", "isbn", "publisher")
_APPROVED_REQUIRED = ("title", "author", "year")
def compute_status(book: BookRow) -> str:
"""Return the identification_status string derived from current book field values.
Args:
book: The book row to evaluate.
Returns:
One of 'unidentified', 'ai_identified', or 'user_approved'.
"""
if not (book.ai_title or "").strip():
return "unidentified"
filled = all((getattr(book, f) or "").strip() for f in _APPROVED_REQUIRED)
no_diff = all(
not (getattr(book, f"ai_{f}") or "").strip()
or (getattr(book, f) or "").strip() == (getattr(book, f"ai_{f}") or "").strip()
for f in AI_FIELDS
)
return "user_approved" if (filled and no_diff) else "ai_identified"
def build_query(book: BookRow) -> str:
"""Build a search query string from the best available candidate fields.
Prefers the first candidate with a non-empty author+title pair; falls back to
AI fields, then raw OCR text.
Args:
book: The book row to build a query for.
Returns:
Query string, empty if no usable data is available.
"""
candidates: list[dict[str, object]] = json.loads(book.candidates or "[]")
for c in candidates:
q = " ".join(filter(None, [(str(c.get("author") or "")).strip(), (str(c.get("title") or "")).strip()]))
if q:
return q
q = " ".join(filter(None, [(book.ai_author or "").strip(), (book.ai_title or "").strip()]))
if q:
return q
return (book.raw_text or "").strip()
def save_user_fields(book_id: str, title: str, author: str, year: str, isbn: str, publisher: str, notes: str) -> str:
"""Persist user-edited fields and recompute identification status.
Also sets ai_* fields to match user values so they are treated as approved.
Args:
book_id: ID of the book to update.
title: User-provided title.
author: User-provided author.
year: User-provided year.
isbn: User-provided ISBN.
publisher: User-provided publisher.
notes: User-provided notes.
Returns:
Updated identification_status string.
"""
with db.transaction() as c:
db.set_user_book_fields(c, book_id, title, author, year, isbn, publisher, notes)
book = db.get_book(c, book_id)
status = compute_status(book) if book else "unidentified"
db.set_book_status(c, book_id, status)
return status
def dismiss_field(book_id: str, field: str, value: str) -> tuple[str, list[CandidateRecord]]:
"""Dismiss a candidate suggestion for a field.
If value is non-empty: removes matching candidates and reverts ai_field to the
user value if it matched. If value is empty: sets ai_field to the current user value.
Args:
book_id: ID of the book.
field: Field name (one of AI_FIELDS).
value: Candidate value to dismiss, or empty string to dismiss the AI suggestion.
Returns:
(identification_status, updated_candidates).
Raises:
BookNotFoundError: If book_id does not exist.
"""
with db.transaction() as c:
book = db.get_book(c, book_id)
if not book:
raise BookNotFoundError(book_id)
candidates: list[CandidateRecord] = json.loads(book.candidates or "[]")
if value:
candidates = [cand for cand in candidates if (str(cand.get(field) or "")).strip() != value]
db.set_book_candidates(c, book_id, json.dumps(candidates))
if (getattr(book, f"ai_{field}") or "").strip() == value:
db.set_book_ai_field(c, book_id, field, str(getattr(book, field) or ""))
else:
db.set_book_ai_field(c, book_id, field, str(getattr(book, field) or ""))
book = db.get_book(c, book_id)
status = compute_status(book) if book else "unidentified"
db.set_book_status(c, book_id, status)
candidates = json.loads(book.candidates or "[]") if book else []
return status, candidates
def apply_ai_result(book_id: str, result: AIIdentifyResult, confidence_threshold: float = 0.8) -> None:
"""Apply an AI identification result to a book.
Stores confidence unconditionally; sets ai_* fields only when confidence meets the threshold.
Args:
book_id: ID of the book to update.
result: AI identification result dict.
confidence_threshold: Minimum confidence to write ai_* fields (default 0.8).
"""
confidence = float(result.get("confidence") or 0)
with db.transaction() as c:
db.set_book_confidence(c, book_id, confidence, now())
if confidence < confidence_threshold:
return
db.set_book_ai_fields(
c,
book_id,
result.get("title") or "",
result.get("author") or "",
result.get("year") or "",
result.get("isbn") or "",
result.get("publisher") or "",
)
book = db.get_book(c, book_id)
if book:
db.set_book_status(c, book_id, compute_status(book))
def run_text_recognizer(plugin: TextRecognizerPlugin, book_id: str) -> BookRow:
"""Recognize text from a book spine image and store the result.
Calls the plugin with the book's spine image, stores raw_text, and merges
the result into the candidates list.
Args:
plugin: The text recognizer plugin to execute.
book_id: ID of the book to process.
Returns:
Updated BookRow after storing the result.
Raises:
BookNotFoundError: If book_id does not exist.
"""
with db.transaction() as c:
book = db.get_book(c, book_id)
if not book:
raise BookNotFoundError(book_id)
spine_path, spine_crop = book_spine_source(c, book_id)
b64, mt = prep_img_b64(spine_path, spine_crop, max_px=plugin.max_image_px)
result: TextRecognizeResult = plugin.recognize(b64, mt)
raw_text = result.get("raw_text") or ""
cand: CandidateRecord = {
"source": plugin.plugin_id,
"title": (result.get("title") or "").strip(),
"author": (result.get("author") or "").strip(),
"year": (result.get("year") or "").strip(),
"publisher": (result.get("publisher") or "").strip(),
"isbn": "",
}
existing: list[CandidateRecord] = json.loads(book.candidates or "[]")
existing = [cd for cd in existing if cd.get("source") != plugin.plugin_id]
if any([cand["title"], cand["author"], cand["year"], cand["publisher"]]):
existing.append(cand)
db.set_book_raw_text(c, book_id, raw_text)
db.set_book_candidates(c, book_id, json.dumps(existing))
updated = db.get_book(c, book_id)
if not updated:
raise BookNotFoundError(book_id)
return updated
def run_book_identifier(plugin: BookIdentifierPlugin, book_id: str) -> BookRow:
"""Identify a book using AI and update ai_* fields and candidates.
Requires raw_text to have been populated by a text recognizer first.
Args:
plugin: The book identifier plugin to execute.
book_id: ID of the book to process.
Returns:
Updated BookRow after storing the identification result.
Raises:
BookNotFoundError: If book_id does not exist.
NoRawTextError: If the book has no raw_text (text recognizer has not run).
"""
with db.transaction() as c:
book = db.get_book(c, book_id)
if not book:
raise BookNotFoundError(book_id)
raw_text = (book.raw_text or "").strip()
if not raw_text:
raise NoRawTextError(book_id)
result: AIIdentifyResult = plugin.identify(raw_text)
# apply_ai_result manages its own transaction
apply_ai_result(book_id, result, plugin.confidence_threshold)
with db.transaction() as c:
book = db.get_book(c, book_id)
if not book:
raise BookNotFoundError(book_id)
cand: CandidateRecord = {
"source": plugin.plugin_id,
"title": (result.get("title") or "").strip(),
"author": (result.get("author") or "").strip(),
"year": (result.get("year") or "").strip(),
"isbn": (result.get("isbn") or "").strip(),
"publisher": (result.get("publisher") or "").strip(),
}
existing: list[CandidateRecord] = json.loads(book.candidates or "[]")
existing = [cd for cd in existing if cd.get("source") != plugin.plugin_id]
existing.append(cand)
db.set_book_candidates(c, book_id, json.dumps(existing))
updated = db.get_book(c, book_id)
if not updated:
raise BookNotFoundError(book_id)
return updated