Add per-request AI logging, DB batch queue, WS entity updates, and UI polish

- log_thread.py: thread-safe ContextVar bridge so executor threads can log
  individual LLM calls and archive searches back to the event loop
- ai_log.py: init_thread_logging(), notify_entity_update(); WS now pushes
  entity_update messages when book data changes after any plugin or batch run
- batch.py: replace batch_pending.json with batch_queue SQLite table;
  run_batch_consumer() reads queue dynamically so new books can be added
  while batch is running; add_to_queue() deduplicates
- migrate.py: fix _migrate_v1 (clear-on-startup bug); add _migrate_v2 for
  batch_queue table
- _client.py / archive.py / identification.py: wrap each LLM API call and
  archive search with log_thread start/finish entries
- api.py: POST /api/batch returns {already_running, added}; notify_entity_update
  after identify pipeline
- models.default.yaml: strengthen ai_identify confidence-scoring instructions;
  warn against placeholder data
- detail-render.js: book log entries show clickable ID + spine thumbnail;
  book spine/title images open full-screen popup
- events.js: batch-start handles already_running+added; open-img-popup action
- init.js: entity_update WS handler; image popup close listeners
- overlays.css / index.html: full-screen image popup overlay
- eslint.config.js: add new globals; fix no-redeclare/no-unused-vars for
  multi-file global architecture; all lint errors resolved

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-11 12:10:54 +03:00
parent fd32be729f
commit b94f222c96
41 changed files with 2566 additions and 586 deletions

View File

@@ -1,23 +1,38 @@
"""Book identifier plugin — VLM result + archive candidates → ranked identification blocks.

Input: raw_text string (from text_recognizer), archive_results (deduplicated candidates),
images (list of (b64, mime) pairs if is_vlm).
Output: list of IdentifyBlock dicts ranked by descending confidence score.
Result stored as books.ai_blocks JSON.
"""
from models import AIConfig, AIIdentifyResult
import json
from typing import Any, TypeGuard
from models import AIConfig, CandidateRecord, IdentifyBlock
from ._client import AIClient
def _is_str_dict(v: object) -> TypeGuard[dict[str, Any]]:
    """Narrow *v* to ``dict[str, Any]`` for the type checker.

    Only the ``dict`` instance check is performed at runtime; str keys are
    assumed (JSON object keys decoded by ``json`` are always str), not verified.
    """
    return isinstance(v, dict)
def _is_any_list(v: object) -> TypeGuard[list[Any]]:
    """Narrow *v* to ``list[Any]`` for the type checker (plain isinstance check)."""
    return isinstance(v, list)
class BookIdentifierPlugin:
"""Identifies a book from spine text using a VLM with web-search capability."""
"""Identifies a book by combining VLM spine text with archive search results."""
category = "book_identifiers"
OUTPUT_FORMAT = (
'{"title": "...", "author": "...", "year": "...", ' '"isbn": "...", "publisher": "...", "confidence": 0.95}'
'[{"title": "The Master and Margarita", "author": "Mikhail Bulgakov", '
'"year": "1967", "isbn": "", "publisher": "YMCA Press", '
'"score": 0.95, "sources": ["rusneb", "openlibrary"]}, '
'{"title": "Master i Margarita", "author": "M. Bulgakov", '
'"year": "2005", "isbn": "978-5-17-123456-7", "publisher": "AST", '
'"score": 0.72, "sources": ["web"]}]'
)
def __init__(
@@ -36,21 +51,67 @@ class BookIdentifierPlugin:
self._client = AIClient(ai_config, self.OUTPUT_FORMAT)
self._prompt_text = prompt_text
def identify(self, raw_text: str) -> AIIdentifyResult:
"""Returns AIIdentifyResult with title/author/year/isbn/publisher/confidence."""
raw = self._client.call(self._prompt_text, [], text_vars={"RAW_TEXT": raw_text})
result = AIIdentifyResult(
title=str(raw.get("title") or ""),
author=str(raw.get("author") or ""),
year=str(raw.get("year") or ""),
isbn=str(raw.get("isbn") or ""),
publisher=str(raw.get("publisher") or ""),
def identify(
self,
raw_text: str,
archive_results: list[CandidateRecord],
images: list[tuple[str, str]],
) -> list[IdentifyBlock]:
"""Call the AI model to produce ranked identification blocks.
Args:
raw_text: Verbatim text read from the book spine.
archive_results: Deduplicated candidates from archive searchers.
images: (base64, mime_type) pairs; non-empty only when is_vlm is True.
Returns:
List of IdentifyBlock dicts ranked by descending score.
"""
archive_json = json.dumps(archive_results, ensure_ascii=False)
raw = self._client.call(
self._prompt_text,
images,
text_vars={"RAW_TEXT": raw_text, "ARCHIVE_RESULTS": archive_json},
output_is_list=True,
)
conf = raw.get("confidence")
if conf is not None:
result["confidence"] = float(conf)
return result
blocks: list[IdentifyBlock] = []
for item in raw:
if not _is_str_dict(item):
continue
sources: list[str] = []
sources_val = item.get("sources")
if _is_any_list(sources_val):
for sv in sources_val:
if isinstance(sv, str):
sources.append(sv)
block = IdentifyBlock(
title=str(item.get("title") or "").strip(),
author=str(item.get("author") or "").strip(),
year=str(item.get("year") or "").strip(),
isbn=str(item.get("isbn") or "").strip(),
publisher=str(item.get("publisher") or "").strip(),
score=float(item.get("score") or 0.0),
sources=sources,
)
blocks.append(block)
return sorted(blocks, key=lambda b: b.get("score", 0.0), reverse=True)
    @property
    def model(self) -> str:
        """AI model name used for identification (``cfg["model"]``)."""
        # NOTE(review): assumes the key is always present in AIClient.cfg —
        # raises KeyError otherwise; confirm AIConfig guarantees it.
        return self._client.cfg["model"]
    @property
    def max_image_px(self) -> int:
        """Maximum pixel dimension for images passed to the AI model."""
        # NOTE(review): assumes the key is always present in AIClient.cfg.
        return self._client.cfg["max_image_px"]
    @property
    def confidence_threshold(self) -> float:
        """Minimum score threshold for the top block to set ai_* fields."""
        # NOTE(review): assumes the key is always present in AIClient.cfg.
        return self._client.cfg["confidence_threshold"]
    @property
    def is_vlm(self) -> bool:
        """True if images should be included in the request."""
        # NOTE(review): assumes the key is always present in AIClient.cfg.
        return self._client.cfg["is_vlm"]