Add per-request AI logging, DB batch queue, WS entity updates, and UI polish

- log_thread.py: thread-safe ContextVar bridge so executor threads can log
  individual LLM calls and archive searches back to the event loop
- ai_log.py: init_thread_logging(), notify_entity_update(); WS now pushes
  entity_update messages when book data changes after any plugin or batch run
- batch.py: replace batch_pending.json with batch_queue SQLite table;
  run_batch_consumer() reads queue dynamically so new books can be added
  while batch is running; add_to_queue() deduplicates
- migrate.py: fix _migrate_v1 (clear-on-startup bug); add _migrate_v2 for
  batch_queue table
- _client.py / archive.py / identification.py: wrap each LLM API call and
  archive search with log_thread start/finish entries
- api.py: POST /api/batch returns {already_running, added}; notify_entity_update
  after identify pipeline
- models.default.yaml: strengthen ai_identify confidence-scoring instructions;
  warn against placeholder data
- detail-render.js: book log entries show clickable ID + spine thumbnail;
  book spine/title images open full-screen popup
- events.js: batch-start handles already_running+added; open-img-popup action
- init.js: entity_update WS handler; image popup close listeners
- overlays.css / index.html: full-screen image popup overlay
- eslint.config.js: add new globals; fix no-redeclare/no-unused-vars for
  multi-file global architecture; all lint errors resolved

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-11 12:10:54 +03:00
parent fd32be729f
commit b94f222c96
41 changed files with 2566 additions and 586 deletions

View File

@@ -70,6 +70,7 @@ def _build_ai_cfg(model_cfg: ModelConfig, cred_cfg: CredentialConfig, func: AIFu
max_image_px=func.max_image_px,
confidence_threshold=func.confidence_threshold,
extra_body=model_cfg.extra_body,
is_vlm=func.is_vlm,
)
@@ -227,6 +228,21 @@ def get_auto_queue(
return []
def get_all_text_recognizers() -> list[TextRecognizerPlugin]:
    """Return every registered text recognizer plugin."""
    return [*_text_recognizers.values()]
def get_all_book_identifiers() -> list[BookIdentifierPlugin]:
    """Return every registered book identifier plugin."""
    return [*_book_identifiers.values()]
def get_all_archive_searchers() -> list[ArchiveSearcherPlugin]:
    """Return every registered archive searcher plugin."""
    return [*_archive_searchers.values()]
def get_plugin(plugin_id: str) -> PluginLookupResult:
"""Find a plugin by ID across all categories. Returns a discriminated (category, plugin) tuple."""
if plugin_id in _boundary_detectors:

View File

@@ -2,12 +2,14 @@
Caches openai.OpenAI instances per (base_url, api_key) to avoid re-creating on each call.
AIClient wraps the raw API call: fills prompt template, encodes images, parses JSON response.
Individual LLM API calls are logged via log_thread if a log context is set.
"""
import json
import re
import time
from string import Template
from typing import Any, cast
from typing import Any, Literal, cast, overload
import openai
from openai.types.chat import ChatCompletionMessageParam
@@ -17,6 +19,7 @@ from openai.types.chat.chat_completion_content_part_image_param import (
)
from openai.types.chat.chat_completion_content_part_text_param import ChatCompletionContentPartTextParam
import log_thread
from models import AIConfig
# Module-level cache of openai.OpenAI instances keyed by (base_url, api_key)
@@ -48,6 +51,24 @@ def _parse_json(text: str) -> dict[str, Any]:
return cast(dict[str, Any], result)
def _parse_json_list(text: str) -> list[Any]:
"""Extract and parse the first JSON array found in text.
Raises ValueError if no JSON array is found or the JSON is malformed.
"""
text = text.strip()
m = re.search(r"\[.*\]", text, re.DOTALL)
if not m:
raise ValueError(f"No JSON array found in AI response: {text[:200]!r}")
try:
result = json.loads(m.group())
except json.JSONDecodeError as exc:
raise ValueError(f"Failed to parse AI response as JSON: {exc}") from exc
if not isinstance(result, list):
raise ValueError(f"Expected JSON array, got {type(result).__name__}")
return cast(list[Any], result)
ContentPart = ChatCompletionContentPartImageParam | ChatCompletionContentPartTextParam
@@ -62,16 +83,41 @@ class AIClient:
self.cfg = cfg
self.output_format = output_format
@overload
def call(
self,
prompt_template: str,
images: list[tuple[str, str]],
text_vars: dict[str, str] | None = None,
) -> dict[str, Any]:
output_is_list: Literal[False] = False,
) -> dict[str, Any]: ...
@overload
def call(
self,
prompt_template: str,
images: list[tuple[str, str]],
text_vars: dict[str, str] | None,
output_is_list: Literal[True],
) -> list[Any]: ...
def call(
self,
prompt_template: str,
images: list[tuple[str, str]],
text_vars: dict[str, str] | None = None,
output_is_list: bool = False,
) -> dict[str, Any] | list[Any]:
"""Substitute template vars, call API with optional images, return parsed JSON.
images: list of (base64_str, mime_type) tuples.
text_vars: extra ${KEY} substitutions beyond ${OUTPUT_FORMAT}.
Args:
prompt_template: Prompt string with ${KEY} placeholders.
images: List of (base64_str, mime_type) tuples.
text_vars: Extra ${KEY} substitutions beyond ${OUTPUT_FORMAT}.
output_is_list: If True, parse the response as a JSON array instead of object.
Returns:
Parsed JSON — dict if output_is_list is False, list otherwise.
"""
vars_: dict[str, str] = {"OUTPUT_FORMAT": self.output_format}
if text_vars:
@@ -87,8 +133,17 @@ class AIClient:
]
parts.append(ChatCompletionContentPartTextParam(type="text", text=prompt))
messages: list[ChatCompletionMessageParam] = [{"role": "user", "content": parts}]
r = client.chat.completions.create(
model=self.cfg["model"], max_tokens=2048, messages=messages, extra_body=self.cfg["extra_body"]
)
raw = r.choices[0].message.content or ""
started = time.time()
entry_id = log_thread.start_entry(self.cfg["model"], prompt[:120])
try:
r = client.chat.completions.create(
model=self.cfg["model"], max_tokens=4096, messages=messages, extra_body=self.cfg["extra_body"]
)
raw = r.choices[0].message.content or ""
log_thread.finish_entry(entry_id, "ok", raw[:120], started)
except Exception as exc:
log_thread.finish_entry(entry_id, "error", str(exc), started)
raise
if output_is_list:
return _parse_json_list(raw)
return _parse_json(raw)

View File

@@ -1,23 +1,38 @@
"""Book identifier plugin — raw spine text → bibliographic metadata.
"""Book identifier plugin — VLM result + archive candidates → ranked identification blocks.
Input: raw_text string (from text_recognizer).
Output: {"title": "...", "author": "...", "year": "...", "isbn": "...",
"publisher": "...", "confidence": 0.95}
confidence — float 0-1; results below confidence_threshold are discarded by logic.py.
Result added to books.candidates and books.ai_* fields.
Input: raw_text string (from text_recognizer), archive_results (deduplicated candidates),
images (list of (b64, mime) pairs if is_vlm).
Output: list of IdentifyBlock dicts ranked by descending confidence score.
Result stored as books.ai_blocks JSON.
"""
from models import AIConfig, AIIdentifyResult
import json
from typing import Any, TypeGuard
from models import AIConfig, CandidateRecord, IdentifyBlock
from ._client import AIClient
def _is_str_dict(v: object) -> TypeGuard[dict[str, Any]]:
return isinstance(v, dict)
def _is_any_list(v: object) -> TypeGuard[list[Any]]:
return isinstance(v, list)
class BookIdentifierPlugin:
"""Identifies a book from spine text using a VLM with web-search capability."""
"""Identifies a book by combining VLM spine text with archive search results."""
category = "book_identifiers"
OUTPUT_FORMAT = (
'{"title": "...", "author": "...", "year": "...", ' '"isbn": "...", "publisher": "...", "confidence": 0.95}'
'[{"title": "The Master and Margarita", "author": "Mikhail Bulgakov", '
'"year": "1967", "isbn": "", "publisher": "YMCA Press", '
'"score": 0.95, "sources": ["rusneb", "openlibrary"]}, '
'{"title": "Master i Margarita", "author": "M. Bulgakov", '
'"year": "2005", "isbn": "978-5-17-123456-7", "publisher": "AST", '
'"score": 0.72, "sources": ["web"]}]'
)
def __init__(
@@ -36,21 +51,67 @@ class BookIdentifierPlugin:
self._client = AIClient(ai_config, self.OUTPUT_FORMAT)
self._prompt_text = prompt_text
def identify(
    self,
    raw_text: str,
    archive_results: list[CandidateRecord],
    images: list[tuple[str, str]],
) -> list[IdentifyBlock]:
    """Call the AI model to produce ranked identification blocks.

    Args:
        raw_text: Verbatim text read from the book spine.
        archive_results: Deduplicated candidates from archive searchers.
        images: (base64, mime_type) pairs; non-empty only when is_vlm is True.

    Returns:
        List of IdentifyBlock dicts ranked by descending score.
    """
    response = self._client.call(
        self._prompt_text,
        images,
        text_vars={
            "RAW_TEXT": raw_text,
            "ARCHIVE_RESULTS": json.dumps(archive_results, ensure_ascii=False),
        },
        output_is_list=True,
    )
    blocks: list[IdentifyBlock] = []
    for entry in response:
        # Skip anything that is not a JSON object — the model may emit junk.
        if not _is_str_dict(entry):
            continue
        raw_sources = entry.get("sources")
        sources = (
            [s for s in raw_sources if isinstance(s, str)]
            if _is_any_list(raw_sources)
            else []
        )
        blocks.append(
            IdentifyBlock(
                title=str(entry.get("title") or "").strip(),
                author=str(entry.get("author") or "").strip(),
                year=str(entry.get("year") or "").strip(),
                isbn=str(entry.get("isbn") or "").strip(),
                publisher=str(entry.get("publisher") or "").strip(),
                score=float(entry.get("score") or 0.0),
                sources=sources,
            )
        )
    # Stable sort by descending score; missing scores count as 0.0.
    blocks.sort(key=lambda b: b.get("score", 0.0), reverse=True)
    return blocks
@property
def model(self) -> str:
    """AI model name used for identification."""
    cfg = self._client.cfg
    return cfg["model"]
@property
def max_image_px(self) -> int:
    """Maximum pixel dimension for images passed to the AI model."""
    cfg = self._client.cfg
    return cfg["max_image_px"]
@property
def confidence_threshold(self) -> float:
    """Minimum score threshold for the top block to set ai_* fields."""
    cfg = self._client.cfg
    return cfg["confidence_threshold"]
@property
def is_vlm(self) -> bool:
    """True if images should be included in the request."""
    cfg = self._client.cfg
    return cfg["is_vlm"]

View File

@@ -41,6 +41,10 @@ class BoundaryDetectorBooksPlugin:
boundaries: list[float] = [float(b) for b in raw_bounds if isinstance(b, (int, float))]
return BoundaryDetectResult(boundaries=boundaries)
@property
def model(self) -> str:
    """Name of the AI model configured for this plugin."""
    return self._client.cfg["model"]
@property
def max_image_px(self) -> int:
    """Maximum pixel dimension for images passed to the AI model."""
    return self._client.cfg["max_image_px"]

View File

@@ -46,6 +46,10 @@ class BoundaryDetectorShelvesPlugin:
result["confidence"] = float(conf)
return result
@property
def model(self) -> str:
    """Name of the AI model configured for this plugin."""
    return self._client.cfg["model"]
@property
def max_image_px(self) -> int:
    """Maximum pixel dimension for images passed to the AI model."""
    return self._client.cfg["max_image_px"]

View File

@@ -51,6 +51,10 @@ class TextRecognizerPlugin:
other=str(raw.get("other") or ""),
)
@property
def model(self) -> str:
    """Name of the AI model configured for this plugin."""
    return self._client.cfg["model"]
@property
def max_image_px(self) -> int:
    """Maximum pixel dimension for images passed to the AI model."""
    return self._client.cfg["max_image_px"]