Add per-request AI logging, DB batch queue, WS entity updates, and UI polish
- log_thread.py: thread-safe ContextVar bridge so executor threads can log
individual LLM calls and archive searches back to the event loop
- ai_log.py: init_thread_logging(), notify_entity_update(); WS now pushes
entity_update messages when book data changes after any plugin or batch run
- batch.py: replace batch_pending.json with batch_queue SQLite table;
run_batch_consumer() reads queue dynamically so new books can be added
while batch is running; add_to_queue() deduplicates
- migrate.py: fix _migrate_v1 (clear-on-startup bug); add _migrate_v2 for
batch_queue table
- _client.py / archive.py / identification.py: wrap each LLM API call and
archive search with log_thread start/finish entries
- api.py: POST /api/batch returns {already_running, added}; notify_entity_update
after identify pipeline
- models.default.yaml: strengthen ai_identify confidence-scoring instructions;
warn against placeholder data
- detail-render.js: book log entries show clickable ID + spine thumbnail;
book spine/title images open full-screen popup
- events.js: batch-start handles already_running+added; open-img-popup action
- init.js: entity_update WS handler; image popup close listeners
- overlays.css / index.html: full-screen image popup overlay
- eslint.config.js: add new globals; fix no-redeclare/no-unused-vars for
multi-file global architecture; all lint errors resolved
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -70,6 +70,7 @@ def _build_ai_cfg(model_cfg: ModelConfig, cred_cfg: CredentialConfig, func: AIFu
|
||||
max_image_px=func.max_image_px,
|
||||
confidence_threshold=func.confidence_threshold,
|
||||
extra_body=model_cfg.extra_body,
|
||||
is_vlm=func.is_vlm,
|
||||
)
|
||||
|
||||
|
||||
@@ -227,6 +228,21 @@ def get_auto_queue(
|
||||
return []
|
||||
|
||||
|
||||
def get_all_text_recognizers() -> list[TextRecognizerPlugin]:
    """Return every registered text recognizer plugin as a list."""
    return [*_text_recognizers.values()]
|
||||
|
||||
|
||||
def get_all_book_identifiers() -> list[BookIdentifierPlugin]:
    """Return every registered book identifier plugin as a list."""
    return [*_book_identifiers.values()]
|
||||
|
||||
|
||||
def get_all_archive_searchers() -> list[ArchiveSearcherPlugin]:
    """Return every registered archive searcher plugin as a list."""
    return [*_archive_searchers.values()]
|
||||
|
||||
|
||||
def get_plugin(plugin_id: str) -> PluginLookupResult:
|
||||
"""Find a plugin by ID across all categories. Returns a discriminated (category, plugin) tuple."""
|
||||
if plugin_id in _boundary_detectors:
|
||||
|
||||
@@ -2,12 +2,14 @@
|
||||
|
||||
Caches openai.OpenAI instances per (base_url, api_key) to avoid re-creating on each call.
|
||||
AIClient wraps the raw API call: fills prompt template, encodes images, parses JSON response.
|
||||
Individual LLM API calls are logged via log_thread if a log context is set.
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from string import Template
|
||||
from typing import Any, cast
|
||||
from typing import Any, Literal, cast, overload
|
||||
|
||||
import openai
|
||||
from openai.types.chat import ChatCompletionMessageParam
|
||||
@@ -17,6 +19,7 @@ from openai.types.chat.chat_completion_content_part_image_param import (
|
||||
)
|
||||
from openai.types.chat.chat_completion_content_part_text_param import ChatCompletionContentPartTextParam
|
||||
|
||||
import log_thread
|
||||
from models import AIConfig
|
||||
|
||||
# Module-level cache of openai.OpenAI instances keyed by (base_url, api_key)
|
||||
@@ -48,6 +51,24 @@ def _parse_json(text: str) -> dict[str, Any]:
|
||||
return cast(dict[str, Any], result)
|
||||
|
||||
|
||||
def _parse_json_list(text: str) -> list[Any]:
|
||||
"""Extract and parse the first JSON array found in text.
|
||||
|
||||
Raises ValueError if no JSON array is found or the JSON is malformed.
|
||||
"""
|
||||
text = text.strip()
|
||||
m = re.search(r"\[.*\]", text, re.DOTALL)
|
||||
if not m:
|
||||
raise ValueError(f"No JSON array found in AI response: {text[:200]!r}")
|
||||
try:
|
||||
result = json.loads(m.group())
|
||||
except json.JSONDecodeError as exc:
|
||||
raise ValueError(f"Failed to parse AI response as JSON: {exc}") from exc
|
||||
if not isinstance(result, list):
|
||||
raise ValueError(f"Expected JSON array, got {type(result).__name__}")
|
||||
return cast(list[Any], result)
|
||||
|
||||
|
||||
ContentPart = ChatCompletionContentPartImageParam | ChatCompletionContentPartTextParam
|
||||
|
||||
|
||||
@@ -62,16 +83,41 @@ class AIClient:
|
||||
self.cfg = cfg
|
||||
self.output_format = output_format
|
||||
|
||||
@overload
|
||||
def call(
|
||||
self,
|
||||
prompt_template: str,
|
||||
images: list[tuple[str, str]],
|
||||
text_vars: dict[str, str] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
output_is_list: Literal[False] = False,
|
||||
) -> dict[str, Any]: ...
|
||||
|
||||
@overload
|
||||
def call(
|
||||
self,
|
||||
prompt_template: str,
|
||||
images: list[tuple[str, str]],
|
||||
text_vars: dict[str, str] | None,
|
||||
output_is_list: Literal[True],
|
||||
) -> list[Any]: ...
|
||||
|
||||
def call(
|
||||
self,
|
||||
prompt_template: str,
|
||||
images: list[tuple[str, str]],
|
||||
text_vars: dict[str, str] | None = None,
|
||||
output_is_list: bool = False,
|
||||
) -> dict[str, Any] | list[Any]:
|
||||
"""Substitute template vars, call API with optional images, return parsed JSON.
|
||||
|
||||
images: list of (base64_str, mime_type) tuples.
|
||||
text_vars: extra ${KEY} substitutions beyond ${OUTPUT_FORMAT}.
|
||||
Args:
|
||||
prompt_template: Prompt string with ${KEY} placeholders.
|
||||
images: List of (base64_str, mime_type) tuples.
|
||||
text_vars: Extra ${KEY} substitutions beyond ${OUTPUT_FORMAT}.
|
||||
output_is_list: If True, parse the response as a JSON array instead of object.
|
||||
|
||||
Returns:
|
||||
Parsed JSON — dict if output_is_list is False, list otherwise.
|
||||
"""
|
||||
vars_: dict[str, str] = {"OUTPUT_FORMAT": self.output_format}
|
||||
if text_vars:
|
||||
@@ -87,8 +133,17 @@ class AIClient:
|
||||
]
|
||||
parts.append(ChatCompletionContentPartTextParam(type="text", text=prompt))
|
||||
messages: list[ChatCompletionMessageParam] = [{"role": "user", "content": parts}]
|
||||
r = client.chat.completions.create(
|
||||
model=self.cfg["model"], max_tokens=2048, messages=messages, extra_body=self.cfg["extra_body"]
|
||||
)
|
||||
raw = r.choices[0].message.content or ""
|
||||
started = time.time()
|
||||
entry_id = log_thread.start_entry(self.cfg["model"], prompt[:120])
|
||||
try:
|
||||
r = client.chat.completions.create(
|
||||
model=self.cfg["model"], max_tokens=4096, messages=messages, extra_body=self.cfg["extra_body"]
|
||||
)
|
||||
raw = r.choices[0].message.content or ""
|
||||
log_thread.finish_entry(entry_id, "ok", raw[:120], started)
|
||||
except Exception as exc:
|
||||
log_thread.finish_entry(entry_id, "error", str(exc), started)
|
||||
raise
|
||||
if output_is_list:
|
||||
return _parse_json_list(raw)
|
||||
return _parse_json(raw)
|
||||
|
||||
@@ -1,23 +1,38 @@
|
||||
"""Book identifier plugin — raw spine text → bibliographic metadata.
|
||||
"""Book identifier plugin — VLM result + archive candidates → ranked identification blocks.
|
||||
|
||||
Input: raw_text string (from text_recognizer).
|
||||
Output: {"title": "...", "author": "...", "year": "...", "isbn": "...",
|
||||
"publisher": "...", "confidence": 0.95}
|
||||
confidence — float 0-1; results below confidence_threshold are discarded by logic.py.
|
||||
Result added to books.candidates and books.ai_* fields.
|
||||
Input: raw_text string (from text_recognizer), archive_results (deduplicated candidates),
|
||||
images (list of (b64, mime) pairs if is_vlm).
|
||||
Output: list of IdentifyBlock dicts ranked by descending confidence score.
|
||||
Result stored as books.ai_blocks JSON.
|
||||
"""
|
||||
|
||||
from models import AIConfig, AIIdentifyResult
|
||||
import json
|
||||
from typing import Any, TypeGuard
|
||||
|
||||
from models import AIConfig, CandidateRecord, IdentifyBlock
|
||||
|
||||
from ._client import AIClient
|
||||
|
||||
|
||||
def _is_str_dict(v: object) -> TypeGuard[dict[str, Any]]:
|
||||
return isinstance(v, dict)
|
||||
|
||||
|
||||
def _is_any_list(v: object) -> TypeGuard[list[Any]]:
|
||||
return isinstance(v, list)
|
||||
|
||||
|
||||
class BookIdentifierPlugin:
|
||||
"""Identifies a book from spine text using a VLM with web-search capability."""
|
||||
"""Identifies a book by combining VLM spine text with archive search results."""
|
||||
|
||||
category = "book_identifiers"
|
||||
OUTPUT_FORMAT = (
|
||||
'{"title": "...", "author": "...", "year": "...", ' '"isbn": "...", "publisher": "...", "confidence": 0.95}'
|
||||
'[{"title": "The Master and Margarita", "author": "Mikhail Bulgakov", '
|
||||
'"year": "1967", "isbn": "", "publisher": "YMCA Press", '
|
||||
'"score": 0.95, "sources": ["rusneb", "openlibrary"]}, '
|
||||
'{"title": "Master i Margarita", "author": "M. Bulgakov", '
|
||||
'"year": "2005", "isbn": "978-5-17-123456-7", "publisher": "AST", '
|
||||
'"score": 0.72, "sources": ["web"]}]'
|
||||
)
|
||||
|
||||
def __init__(
|
||||
@@ -36,21 +51,67 @@ class BookIdentifierPlugin:
|
||||
self._client = AIClient(ai_config, self.OUTPUT_FORMAT)
|
||||
self._prompt_text = prompt_text
|
||||
|
||||
def identify(
    self,
    raw_text: str,
    archive_results: list[CandidateRecord],
    images: list[tuple[str, str]],
) -> list[IdentifyBlock]:
    """Produce ranked identification blocks from spine text and archive candidates.

    Args:
        raw_text: Verbatim text read from the book spine.
        archive_results: Deduplicated candidates from archive searchers.
        images: (base64, mime_type) pairs; non-empty only when is_vlm is True.

    Returns:
        IdentifyBlock dicts sorted by descending score.
    """
    candidates_json = json.dumps(archive_results, ensure_ascii=False)
    response = self._client.call(
        self._prompt_text,
        images,
        text_vars={"RAW_TEXT": raw_text, "ARCHIVE_RESULTS": candidates_json},
        output_is_list=True,
    )
    blocks: list[IdentifyBlock] = []
    for entry in response:
        # Skip anything that is not a JSON object — the model may emit junk items.
        if not _is_str_dict(entry):
            continue
        raw_sources = entry.get("sources")
        sources: list[str] = (
            [s for s in raw_sources if isinstance(s, str)] if _is_any_list(raw_sources) else []
        )
        blocks.append(
            IdentifyBlock(
                title=str(entry.get("title") or "").strip(),
                author=str(entry.get("author") or "").strip(),
                year=str(entry.get("year") or "").strip(),
                isbn=str(entry.get("isbn") or "").strip(),
                publisher=str(entry.get("publisher") or "").strip(),
                score=float(entry.get("score") or 0.0),
                sources=sources,
            )
        )
    # Stable sort: equal scores keep the model's original ordering.
    blocks.sort(key=lambda b: b.get("score", 0.0), reverse=True)
    return blocks
|
||||
|
||||
# Read-only views over the shared AIClient configuration (self._client.cfg).
@property
def model(self) -> str:
    """AI model name used for identification."""
    return self._client.cfg["model"]

@property
def max_image_px(self) -> int:
    """Maximum pixel dimension for images passed to the AI model."""
    return self._client.cfg["max_image_px"]

@property
def confidence_threshold(self) -> float:
    """Minimum score threshold for the top block to set ai_* fields."""
    return self._client.cfg["confidence_threshold"]

@property
def is_vlm(self) -> bool:
    """True if images should be included in the request."""
    return self._client.cfg["is_vlm"]
|
||||
|
||||
@@ -41,6 +41,10 @@ class BoundaryDetectorBooksPlugin:
|
||||
boundaries: list[float] = [float(b) for b in raw_bounds if isinstance(b, (int, float))]
|
||||
return BoundaryDetectResult(boundaries=boundaries)
|
||||
|
||||
@property
def model(self) -> str:
    """AI model name from the underlying AIClient config."""
    return self._client.cfg["model"]

@property
def max_image_px(self) -> int:
    """Maximum pixel dimension for images passed to the AI model."""
    return self._client.cfg["max_image_px"]
|
||||
|
||||
@@ -46,6 +46,10 @@ class BoundaryDetectorShelvesPlugin:
|
||||
result["confidence"] = float(conf)
|
||||
return result
|
||||
|
||||
@property
def model(self) -> str:
    """AI model name from the underlying AIClient config."""
    return self._client.cfg["model"]

@property
def max_image_px(self) -> int:
    """Maximum pixel dimension for images passed to the AI model."""
    return self._client.cfg["max_image_px"]
|
||||
|
||||
@@ -51,6 +51,10 @@ class TextRecognizerPlugin:
|
||||
other=str(raw.get("other") or ""),
|
||||
)
|
||||
|
||||
@property
def model(self) -> str:
    """AI model name from the underlying AIClient config."""
    return self._client.cfg["model"]

@property
def max_image_px(self) -> int:
    """Maximum pixel dimension for images passed to the AI model."""
    return self._client.cfg["max_image_px"]
|
||||
|
||||
Reference in New Issue
Block a user