Initial commit

Photo-based book cataloger with AI identification.
Room → Cabinet → Shelf → Book hierarchy; FastAPI + SQLite backend;
vanilla JS SPA; OpenAI-compatible plugin system for boundary
detection, text recognition, and archive search.
This commit is contained in:
night
2026-03-09 14:11:11 +03:00
commit f29678ebf1
64 changed files with 8605 additions and 0 deletions

View File

@@ -0,0 +1,9 @@
# API credentials — connection endpoints only (no model, no prompt).
# Override api_key in credentials.user.yaml.
# Each key under `credentials` is a credential name; entries in the models
# config select one through their own `credentials:` field.
credentials:
  openrouter:
    base_url: "https://openrouter.ai/api/v1"
    # Placeholder only — the real key belongs in credentials.user.yaml.
    api_key: "sk-or-..."
  # Example of an additional provider; uncomment and fill in to enable.
  # openai:
  #   base_url: "https://api.openai.com/v1"
  #   api_key: "sk-..."

View File

@@ -0,0 +1,103 @@
# Function configurations — dict per category (not lists).
# AI functions reference a model from models.*.yaml.
# Archive functions specify a type and optional config dict.
# Keys within each category serve as plugin_id; must be unique across all categories.
# Override individual functions in functions.user.yaml.
#
# Layout: functions → <category> → <plugin_id> → settings.
# NOTE(review): auto_queue / rate_limit_seconds / timeout appear on every
# function; their exact semantics and units are defined by the loader —
# presumably seconds for the latter two; confirm before tuning.
functions:
  # ── Boundary detection: image → {boundaries: [...], confidence: 0.x}
  # ai_shelf_boundaries / ai_book_boundaries stored as {functionId: [fractions]} per entity.
  boundary_detectors:
    shelves:  # key = plugin_id = target; runs on cabinet images
      model: vl_detect_shelves
      max_image_px: 1600
      auto_queue: false
      rate_limit_seconds: 0
      timeout: 30
    books:  # key = plugin_id = target; runs on shelf images
      model: vl_detect_books
      max_image_px: 1600
      auto_queue: false
      rate_limit_seconds: 0
      timeout: 30

  # ── Text recognition: spine image → {raw_text, title, author, year, publisher, other}
  text_recognizers:
    recognize:
      model: vl_recognize
      max_image_px: 1600
      auto_queue: true
      rate_limit_seconds: 0
      timeout: 30

  # ── Book identification: raw_text → {title, author, year, isbn, publisher, confidence}
  book_identifiers:
    identify:
      model: ai_identify
      confidence_threshold: 0.8
      auto_queue: false
      rate_limit_seconds: 0
      timeout: 30

  # ── Archive searchers: query → [{source, title, author, year, isbn, publisher}, ...]
  # `name` is the display label, `type` selects the searcher implementation,
  # and `config` (where present) carries that implementation's own options.
  archive_searchers:
    openlibrary:
      name: "OpenLibrary"
      type: openlibrary
      auto_queue: true
      rate_limit_seconds: 5
      timeout: 8
    rsl:
      name: "РГБ"
      type: rsl
      auto_queue: true
      rate_limit_seconds: 5
      timeout: 8
    rusneb:
      name: "НЭБ"
      type: html_scraper
      auto_queue: true
      rate_limit_seconds: 5
      timeout: 8
      config:
        url: "https://rusneb.ru/search/"
        search_param: q
        title_class: "title"
        author_class: "author"
    alib_web:
      name: "Alib (web)"
      type: html_scraper
      auto_queue: false
      rate_limit_seconds: 5
      timeout: 8
      config:
        url: "https://www.alib.ru/find3.php4"
        search_param: tfind
        extra_params: {f: "5", s: "0"}
        # Double-quoted, so `\\` is one literal backslash: regex t[a-z]+\.phtml
        link_href_pattern: "t[a-z]+\\.phtml"
        author_class: "aut"
    nlr:
      name: "НЛР"
      type: sru_catalog
      auto_queue: false
      rate_limit_seconds: 5
      timeout: 8
      config:
        # NOTE(review): plain-HTTP endpoint — confirm whether HTTPS is available.
        url: "http://www.nlr.ru/search/query"
        query_prefix: "title="
    shpl:
      name: "ШПИЛ"
      type: html_scraper
      auto_queue: false
      rate_limit_seconds: 5
      timeout: 8
      config:
        url: "https://www.shpl.ru/cgi-bin/irbis64/cgiirbis_64.exe"
        search_param: S21ALL
        extra_params:
          C21COM: S
          I21DBN: BIBL
          P21DBN: BIBL
          S21FMT: briefWebRus
          Z21ID: ""
        brief_class: "brief"

View File

@@ -0,0 +1,50 @@
# AI model configurations — each model references a credential and provides
# the model string, optional openrouter routing (extra_body), and the prompt.
# ${OUTPUT_FORMAT} is injected by the plugin from its hardcoded schema constant.
# Override individual models in models.user.yaml.
#
# NOTE(review): the lines beginning with `#` at the top of each prompt sit
# inside the literal block scalar (`|`), so YAML keeps them as prompt text
# rather than treating them as comments — confirm the plugin strips them
# (or that sending them, placeholders included, is intended).
models:
  # Shelf boundaries on a cabinet photo (vertical fractions).
  vl_detect_shelves:
    credentials: openrouter
    model: "google/gemini-flash-1.5"
    prompt: |
      # ${OUTPUT_FORMAT} — JSON schema injected by BoundaryDetectorShelvesPlugin
      Look at this photo of a bookcase/shelf unit.
      Count the number of horizontal shelves visible.
      For each interior boundary between adjacent shelves, give its vertical position
      as a fraction 0-1 (0=top of image, 1=bottom). Do NOT include 0 or 1 themselves.
      Return ONLY valid JSON, no explanation:
      ${OUTPUT_FORMAT}

  # Book-spine boundaries on a shelf photo (horizontal fractions).
  vl_detect_books:
    credentials: openrouter
    model: "google/gemini-flash-1.5"
    prompt: |
      # ${OUTPUT_FORMAT} — JSON schema injected by BoundaryDetectorBooksPlugin
      Look at this shelf photo. Identify every book spine visible left-to-right.
      For each interior boundary between adjacent books, give its horizontal position
      as a fraction 0-1 (0=left edge of image, 1=right edge). Do NOT include 0 or 1.
      Return ONLY valid JSON, no explanation:
      ${OUTPUT_FORMAT}

  # Text reading and field labelling for a single spine image.
  vl_recognize:
    credentials: openrouter
    model: "google/gemini-flash-1.5"
    prompt: |
      # ${OUTPUT_FORMAT} — JSON schema injected by TextRecognizerPlugin
      Look at this book spine image. Read all visible text exactly as it appears,
      preserving line breaks between distinct text blocks.
      Then use visual cues (font size, position, layout) to identify which part is the title,
      author, publisher, year, and any other notable text.
      Return ONLY valid JSON, no explanation:
      ${OUTPUT_FORMAT}

  # Book identification from previously recognized spine text.
  ai_identify:
    credentials: openrouter
    model: "google/gemini-flash-1.5"
    prompt: |
      # ${RAW_TEXT} — text read from the book spine (multi-line)
      # ${OUTPUT_FORMAT} — JSON schema injected by BookIdentifierPlugin
      The following text was read from a book spine:
      ${RAW_TEXT}
      Identify this book. Search for it if needed. Return ONLY valid JSON, no explanation:
      ${OUTPUT_FORMAT}

3
config/ui.default.yaml Normal file
View File

@@ -0,0 +1,3 @@
# UI settings. Override in ui.user.yaml.
ui:
  # Pixel grab threshold for dragging boundary lines.
  boundary_grab_px: 14