Initial commit

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
root
2026-01-20 03:53:49 +00:00
commit 1c4aaf18b2
17 changed files with 1441 additions and 0 deletions

28
.gitignore vendored Normal file
View File

@@ -0,0 +1,28 @@
# Virtual environment
.venv/
venv/
env/
# Python bytecode
__pycache__/
*.py[cod]
*$py.class
*.pyo
# Environment variables (contains secrets)
.env
# Database
*.db
*.sqlite3
# IDE/Editor
.idea/
.vscode/
*.swp
*.swo
*~
# OS files
.DS_Store
Thumbs.db

664
app.py Normal file
View File

@@ -0,0 +1,664 @@
#!/usr/bin/env python3
import os
import re
import json
from urllib.parse import urljoin, urlencode
from datetime import datetime, timedelta
import requests
import pytz
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from flask import (
Flask, request, jsonify, render_template, abort,
redirect, url_for, make_response, session
)
from flask_cors import CORS
from readability import Document
from werkzeug.middleware.proxy_fix import ProxyFix
from werkzeug.security import check_password_hash
from db import get_db
# Load variables from a local .env file into the process environment before reading them.
load_dotenv()
# URL prefix under which every route and the static folder are mounted.
APP_ROOT = os.environ.get("APPLICATION_ROOT", "/readitlater")
# Bearer token expected by the API; an empty value makes require_token() reject every request.
API_TOKEN = os.environ.get("API_TOKEN", "")
# Key used to sign the Flask session cookie.
# NOTE(review): the "dev" fallback is a publicly known value; a deployment that forgets to set
# SECRET_KEY would run with forgeable sessions — consider failing fast instead.
SECRET_KEY = os.environ.get("SECRET_KEY", "dev")
# Login creds (single-user)
LOGIN_USERNAME = os.environ.get("LOGIN_USERNAME", "admin")
# An empty hash disables login entirely (login_submit requires a non-empty hash).
LOGIN_PASSWORD_HASH = os.environ.get("LOGIN_PASSWORD_HASH", "") # werkzeug hash
# Timezone used by format_est() when rendering timestamps.
NY_TZ = pytz.timezone("America/New_York")
# Application factory: builds the Flask app, configures sessions, proxy handling and CORS.
# The helpers and routes that follow are registered on this `app` before it is returned.
def create_app():
# Static assets live in ./static and are served under the APP_ROOT URL prefix.
app = Flask(__name__, static_url_path=f"{APP_ROOT}/static", static_folder="static")
# Session cookie policy: HTTPS-only, not readable from JavaScript, SameSite=Lax, 7-day lifetime
# (the lifetime applies to sessions marked permanent, as login_submit does).
app.config.update(
SECRET_KEY=SECRET_KEY,
SESSION_COOKIE_SECURE=True,
SESSION_COOKIE_HTTPONLY=True,
SESSION_COOKIE_SAMESITE="Lax",
PERMANENT_SESSION_LIFETIME=timedelta(days=7),
)
# Trust one proxy hop for the X-Forwarded-For / -Proto / -Host / -Prefix headers.
# NOTE(review): the nginx config shown for this project sets only X-Forwarded-For and
# X-Forwarded-Proto; confirm X-Forwarded-Host / X-Forwarded-Prefix cannot be client-supplied.
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1)
# CORS for API (tighten as desired)
# Only the two jaredlog.com origins may call routes under APP_ROOT/api/ cross-origin.
CORS(app, resources={rf"{APP_ROOT}/api/*": {"origins": [
"https://www.jaredlog.com", "https://jaredlog.com"
]}})
# -------------------------------------------------------------------------
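# NOTE (extension patch): In your Chrome extension, prefer:
# document.documentElement.getHTML({ serializableShadowRoots: true })
# and fall back to outerHTML. getHTML() returns the element's inner markup and
# serializes shadow roots marked serializable (pass `shadowRoots: [...]` to
# include others), which captures Shadow DOM content that IBM and others use.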
# -------------------------------------------------------------------------
# --- Sanitization policy for captured HTML ---
# Elements kept by extract_and_clean(); any other element is unwrapped
# (the tag is removed, its children are kept).
ALLOWED_TAGS = {
"article", "section", "header", "footer",
"h1", "h2", "h3", "h4", "p", "blockquote", "pre", "code",
"ul", "ol", "li", "a", "em", "strong", "b", "i",
"img", "figure", "figcaption", "hr", "br",
"picture", "source"
}
# Attributes kept on allowed elements; every other attribute (including data-* and
# event handlers) is dropped.
ALLOWED_ATTRS = {
"href", "src", "alt", "title", "target", "rel", "loading",
"srcset", "sizes", "referrerpolicy"
}
# Despite the name, this matches runs of whitespace; normalize_tag() uses it to
# collapse internal whitespace in tag names to a single space.
TAG_RE = re.compile(r"\s+")
# ----------------------------- Auth gate -----------------------------
def is_ui_path(path: str) -> bool:
    """Return True when *path* is a UI page that requires a logged-in session.

    Everything at or below ``APP_ROOT`` is treated as UI except static
    assets, the API, the health check and the login/logout endpoints.
    Paths outside ``APP_ROOT`` are never gated.
    """
    inside_app = path == APP_ROOT or path.startswith(APP_ROOT + "/")
    if not inside_app:
        return False

    open_prefixes = (f"{APP_ROOT}/static/", f"{APP_ROOT}/api/")
    open_paths = (
        f"{APP_ROOT}/healthz",
        f"{APP_ROOT}/login",
        f"{APP_ROOT}/logout",
    )
    is_open = path.startswith(open_prefixes) or path in open_paths
    return not is_open
@app.before_request
def _gate_ui():
    """Send unauthenticated requests for UI pages to the login form.

    Returns None (letting the request proceed) for non-UI paths and for
    sessions already marked ``auth_ok``; otherwise returns a 302 to the
    login page carrying the originally requested location in ``next``.
    """
    already_allowed = (
        not is_ui_path(request.path) or session.get("auth_ok") is True
    )
    if already_allowed:
        return None

    # Preserve the query string only when one was actually sent.
    wanted = request.full_path if request.query_string else request.path
    query = urlencode({"next": wanted})
    return redirect(f"{APP_ROOT}/login?{query}", code=302)
# ----------------------------- Utils -----------------------------
def require_token() -> bool:
    """Return True when the request carries the configured API bearer token.

    Always False when ``API_TOKEN`` is empty, so an unconfigured deployment
    rejects every API call. The comparison is constant-time to avoid leaking
    how many leading characters of a guessed token were correct.
    """
    import hmac  # local import keeps this block self-contained

    if not API_TOKEN:
        return False
    supplied = request.headers.get("Authorization", "")
    expected = f"Bearer {API_TOKEN}"
    # compare_digest rejects non-ASCII str, so compare encoded bytes instead.
    return hmac.compare_digest(supplied.encode("utf-8"), expected.encode("utf-8"))
def normalize_tag(s: str) -> str:
    """Canonicalise a tag name: trim, lower-case, collapse whitespace runs.

    ``None`` and empty input both yield the empty string.
    """
    trimmed = (s or "").strip()
    return TAG_RE.sub(" ", trimmed.lower())
# --------- Helpers for images / shadow DOM / JSON-LD ---------
def is_image_url(u: str) -> bool:
    """Return True when the URL's path ends in a known image extension.

    The check is case-insensitive and ignores both the query string and the
    fragment, so ``a.PNG?w=100`` and ``a.jpg#top`` are recognised. ``None``
    and empty input return False.
    """
    path = (u or "").lower().split("?", 1)[0].split("#", 1)[0]
    return path.endswith(
        (".png", ".jpg", ".jpeg", ".gif", ".webp", ".avif", ".svg")
    )
def absolutize_srcset(value: str, base_url: str) -> str:
    """Resolve every URL in a ``srcset`` value against *base_url*.

    Candidates are split on commas; each candidate's first token is the URL
    and any remaining tokens (width/density descriptors) are kept as-is.
    Empty candidates are dropped and the result is re-joined with ", ".
    """

    def _resolve(candidate: str) -> str:
        url, *descriptors = candidate.split()
        return " ".join([urljoin(base_url, url), *descriptors])

    candidates = (chunk.strip() for chunk in (value or "").split(","))
    return ", ".join(_resolve(c) for c in candidates if c)
def inline_declarative_shadow_dom(raw_html: str) -> str:
    """Replace ``<template shadowrootmode=...>`` elements with their contents.

    This is a best-effort pre-pass: if parsing or rewriting fails for any
    reason the input is returned unchanged.
    """
    try:
        document = BeautifulSoup(raw_html, "lxml")
        shadow_templates = [
            node
            for node in document.find_all("template")
            if node.has_attr("shadowrootmode")
        ]
        for template in shadow_templates:
            inlined = BeautifulSoup(template.decode_contents(), "lxml")
            template.replace_with(inlined)
        return str(document)
    except Exception:
        return raw_html
def extract_jsonld_article_body(raw_html: str) -> tuple[str | None, str | None]:
    """Build paragraph HTML from a JSON-LD ``articleBody``, if the page has one.

    Scans every ``<script type="application/ld+json">`` block (including
    items nested under ``@graph``) for an Article/NewsArticle/BlogPosting
    object whose body is longer than 400 characters.

    Returns:
        ``(title, html)`` for the first match, where *html* is one ``<p>``
        per non-empty line of the body, or ``(None, None)`` when nothing
        usable is found.
    """
    from html import escape  # local import keeps this block self-contained

    try:
        doc = BeautifulSoup(raw_html, "lxml")
        scripts = doc.find_all("script", type="application/ld+json")
    except Exception:
        # Best-effort fallback path: an unparseable page simply has no JSON-LD.
        return None, None

    for script in scripts:
        text = script.string
        if not text:
            continue
        try:
            data = json.loads(text)
        except ValueError:
            # One malformed block must not hide a valid block later in the page.
            continue

        items = list(data) if isinstance(data, list) else [data]
        for entry in list(items):
            if isinstance(entry, dict) and isinstance(entry.get("@graph"), list):
                items.extend(entry["@graph"])

        for it in items:
            if not isinstance(it, dict):
                continue
            t = it.get("@type") or it.get("type") or ""
            t = t.lower() if isinstance(t, str) else str(t).lower()
            # "newsarticle" is already covered by the "article" substring.
            if "article" not in t and "blogposting" not in t:
                continue
            body = it.get("articleBody") or it.get("articlebody")
            if not (isinstance(body, str) and len(body.strip()) > 400):
                continue
            title = it.get("headline") or it.get("name")
            # The body is untrusted plain text: escape it so embedded markup
            # cannot reach the stored HTML.
            html_out = "".join(
                f"<p>{escape(line.strip())}</p>"
                for line in body.split("\n")
                if line.strip()
            )
            return (title if isinstance(title, str) else None), html_out
    return None, None
def pick_thumbnail(raw_html: str, cleaned_soup: BeautifulSoup, base_url: str) -> str | None:
    """Choose a thumbnail URL for an article.

    Preference order: the first ``<img>`` with a ``src``; a lazy-load
    attribute or ``srcset`` on an ``<img>`` without ``src``; the first
    ``<picture><source srcset>``; finally the page's ``og:image`` /
    ``twitter:image`` meta tag. The result is resolved against *base_url*.

    Returns:
        An absolute URL, or None when no candidate exists.
    """

    def first_candidate(srcset) -> str:
        # A srcset is "url descriptor, url descriptor, ..."; take the first URL.
        fields = (srcset or "").split(",")[0].split()
        return fields[0] if fields else ""

    img = cleaned_soup.find("img", src=True)
    if img and img.get("src"):
        return urljoin(base_url, img["src"])

    for tag in cleaned_soup.find_all("img"):
        if tag.get("src"):
            continue
        lazy = (
            tag.get("data-src")
            or tag.get("data-lazy-src")
            or tag.get("data-original")
            # data-srcset holds a candidate list, not a single URL.
            or first_candidate(tag.get("data-srcset"))
        )
        if lazy:
            return urljoin(base_url, lazy)
        first = first_candidate(tag.get("srcset"))
        if first:
            return urljoin(base_url, first)

    for pic in cleaned_soup.find_all("picture"):
        for source in pic.find_all("source"):
            first = first_candidate(source.get("srcset"))
            if first:
                return urljoin(base_url, first)

    # Last resort: social-card metadata from the full, unsanitized page.
    try:
        full = BeautifulSoup(raw_html, "lxml")
        meta = (
            full.find("meta", property="og:image")
            or full.find("meta", attrs={"name": "og:image"})
            or full.find("meta", attrs={"name": "twitter:image"})
            or full.find("meta", property="twitter:image")
        )
        if meta and meta.get("content"):
            return urljoin(base_url, meta["content"])
    except Exception:
        # Thumbnails are optional; a parse failure just means "no thumbnail".
        pass
    return None
def _first_srcset_url(srcset) -> str:
    """Return the URL of the first candidate in a srcset value ('' if none)."""
    fields = (srcset or "").split(",")[0].split()
    return fields[0] if fields else ""


def _url_scheme_allowed(attr: str, url: str) -> bool:
    """Return True when *url* is safe to keep in an ``href``/``src`` attribute."""
    # Browsers ignore embedded whitespace/control characters when parsing a
    # scheme, so strip them before looking at it ("java\tscript:" etc.).
    probe = re.sub(r"[\x00-\x20]", "", url).lower()
    match = re.match(r"([a-z][a-z0-9+.\-]*):", probe)
    if not match:
        return True  # relative URL
    scheme = match.group(1)
    if scheme in ("http", "https"):
        return True
    if attr == "href":
        return scheme == "mailto"
    return probe.startswith("data:image/")  # inline images only


def _sanitize_tree(soup, base_url: str) -> None:
    """Apply the tag/attribute allowlist to *soup* in place and absolutize URLs."""
    for node in soup(["script", "style"]):
        node.decompose()

    for tag in soup.find_all(True):
        if tag.name not in ALLOWED_TAGS:
            tag.unwrap()  # drop the element, keep its children
            continue

        # Promote lazy-loading attributes to the real ones before filtering,
        # because data-* attributes do not survive the allowlist.
        if tag.name == "img" and not tag.get("src"):
            lazy_src = (
                tag.get("data-src")
                or tag.get("data-lazy-src")
                or tag.get("data-original")
                or _first_srcset_url(tag.get("data-srcset"))
            )
            if lazy_src:
                tag["src"] = lazy_src
        if tag.name in ("img", "source"):
            if not tag.get("srcset") and tag.get("data-srcset"):
                tag["srcset"] = tag["data-srcset"]

        safe = {}
        for key, value in tag.attrs.items():
            if key not in ALLOWED_ATTRS:
                continue
            if isinstance(value, list):
                value = " ".join(value)
            if key in ("href", "src"):
                try:
                    value = urljoin(base_url, value)
                except ValueError:
                    continue  # malformed URL: drop the attribute
                if not _url_scheme_allowed(key, value):
                    continue  # e.g. javascript: or vbscript:
            elif key == "srcset":
                value = absolutize_srcset(value, base_url)
            safe[key] = value
        tag.attrs = safe

        if tag.name == "a":
            tag.attrs.setdefault("rel", "noopener noreferrer")
            tag.attrs.setdefault("target", "_blank")
        if tag.name == "img":
            tag.attrs.setdefault("loading", "lazy")
            tag.attrs.setdefault("referrerpolicy", "no-referrer")


def _plain_text(fragment_html: str) -> str:
    """Return the visible text of *fragment_html*, one blank line between lines."""
    text = BeautifulSoup(fragment_html, "lxml").get_text("\n", strip=True)
    return "\n\n".join(line.strip() for line in text.split("\n") if line.strip())


def extract_and_clean(raw_html: str, base_url: str) -> tuple[str, str, str, str | None]:
    """Extract the readable article from a page and sanitize it for storage.

    Args:
        raw_html: Full page HTML as captured or fetched.
        base_url: URL of the page, used to resolve relative links.

    Returns:
        ``(title, cleaned_html, text, thumb_url)`` where *cleaned_html*
        contains only allowlisted tags/attributes, *text* is its plain-text
        form, and *thumb_url* may be None.
    """
    # 1) Prepass: inline Declarative Shadow DOM
    pre_html = inline_declarative_shadow_dom(raw_html)

    # 2) Readability extraction
    doc = Document(pre_html)
    title = (doc.short_title() or doc.title() or "").strip()
    soup = BeautifulSoup(doc.summary(html_partial=True), "lxml")

    # 3) Allowlist + absolutize + lazy promotion
    _sanitize_tree(soup, base_url)

    # 4) Convert bare image links into <img>
    for a in list(soup.find_all("a", href=True)):
        href = a.get("href")
        if href and is_image_url(href) and not a.find("img"):
            img = soup.new_tag(
                "img",
                src=urljoin(base_url, href),
                loading="lazy",
                referrerpolicy="no-referrer",
            )
            text_alt = a.get_text(" ", strip=True)
            if text_alt:
                img["alt"] = text_alt.replace("Image:", "").strip()
            a.replace_with(img)

    # 5) Pick thumbnail, then 6) remove any residual noscript wrappers
    thumb_url = pick_thumbnail(raw_html, soup, base_url)
    for node in soup(["noscript"]):
        node.decompose()

    cleaned_html = str(soup)
    text = _plain_text(cleaned_html)

    # 7) Fallback: if content is suspiciously short, try JSON-LD articleBody.
    #    The fallback HTML goes through the same allowlist as the main path.
    if len(text) < 800:
        jt, jhtml = extract_jsonld_article_body(raw_html)
        if jhtml:
            jsoup = BeautifulSoup(jhtml, "lxml")
            _sanitize_tree(jsoup, base_url)
            cleaned_html = str(jsoup)
            text = _plain_text(cleaned_html)
            if not title:
                title = jt or title

    return title, cleaned_html, text, thumb_url
# ---------------- Tag helpers ----------------
def ensure_tags(conn, names: list[str]) -> list[int]:
    """Return the ids of the given tags, inserting any that do not exist yet.

    Names are normalized first; names that normalize to the empty string
    are skipped. The caller is responsible for committing.
    """
    resolved: list[int] = []
    for raw_name in names:
        tag_name = normalize_tag(raw_name)
        if not tag_name:
            continue
        existing = conn.execute(
            "SELECT id FROM tags WHERE name = ?", (tag_name,)
        ).fetchone()
        if existing:
            resolved.append(existing["id"])
            continue
        inserted = conn.execute("INSERT INTO tags(name) VALUES (?)", (tag_name,))
        resolved.append(inserted.lastrowid)
    return resolved
def add_item_tags(conn, item_id: int, names: list[str]) -> None:
    """Attach the named tags to an item, creating missing tags as needed.

    Links that already exist are left alone (``INSERT OR IGNORE``). Does
    nothing when no name survives normalization. The caller commits.
    """
    cleaned = [norm for norm in map(normalize_tag, names) if norm]
    if not cleaned:
        return
    links = [(item_id, tag_id) for tag_id in ensure_tags(conn, cleaned)]
    conn.executemany(
        "INSERT OR IGNORE INTO item_tags(item_id, tag_id) VALUES (?, ?)",
        links,
    )
# ---------------- Authentication routes ----------------
@app.get(f"{APP_ROOT}/login")
def login_form():
    """Render the login page, passing along the optional ``err`` query message."""
    return render_template("login.html", error=request.args.get("err", ""))
@app.post(f"{APP_ROOT}/login")
def login_submit():
    """Check the submitted credentials and start an authenticated session.

    On success the user is redirected to the ``next`` query parameter, but
    only when it points inside this application; anything else falls back to
    the app's index so the login page cannot be used as an open redirect.
    On failure the user is sent back to the login form with an error message.
    """
    username = (request.form.get("username") or "").strip()
    password = request.form.get("password") or ""

    home = f"{APP_ROOT}/"
    requested = request.args.get("next") or ""
    is_local = (
        (requested == APP_ROOT or requested.startswith(home))
        # "//host" and "/\host" are treated as absolute URLs by browsers.
        and not requested.startswith("//")
        and "\\" not in requested
        and not any(ch < " " for ch in requested)
    )
    next_url = requested if is_local else home

    credentials_ok = (
        username == LOGIN_USERNAME
        and LOGIN_PASSWORD_HASH
        and check_password_hash(LOGIN_PASSWORD_HASH, password)
    )
    if credentials_ok:
        session.clear()
        session.permanent = True  # honors PERMANENT_SESSION_LIFETIME=7 days
        session["auth_ok"] = True
        session["who"] = username
        return redirect(next_url, code=302)

    qs = urlencode({"err": "Invalid credentials"})
    return redirect(f"{APP_ROOT}/login?{qs}", code=302)
@app.post(f"{APP_ROOT}/logout")
def logout():
    """Drop all session data and return the user to the login page."""
    login_page = f"{APP_ROOT}/login"
    session.clear()
    return redirect(login_page, code=302)
# ---------------- UI ----------------
@app.get(f"{APP_ROOT}/")
def index():
    """Render the item list, optionally filtered by search text and tags.

    Query parameters:
        q:    full-text search expression matched against ``items_fts``.
        tag:  a single tag name; takes precedence over ``tags``.
        tags: comma-separated tag names; an item must carry all of them.

    At most 100 items are shown. Pure text searches are ranked by bm25;
    every other listing is newest-first.
    """
    q = request.args.get("q", "").strip()
    tag = request.args.get("tag", "").strip().lower()
    tags_csv = request.args.get("tags", "").strip().lower()

    filter_tags: list[str] = []
    if tag:
        filter_tags = [normalize_tag(tag)]
    elif tags_csv:
        filter_tags = [n for n in map(normalize_tag, tags_csv.split(",")) if n]

    columns_sql = "SELECT i.id, i.title, i.url, i.added_at, i.thumb_url"
    has_tag_sql = """
        EXISTS (
            SELECT 1
            FROM item_tags it
            JOIN tags tg ON tg.id = it.tag_id
            WHERE it.item_id = i.id AND tg.name = ?
        )
    """

    conditions: list[str] = []
    params: list = []
    if q:
        from_sql = "FROM items_fts f JOIN items i ON i.id = f.rowid"
        conditions.append("items_fts MATCH ?")
        params.append(q)
    else:
        from_sql = "FROM items i"

    # One EXISTS per tag in a single statement: every placeholder is bound
    # exactly once, however many tags are combined with a search.
    for tname in filter_tags:
        conditions.append(has_tag_sql)
        params.append(tname)

    where_sql = f"WHERE {' AND '.join(conditions)}" if conditions else ""
    if q and not filter_tags:
        order_sql = "ORDER BY bm25(items_fts)"
    else:
        order_sql = "ORDER BY i.id DESC"
    final_sql = f"{columns_sql} {from_sql} {where_sql} {order_sql} LIMIT 100"

    with get_db() as db:
        rows = db.execute(final_sql, params).fetchall()

        # Collect tags for visible items
        tagmap: dict = {}
        if rows:
            ids = [r["id"] for r in rows]
            qmarks = ",".join("?" for _ in ids)
            tag_rows = db.execute(
                f"""
                SELECT it.item_id, tg.name
                FROM item_tags it
                JOIN tags tg ON tg.id = it.tag_id
                WHERE it.item_id IN ({qmarks})
                ORDER BY tg.name
                """,
                ids,
            ).fetchall()
            for tr in tag_rows:
                tagmap.setdefault(tr["item_id"], []).append(tr["name"])

        # All tags (for top row), with counts
        tags_all = db.execute("""
            SELECT tg.name AS name, COUNT(*) AS cnt
            FROM tags tg
            JOIN item_tags it ON it.tag_id = tg.id
            GROUP BY tg.id
            ORDER BY tg.name
        """).fetchall()

    return render_template(
        "index.html",
        rows=rows,
        q=q,
        filter_tags=filter_tags,
        tagmap=tagmap,
        tags_all=tags_all,
    )
@app.get(f"{APP_ROOT}/item/<int:item_id>")
def detail(item_id: int):
    """Render one saved item with its tags and the quick-add tag palette.

    Responds with 404 when no item has the given id.
    """
    with get_db() as db:
        row = db.execute("SELECT * FROM items WHERE id = ?", (item_id,)).fetchone()
        if not row:
            abort(404)
        tags = db.execute("""
            SELECT tg.name FROM item_tags it
            JOIN tags tg ON tg.id = it.tag_id
            WHERE it.item_id = ? ORDER BY tg.name
        """, (item_id,)).fetchall()
        # All tags (for quick-add palette). The LEFT JOIN keeps tags that are
        # not attached to anything; counting it.tag_id rather than * makes
        # those report 0 instead of 1.
        tags_all = db.execute("""
            SELECT tg.name AS name, COUNT(it.tag_id) AS cnt
            FROM tags tg
            LEFT JOIN item_tags it ON it.tag_id = tg.id
            GROUP BY tg.id
            ORDER BY tg.name
        """).fetchall()
    return render_template(
        "detail.html",
        item=row,
        tags=[t["name"] for t in tags],
        tags_all=tags_all,
    )
# ---------------- Add via URL (server-side fetch) ----------------
@app.post(f"{APP_ROOT}/add-url")
def add_url():
    """Fetch a URL server-side, extract the article and save (or refresh) it.

    Form fields:
        u: the http(s) URL to fetch.

    Responses: 400 for a missing/unsupported URL or an extraction failure,
    413 when the page exceeds 8 MiB, 502 when the fetch fails, otherwise a
    redirect to the saved item's detail page.

    NOTE(review): the target host is not restricted, so an authenticated
    user can make the server request internal addresses (SSRF); confirm
    that is acceptable for this single-user deployment.
    """
    max_bytes = 8 * 1024 * 1024

    url = (request.form.get("u") or "").strip()
    if not url:
        return jsonify({"error": "missing url"}), 400
    if not url.lower().startswith(("http://", "https://")):
        return jsonify({"error": "unsupported url scheme"}), 400

    try:
        with requests.get(
            url,
            timeout=12,
            stream=True,  # enforce the size cap while downloading
            headers={
                "User-Agent": (
                    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/120.0.0.0 Safari/537.36"
                )
            },
        ) as resp:
            resp.raise_for_status()
            chunks = []
            received = 0
            for chunk in resp.iter_content(chunk_size=64 * 1024):
                received += len(chunk)
                if received > max_bytes:
                    return jsonify({"error": "page too large"}), 413
                chunks.append(chunk)
            encoding = resp.encoding
    except requests.RequestException:
        return jsonify({"error": "fetch_failed"}), 502

    content = b"".join(chunks)
    try:
        # Falls back to UTF-8 when the server declares no charset.
        html_text = content.decode(encoding or "utf-8", errors="replace")
    except LookupError:
        # The server declared an encoding Python does not know.
        html_text = content.decode("utf-8", errors="replace")

    # Try to extract a <title> and serialize <html> back out
    title_guess = None
    try:
        soup = BeautifulSoup(html_text, "lxml")
        t = soup.find("title")
        title_guess = (t.get_text() if t else "").strip() or None
        html_node = soup.find("html")
        if html_node:
            html_text = "<!doctype html>" + str(html_node)
        else:
            html_text = "<!doctype html>" + html_text
    except Exception:
        html_text = "<!doctype html>" + html_text

    # Reuse the same extraction path
    try:
        etitle, cleaned, text, thumb_url = extract_and_clean(html_text, url)
        title = title_guess or etitle or url or "Untitled"
    except Exception as e:
        return jsonify({"error": f"extract_failed:{type(e).__name__}"}), 400

    with get_db() as db:
        db.execute(
            """
            INSERT INTO items (url, title, content_html, content_text, thumb_url)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(url) DO UPDATE SET
            title = excluded.title,
            content_html = excluded.content_html,
            content_text = excluded.content_text,
            thumb_url = excluded.thumb_url,
            added_at = datetime('now')
            """,
            (url, title, cleaned, text, thumb_url),
        )
        item_id = db.execute("SELECT id FROM items WHERE url = ?", (url,)).fetchone()["id"]
        db.commit()
    return redirect(url_for("detail", item_id=item_id))
# ---------------- API (extension) ----------------
@app.post(f"{APP_ROOT}/api/v1/capture")
def capture():
    """Save a page captured by the browser extension.

    Expects a JSON object with ``url`` and ``html`` (both required), an
    optional ``title`` and optional ``tags`` (list, or comma-separated
    string). Requires the bearer token checked by ``require_token``.

    Responses: 401 without a valid token, 400 for a missing ``url``/``html``
    or an extraction failure, otherwise ``{"ok": true, "id", "title"}``.
    """
    if not require_token():
        return jsonify({"error": "unauthorized"}), 401

    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}  # arrays, strings and invalid JSON carry no usable fields

    url = str(data.get("url") or "").strip()
    title = str(data.get("title") or "").strip()
    raw = data.get("html") or ""

    tags = data.get("tags") or []
    if isinstance(tags, str):
        tags = [t.strip() for t in tags.split(",") if t.strip()]
    elif isinstance(tags, list):
        # Non-string entries would crash tag normalization.
        tags = [t for t in tags if isinstance(t, str)]
    else:
        tags = []

    if not raw or not isinstance(raw, str):
        return jsonify({"error": "missing html"}), 400
    if not url:
        # Items are keyed by URL; an empty one would overwrite every other
        # URL-less capture.
        return jsonify({"error": "missing url"}), 400

    try:
        etitle, cleaned, text, thumb_url = extract_and_clean(raw, url)
        if not title:
            title = etitle or url or "Untitled"
    except Exception as e:
        return jsonify({"error": f"extract_failed:{type(e).__name__}"}), 400

    with get_db() as db:
        db.execute(
            """
            INSERT INTO items (url, title, content_html, content_text, thumb_url)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(url) DO UPDATE SET
            title = excluded.title,
            content_html = excluded.content_html,
            content_text = excluded.content_text,
            thumb_url = excluded.thumb_url,
            added_at = datetime('now')
            """,
            (url, title, cleaned, text, thumb_url),
        )
        item_id = db.execute("SELECT id FROM items WHERE url = ?", (url,)).fetchone()["id"]
        if tags:
            add_item_tags(db, item_id, tags)
        db.commit()
    return jsonify({"ok": True, "id": item_id, "title": title})
@app.get(f"{APP_ROOT}/healthz")
def healthz():
    """Liveness probe: always answers with a small JSON status document."""
    payload = {"status": "ok"}
    return jsonify(payload)
# Delete (HTMX refresh)
@app.post(f"{APP_ROOT}/item/<int:item_id>/delete")
@app.delete(f"{APP_ROOT}/item/<int:item_id>")
def delete_item(item_id: int):
    """Delete an item and send the client back to a fresh list.

    HTMX callers get an empty 204 with ``HX-Refresh`` so the page reloads;
    everyone else is redirected to the index.
    NOTE(review): only the ``items`` row is deleted here; whether related
    ``item_tags`` rows go away depends on the schema — confirm.
    """
    with get_db() as db:
        db.execute("DELETE FROM items WHERE id = ?", (item_id,))
        db.commit()

    from_htmx = request.headers.get("HX-Request") == "true"
    if not from_htmx:
        return redirect(url_for("index"))

    reply = make_response("", 204)
    reply.headers["HX-Refresh"] = "true"  # full page reload
    return reply
# -------- Tag HTMX endpoints --------
@app.post(f"{APP_ROOT}/item/<int:item_id>/tag")
def add_tag(item_id: int):
    """Attach a tag (form field ``name``) to an item and re-render its tag list.

    An empty name is a no-op answered with 204; an unknown item gives 404.
    """
    tag_name = normalize_tag(request.form.get("name", ""))
    if not tag_name:
        return ("", 204)

    with get_db() as db:
        item = db.execute(
            "SELECT * FROM items WHERE id = ?", (item_id,)
        ).fetchone()
        if not item:
            abort(404)
        add_item_tags(db, item_id, [tag_name])
        db.commit()
        tag_rows = db.execute("""
            SELECT tg.name FROM item_tags it JOIN tags tg ON tg.id = it.tag_id
            WHERE it.item_id = ? ORDER BY tg.name
        """, (item_id,)).fetchall()

    tag_names = [r["name"] for r in tag_rows]
    return render_template("tags.html", item=item, tags=tag_names)
@app.post(f"{APP_ROOT}/item/<int:item_id>/tag/<name>/delete")
def delete_tag(item_id: int, name: str):
    """Detach a tag from an item and re-render the item's tag list.

    The tag itself is kept; only the item/tag link is removed. An unknown
    item gives 404.
    """
    tag_name = normalize_tag(name)
    with get_db() as db:
        item = db.execute(
            "SELECT * FROM items WHERE id = ?", (item_id,)
        ).fetchone()
        if not item:
            abort(404)
        db.execute("""
            DELETE FROM item_tags
            WHERE item_id = ? AND tag_id = (SELECT id FROM tags WHERE name = ?)
        """, (item_id, tag_name))
        db.commit()
        remaining = db.execute("""
            SELECT tg.name FROM item_tags it JOIN tags tg ON tg.id = it.tag_id
            WHERE it.item_id = ? ORDER BY tg.name
        """, (item_id,)).fetchall()

    tag_names = [r["name"] for r in remaining]
    return render_template("tags.html", item=item, tags=tag_names)
return app
app = create_app()
def format_est(dt_str: str) -> str:
    """Render a ``YYYY-MM-DD HH:MM:SS`` UTC timestamp in New York time.

    The output looks like ``Jan 20, 2026 03:53 AM``. Input that cannot be
    parsed is returned unchanged.
    """
    try:
        parsed = datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S")
        as_utc = pytz.utc.localize(parsed)
        local = as_utc.astimezone(NY_TZ)
    except Exception:
        return dt_str
    return local.strftime("%b %d, %Y %I:%M %p")
# Expose format_est to templates as the `format_est` Jinja filter.
app.jinja_env.filters["format_est"] = format_est
# Development entry point: runs Flask's built-in server on loopback only.
# NOTE(review): debug=True enables the interactive debugger; presumably production
# is served through gunicorn instead — confirm this path is never used there.
if __name__ == "__main__":
app.run(host="127.0.0.1", port=8013, debug=True)

55
cat_ai_files.sh Executable file
View File

@@ -0,0 +1,55 @@
#!/bin/bash
# Print a listing and the contents of every source file under a directory.
# Usage: cat_ai_files.sh <sub-directory>

# Get sub-directory name from command-line argument
sub_dir="$1"

# Check if the directory is provided
if [ -z "$sub_dir" ]; then
    echo "Error: No sub-directory provided. Usage: $0 <sub-directory>" >&2
    exit 1
fi

# Check if the directory exists
if [ ! -d "$sub_dir" ]; then
    echo "Error: Directory '$sub_dir' not found." >&2
    exit 1
fi

# Skip a directory when any component of its path is exactly one of these
# names. Dots are escaped so ".npm" / ".venv" match literally.
skip_re='(^|/)(node_modules|\.npm|\.venv|dist|lib64|var|__pycache__)($|/)'
# Matches directories whose path ends with "intelaide-backend/python".
backend_re='(^|/)intelaide-backend/python$'

# Recursively iterate through the sub-directory. IFS= and -r keep leading
# whitespace and backslashes in directory names intact.
find "$sub_dir" -type d | while IFS= read -r dir; do
    if [[ "$dir" =~ $skip_re ]]; then
        continue
    fi

    # The backend/python directory gets a narrower set of extensions.
    if [[ "$dir" =~ $backend_re ]]; then
        files=$(find "$dir" -maxdepth 1 -type f \( -name "*.jsx" -o -name "*.css" -o -name "*.js" -o -name "*.json" -o -name "*.py" \) ! -name "package-lock.json")
    else
        files=$(find "$dir" -maxdepth 1 -type f \( -name "*.py" -o -name "*.html" -o -name "*.jsx" -o -name "*.yaml" -o -name "*.css" -o -name "*.js" -o -name "*.json" \) ! -name "package-lock.json")
    fi

    # If no files are found, skip to the next directory
    if [ -z "$files" ]; then
        continue
    fi

    # Print directory and file listing
    echo "====================================="
    echo "Directory: $dir"
    echo ""
    echo "Files found:"
    echo "$files" | awk -F'/' '{print $NF}'
    echo "====================================="

    # Print each file's content. Reading line by line (rather than
    # word-splitting $files) keeps paths that contain spaces in one piece.
    while IFS= read -r file; do
        echo ""
        echo "-------------------------------------"
        echo "File: $(basename "$file")"
        echo "-------------------------------------"
        cat "$file"
        echo ""
    done <<< "$files"
done

View File

@@ -0,0 +1,151 @@
jared.evans@JARED-EVANS-C02G14ZWQ05N webext_readitlater % cat manifest.json
{
"manifest_version": 3,
"name": "readitlater — Save Page",
"version": "0.1.0",
"action": { "default_title": "Save to readitlater" },
"permissions": ["activeTab", "scripting", "storage"],
"host_permissions": ["https://www.jaredlog.com/*"],
"background": { "service_worker": "background.js" },
"icons": {
"16": "icon16.png",
"48": "icon48.png",
"128": "icon128.png"
}
}
jared.evans@JARED-EVANS-C02G14ZWQ05N webext_readitlater % cat background.js
const API_URL = "https://www.jaredlog.com/readitlater/api/v1/capture";
/**
 * Read the API token from chrome.storage.sync.
 * Resolves to the stored READITLATER_TOKEN, or "" when none is set.
 */
async function getToken() {
  const keys = ["READITLATER_TOKEN"];
  return new Promise((done) => {
    chrome.storage.sync.get(keys, (stored) => {
      done(stored.READITLATER_TOKEN || "");
    });
  });
}
/**
 * Show a short-lived toast at the top of the given tab.
 *
 * @param {number} tabId  Tab to inject into.
 * @param {string} text   Message to display (rendered as plain text).
 * @param {boolean} ok    true for the green success style, false for red.
 *
 * Injection failures (e.g. restricted pages) are logged and ignored.
 */
async function injectBanner(tabId, text, ok = true) {
  try {
    await chrome.scripting.executeScript({
      target: { tabId },
      world: "MAIN", // we want to touch the page DOM
      func: (text, ok) => {
        // Create a host and shadow root so site CSS can't break us
        const host = document.createElement("div");
        host.setAttribute("id", "readitlater-toast-host");
        host.style.all = "initial"; // reduce leakage in some edge cases
        const shadow = host.attachShadow({ mode: "closed" });

        // Styles
        const style = document.createElement("style");
        style.textContent = `
          @keyframes slideDown {
            from { transform: translateY(-110%); opacity: 0; }
            to { transform: translateY(0); opacity: 1; }
          }
          @keyframes fadeOut {
            to { opacity: 0; transform: translateY(-110%); }
          }
          .toast {
            position: fixed;
            top: 0; left: 50%;
            transform: translateX(-50%);
            z-index: 2147483647;
            margin: 12px auto 0;
            padding: 10px 14px;
            border-radius: 8px;
            font: 14px/1.2 -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
            color: #0b1f0b;
            background: ${ok ? "#c7f9cc" : "#ffd6d6"};
            border: 1px solid ${ok ? "#94d7a2" : "#ffabab"};
            box-shadow: 0 10px 20px rgba(0,0,0,.12), 0 2px 6px rgba(0,0,0,.08);
            animation: slideDown 180ms ease-out;
            pointer-events: none; /* don't block page */
            max-width: 90vw;
            text-align: center;
          }
          .toast .text {
            white-space: pre-wrap;
          }
          @media (prefers-reduced-motion: reduce) {
            .toast { animation: none; }
          }
        `;

        // Container
        const toast = document.createElement("div");
        toast.className = "toast";
        toast.setAttribute("role", "status");
        toast.setAttribute("aria-live", "polite");

        // textContent (not innerHTML) so the message can never be parsed as
        // markup inside the host page.
        const label = document.createElement("span");
        label.className = "text";
        label.textContent = text;
        toast.appendChild(label);

        shadow.append(style, toast);
        document.documentElement.appendChild(host);

        // Auto-remove after 2 seconds (fade for 200ms)
        const remove = () => host.remove();
        setTimeout(() => {
          toast.style.animation = "fadeOut 200ms ease-in forwards";
          setTimeout(remove, 220);
        }, 2000);
      },
      args: [text, ok],
    });
  } catch (e) {
    // Non-fatal if injection fails (e.g., restricted pages)
    console.warn("readitlater: banner inject failed", e);
  }
}
// Toolbar button: capture the active tab's URL, title and HTML, POST them to
// the capture API with the stored bearer token, then show a result banner.
chrome.action.onClicked.addListener(async (tab) => {
  if (!tab?.id) return;

  let payload = null;
  try {
    const [injection] = await chrome.scripting.executeScript({
      target: { tabId: tab.id },
      func: () => ({
        url: window.location.href,
        title: document.title,
        html: "<!doctype html>" + document.documentElement.outerHTML
      }),
    });
    payload = injection?.result || null;
  } catch (e) {
    // executeScript rejects on pages extensions may not script (chrome://,
    // the Web Store, ...). A banner cannot be injected there either, so log
    // the failure instead of leaving an unhandled rejection.
    console.warn("readitlater: page capture failed", e);
    return;
  }
  if (!payload) return;

  const token = await getToken();
  try {
    const resp = await fetch(API_URL, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": "Bearer " + token
      },
      body: JSON.stringify(payload),
    });
    if (resp.ok) {
      // Success banner
      await injectBanner(tab.id, "Link saved.", true);
    } else {
      console.error("Capture failed:", resp.status, await resp.text());
      await injectBanner(tab.id, "Save failed.", false);
    }
  } catch (e) {
    console.error("readitlater: network error", e);
    await injectBanner(tab.id, "Network error.", false);
  }
});
// One-time: set your token from the extension's service-worker DevTools console
// (chrome.storage is not available in an ordinary web page's console):
// chrome.storage.sync.set({ READITLATER_TOKEN: "YOUR_HEX_TOKEN" });
How to add the hex token:
Use the extension's background page (preferred)
Go to chrome://extensions/.
Enable Developer mode (toggle top right).
Find your readitlater extension.
Click the “service worker” link under “Inspect views”. This opens a DevTools console for the extension's background service worker.
Now paste the following, substituting your own token (never commit the real value):
chrome.storage.sync.set({ READITLATER_TOKEN: "YOUR_HEX_TOKEN" });

65
database_schema_display.sh Executable file
View File

@@ -0,0 +1,65 @@
#!/usr/bin/env bash
# list SQLite schemas, tables, and columns (type, notnull, default, pk)
# Usage: ./database_schema_display.sh path/to/db.sqlite
set -euo pipefail

DB_PATH="${1:-}"
if [[ -z "$DB_PATH" ]]; then
  echo "Usage: $0 path/to/database.sqlite" >&2
  exit 1
fi
if [[ ! -f "$DB_PATH" ]]; then
  echo "Error: Database file '$DB_PATH' does not exist." >&2
  exit 1
fi

echo "=== Schemas (databases) ==="
# Show as-is for human info
sqlite3 "$DB_PATH" ".databases" || true
echo ""

# Get schema names via PRAGMA (stable across sqlite versions)
schemas=$(sqlite3 -noheader -separator '|' "$DB_PATH" "PRAGMA database_list;" | awk -F'|' '{print $2}')
# Fallback if empty (very old sqlite): assume main
if [[ -z "$schemas" ]]; then
  schemas="main"
fi

while IFS= read -r schema; do
  [[ -z "$schema" ]] && continue
  echo "=== Schema: $schema ==="

  # Get user tables (exclude internal sqlite_% tables).
  # sqlite_master is used rather than its newer alias sqlite_schema, which
  # only exists in SQLite 3.33 and later.
  tables=$(sqlite3 -noheader "$DB_PATH" \
    "SELECT name
     FROM \"$schema\".sqlite_master
     WHERE type='table' AND name NOT LIKE 'sqlite_%'
     ORDER BY name;")

  if [[ -z "$tables" ]]; then
    echo " (no user tables)"
    echo ""
    continue
  fi

  # Iterate tables safely line-by-line
  while IFS= read -r table; do
    [[ -z "$table" ]] && continue
    echo " -> Table: $table"
    # Double any single quote so the name stays one SQL string literal.
    quoted_table=${table//\'/\'\'}
    # Columns: cid|name|type|notnull|dflt_value|pk
    sqlite3 -noheader -separator '|' "$DB_PATH" "PRAGMA \"$schema\".table_info('$quoted_table');" \
      | awk -F'|' '
        BEGIN {
          printf " %-28s %-16s %-8s %-16s %-3s\n", "Column", "Type", "NotNull", "Default", "PK"
          printf " %-28s %-16s %-8s %-16s %-3s\n", "------", "----", "-------", "-------", "--"
        }
        {
          dv = ($5 == "" ? "NULL" : $5)
          printf " %-28s %-16s %-8s %-16s %-3s\n", $2, $3, $4, dv, $6
        }
      '
    echo ""
  done <<< "$tables"
done <<< "$schemas"

19
db.py Normal file
View File

@@ -0,0 +1,19 @@
import os
import sqlite3
from contextlib import closing, contextmanager

# Location of the SQLite database file; overridable through DATABASE_PATH.
DB_PATH = os.environ.get("DATABASE_PATH", "readitlater.db")


def init_db():
    """Create the database objects by running ``schema.sql`` against DB_PATH.

    ``schema.sql`` is read from the current working directory. The file
    handle and the connection are both closed even when the script fails.
    """
    with open("schema.sql", "r", encoding="utf-8") as schema_file:
        schema_sql = schema_file.read()
    with closing(sqlite3.connect(DB_PATH)) as conn:
        conn.executescript(schema_sql)
        conn.commit()


@contextmanager
def get_db():
    """Yield a connection to DB_PATH whose rows support access by column name.

    The connection is always closed on exit. Nothing is committed
    automatically; callers commit their own writes.
    """
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    try:
        yield conn
    finally:
        conn.close()

6
env.txt Normal file
View File

@@ -0,0 +1,6 @@
SECRET_KEY="omg_what_a_secret_ooh"
API_TOKEN="2cb9f5b875af65b4de7ff7736e384ae9d33e1bf2176c45afac24a713804ef6d4"
APPLICATION_ROOT="/readitlater"
DATABASE_PATH="/var/www/readitlater/readitlater.db"
LOGIN_USERNAME=jared
LOGIN_PASSWORD_HASH="scrypt:32768:8:1$5e2vp6DJB5kRkkGm$d71ce6c558fd7dec912945bb2574969d999402a7dc27bc970f5c6a1b79ea6be530a2e2b83ac945de478729c55ddfb77759ff504eba1a16fcdf5db446cbcd7a3b"

View File

@@ -0,0 +1,2 @@
from getpass import getpass

from werkzeug.security import generate_password_hash

# Prompt for a password and print the werkzeug hash to store in
# LOGIN_PASSWORD_HASH. getpass() keeps the password from being echoed to the
# terminal or captured in scrollback.
print(generate_password_hash(getpass("Password to hash: ")))

7
gunicorn_conf.py Normal file
View File

@@ -0,0 +1,7 @@
# Gunicorn settings for the readitlater app.
# Loopback address and port; the project's nginx config proxies to this same 127.0.0.1:8013.
bind = "127.0.0.1:8013"
# Two worker processes with two threads each.
workers = 2
threads = 2
# Worker timeout and graceful-shutdown timeout (seconds, per Gunicorn's settings reference).
# NOTE(review): the nginx UI location uses proxy_read_timeout 65, slightly above this timeout —
# presumably intentional so Gunicorn gives up first; confirm.
timeout = 60
graceful_timeout = 30
# Log destinations; the /var/log/readitlater directory must exist and be writable.
accesslog = "/var/log/readitlater/access.log"
errorlog = "/var/log/readitlater/error.log"

86
nginx-config.txt Normal file
View File

@@ -0,0 +1,86 @@
# Default server configuration for HTTP that redirects to HTTPS
server {
if ($host = www.jaredlog.com) {
return 301 https://$host$request_uri;
} # managed by Certbot
if ($host = zappy.jaredlog.com) {
return 301 https://$host$request_uri;
} # managed by Certbot
listen 80 default_server;
listen [::]:80 default_server;
server_name www.jaredlog.com zappy.jaredlog.com;
return 301 https://$server_name$request_uri;
}
server {
# SSL configuration
listen 443 ssl;
listen [::]:443 ssl;
server_name www.jaredlog.com zappy.jaredlog.com;
ssl_certificate /etc/letsencrypt/live/zappy.jaredlog.com/fullchain.pem; # managed by Certbot
ssl_certificate_key /etc/letsencrypt/live/zappy.jaredlog.com/privkey.pem; # managed by Certbot
include /etc/letsencrypt/options-ssl-nginx.conf; # managed by Certbot
ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; # managed by Certbot
root /var/www/html;
index index.php index.html index.htm index.nginx-debian.html;
# Allow larger uploads (e.g., 15MB)
client_max_body_size 15M;
location / {
# First attempt to serve request as file, then as directory, then fall back to 404.
try_files $uri $uri/ =404;
}
location /readitlater/static/ {
alias /var/www/readitlater/static/;
expires 7d;
add_header Cache-Control "public, max-age=604800";
}
# Security headers
#add_header X-Frame-Options "SAMEORIGIN" always;
#add_header X-Content-Type-Options "nosniff" always;
#add_header Referrer-Policy "strict-origin-when-cross-origin" always;
# Limit abuse
#limit_req_zone $binary_remote_addr zone=api:10m rate=5r/s;
# API (no Basic Auth; lets the Chrome extension call it)
location ^~ /readitlater/api/ {
auth_basic off;
proxy_pass http://127.0.0.1:8013; # <-- no trailing /
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
# Health check (exempt from Basic Auth)
location = /readitlater/healthz {
auth_basic off;
proxy_pass http://127.0.0.1:8013; # <-- no trailing /
proxy_set_header Host $host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
# UI (no auth_basic directive applies in this location; access is gated by the Flask app's session login)
location /readitlater/ {
proxy_pass http://127.0.0.1:8013; # <-- no trailing /
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_read_timeout 65;
}
}

10
recreate_ai_files.sh Executable file
View File

@@ -0,0 +1,10 @@
#!/bin/sh
# Rebuild ai_files.txt from scratch: database schema, nginx config, then the
# project's source files. All output goes through a single redirection, so
# the file is truncated exactly once at the start and nothing written
# earlier in the run is lost.
{
    echo "================"
    echo "sqlite database:"
    echo "================"
    ./database_schema_display.sh ./readitlater.db
    echo "================"
    echo "Nginx config:"
    echo "================"
    cat nginx-config.txt
    ./cat_ai_files.sh .
} > ai_files.txt

64
templates/base.html Normal file
View File

@@ -0,0 +1,64 @@
<!doctype html>
{#- Base layout for every page: loads the web font, Tailwind and htmx from CDNs,
    renders the sticky header (brand link, search, paste-URL form, sign-out) and
    exposes a single `content` block that child templates fill in.
    All links and form actions below hard-code the "/readitlater" prefix, so they
    must be kept in sync with the app's APPLICATION_ROOT setting if that changes. #}
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>readitlater</title>
<!-- Google Font: Big Shoulders Inline (Light 300) -->
<link href="https://fonts.googleapis.com/css2?family=Big+Shoulders+Inline:wght@300&display=swap" rel="stylesheet">
<!-- Tailwind -->
<script src="https://cdn.tailwindcss.com"></script>
<!-- HTMX -->
<script src="https://unpkg.com/htmx.org@1.9.12"></script>
</head>
<body class="bg-slate-50 text-slate-900">
<header class="bg-white border-b sticky top-0 z-10">
<div class="max-w-5xl mx-auto px-4 py-3 flex items-center gap-4">
<a
class="text-4xl text-red-600"
style="font-family: 'Big Shoulders Inline', sans-serif; font-weight: 300;"
href="/readitlater/"
>
Readitlater
</a>
<!-- Search form -->
{#- GET to the index with the query in `q`; the box is pre-filled with the current
    `q` from the template context, or left empty when `q` is undefined. #}
<form action="/readitlater/" method="get" class="ml-auto flex items-center gap-2">
<input
type="search"
name="q"
value="{{ q|default('') }}"
placeholder="Search titles & full text…"
class="border rounded px-3 py-1 w-64"
/>
<button class="px-3 py-1 bg-slate-900 text-white rounded">Search</button>
</form>
<!-- Paste URL form -->
{#- POSTs the pasted URL as form field `u` to /readitlater/add-url. #}
<form action="/readitlater/add-url" method="post" class="flex items-center gap-2">
<input
type="url"
name="u"
required
placeholder="Paste URL…"
class="border rounded px-3 py-1 w-64"
/>
<button class="px-3 py-1 bg-blue-600 text-white rounded">Save</button>
</form>
{#- The sign-out button is only rendered when the session carries a truthy `auth_ok`.
    NOTE(review): the search and paste-URL forms above are rendered regardless of
    `auth_ok`, so they also appear on the login page; confirm that is intended. #}
{% if session.get('auth_ok') %}
<form action="/readitlater/logout" method="post">
<button class="px-3 py-1 text-sm border rounded hover:bg-slate-50">Sign out</button>
</form>
{% endif %}
</div>
</header>
<main class="max-w-5xl mx-auto px-4 py-6">
{#- Child templates (index, detail, login) supply the page body here. #}
{% block content %}{% endblock %}
</main>
</body>
</html>

64
templates/bkup_base.txt Normal file
View File

@@ -0,0 +1,64 @@
<!doctype html>
{#- NOTE(review): this appears to be an earlier copy of base.html kept as a backup;
    it differs only in the brand font (Sansation Bold) and title classes. The
    templates in this commit extend "base.html", not this file. Confirm whether it
    is still needed. #}
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>readitlater</title>
<!-- Google Font: Sansation Bold -->
<link href="https://fonts.googleapis.com/css2?family=Sansation:wght@700&display=swap" rel="stylesheet">
<!-- Tailwind -->
<script src="https://cdn.tailwindcss.com"></script>
<!-- HTMX -->
<script src="https://unpkg.com/htmx.org@1.9.12"></script>
</head>
<body class="bg-slate-50 text-slate-900">
<header class="bg-white border-b sticky top-0 z-10">
<div class="max-w-5xl mx-auto px-4 py-3 flex items-center gap-4">
<a
class="text-3xl text-red-600 font-bold"
style="font-family: 'Sansation', sans-serif;"
href="/readitlater/"
>
Readitlater
</a>
<!-- Search form -->
{#- GET to the index with the query in `q`; pre-filled from the context when present. #}
<form action="/readitlater/" method="get" class="ml-auto flex items-center gap-2">
<input
type="search"
name="q"
value="{{ q|default('') }}"
placeholder="Search titles & full text…"
class="border rounded px-3 py-1 w-64"
/>
<button class="px-3 py-1 bg-slate-900 text-white rounded">Search</button>
</form>
<!-- Paste URL form -->
{#- POSTs the pasted URL as form field `u` to /readitlater/add-url. #}
<form action="/readitlater/add-url" method="post" class="flex items-center gap-2">
<input
type="url"
name="u"
required
placeholder="Paste URL…"
class="border rounded px-3 py-1 w-64"
/>
<button class="px-3 py-1 bg-blue-600 text-white rounded">Save</button>
</form>
{#- Sign-out is only rendered when the session carries a truthy `auth_ok`. #}
{% if session.get('auth_ok') %}
<form action="/readitlater/logout" method="post">
<button class="px-3 py-1 text-sm border rounded hover:bg-slate-50">Sign out</button>
</form>
{% endif %}
</div>
</header>
<main class="max-w-5xl mx-auto px-4 py-6">
{% block content %}{% endblock %}
</main>
</body>
</html>

80
templates/detail.html Normal file
View File

@@ -0,0 +1,80 @@
{# templates/detail.html: detail page for one saved item (title, source link, tags,
   extracted content). Context used: item, tags (via tags.html), tags_all.
   This header is a Jinja comment rather than an HTML comment because anything
   placed before "extends" is emitted verbatim, i.e. ahead of the parent's doctype. -#}
{% extends "base.html" %}
{% block content %}
<article class="max-w-none">
  <div class="flex items-start justify-between gap-4 mb-4">
    <h1 class="text-2xl font-semibold leading-tight break-words">
      {{ item.title or "Untitled" }}
    </h1>
    {# Plain (non-htmx) form post; the browser confirm() guards against accidental deletes. #}
    <form action="/readitlater/item/{{ item.id }}/delete" method="post"
          onsubmit="return confirm('Delete this item?')">
      <button
        type="submit"
        title="Delete"
        aria-label="Delete"
        class="text-white bg-red-600 hover:bg-red-700 focus:ring-2 focus:ring-red-300 rounded px-3 py-1">
        ✕ Delete
      </button>
    </form>
  </div>
  <div class="flex flex-wrap items-center gap-3 text-sm text-slate-500 mb-4">
    {# NOTE(review): item.url is used as an href as-is; assumes only http(s) URLs are stored. Confirm in the save path. #}
    {% if item.url %}
    <a class="text-blue-700 hover:underline break-all" href="{{ item.url }}" target="_blank" rel="noopener noreferrer">
      {{ item.url }}
    </a>
    {% endif %}
    <span class="text-slate-400"></span>
    <span class="text-slate-400">{{ item.added_at|format_est }}</span>
  </div>
  <div class="mb-6">
    {# Current tags; tags.html renders the #tags-<id> element that the htmx calls below replace. #}
    {% include "tags.html" with context %}
    {% if tags_all %}
    <div class="mt-3 mb-2">
      <div class="text-xs text-slate-500 mb-1">Quick add tag:</div>
      <div class="flex flex-wrap gap-2">
        {% for t in tags_all %}
        {% set name = t.name if t.name is defined else t['name'] %}
        {# The hx-vals payload is produced with |tojson so that quotes or backslashes in a
           tag name cannot break the JSON. tojson output is safe inside a single-quoted
           attribute, which is why the attribute uses single quotes. #}
        <button
          type="button"
          class="text-xs rounded-full px-2 py-1 border bg-slate-100 text-slate-800 border-slate-200 hover:bg-slate-200"
          hx-post="/readitlater/item/{{ item.id }}/tag"
          hx-vals='{{ {"name": name}|tojson }}'
          hx-target="#tags-{{ item.id }}"
          hx-swap="outerHTML"
          title="Add #{{ name }}">
          #{{ name }}
        </button>
        {% endfor %}
      </div>
    </div>
    {% endif %}
    {# Free-text tag entry: posts form field `name` to the same endpoint as the quick-add buttons. #}
    <form
      class="mt-2 flex items-center gap-2"
      hx-post="/readitlater/item/{{ item.id }}/tag"
      hx-target="#tags-{{ item.id }}"
      hx-swap="outerHTML">
      <input
        type="text" name="name" placeholder="Add tag…"
        class="border rounded px-2 py-1 text-sm" />
      <button
        type="submit"
        class="text-white bg-slate-800 hover:bg-slate-900 rounded px-3 py-1 text-sm">
        + Add
      </button>
    </form>
  </div>
  <!-- Content -->
  <div class="max-w-none [&_p]:mb-4">
    {# NOTE(review): content_html is rendered unescaped (|safe). This assumes it was
       sanitized before being stored; confirm in the save path, otherwise this is stored XSS. #}
    {% if item.content_html %}
    {{ item.content_html|safe }}
    {% else %}
    <pre class="bg-slate-100 p-3 rounded text-sm whitespace-pre-wrap">{{ item.content_text }}</pre>
    {% endif %}
  </div>
</article>
{% endblock %}

100
templates/index.html Normal file
View File

@@ -0,0 +1,100 @@
{% extends "base.html" %}
{% block content %}
{# Item list page. Context used below: tags_all (entries with name/cnt), filter_tags,
   q, rows and tagmap (item id -> list of tag names).
   Tag names are passed through |urlencode wherever they are placed in a query string,
   so characters such as "&", "#", "+" or spaces stay part of the tag value instead of
   being read as URL syntax. HTML escaping alone does not do that. #}
<!-- Top tag row -->
{% if tags_all %}
<div class="mb-4 overflow-x-auto">
  <div class="flex items-center gap-2 min-w-max">
    {% for t in tags_all %}
    {% set name = t.name if t.name is defined else t['name'] %}
    {% set cnt = t.cnt if t.cnt is defined else t['cnt'] %}
    {# Tags that are part of the active filter are highlighted. #}
    <a
      href="/readitlater/?tag={{ name|urlencode }}"
      class="text-xs rounded-full px-2 py-1 border
      {% if filter_tags and name in filter_tags %}
      bg-blue-600 text-white border-blue-600
      {% else %}
      bg-slate-100 text-slate-800 border-slate-200 hover:bg-slate-200
      {% endif %}">
      #{{ name }} <span class="opacity-70">({{ cnt }})</span>
    </a>
    {% endfor %}
    {% if filter_tags %}
    <a href="/readitlater/" class="text-xs px-2 py-1 rounded-full border bg-white hover:bg-slate-50">Clear</a>
    {% endif %}
  </div>
</div>
{% endif %}
{# Heading: a search query takes precedence over a tag filter; otherwise "Latest". #}
{% if q %}
<h1 class="text-xl font-semibold mb-4">Search results for “{{ q }}”</h1>
{% elif filter_tags %}
<h1 class="text-xl font-semibold mb-2">Tagged</h1>
<div class="mb-4 flex gap-2">
  {% for t in filter_tags %}
  <a href="/readitlater/?tag={{ t|urlencode }}" class="text-xs bg-slate-100 border border-slate-200 rounded-full px-2 py-1">#{{ t }}</a>
  {% endfor %}
</div>
{% else %}
<h1 class="text-xl font-semibold mb-4">Latest</h1>
{% endif %}
<ul class="space-y-3">
  {% for r in rows %}
  <li class="bg-white p-4 rounded shadow-sm">
    <div class="flex items-start gap-3">
      {% if r.thumb_url %}
      {# Thumbnails are loaded lazily and without a Referer header. #}
      <img
        src="{{ r.thumb_url }}"
        alt=""
        loading="lazy"
        class="w-16 h-16 rounded object-cover flex-shrink-0 border border-slate-200"
        referrerpolicy="no-referrer"
      />
      {% else %}
      <div class="w-16 h-16 rounded bg-slate-100 border border-slate-200 flex items-center justify-center text-slate-400 text-xs flex-shrink-0">
        No&nbsp;img
      </div>
      {% endif %}
      <div class="min-w-0 flex-1">
        <a class="font-medium text-blue-700" href="/readitlater/item/{{ r.id }}">
          {{ r.title or "Untitled" }}
        </a>
        {% if r.url %}
        <div class="text-sm text-slate-500 truncate">{{ r.url }}</div>
        {% endif %}
        <div class="text-xs text-slate-400 mt-1">{{ r.added_at|format_est }}</div>
        {% set tags_for_item = tagmap.get(r.id, []) %}
        {% if tags_for_item %}
        <div class="mt-2 flex flex-wrap gap-2">
          {% for t in tags_for_item %}
          <a href="/readitlater/?tag={{ t|urlencode }}"
             class="text-xs bg-slate-100 border border-slate-200 rounded-full px-2 py-1 hover:bg-slate-200">
            #{{ t }}
          </a>
          {% endfor %}
        </div>
        {% endif %}
      </div>
      {# htmx delete with a confirm prompt. No hx-target/hx-swap is set, so htmx's default
         swap applies to the response. NOTE(review): confirm what the delete endpoint returns. #}
      <form
        hx-post="/readitlater/item/{{ r.id }}/delete"
        hx-confirm="Delete this item?"
        class="ml-3">
        {# The glyph gives the round button visible content, matching the "✕ Delete" button on the detail page. #}
        <button
          type="submit"
          aria-label="Delete"
          title="Delete"
          class="text-white bg-red-600 hover:bg-red-700 focus:ring-2 focus:ring-red-300 rounded-full w-7 h-7 flex items-center justify-center">
          ✕
        </button>
      </form>
    </div>
  </li>
  {% else %}
  <li class="text-slate-500">No items yet. Use the browser extension to save a page.</li>
  {% endfor %}
</ul>
{% endblock %}

24
templates/login.html Normal file
View File

@@ -0,0 +1,24 @@
{% extends "base.html" %}
{% block content %}
{# Sign-in form. Context used: error (optional message shown above the form).
   The form posts to the login_submit endpoint and carries over the `next` query
   parameter of the current request, if any. #}
<div class="max-w-sm mx-auto bg-white p-6 rounded shadow">
  <h1 class="text-xl font-semibold mb-4">Sign in</h1>
  {% if error %}
  <div class="mb-3 text-sm text-red-700 bg-red-50 border border-red-200 rounded p-2" role="alert">
    {{ error }}
  </div>
  {% endif %}
  <form method="post" action="{{ url_for('login_submit', next=request.args.get('next')) }}" class="space-y-3">
    {# Each label is tied to its input via for/id so that clicking the label focuses the
       field and assistive technology announces it. The autocomplete hints let password
       managers fill the right fields. #}
    <div>
      <label class="block text-sm mb-1" for="login-username">Username</label>
      <input id="login-username" name="username" type="text" required autocomplete="username" class="w-full border rounded px-3 py-2">
    </div>
    <div>
      <label class="block text-sm mb-1" for="login-password">Password</label>
      <input id="login-password" name="password" type="password" required autocomplete="current-password" class="w-full border rounded px-3 py-2">
    </div>
    <button type="submit" class="w-full bg-slate-900 text-white rounded px-3 py-2">Sign in</button>
  </form>
</div>
{% endblock %}

16
templates/tags.html Normal file
View File

@@ -0,0 +1,16 @@
{# Tag chips for one item. Context used: item (for item.id) and tags (list of tag names).
   The wrapper's id is the hx-target of the tag add/remove requests, which swap it with
   outerHTML. Presumably those endpoints respond with this same fragment; confirm in the views. -#}
<div id="tags-{{ item.id }}" class="flex flex-wrap gap-2">
  {% for t in tags %}
  <span class="inline-flex items-center gap-1 text-xs bg-slate-100 border border-slate-200 rounded-full px-2 py-1">
    {# |urlencode keeps "&", "#", "?", "+" or spaces in a tag name from being read as URL syntax. #}
    <a class="text-slate-700 hover:underline" href="/readitlater/?tag={{ t|urlencode }}">#{{ t }}</a>
    {# hx-confirm replaces the former inline onsubmit confirm(): it is the htmx-native prompt
       (the item list uses it too), and it avoids interpolating the tag into a JavaScript
       string literal, where an apostrophe in the name would end the string early. #}
    <form
      hx-post="/readitlater/item/{{ item.id }}/tag/{{ t|urlencode }}/delete"
      hx-target="#tags-{{ item.id }}"
      hx-swap="outerHTML"
      hx-confirm="Remove tag &quot;{{ t }}&quot;?">
      {# The glyph gives the button visible, clickable content; previously it was empty. #}
      <button type="submit" class="text-slate-500 hover:text-red-600" title="Remove" aria-label="Remove tag {{ t }}">✕</button>
    </form>
  </span>
  {% else %}
  <span class="text-xs text-slate-400">No tags yet</span>
  {% endfor %}
</div>