From 5cd657c507397d0135b9228cbef0a405049c5052 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 00:01:59 +0000 Subject: [PATCH 01/33] refactor: split claude-code session-start into sync + async setup Move table creation, placeholder insertion, and version check into a separate session-start-setup.ts that runs as an async hook. The fast path (session-start.ts) now only reads local credentials and injects the system prompt context, keeping session startup <1s. --- src/hooks/session-start-setup.ts | 178 +++++++++++++++++++++++++++++++ src/hooks/session-start.ts | 170 +++-------------------------- 2 files changed, 195 insertions(+), 153 deletions(-) create mode 100644 src/hooks/session-start-setup.ts diff --git a/src/hooks/session-start-setup.ts b/src/hooks/session-start-setup.ts new file mode 100644 index 0000000..0e25602 --- /dev/null +++ b/src/hooks/session-start-setup.ts @@ -0,0 +1,178 @@ +#!/usr/bin/env node + +/** + * SessionStart async setup hook: + * Runs server-side operations (table creation, placeholder, version check) + * in the background so they don't block session startup. 
+ */ + +import { fileURLToPath } from "node:url"; +import { dirname, join } from "node:path"; +import { mkdirSync, appendFileSync, readFileSync } from "node:fs"; +import { execSync } from "node:child_process"; +import { homedir } from "node:os"; +import { loadCredentials, saveCredentials } from "../commands/auth.js"; +import { loadConfig } from "../config.js"; +import { DeeplakeApi } from "../deeplake-api.js"; +import { sqlStr } from "../utils/sql.js"; +import { readStdin } from "../utils/stdin.js"; +import { log as _log, utcTimestamp } from "../utils/debug.js"; +const log = (msg: string) => _log("session-setup", msg); + +const __bundleDir = dirname(fileURLToPath(import.meta.url)); + +const GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; +const VERSION_CHECK_TIMEOUT = 3000; + +const HOME = homedir(); +const WIKI_LOG = join(HOME, ".claude", "hooks", "deeplake-wiki.log"); + +function wikiLog(msg: string): void { + try { + mkdirSync(join(HOME, ".claude", "hooks"), { recursive: true }); + appendFileSync(WIKI_LOG, `[${utcTimestamp()}] ${msg}\n`); + } catch { /* ignore */ } +} + +function getInstalledVersion(): string | null { + let dir = __bundleDir; + for (let i = 0; i < 5; i++) { + const candidate = join(dir, "package.json"); + try { + const pkg = JSON.parse(readFileSync(candidate, "utf-8")); + if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) return pkg.version; + } catch { /* not here, keep looking */ } + const parent = dirname(dir); + if (parent === dir) break; + dir = parent; + } + return null; +} + +async function getLatestVersion(): Promise { + try { + const res = await fetch(GITHUB_RAW_PKG, { signal: AbortSignal.timeout(VERSION_CHECK_TIMEOUT) }); + if (!res.ok) return null; + const pkg = await res.json(); + return pkg.version ?? 
null; + } catch { + return null; + } +} + +function isNewer(latest: string, current: string): boolean { + const parse = (v: string) => v.split(".").map(Number); + const [la, lb, lc] = parse(latest); + const [ca, cb, cc] = parse(current); + return la > ca || (la === ca && lb > cb) || (la === ca && lb === cb && lc > cc); +} + +/** Create a placeholder summary via direct SQL INSERT. */ +async function createPlaceholder(api: DeeplakeApi, table: string, sessionId: string, cwd: string, userName: string, orgName: string, workspaceId: string): Promise { + const summaryPath = `/summaries/${userName}/${sessionId}.md`; + + await api.query(`SELECT deeplake_sync_table('${table}')`); + const existing = await api.query( + `SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1` + ); + if (existing.length > 0) { + wikiLog(`SessionSetup: summary exists for ${sessionId} (resumed)`); + return; + } + + const now = new Date().toISOString(); + const projectName = cwd.split("/").pop() ?? "unknown"; + const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; + const content = [ + `# Session ${sessionId}`, + `- **Source**: ${sessionSource}`, + `- **Started**: ${now}`, + `- **Project**: ${projectName}`, + `- **Status**: in-progress`, + "", + ].join("\n"); + const filename = `${sessionId}.md`; + + await api.query( + `INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ` + + `${Buffer.byteLength(content, "utf-8")}, '${sqlStr(projectName)}', 'in progress', 'claude_code', '${now}', '${now}')` + ); + + wikiLog(`SessionSetup: created placeholder for ${sessionId} (${cwd})`); +} + +interface SessionStartInput { + session_id: string; + cwd?: string; +} + +async function main(): Promise { + if 
(process.env.DEEPLAKE_WIKI_WORKER === "1") return; + + const input = await readStdin(); + const creds = loadCredentials(); + if (!creds?.token) { log("no credentials"); return; } + + // Backfill userName if missing + if (!creds.userName) { + try { + const { userInfo } = await import("node:os"); + creds.userName = userInfo().username ?? "unknown"; + saveCredentials(creds); + log(`backfilled userName: ${creds.userName}`); + } catch { /* non-fatal */ } + } + + // Table setup + placeholder (fire-and-forget, async hook) + if (input.session_id) { + try { + const config = loadConfig(); + if (config) { + const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName); + await api.ensureTable(); + await api.ensureSessionsTable(config.sessionsTableName); + await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); + log("setup complete"); + } + } catch (e: any) { + log(`setup failed: ${e.message}`); + wikiLog(`SessionSetup: failed for ${input.session_id}: ${e.message}`); + } + } + + // Version check + auto-update + const autoupdate = creds.autoupdate !== false; + try { + const current = getInstalledVersion(); + if (current) { + const latest = await getLatestVersion(); + if (latest && isNewer(latest, current)) { + if (autoupdate) { + log(`autoupdate: updating ${current} → ${latest}`); + try { + const scopes = ["user", "project", "local", "managed"]; + const cmd = scopes + .map(s => `claude plugin update hivemind@hivemind --scope ${s} 2>/dev/null`) + .join("; "); + execSync(cmd, { stdio: "ignore", timeout: 60_000 }); + process.stderr.write(`✅ Hivemind auto-updated: ${current} → ${latest}. Run /reload-plugins to apply.\n`); + log(`autoupdate succeeded: ${current} → ${latest}`); + } catch (e: any) { + process.stderr.write(`⬆️ Hivemind update available: ${current} → ${latest}. 
Auto-update failed — run /hivemind:update to upgrade manually.\n`); + log(`autoupdate failed: ${e.message}`); + } + } else { + process.stderr.write(`⬆️ Hivemind update available: ${current} → ${latest}. Run /hivemind:update to upgrade.\n`); + log(`update available (autoupdate off): ${current} → ${latest}`); + } + } else { + log(`version up to date: ${current}`); + } + } + } catch (e: any) { + log(`version check failed: ${e.message}`); + } +} + +main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); diff --git a/src/hooks/session-start.ts b/src/hooks/session-start.ts index 2019615..424b8ef 100644 --- a/src/hooks/session-start.ts +++ b/src/hooks/session-start.ts @@ -1,22 +1,18 @@ #!/usr/bin/env node /** - * SessionStart hook: - * 1. If no credentials → run device flow login (opens browser) - * 2. Inject Deeplake memory instructions into Claude's context + * SessionStart hook (fast path): + * Only reads local credentials and injects context into Claude's system prompt. + * All server calls (table setup, placeholder, version check) are handled by + * session-start-setup.js which runs as a separate async hook. 
*/ import { fileURLToPath } from "node:url"; import { dirname, join } from "node:path"; -import { mkdirSync, appendFileSync, readFileSync } from "node:fs"; -import { execSync } from "node:child_process"; -import { homedir } from "node:os"; -import { loadCredentials, saveCredentials, login } from "../commands/auth.js"; -import { loadConfig } from "../config.js"; -import { DeeplakeApi } from "../deeplake-api.js"; -import { sqlStr } from "../utils/sql.js"; +import { readFileSync } from "node:fs"; +import { loadCredentials } from "../commands/auth.js"; import { readStdin } from "../utils/stdin.js"; -import { log as _log, utcTimestamp } from "../utils/debug.js"; +import { log as _log } from "../utils/debug.js"; const log = (msg: string) => _log("session-start", msg); const __bundleDir = dirname(fileURLToPath(import.meta.url)); @@ -53,14 +49,7 @@ LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty Debugging: Set DEEPLAKE_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; -const GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; -const VERSION_CHECK_TIMEOUT = 3000; // 3s — don't block session start - function getInstalledVersion(): string | null { - // Walk up from the bundle directory to find the nearest package.json. 
- // Depending on install method the layout varies: - // marketplace: /claude-code/bundle/ → package.json is 2 levels up - // cache: /bundle/ → package.json is 1 level up (if present) let dir = __bundleDir; for (let i = 0; i < 5; i++) { const candidate = join(dir, "package.json"); @@ -69,166 +58,41 @@ function getInstalledVersion(): string | null { if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) return pkg.version; } catch { /* not here, keep looking */ } const parent = dirname(dir); - if (parent === dir) break; // reached filesystem root + if (parent === dir) break; dir = parent; } return null; } -async function getLatestVersion(): Promise { - try { - const res = await fetch(GITHUB_RAW_PKG, { signal: AbortSignal.timeout(VERSION_CHECK_TIMEOUT) }); - if (!res.ok) return null; - const pkg = await res.json(); - return pkg.version ?? null; - } catch { - return null; - } -} - -function isNewer(latest: string, current: string): boolean { - const parse = (v: string) => v.split(".").map(Number); - const [la, lb, lc] = parse(latest); - const [ca, cb, cc] = parse(current); - return la > ca || (la === ca && lb > cb) || (la === ca && lb === cb && lc > cc); -} - -const HOME = homedir(); -const WIKI_LOG = join(HOME, ".claude", "hooks", "deeplake-wiki.log"); - -function wikiLog(msg: string): void { - try { - mkdirSync(join(HOME, ".claude", "hooks"), { recursive: true }); - appendFileSync(WIKI_LOG, `[${utcTimestamp()}] ${msg}\n`); - } catch { /* ignore */ } -} - -/** Create a placeholder summary via direct SQL INSERT (no DeeplakeFs bootstrap needed). 
*/ -async function createPlaceholder(api: DeeplakeApi, table: string, sessionId: string, cwd: string, userName: string, orgName: string, workspaceId: string): Promise { - const summaryPath = `/summaries/${userName}/${sessionId}.md`; - - // Check if summary already exists (resumed session) - await api.query(`SELECT deeplake_sync_table('${table}')`); - const existing = await api.query( - `SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1` - ); - if (existing.length > 0) { - wikiLog(`SessionStart: summary exists for ${sessionId} (resumed)`); - return; - } - - const now = new Date().toISOString(); - const projectName = cwd.split("/").pop() ?? "unknown"; - const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; - const content = [ - `# Session ${sessionId}`, - `- **Source**: ${sessionSource}`, - `- **Started**: ${now}`, - `- **Project**: ${projectName}`, - `- **Status**: in-progress`, - "", - ].join("\n"); - const filename = `${sessionId}.md`; - - await api.query( - `INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + - `VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ` + - `${Buffer.byteLength(content, "utf-8")}, '${sqlStr(projectName)}', 'in progress', 'claude_code', '${now}', '${now}')` - ); - - wikiLog(`SessionStart: created placeholder for ${sessionId} (${cwd})`); -} - interface SessionStartInput { session_id: string; cwd?: string; } async function main(): Promise { - // Skip if this is a sub-session spawned by the wiki worker if (process.env.DEEPLAKE_WIKI_WORKER === "1") return; - const input = await readStdin(); + await readStdin(); - let creds = loadCredentials(); + const creds = loadCredentials(); if (!creds?.token) { log("no credentials found — run /hivemind:login to authenticate"); } else { 
log(`credentials loaded: org=${creds.orgName ?? creds.orgId}`); - // Backfill userName if missing (for users who logged in before this field was added) - if (creds.token && !creds.userName) { - try { - const { userInfo } = await import("node:os"); - creds.userName = userInfo().username ?? "unknown"; - saveCredentials(creds); - log(`backfilled and persisted userName: ${creds.userName}`); - } catch { /* non-fatal */ } - } - } - - // Create placeholder summary + ensure sessions table via direct SQL (no DeeplakeFs bootstrap) - if (input.session_id && creds?.token) { - try { - const config = loadConfig(); - if (config) { - const table = config.tableName; - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); - // Ensure both tables exist (once per session) - await api.ensureTable(); - await api.ensureSessionsTable(sessionsTable); - await createPlaceholder(api, table, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); - log("placeholder created"); - } - } catch (e: any) { - log(`placeholder failed: ${e.message}`); - wikiLog(`SessionStart: placeholder failed for ${input.session_id}: ${e.message}`); - } } - // Version check (non-blocking — failures are silently ignored) - const autoupdate = creds?.autoupdate !== false; // default: true - let updateNotice = ""; - try { - const current = getInstalledVersion(); - if (current) { - const latest = await getLatestVersion(); - if (latest && isNewer(latest, current)) { - if (autoupdate) { - log(`autoupdate: updating ${current} → ${latest}`); - try { - const scopes = ["user", "project", "local", "managed"]; - const cmd = scopes - .map(s => `claude plugin update hivemind@hivemind --scope ${s} 2>/dev/null`) - .join("; "); - execSync(cmd, { stdio: "ignore", timeout: 60_000 }); - updateNotice = `\n\n✅ Hivemind auto-updated: ${current} → ${latest}. 
Run /reload-plugins to apply.`; - process.stderr.write(`✅ Hivemind auto-updated: ${current} → ${latest}. Run /reload-plugins to apply.\n`); - log(`autoupdate succeeded: ${current} → ${latest}`); - } catch (e: any) { - updateNotice = `\n\n⬆️ Hivemind update available: ${current} → ${latest}. Auto-update failed — run /hivemind:update to upgrade manually.`; - process.stderr.write(`⬆️ Hivemind update available: ${current} → ${latest}. Auto-update failed — run /hivemind:update to upgrade manually.\n`); - log(`autoupdate failed: ${e.message}`); - } - } else { - updateNotice = `\n\n⬆️ Hivemind update available: ${current} → ${latest}. Run /hivemind:update to upgrade.`; - process.stderr.write(`⬆️ Hivemind update available: ${current} → ${latest}. Run /hivemind:update to upgrade.\n`); - log(`update available (autoupdate off): ${current} → ${latest}`); - } - } else { - log(`version up to date: ${current}`); - updateNotice = `\n\n✅ Hivemind v${current} (up to date)`; - } - } - } catch (e: any) { - log(`version check failed: ${e.message}`); + // Local-only version display (no network call — actual update runs in async setup hook) + let versionNotice = ""; + const current = getInstalledVersion(); + if (current) { + versionNotice = `\n\nHivemind v${current}`; } const resolvedContext = context.replace(/DEEPLAKE_AUTH_CMD/g, AUTH_CMD); const additionalContext = creds?.token - ? `${resolvedContext}\n\nLogged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId ?? "default"})${updateNotice}` - : `${resolvedContext}\n\n⚠️ Not logged in to Deeplake. Memory search will not work. Ask the user to run /hivemind:login to authenticate.${updateNotice}`; + ? `${resolvedContext}\n\nLogged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId ?? "default"})${versionNotice}` + : `${resolvedContext}\n\n⚠️ Not logged in to Deeplake. Memory search will not work. 
Ask the user to run /hivemind:login to authenticate.${versionNotice}`; console.log(JSON.stringify({ hookSpecificOutput: { From b881f319026ff1cb25ca7baeca16c93ae6540ccc Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 00:02:07 +0000 Subject: [PATCH 02/33] refactor: split codex session-start into sync + async setup Same split as claude-code: move table creation, placeholder, and version check into codex/session-start-setup.ts. The fast path only reads credentials and injects context. --- src/hooks/codex/session-start-setup.ts | 193 +++++++++++++++++++++++++ src/hooks/codex/session-start.ts | 185 ++++-------------------- 2 files changed, 223 insertions(+), 155 deletions(-) create mode 100644 src/hooks/codex/session-start-setup.ts diff --git a/src/hooks/codex/session-start-setup.ts b/src/hooks/codex/session-start-setup.ts new file mode 100644 index 0000000..95ef23b --- /dev/null +++ b/src/hooks/codex/session-start-setup.ts @@ -0,0 +1,193 @@ +#!/usr/bin/env node + +/** + * Codex SessionStart async setup hook: + * Runs server-side operations (table creation, placeholder, version check) + * in the background so they don't block session startup. 
+ */ + +import { fileURLToPath } from "node:url"; +import { dirname, join } from "node:path"; +import { mkdirSync, appendFileSync, readFileSync } from "node:fs"; +import { execSync } from "node:child_process"; +import { homedir } from "node:os"; +import { loadCredentials, saveCredentials } from "../../commands/auth.js"; +import { loadConfig } from "../../config.js"; +import { DeeplakeApi } from "../../deeplake-api.js"; +import { sqlStr } from "../../utils/sql.js"; +import { readStdin } from "../../utils/stdin.js"; +import { log as _log } from "../../utils/debug.js"; +const log = (msg: string) => _log("codex-session-setup", msg); + +const __bundleDir = dirname(fileURLToPath(import.meta.url)); + +const GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; +const VERSION_CHECK_TIMEOUT = 3000; + +const HOME = homedir(); +const WIKI_LOG = join(HOME, ".codex", "hooks", "deeplake-wiki.log"); + +function wikiLog(msg: string): void { + try { + mkdirSync(join(HOME, ".codex", "hooks"), { recursive: true }); + appendFileSync(WIKI_LOG, `[${new Date().toISOString().replace("T", " ").slice(0, 19)}] ${msg}\n`); + } catch { /* ignore */ } +} + +function getInstalledVersion(): string | null { + try { + const pluginJson = join(__bundleDir, "..", ".codex-plugin", "plugin.json"); + const plugin = JSON.parse(readFileSync(pluginJson, "utf-8")); + if (plugin.version) return plugin.version; + } catch { /* fall through */ } + let dir = __bundleDir; + for (let i = 0; i < 5; i++) { + const candidate = join(dir, "package.json"); + try { + const pkg = JSON.parse(readFileSync(candidate, "utf-8")); + if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) return pkg.version; + } catch { /* not here, keep looking */ } + const parent = dirname(dir); + if (parent === dir) break; + dir = parent; + } + return null; +} + +async function getLatestVersion(): Promise { + try { + const res = await fetch(GITHUB_RAW_PKG, { signal: 
AbortSignal.timeout(VERSION_CHECK_TIMEOUT) }); + if (!res.ok) return null; + const pkg = await res.json(); + return pkg.version ?? null; + } catch { + return null; + } +} + +function isNewer(latest: string, current: string): boolean { + const parse = (v: string) => v.split(".").map(Number); + const [la, lb, lc] = parse(latest); + const [ca, cb, cc] = parse(current); + return la > ca || (la === ca && lb > cb) || (la === ca && lb === cb && lc > cc); +} + +/** Create a placeholder summary via direct SQL INSERT. */ +async function createPlaceholder(api: DeeplakeApi, table: string, sessionId: string, cwd: string, userName: string, orgName: string, workspaceId: string): Promise { + const summaryPath = `/summaries/${userName}/${sessionId}.md`; + + await api.query(`SELECT deeplake_sync_table('${table}')`); + const existing = await api.query( + `SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1` + ); + if (existing.length > 0) { + wikiLog(`SessionSetup: summary exists for ${sessionId} (resumed)`); + return; + } + + const now = new Date().toISOString(); + const projectName = cwd.split("/").pop() ?? 
"unknown"; + const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; + const content = [ + `# Session ${sessionId}`, + `- **Source**: ${sessionSource}`, + `- **Started**: ${now}`, + `- **Project**: ${projectName}`, + `- **Status**: in-progress`, + "", + ].join("\n"); + const filename = `${sessionId}.md`; + + await api.query( + `INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ` + + `${Buffer.byteLength(content, "utf-8")}, '${sqlStr(projectName)}', 'in progress', 'codex', '${now}', '${now}')` + ); + + wikiLog(`SessionSetup: created placeholder for ${sessionId} (${cwd})`); +} + +interface CodexSessionStartInput { + session_id: string; + transcript_path?: string | null; + cwd: string; + hook_event_name: string; + model: string; + source?: string; +} + +async function main(): Promise { + if (process.env.DEEPLAKE_WIKI_WORKER === "1") return; + + const input = await readStdin(); + const creds = loadCredentials(); + if (!creds?.token) { log("no credentials"); return; } + + // Backfill userName if missing + if (!creds.userName) { + try { + const { userInfo } = await import("node:os"); + creds.userName = userInfo().username ?? "unknown"; + saveCredentials(creds); + log(`backfilled userName: ${creds.userName}`); + } catch { /* non-fatal */ } + } + + // Table setup + placeholder + if (input.session_id) { + try { + const config = loadConfig(); + if (config) { + const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName); + await api.ensureTable(); + await api.ensureSessionsTable(config.sessionsTableName); + await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? 
"", config.userName, config.orgName, config.workspaceId); + log("setup complete"); + } + } catch (e: any) { + log(`setup failed: ${e.message}`); + wikiLog(`SessionSetup: failed for ${input.session_id}: ${e.message}`); + } + } + + // Version check + auto-update + const autoupdate = creds.autoupdate !== false; + try { + const current = getInstalledVersion(); + if (current) { + const latest = await getLatestVersion(); + if (latest && isNewer(latest, current)) { + if (autoupdate) { + log(`autoupdate: updating ${current} → ${latest}`); + try { + const tag = `v${latest}`; + const findCmd = `INSTALL_DIR=""; ` + + `CACHE_DIR=$(find ~/.codex/plugins/cache -maxdepth 3 -name "hivemind" -type d 2>/dev/null | head -1); ` + + `if [ -n "$CACHE_DIR" ]; then INSTALL_DIR=$(ls -1d "$CACHE_DIR"/*/ 2>/dev/null | tail -1); ` + + `elif [ -d ~/.codex/hivemind ]; then INSTALL_DIR=~/.codex/hivemind; fi; ` + + `if [ -n "$INSTALL_DIR" ]; then ` + + `TMPDIR=$(mktemp -d); ` + + `git clone --depth 1 --branch ${tag} -q https://github.com/activeloopai/hivemind.git "$TMPDIR/hivemind" 2>/dev/null && ` + + `cp -r "$TMPDIR/hivemind/codex/"* "$INSTALL_DIR/" 2>/dev/null; ` + + `rm -rf "$TMPDIR"; fi`; + execSync(findCmd, { stdio: "ignore", timeout: 60_000 }); + process.stderr.write(`Hivemind auto-updated: ${current} → ${latest}. Restart Codex to apply.\n`); + log(`autoupdate succeeded: ${current} → ${latest} (tag: ${tag})`); + } catch (e: any) { + process.stderr.write(`Hivemind update available: ${current} → ${latest}. 
Auto-update failed.\n`); + log(`autoupdate failed: ${e.message}`); + } + } else { + process.stderr.write(`Hivemind update available: ${current} → ${latest}.\n`); + log(`update available (autoupdate off): ${current} → ${latest}`); + } + } else { + log(`version up to date: ${current}`); + } + } + } catch (e: any) { + log(`version check failed: ${e.message}`); + } +} + +main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); diff --git a/src/hooks/codex/session-start.ts b/src/hooks/codex/session-start.ts index d53d11c..44b618e 100644 --- a/src/hooks/codex/session-start.ts +++ b/src/hooks/codex/session-start.ts @@ -1,23 +1,20 @@ #!/usr/bin/env node /** - * Codex SessionStart hook: - * 1. If no credentials → log warning - * 2. Inject Deeplake memory instructions into Codex's context via additionalContext + * Codex SessionStart hook (fast path): + * Only reads local credentials and injects context into Codex's developer prompt. + * All server calls (table setup, placeholder, version check) are handled by + * session-start-setup.js which runs as a separate async hook. * * Codex input: { session_id, transcript_path, cwd, hook_event_name, model, source } - * Codex output: { additionalContext: "..." 
} or plain text on stdout + * Codex output: plain text on stdout (added as developer context) */ +import { spawn } from "node:child_process"; import { fileURLToPath } from "node:url"; import { dirname, join } from "node:path"; -import { mkdirSync, appendFileSync, readFileSync } from "node:fs"; -import { execSync } from "node:child_process"; -import { homedir } from "node:os"; -import { loadCredentials, saveCredentials } from "../../commands/auth.js"; -import { loadConfig } from "../../config.js"; -import { DeeplakeApi } from "../../deeplake-api.js"; -import { sqlStr } from "../../utils/sql.js"; +import { readFileSync } from "node:fs"; +import { loadCredentials } from "../../commands/auth.js"; import { readStdin } from "../../utils/stdin.js"; import { log as _log } from "../../utils/debug.js"; const log = (msg: string) => _log("codex-session-start", msg); @@ -25,8 +22,6 @@ const log = (msg: string) => _log("codex-session-start", msg); const __bundleDir = dirname(fileURLToPath(import.meta.url)); const AUTH_CMD = join(__bundleDir, "commands", "auth-login.js"); -// Codex-specific context: references Codex commands instead of Claude Code slash commands. -// Uses Bash tool for search since Codex doesn't have a standalone Grep tool. const context = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. Structure: index.md (start here) → summaries/*.md → sessions/*.jsonl (last resort). Do NOT jump straight to JSONL. @@ -34,20 +29,12 @@ Search: grep -r "keyword" ~/.deeplake/memory/ IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. Do NOT use python, python3, node, curl, or other interpreters — they are not available in the memory filesystem. 
Do NOT spawn subagents to read deeplake memory.`; -const GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; -const VERSION_CHECK_TIMEOUT = 3000; - function getInstalledVersion(): string | null { try { - // Read version from the plugin's own manifest (not the root package.json) const pluginJson = join(__bundleDir, "..", ".codex-plugin", "plugin.json"); const plugin = JSON.parse(readFileSync(pluginJson, "utf-8")); if (plugin.version) return plugin.version; } catch { /* fall through */ } - // Walk up from the bundle directory to find the nearest package.json. - // Depending on install method the layout varies: - // codex cache: /bundle/ → package.json may be 1+ levels up - // local dev: /codex/bundle/ → package.json is 2 levels up let dir = __bundleDir; for (let i = 0; i < 5; i++) { const candidate = join(dir, "package.json"); @@ -62,76 +49,13 @@ function getInstalledVersion(): string | null { return null; } -async function getLatestVersion(): Promise { - try { - const res = await fetch(GITHUB_RAW_PKG, { signal: AbortSignal.timeout(VERSION_CHECK_TIMEOUT) }); - if (!res.ok) return null; - const pkg = await res.json(); - return pkg.version ?? null; - } catch { - return null; - } -} - -function isNewer(latest: string, current: string): boolean { - const parse = (v: string) => v.split(".").map(Number); - const [la, lb, lc] = parse(latest); - const [ca, cb, cc] = parse(current); - return la > ca || (la === ca && lb > cb) || (la === ca && lb === cb && lc > cc); -} - -const HOME = homedir(); -const WIKI_LOG = join(HOME, ".codex", "hooks", "deeplake-wiki.log"); - -function wikiLog(msg: string): void { - try { - mkdirSync(join(HOME, ".codex", "hooks"), { recursive: true }); - appendFileSync(WIKI_LOG, `[${new Date().toISOString().replace("T", " ").slice(0, 19)}] ${msg}\n`); - } catch { /* ignore */ } -} - -/** Create a placeholder summary via direct SQL INSERT. 
*/ -async function createPlaceholder(api: DeeplakeApi, table: string, sessionId: string, cwd: string, userName: string, orgName: string, workspaceId: string): Promise { - const summaryPath = `/summaries/${userName}/${sessionId}.md`; - - await api.query(`SELECT deeplake_sync_table('${table}')`); - const existing = await api.query( - `SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1` - ); - if (existing.length > 0) { - wikiLog(`SessionStart: summary exists for ${sessionId} (resumed)`); - return; - } - - const now = new Date().toISOString(); - const projectName = cwd.split("/").pop() ?? "unknown"; - const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; - const content = [ - `# Session ${sessionId}`, - `- **Source**: ${sessionSource}`, - `- **Started**: ${now}`, - `- **Project**: ${projectName}`, - `- **Status**: in-progress`, - "", - ].join("\n"); - const filename = `${sessionId}.md`; - - await api.query( - `INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) ` + - `VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ` + - `${Buffer.byteLength(content, "utf-8")}, '${sqlStr(projectName)}', 'in progress', 'codex', '${now}', '${now}')` - ); - - wikiLog(`SessionStart: created placeholder for ${sessionId} (${cwd})`); -} - interface CodexSessionStartInput { session_id: string; transcript_path?: string | null; cwd: string; hook_event_name: string; model: string; - source?: string; // "startup" | "resume" + source?: string; } async function main(): Promise { @@ -139,88 +63,39 @@ async function main(): Promise { const input = await readStdin(); - let creds = loadCredentials(); + const creds = loadCredentials(); if (!creds?.token) { log("no credentials found — run auth login to authenticate"); } else { log(`credentials 
loaded: org=${creds.orgName ?? creds.orgId}`); - if (creds.token && !creds.userName) { - try { - const { userInfo } = await import("node:os"); - creds.userName = userInfo().username ?? "unknown"; - saveCredentials(creds); - log(`backfilled userName: ${creds.userName}`); - } catch { /* non-fatal */ } - } } - // Create placeholder summary - if (input.session_id && creds?.token) { - try { - const config = loadConfig(); - if (config) { - const table = config.tableName; - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); - await api.ensureTable(); - await api.ensureSessionsTable(sessionsTable); - await createPlaceholder(api, table, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); - log("placeholder created"); - } - } catch (e: any) { - log(`placeholder failed: ${e.message}`); - wikiLog(`SessionStart: placeholder failed for ${input.session_id}: ${e.message}`); - } + // Spawn async setup (table creation, placeholder, version check) as detached process. + // Codex doesn't support async hooks, so we use the same pattern as the wiki worker. 
+ if (creds?.token) { + const setupScript = join(__bundleDir, "session-start-setup.js"); + const child = spawn("node", [setupScript], { + detached: true, + stdio: ["pipe", "ignore", "ignore"], + env: { ...process.env }, + }); + // Feed the same stdin input to the setup process + child.stdin?.write(JSON.stringify(input)); + child.stdin?.end(); + child.unref(); + log("spawned async setup process"); } - // Version check + auto-update (via GitHub clone into cache directory) - const autoupdate = creds?.autoupdate !== false; // default: true - let updateNotice = ""; - try { - const current = getInstalledVersion(); - if (current) { - const latest = await getLatestVersion(); - if (latest && isNewer(latest, current)) { - if (autoupdate) { - log(`autoupdate: updating ${current} → ${latest}`); - try { - const tag = `v${latest}`; - // Try two install locations: ~/.codex/plugins/cache (plugin system) and ~/.codex/hivemind/ (manual install) - const findCmd = `INSTALL_DIR=""; ` + - `CACHE_DIR=$(find ~/.codex/plugins/cache -maxdepth 3 -name "hivemind" -type d 2>/dev/null | head -1); ` + - `if [ -n "$CACHE_DIR" ]; then INSTALL_DIR=$(ls -1d "$CACHE_DIR"/*/ 2>/dev/null | tail -1); ` + - `elif [ -d ~/.codex/hivemind ]; then INSTALL_DIR=~/.codex/hivemind; fi; ` + - `if [ -n "$INSTALL_DIR" ]; then ` + - `TMPDIR=$(mktemp -d); ` + - `git clone --depth 1 --branch ${tag} -q https://github.com/activeloopai/hivemind.git "$TMPDIR/hivemind" 2>/dev/null && ` + - `cp -r "$TMPDIR/hivemind/codex/"* "$INSTALL_DIR/" 2>/dev/null; ` + - `rm -rf "$TMPDIR"; fi`; - execSync(findCmd, { stdio: "ignore", timeout: 60_000 }); - updateNotice = `\n\nHivemind auto-updated: ${current} → ${latest}. Restart Codex to apply.`; - process.stderr.write(`Hivemind auto-updated: ${current} → ${latest}. Restart Codex to apply.\n`); - log(`autoupdate succeeded: ${current} → ${latest} (tag: ${tag})`); - } catch (e: any) { - updateNotice = `\n\nHivemind update available: ${current} → ${latest}. 
Auto-update failed.`; - process.stderr.write(`Hivemind update available: ${current} → ${latest}. Auto-update failed.\n`); - log(`autoupdate failed: ${e.message}`); - } - } else { - updateNotice = `\n\nHivemind update available: ${current} → ${latest}.`; - process.stderr.write(`Hivemind update available: ${current} → ${latest}.\n`); - log(`update available (autoupdate off): ${current} → ${latest}`); - } - } else { - log(`version up to date: ${current}`); - } - } - } catch (e: any) { - log(`version check failed: ${e.message}`); + let versionNotice = ""; + const current = getInstalledVersion(); + if (current) { + versionNotice = `\nHivemind v${current}`; } const additionalContext = creds?.token - ? `${context}\nLogged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId ?? "default"})${updateNotice}` - : `${context}\nNot logged in to Deeplake. Run: node "${AUTH_CMD}" login${updateNotice}`; + ? `${context}\nLogged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId ?? "default"})${versionNotice}` + : `${context}\nNot logged in to Deeplake. Run: node "${AUTH_CMD}" login${versionNotice}`; // Codex SessionStart: plain text on stdout is added as developer context. // JSON { additionalContext } format is rejected by Codex 0.118.0. From bf20b21739c1980c3d22f61a2e7f97c210fcab24 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 00:02:14 +0000 Subject: [PATCH 03/33] feat: register session-start-setup as async hook Add session-start-setup.js as a second SessionStart hook with async: true and 120s timeout. This runs the slow setup (table creation, version check) in parallel with the session. 
--- claude-code/hooks/hooks.json | 14 +++++++++++--- codex/hooks/hooks.json | 2 +- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/claude-code/hooks/hooks.json b/claude-code/hooks/hooks.json index 53073eb..7801e25 100644 --- a/claude-code/hooks/hooks.json +++ b/claude-code/hooks/hooks.json @@ -7,7 +7,13 @@ { "type": "command", "command": "node \"${CLAUDE_PLUGIN_ROOT}/bundle/session-start.js\"", - "timeout": 120 + "timeout": 10 + }, + { + "type": "command", + "command": "node \"${CLAUDE_PLUGIN_ROOT}/bundle/session-start-setup.js\"", + "timeout": 120, + "async": true } ] } @@ -18,7 +24,8 @@ { "type": "command", "command": "node \"${CLAUDE_PLUGIN_ROOT}/bundle/capture.js\"", - "timeout": 10 + "timeout": 10, + "async": true } ] } @@ -52,7 +59,8 @@ { "type": "command", "command": "node \"${CLAUDE_PLUGIN_ROOT}/bundle/capture.js\"", - "timeout": 30 + "timeout": 30, + "async": true } ] } diff --git a/codex/hooks/hooks.json b/codex/hooks/hooks.json index d94d425..4813f94 100644 --- a/codex/hooks/hooks.json +++ b/codex/hooks/hooks.json @@ -7,7 +7,7 @@ { "type": "command", "command": "node \"$CODEX_PLUGIN_ROOT/bundle/session-start.js\"", - "timeout": 120 + "timeout": 10 } ] } From a45dfd5e6055e2db0d0fd9dd49fbdd1aedca5d2e Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 00:02:19 +0000 Subject: [PATCH 04/33] feat: add local JSONL capture queue New capture-queue.ts utility that appends session events to a local JSONL file (~/.deeplake/capture/.jsonl) instead of making direct API calls. Events are flushed to cloud at session end. 
--- src/utils/capture-queue.ts | 57 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 src/utils/capture-queue.ts diff --git a/src/utils/capture-queue.ts b/src/utils/capture-queue.ts new file mode 100644 index 0000000..31f50a0 --- /dev/null +++ b/src/utils/capture-queue.ts @@ -0,0 +1,57 @@ +/** + * Local capture queue — appends session events to a local JSONL file + * instead of making HTTP calls. Events are flushed to cloud at session end. + * + * Queue file: ~/.deeplake/capture/.jsonl + * One line per event, each line is a JSON object. + */ + +import { appendFileSync, mkdirSync, readFileSync, existsSync, unlinkSync } from "node:fs"; +import { join } from "node:path"; +import { homedir } from "node:os"; + +const QUEUE_DIR = join(homedir(), ".deeplake", "capture"); + +/** Ensure the queue directory exists. */ +function ensureDir(): void { + mkdirSync(QUEUE_DIR, { recursive: true }); +} + +/** Get the queue file path for a session. */ +export function queuePath(sessionId: string): string { + return join(QUEUE_DIR, `${sessionId}.jsonl`); +} + +/** Append a single event to the session's local queue. Pure filesystem, no network. */ +export function appendEvent(sessionId: string, event: Record): void { + ensureDir(); + const line = JSON.stringify(event) + "\n"; + appendFileSync(queuePath(sessionId), line); +} + +/** Read all events from a session's local queue. Returns empty array if no file. */ +export function readEvents(sessionId: string): Record[] { + const path = queuePath(sessionId); + if (!existsSync(path)) return []; + const content = readFileSync(path, "utf-8").trim(); + if (!content) return []; + return content.split("\n").map(line => JSON.parse(line)); +} + +/** Read raw JSONL content from a session's local queue. 
*/ +export function readRawJsonl(sessionId: string): string { + const path = queuePath(sessionId); + if (!existsSync(path)) return ""; + return readFileSync(path, "utf-8").trim(); +} + +/** Delete the queue file after successful flush. */ +export function deleteQueue(sessionId: string): void { + const path = queuePath(sessionId); + try { unlinkSync(path); } catch { /* ignore */ } +} + +/** Return the queue directory path (for cleanup/listing). */ +export function getQueueDir(): string { + return QUEUE_DIR; +} From e54ca3c27987057aa0fad6b2b5fb0a26513c26ee Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 00:02:25 +0000 Subject: [PATCH 05/33] refactor: claude-code capture hook uses local queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace direct API calls with appendEvent() from capture-queue. No network calls during capture — events buffered locally. --- src/hooks/capture.ts | 61 +++++--------------------------------------- 1 file changed, 7 insertions(+), 54 deletions(-) diff --git a/src/hooks/capture.ts b/src/hooks/capture.ts index d75e44a..9ea6c6f 100644 --- a/src/hooks/capture.ts +++ b/src/hooks/capture.ts @@ -1,17 +1,16 @@ #!/usr/bin/env node /** - * Capture hook — writes each session event as a separate row in the sessions table. - * One INSERT per event, no concat, no race conditions. + * Capture hook — appends each session event to a local JSONL queue file. + * No network calls — events are flushed to cloud at session end by the wiki worker. 
* * Used by: UserPromptSubmit, PostToolUse (async), Stop, SubagentStop + * + * Queue file: ~/.deeplake/capture/.jsonl */ -import { homedir } from "node:os"; import { readStdin } from "../utils/stdin.js"; -import { loadConfig } from "../config.js"; -import { DeeplakeApi } from "../deeplake-api.js"; -import { sqlStr } from "../utils/sql.js"; +import { appendEvent } from "../utils/capture-queue.js"; import { log as _log } from "../utils/debug.js"; const log = (msg: string) => _log("capture", msg); @@ -38,26 +37,10 @@ interface HookInput { const CAPTURE = process.env.DEEPLAKE_CAPTURE !== "false"; -/** Build the session path matching the CLI convention: - * /sessions//___.jsonl */ -function buildSessionPath(config: { userName: string; orgName: string; workspaceId: string }, sessionId: string): string { - const userName = config.userName; - const orgName = config.orgName; - const workspace = config.workspaceId ?? "default"; - - return `/sessions/${userName}/${userName}_${orgName}_${workspace}_${sessionId}.jsonl`; -} - async function main(): Promise { if (!CAPTURE) return; const input = await readStdin(); - const config = loadConfig(); - if (!config) { log("no config"); return; } - - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); - // Build the event entry const ts = new Date().toISOString(); const meta = { session_id: input.session_id, @@ -105,38 +88,8 @@ async function main(): Promise { return; } - const sessionPath = buildSessionPath(config, input.session_id); - const line = JSON.stringify(entry); - log(`writing to ${sessionPath}`); - - // Simple INSERT — one row per event, no concat, no race conditions. - const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; - const filename = sessionPath.split("/").pop() ?? ""; - - // For JSONB: only escape single quotes for the SQL literal, keep JSON structure intact. 
- // sqlStr() would also escape backslashes and strip control chars, corrupting the JSON. - const jsonForSql = line.replace(/'/g, "''"); - - const insertSql = - `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + - `VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ` + - `${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(input.hook_event_name ?? "")}', 'claude_code', '${ts}', '${ts}')`; - - try { - await api.query(insertSql); - } catch (e: any) { - // Fallback: table might not exist (session-start failed or org switched mid-session). - // Create it and retry once. - if (e.message?.includes("permission denied") || e.message?.includes("does not exist")) { - log("table missing, creating and retrying"); - await api.ensureSessionsTable(sessionsTable); - await api.query(insertSql); - } else { - throw e; - } - } - - log("capture ok → cloud"); + appendEvent(input.session_id, entry); + log("capture ok → local queue"); } main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); From 35c07f6a6c92de18dace40423f4c58af91502440 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 00:02:41 +0000 Subject: [PATCH 06/33] refactor: codex capture + stop hooks use local queue Same local queue refactor for codex hooks. capture.ts and stop.ts now use appendEvent() instead of direct API calls. --- src/hooks/codex/capture.ts | 50 +++++--------------------------------- src/hooks/codex/stop.ts | 29 +++++----------------- 2 files changed, 12 insertions(+), 67 deletions(-) diff --git a/src/hooks/codex/capture.ts b/src/hooks/codex/capture.ts index 5feb7c9..cdc27dc 100644 --- a/src/hooks/codex/capture.ts +++ b/src/hooks/codex/capture.ts @@ -1,21 +1,16 @@ #!/usr/bin/env node /** - * Codex Capture hook — writes each session event as a row in the sessions table. 
+ * Codex Capture hook — appends each session event to a local JSONL queue file. + * No network calls — events are flushed to cloud at session end by the wiki worker. * * Used by: UserPromptSubmit, PostToolUse * - * Codex input fields: - * All events: session_id, transcript_path, cwd, hook_event_name, model - * UserPromptSubmit: prompt (user text) - * PostToolUse: tool_name, tool_use_id, tool_input, tool_response - * Stop: (no extra fields — Codex has no last_assistant_message equivalent) + * Queue file: ~/.deeplake/capture/.jsonl */ import { readStdin } from "../../utils/stdin.js"; -import { loadConfig } from "../../config.js"; -import { DeeplakeApi } from "../../deeplake-api.js"; -import { sqlStr } from "../../utils/sql.js"; +import { appendEvent } from "../../utils/capture-queue.js"; import { log as _log } from "../../utils/debug.js"; const log = (msg: string) => _log("codex-capture", msg); @@ -37,18 +32,9 @@ interface CodexHookInput { const CAPTURE = process.env.DEEPLAKE_CAPTURE !== "false"; -function buildSessionPath(config: { userName: string; orgName: string; workspaceId: string }, sessionId: string): string { - return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; -} - async function main(): Promise { if (!CAPTURE) return; const input = await readStdin(); - const config = loadConfig(); - if (!config) { log("no config"); return; } - - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); const ts = new Date().toISOString(); const meta = { @@ -87,32 +73,8 @@ async function main(): Promise { return; } - const sessionPath = buildSessionPath(config, input.session_id); - const line = JSON.stringify(entry); - log(`writing to ${sessionPath}`); - - const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; - const filename = sessionPath.split("/").pop() ?? 
""; - const jsonForSql = sqlStr(line); - - const insertSql = - `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + - `VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ` + - `${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(input.hook_event_name ?? "")}', 'codex', '${ts}', '${ts}')`; - - try { - await api.query(insertSql); - } catch (e: any) { - if (e.message?.includes("permission denied") || e.message?.includes("does not exist")) { - log("table missing, creating and retrying"); - await api.ensureSessionsTable(sessionsTable); - await api.query(insertSql); - } else { - throw e; - } - } - - log("capture ok"); + appendEvent(input.session_id, entry); + log("capture ok → local queue"); } main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); diff --git a/src/hooks/codex/stop.ts b/src/hooks/codex/stop.ts index 4346e78..488e209 100644 --- a/src/hooks/codex/stop.ts +++ b/src/hooks/codex/stop.ts @@ -18,8 +18,7 @@ import { writeFileSync, readFileSync, mkdirSync, appendFileSync, existsSync } fr import { homedir, tmpdir } from "node:os"; import { readStdin } from "../../utils/stdin.js"; import { loadConfig } from "../../config.js"; -import { DeeplakeApi } from "../../deeplake-api.js"; -import { sqlStr } from "../../utils/sql.js"; +import { appendEvent } from "../../utils/capture-queue.js"; import { log as _log } from "../../utils/debug.js"; const log = (msg: string) => _log("codex-stop", msg); @@ -117,11 +116,9 @@ async function main(): Promise { const config = loadConfig(); if (!config) { log("no config"); return; } - // 1. Capture the stop event (try to extract last assistant message from transcript) + // 1. 
Capture the stop event to local queue (no network) if (CAPTURE) { try { - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); const ts = new Date().toISOString(); // Codex Stop doesn't include last_assistant_message, but it provides @@ -132,13 +129,10 @@ async function main(): Promise { const transcriptPath = input.transcript_path; if (existsSync(transcriptPath)) { const transcript = readFileSync(transcriptPath, "utf-8"); - // Codex transcript is JSONL with format: - // {"type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"..."}]}} const lines = transcript.trim().split("\n").reverse(); for (const line of lines) { try { const entry = JSON.parse(line); - // Codex nests the message inside payload const msg = entry.payload ?? entry; if (msg.role === "assistant" && msg.content) { const content = typeof msg.content === "string" @@ -160,7 +154,7 @@ async function main(): Promise { } } - const entry = { + appendEvent(sessionId, { id: crypto.randomUUID(), session_id: sessionId, transcript_path: input.transcript_path, @@ -170,20 +164,8 @@ async function main(): Promise { timestamp: ts, type: lastAssistantMessage ? "assistant_message" : "assistant_stop", content: lastAssistantMessage, - }; - const line = JSON.stringify(entry); - const sessionPath = buildSessionPath(config, sessionId); - const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; - const filename = sessionPath.split("/").pop() ?? 
""; - const jsonForSql = sqlStr(line); - - const insertSql = - `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + - `VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ` + - `${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', 'Stop', 'codex', '${ts}', '${ts}')`; - - await api.query(insertSql); - log("stop event captured"); + }); + log("stop event captured → local queue"); } catch (e: any) { log(`capture failed: ${e.message}`); } @@ -204,6 +186,7 @@ async function main(): Promise { apiUrl: config.apiUrl, token: config.token, orgId: config.orgId, + orgName: config.orgName, workspaceId: config.workspaceId, memoryTable, sessionsTable, From 2671b06dbd0026d22890b7d9e70dc380d16d7a11 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 00:02:47 +0000 Subject: [PATCH 07/33] feat: wiki worker reads from local JSONL queue Wiki worker now reads session events from the local capture queue files instead of querying the API. Session-end triggers the flush. 
--- src/hooks/session-end.ts | 1 + src/hooks/wiki-worker.ts | 123 ++++++++++++++++++++++++++++++--------- 2 files changed, 96 insertions(+), 28 deletions(-) diff --git a/src/hooks/session-end.ts b/src/hooks/session-end.ts index 1560853..7b66c48 100644 --- a/src/hooks/session-end.ts +++ b/src/hooks/session-end.ts @@ -123,6 +123,7 @@ async function main(): Promise { apiUrl: config.apiUrl, token: config.token, orgId: config.orgId, + orgName: config.orgName, workspaceId: config.workspaceId, memoryTable, sessionsTable, diff --git a/src/hooks/wiki-worker.ts b/src/hooks/wiki-worker.ts index 9155071..09bc77a 100644 --- a/src/hooks/wiki-worker.ts +++ b/src/hooks/wiki-worker.ts @@ -1,21 +1,30 @@ #!/usr/bin/env node /** - * Background wiki worker — reads session events from the sessions table, - * runs claude -p to generate a wiki summary, and uploads it to the memory table. + * Background wiki worker — flushes local capture queue to cloud, + * then generates a wiki summary using claude -p. * - * Invoked by session-end.ts as: node wiki-worker.js + * Invoked by session-end.ts / codex stop.ts as: node wiki-worker.js + * + * Flow: + * 1. Read events from local queue (~/.deeplake/capture/.jsonl) + * 2. Batch-upload events to sessions table (cloud) + * 3. Delete local queue file + * 4. Run claude -p to generate wiki summary + * 5. 
Upload summary to memory table */ import { readFileSync, writeFileSync, existsSync, appendFileSync, mkdirSync, rmSync } from "node:fs"; import { execFileSync } from "node:child_process"; import { join } from "node:path"; import { utcTimestamp } from "../utils/debug.js"; +import { readEvents, readRawJsonl, deleteQueue } from "../utils/capture-queue.js"; interface WorkerConfig { apiUrl: string; token: string; orgId: string; + orgName: string; workspaceId: string; memoryTable: string; sessionsTable: string; @@ -67,7 +76,7 @@ async function query(sql: string, retries = 2): Promise[ Object.fromEntries(j.columns!.map((col, i) => [col, row[i]])) ); } - if (attempt < retries && (r.status === 502 || r.status === 503 || r.status === 429)) { + if (attempt < retries && (r.status === 502 || r.status === 503 || r.status === 429 || r.status === 500)) { wlog(`API ${r.status}, retrying in ${attempt + 1}s...`); await new Promise(resolve => setTimeout(resolve, (attempt + 1) * 1000)); continue; @@ -81,40 +90,98 @@ function cleanup(): void { try { rmSync(tmpDir, { recursive: true, force: true }); } catch { /* ignore */ } } +/** Build the session path matching the CLI convention. */ +function buildSessionPath(): string { + const org = cfg.orgName ?? cfg.orgId; + return `/sessions/${cfg.userName}/${cfg.userName}_${org}_${cfg.workspaceId}_${cfg.sessionId}.jsonl`; +} + +/** Flush local queue events to the cloud sessions table. */ +async function flushQueue(): Promise<{ events: Record[]; jsonlServerPath: string }> { + const events = readEvents(cfg.sessionId); + const jsonlServerPath = buildSessionPath(); + + if (events.length === 0) { + wlog("no local events to flush"); + return { events, jsonlServerPath }; + } + + wlog(`flushing ${events.length} events to cloud`); + const filename = jsonlServerPath.split("/").pop() ?? 
""; + + for (const event of events) { + const line = JSON.stringify(event); + // For JSONB: only escape single quotes for the SQL literal + const jsonForSql = line.replace(/'/g, "''"); + const ts = (event.timestamp as string) ?? new Date().toISOString(); + const hookEvent = (event.hook_event_name as string) ?? ""; + + try { + await query( + `INSERT INTO "${cfg.sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ('${crypto.randomUUID()}', '${esc(jsonlServerPath)}', '${esc(filename)}', '${jsonForSql}'::jsonb, '${esc(cfg.userName)}', ` + + `${Buffer.byteLength(line, "utf-8")}, '${esc(cfg.project)}', '${esc(hookEvent)}', 'claude_code', '${ts}', '${ts}')` + ); + } catch (e: any) { + wlog(`flush event failed: ${e.message}`); + // Don't delete queue if flush fails — events will be retried next session end + throw e; + } + } + + deleteQueue(cfg.sessionId); + wlog(`flushed ${events.length} events, deleted local queue`); + return { events, jsonlServerPath }; +} + async function main(): Promise { try { - // 1. Fetch session events from sessions table, reconstruct JSONL - wlog("fetching session events"); - await query(`SELECT deeplake_sync_table('${cfg.sessionsTable}')`); - const rows = await query( + // 1. Flush local queue to cloud + const { events, jsonlServerPath } = await flushQueue(); + + // 2. 
Also fetch any events already in cloud (from previous sessions or partial flushes) + wlog("fetching cloud events"); + try { + await query(`SELECT deeplake_sync_table('${cfg.sessionsTable}')`); + } catch { /* sync might fail on new tables, continue */ } + const cloudRows = await query( `SELECT message, creation_date FROM "${cfg.sessionsTable}" ` + `WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC` ); - if (rows.length === 0) { + // Merge: cloud events + local events (deduped by id) + const seenIds = new Set(); + const allEvents: Record[] = []; + + for (const row of cloudRows) { + const msg = typeof row.message === "string" ? JSON.parse(row.message) : row.message; + const id = msg?.id as string; + if (id && !seenIds.has(id)) { + seenIds.add(id); + allEvents.push(msg); + } + } + // Add local events not already in cloud (e.g. if flush failed partially) + for (const evt of events) { + const id = evt.id as string; + if (id && !seenIds.has(id)) { + seenIds.add(id); + allEvents.push(evt); + } + } + + if (allEvents.length === 0) { wlog("no session events found — exiting"); return; } - // Reconstruct JSONL from individual rows (message is JSONB — may be object or string) - const jsonlContent = rows - .map(r => typeof r.message === "string" ? r.message : JSON.stringify(r.message)) - .join("\n"); - const jsonlLines = rows.length; - - // Derive the server path - const pathRows = await query( - `SELECT DISTINCT path FROM "${cfg.sessionsTable}" ` + - `WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' LIMIT 1` - ); - const jsonlServerPath = pathRows.length > 0 - ? 
pathRows[0].path as string - : `/sessions/unknown/${cfg.sessionId}.jsonl`; - + // Reconstruct JSONL + const jsonlContent = allEvents.map(e => JSON.stringify(e)).join("\n"); + const jsonlLines = allEvents.length; writeFileSync(tmpJsonl, jsonlContent); - wlog(`found ${jsonlLines} events at ${jsonlServerPath}`); + wlog(`found ${jsonlLines} total events at ${jsonlServerPath}`); - // 2. Check for existing summary in memory table (resumed session) + // 3. Check for existing summary in memory table (resumed session) let prevOffset = 0; try { await query(`SELECT deeplake_sync_table('${cfg.memoryTable}')`); @@ -131,7 +198,7 @@ async function main(): Promise { } } catch { /* no existing summary */ } - // 3. Build prompt and run claude -p + // 4. Build prompt and run claude -p const prompt = cfg.promptTemplate .replace(/__JSONL__/g, tmpJsonl) .replace(/__SUMMARY__/g, tmpSummary) @@ -158,7 +225,7 @@ async function main(): Promise { wlog(`claude -p failed: ${e.status ?? e.message}`); } - // 4. Upload summary to memory table + // 5. 
Upload summary to memory table if (existsSync(tmpSummary)) { const text = readFileSync(tmpSummary, "utf-8"); if (text.trim()) { From 2967b756db714fe7cc574fe04f68b884ae5d2bce Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 00:02:53 +0000 Subject: [PATCH 08/33] build: add session-start-setup entry point to esbuild config --- esbuild.config.mjs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/esbuild.config.mjs b/esbuild.config.mjs index e1afa3c..9a45bcf 100644 --- a/esbuild.config.mjs +++ b/esbuild.config.mjs @@ -4,6 +4,7 @@ import { chmodSync } from "node:fs"; // Claude Code plugin const ccHooks = [ { entry: "dist/src/hooks/session-start.js", out: "session-start" }, + { entry: "dist/src/hooks/session-start-setup.js", out: "session-start-setup" }, { entry: "dist/src/hooks/capture.js", out: "capture" }, { entry: "dist/src/hooks/pre-tool-use.js", out: "pre-tool-use" }, { entry: "dist/src/hooks/session-end.js", out: "session-end" }, @@ -36,6 +37,7 @@ for (const h of ccAll) { // Codex plugin const codexHooks = [ { entry: "dist/src/hooks/codex/session-start.js", out: "session-start" }, + { entry: "dist/src/hooks/codex/session-start-setup.js", out: "session-start-setup" }, { entry: "dist/src/hooks/codex/capture.js", out: "capture" }, { entry: "dist/src/hooks/codex/pre-tool-use.js", out: "pre-tool-use" }, { entry: "dist/src/hooks/codex/stop.js", out: "stop" }, From d30ec150f4360950e4d6713557090695ad0194d3 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 00:02:57 +0000 Subject: [PATCH 09/33] test: add session-start and capture queue tests --- claude-code/tests/session-start.test.ts | 147 ++++++++++++++++++++++++ codex/tests/codex-hooks.test.ts | 8 ++ codex/tests/codex-integration.test.ts | 34 ++++++ 3 files changed, 189 insertions(+) create mode 100644 claude-code/tests/session-start.test.ts diff --git a/claude-code/tests/session-start.test.ts b/claude-code/tests/session-start.test.ts new file mode 100644 index 
0000000..7014a73 --- /dev/null +++ b/claude-code/tests/session-start.test.ts @@ -0,0 +1,147 @@ +import { describe, it, expect } from "vitest"; +import { execFileSync } from "node:child_process"; +import { readFileSync, existsSync } from "node:fs"; +import { join, dirname } from "node:path"; +import { fileURLToPath } from "node:url"; + +const __dir = dirname(fileURLToPath(import.meta.url)); +const ccRoot = join(__dir, ".."); +const bundleDir = join(ccRoot, "bundle"); + +// ── hooks.json structure tests ────────────────────────────────────────────── + +describe("claude-code hooks.json: async refactor", () => { + const hooks = JSON.parse(readFileSync(join(ccRoot, "hooks", "hooks.json"), "utf-8")); + + it("SessionStart has exactly 2 hooks", () => { + const sessionStart = hooks.hooks.SessionStart; + expect(sessionStart).toHaveLength(1); // one entry + expect(sessionStart[0].hooks).toHaveLength(2); // two hooks in the entry + }); + + it("first SessionStart hook is sync with timeout <= 15s", () => { + const first = hooks.hooks.SessionStart[0].hooks[0]; + expect(first).not.toHaveProperty("async"); + expect(first.timeout).toBeLessThanOrEqual(15); + expect(first.command).toContain("session-start.js"); + expect(first.command).not.toContain("session-start-setup.js"); + }); + + it("second SessionStart hook is async and references session-start-setup.js", () => { + const second = hooks.hooks.SessionStart[0].hooks[1]; + expect(second.async).toBe(true); + expect(second.command).toContain("session-start-setup.js"); + }); + + it("UserPromptSubmit has async: true", () => { + const hook = hooks.hooks.UserPromptSubmit[0].hooks[0]; + expect(hook.async).toBe(true); + }); + + it("Stop has async: true", () => { + const hook = hooks.hooks.Stop[0].hooks[0]; + expect(hook.async).toBe(true); + }); + + it("PreToolUse does NOT have async flag", () => { + const hook = hooks.hooks.PreToolUse[0].hooks[0]; + expect(hook).not.toHaveProperty("async"); + }); + + it("SessionEnd does NOT have async 
flag", () => { + const hook = hooks.hooks.SessionEnd[0].hooks[0]; + expect(hook).not.toHaveProperty("async"); + }); +}); + +// ── Bundle existence ──────────────────────────────────────────────────────── + +describe("claude-code bundle: session-start-setup.js exists", () => { + it("session-start-setup.js exists in bundle/", () => { + expect(existsSync(join(bundleDir, "session-start-setup.js"))).toBe(true); + const content = readFileSync(join(bundleDir, "session-start-setup.js"), "utf-8"); + expect(content.length).toBeGreaterThan(0); + }); +}); + +// ── session-start.js integration tests ────────────────────────────────────── + +function runHook(bundle: string, input: Record, extraEnv: Record = {}): string { + const result = execFileSync("node", [join(bundleDir, bundle)], { + input: JSON.stringify(input), + encoding: "utf-8", + timeout: 15_000, + env: { + ...process.env, + DEEPLAKE_CAPTURE: "false", + DEEPLAKE_TOKEN: "", + DEEPLAKE_ORG_ID: "", + ...extraEnv, + }, + }); + return result.trim(); +} + +describe("claude-code integration: session-start.js (sync hook)", () => { + const baseInput = { + session_id: "test-session-ss-001", + cwd: "/tmp/test-project", + hook_event_name: "SessionStart", + }; + + it("returns valid JSON with hookSpecificOutput.additionalContext", () => { + const raw = runHook("session-start.js", baseInput); + const parsed = JSON.parse(raw); + expect(parsed).toHaveProperty("hookSpecificOutput"); + expect(parsed.hookSpecificOutput).toHaveProperty("additionalContext"); + expect(typeof parsed.hookSpecificOutput.additionalContext).toBe("string"); + }); + + it("additionalContext contains DEEPLAKE MEMORY", () => { + const raw = runHook("session-start.js", baseInput); + const parsed = JSON.parse(raw); + expect(parsed.hookSpecificOutput.additionalContext).toContain("DEEPLAKE MEMORY"); + }); + + it("contains login status text", () => { + const raw = runHook("session-start.js", baseInput); + const parsed = JSON.parse(raw); + const ctx = 
parsed.hookSpecificOutput.additionalContext; + expect(ctx).toMatch(/Logged in to Deeplake|Not logged in to Deeplake/); + }); + + it("completes within 3s with no credentials (no server calls)", () => { + const start = Date.now(); + runHook("session-start.js", baseInput); + const elapsed = Date.now() - start; + expect(elapsed).toBeLessThan(3000); + }); +}); + +// ── session-start-setup.js integration tests ──────────────────────────────── + +describe("claude-code integration: session-start-setup.js (async hook)", () => { + const baseInput = { + session_id: "test-session-setup-001", + cwd: "/tmp/test-project", + hook_event_name: "SessionStart", + }; + + it("exits cleanly when DEEPLAKE_WIKI_WORKER=1", () => { + const raw = runHook("session-start-setup.js", baseInput, { DEEPLAKE_WIKI_WORKER: "1" }); + // Fire-and-forget hook: no stdout expected + expect(raw).toBe(""); + }); + + it("exits cleanly with no credentials (DEEPLAKE_TOKEN='')", () => { + // Should not throw — just exits gracefully + const raw = runHook("session-start-setup.js", baseInput); + // No stdout output expected from async fire-and-forget hook + expect(raw).toBe(""); + }); + + it("does NOT produce stdout output (fire-and-forget)", () => { + const raw = runHook("session-start-setup.js", baseInput); + expect(raw).toBe(""); + }); +}); diff --git a/codex/tests/codex-hooks.test.ts b/codex/tests/codex-hooks.test.ts index 780e087..2dd01bc 100644 --- a/codex/tests/codex-hooks.test.ts +++ b/codex/tests/codex-hooks.test.ts @@ -42,6 +42,13 @@ describe("codex hooks.json", () => { expect(sessionStart[0].matcher).toBe("startup|resume"); }); + it("SessionStart timeout is <= 15s (regression: was 120s)", () => { + const sessionStart = hooks.hooks.SessionStart; + for (const hook of sessionStart[0].hooks) { + expect(hook.timeout).toBeLessThanOrEqual(15); + } + }); + it("no hooks use the async flag (not supported in Codex)", () => { for (const [, entries] of Object.entries(hooks.hooks) as [string, any[]][]) { for (const 
entry of entries) { @@ -87,6 +94,7 @@ describe("codex bundle output", () => { const expectedFiles = [ "session-start.js", + "session-start-setup.js", "capture.js", "pre-tool-use.js", "stop.js", diff --git a/codex/tests/codex-integration.test.ts b/codex/tests/codex-integration.test.ts index 6a44f33..0675801 100644 --- a/codex/tests/codex-integration.test.ts +++ b/codex/tests/codex-integration.test.ts @@ -295,6 +295,40 @@ describe("codex integration: pre-tool-use", () => { }); }); +// ── SessionStartSetup ─────────────────────────────────────────────────────── + +describe("codex integration: session-start-setup", () => { + it("exits cleanly when DEEPLAKE_WIKI_WORKER=1", () => { + const raw = runHook("session-start-setup.js", { + session_id: "test-session-setup-001", + cwd: "/tmp/test-project", + hook_event_name: "SessionStart", + model: "gpt-5.2", + }, { DEEPLAKE_WIKI_WORKER: "1" }); + expect(raw).toBe(""); + }); + + it("exits cleanly with no credentials (DEEPLAKE_TOKEN='')", () => { + const raw = runHook("session-start-setup.js", { + session_id: "test-session-setup-002", + cwd: "/tmp/test-project", + hook_event_name: "SessionStart", + model: "gpt-5.2", + }); + expect(raw).toBe(""); + }); + + it("does NOT produce stdout output (fire-and-forget)", () => { + const raw = runHook("session-start-setup.js", { + session_id: "test-session-setup-003", + cwd: "/tmp/test-project", + hook_event_name: "SessionStart", + model: "gpt-5.2", + }); + expect(raw).toBe(""); + }); +}); + // ── Stop ───────────────────────────────────────────────────────────────────── describe("codex integration: stop", () => { From 9abe44ab70538d1e998c3b13c4a0af606b8dc12c Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 00:03:04 +0000 Subject: [PATCH 10/33] build: regenerate bundles for async hooks and capture queue --- claude-code/bundle/capture.js | 303 ++------------- claude-code/bundle/session-end.js | 1 + claude-code/bundle/session-start-setup.js | 448 +++++++++++++++++++++ 
claude-code/bundle/session-start.js | 430 ++------------------ claude-code/bundle/wiki-worker.js | 111 +++++- codex/bundle/capture.js | 298 ++------------ codex/bundle/session-start-setup.js | 452 ++++++++++++++++++++++ codex/bundle/session-start.js | 440 +++------------------ codex/bundle/stop.js | 300 +++----------- 9 files changed, 1164 insertions(+), 1619 deletions(-) create mode 100755 claude-code/bundle/session-start-setup.js create mode 100755 codex/bundle/session-start-setup.js diff --git a/claude-code/bundle/capture.js b/claude-code/bundle/capture.js index 3d3858b..5e4187a 100755 --- a/claude-code/bundle/capture.js +++ b/claude-code/bundle/capture.js @@ -17,43 +17,25 @@ function readStdin() { }); } -// dist/src/config.js -import { readFileSync, existsSync } from "node:fs"; +// dist/src/utils/capture-queue.js +import { appendFileSync, mkdirSync, readFileSync, existsSync, unlinkSync } from "node:fs"; import { join } from "node:path"; -import { homedir, userInfo } from "node:os"; -function loadConfig() { - const home = homedir(); - const credPath = join(home, ".deeplake", "credentials.json"); - let creds = null; - if (existsSync(credPath)) { - try { - creds = JSON.parse(readFileSync(credPath, "utf-8")); - } catch { - return null; - } - } - const token = process.env.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = process.env.DEEPLAKE_ORG_ID ?? creds?.orgId; - if (!token || !orgId) - return null; - return { - token, - orgId, - orgName: creds?.orgName ?? orgId, - userName: creds?.userName || userInfo().username || "unknown", - workspaceId: process.env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: process.env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: process.env.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: process.env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", - memoryPath: process.env.DEEPLAKE_MEMORY_PATH ?? 
join(home, ".deeplake", "memory") - }; +import { homedir } from "node:os"; +var QUEUE_DIR = join(homedir(), ".deeplake", "capture"); +function ensureDir() { + mkdirSync(QUEUE_DIR, { recursive: true }); +} +function queuePath(sessionId) { + return join(QUEUE_DIR, `${sessionId}.jsonl`); +} +function appendEvent(sessionId, event) { + ensureDir(); + const line = JSON.stringify(event) + "\n"; + appendFileSync(queuePath(sessionId), line); } - -// dist/src/deeplake-api.js -import { randomUUID } from "node:crypto"; // dist/src/utils/debug.js -import { appendFileSync } from "node:fs"; +import { appendFileSync as appendFileSync2 } from "node:fs"; import { join as join2 } from "node:path"; import { homedir as homedir2 } from "node:os"; var DEBUG = process.env.DEEPLAKE_DEBUG === "1"; @@ -61,237 +43,17 @@ var LOG = join2(homedir2(), ".deeplake", "hook-debug.log"); function log(tag, msg) { if (!DEBUG) return; - appendFileSync(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} + appendFileSync2(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} `); } -// dist/src/utils/sql.js -function sqlStr(value) { - return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); -} - -// dist/src/deeplake-api.js -var log2 = (msg) => log("sdk", msg); -var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); -var MAX_RETRIES = 3; -var BASE_DELAY_MS = 500; -var MAX_CONCURRENCY = 5; -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} -var Semaphore = class { - max; - waiting = []; - active = 0; - constructor(max) { - this.max = max; - } - async acquire() { - if (this.active < this.max) { - this.active++; - return; - } - await new Promise((resolve) => this.waiting.push(resolve)); - } - release() { - this.active--; - const next = this.waiting.shift(); - if (next) { - this.active++; - next(); - } - } -}; -var DeeplakeApi = class { - token; - apiUrl; - 
orgId; - workspaceId; - tableName; - _pendingRows = []; - _sem = new Semaphore(MAX_CONCURRENCY); - constructor(token, apiUrl, orgId, workspaceId, tableName) { - this.token = token; - this.apiUrl = apiUrl; - this.orgId = orgId; - this.workspaceId = workspaceId; - this.tableName = tableName; - } - /** Execute SQL with retry on transient errors and bounded concurrency. */ - async query(sql) { - await this._sem.acquire(); - try { - return await this._queryWithRetry(sql); - } finally { - this._sem.release(); - } - } - async _queryWithRetry(sql) { - let lastError; - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - let resp; - try { - resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { - method: "POST", - headers: { - Authorization: `Bearer ${this.token}`, - "Content-Type": "application/json", - "X-Activeloop-Org-Id": this.orgId - }, - body: JSON.stringify({ query: sql }) - }); - } catch (e) { - lastError = e instanceof Error ? e : new Error(String(e)); - if (attempt < MAX_RETRIES) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw lastError; - } - if (resp.ok) { - const raw = await resp.json(); - if (!raw?.rows || !raw?.columns) - return []; - return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); - } - const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); - } - throw lastError ?? 
new Error("Query failed: max retries exceeded"); - } - // ── Writes ────────────────────────────────────────────────────────────────── - /** Queue rows for writing. Call commit() to flush. */ - appendRows(rows) { - this._pendingRows.push(...rows); - } - /** Flush pending rows via SQL. */ - async commit() { - if (this._pendingRows.length === 0) - return; - const rows = this._pendingRows; - this._pendingRows = []; - const CONCURRENCY = 10; - for (let i = 0; i < rows.length; i += CONCURRENCY) { - const chunk = rows.slice(i, i + CONCURRENCY); - await Promise.allSettled(chunk.map((r) => this.upsertRowSql(r))); - } - log2(`commit: ${rows.length} rows`); - } - async upsertRowSql(row) { - const ts = (/* @__PURE__ */ new Date()).toISOString(); - const cd = row.creationDate ?? ts; - const lud = row.lastUpdateDate ?? ts; - const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`); - if (exists.length > 0) { - let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`; - if (row.project !== void 0) - setClauses += `, project = '${sqlStr(row.project)}'`; - if (row.description !== void 0) - setClauses += `, description = '${sqlStr(row.description)}'`; - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`); - } else { - const id = randomUUID(); - let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date"; - let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`; - if (row.project !== void 0) { - cols += ", project"; - vals += `, '${sqlStr(row.project)}'`; - } - if (row.description !== void 0) { - cols += ", description"; - vals += `, '${sqlStr(row.description)}'`; - } - await this.query(`INSERT INTO "${this.tableName}" (${cols}) 
VALUES (${vals})`); - } - } - /** Update specific columns on a row by path. */ - async updateColumns(path, columns) { - const setClauses = Object.entries(columns).map(([col, val]) => typeof val === "number" ? `${col} = ${val}` : `${col} = '${sqlStr(String(val))}'`).join(", "); - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(path)}'`); - } - // ── Convenience ───────────────────────────────────────────────────────────── - /** Create a BM25 search index on a column. */ - async createIndex(column) { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); - } - /** List all tables in the workspace (with retry). */ - async listTables() { - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - try { - const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { - headers: { - Authorization: `Bearer ${this.token}`, - "X-Activeloop-Org-Id": this.orgId - } - }); - if (resp.ok) { - const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); - } - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); - continue; - } - return []; - } catch { - if (attempt < MAX_RETRIES) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); - continue; - } - return []; - } - } - return []; - } - /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ - async ensureTable(name) { - const tbl = name ?? 
this.tableName; - const tables = await this.listTables(); - if (!tables.includes(tbl)) { - log2(`table "${tbl}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${tbl}" created`); - } - } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ - async ensureSessionsTable(name) { - const tables = await this.listTables(); - if (!tables.includes(name)) { - log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${name}" created`); - } - } -}; - // dist/src/hooks/capture.js -var log3 = (msg) => log("capture", msg); +var log2 = (msg) => log("capture", msg); var CAPTURE = process.env.DEEPLAKE_CAPTURE !== "false"; -function buildSessionPath(config, sessionId) { - const userName = config.userName; - const orgName = config.orgName; - const workspace = config.workspaceId ?? 
"default"; - return `/sessions/${userName}/${userName}_${orgName}_${workspace}_${sessionId}.jsonl`; -} async function main() { if (!CAPTURE) return; const input = await readStdin(); - const config = loadConfig(); - if (!config) { - log3("no config"); - return; - } - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); const ts = (/* @__PURE__ */ new Date()).toISOString(); const meta = { session_id: input.session_id, @@ -305,7 +67,7 @@ async function main() { }; let entry; if (input.prompt !== void 0) { - log3(`user session=${input.session_id}`); + log2(`user session=${input.session_id}`); entry = { id: crypto.randomUUID(), ...meta, @@ -313,7 +75,7 @@ async function main() { content: input.prompt }; } else if (input.tool_name !== void 0) { - log3(`tool=${input.tool_name} session=${input.session_id}`); + log2(`tool=${input.tool_name} session=${input.session_id}`); entry = { id: crypto.randomUUID(), ...meta, @@ -324,7 +86,7 @@ async function main() { tool_response: JSON.stringify(input.tool_response) }; } else if (input.last_assistant_message !== void 0) { - log3(`assistant session=${input.session_id}`); + log2(`assistant session=${input.session_id}`); entry = { id: crypto.randomUUID(), ...meta, @@ -333,30 +95,13 @@ async function main() { ...input.agent_transcript_path ? { agent_transcript_path: input.agent_transcript_path } : {} }; } else { - log3("unknown event, skipping"); + log2("unknown event, skipping"); return; } - const sessionPath = buildSessionPath(config, input.session_id); - const line = JSON.stringify(entry); - log3(`writing to ${sessionPath}`); - const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; - const filename = sessionPath.split("/").pop() ?? 
""; - const jsonForSql = line.replace(/'/g, "''"); - const insertSql = `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(input.hook_event_name ?? "")}', 'claude_code', '${ts}', '${ts}')`; - try { - await api.query(insertSql); - } catch (e) { - if (e.message?.includes("permission denied") || e.message?.includes("does not exist")) { - log3("table missing, creating and retrying"); - await api.ensureSessionsTable(sessionsTable); - await api.query(insertSql); - } else { - throw e; - } - } - log3("capture ok \u2192 cloud"); + appendEvent(input.session_id, entry); + log2("capture ok \u2192 local queue"); } main().catch((e) => { - log3(`fatal: ${e.message}`); + log2(`fatal: ${e.message}`); process.exit(0); }); diff --git a/claude-code/bundle/session-end.js b/claude-code/bundle/session-end.js index b8ac2a2..ba9c340 100755 --- a/claude-code/bundle/session-end.js +++ b/claude-code/bundle/session-end.js @@ -168,6 +168,7 @@ async function main() { apiUrl: config.apiUrl, token: config.token, orgId: config.orgId, + orgName: config.orgName, workspaceId: config.workspaceId, memoryTable, sessionsTable, diff --git a/claude-code/bundle/session-start-setup.js b/claude-code/bundle/session-start-setup.js new file mode 100755 index 0000000..cb8c0df --- /dev/null +++ b/claude-code/bundle/session-start-setup.js @@ -0,0 +1,448 @@ +#!/usr/bin/env node + +// dist/src/hooks/session-start-setup.js +import { fileURLToPath } from "node:url"; +import { dirname, join as join4 } from "node:path"; +import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2, readFileSync as readFileSync3 } from "node:fs"; +import { execSync as execSync2 } from "node:child_process"; +import { homedir as homedir4 
} from "node:os"; + +// dist/src/commands/auth.js +import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs"; +import { join } from "node:path"; +import { homedir } from "node:os"; +import { execSync } from "node:child_process"; +var CONFIG_DIR = join(homedir(), ".deeplake"); +var CREDS_PATH = join(CONFIG_DIR, "credentials.json"); +function loadCredentials() { + if (!existsSync(CREDS_PATH)) + return null; + try { + return JSON.parse(readFileSync(CREDS_PATH, "utf-8")); + } catch { + return null; + } +} +function saveCredentials(creds) { + if (!existsSync(CONFIG_DIR)) + mkdirSync(CONFIG_DIR, { recursive: true, mode: 448 }); + writeFileSync(CREDS_PATH, JSON.stringify({ ...creds, savedAt: (/* @__PURE__ */ new Date()).toISOString() }, null, 2), { mode: 384 }); +} + +// dist/src/config.js +import { readFileSync as readFileSync2, existsSync as existsSync2 } from "node:fs"; +import { join as join2 } from "node:path"; +import { homedir as homedir2, userInfo } from "node:os"; +function loadConfig() { + const home = homedir2(); + const credPath = join2(home, ".deeplake", "credentials.json"); + let creds = null; + if (existsSync2(credPath)) { + try { + creds = JSON.parse(readFileSync2(credPath, "utf-8")); + } catch { + return null; + } + } + const token = process.env.DEEPLAKE_TOKEN ?? creds?.token; + const orgId = process.env.DEEPLAKE_ORG_ID ?? creds?.orgId; + if (!token || !orgId) + return null; + return { + token, + orgId, + orgName: creds?.orgName ?? orgId, + userName: creds?.userName || userInfo().username || "unknown", + workspaceId: process.env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", + apiUrl: process.env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", + tableName: process.env.DEEPLAKE_TABLE ?? "memory", + sessionsTableName: process.env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + memoryPath: process.env.DEEPLAKE_MEMORY_PATH ?? 
join2(home, ".deeplake", "memory") + }; +} + +// dist/src/deeplake-api.js +import { randomUUID } from "node:crypto"; + +// dist/src/utils/debug.js +import { appendFileSync } from "node:fs"; +import { join as join3 } from "node:path"; +import { homedir as homedir3 } from "node:os"; +var DEBUG = process.env.DEEPLAKE_DEBUG === "1"; +var LOG = join3(homedir3(), ".deeplake", "hook-debug.log"); +function utcTimestamp(d = /* @__PURE__ */ new Date()) { + return d.toISOString().replace("T", " ").slice(0, 19) + " UTC"; +} +function log(tag, msg) { + if (!DEBUG) + return; + appendFileSync(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} +`); +} + +// dist/src/utils/sql.js +function sqlStr(value) { + return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); +} + +// dist/src/deeplake-api.js +var log2 = (msg) => log("sdk", msg); +var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); +var MAX_RETRIES = 3; +var BASE_DELAY_MS = 500; +var MAX_CONCURRENCY = 5; +function sleep(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} +var Semaphore = class { + max; + waiting = []; + active = 0; + constructor(max) { + this.max = max; + } + async acquire() { + if (this.active < this.max) { + this.active++; + return; + } + await new Promise((resolve) => this.waiting.push(resolve)); + } + release() { + this.active--; + const next = this.waiting.shift(); + if (next) { + this.active++; + next(); + } + } +}; +var DeeplakeApi = class { + token; + apiUrl; + orgId; + workspaceId; + tableName; + _pendingRows = []; + _sem = new Semaphore(MAX_CONCURRENCY); + constructor(token, apiUrl, orgId, workspaceId, tableName) { + this.token = token; + this.apiUrl = apiUrl; + this.orgId = orgId; + this.workspaceId = workspaceId; + this.tableName = tableName; + } + /** Execute SQL with retry on transient errors and bounded concurrency. 
*/ + async query(sql) { + await this._sem.acquire(); + try { + return await this._queryWithRetry(sql); + } finally { + this._sem.release(); + } + } + async _queryWithRetry(sql) { + let lastError; + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + let resp; + try { + resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { + method: "POST", + headers: { + Authorization: `Bearer ${this.token}`, + "Content-Type": "application/json", + "X-Activeloop-Org-Id": this.orgId + }, + body: JSON.stringify({ query: sql }) + }); + } catch (e) { + lastError = e instanceof Error ? e : new Error(String(e)); + if (attempt < MAX_RETRIES) { + const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; + log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); + await sleep(delay); + continue; + } + throw lastError; + } + if (resp.ok) { + const raw = await resp.json(); + if (!raw?.rows || !raw?.columns) + return []; + return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); + } + const text = await resp.text().catch(() => ""); + if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; + log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); + await sleep(delay); + continue; + } + throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + } + throw lastError ?? new Error("Query failed: max retries exceeded"); + } + // ── Writes ────────────────────────────────────────────────────────────────── + /** Queue rows for writing. Call commit() to flush. */ + appendRows(rows) { + this._pendingRows.push(...rows); + } + /** Flush pending rows via SQL. 
*/ + async commit() { + if (this._pendingRows.length === 0) + return; + const rows = this._pendingRows; + this._pendingRows = []; + const CONCURRENCY = 10; + for (let i = 0; i < rows.length; i += CONCURRENCY) { + const chunk = rows.slice(i, i + CONCURRENCY); + await Promise.allSettled(chunk.map((r) => this.upsertRowSql(r))); + } + log2(`commit: ${rows.length} rows`); + } + async upsertRowSql(row) { + const ts = (/* @__PURE__ */ new Date()).toISOString(); + const cd = row.creationDate ?? ts; + const lud = row.lastUpdateDate ?? ts; + const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`); + if (exists.length > 0) { + let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`; + if (row.project !== void 0) + setClauses += `, project = '${sqlStr(row.project)}'`; + if (row.description !== void 0) + setClauses += `, description = '${sqlStr(row.description)}'`; + await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`); + } else { + const id = randomUUID(); + let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date"; + let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`; + if (row.project !== void 0) { + cols += ", project"; + vals += `, '${sqlStr(row.project)}'`; + } + if (row.description !== void 0) { + cols += ", description"; + vals += `, '${sqlStr(row.description)}'`; + } + await this.query(`INSERT INTO "${this.tableName}" (${cols}) VALUES (${vals})`); + } + } + /** Update specific columns on a row by path. */ + async updateColumns(path, columns) { + const setClauses = Object.entries(columns).map(([col, val]) => typeof val === "number" ? 
`${col} = ${val}` : `${col} = '${sqlStr(String(val))}'`).join(", "); + await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(path)}'`); + } + // ── Convenience ───────────────────────────────────────────────────────────── + /** Create a BM25 search index on a column. */ + async createIndex(column) { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); + } + /** List all tables in the workspace (with retry). */ + async listTables() { + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + try { + const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { + headers: { + Authorization: `Bearer ${this.token}`, + "X-Activeloop-Org-Id": this.orgId + } + }); + if (resp.ok) { + const data = await resp.json(); + return (data.tables ?? []).map((t) => t.table_name); + } + if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); + continue; + } + return []; + } catch { + if (attempt < MAX_RETRIES) { + await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); + continue; + } + return []; + } + } + return []; + } + /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ + async ensureTable(name) { + const tbl = name ?? 
this.tableName; + const tables = await this.listTables(); + if (!tables.includes(tbl)) { + log2(`table "${tbl}" not found, creating`); + await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + log2(`table "${tbl}" created`); + } + } + /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + async ensureSessionsTable(name) { + const tables = await this.listTables(); + if (!tables.includes(name)) { + log2(`table "${name}" not found, creating`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + log2(`table "${name}" created`); + } + } +}; + +// dist/src/utils/stdin.js +function readStdin() { + return new Promise((resolve, reject) => { + let data = ""; + process.stdin.setEncoding("utf-8"); + process.stdin.on("data", (chunk) => data += chunk); + process.stdin.on("end", () => { + try { + resolve(JSON.parse(data)); + } catch (err) { + reject(new Error(`Failed to parse hook input: ${err}`)); + } + }); + process.stdin.on("error", reject); + }); +} + +// dist/src/hooks/session-start-setup.js +var log3 = (msg) => log("session-setup", msg); +var __bundleDir = 
dirname(fileURLToPath(import.meta.url)); +var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; +var VERSION_CHECK_TIMEOUT = 3e3; +var HOME = homedir4(); +var WIKI_LOG = join4(HOME, ".claude", "hooks", "deeplake-wiki.log"); +function wikiLog(msg) { + try { + mkdirSync2(join4(HOME, ".claude", "hooks"), { recursive: true }); + appendFileSync2(WIKI_LOG, `[${utcTimestamp()}] ${msg} +`); + } catch { + } +} +function getInstalledVersion() { + let dir = __bundleDir; + for (let i = 0; i < 5; i++) { + const candidate = join4(dir, "package.json"); + try { + const pkg = JSON.parse(readFileSync3(candidate, "utf-8")); + if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) + return pkg.version; + } catch { + } + const parent = dirname(dir); + if (parent === dir) + break; + dir = parent; + } + return null; +} +async function getLatestVersion() { + try { + const res = await fetch(GITHUB_RAW_PKG, { signal: AbortSignal.timeout(VERSION_CHECK_TIMEOUT) }); + if (!res.ok) + return null; + const pkg = await res.json(); + return pkg.version ?? null; + } catch { + return null; + } +} +function isNewer(latest, current) { + const parse = (v) => v.split(".").map(Number); + const [la, lb, lc] = parse(latest); + const [ca, cb, cc] = parse(current); + return la > ca || la === ca && lb > cb || la === ca && lb === cb && lc > cc; +} +async function createPlaceholder(api, table, sessionId, cwd, userName, orgName, workspaceId) { + const summaryPath = `/summaries/${userName}/${sessionId}.md`; + await api.query(`SELECT deeplake_sync_table('${table}')`); + const existing = await api.query(`SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1`); + if (existing.length > 0) { + wikiLog(`SessionSetup: summary exists for ${sessionId} (resumed)`); + return; + } + const now = (/* @__PURE__ */ new Date()).toISOString(); + const projectName = cwd.split("/").pop() ?? 
"unknown"; + const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; + const content = [ + `# Session ${sessionId}`, + `- **Source**: ${sessionSource}`, + `- **Started**: ${now}`, + `- **Project**: ${projectName}`, + `- **Status**: in-progress`, + "" + ].join("\n"); + const filename = `${sessionId}.md`; + await api.query(`INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ${Buffer.byteLength(content, "utf-8")}, '${sqlStr(projectName)}', 'in progress', 'claude_code', '${now}', '${now}')`); + wikiLog(`SessionSetup: created placeholder for ${sessionId} (${cwd})`); +} +async function main() { + if (process.env.DEEPLAKE_WIKI_WORKER === "1") + return; + const input = await readStdin(); + const creds = loadCredentials(); + if (!creds?.token) { + log3("no credentials"); + return; + } + if (!creds.userName) { + try { + const { userInfo: userInfo2 } = await import("node:os"); + creds.userName = userInfo2().username ?? "unknown"; + saveCredentials(creds); + log3(`backfilled userName: ${creds.userName}`); + } catch { + } + } + if (input.session_id) { + try { + const config = loadConfig(); + if (config) { + const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName); + await api.ensureTable(); + await api.ensureSessionsTable(config.sessionsTableName); + await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? 
"", config.userName, config.orgName, config.workspaceId); + log3("setup complete"); + } + } catch (e) { + log3(`setup failed: ${e.message}`); + wikiLog(`SessionSetup: failed for ${input.session_id}: ${e.message}`); + } + } + const autoupdate = creds.autoupdate !== false; + try { + const current = getInstalledVersion(); + if (current) { + const latest = await getLatestVersion(); + if (latest && isNewer(latest, current)) { + if (autoupdate) { + log3(`autoupdate: updating ${current} \u2192 ${latest}`); + try { + const scopes = ["user", "project", "local", "managed"]; + const cmd = scopes.map((s) => `claude plugin update hivemind@hivemind --scope ${s} 2>/dev/null`).join("; "); + execSync2(cmd, { stdio: "ignore", timeout: 6e4 }); + process.stderr.write(`\u2705 Hivemind auto-updated: ${current} \u2192 ${latest}. Run /reload-plugins to apply. +`); + log3(`autoupdate succeeded: ${current} \u2192 ${latest}`); + } catch (e) { + process.stderr.write(`\u2B06\uFE0F Hivemind update available: ${current} \u2192 ${latest}. Auto-update failed \u2014 run /hivemind:update to upgrade manually. +`); + log3(`autoupdate failed: ${e.message}`); + } + } else { + process.stderr.write(`\u2B06\uFE0F Hivemind update available: ${current} \u2192 ${latest}. Run /hivemind:update to upgrade. 
+`); + log3(`update available (autoupdate off): ${current} \u2192 ${latest}`); + } + } else { + log3(`version up to date: ${current}`); + } + } + } catch (e) { + log3(`version check failed: ${e.message}`); + } +} +main().catch((e) => { + log3(`fatal: ${e.message}`); + process.exit(0); +}); diff --git a/claude-code/bundle/session-start.js b/claude-code/bundle/session-start.js index e022424..88ef738 100755 --- a/claude-code/bundle/session-start.js +++ b/claude-code/bundle/session-start.js @@ -2,10 +2,8 @@ // dist/src/hooks/session-start.js import { fileURLToPath } from "node:url"; -import { dirname, join as join4 } from "node:path"; -import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2, readFileSync as readFileSync3 } from "node:fs"; -import { execSync as execSync2 } from "node:child_process"; -import { homedir as homedir4 } from "node:os"; +import { dirname, join as join3 } from "node:path"; +import { readFileSync as readFileSync2 } from "node:fs"; // dist/src/commands/auth.js import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs"; @@ -23,269 +21,6 @@ function loadCredentials() { return null; } } -function saveCredentials(creds) { - if (!existsSync(CONFIG_DIR)) - mkdirSync(CONFIG_DIR, { recursive: true, mode: 448 }); - writeFileSync(CREDS_PATH, JSON.stringify({ ...creds, savedAt: (/* @__PURE__ */ new Date()).toISOString() }, null, 2), { mode: 384 }); -} - -// dist/src/config.js -import { readFileSync as readFileSync2, existsSync as existsSync2 } from "node:fs"; -import { join as join2 } from "node:path"; -import { homedir as homedir2, userInfo } from "node:os"; -function loadConfig() { - const home = homedir2(); - const credPath = join2(home, ".deeplake", "credentials.json"); - let creds = null; - if (existsSync2(credPath)) { - try { - creds = JSON.parse(readFileSync2(credPath, "utf-8")); - } catch { - return null; - } - } - const token = process.env.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = process.env.DEEPLAKE_ORG_ID ?? 
creds?.orgId; - if (!token || !orgId) - return null; - return { - token, - orgId, - orgName: creds?.orgName ?? orgId, - userName: creds?.userName || userInfo().username || "unknown", - workspaceId: process.env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: process.env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: process.env.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: process.env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", - memoryPath: process.env.DEEPLAKE_MEMORY_PATH ?? join2(home, ".deeplake", "memory") - }; -} - -// dist/src/deeplake-api.js -import { randomUUID } from "node:crypto"; - -// dist/src/utils/debug.js -import { appendFileSync } from "node:fs"; -import { join as join3 } from "node:path"; -import { homedir as homedir3 } from "node:os"; -var DEBUG = process.env.DEEPLAKE_DEBUG === "1"; -var LOG = join3(homedir3(), ".deeplake", "hook-debug.log"); -function utcTimestamp(d = /* @__PURE__ */ new Date()) { - return d.toISOString().replace("T", " ").slice(0, 19) + " UTC"; -} -function log(tag, msg) { - if (!DEBUG) - return; - appendFileSync(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} -`); -} - -// dist/src/utils/sql.js -function sqlStr(value) { - return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); -} - -// dist/src/deeplake-api.js -var log2 = (msg) => log("sdk", msg); -var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); -var MAX_RETRIES = 3; -var BASE_DELAY_MS = 500; -var MAX_CONCURRENCY = 5; -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} -var Semaphore = class { - max; - waiting = []; - active = 0; - constructor(max) { - this.max = max; - } - async acquire() { - if (this.active < this.max) { - this.active++; - return; - } - await new Promise((resolve) => this.waiting.push(resolve)); - } - release() { - this.active--; - const next = this.waiting.shift(); 
- if (next) { - this.active++; - next(); - } - } -}; -var DeeplakeApi = class { - token; - apiUrl; - orgId; - workspaceId; - tableName; - _pendingRows = []; - _sem = new Semaphore(MAX_CONCURRENCY); - constructor(token, apiUrl, orgId, workspaceId, tableName) { - this.token = token; - this.apiUrl = apiUrl; - this.orgId = orgId; - this.workspaceId = workspaceId; - this.tableName = tableName; - } - /** Execute SQL with retry on transient errors and bounded concurrency. */ - async query(sql) { - await this._sem.acquire(); - try { - return await this._queryWithRetry(sql); - } finally { - this._sem.release(); - } - } - async _queryWithRetry(sql) { - let lastError; - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - let resp; - try { - resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { - method: "POST", - headers: { - Authorization: `Bearer ${this.token}`, - "Content-Type": "application/json", - "X-Activeloop-Org-Id": this.orgId - }, - body: JSON.stringify({ query: sql }) - }); - } catch (e) { - lastError = e instanceof Error ? 
e : new Error(String(e)); - if (attempt < MAX_RETRIES) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw lastError; - } - if (resp.ok) { - const raw = await resp.json(); - if (!raw?.rows || !raw?.columns) - return []; - return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); - } - const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); - } - throw lastError ?? new Error("Query failed: max retries exceeded"); - } - // ── Writes ────────────────────────────────────────────────────────────────── - /** Queue rows for writing. Call commit() to flush. */ - appendRows(rows) { - this._pendingRows.push(...rows); - } - /** Flush pending rows via SQL. */ - async commit() { - if (this._pendingRows.length === 0) - return; - const rows = this._pendingRows; - this._pendingRows = []; - const CONCURRENCY = 10; - for (let i = 0; i < rows.length; i += CONCURRENCY) { - const chunk = rows.slice(i, i + CONCURRENCY); - await Promise.allSettled(chunk.map((r) => this.upsertRowSql(r))); - } - log2(`commit: ${rows.length} rows`); - } - async upsertRowSql(row) { - const ts = (/* @__PURE__ */ new Date()).toISOString(); - const cd = row.creationDate ?? ts; - const lud = row.lastUpdateDate ?? 
ts; - const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`); - if (exists.length > 0) { - let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`; - if (row.project !== void 0) - setClauses += `, project = '${sqlStr(row.project)}'`; - if (row.description !== void 0) - setClauses += `, description = '${sqlStr(row.description)}'`; - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`); - } else { - const id = randomUUID(); - let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date"; - let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`; - if (row.project !== void 0) { - cols += ", project"; - vals += `, '${sqlStr(row.project)}'`; - } - if (row.description !== void 0) { - cols += ", description"; - vals += `, '${sqlStr(row.description)}'`; - } - await this.query(`INSERT INTO "${this.tableName}" (${cols}) VALUES (${vals})`); - } - } - /** Update specific columns on a row by path. */ - async updateColumns(path, columns) { - const setClauses = Object.entries(columns).map(([col, val]) => typeof val === "number" ? `${col} = ${val}` : `${col} = '${sqlStr(String(val))}'`).join(", "); - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(path)}'`); - } - // ── Convenience ───────────────────────────────────────────────────────────── - /** Create a BM25 search index on a column. */ - async createIndex(column) { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); - } - /** List all tables in the workspace (with retry). 
*/ - async listTables() { - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - try { - const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { - headers: { - Authorization: `Bearer ${this.token}`, - "X-Activeloop-Org-Id": this.orgId - } - }); - if (resp.ok) { - const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); - } - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); - continue; - } - return []; - } catch { - if (attempt < MAX_RETRIES) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); - continue; - } - return []; - } - } - return []; - } - /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ - async ensureTable(name) { - const tbl = name ?? this.tableName; - const tables = await this.listTables(); - if (!tables.includes(tbl)) { - log2(`table "${tbl}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${tbl}" created`); - } - } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ - async ensureSessionsTable(name) { - const tables = await this.listTables(); - if (!tables.includes(name)) { - log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${name}" created`); - } - } -}; // dist/src/utils/stdin.js function readStdin() { @@ -304,10 +39,23 @@ function readStdin() { }); } +// dist/src/utils/debug.js +import { appendFileSync } from "node:fs"; +import { join as join2 } from "node:path"; +import { homedir as homedir2 } from "node:os"; +var DEBUG = process.env.DEEPLAKE_DEBUG === "1"; +var LOG = join2(homedir2(), ".deeplake", "hook-debug.log"); +function log(tag, msg) { + if (!DEBUG) + return; + appendFileSync(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} +`); +} + // dist/src/hooks/session-start.js -var log3 = (msg) => log("session-start", msg); +var log2 = (msg) => log("session-start", msg); var __bundleDir = dirname(fileURLToPath(import.meta.url)); -var AUTH_CMD = join4(__bundleDir, "commands", "auth-login.js"); +var AUTH_CMD = join3(__bundleDir, "commands", "auth-login.js"); var context = `DEEPLAKE MEMORY: You have TWO memory sources. ALWAYS check BOTH when the user asks you to recall, remember, or look up ANY information: 1. Your built-in memory (~/.claude/) \u2014 personal per-project notes @@ -338,14 +86,12 @@ IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, etc.) to LIMITS: Do NOT spawn subagents to read deeplake memory. If a file returns empty after 2 attempts, skip it and move on. 
Report what you found rather than exhaustively retrying. Debugging: Set DEEPLAKE_DEBUG=1 to enable verbose logging to ~/.deeplake/hook-debug.log`; -var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; -var VERSION_CHECK_TIMEOUT = 3e3; function getInstalledVersion() { let dir = __bundleDir; for (let i = 0; i < 5; i++) { - const candidate = join4(dir, "package.json"); + const candidate = join3(dir, "package.json"); try { - const pkg = JSON.parse(readFileSync3(candidate, "utf-8")); + const pkg = JSON.parse(readFileSync2(candidate, "utf-8")); if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) return pkg.version; } catch { @@ -357,143 +103,29 @@ function getInstalledVersion() { } return null; } -async function getLatestVersion() { - try { - const res = await fetch(GITHUB_RAW_PKG, { signal: AbortSignal.timeout(VERSION_CHECK_TIMEOUT) }); - if (!res.ok) - return null; - const pkg = await res.json(); - return pkg.version ?? 
null; - } catch { - return null; - } -} -function isNewer(latest, current) { - const parse = (v) => v.split(".").map(Number); - const [la, lb, lc] = parse(latest); - const [ca, cb, cc] = parse(current); - return la > ca || la === ca && lb > cb || la === ca && lb === cb && lc > cc; -} -var HOME = homedir4(); -var WIKI_LOG = join4(HOME, ".claude", "hooks", "deeplake-wiki.log"); -function wikiLog(msg) { - try { - mkdirSync2(join4(HOME, ".claude", "hooks"), { recursive: true }); - appendFileSync2(WIKI_LOG, `[${utcTimestamp()}] ${msg} -`); - } catch { - } -} -async function createPlaceholder(api, table, sessionId, cwd, userName, orgName, workspaceId) { - const summaryPath = `/summaries/${userName}/${sessionId}.md`; - await api.query(`SELECT deeplake_sync_table('${table}')`); - const existing = await api.query(`SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1`); - if (existing.length > 0) { - wikiLog(`SessionStart: summary exists for ${sessionId} (resumed)`); - return; - } - const now = (/* @__PURE__ */ new Date()).toISOString(); - const projectName = cwd.split("/").pop() ?? 
"unknown"; - const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; - const content = [ - `# Session ${sessionId}`, - `- **Source**: ${sessionSource}`, - `- **Started**: ${now}`, - `- **Project**: ${projectName}`, - `- **Status**: in-progress`, - "" - ].join("\n"); - const filename = `${sessionId}.md`; - await api.query(`INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ${Buffer.byteLength(content, "utf-8")}, '${sqlStr(projectName)}', 'in progress', 'claude_code', '${now}', '${now}')`); - wikiLog(`SessionStart: created placeholder for ${sessionId} (${cwd})`); -} async function main() { if (process.env.DEEPLAKE_WIKI_WORKER === "1") return; - const input = await readStdin(); - let creds = loadCredentials(); + await readStdin(); + const creds = loadCredentials(); if (!creds?.token) { - log3("no credentials found \u2014 run /hivemind:login to authenticate"); + log2("no credentials found \u2014 run /hivemind:login to authenticate"); } else { - log3(`credentials loaded: org=${creds.orgName ?? creds.orgId}`); - if (creds.token && !creds.userName) { - try { - const { userInfo: userInfo2 } = await import("node:os"); - creds.userName = userInfo2().username ?? 
"unknown"; - saveCredentials(creds); - log3(`backfilled and persisted userName: ${creds.userName}`); - } catch { - } - } - } - if (input.session_id && creds?.token) { - try { - const config = loadConfig(); - if (config) { - const table = config.tableName; - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); - await api.ensureTable(); - await api.ensureSessionsTable(sessionsTable); - await createPlaceholder(api, table, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); - log3("placeholder created"); - } - } catch (e) { - log3(`placeholder failed: ${e.message}`); - wikiLog(`SessionStart: placeholder failed for ${input.session_id}: ${e.message}`); - } + log2(`credentials loaded: org=${creds.orgName ?? creds.orgId}`); } - const autoupdate = creds?.autoupdate !== false; - let updateNotice = ""; - try { - const current = getInstalledVersion(); - if (current) { - const latest = await getLatestVersion(); - if (latest && isNewer(latest, current)) { - if (autoupdate) { - log3(`autoupdate: updating ${current} \u2192 ${latest}`); - try { - const scopes = ["user", "project", "local", "managed"]; - const cmd = scopes.map((s) => `claude plugin update hivemind@hivemind --scope ${s} 2>/dev/null`).join("; "); - execSync2(cmd, { stdio: "ignore", timeout: 6e4 }); - updateNotice = ` - -\u2705 Hivemind auto-updated: ${current} \u2192 ${latest}. Run /reload-plugins to apply.`; - process.stderr.write(`\u2705 Hivemind auto-updated: ${current} \u2192 ${latest}. Run /reload-plugins to apply. -`); - log3(`autoupdate succeeded: ${current} \u2192 ${latest}`); - } catch (e) { - updateNotice = ` + let versionNotice = ""; + const current = getInstalledVersion(); + if (current) { + versionNotice = ` -\u2B06\uFE0F Hivemind update available: ${current} \u2192 ${latest}. 
Auto-update failed \u2014 run /hivemind:update to upgrade manually.`; - process.stderr.write(`\u2B06\uFE0F Hivemind update available: ${current} \u2192 ${latest}. Auto-update failed \u2014 run /hivemind:update to upgrade manually. -`); - log3(`autoupdate failed: ${e.message}`); - } - } else { - updateNotice = ` - -\u2B06\uFE0F Hivemind update available: ${current} \u2192 ${latest}. Run /hivemind:update to upgrade.`; - process.stderr.write(`\u2B06\uFE0F Hivemind update available: ${current} \u2192 ${latest}. Run /hivemind:update to upgrade. -`); - log3(`update available (autoupdate off): ${current} \u2192 ${latest}`); - } - } else { - log3(`version up to date: ${current}`); - updateNotice = ` - -\u2705 Hivemind v${current} (up to date)`; - } - } - } catch (e) { - log3(`version check failed: ${e.message}`); +Hivemind v${current}`; } const resolvedContext = context.replace(/DEEPLAKE_AUTH_CMD/g, AUTH_CMD); const additionalContext = creds?.token ? `${resolvedContext} -Logged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId ?? "default"})${updateNotice}` : `${resolvedContext} +Logged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId ?? "default"})${versionNotice}` : `${resolvedContext} -\u26A0\uFE0F Not logged in to Deeplake. Memory search will not work. Ask the user to run /hivemind:login to authenticate.${updateNotice}`; +\u26A0\uFE0F Not logged in to Deeplake. Memory search will not work. Ask the user to run /hivemind:login to authenticate.${versionNotice}`; console.log(JSON.stringify({ hookSpecificOutput: { hookEventName: "SessionStart", @@ -502,6 +134,6 @@ Logged in to Deeplake as org: ${creds.orgName ?? 
creds.orgId} (workspace: ${cred })); } main().catch((e) => { - log3(`fatal: ${e.message}`); + log2(`fatal: ${e.message}`); process.exit(0); }); diff --git a/claude-code/bundle/wiki-worker.js b/claude-code/bundle/wiki-worker.js index f759c23..1d08294 100755 --- a/claude-code/bundle/wiki-worker.js +++ b/claude-code/bundle/wiki-worker.js @@ -1,9 +1,9 @@ #!/usr/bin/env node // dist/src/hooks/wiki-worker.js -import { readFileSync, writeFileSync, existsSync, appendFileSync as appendFileSync2, mkdirSync, rmSync } from "node:fs"; +import { readFileSync as readFileSync2, writeFileSync, existsSync as existsSync2, appendFileSync as appendFileSync3, mkdirSync as mkdirSync2, rmSync } from "node:fs"; import { execFileSync } from "node:child_process"; -import { join as join2 } from "node:path"; +import { join as join3 } from "node:path"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -15,15 +15,40 @@ function utcTimestamp(d = /* @__PURE__ */ new Date()) { return d.toISOString().replace("T", " ").slice(0, 19) + " UTC"; } +// dist/src/utils/capture-queue.js +import { appendFileSync as appendFileSync2, mkdirSync, readFileSync, existsSync, unlinkSync } from "node:fs"; +import { join as join2 } from "node:path"; +import { homedir as homedir2 } from "node:os"; +var QUEUE_DIR = join2(homedir2(), ".deeplake", "capture"); +function queuePath(sessionId) { + return join2(QUEUE_DIR, `${sessionId}.jsonl`); +} +function readEvents(sessionId) { + const path = queuePath(sessionId); + if (!existsSync(path)) + return []; + const content = readFileSync(path, "utf-8").trim(); + if (!content) + return []; + return content.split("\n").map((line) => JSON.parse(line)); +} +function deleteQueue(sessionId) { + const path = queuePath(sessionId); + try { + unlinkSync(path); + } catch { + } +} + // dist/src/hooks/wiki-worker.js -var cfg = JSON.parse(readFileSync(process.argv[2], "utf-8")); +var cfg = JSON.parse(readFileSync2(process.argv[2], "utf-8")); var tmpDir = cfg.tmpDir; -var 
tmpJsonl = join2(tmpDir, "session.jsonl"); -var tmpSummary = join2(tmpDir, "summary.md"); +var tmpJsonl = join3(tmpDir, "session.jsonl"); +var tmpSummary = join3(tmpDir, "summary.md"); function wlog(msg) { try { - mkdirSync(cfg.hooksDir, { recursive: true }); - appendFileSync2(cfg.wikiLog, `[${utcTimestamp()}] wiki-worker(${cfg.sessionId}): ${msg} + mkdirSync2(cfg.hooksDir, { recursive: true }); + appendFileSync3(cfg.wikiLog, `[${utcTimestamp()}] wiki-worker(${cfg.sessionId}): ${msg} `); } catch { } @@ -48,7 +73,7 @@ async function query(sql, retries = 2) { return []; return j.rows.map((row) => Object.fromEntries(j.columns.map((col, i) => [col, row[i]]))); } - if (attempt < retries && (r.status === 502 || r.status === 503 || r.status === 429)) { + if (attempt < retries && (r.status === 502 || r.status === 503 || r.status === 429 || r.status === 500)) { wlog(`API ${r.status}, retrying in ${attempt + 1}s...`); await new Promise((resolve) => setTimeout(resolve, (attempt + 1) * 1e3)); continue; @@ -63,21 +88,69 @@ function cleanup() { } catch { } } +function buildSessionPath() { + const org = cfg.orgName ?? cfg.orgId; + return `/sessions/${cfg.userName}/${cfg.userName}_${org}_${cfg.workspaceId}_${cfg.sessionId}.jsonl`; +} +async function flushQueue() { + const events = readEvents(cfg.sessionId); + const jsonlServerPath = buildSessionPath(); + if (events.length === 0) { + wlog("no local events to flush"); + return { events, jsonlServerPath }; + } + wlog(`flushing ${events.length} events to cloud`); + const filename = jsonlServerPath.split("/").pop() ?? ""; + for (const event of events) { + const line = JSON.stringify(event); + const jsonForSql = line.replace(/'/g, "''"); + const ts = event.timestamp ?? (/* @__PURE__ */ new Date()).toISOString(); + const hookEvent = event.hook_event_name ?? 
""; + try { + await query(`INSERT INTO "${cfg.sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${esc(jsonlServerPath)}', '${esc(filename)}', '${jsonForSql}'::jsonb, '${esc(cfg.userName)}', ${Buffer.byteLength(line, "utf-8")}, '${esc(cfg.project)}', '${esc(hookEvent)}', 'claude_code', '${ts}', '${ts}')`); + } catch (e) { + wlog(`flush event failed: ${e.message}`); + throw e; + } + } + deleteQueue(cfg.sessionId); + wlog(`flushed ${events.length} events, deleted local queue`); + return { events, jsonlServerPath }; +} async function main() { try { - wlog("fetching session events"); - await query(`SELECT deeplake_sync_table('${cfg.sessionsTable}')`); - const rows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC`); - if (rows.length === 0) { + const { events, jsonlServerPath } = await flushQueue(); + wlog("fetching cloud events"); + try { + await query(`SELECT deeplake_sync_table('${cfg.sessionsTable}')`); + } catch { + } + const cloudRows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC`); + const seenIds = /* @__PURE__ */ new Set(); + const allEvents = []; + for (const row of cloudRows) { + const msg = typeof row.message === "string" ? JSON.parse(row.message) : row.message; + const id = msg?.id; + if (id && !seenIds.has(id)) { + seenIds.add(id); + allEvents.push(msg); + } + } + for (const evt of events) { + const id = evt.id; + if (id && !seenIds.has(id)) { + seenIds.add(id); + allEvents.push(evt); + } + } + if (allEvents.length === 0) { wlog("no session events found \u2014 exiting"); return; } - const jsonlContent = rows.map((r) => typeof r.message === "string" ? 
r.message : JSON.stringify(r.message)).join("\n"); - const jsonlLines = rows.length; - const pathRows = await query(`SELECT DISTINCT path FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' LIMIT 1`); - const jsonlServerPath = pathRows.length > 0 ? pathRows[0].path : `/sessions/unknown/${cfg.sessionId}.jsonl`; + const jsonlContent = allEvents.map((e) => JSON.stringify(e)).join("\n"); + const jsonlLines = allEvents.length; writeFileSync(tmpJsonl, jsonlContent); - wlog(`found ${jsonlLines} events at ${jsonlServerPath}`); + wlog(`found ${jsonlLines} total events at ${jsonlServerPath}`); let prevOffset = 0; try { await query(`SELECT deeplake_sync_table('${cfg.memoryTable}')`); @@ -112,8 +185,8 @@ async function main() { } catch (e) { wlog(`claude -p failed: ${e.status ?? e.message}`); } - if (existsSync(tmpSummary)) { - const text = readFileSync(tmpSummary, "utf-8"); + if (existsSync2(tmpSummary)) { + const text = readFileSync2(tmpSummary, "utf-8"); if (text.trim()) { const fname = `${cfg.sessionId}.md`; const vpath = `/summaries/${cfg.userName}/${fname}`; diff --git a/codex/bundle/capture.js b/codex/bundle/capture.js index 175e58b..500abab 100755 --- a/codex/bundle/capture.js +++ b/codex/bundle/capture.js @@ -17,43 +17,25 @@ function readStdin() { }); } -// dist/src/config.js -import { readFileSync, existsSync } from "node:fs"; +// dist/src/utils/capture-queue.js +import { appendFileSync, mkdirSync, readFileSync, existsSync, unlinkSync } from "node:fs"; import { join } from "node:path"; -import { homedir, userInfo } from "node:os"; -function loadConfig() { - const home = homedir(); - const credPath = join(home, ".deeplake", "credentials.json"); - let creds = null; - if (existsSync(credPath)) { - try { - creds = JSON.parse(readFileSync(credPath, "utf-8")); - } catch { - return null; - } - } - const token = process.env.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = process.env.DEEPLAKE_ORG_ID ?? 
creds?.orgId; - if (!token || !orgId) - return null; - return { - token, - orgId, - orgName: creds?.orgName ?? orgId, - userName: creds?.userName || userInfo().username || "unknown", - workspaceId: process.env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: process.env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: process.env.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: process.env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", - memoryPath: process.env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") - }; +import { homedir } from "node:os"; +var QUEUE_DIR = join(homedir(), ".deeplake", "capture"); +function ensureDir() { + mkdirSync(QUEUE_DIR, { recursive: true }); +} +function queuePath(sessionId) { + return join(QUEUE_DIR, `${sessionId}.jsonl`); +} +function appendEvent(sessionId, event) { + ensureDir(); + const line = JSON.stringify(event) + "\n"; + appendFileSync(queuePath(sessionId), line); } - -// dist/src/deeplake-api.js -import { randomUUID } from "node:crypto"; // dist/src/utils/debug.js -import { appendFileSync } from "node:fs"; +import { appendFileSync as appendFileSync2 } from "node:fs"; import { join as join2 } from "node:path"; import { homedir as homedir2 } from "node:os"; var DEBUG = process.env.DEEPLAKE_DEBUG === "1"; @@ -61,234 +43,17 @@ var LOG = join2(homedir2(), ".deeplake", "hook-debug.log"); function log(tag, msg) { if (!DEBUG) return; - appendFileSync(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} + appendFileSync2(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} `); } -// dist/src/utils/sql.js -function sqlStr(value) { - return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); -} - -// dist/src/deeplake-api.js -var log2 = (msg) => log("sdk", msg); -var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); -var MAX_RETRIES = 3; -var BASE_DELAY_MS = 500; -var 
MAX_CONCURRENCY = 5; -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} -var Semaphore = class { - max; - waiting = []; - active = 0; - constructor(max) { - this.max = max; - } - async acquire() { - if (this.active < this.max) { - this.active++; - return; - } - await new Promise((resolve) => this.waiting.push(resolve)); - } - release() { - this.active--; - const next = this.waiting.shift(); - if (next) { - this.active++; - next(); - } - } -}; -var DeeplakeApi = class { - token; - apiUrl; - orgId; - workspaceId; - tableName; - _pendingRows = []; - _sem = new Semaphore(MAX_CONCURRENCY); - constructor(token, apiUrl, orgId, workspaceId, tableName) { - this.token = token; - this.apiUrl = apiUrl; - this.orgId = orgId; - this.workspaceId = workspaceId; - this.tableName = tableName; - } - /** Execute SQL with retry on transient errors and bounded concurrency. */ - async query(sql) { - await this._sem.acquire(); - try { - return await this._queryWithRetry(sql); - } finally { - this._sem.release(); - } - } - async _queryWithRetry(sql) { - let lastError; - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - let resp; - try { - resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { - method: "POST", - headers: { - Authorization: `Bearer ${this.token}`, - "Content-Type": "application/json", - "X-Activeloop-Org-Id": this.orgId - }, - body: JSON.stringify({ query: sql }) - }); - } catch (e) { - lastError = e instanceof Error ? 
e : new Error(String(e)); - if (attempt < MAX_RETRIES) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw lastError; - } - if (resp.ok) { - const raw = await resp.json(); - if (!raw?.rows || !raw?.columns) - return []; - return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); - } - const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); - } - throw lastError ?? new Error("Query failed: max retries exceeded"); - } - // ── Writes ────────────────────────────────────────────────────────────────── - /** Queue rows for writing. Call commit() to flush. */ - appendRows(rows) { - this._pendingRows.push(...rows); - } - /** Flush pending rows via SQL. */ - async commit() { - if (this._pendingRows.length === 0) - return; - const rows = this._pendingRows; - this._pendingRows = []; - const CONCURRENCY = 10; - for (let i = 0; i < rows.length; i += CONCURRENCY) { - const chunk = rows.slice(i, i + CONCURRENCY); - await Promise.allSettled(chunk.map((r) => this.upsertRowSql(r))); - } - log2(`commit: ${rows.length} rows`); - } - async upsertRowSql(row) { - const ts = (/* @__PURE__ */ new Date()).toISOString(); - const cd = row.creationDate ?? ts; - const lud = row.lastUpdateDate ?? 
ts; - const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`); - if (exists.length > 0) { - let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`; - if (row.project !== void 0) - setClauses += `, project = '${sqlStr(row.project)}'`; - if (row.description !== void 0) - setClauses += `, description = '${sqlStr(row.description)}'`; - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`); - } else { - const id = randomUUID(); - let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date"; - let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`; - if (row.project !== void 0) { - cols += ", project"; - vals += `, '${sqlStr(row.project)}'`; - } - if (row.description !== void 0) { - cols += ", description"; - vals += `, '${sqlStr(row.description)}'`; - } - await this.query(`INSERT INTO "${this.tableName}" (${cols}) VALUES (${vals})`); - } - } - /** Update specific columns on a row by path. */ - async updateColumns(path, columns) { - const setClauses = Object.entries(columns).map(([col, val]) => typeof val === "number" ? `${col} = ${val}` : `${col} = '${sqlStr(String(val))}'`).join(", "); - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(path)}'`); - } - // ── Convenience ───────────────────────────────────────────────────────────── - /** Create a BM25 search index on a column. */ - async createIndex(column) { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); - } - /** List all tables in the workspace (with retry). 
*/ - async listTables() { - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - try { - const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { - headers: { - Authorization: `Bearer ${this.token}`, - "X-Activeloop-Org-Id": this.orgId - } - }); - if (resp.ok) { - const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); - } - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); - continue; - } - return []; - } catch { - if (attempt < MAX_RETRIES) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); - continue; - } - return []; - } - } - return []; - } - /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ - async ensureTable(name) { - const tbl = name ?? this.tableName; - const tables = await this.listTables(); - if (!tables.includes(tbl)) { - log2(`table "${tbl}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${tbl}" created`); - } - } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ - async ensureSessionsTable(name) { - const tables = await this.listTables(); - if (!tables.includes(name)) { - log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${name}" created`); - } - } -}; - // dist/src/hooks/codex/capture.js -var log3 = (msg) => log("codex-capture", msg); +var log2 = (msg) => log("codex-capture", msg); var CAPTURE = process.env.DEEPLAKE_CAPTURE !== "false"; -function buildSessionPath(config, sessionId) { - return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; -} async function main() { if (!CAPTURE) return; const input = await readStdin(); - const config = loadConfig(); - if (!config) { - log3("no config"); - return; - } - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); const ts = (/* @__PURE__ */ new Date()).toISOString(); const meta = { session_id: input.session_id, @@ -301,7 +66,7 @@ async function main() { }; let entry; if (input.hook_event_name === "UserPromptSubmit" && input.prompt !== void 0) { - log3(`user session=${input.session_id}`); + log2(`user session=${input.session_id}`); entry = { id: crypto.randomUUID(), ...meta, @@ -309,7 +74,7 @@ async function main() { content: input.prompt }; } else if (input.hook_event_name === "PostToolUse" && input.tool_name !== void 0) { - log3(`tool=${input.tool_name} session=${input.session_id}`); + log2(`tool=${input.tool_name} 
session=${input.session_id}`); entry = { id: crypto.randomUUID(), ...meta, @@ -320,30 +85,13 @@ async function main() { tool_response: JSON.stringify(input.tool_response) }; } else { - log3(`unknown event: ${input.hook_event_name}, skipping`); + log2(`unknown event: ${input.hook_event_name}, skipping`); return; } - const sessionPath = buildSessionPath(config, input.session_id); - const line = JSON.stringify(entry); - log3(`writing to ${sessionPath}`); - const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; - const filename = sessionPath.split("/").pop() ?? ""; - const jsonForSql = sqlStr(line); - const insertSql = `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(input.hook_event_name ?? 
"")}', 'codex', '${ts}', '${ts}')`; - try { - await api.query(insertSql); - } catch (e) { - if (e.message?.includes("permission denied") || e.message?.includes("does not exist")) { - log3("table missing, creating and retrying"); - await api.ensureSessionsTable(sessionsTable); - await api.query(insertSql); - } else { - throw e; - } - } - log3("capture ok"); + appendEvent(input.session_id, entry); + log2("capture ok \u2192 local queue"); } main().catch((e) => { - log3(`fatal: ${e.message}`); + log2(`fatal: ${e.message}`); process.exit(0); }); diff --git a/codex/bundle/session-start-setup.js b/codex/bundle/session-start-setup.js new file mode 100755 index 0000000..88b0be0 --- /dev/null +++ b/codex/bundle/session-start-setup.js @@ -0,0 +1,452 @@ +#!/usr/bin/env node + +// dist/src/hooks/codex/session-start-setup.js +import { fileURLToPath } from "node:url"; +import { dirname, join as join4 } from "node:path"; +import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2, readFileSync as readFileSync3 } from "node:fs"; +import { execSync as execSync2 } from "node:child_process"; +import { homedir as homedir4 } from "node:os"; + +// dist/src/commands/auth.js +import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs"; +import { join } from "node:path"; +import { homedir } from "node:os"; +import { execSync } from "node:child_process"; +var CONFIG_DIR = join(homedir(), ".deeplake"); +var CREDS_PATH = join(CONFIG_DIR, "credentials.json"); +function loadCredentials() { + if (!existsSync(CREDS_PATH)) + return null; + try { + return JSON.parse(readFileSync(CREDS_PATH, "utf-8")); + } catch { + return null; + } +} +function saveCredentials(creds) { + if (!existsSync(CONFIG_DIR)) + mkdirSync(CONFIG_DIR, { recursive: true, mode: 448 }); + writeFileSync(CREDS_PATH, JSON.stringify({ ...creds, savedAt: (/* @__PURE__ */ new Date()).toISOString() }, null, 2), { mode: 384 }); +} + +// dist/src/config.js +import { readFileSync as readFileSync2, existsSync as 
existsSync2 } from "node:fs"; +import { join as join2 } from "node:path"; +import { homedir as homedir2, userInfo } from "node:os"; +function loadConfig() { + const home = homedir2(); + const credPath = join2(home, ".deeplake", "credentials.json"); + let creds = null; + if (existsSync2(credPath)) { + try { + creds = JSON.parse(readFileSync2(credPath, "utf-8")); + } catch { + return null; + } + } + const token = process.env.DEEPLAKE_TOKEN ?? creds?.token; + const orgId = process.env.DEEPLAKE_ORG_ID ?? creds?.orgId; + if (!token || !orgId) + return null; + return { + token, + orgId, + orgName: creds?.orgName ?? orgId, + userName: creds?.userName || userInfo().username || "unknown", + workspaceId: process.env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", + apiUrl: process.env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", + tableName: process.env.DEEPLAKE_TABLE ?? "memory", + sessionsTableName: process.env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + memoryPath: process.env.DEEPLAKE_MEMORY_PATH ?? 
join2(home, ".deeplake", "memory") + }; +} + +// dist/src/deeplake-api.js +import { randomUUID } from "node:crypto"; + +// dist/src/utils/debug.js +import { appendFileSync } from "node:fs"; +import { join as join3 } from "node:path"; +import { homedir as homedir3 } from "node:os"; +var DEBUG = process.env.DEEPLAKE_DEBUG === "1"; +var LOG = join3(homedir3(), ".deeplake", "hook-debug.log"); +function log(tag, msg) { + if (!DEBUG) + return; + appendFileSync(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} +`); +} + +// dist/src/utils/sql.js +function sqlStr(value) { + return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); +} + +// dist/src/deeplake-api.js +var log2 = (msg) => log("sdk", msg); +var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); +var MAX_RETRIES = 3; +var BASE_DELAY_MS = 500; +var MAX_CONCURRENCY = 5; +function sleep(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} +var Semaphore = class { + max; + waiting = []; + active = 0; + constructor(max) { + this.max = max; + } + async acquire() { + if (this.active < this.max) { + this.active++; + return; + } + await new Promise((resolve) => this.waiting.push(resolve)); + } + release() { + this.active--; + const next = this.waiting.shift(); + if (next) { + this.active++; + next(); + } + } +}; +var DeeplakeApi = class { + token; + apiUrl; + orgId; + workspaceId; + tableName; + _pendingRows = []; + _sem = new Semaphore(MAX_CONCURRENCY); + constructor(token, apiUrl, orgId, workspaceId, tableName) { + this.token = token; + this.apiUrl = apiUrl; + this.orgId = orgId; + this.workspaceId = workspaceId; + this.tableName = tableName; + } + /** Execute SQL with retry on transient errors and bounded concurrency. 
*/ + async query(sql) { + await this._sem.acquire(); + try { + return await this._queryWithRetry(sql); + } finally { + this._sem.release(); + } + } + async _queryWithRetry(sql) { + let lastError; + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + let resp; + try { + resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { + method: "POST", + headers: { + Authorization: `Bearer ${this.token}`, + "Content-Type": "application/json", + "X-Activeloop-Org-Id": this.orgId + }, + body: JSON.stringify({ query: sql }) + }); + } catch (e) { + lastError = e instanceof Error ? e : new Error(String(e)); + if (attempt < MAX_RETRIES) { + const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; + log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); + await sleep(delay); + continue; + } + throw lastError; + } + if (resp.ok) { + const raw = await resp.json(); + if (!raw?.rows || !raw?.columns) + return []; + return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); + } + const text = await resp.text().catch(() => ""); + if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; + log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); + await sleep(delay); + continue; + } + throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + } + throw lastError ?? new Error("Query failed: max retries exceeded"); + } + // ── Writes ────────────────────────────────────────────────────────────────── + /** Queue rows for writing. Call commit() to flush. */ + appendRows(rows) { + this._pendingRows.push(...rows); + } + /** Flush pending rows via SQL. 
*/ + async commit() { + if (this._pendingRows.length === 0) + return; + const rows = this._pendingRows; + this._pendingRows = []; + const CONCURRENCY = 10; + for (let i = 0; i < rows.length; i += CONCURRENCY) { + const chunk = rows.slice(i, i + CONCURRENCY); + await Promise.allSettled(chunk.map((r) => this.upsertRowSql(r))); + } + log2(`commit: ${rows.length} rows`); + } + async upsertRowSql(row) { + const ts = (/* @__PURE__ */ new Date()).toISOString(); + const cd = row.creationDate ?? ts; + const lud = row.lastUpdateDate ?? ts; + const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`); + if (exists.length > 0) { + let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`; + if (row.project !== void 0) + setClauses += `, project = '${sqlStr(row.project)}'`; + if (row.description !== void 0) + setClauses += `, description = '${sqlStr(row.description)}'`; + await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`); + } else { + const id = randomUUID(); + let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date"; + let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`; + if (row.project !== void 0) { + cols += ", project"; + vals += `, '${sqlStr(row.project)}'`; + } + if (row.description !== void 0) { + cols += ", description"; + vals += `, '${sqlStr(row.description)}'`; + } + await this.query(`INSERT INTO "${this.tableName}" (${cols}) VALUES (${vals})`); + } + } + /** Update specific columns on a row by path. */ + async updateColumns(path, columns) { + const setClauses = Object.entries(columns).map(([col, val]) => typeof val === "number" ? 
`${col} = ${val}` : `${col} = '${sqlStr(String(val))}'`).join(", "); + await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(path)}'`); + } + // ── Convenience ───────────────────────────────────────────────────────────── + /** Create a BM25 search index on a column. */ + async createIndex(column) { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); + } + /** List all tables in the workspace (with retry). */ + async listTables() { + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + try { + const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { + headers: { + Authorization: `Bearer ${this.token}`, + "X-Activeloop-Org-Id": this.orgId + } + }); + if (resp.ok) { + const data = await resp.json(); + return (data.tables ?? []).map((t) => t.table_name); + } + if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); + continue; + } + return []; + } catch { + if (attempt < MAX_RETRIES) { + await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); + continue; + } + return []; + } + } + return []; + } + /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ + async ensureTable(name) { + const tbl = name ?? 
this.tableName; + const tables = await this.listTables(); + if (!tables.includes(tbl)) { + log2(`table "${tbl}" not found, creating`); + await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + log2(`table "${tbl}" created`); + } + } + /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + async ensureSessionsTable(name) { + const tables = await this.listTables(); + if (!tables.includes(name)) { + log2(`table "${name}" not found, creating`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + log2(`table "${name}" created`); + } + } +}; + +// dist/src/utils/stdin.js +function readStdin() { + return new Promise((resolve, reject) => { + let data = ""; + process.stdin.setEncoding("utf-8"); + process.stdin.on("data", (chunk) => data += chunk); + process.stdin.on("end", () => { + try { + resolve(JSON.parse(data)); + } catch (err) { + reject(new Error(`Failed to parse hook input: ${err}`)); + } + }); + process.stdin.on("error", reject); + }); +} + +// dist/src/hooks/codex/session-start-setup.js +var log3 = (msg) => log("codex-session-setup", msg); +var 
__bundleDir = dirname(fileURLToPath(import.meta.url)); +var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; +var VERSION_CHECK_TIMEOUT = 3e3; +var HOME = homedir4(); +var WIKI_LOG = join4(HOME, ".codex", "hooks", "deeplake-wiki.log"); +function wikiLog(msg) { + try { + mkdirSync2(join4(HOME, ".codex", "hooks"), { recursive: true }); + appendFileSync2(WIKI_LOG, `[${(/* @__PURE__ */ new Date()).toISOString().replace("T", " ").slice(0, 19)}] ${msg} +`); + } catch { + } +} +function getInstalledVersion() { + try { + const pluginJson = join4(__bundleDir, "..", ".codex-plugin", "plugin.json"); + const plugin = JSON.parse(readFileSync3(pluginJson, "utf-8")); + if (plugin.version) + return plugin.version; + } catch { + } + let dir = __bundleDir; + for (let i = 0; i < 5; i++) { + const candidate = join4(dir, "package.json"); + try { + const pkg = JSON.parse(readFileSync3(candidate, "utf-8")); + if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) + return pkg.version; + } catch { + } + const parent = dirname(dir); + if (parent === dir) + break; + dir = parent; + } + return null; +} +async function getLatestVersion() { + try { + const res = await fetch(GITHUB_RAW_PKG, { signal: AbortSignal.timeout(VERSION_CHECK_TIMEOUT) }); + if (!res.ok) + return null; + const pkg = await res.json(); + return pkg.version ?? 
null; + } catch { + return null; + } +} +function isNewer(latest, current) { + const parse = (v) => v.split(".").map(Number); + const [la, lb, lc] = parse(latest); + const [ca, cb, cc] = parse(current); + return la > ca || la === ca && lb > cb || la === ca && lb === cb && lc > cc; +} +async function createPlaceholder(api, table, sessionId, cwd, userName, orgName, workspaceId) { + const summaryPath = `/summaries/${userName}/${sessionId}.md`; + await api.query(`SELECT deeplake_sync_table('${table}')`); + const existing = await api.query(`SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1`); + if (existing.length > 0) { + wikiLog(`SessionSetup: summary exists for ${sessionId} (resumed)`); + return; + } + const now = (/* @__PURE__ */ new Date()).toISOString(); + const projectName = cwd.split("/").pop() ?? "unknown"; + const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; + const content = [ + `# Session ${sessionId}`, + `- **Source**: ${sessionSource}`, + `- **Started**: ${now}`, + `- **Project**: ${projectName}`, + `- **Status**: in-progress`, + "" + ].join("\n"); + const filename = `${sessionId}.md`; + await api.query(`INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ${Buffer.byteLength(content, "utf-8")}, '${sqlStr(projectName)}', 'in progress', 'codex', '${now}', '${now}')`); + wikiLog(`SessionSetup: created placeholder for ${sessionId} (${cwd})`); +} +async function main() { + if (process.env.DEEPLAKE_WIKI_WORKER === "1") + return; + const input = await readStdin(); + const creds = loadCredentials(); + if (!creds?.token) { + log3("no credentials"); + return; + } + if (!creds.userName) { + try { + const { userInfo: userInfo2 } = await import("node:os"); + 
creds.userName = userInfo2().username ?? "unknown"; + saveCredentials(creds); + log3(`backfilled userName: ${creds.userName}`); + } catch { + } + } + if (input.session_id) { + try { + const config = loadConfig(); + if (config) { + const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName); + await api.ensureTable(); + await api.ensureSessionsTable(config.sessionsTableName); + await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); + log3("setup complete"); + } + } catch (e) { + log3(`setup failed: ${e.message}`); + wikiLog(`SessionSetup: failed for ${input.session_id}: ${e.message}`); + } + } + const autoupdate = creds.autoupdate !== false; + try { + const current = getInstalledVersion(); + if (current) { + const latest = await getLatestVersion(); + if (latest && isNewer(latest, current)) { + if (autoupdate) { + log3(`autoupdate: updating ${current} \u2192 ${latest}`); + try { + const tag = `v${latest}`; + const findCmd = `INSTALL_DIR=""; CACHE_DIR=$(find ~/.codex/plugins/cache -maxdepth 3 -name "hivemind" -type d 2>/dev/null | head -1); if [ -n "$CACHE_DIR" ]; then INSTALL_DIR=$(ls -1d "$CACHE_DIR"/*/ 2>/dev/null | tail -1); elif [ -d ~/.codex/hivemind ]; then INSTALL_DIR=~/.codex/hivemind; fi; if [ -n "$INSTALL_DIR" ]; then TMPDIR=$(mktemp -d); git clone --depth 1 --branch ${tag} -q https://github.com/activeloopai/hivemind.git "$TMPDIR/hivemind" 2>/dev/null && cp -r "$TMPDIR/hivemind/codex/"* "$INSTALL_DIR/" 2>/dev/null; rm -rf "$TMPDIR"; fi`; + execSync2(findCmd, { stdio: "ignore", timeout: 6e4 }); + process.stderr.write(`Hivemind auto-updated: ${current} \u2192 ${latest}. Restart Codex to apply. +`); + log3(`autoupdate succeeded: ${current} \u2192 ${latest} (tag: ${tag})`); + } catch (e) { + process.stderr.write(`Hivemind update available: ${current} \u2192 ${latest}. Auto-update failed. 
+`); + log3(`autoupdate failed: ${e.message}`); + } + } else { + process.stderr.write(`Hivemind update available: ${current} \u2192 ${latest}. +`); + log3(`update available (autoupdate off): ${current} \u2192 ${latest}`); + } + } else { + log3(`version up to date: ${current}`); + } + } + } catch (e) { + log3(`version check failed: ${e.message}`); + } +} +main().catch((e) => { + log3(`fatal: ${e.message}`); + process.exit(0); +}); diff --git a/codex/bundle/session-start.js b/codex/bundle/session-start.js index 2878577..a14e501 100755 --- a/codex/bundle/session-start.js +++ b/codex/bundle/session-start.js @@ -1,11 +1,10 @@ #!/usr/bin/env node // dist/src/hooks/codex/session-start.js +import { spawn } from "node:child_process"; import { fileURLToPath } from "node:url"; -import { dirname, join as join4 } from "node:path"; -import { mkdirSync as mkdirSync2, appendFileSync as appendFileSync2, readFileSync as readFileSync3 } from "node:fs"; -import { execSync as execSync2 } from "node:child_process"; -import { homedir as homedir4 } from "node:os"; +import { dirname, join as join3 } from "node:path"; +import { readFileSync as readFileSync2 } from "node:fs"; // dist/src/commands/auth.js import { readFileSync, writeFileSync, existsSync, mkdirSync } from "node:fs"; @@ -23,266 +22,6 @@ function loadCredentials() { return null; } } -function saveCredentials(creds) { - if (!existsSync(CONFIG_DIR)) - mkdirSync(CONFIG_DIR, { recursive: true, mode: 448 }); - writeFileSync(CREDS_PATH, JSON.stringify({ ...creds, savedAt: (/* @__PURE__ */ new Date()).toISOString() }, null, 2), { mode: 384 }); -} - -// dist/src/config.js -import { readFileSync as readFileSync2, existsSync as existsSync2 } from "node:fs"; -import { join as join2 } from "node:path"; -import { homedir as homedir2, userInfo } from "node:os"; -function loadConfig() { - const home = homedir2(); - const credPath = join2(home, ".deeplake", "credentials.json"); - let creds = null; - if (existsSync2(credPath)) { - try { - creds 
= JSON.parse(readFileSync2(credPath, "utf-8")); - } catch { - return null; - } - } - const token = process.env.DEEPLAKE_TOKEN ?? creds?.token; - const orgId = process.env.DEEPLAKE_ORG_ID ?? creds?.orgId; - if (!token || !orgId) - return null; - return { - token, - orgId, - orgName: creds?.orgName ?? orgId, - userName: creds?.userName || userInfo().username || "unknown", - workspaceId: process.env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", - apiUrl: process.env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", - tableName: process.env.DEEPLAKE_TABLE ?? "memory", - sessionsTableName: process.env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", - memoryPath: process.env.DEEPLAKE_MEMORY_PATH ?? join2(home, ".deeplake", "memory") - }; -} - -// dist/src/deeplake-api.js -import { randomUUID } from "node:crypto"; - -// dist/src/utils/debug.js -import { appendFileSync } from "node:fs"; -import { join as join3 } from "node:path"; -import { homedir as homedir3 } from "node:os"; -var DEBUG = process.env.DEEPLAKE_DEBUG === "1"; -var LOG = join3(homedir3(), ".deeplake", "hook-debug.log"); -function log(tag, msg) { - if (!DEBUG) - return; - appendFileSync(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} -`); -} - -// dist/src/utils/sql.js -function sqlStr(value) { - return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); -} - -// dist/src/deeplake-api.js -var log2 = (msg) => log("sdk", msg); -var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); -var MAX_RETRIES = 3; -var BASE_DELAY_MS = 500; -var MAX_CONCURRENCY = 5; -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} -var Semaphore = class { - max; - waiting = []; - active = 0; - constructor(max) { - this.max = max; - } - async acquire() { - if (this.active < this.max) { - this.active++; - return; - } - await new Promise((resolve) => this.waiting.push(resolve)); - } - 
release() { - this.active--; - const next = this.waiting.shift(); - if (next) { - this.active++; - next(); - } - } -}; -var DeeplakeApi = class { - token; - apiUrl; - orgId; - workspaceId; - tableName; - _pendingRows = []; - _sem = new Semaphore(MAX_CONCURRENCY); - constructor(token, apiUrl, orgId, workspaceId, tableName) { - this.token = token; - this.apiUrl = apiUrl; - this.orgId = orgId; - this.workspaceId = workspaceId; - this.tableName = tableName; - } - /** Execute SQL with retry on transient errors and bounded concurrency. */ - async query(sql) { - await this._sem.acquire(); - try { - return await this._queryWithRetry(sql); - } finally { - this._sem.release(); - } - } - async _queryWithRetry(sql) { - let lastError; - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - let resp; - try { - resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { - method: "POST", - headers: { - Authorization: `Bearer ${this.token}`, - "Content-Type": "application/json", - "X-Activeloop-Org-Id": this.orgId - }, - body: JSON.stringify({ query: sql }) - }); - } catch (e) { - lastError = e instanceof Error ? 
e : new Error(String(e)); - if (attempt < MAX_RETRIES) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw lastError; - } - if (resp.ok) { - const raw = await resp.json(); - if (!raw?.rows || !raw?.columns) - return []; - return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); - } - const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); - } - throw lastError ?? new Error("Query failed: max retries exceeded"); - } - // ── Writes ────────────────────────────────────────────────────────────────── - /** Queue rows for writing. Call commit() to flush. */ - appendRows(rows) { - this._pendingRows.push(...rows); - } - /** Flush pending rows via SQL. */ - async commit() { - if (this._pendingRows.length === 0) - return; - const rows = this._pendingRows; - this._pendingRows = []; - const CONCURRENCY = 10; - for (let i = 0; i < rows.length; i += CONCURRENCY) { - const chunk = rows.slice(i, i + CONCURRENCY); - await Promise.allSettled(chunk.map((r) => this.upsertRowSql(r))); - } - log2(`commit: ${rows.length} rows`); - } - async upsertRowSql(row) { - const ts = (/* @__PURE__ */ new Date()).toISOString(); - const cd = row.creationDate ?? ts; - const lud = row.lastUpdateDate ?? 
ts; - const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`); - if (exists.length > 0) { - let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`; - if (row.project !== void 0) - setClauses += `, project = '${sqlStr(row.project)}'`; - if (row.description !== void 0) - setClauses += `, description = '${sqlStr(row.description)}'`; - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`); - } else { - const id = randomUUID(); - let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date"; - let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`; - if (row.project !== void 0) { - cols += ", project"; - vals += `, '${sqlStr(row.project)}'`; - } - if (row.description !== void 0) { - cols += ", description"; - vals += `, '${sqlStr(row.description)}'`; - } - await this.query(`INSERT INTO "${this.tableName}" (${cols}) VALUES (${vals})`); - } - } - /** Update specific columns on a row by path. */ - async updateColumns(path, columns) { - const setClauses = Object.entries(columns).map(([col, val]) => typeof val === "number" ? `${col} = ${val}` : `${col} = '${sqlStr(String(val))}'`).join(", "); - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(path)}'`); - } - // ── Convenience ───────────────────────────────────────────────────────────── - /** Create a BM25 search index on a column. */ - async createIndex(column) { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); - } - /** List all tables in the workspace (with retry). 
*/ - async listTables() { - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - try { - const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { - headers: { - Authorization: `Bearer ${this.token}`, - "X-Activeloop-Org-Id": this.orgId - } - }); - if (resp.ok) { - const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); - } - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); - continue; - } - return []; - } catch { - if (attempt < MAX_RETRIES) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); - continue; - } - return []; - } - } - return []; - } - /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ - async ensureTable(name) { - const tbl = name ?? this.tableName; - const tables = await this.listTables(); - if (!tables.includes(tbl)) { - log2(`table "${tbl}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${tbl}" created`); - } - } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ - async ensureSessionsTable(name) { - const tables = await this.listTables(); - if (!tables.includes(name)) { - log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${name}" created`); - } - } -}; // dist/src/utils/stdin.js function readStdin() { @@ -301,31 +40,42 @@ function readStdin() { }); } +// dist/src/utils/debug.js +import { appendFileSync } from "node:fs"; +import { join as join2 } from "node:path"; +import { homedir as homedir2 } from "node:os"; +var DEBUG = process.env.DEEPLAKE_DEBUG === "1"; +var LOG = join2(homedir2(), ".deeplake", "hook-debug.log"); +function log(tag, msg) { + if (!DEBUG) + return; + appendFileSync(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} +`); +} + // dist/src/hooks/codex/session-start.js -var log3 = (msg) => log("codex-session-start", msg); +var log2 = (msg) => log("codex-session-start", msg); var __bundleDir = dirname(fileURLToPath(import.meta.url)); -var AUTH_CMD = join4(__bundleDir, "commands", "auth-login.js"); +var AUTH_CMD = join3(__bundleDir, "commands", "auth-login.js"); var context = `DEEPLAKE MEMORY: Persistent memory at ~/.deeplake/memory/ shared across sessions, users, and agents. Structure: index.md (start here) \u2192 summaries/*.md \u2192 sessions/*.jsonl (last resort). Do NOT jump straight to JSONL. Search: grep -r "keyword" ~/.deeplake/memory/ IMPORTANT: Only use bash commands (cat, ls, grep, echo, jq, head, tail, sed, awk, etc.) to interact with ~/.deeplake/memory/. 
Do NOT use python, python3, node, curl, or other interpreters \u2014 they are not available in the memory filesystem. Do NOT spawn subagents to read deeplake memory.`; -var GITHUB_RAW_PKG = "https://raw.githubusercontent.com/activeloopai/hivemind/main/package.json"; -var VERSION_CHECK_TIMEOUT = 3e3; function getInstalledVersion() { try { - const pluginJson = join4(__bundleDir, "..", ".codex-plugin", "plugin.json"); - const plugin = JSON.parse(readFileSync3(pluginJson, "utf-8")); + const pluginJson = join3(__bundleDir, "..", ".codex-plugin", "plugin.json"); + const plugin = JSON.parse(readFileSync2(pluginJson, "utf-8")); if (plugin.version) return plugin.version; } catch { } let dir = __bundleDir; for (let i = 0; i < 5; i++) { - const candidate = join4(dir, "package.json"); + const candidate = join3(dir, "package.json"); try { - const pkg = JSON.parse(readFileSync3(candidate, "utf-8")); + const pkg = JSON.parse(readFileSync2(candidate, "utf-8")); if ((pkg.name === "hivemind" || pkg.name === "hivemind-codex") && pkg.version) return pkg.version; } catch { @@ -337,140 +87,40 @@ function getInstalledVersion() { } return null; } -async function getLatestVersion() { - try { - const res = await fetch(GITHUB_RAW_PKG, { signal: AbortSignal.timeout(VERSION_CHECK_TIMEOUT) }); - if (!res.ok) - return null; - const pkg = await res.json(); - return pkg.version ?? 
null; - } catch { - return null; - } -} -function isNewer(latest, current) { - const parse = (v) => v.split(".").map(Number); - const [la, lb, lc] = parse(latest); - const [ca, cb, cc] = parse(current); - return la > ca || la === ca && lb > cb || la === ca && lb === cb && lc > cc; -} -var HOME = homedir4(); -var WIKI_LOG = join4(HOME, ".codex", "hooks", "deeplake-wiki.log"); -function wikiLog(msg) { - try { - mkdirSync2(join4(HOME, ".codex", "hooks"), { recursive: true }); - appendFileSync2(WIKI_LOG, `[${(/* @__PURE__ */ new Date()).toISOString().replace("T", " ").slice(0, 19)}] ${msg} -`); - } catch { - } -} -async function createPlaceholder(api, table, sessionId, cwd, userName, orgName, workspaceId) { - const summaryPath = `/summaries/${userName}/${sessionId}.md`; - await api.query(`SELECT deeplake_sync_table('${table}')`); - const existing = await api.query(`SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1`); - if (existing.length > 0) { - wikiLog(`SessionStart: summary exists for ${sessionId} (resumed)`); - return; - } - const now = (/* @__PURE__ */ new Date()).toISOString(); - const projectName = cwd.split("/").pop() ?? 
"unknown"; - const sessionSource = `/sessions/${userName}/${userName}_${orgName}_${workspaceId}_${sessionId}.jsonl`; - const content = [ - `# Session ${sessionId}`, - `- **Source**: ${sessionSource}`, - `- **Started**: ${now}`, - `- **Project**: ${projectName}`, - `- **Status**: in-progress`, - "" - ].join("\n"); - const filename = `${sessionId}.md`; - await api.query(`INSERT INTO "${table}" (id, path, filename, summary, author, mime_type, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(summaryPath)}', '${sqlStr(filename)}', E'${sqlStr(content)}', '${sqlStr(userName)}', 'text/markdown', ${Buffer.byteLength(content, "utf-8")}, '${sqlStr(projectName)}', 'in progress', 'codex', '${now}', '${now}')`); - wikiLog(`SessionStart: created placeholder for ${sessionId} (${cwd})`); -} async function main() { if (process.env.DEEPLAKE_WIKI_WORKER === "1") return; const input = await readStdin(); - let creds = loadCredentials(); + const creds = loadCredentials(); if (!creds?.token) { - log3("no credentials found \u2014 run auth login to authenticate"); + log2("no credentials found \u2014 run auth login to authenticate"); } else { - log3(`credentials loaded: org=${creds.orgName ?? creds.orgId}`); - if (creds.token && !creds.userName) { - try { - const { userInfo: userInfo2 } = await import("node:os"); - creds.userName = userInfo2().username ?? "unknown"; - saveCredentials(creds); - log3(`backfilled userName: ${creds.userName}`); - } catch { - } - } - } - if (input.session_id && creds?.token) { - try { - const config = loadConfig(); - if (config) { - const table = config.tableName; - const sessionsTable = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); - await api.ensureTable(); - await api.ensureSessionsTable(sessionsTable); - await createPlaceholder(api, table, input.session_id, input.cwd ?? 
"", config.userName, config.orgName, config.workspaceId); - log3("placeholder created"); - } - } catch (e) { - log3(`placeholder failed: ${e.message}`); - wikiLog(`SessionStart: placeholder failed for ${input.session_id}: ${e.message}`); - } + log2(`credentials loaded: org=${creds.orgName ?? creds.orgId}`); + } + if (creds?.token) { + const setupScript = join3(__bundleDir, "session-start-setup.js"); + const child = spawn("node", [setupScript], { + detached: true, + stdio: ["pipe", "ignore", "ignore"], + env: { ...process.env } + }); + child.stdin?.write(JSON.stringify(input)); + child.stdin?.end(); + child.unref(); + log2("spawned async setup process"); } - const autoupdate = creds?.autoupdate !== false; - let updateNotice = ""; - try { - const current = getInstalledVersion(); - if (current) { - const latest = await getLatestVersion(); - if (latest && isNewer(latest, current)) { - if (autoupdate) { - log3(`autoupdate: updating ${current} \u2192 ${latest}`); - try { - const tag = `v${latest}`; - const findCmd = `INSTALL_DIR=""; CACHE_DIR=$(find ~/.codex/plugins/cache -maxdepth 3 -name "hivemind" -type d 2>/dev/null | head -1); if [ -n "$CACHE_DIR" ]; then INSTALL_DIR=$(ls -1d "$CACHE_DIR"/*/ 2>/dev/null | tail -1); elif [ -d ~/.codex/hivemind ]; then INSTALL_DIR=~/.codex/hivemind; fi; if [ -n "$INSTALL_DIR" ]; then TMPDIR=$(mktemp -d); git clone --depth 1 --branch ${tag} -q https://github.com/activeloopai/hivemind.git "$TMPDIR/hivemind" 2>/dev/null && cp -r "$TMPDIR/hivemind/codex/"* "$INSTALL_DIR/" 2>/dev/null; rm -rf "$TMPDIR"; fi`; - execSync2(findCmd, { stdio: "ignore", timeout: 6e4 }); - updateNotice = ` - -Hivemind auto-updated: ${current} \u2192 ${latest}. Restart Codex to apply.`; - process.stderr.write(`Hivemind auto-updated: ${current} \u2192 ${latest}. Restart Codex to apply. -`); - log3(`autoupdate succeeded: ${current} \u2192 ${latest} (tag: ${tag})`); - } catch (e) { - updateNotice = ` - -Hivemind update available: ${current} \u2192 ${latest}. 
Auto-update failed.`; - process.stderr.write(`Hivemind update available: ${current} \u2192 ${latest}. Auto-update failed. -`); - log3(`autoupdate failed: ${e.message}`); - } - } else { - updateNotice = ` - -Hivemind update available: ${current} \u2192 ${latest}.`; - process.stderr.write(`Hivemind update available: ${current} \u2192 ${latest}. -`); - log3(`update available (autoupdate off): ${current} \u2192 ${latest}`); - } - } else { - log3(`version up to date: ${current}`); - } - } - } catch (e) { - log3(`version check failed: ${e.message}`); + let versionNotice = ""; + const current = getInstalledVersion(); + if (current) { + versionNotice = ` +Hivemind v${current}`; } const additionalContext = creds?.token ? `${context} -Logged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId ?? "default"})${updateNotice}` : `${context} -Not logged in to Deeplake. Run: node "${AUTH_CMD}" login${updateNotice}`; +Logged in to Deeplake as org: ${creds.orgName ?? creds.orgId} (workspace: ${creds.workspaceId ?? "default"})${versionNotice}` : `${context} +Not logged in to Deeplake. 
Run: node "${AUTH_CMD}" login${versionNotice}`; console.log(additionalContext); } main().catch((e) => { - log3(`fatal: ${e.message}`); + log2(`fatal: ${e.message}`); process.exit(0); }); diff --git a/codex/bundle/stop.js b/codex/bundle/stop.js index a92c4e6..ef7ce4d 100755 --- a/codex/bundle/stop.js +++ b/codex/bundle/stop.js @@ -3,9 +3,9 @@ // dist/src/hooks/codex/stop.js import { spawn, execSync } from "node:child_process"; import { fileURLToPath } from "node:url"; -import { dirname, join as join3 } from "node:path"; -import { writeFileSync, readFileSync as readFileSync2, mkdirSync, appendFileSync as appendFileSync2, existsSync as existsSync2 } from "node:fs"; -import { homedir as homedir3, tmpdir } from "node:os"; +import { dirname, join as join4 } from "node:path"; +import { writeFileSync, readFileSync as readFileSync3, mkdirSync as mkdirSync2, appendFileSync as appendFileSync3, existsSync as existsSync3 } from "node:fs"; +import { homedir as homedir4, tmpdir } from "node:os"; // dist/src/utils/stdin.js function readStdin() { @@ -56,238 +56,45 @@ function loadConfig() { }; } -// dist/src/deeplake-api.js -import { randomUUID } from "node:crypto"; - -// dist/src/utils/debug.js -import { appendFileSync } from "node:fs"; +// dist/src/utils/capture-queue.js +import { appendFileSync, mkdirSync, readFileSync as readFileSync2, existsSync as existsSync2, unlinkSync } from "node:fs"; import { join as join2 } from "node:path"; import { homedir as homedir2 } from "node:os"; +var QUEUE_DIR = join2(homedir2(), ".deeplake", "capture"); +function ensureDir() { + mkdirSync(QUEUE_DIR, { recursive: true }); +} +function queuePath(sessionId) { + return join2(QUEUE_DIR, `${sessionId}.jsonl`); +} +function appendEvent(sessionId, event) { + ensureDir(); + const line = JSON.stringify(event) + "\n"; + appendFileSync(queuePath(sessionId), line); +} + +// dist/src/utils/debug.js +import { appendFileSync as appendFileSync2 } from "node:fs"; +import { join as join3 } from "node:path"; 
+import { homedir as homedir3 } from "node:os"; var DEBUG = process.env.DEEPLAKE_DEBUG === "1"; -var LOG = join2(homedir2(), ".deeplake", "hook-debug.log"); +var LOG = join3(homedir3(), ".deeplake", "hook-debug.log"); function log(tag, msg) { if (!DEBUG) return; - appendFileSync(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} + appendFileSync2(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} `); } -// dist/src/utils/sql.js -function sqlStr(value) { - return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); -} - -// dist/src/deeplake-api.js -var log2 = (msg) => log("sdk", msg); -var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); -var MAX_RETRIES = 3; -var BASE_DELAY_MS = 500; -var MAX_CONCURRENCY = 5; -function sleep(ms) { - return new Promise((resolve) => setTimeout(resolve, ms)); -} -var Semaphore = class { - max; - waiting = []; - active = 0; - constructor(max) { - this.max = max; - } - async acquire() { - if (this.active < this.max) { - this.active++; - return; - } - await new Promise((resolve) => this.waiting.push(resolve)); - } - release() { - this.active--; - const next = this.waiting.shift(); - if (next) { - this.active++; - next(); - } - } -}; -var DeeplakeApi = class { - token; - apiUrl; - orgId; - workspaceId; - tableName; - _pendingRows = []; - _sem = new Semaphore(MAX_CONCURRENCY); - constructor(token, apiUrl, orgId, workspaceId, tableName) { - this.token = token; - this.apiUrl = apiUrl; - this.orgId = orgId; - this.workspaceId = workspaceId; - this.tableName = tableName; - } - /** Execute SQL with retry on transient errors and bounded concurrency. 
*/ - async query(sql) { - await this._sem.acquire(); - try { - return await this._queryWithRetry(sql); - } finally { - this._sem.release(); - } - } - async _queryWithRetry(sql) { - let lastError; - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - let resp; - try { - resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { - method: "POST", - headers: { - Authorization: `Bearer ${this.token}`, - "Content-Type": "application/json", - "X-Activeloop-Org-Id": this.orgId - }, - body: JSON.stringify({ query: sql }) - }); - } catch (e) { - lastError = e instanceof Error ? e : new Error(String(e)); - if (attempt < MAX_RETRIES) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw lastError; - } - if (resp.ok) { - const raw = await resp.json(); - if (!raw?.rows || !raw?.columns) - return []; - return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); - } - const text = await resp.text().catch(() => ""); - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { - const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; - log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); - await sleep(delay); - continue; - } - throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); - } - throw lastError ?? new Error("Query failed: max retries exceeded"); - } - // ── Writes ────────────────────────────────────────────────────────────────── - /** Queue rows for writing. Call commit() to flush. */ - appendRows(rows) { - this._pendingRows.push(...rows); - } - /** Flush pending rows via SQL. 
*/ - async commit() { - if (this._pendingRows.length === 0) - return; - const rows = this._pendingRows; - this._pendingRows = []; - const CONCURRENCY = 10; - for (let i = 0; i < rows.length; i += CONCURRENCY) { - const chunk = rows.slice(i, i + CONCURRENCY); - await Promise.allSettled(chunk.map((r) => this.upsertRowSql(r))); - } - log2(`commit: ${rows.length} rows`); - } - async upsertRowSql(row) { - const ts = (/* @__PURE__ */ new Date()).toISOString(); - const cd = row.creationDate ?? ts; - const lud = row.lastUpdateDate ?? ts; - const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`); - if (exists.length > 0) { - let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`; - if (row.project !== void 0) - setClauses += `, project = '${sqlStr(row.project)}'`; - if (row.description !== void 0) - setClauses += `, description = '${sqlStr(row.description)}'`; - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`); - } else { - const id = randomUUID(); - let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date"; - let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`; - if (row.project !== void 0) { - cols += ", project"; - vals += `, '${sqlStr(row.project)}'`; - } - if (row.description !== void 0) { - cols += ", description"; - vals += `, '${sqlStr(row.description)}'`; - } - await this.query(`INSERT INTO "${this.tableName}" (${cols}) VALUES (${vals})`); - } - } - /** Update specific columns on a row by path. */ - async updateColumns(path, columns) { - const setClauses = Object.entries(columns).map(([col, val]) => typeof val === "number" ? 
`${col} = ${val}` : `${col} = '${sqlStr(String(val))}'`).join(", "); - await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(path)}'`); - } - // ── Convenience ───────────────────────────────────────────────────────────── - /** Create a BM25 search index on a column. */ - async createIndex(column) { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); - } - /** List all tables in the workspace (with retry). */ - async listTables() { - for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { - try { - const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { - headers: { - Authorization: `Bearer ${this.token}`, - "X-Activeloop-Org-Id": this.orgId - } - }); - if (resp.ok) { - const data = await resp.json(); - return (data.tables ?? []).map((t) => t.table_name); - } - if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); - continue; - } - return []; - } catch { - if (attempt < MAX_RETRIES) { - await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); - continue; - } - return []; - } - } - return []; - } - /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ - async ensureTable(name) { - const tbl = name ?? 
this.tableName; - const tables = await this.listTables(); - if (!tables.includes(tbl)) { - log2(`table "${tbl}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${tbl}" created`); - } - } - /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ - async ensureSessionsTable(name) { - const tables = await this.listTables(); - if (!tables.includes(name)) { - log2(`table "${name}" not found, creating`); - await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); - log2(`table "${name}" created`); - } - } -}; - // dist/src/hooks/codex/stop.js -var log3 = (msg) => log("codex-stop", msg); -var HOME = homedir3(); -var WIKI_LOG = join3(HOME, ".codex", "hooks", "deeplake-wiki.log"); +var log2 = (msg) => log("codex-stop", msg); +var HOME = homedir4(); +var WIKI_LOG = join4(HOME, ".codex", "hooks", "deeplake-wiki.log"); var __bundleDir = dirname(fileURLToPath(import.meta.url)); function wikiLog(msg) { try { - mkdirSync(join3(HOME, ".codex", "hooks"), { recursive: true }); - appendFileSync2(WIKI_LOG, `[${(/* @__PURE__ */ new 
Date()).toISOString().replace("T", " ").slice(0, 19)}] ${msg} + mkdirSync2(join4(HOME, ".codex", "hooks"), { recursive: true }); + appendFileSync3(WIKI_LOG, `[${(/* @__PURE__ */ new Date()).toISOString().replace("T", " ").slice(0, 19)}] ${msg} `); } catch { } @@ -349,9 +156,6 @@ IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. PRIVACY: Never include absolute filesystem paths in the summary. LENGTH LIMIT: Keep the total summary under 4000 characters.`; var CAPTURE = process.env.DEEPLAKE_CAPTURE !== "false"; -function buildSessionPath(config, sessionId) { - return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; -} async function main() { if (process.env.DEEPLAKE_WIKI_WORKER === "1") return; @@ -361,25 +165,23 @@ async function main() { return; const config = loadConfig(); if (!config) { - log3("no config"); + log2("no config"); return; } if (CAPTURE) { try { - const sessionsTable2 = config.sessionsTableName; - const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable2); const ts = (/* @__PURE__ */ new Date()).toISOString(); let lastAssistantMessage = ""; if (input.transcript_path) { try { const transcriptPath = input.transcript_path; - if (existsSync2(transcriptPath)) { - const transcript = readFileSync2(transcriptPath, "utf-8"); + if (existsSync3(transcriptPath)) { + const transcript = readFileSync3(transcriptPath, "utf-8"); const lines = transcript.trim().split("\n").reverse(); - for (const line2 of lines) { + for (const line of lines) { try { - const entry2 = JSON.parse(line2); - const msg = entry2.payload ?? entry2; + const entry = JSON.parse(line); + const msg = entry.payload ?? entry; if (msg.role === "assistant" && msg.content) { const content = typeof msg.content === "string" ? msg.content : Array.isArray(msg.content) ? 
msg.content.filter((b) => b.type === "output_text" || b.type === "text").map((b) => b.text).join("\n") : ""; if (content) { @@ -391,13 +193,13 @@ async function main() { } } if (lastAssistantMessage) - log3(`extracted assistant message from transcript (${lastAssistantMessage.length} chars)`); + log2(`extracted assistant message from transcript (${lastAssistantMessage.length} chars)`); } } catch (e) { - log3(`transcript read failed: ${e.message}`); + log2(`transcript read failed: ${e.message}`); } } - const entry = { + appendEvent(sessionId, { id: crypto.randomUUID(), session_id: sessionId, transcript_path: input.transcript_path, @@ -407,17 +209,10 @@ async function main() { timestamp: ts, type: lastAssistantMessage ? "assistant_message" : "assistant_stop", content: lastAssistantMessage - }; - const line = JSON.stringify(entry); - const sessionPath = buildSessionPath(config, sessionId); - const projectName2 = (input.cwd ?? "").split("/").pop() || "unknown"; - const filename = sessionPath.split("/").pop() ?? ""; - const jsonForSql = sqlStr(line); - const insertSql = `INSERT INTO "${sessionsTable2}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName2)}', 'Stop', 'codex', '${ts}', '${ts}')`; - await api.query(insertSql); - log3("stop event captured"); + }); + log2("stop event captured \u2192 local queue"); } catch (e) { - log3(`capture failed: ${e.message}`); + log2(`capture failed: ${e.message}`); } } const cwd = input.cwd ?? 
""; @@ -425,13 +220,14 @@ async function main() { const sessionsTable = config.sessionsTableName; const agentBin = findSummaryBin(); const projectName = cwd.split("/").pop() || "unknown"; - const tmpDir = join3(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`); - mkdirSync(tmpDir, { recursive: true }); - const configFile = join3(tmpDir, "config.json"); + const tmpDir = join4(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`); + mkdirSync2(tmpDir, { recursive: true }); + const configFile = join4(tmpDir, "config.json"); writeFileSync(configFile, JSON.stringify({ apiUrl: config.apiUrl, token: config.token, orgId: config.orgId, + orgName: config.orgName, workspaceId: config.workspaceId, memoryTable, sessionsTable, @@ -441,11 +237,11 @@ async function main() { tmpDir, codexBin: agentBin, wikiLog: WIKI_LOG, - hooksDir: join3(HOME, ".codex", "hooks"), + hooksDir: join4(HOME, ".codex", "hooks"), promptTemplate: WIKI_PROMPT_TEMPLATE })); wikiLog(`Stop: spawning summary worker for ${sessionId}`); - const workerPath = join3(__bundleDir, "wiki-worker.js"); + const workerPath = join4(__bundleDir, "wiki-worker.js"); spawn("nohup", ["node", workerPath, configFile], { detached: true, stdio: ["ignore", "ignore", "ignore"] @@ -453,6 +249,6 @@ async function main() { wikiLog(`Stop: spawned summary worker for ${sessionId}`); } main().catch((e) => { - log3(`fatal: ${e.message}`); + log2(`fatal: ${e.message}`); process.exit(0); }); From 3c588929d992fd5416ddc26576ff67c6a3e4f8a9 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 06:13:04 +0000 Subject: [PATCH 11/33] revert: remove local capture queue, restore direct server INSERTs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The local JSONL queue deferred server writes to session end, meaning data only reached the server after the session finished. This defeated the purpose of real-time capture — if a session crashed, events were lost. 
Restore the original direct INSERT approach for all capture hooks. With async: true in CC hooks.json, the INSERT still happens in the background without blocking the user. Codex hooks remain sync but the ~200ms INSERT is acceptable. The session-start split (sync context + async setup) is kept. --- claude-code/bundle/capture.js | 303 +++++++++++++++++++++++++++--- claude-code/bundle/session-end.js | 1 - claude-code/bundle/wiki-worker.js | 111 ++--------- codex/bundle/capture.js | 298 ++++++++++++++++++++++++++--- codex/bundle/stop.js | 300 ++++++++++++++++++++++++----- src/hooks/capture.ts | 61 +++++- src/hooks/codex/capture.ts | 50 ++++- src/hooks/codex/stop.ts | 29 ++- src/hooks/session-end.ts | 1 - src/hooks/wiki-worker.ts | 123 +++--------- src/utils/capture-queue.ts | 57 ------ 11 files changed, 974 insertions(+), 360 deletions(-) delete mode 100644 src/utils/capture-queue.ts diff --git a/claude-code/bundle/capture.js b/claude-code/bundle/capture.js index 5e4187a..3d3858b 100755 --- a/claude-code/bundle/capture.js +++ b/claude-code/bundle/capture.js @@ -17,25 +17,43 @@ function readStdin() { }); } -// dist/src/utils/capture-queue.js -import { appendFileSync, mkdirSync, readFileSync, existsSync, unlinkSync } from "node:fs"; +// dist/src/config.js +import { readFileSync, existsSync } from "node:fs"; import { join } from "node:path"; -import { homedir } from "node:os"; -var QUEUE_DIR = join(homedir(), ".deeplake", "capture"); -function ensureDir() { - mkdirSync(QUEUE_DIR, { recursive: true }); -} -function queuePath(sessionId) { - return join(QUEUE_DIR, `${sessionId}.jsonl`); -} -function appendEvent(sessionId, event) { - ensureDir(); - const line = JSON.stringify(event) + "\n"; - appendFileSync(queuePath(sessionId), line); +import { homedir, userInfo } from "node:os"; +function loadConfig() { + const home = homedir(); + const credPath = join(home, ".deeplake", "credentials.json"); + let creds = null; + if (existsSync(credPath)) { + try { + creds = 
JSON.parse(readFileSync(credPath, "utf-8")); + } catch { + return null; + } + } + const token = process.env.DEEPLAKE_TOKEN ?? creds?.token; + const orgId = process.env.DEEPLAKE_ORG_ID ?? creds?.orgId; + if (!token || !orgId) + return null; + return { + token, + orgId, + orgName: creds?.orgName ?? orgId, + userName: creds?.userName || userInfo().username || "unknown", + workspaceId: process.env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", + apiUrl: process.env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", + tableName: process.env.DEEPLAKE_TABLE ?? "memory", + sessionsTableName: process.env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + memoryPath: process.env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") + }; } +// dist/src/deeplake-api.js +import { randomUUID } from "node:crypto"; + // dist/src/utils/debug.js -import { appendFileSync as appendFileSync2 } from "node:fs"; +import { appendFileSync } from "node:fs"; import { join as join2 } from "node:path"; import { homedir as homedir2 } from "node:os"; var DEBUG = process.env.DEEPLAKE_DEBUG === "1"; @@ -43,17 +61,237 @@ var LOG = join2(homedir2(), ".deeplake", "hook-debug.log"); function log(tag, msg) { if (!DEBUG) return; - appendFileSync2(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} + appendFileSync(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} `); } +// dist/src/utils/sql.js +function sqlStr(value) { + return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); +} + +// dist/src/deeplake-api.js +var log2 = (msg) => log("sdk", msg); +var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); +var MAX_RETRIES = 3; +var BASE_DELAY_MS = 500; +var MAX_CONCURRENCY = 5; +function sleep(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} +var Semaphore = class { + max; + waiting = []; + active = 0; + constructor(max) { + this.max = max; + } + 
async acquire() { + if (this.active < this.max) { + this.active++; + return; + } + await new Promise((resolve) => this.waiting.push(resolve)); + } + release() { + this.active--; + const next = this.waiting.shift(); + if (next) { + this.active++; + next(); + } + } +}; +var DeeplakeApi = class { + token; + apiUrl; + orgId; + workspaceId; + tableName; + _pendingRows = []; + _sem = new Semaphore(MAX_CONCURRENCY); + constructor(token, apiUrl, orgId, workspaceId, tableName) { + this.token = token; + this.apiUrl = apiUrl; + this.orgId = orgId; + this.workspaceId = workspaceId; + this.tableName = tableName; + } + /** Execute SQL with retry on transient errors and bounded concurrency. */ + async query(sql) { + await this._sem.acquire(); + try { + return await this._queryWithRetry(sql); + } finally { + this._sem.release(); + } + } + async _queryWithRetry(sql) { + let lastError; + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + let resp; + try { + resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { + method: "POST", + headers: { + Authorization: `Bearer ${this.token}`, + "Content-Type": "application/json", + "X-Activeloop-Org-Id": this.orgId + }, + body: JSON.stringify({ query: sql }) + }); + } catch (e) { + lastError = e instanceof Error ? 
e : new Error(String(e)); + if (attempt < MAX_RETRIES) { + const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; + log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); + await sleep(delay); + continue; + } + throw lastError; + } + if (resp.ok) { + const raw = await resp.json(); + if (!raw?.rows || !raw?.columns) + return []; + return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); + } + const text = await resp.text().catch(() => ""); + if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; + log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); + await sleep(delay); + continue; + } + throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + } + throw lastError ?? new Error("Query failed: max retries exceeded"); + } + // ── Writes ────────────────────────────────────────────────────────────────── + /** Queue rows for writing. Call commit() to flush. */ + appendRows(rows) { + this._pendingRows.push(...rows); + } + /** Flush pending rows via SQL. */ + async commit() { + if (this._pendingRows.length === 0) + return; + const rows = this._pendingRows; + this._pendingRows = []; + const CONCURRENCY = 10; + for (let i = 0; i < rows.length; i += CONCURRENCY) { + const chunk = rows.slice(i, i + CONCURRENCY); + await Promise.allSettled(chunk.map((r) => this.upsertRowSql(r))); + } + log2(`commit: ${rows.length} rows`); + } + async upsertRowSql(row) { + const ts = (/* @__PURE__ */ new Date()).toISOString(); + const cd = row.creationDate ?? ts; + const lud = row.lastUpdateDate ?? 
ts; + const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`); + if (exists.length > 0) { + let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`; + if (row.project !== void 0) + setClauses += `, project = '${sqlStr(row.project)}'`; + if (row.description !== void 0) + setClauses += `, description = '${sqlStr(row.description)}'`; + await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`); + } else { + const id = randomUUID(); + let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date"; + let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`; + if (row.project !== void 0) { + cols += ", project"; + vals += `, '${sqlStr(row.project)}'`; + } + if (row.description !== void 0) { + cols += ", description"; + vals += `, '${sqlStr(row.description)}'`; + } + await this.query(`INSERT INTO "${this.tableName}" (${cols}) VALUES (${vals})`); + } + } + /** Update specific columns on a row by path. */ + async updateColumns(path, columns) { + const setClauses = Object.entries(columns).map(([col, val]) => typeof val === "number" ? `${col} = ${val}` : `${col} = '${sqlStr(String(val))}'`).join(", "); + await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(path)}'`); + } + // ── Convenience ───────────────────────────────────────────────────────────── + /** Create a BM25 search index on a column. */ + async createIndex(column) { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); + } + /** List all tables in the workspace (with retry). 
*/ + async listTables() { + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + try { + const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { + headers: { + Authorization: `Bearer ${this.token}`, + "X-Activeloop-Org-Id": this.orgId + } + }); + if (resp.ok) { + const data = await resp.json(); + return (data.tables ?? []).map((t) => t.table_name); + } + if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); + continue; + } + return []; + } catch { + if (attempt < MAX_RETRIES) { + await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); + continue; + } + return []; + } + } + return []; + } + /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ + async ensureTable(name) { + const tbl = name ?? this.tableName; + const tables = await this.listTables(); + if (!tables.includes(tbl)) { + log2(`table "${tbl}" not found, creating`); + await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + log2(`table "${tbl}" created`); + } + } + /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ + async ensureSessionsTable(name) { + const tables = await this.listTables(); + if (!tables.includes(name)) { + log2(`table "${name}" not found, creating`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + log2(`table "${name}" created`); + } + } +}; + // dist/src/hooks/capture.js -var log2 = (msg) => log("capture", msg); +var log3 = (msg) => log("capture", msg); var CAPTURE = process.env.DEEPLAKE_CAPTURE !== "false"; +function buildSessionPath(config, sessionId) { + const userName = config.userName; + const orgName = config.orgName; + const workspace = config.workspaceId ?? 
"default"; + return `/sessions/${userName}/${userName}_${orgName}_${workspace}_${sessionId}.jsonl`; +} async function main() { if (!CAPTURE) return; const input = await readStdin(); + const config = loadConfig(); + if (!config) { + log3("no config"); + return; + } + const sessionsTable = config.sessionsTableName; + const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); const ts = (/* @__PURE__ */ new Date()).toISOString(); const meta = { session_id: input.session_id, @@ -67,7 +305,7 @@ async function main() { }; let entry; if (input.prompt !== void 0) { - log2(`user session=${input.session_id}`); + log3(`user session=${input.session_id}`); entry = { id: crypto.randomUUID(), ...meta, @@ -75,7 +313,7 @@ async function main() { content: input.prompt }; } else if (input.tool_name !== void 0) { - log2(`tool=${input.tool_name} session=${input.session_id}`); + log3(`tool=${input.tool_name} session=${input.session_id}`); entry = { id: crypto.randomUUID(), ...meta, @@ -86,7 +324,7 @@ async function main() { tool_response: JSON.stringify(input.tool_response) }; } else if (input.last_assistant_message !== void 0) { - log2(`assistant session=${input.session_id}`); + log3(`assistant session=${input.session_id}`); entry = { id: crypto.randomUUID(), ...meta, @@ -95,13 +333,30 @@ async function main() { ...input.agent_transcript_path ? { agent_transcript_path: input.agent_transcript_path } : {} }; } else { - log2("unknown event, skipping"); + log3("unknown event, skipping"); return; } - appendEvent(input.session_id, entry); - log2("capture ok \u2192 local queue"); + const sessionPath = buildSessionPath(config, input.session_id); + const line = JSON.stringify(entry); + log3(`writing to ${sessionPath}`); + const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; + const filename = sessionPath.split("/").pop() ?? 
""; + const jsonForSql = line.replace(/'/g, "''"); + const insertSql = `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(input.hook_event_name ?? "")}', 'claude_code', '${ts}', '${ts}')`; + try { + await api.query(insertSql); + } catch (e) { + if (e.message?.includes("permission denied") || e.message?.includes("does not exist")) { + log3("table missing, creating and retrying"); + await api.ensureSessionsTable(sessionsTable); + await api.query(insertSql); + } else { + throw e; + } + } + log3("capture ok \u2192 cloud"); } main().catch((e) => { - log2(`fatal: ${e.message}`); + log3(`fatal: ${e.message}`); process.exit(0); }); diff --git a/claude-code/bundle/session-end.js b/claude-code/bundle/session-end.js index ba9c340..b8ac2a2 100755 --- a/claude-code/bundle/session-end.js +++ b/claude-code/bundle/session-end.js @@ -168,7 +168,6 @@ async function main() { apiUrl: config.apiUrl, token: config.token, orgId: config.orgId, - orgName: config.orgName, workspaceId: config.workspaceId, memoryTable, sessionsTable, diff --git a/claude-code/bundle/wiki-worker.js b/claude-code/bundle/wiki-worker.js index 1d08294..f759c23 100755 --- a/claude-code/bundle/wiki-worker.js +++ b/claude-code/bundle/wiki-worker.js @@ -1,9 +1,9 @@ #!/usr/bin/env node // dist/src/hooks/wiki-worker.js -import { readFileSync as readFileSync2, writeFileSync, existsSync as existsSync2, appendFileSync as appendFileSync3, mkdirSync as mkdirSync2, rmSync } from "node:fs"; +import { readFileSync, writeFileSync, existsSync, appendFileSync as appendFileSync2, mkdirSync, rmSync } from "node:fs"; import { execFileSync } from "node:child_process"; -import { join as join3 } from "node:path"; +import { join as join2 } 
from "node:path"; // dist/src/utils/debug.js import { appendFileSync } from "node:fs"; @@ -15,40 +15,15 @@ function utcTimestamp(d = /* @__PURE__ */ new Date()) { return d.toISOString().replace("T", " ").slice(0, 19) + " UTC"; } -// dist/src/utils/capture-queue.js -import { appendFileSync as appendFileSync2, mkdirSync, readFileSync, existsSync, unlinkSync } from "node:fs"; -import { join as join2 } from "node:path"; -import { homedir as homedir2 } from "node:os"; -var QUEUE_DIR = join2(homedir2(), ".deeplake", "capture"); -function queuePath(sessionId) { - return join2(QUEUE_DIR, `${sessionId}.jsonl`); -} -function readEvents(sessionId) { - const path = queuePath(sessionId); - if (!existsSync(path)) - return []; - const content = readFileSync(path, "utf-8").trim(); - if (!content) - return []; - return content.split("\n").map((line) => JSON.parse(line)); -} -function deleteQueue(sessionId) { - const path = queuePath(sessionId); - try { - unlinkSync(path); - } catch { - } -} - // dist/src/hooks/wiki-worker.js -var cfg = JSON.parse(readFileSync2(process.argv[2], "utf-8")); +var cfg = JSON.parse(readFileSync(process.argv[2], "utf-8")); var tmpDir = cfg.tmpDir; -var tmpJsonl = join3(tmpDir, "session.jsonl"); -var tmpSummary = join3(tmpDir, "summary.md"); +var tmpJsonl = join2(tmpDir, "session.jsonl"); +var tmpSummary = join2(tmpDir, "summary.md"); function wlog(msg) { try { - mkdirSync2(cfg.hooksDir, { recursive: true }); - appendFileSync3(cfg.wikiLog, `[${utcTimestamp()}] wiki-worker(${cfg.sessionId}): ${msg} + mkdirSync(cfg.hooksDir, { recursive: true }); + appendFileSync2(cfg.wikiLog, `[${utcTimestamp()}] wiki-worker(${cfg.sessionId}): ${msg} `); } catch { } @@ -73,7 +48,7 @@ async function query(sql, retries = 2) { return []; return j.rows.map((row) => Object.fromEntries(j.columns.map((col, i) => [col, row[i]]))); } - if (attempt < retries && (r.status === 502 || r.status === 503 || r.status === 429 || r.status === 500)) { + if (attempt < retries && (r.status === 
502 || r.status === 503 || r.status === 429)) { wlog(`API ${r.status}, retrying in ${attempt + 1}s...`); await new Promise((resolve) => setTimeout(resolve, (attempt + 1) * 1e3)); continue; @@ -88,69 +63,21 @@ function cleanup() { } catch { } } -function buildSessionPath() { - const org = cfg.orgName ?? cfg.orgId; - return `/sessions/${cfg.userName}/${cfg.userName}_${org}_${cfg.workspaceId}_${cfg.sessionId}.jsonl`; -} -async function flushQueue() { - const events = readEvents(cfg.sessionId); - const jsonlServerPath = buildSessionPath(); - if (events.length === 0) { - wlog("no local events to flush"); - return { events, jsonlServerPath }; - } - wlog(`flushing ${events.length} events to cloud`); - const filename = jsonlServerPath.split("/").pop() ?? ""; - for (const event of events) { - const line = JSON.stringify(event); - const jsonForSql = line.replace(/'/g, "''"); - const ts = event.timestamp ?? (/* @__PURE__ */ new Date()).toISOString(); - const hookEvent = event.hook_event_name ?? ""; - try { - await query(`INSERT INTO "${cfg.sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${esc(jsonlServerPath)}', '${esc(filename)}', '${jsonForSql}'::jsonb, '${esc(cfg.userName)}', ${Buffer.byteLength(line, "utf-8")}, '${esc(cfg.project)}', '${esc(hookEvent)}', 'claude_code', '${ts}', '${ts}')`); - } catch (e) { - wlog(`flush event failed: ${e.message}`); - throw e; - } - } - deleteQueue(cfg.sessionId); - wlog(`flushed ${events.length} events, deleted local queue`); - return { events, jsonlServerPath }; -} async function main() { try { - const { events, jsonlServerPath } = await flushQueue(); - wlog("fetching cloud events"); - try { - await query(`SELECT deeplake_sync_table('${cfg.sessionsTable}')`); - } catch { - } - const cloudRows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' 
ORDER BY creation_date ASC`); - const seenIds = /* @__PURE__ */ new Set(); - const allEvents = []; - for (const row of cloudRows) { - const msg = typeof row.message === "string" ? JSON.parse(row.message) : row.message; - const id = msg?.id; - if (id && !seenIds.has(id)) { - seenIds.add(id); - allEvents.push(msg); - } - } - for (const evt of events) { - const id = evt.id; - if (id && !seenIds.has(id)) { - seenIds.add(id); - allEvents.push(evt); - } - } - if (allEvents.length === 0) { + wlog("fetching session events"); + await query(`SELECT deeplake_sync_table('${cfg.sessionsTable}')`); + const rows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC`); + if (rows.length === 0) { wlog("no session events found \u2014 exiting"); return; } - const jsonlContent = allEvents.map((e) => JSON.stringify(e)).join("\n"); - const jsonlLines = allEvents.length; + const jsonlContent = rows.map((r) => typeof r.message === "string" ? r.message : JSON.stringify(r.message)).join("\n"); + const jsonlLines = rows.length; + const pathRows = await query(`SELECT DISTINCT path FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' LIMIT 1`); + const jsonlServerPath = pathRows.length > 0 ? pathRows[0].path : `/sessions/unknown/${cfg.sessionId}.jsonl`; writeFileSync(tmpJsonl, jsonlContent); - wlog(`found ${jsonlLines} total events at ${jsonlServerPath}`); + wlog(`found ${jsonlLines} events at ${jsonlServerPath}`); let prevOffset = 0; try { await query(`SELECT deeplake_sync_table('${cfg.memoryTable}')`); @@ -185,8 +112,8 @@ async function main() { } catch (e) { wlog(`claude -p failed: ${e.status ?? 
e.message}`); } - if (existsSync2(tmpSummary)) { - const text = readFileSync2(tmpSummary, "utf-8"); + if (existsSync(tmpSummary)) { + const text = readFileSync(tmpSummary, "utf-8"); if (text.trim()) { const fname = `${cfg.sessionId}.md`; const vpath = `/summaries/${cfg.userName}/${fname}`; diff --git a/codex/bundle/capture.js b/codex/bundle/capture.js index 500abab..175e58b 100755 --- a/codex/bundle/capture.js +++ b/codex/bundle/capture.js @@ -17,25 +17,43 @@ function readStdin() { }); } -// dist/src/utils/capture-queue.js -import { appendFileSync, mkdirSync, readFileSync, existsSync, unlinkSync } from "node:fs"; +// dist/src/config.js +import { readFileSync, existsSync } from "node:fs"; import { join } from "node:path"; -import { homedir } from "node:os"; -var QUEUE_DIR = join(homedir(), ".deeplake", "capture"); -function ensureDir() { - mkdirSync(QUEUE_DIR, { recursive: true }); -} -function queuePath(sessionId) { - return join(QUEUE_DIR, `${sessionId}.jsonl`); -} -function appendEvent(sessionId, event) { - ensureDir(); - const line = JSON.stringify(event) + "\n"; - appendFileSync(queuePath(sessionId), line); +import { homedir, userInfo } from "node:os"; +function loadConfig() { + const home = homedir(); + const credPath = join(home, ".deeplake", "credentials.json"); + let creds = null; + if (existsSync(credPath)) { + try { + creds = JSON.parse(readFileSync(credPath, "utf-8")); + } catch { + return null; + } + } + const token = process.env.DEEPLAKE_TOKEN ?? creds?.token; + const orgId = process.env.DEEPLAKE_ORG_ID ?? creds?.orgId; + if (!token || !orgId) + return null; + return { + token, + orgId, + orgName: creds?.orgName ?? orgId, + userName: creds?.userName || userInfo().username || "unknown", + workspaceId: process.env.DEEPLAKE_WORKSPACE_ID ?? creds?.workspaceId ?? "default", + apiUrl: process.env.DEEPLAKE_API_URL ?? creds?.apiUrl ?? "https://api.deeplake.ai", + tableName: process.env.DEEPLAKE_TABLE ?? 
"memory", + sessionsTableName: process.env.DEEPLAKE_SESSIONS_TABLE ?? "sessions", + memoryPath: process.env.DEEPLAKE_MEMORY_PATH ?? join(home, ".deeplake", "memory") + }; } +// dist/src/deeplake-api.js +import { randomUUID } from "node:crypto"; + // dist/src/utils/debug.js -import { appendFileSync as appendFileSync2 } from "node:fs"; +import { appendFileSync } from "node:fs"; import { join as join2 } from "node:path"; import { homedir as homedir2 } from "node:os"; var DEBUG = process.env.DEEPLAKE_DEBUG === "1"; @@ -43,17 +61,234 @@ var LOG = join2(homedir2(), ".deeplake", "hook-debug.log"); function log(tag, msg) { if (!DEBUG) return; - appendFileSync2(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} + appendFileSync(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} `); } +// dist/src/utils/sql.js +function sqlStr(value) { + return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); +} + +// dist/src/deeplake-api.js +var log2 = (msg) => log("sdk", msg); +var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); +var MAX_RETRIES = 3; +var BASE_DELAY_MS = 500; +var MAX_CONCURRENCY = 5; +function sleep(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} +var Semaphore = class { + max; + waiting = []; + active = 0; + constructor(max) { + this.max = max; + } + async acquire() { + if (this.active < this.max) { + this.active++; + return; + } + await new Promise((resolve) => this.waiting.push(resolve)); + } + release() { + this.active--; + const next = this.waiting.shift(); + if (next) { + this.active++; + next(); + } + } +}; +var DeeplakeApi = class { + token; + apiUrl; + orgId; + workspaceId; + tableName; + _pendingRows = []; + _sem = new Semaphore(MAX_CONCURRENCY); + constructor(token, apiUrl, orgId, workspaceId, tableName) { + this.token = token; + this.apiUrl = apiUrl; + this.orgId = orgId; + this.workspaceId = workspaceId; + 
this.tableName = tableName; + } + /** Execute SQL with retry on transient errors and bounded concurrency. */ + async query(sql) { + await this._sem.acquire(); + try { + return await this._queryWithRetry(sql); + } finally { + this._sem.release(); + } + } + async _queryWithRetry(sql) { + let lastError; + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + let resp; + try { + resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { + method: "POST", + headers: { + Authorization: `Bearer ${this.token}`, + "Content-Type": "application/json", + "X-Activeloop-Org-Id": this.orgId + }, + body: JSON.stringify({ query: sql }) + }); + } catch (e) { + lastError = e instanceof Error ? e : new Error(String(e)); + if (attempt < MAX_RETRIES) { + const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; + log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); + await sleep(delay); + continue; + } + throw lastError; + } + if (resp.ok) { + const raw = await resp.json(); + if (!raw?.rows || !raw?.columns) + return []; + return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); + } + const text = await resp.text().catch(() => ""); + if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; + log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); + await sleep(delay); + continue; + } + throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + } + throw lastError ?? new Error("Query failed: max retries exceeded"); + } + // ── Writes ────────────────────────────────────────────────────────────────── + /** Queue rows for writing. Call commit() to flush. */ + appendRows(rows) { + this._pendingRows.push(...rows); + } + /** Flush pending rows via SQL. 
*/ + async commit() { + if (this._pendingRows.length === 0) + return; + const rows = this._pendingRows; + this._pendingRows = []; + const CONCURRENCY = 10; + for (let i = 0; i < rows.length; i += CONCURRENCY) { + const chunk = rows.slice(i, i + CONCURRENCY); + await Promise.allSettled(chunk.map((r) => this.upsertRowSql(r))); + } + log2(`commit: ${rows.length} rows`); + } + async upsertRowSql(row) { + const ts = (/* @__PURE__ */ new Date()).toISOString(); + const cd = row.creationDate ?? ts; + const lud = row.lastUpdateDate ?? ts; + const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`); + if (exists.length > 0) { + let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`; + if (row.project !== void 0) + setClauses += `, project = '${sqlStr(row.project)}'`; + if (row.description !== void 0) + setClauses += `, description = '${sqlStr(row.description)}'`; + await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`); + } else { + const id = randomUUID(); + let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date"; + let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`; + if (row.project !== void 0) { + cols += ", project"; + vals += `, '${sqlStr(row.project)}'`; + } + if (row.description !== void 0) { + cols += ", description"; + vals += `, '${sqlStr(row.description)}'`; + } + await this.query(`INSERT INTO "${this.tableName}" (${cols}) VALUES (${vals})`); + } + } + /** Update specific columns on a row by path. */ + async updateColumns(path, columns) { + const setClauses = Object.entries(columns).map(([col, val]) => typeof val === "number" ? 
`${col} = ${val}` : `${col} = '${sqlStr(String(val))}'`).join(", "); + await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(path)}'`); + } + // ── Convenience ───────────────────────────────────────────────────────────── + /** Create a BM25 search index on a column. */ + async createIndex(column) { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); + } + /** List all tables in the workspace (with retry). */ + async listTables() { + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + try { + const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { + headers: { + Authorization: `Bearer ${this.token}`, + "X-Activeloop-Org-Id": this.orgId + } + }); + if (resp.ok) { + const data = await resp.json(); + return (data.tables ?? []).map((t) => t.table_name); + } + if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); + continue; + } + return []; + } catch { + if (attempt < MAX_RETRIES) { + await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); + continue; + } + return []; + } + } + return []; + } + /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ + async ensureTable(name) { + const tbl = name ?? 
this.tableName; + const tables = await this.listTables(); + if (!tables.includes(tbl)) { + log2(`table "${tbl}" not found, creating`); + await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + log2(`table "${tbl}" created`); + } + } + /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + async ensureSessionsTable(name) { + const tables = await this.listTables(); + if (!tables.includes(name)) { + log2(`table "${name}" not found, creating`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + log2(`table "${name}" created`); + } + } +}; + // dist/src/hooks/codex/capture.js -var log2 = (msg) => log("codex-capture", msg); +var log3 = (msg) => log("codex-capture", msg); var CAPTURE = process.env.DEEPLAKE_CAPTURE !== "false"; +function buildSessionPath(config, sessionId) { + return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; +} async function main() { if (!CAPTURE) return; const input = await readStdin(); + const config = loadConfig(); + if (!config) { + log3("no config"); + return; + } 
+ const sessionsTable = config.sessionsTableName; + const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); const ts = (/* @__PURE__ */ new Date()).toISOString(); const meta = { session_id: input.session_id, @@ -66,7 +301,7 @@ async function main() { }; let entry; if (input.hook_event_name === "UserPromptSubmit" && input.prompt !== void 0) { - log2(`user session=${input.session_id}`); + log3(`user session=${input.session_id}`); entry = { id: crypto.randomUUID(), ...meta, @@ -74,7 +309,7 @@ async function main() { content: input.prompt }; } else if (input.hook_event_name === "PostToolUse" && input.tool_name !== void 0) { - log2(`tool=${input.tool_name} session=${input.session_id}`); + log3(`tool=${input.tool_name} session=${input.session_id}`); entry = { id: crypto.randomUUID(), ...meta, @@ -85,13 +320,30 @@ async function main() { tool_response: JSON.stringify(input.tool_response) }; } else { - log2(`unknown event: ${input.hook_event_name}, skipping`); + log3(`unknown event: ${input.hook_event_name}, skipping`); return; } - appendEvent(input.session_id, entry); - log2("capture ok \u2192 local queue"); + const sessionPath = buildSessionPath(config, input.session_id); + const line = JSON.stringify(entry); + log3(`writing to ${sessionPath}`); + const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; + const filename = sessionPath.split("/").pop() ?? ""; + const jsonForSql = sqlStr(line); + const insertSql = `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(input.hook_event_name ?? 
"")}', 'codex', '${ts}', '${ts}')`; + try { + await api.query(insertSql); + } catch (e) { + if (e.message?.includes("permission denied") || e.message?.includes("does not exist")) { + log3("table missing, creating and retrying"); + await api.ensureSessionsTable(sessionsTable); + await api.query(insertSql); + } else { + throw e; + } + } + log3("capture ok"); } main().catch((e) => { - log2(`fatal: ${e.message}`); + log3(`fatal: ${e.message}`); process.exit(0); }); diff --git a/codex/bundle/stop.js b/codex/bundle/stop.js index ef7ce4d..a92c4e6 100755 --- a/codex/bundle/stop.js +++ b/codex/bundle/stop.js @@ -3,9 +3,9 @@ // dist/src/hooks/codex/stop.js import { spawn, execSync } from "node:child_process"; import { fileURLToPath } from "node:url"; -import { dirname, join as join4 } from "node:path"; -import { writeFileSync, readFileSync as readFileSync3, mkdirSync as mkdirSync2, appendFileSync as appendFileSync3, existsSync as existsSync3 } from "node:fs"; -import { homedir as homedir4, tmpdir } from "node:os"; +import { dirname, join as join3 } from "node:path"; +import { writeFileSync, readFileSync as readFileSync2, mkdirSync, appendFileSync as appendFileSync2, existsSync as existsSync2 } from "node:fs"; +import { homedir as homedir3, tmpdir } from "node:os"; // dist/src/utils/stdin.js function readStdin() { @@ -56,45 +56,238 @@ function loadConfig() { }; } -// dist/src/utils/capture-queue.js -import { appendFileSync, mkdirSync, readFileSync as readFileSync2, existsSync as existsSync2, unlinkSync } from "node:fs"; -import { join as join2 } from "node:path"; -import { homedir as homedir2 } from "node:os"; -var QUEUE_DIR = join2(homedir2(), ".deeplake", "capture"); -function ensureDir() { - mkdirSync(QUEUE_DIR, { recursive: true }); -} -function queuePath(sessionId) { - return join2(QUEUE_DIR, `${sessionId}.jsonl`); -} -function appendEvent(sessionId, event) { - ensureDir(); - const line = JSON.stringify(event) + "\n"; - appendFileSync(queuePath(sessionId), line); -} +// 
dist/src/deeplake-api.js +import { randomUUID } from "node:crypto"; // dist/src/utils/debug.js -import { appendFileSync as appendFileSync2 } from "node:fs"; -import { join as join3 } from "node:path"; -import { homedir as homedir3 } from "node:os"; +import { appendFileSync } from "node:fs"; +import { join as join2 } from "node:path"; +import { homedir as homedir2 } from "node:os"; var DEBUG = process.env.DEEPLAKE_DEBUG === "1"; -var LOG = join3(homedir3(), ".deeplake", "hook-debug.log"); +var LOG = join2(homedir2(), ".deeplake", "hook-debug.log"); function log(tag, msg) { if (!DEBUG) return; - appendFileSync2(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} + appendFileSync(LOG, `${(/* @__PURE__ */ new Date()).toISOString()} [${tag}] ${msg} `); } +// dist/src/utils/sql.js +function sqlStr(value) { + return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); +} + +// dist/src/deeplake-api.js +var log2 = (msg) => log("sdk", msg); +var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); +var MAX_RETRIES = 3; +var BASE_DELAY_MS = 500; +var MAX_CONCURRENCY = 5; +function sleep(ms) { + return new Promise((resolve) => setTimeout(resolve, ms)); +} +var Semaphore = class { + max; + waiting = []; + active = 0; + constructor(max) { + this.max = max; + } + async acquire() { + if (this.active < this.max) { + this.active++; + return; + } + await new Promise((resolve) => this.waiting.push(resolve)); + } + release() { + this.active--; + const next = this.waiting.shift(); + if (next) { + this.active++; + next(); + } + } +}; +var DeeplakeApi = class { + token; + apiUrl; + orgId; + workspaceId; + tableName; + _pendingRows = []; + _sem = new Semaphore(MAX_CONCURRENCY); + constructor(token, apiUrl, orgId, workspaceId, tableName) { + this.token = token; + this.apiUrl = apiUrl; + this.orgId = orgId; + this.workspaceId = workspaceId; + this.tableName = tableName; + } + /** Execute SQL 
with retry on transient errors and bounded concurrency. */ + async query(sql) { + await this._sem.acquire(); + try { + return await this._queryWithRetry(sql); + } finally { + this._sem.release(); + } + } + async _queryWithRetry(sql) { + let lastError; + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + let resp; + try { + resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables/query`, { + method: "POST", + headers: { + Authorization: `Bearer ${this.token}`, + "Content-Type": "application/json", + "X-Activeloop-Org-Id": this.orgId + }, + body: JSON.stringify({ query: sql }) + }); + } catch (e) { + lastError = e instanceof Error ? e : new Error(String(e)); + if (attempt < MAX_RETRIES) { + const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; + log2(`query retry ${attempt + 1}/${MAX_RETRIES} (fetch error: ${lastError.message}) in ${delay.toFixed(0)}ms`); + await sleep(delay); + continue; + } + throw lastError; + } + if (resp.ok) { + const raw = await resp.json(); + if (!raw?.rows || !raw?.columns) + return []; + return raw.rows.map((row) => Object.fromEntries(raw.columns.map((col, i) => [col, row[i]]))); + } + const text = await resp.text().catch(() => ""); + if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200; + log2(`query retry ${attempt + 1}/${MAX_RETRIES} (${resp.status}) in ${delay.toFixed(0)}ms`); + await sleep(delay); + continue; + } + throw new Error(`Query failed: ${resp.status}: ${text.slice(0, 200)}`); + } + throw lastError ?? new Error("Query failed: max retries exceeded"); + } + // ── Writes ────────────────────────────────────────────────────────────────── + /** Queue rows for writing. Call commit() to flush. */ + appendRows(rows) { + this._pendingRows.push(...rows); + } + /** Flush pending rows via SQL. 
*/ + async commit() { + if (this._pendingRows.length === 0) + return; + const rows = this._pendingRows; + this._pendingRows = []; + const CONCURRENCY = 10; + for (let i = 0; i < rows.length; i += CONCURRENCY) { + const chunk = rows.slice(i, i + CONCURRENCY); + await Promise.allSettled(chunk.map((r) => this.upsertRowSql(r))); + } + log2(`commit: ${rows.length} rows`); + } + async upsertRowSql(row) { + const ts = (/* @__PURE__ */ new Date()).toISOString(); + const cd = row.creationDate ?? ts; + const lud = row.lastUpdateDate ?? ts; + const exists = await this.query(`SELECT path FROM "${this.tableName}" WHERE path = '${sqlStr(row.path)}' LIMIT 1`); + if (exists.length > 0) { + let setClauses = `summary = E'${sqlStr(row.contentText)}', mime_type = '${sqlStr(row.mimeType)}', size_bytes = ${row.sizeBytes}, last_update_date = '${lud}'`; + if (row.project !== void 0) + setClauses += `, project = '${sqlStr(row.project)}'`; + if (row.description !== void 0) + setClauses += `, description = '${sqlStr(row.description)}'`; + await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(row.path)}'`); + } else { + const id = randomUUID(); + let cols = "id, path, filename, summary, mime_type, size_bytes, creation_date, last_update_date"; + let vals = `'${id}', '${sqlStr(row.path)}', '${sqlStr(row.filename)}', E'${sqlStr(row.contentText)}', '${sqlStr(row.mimeType)}', ${row.sizeBytes}, '${cd}', '${lud}'`; + if (row.project !== void 0) { + cols += ", project"; + vals += `, '${sqlStr(row.project)}'`; + } + if (row.description !== void 0) { + cols += ", description"; + vals += `, '${sqlStr(row.description)}'`; + } + await this.query(`INSERT INTO "${this.tableName}" (${cols}) VALUES (${vals})`); + } + } + /** Update specific columns on a row by path. */ + async updateColumns(path, columns) { + const setClauses = Object.entries(columns).map(([col, val]) => typeof val === "number" ? 
`${col} = ${val}` : `${col} = '${sqlStr(String(val))}'`).join(", "); + await this.query(`UPDATE "${this.tableName}" SET ${setClauses} WHERE path = '${sqlStr(path)}'`); + } + // ── Convenience ───────────────────────────────────────────────────────────── + /** Create a BM25 search index on a column. */ + async createIndex(column) { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${sqlStr(column)}_bm25 ON "${this.tableName}" USING deeplake_index ("${column}")`); + } + /** List all tables in the workspace (with retry). */ + async listTables() { + for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) { + try { + const resp = await fetch(`${this.apiUrl}/workspaces/${this.workspaceId}/tables`, { + headers: { + Authorization: `Bearer ${this.token}`, + "X-Activeloop-Org-Id": this.orgId + } + }); + if (resp.ok) { + const data = await resp.json(); + return (data.tables ?? []).map((t) => t.table_name); + } + if (attempt < MAX_RETRIES && RETRYABLE_CODES.has(resp.status)) { + await sleep(BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 200); + continue; + } + return []; + } catch { + if (attempt < MAX_RETRIES) { + await sleep(BASE_DELAY_MS * Math.pow(2, attempt)); + continue; + } + return []; + } + } + return []; + } + /** Create the memory table if it doesn't already exist. Migrate columns on existing tables. */ + async ensureTable(name) { + const tbl = name ?? 
this.tableName; + const tables = await this.listTables(); + if (!tables.includes(tbl)) { + log2(`table "${tbl}" not found, creating`); + await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + log2(`table "${tbl}" created`); + } + } + /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ + async ensureSessionsTable(name) { + const tables = await this.listTables(); + if (!tables.includes(name)) { + log2(`table "${name}" not found, creating`); + await this.query(`CREATE TABLE IF NOT EXISTS "${name}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', message JSONB, author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'application/json', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); + log2(`table "${name}" created`); + } + } +}; + // dist/src/hooks/codex/stop.js -var log2 = (msg) => log("codex-stop", msg); -var HOME = homedir4(); -var WIKI_LOG = join4(HOME, ".codex", "hooks", "deeplake-wiki.log"); +var log3 = (msg) => log("codex-stop", msg); +var HOME = homedir3(); +var WIKI_LOG = join3(HOME, ".codex", "hooks", "deeplake-wiki.log"); var __bundleDir = dirname(fileURLToPath(import.meta.url)); function wikiLog(msg) { try { - mkdirSync2(join4(HOME, ".codex", "hooks"), { recursive: true }); - appendFileSync3(WIKI_LOG, `[${(/* @__PURE__ */ new 
Date()).toISOString().replace("T", " ").slice(0, 19)}] ${msg} + mkdirSync(join3(HOME, ".codex", "hooks"), { recursive: true }); + appendFileSync2(WIKI_LOG, `[${(/* @__PURE__ */ new Date()).toISOString().replace("T", " ").slice(0, 19)}] ${msg} `); } catch { } @@ -156,6 +349,9 @@ IMPORTANT: Be exhaustive. Extract EVERY entity, decision, and fact. PRIVACY: Never include absolute filesystem paths in the summary. LENGTH LIMIT: Keep the total summary under 4000 characters.`; var CAPTURE = process.env.DEEPLAKE_CAPTURE !== "false"; +function buildSessionPath(config, sessionId) { + return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; +} async function main() { if (process.env.DEEPLAKE_WIKI_WORKER === "1") return; @@ -165,23 +361,25 @@ async function main() { return; const config = loadConfig(); if (!config) { - log2("no config"); + log3("no config"); return; } if (CAPTURE) { try { + const sessionsTable2 = config.sessionsTableName; + const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable2); const ts = (/* @__PURE__ */ new Date()).toISOString(); let lastAssistantMessage = ""; if (input.transcript_path) { try { const transcriptPath = input.transcript_path; - if (existsSync3(transcriptPath)) { - const transcript = readFileSync3(transcriptPath, "utf-8"); + if (existsSync2(transcriptPath)) { + const transcript = readFileSync2(transcriptPath, "utf-8"); const lines = transcript.trim().split("\n").reverse(); - for (const line of lines) { + for (const line2 of lines) { try { - const entry = JSON.parse(line); - const msg = entry.payload ?? entry; + const entry2 = JSON.parse(line2); + const msg = entry2.payload ?? entry2; if (msg.role === "assistant" && msg.content) { const content = typeof msg.content === "string" ? msg.content : Array.isArray(msg.content) ? 
msg.content.filter((b) => b.type === "output_text" || b.type === "text").map((b) => b.text).join("\n") : ""; if (content) { @@ -193,13 +391,13 @@ async function main() { } } if (lastAssistantMessage) - log2(`extracted assistant message from transcript (${lastAssistantMessage.length} chars)`); + log3(`extracted assistant message from transcript (${lastAssistantMessage.length} chars)`); } } catch (e) { - log2(`transcript read failed: ${e.message}`); + log3(`transcript read failed: ${e.message}`); } } - appendEvent(sessionId, { + const entry = { id: crypto.randomUUID(), session_id: sessionId, transcript_path: input.transcript_path, @@ -209,10 +407,17 @@ async function main() { timestamp: ts, type: lastAssistantMessage ? "assistant_message" : "assistant_stop", content: lastAssistantMessage - }); - log2("stop event captured \u2192 local queue"); + }; + const line = JSON.stringify(entry); + const sessionPath = buildSessionPath(config, sessionId); + const projectName2 = (input.cwd ?? "").split("/").pop() || "unknown"; + const filename = sessionPath.split("/").pop() ?? ""; + const jsonForSql = sqlStr(line); + const insertSql = `INSERT INTO "${sessionsTable2}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName2)}', 'Stop', 'codex', '${ts}', '${ts}')`; + await api.query(insertSql); + log3("stop event captured"); } catch (e) { - log2(`capture failed: ${e.message}`); + log3(`capture failed: ${e.message}`); } } const cwd = input.cwd ?? 
""; @@ -220,14 +425,13 @@ async function main() { const sessionsTable = config.sessionsTableName; const agentBin = findSummaryBin(); const projectName = cwd.split("/").pop() || "unknown"; - const tmpDir = join4(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`); - mkdirSync2(tmpDir, { recursive: true }); - const configFile = join4(tmpDir, "config.json"); + const tmpDir = join3(tmpdir(), `deeplake-wiki-${sessionId}-${Date.now()}`); + mkdirSync(tmpDir, { recursive: true }); + const configFile = join3(tmpDir, "config.json"); writeFileSync(configFile, JSON.stringify({ apiUrl: config.apiUrl, token: config.token, orgId: config.orgId, - orgName: config.orgName, workspaceId: config.workspaceId, memoryTable, sessionsTable, @@ -237,11 +441,11 @@ async function main() { tmpDir, codexBin: agentBin, wikiLog: WIKI_LOG, - hooksDir: join4(HOME, ".codex", "hooks"), + hooksDir: join3(HOME, ".codex", "hooks"), promptTemplate: WIKI_PROMPT_TEMPLATE })); wikiLog(`Stop: spawning summary worker for ${sessionId}`); - const workerPath = join4(__bundleDir, "wiki-worker.js"); + const workerPath = join3(__bundleDir, "wiki-worker.js"); spawn("nohup", ["node", workerPath, configFile], { detached: true, stdio: ["ignore", "ignore", "ignore"] @@ -249,6 +453,6 @@ async function main() { wikiLog(`Stop: spawned summary worker for ${sessionId}`); } main().catch((e) => { - log2(`fatal: ${e.message}`); + log3(`fatal: ${e.message}`); process.exit(0); }); diff --git a/src/hooks/capture.ts b/src/hooks/capture.ts index 9ea6c6f..d75e44a 100644 --- a/src/hooks/capture.ts +++ b/src/hooks/capture.ts @@ -1,16 +1,17 @@ #!/usr/bin/env node /** - * Capture hook — appends each session event to a local JSONL queue file. - * No network calls — events are flushed to cloud at session end by the wiki worker. + * Capture hook — writes each session event as a separate row in the sessions table. + * One INSERT per event, no concat, no race conditions. 
* * Used by: UserPromptSubmit, PostToolUse (async), Stop, SubagentStop - * - * Queue file: ~/.deeplake/capture/.jsonl */ +import { homedir } from "node:os"; import { readStdin } from "../utils/stdin.js"; -import { appendEvent } from "../utils/capture-queue.js"; +import { loadConfig } from "../config.js"; +import { DeeplakeApi } from "../deeplake-api.js"; +import { sqlStr } from "../utils/sql.js"; import { log as _log } from "../utils/debug.js"; const log = (msg: string) => _log("capture", msg); @@ -37,10 +38,26 @@ interface HookInput { const CAPTURE = process.env.DEEPLAKE_CAPTURE !== "false"; +/** Build the session path matching the CLI convention: + * /sessions//___.jsonl */ +function buildSessionPath(config: { userName: string; orgName: string; workspaceId: string }, sessionId: string): string { + const userName = config.userName; + const orgName = config.orgName; + const workspace = config.workspaceId ?? "default"; + + return `/sessions/${userName}/${userName}_${orgName}_${workspace}_${sessionId}.jsonl`; +} + async function main(): Promise { if (!CAPTURE) return; const input = await readStdin(); + const config = loadConfig(); + if (!config) { log("no config"); return; } + + const sessionsTable = config.sessionsTableName; + const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); + // Build the event entry const ts = new Date().toISOString(); const meta = { session_id: input.session_id, @@ -88,8 +105,38 @@ async function main(): Promise { return; } - appendEvent(input.session_id, entry); - log("capture ok → local queue"); + const sessionPath = buildSessionPath(config, input.session_id); + const line = JSON.stringify(entry); + log(`writing to ${sessionPath}`); + + // Simple INSERT — one row per event, no concat, no race conditions. + const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; + const filename = sessionPath.split("/").pop() ?? 
""; + + // For JSONB: only escape single quotes for the SQL literal, keep JSON structure intact. + // sqlStr() would also escape backslashes and strip control chars, corrupting the JSON. + const jsonForSql = line.replace(/'/g, "''"); + + const insertSql = + `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ` + + `${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(input.hook_event_name ?? "")}', 'claude_code', '${ts}', '${ts}')`; + + try { + await api.query(insertSql); + } catch (e: any) { + // Fallback: table might not exist (session-start failed or org switched mid-session). + // Create it and retry once. + if (e.message?.includes("permission denied") || e.message?.includes("does not exist")) { + log("table missing, creating and retrying"); + await api.ensureSessionsTable(sessionsTable); + await api.query(insertSql); + } else { + throw e; + } + } + + log("capture ok → cloud"); } main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); diff --git a/src/hooks/codex/capture.ts b/src/hooks/codex/capture.ts index cdc27dc..5feb7c9 100644 --- a/src/hooks/codex/capture.ts +++ b/src/hooks/codex/capture.ts @@ -1,16 +1,21 @@ #!/usr/bin/env node /** - * Codex Capture hook — appends each session event to a local JSONL queue file. - * No network calls — events are flushed to cloud at session end by the wiki worker. + * Codex Capture hook — writes each session event as a row in the sessions table. 
* * Used by: UserPromptSubmit, PostToolUse * - * Queue file: ~/.deeplake/capture/.jsonl + * Codex input fields: + * All events: session_id, transcript_path, cwd, hook_event_name, model + * UserPromptSubmit: prompt (user text) + * PostToolUse: tool_name, tool_use_id, tool_input, tool_response + * Stop: (no extra fields — Codex has no last_assistant_message equivalent) */ import { readStdin } from "../../utils/stdin.js"; -import { appendEvent } from "../../utils/capture-queue.js"; +import { loadConfig } from "../../config.js"; +import { DeeplakeApi } from "../../deeplake-api.js"; +import { sqlStr } from "../../utils/sql.js"; import { log as _log } from "../../utils/debug.js"; const log = (msg: string) => _log("codex-capture", msg); @@ -32,9 +37,18 @@ interface CodexHookInput { const CAPTURE = process.env.DEEPLAKE_CAPTURE !== "false"; +function buildSessionPath(config: { userName: string; orgName: string; workspaceId: string }, sessionId: string): string { + return `/sessions/${config.userName}/${config.userName}_${config.orgName}_${config.workspaceId}_${sessionId}.jsonl`; +} + async function main(): Promise { if (!CAPTURE) return; const input = await readStdin(); + const config = loadConfig(); + if (!config) { log("no config"); return; } + + const sessionsTable = config.sessionsTableName; + const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); const ts = new Date().toISOString(); const meta = { @@ -73,8 +87,32 @@ async function main(): Promise { return; } - appendEvent(input.session_id, entry); - log("capture ok → local queue"); + const sessionPath = buildSessionPath(config, input.session_id); + const line = JSON.stringify(entry); + log(`writing to ${sessionPath}`); + + const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; + const filename = sessionPath.split("/").pop() ?? 
""; + const jsonForSql = sqlStr(line); + + const insertSql = + `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ` + + `${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', '${sqlStr(input.hook_event_name ?? "")}', 'codex', '${ts}', '${ts}')`; + + try { + await api.query(insertSql); + } catch (e: any) { + if (e.message?.includes("permission denied") || e.message?.includes("does not exist")) { + log("table missing, creating and retrying"); + await api.ensureSessionsTable(sessionsTable); + await api.query(insertSql); + } else { + throw e; + } + } + + log("capture ok"); } main().catch((e) => { log(`fatal: ${e.message}`); process.exit(0); }); diff --git a/src/hooks/codex/stop.ts b/src/hooks/codex/stop.ts index 488e209..4346e78 100644 --- a/src/hooks/codex/stop.ts +++ b/src/hooks/codex/stop.ts @@ -18,7 +18,8 @@ import { writeFileSync, readFileSync, mkdirSync, appendFileSync, existsSync } fr import { homedir, tmpdir } from "node:os"; import { readStdin } from "../../utils/stdin.js"; import { loadConfig } from "../../config.js"; -import { appendEvent } from "../../utils/capture-queue.js"; +import { DeeplakeApi } from "../../deeplake-api.js"; +import { sqlStr } from "../../utils/sql.js"; import { log as _log } from "../../utils/debug.js"; const log = (msg: string) => _log("codex-stop", msg); @@ -116,9 +117,11 @@ async function main(): Promise { const config = loadConfig(); if (!config) { log("no config"); return; } - // 1. Capture the stop event to local queue (no network) + // 1. 
Capture the stop event (try to extract last assistant message from transcript) if (CAPTURE) { try { + const sessionsTable = config.sessionsTableName; + const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, sessionsTable); const ts = new Date().toISOString(); // Codex Stop doesn't include last_assistant_message, but it provides @@ -129,10 +132,13 @@ async function main(): Promise { const transcriptPath = input.transcript_path; if (existsSync(transcriptPath)) { const transcript = readFileSync(transcriptPath, "utf-8"); + // Codex transcript is JSONL with format: + // {"type":"response_item","payload":{"type":"message","role":"assistant","content":[{"type":"output_text","text":"..."}]}} const lines = transcript.trim().split("\n").reverse(); for (const line of lines) { try { const entry = JSON.parse(line); + // Codex nests the message inside payload const msg = entry.payload ?? entry; if (msg.role === "assistant" && msg.content) { const content = typeof msg.content === "string" @@ -154,7 +160,7 @@ async function main(): Promise { } } - appendEvent(sessionId, { + const entry = { id: crypto.randomUUID(), session_id: sessionId, transcript_path: input.transcript_path, @@ -164,8 +170,20 @@ async function main(): Promise { timestamp: ts, type: lastAssistantMessage ? "assistant_message" : "assistant_stop", content: lastAssistantMessage, - }); - log("stop event captured → local queue"); + }; + const line = JSON.stringify(entry); + const sessionPath = buildSessionPath(config, sessionId); + const projectName = (input.cwd ?? "").split("/").pop() || "unknown"; + const filename = sessionPath.split("/").pop() ?? 
""; + const jsonForSql = sqlStr(line); + + const insertSql = + `INSERT INTO "${sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + + `VALUES ('${crypto.randomUUID()}', '${sqlStr(sessionPath)}', '${sqlStr(filename)}', '${jsonForSql}'::jsonb, '${sqlStr(config.userName)}', ` + + `${Buffer.byteLength(line, "utf-8")}, '${sqlStr(projectName)}', 'Stop', 'codex', '${ts}', '${ts}')`; + + await api.query(insertSql); + log("stop event captured"); } catch (e: any) { log(`capture failed: ${e.message}`); } @@ -186,7 +204,6 @@ async function main(): Promise { apiUrl: config.apiUrl, token: config.token, orgId: config.orgId, - orgName: config.orgName, workspaceId: config.workspaceId, memoryTable, sessionsTable, diff --git a/src/hooks/session-end.ts b/src/hooks/session-end.ts index 7b66c48..1560853 100644 --- a/src/hooks/session-end.ts +++ b/src/hooks/session-end.ts @@ -123,7 +123,6 @@ async function main(): Promise { apiUrl: config.apiUrl, token: config.token, orgId: config.orgId, - orgName: config.orgName, workspaceId: config.workspaceId, memoryTable, sessionsTable, diff --git a/src/hooks/wiki-worker.ts b/src/hooks/wiki-worker.ts index 09bc77a..9155071 100644 --- a/src/hooks/wiki-worker.ts +++ b/src/hooks/wiki-worker.ts @@ -1,30 +1,21 @@ #!/usr/bin/env node /** - * Background wiki worker — flushes local capture queue to cloud, - * then generates a wiki summary using claude -p. + * Background wiki worker — reads session events from the sessions table, + * runs claude -p to generate a wiki summary, and uploads it to the memory table. * - * Invoked by session-end.ts / codex stop.ts as: node wiki-worker.js - * - * Flow: - * 1. Read events from local queue (~/.deeplake/capture/.jsonl) - * 2. Batch-upload events to sessions table (cloud) - * 3. Delete local queue file - * 4. Run claude -p to generate wiki summary - * 5. 
Upload summary to memory table + * Invoked by session-end.ts as: node wiki-worker.js */ import { readFileSync, writeFileSync, existsSync, appendFileSync, mkdirSync, rmSync } from "node:fs"; import { execFileSync } from "node:child_process"; import { join } from "node:path"; import { utcTimestamp } from "../utils/debug.js"; -import { readEvents, readRawJsonl, deleteQueue } from "../utils/capture-queue.js"; interface WorkerConfig { apiUrl: string; token: string; orgId: string; - orgName: string; workspaceId: string; memoryTable: string; sessionsTable: string; @@ -76,7 +67,7 @@ async function query(sql: string, retries = 2): Promise[ Object.fromEntries(j.columns!.map((col, i) => [col, row[i]])) ); } - if (attempt < retries && (r.status === 502 || r.status === 503 || r.status === 429 || r.status === 500)) { + if (attempt < retries && (r.status === 502 || r.status === 503 || r.status === 429)) { wlog(`API ${r.status}, retrying in ${attempt + 1}s...`); await new Promise(resolve => setTimeout(resolve, (attempt + 1) * 1000)); continue; @@ -90,98 +81,40 @@ function cleanup(): void { try { rmSync(tmpDir, { recursive: true, force: true }); } catch { /* ignore */ } } -/** Build the session path matching the CLI convention. */ -function buildSessionPath(): string { - const org = cfg.orgName ?? cfg.orgId; - return `/sessions/${cfg.userName}/${cfg.userName}_${org}_${cfg.workspaceId}_${cfg.sessionId}.jsonl`; -} - -/** Flush local queue events to the cloud sessions table. */ -async function flushQueue(): Promise<{ events: Record[]; jsonlServerPath: string }> { - const events = readEvents(cfg.sessionId); - const jsonlServerPath = buildSessionPath(); - - if (events.length === 0) { - wlog("no local events to flush"); - return { events, jsonlServerPath }; - } - - wlog(`flushing ${events.length} events to cloud`); - const filename = jsonlServerPath.split("/").pop() ?? 
""; - - for (const event of events) { - const line = JSON.stringify(event); - // For JSONB: only escape single quotes for the SQL literal - const jsonForSql = line.replace(/'/g, "''"); - const ts = (event.timestamp as string) ?? new Date().toISOString(); - const hookEvent = (event.hook_event_name as string) ?? ""; - - try { - await query( - `INSERT INTO "${cfg.sessionsTable}" (id, path, filename, message, author, size_bytes, project, description, agent, creation_date, last_update_date) ` + - `VALUES ('${crypto.randomUUID()}', '${esc(jsonlServerPath)}', '${esc(filename)}', '${jsonForSql}'::jsonb, '${esc(cfg.userName)}', ` + - `${Buffer.byteLength(line, "utf-8")}, '${esc(cfg.project)}', '${esc(hookEvent)}', 'claude_code', '${ts}', '${ts}')` - ); - } catch (e: any) { - wlog(`flush event failed: ${e.message}`); - // Don't delete queue if flush fails — events will be retried next session end - throw e; - } - } - - deleteQueue(cfg.sessionId); - wlog(`flushed ${events.length} events, deleted local queue`); - return { events, jsonlServerPath }; -} - async function main(): Promise { try { - // 1. Flush local queue to cloud - const { events, jsonlServerPath } = await flushQueue(); - - // 2. Also fetch any events already in cloud (from previous sessions or partial flushes) - wlog("fetching cloud events"); - try { - await query(`SELECT deeplake_sync_table('${cfg.sessionsTable}')`); - } catch { /* sync might fail on new tables, continue */ } - const cloudRows = await query( + // 1. 
Fetch session events from sessions table, reconstruct JSONL + wlog("fetching session events"); + await query(`SELECT deeplake_sync_table('${cfg.sessionsTable}')`); + const rows = await query( `SELECT message, creation_date FROM "${cfg.sessionsTable}" ` + `WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC` ); - // Merge: cloud events + local events (deduped by id) - const seenIds = new Set(); - const allEvents: Record[] = []; - - for (const row of cloudRows) { - const msg = typeof row.message === "string" ? JSON.parse(row.message) : row.message; - const id = msg?.id as string; - if (id && !seenIds.has(id)) { - seenIds.add(id); - allEvents.push(msg); - } - } - // Add local events not already in cloud (e.g. if flush failed partially) - for (const evt of events) { - const id = evt.id as string; - if (id && !seenIds.has(id)) { - seenIds.add(id); - allEvents.push(evt); - } - } - - if (allEvents.length === 0) { + if (rows.length === 0) { wlog("no session events found — exiting"); return; } - // Reconstruct JSONL - const jsonlContent = allEvents.map(e => JSON.stringify(e)).join("\n"); - const jsonlLines = allEvents.length; + // Reconstruct JSONL from individual rows (message is JSONB — may be object or string) + const jsonlContent = rows + .map(r => typeof r.message === "string" ? r.message : JSON.stringify(r.message)) + .join("\n"); + const jsonlLines = rows.length; + + // Derive the server path + const pathRows = await query( + `SELECT DISTINCT path FROM "${cfg.sessionsTable}" ` + + `WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' LIMIT 1` + ); + const jsonlServerPath = pathRows.length > 0 + ? pathRows[0].path as string + : `/sessions/unknown/${cfg.sessionId}.jsonl`; + writeFileSync(tmpJsonl, jsonlContent); - wlog(`found ${jsonlLines} total events at ${jsonlServerPath}`); + wlog(`found ${jsonlLines} events at ${jsonlServerPath}`); - // 3. Check for existing summary in memory table (resumed session) + // 2. 
Check for existing summary in memory table (resumed session) let prevOffset = 0; try { await query(`SELECT deeplake_sync_table('${cfg.memoryTable}')`); @@ -198,7 +131,7 @@ async function main(): Promise { } } catch { /* no existing summary */ } - // 4. Build prompt and run claude -p + // 3. Build prompt and run claude -p const prompt = cfg.promptTemplate .replace(/__JSONL__/g, tmpJsonl) .replace(/__SUMMARY__/g, tmpSummary) @@ -225,7 +158,7 @@ async function main(): Promise { wlog(`claude -p failed: ${e.status ?? e.message}`); } - // 5. Upload summary to memory table + // 4. Upload summary to memory table if (existsSync(tmpSummary)) { const text = readFileSync(tmpSummary, "utf-8"); if (text.trim()) { diff --git a/src/utils/capture-queue.ts b/src/utils/capture-queue.ts deleted file mode 100644 index 31f50a0..0000000 --- a/src/utils/capture-queue.ts +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Local capture queue — appends session events to a local JSONL file - * instead of making HTTP calls. Events are flushed to cloud at session end. - * - * Queue file: ~/.deeplake/capture/.jsonl - * One line per event, each line is a JSON object. - */ - -import { appendFileSync, mkdirSync, readFileSync, existsSync, unlinkSync } from "node:fs"; -import { join } from "node:path"; -import { homedir } from "node:os"; - -const QUEUE_DIR = join(homedir(), ".deeplake", "capture"); - -/** Ensure the queue directory exists. */ -function ensureDir(): void { - mkdirSync(QUEUE_DIR, { recursive: true }); -} - -/** Get the queue file path for a session. */ -export function queuePath(sessionId: string): string { - return join(QUEUE_DIR, `${sessionId}.jsonl`); -} - -/** Append a single event to the session's local queue. Pure filesystem, no network. */ -export function appendEvent(sessionId: string, event: Record): void { - ensureDir(); - const line = JSON.stringify(event) + "\n"; - appendFileSync(queuePath(sessionId), line); -} - -/** Read all events from a session's local queue. 
Returns empty array if no file. */ -export function readEvents(sessionId: string): Record[] { - const path = queuePath(sessionId); - if (!existsSync(path)) return []; - const content = readFileSync(path, "utf-8").trim(); - if (!content) return []; - return content.split("\n").map(line => JSON.parse(line)); -} - -/** Read raw JSONL content from a session's local queue. */ -export function readRawJsonl(sessionId: string): string { - const path = queuePath(sessionId); - if (!existsSync(path)) return ""; - return readFileSync(path, "utf-8").trim(); -} - -/** Delete the queue file after successful flush. */ -export function deleteQueue(sessionId: string): void { - const path = queuePath(sessionId); - try { unlinkSync(path); } catch { /* ignore */ } -} - -/** Return the queue directory path (for cleanup/listing). */ -export function getQueueDir(): string { - return QUEUE_DIR; -} From 5927891ae6ec7125f8cb0bf6d8ee87e48eef81bb Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 21:10:41 +0000 Subject: [PATCH 12/33] fix: index.md now links to raw session files for all path formats The generateVirtualIndex() regex only matched hivemind-style session paths (/sessions//___.jsonl). Generic paths like /sessions/conv_0_session_1.json were invisible in the index. Now supports both formats and strips _summary suffix when matching summaries to their source session files. 
--- claude-code/bundle/shell/deeplake-shell.js | 17 +++++++++++----- codex/bundle/shell/deeplake-shell.js | 17 +++++++++++----- src/shell/deeplake-fs.ts | 23 ++++++++++++++++------ 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index 796c495..ff54609 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -67170,11 +67170,17 @@ var DeeplakeFs = class _DeeplakeFs { // ── Virtual index.md generation ──────────────────────────────────────────── async generateVirtualIndex() { const rows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC`); - const sessionPathsByUser = /* @__PURE__ */ new Map(); + const sessionPathsByKey = /* @__PURE__ */ new Map(); for (const sp of this.sessionPaths) { - const m26 = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); - if (m26) - sessionPathsByUser.set(m26[1], sp.slice(1)); + const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); + if (hivemind) { + sessionPathsByKey.set(hivemind[1], sp.slice(1)); + } else { + const fname = sp.split("/").pop() ?? ""; + const stem = fname.replace(/\.[^.]+$/, ""); + if (stem) + sessionPathsByKey.set(stem, sp.slice(1)); + } } const lines = [ "# Session Index", @@ -67192,7 +67198,8 @@ var DeeplakeFs = class _DeeplakeFs { const summaryUser = match2[1]; const sessionId = match2[2]; const relPath = `summaries/${summaryUser}/${sessionId}.md`; - const convPath = sessionPathsByUser.get(sessionId); + const baseName = sessionId.replace(/_summary$/, ""); + const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName); const convLink = convPath ? 
`[messages](${convPath})` : ""; const project = row["project"] || ""; const description = row["description"] || ""; diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index 796c495..ff54609 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -67170,11 +67170,17 @@ var DeeplakeFs = class _DeeplakeFs { // ── Virtual index.md generation ──────────────────────────────────────────── async generateVirtualIndex() { const rows = await this.client.query(`SELECT path, project, description, creation_date, last_update_date FROM "${this.table}" WHERE path LIKE '${sqlStr("/summaries/")}%' ORDER BY last_update_date DESC`); - const sessionPathsByUser = /* @__PURE__ */ new Map(); + const sessionPathsByKey = /* @__PURE__ */ new Map(); for (const sp of this.sessionPaths) { - const m26 = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); - if (m26) - sessionPathsByUser.set(m26[1], sp.slice(1)); + const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); + if (hivemind) { + sessionPathsByKey.set(hivemind[1], sp.slice(1)); + } else { + const fname = sp.split("/").pop() ?? ""; + const stem = fname.replace(/\.[^.]+$/, ""); + if (stem) + sessionPathsByKey.set(stem, sp.slice(1)); + } } const lines = [ "# Session Index", @@ -67192,7 +67198,8 @@ var DeeplakeFs = class _DeeplakeFs { const summaryUser = match2[1]; const sessionId = match2[2]; const relPath = `summaries/${summaryUser}/${sessionId}.md`; - const convPath = sessionPathsByUser.get(sessionId); + const baseName = sessionId.replace(/_summary$/, ""); + const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName); const convLink = convPath ? 
`[messages](${convPath})` : ""; const project = row["project"] || ""; const description = row["description"] || ""; diff --git a/src/shell/deeplake-fs.ts b/src/shell/deeplake-fs.ts index 8b97232..e787c1b 100644 --- a/src/shell/deeplake-fs.ts +++ b/src/shell/deeplake-fs.ts @@ -256,12 +256,21 @@ export class DeeplakeFs implements IFileSystem { `WHERE path LIKE '${esc("/summaries/")}%' ORDER BY last_update_date DESC` ); - // Build a lookup: sessionId → JSONL path from sessionPaths - const sessionPathsByUser = new Map(); + // Build a lookup: key → session path from sessionPaths + // Supports two formats: + // 1. /sessions//___.jsonl → key = sessionId + // 2. /sessions/.json (e.g. conv_0_session_1.json) → key = filename stem + const sessionPathsByKey = new Map(); for (const sp of this.sessionPaths) { - // Session path format: /sessions//___.jsonl - const m = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); - if (m) sessionPathsByUser.set(m[1], sp.slice(1)); // strip leading / + const hivemind = sp.match(/\/sessions\/[^/]+\/[^/]+_([^.]+)\.jsonl$/); + if (hivemind) { + sessionPathsByKey.set(hivemind[1], sp.slice(1)); + } else { + // Generic: extract filename without extension + const fname = sp.split("/").pop() ?? ""; + const stem = fname.replace(/\.[^.]+$/, ""); + if (stem) sessionPathsByKey.set(stem, sp.slice(1)); + } } const lines: string[] = [ @@ -280,7 +289,9 @@ export class DeeplakeFs implements IFileSystem { const summaryUser = match[1]; const sessionId = match[2]; const relPath = `summaries/${summaryUser}/${sessionId}.md`; - const convPath = sessionPathsByUser.get(sessionId); + // Try matching session: first exact sessionId, then strip _summary suffix + const baseName = sessionId.replace(/_summary$/, ""); + const convPath = sessionPathsByKey.get(sessionId) ?? sessionPathsByKey.get(baseName); const convLink = convPath ? 
`[messages](${convPath})` : ""; const project = (row["project"] as string) || ""; const description = (row["description"] as string) || ""; From 39a5a42f07fe59e327b25f50c03c063f5f795419 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 21:10:51 +0000 Subject: [PATCH 13/33] fix: respect DEEPLAKE_CAPTURE=false across all hooks session-end and codex/stop now skip wiki worker when capture is disabled. session-start-setup always runs table sync (ensureTable/ensureSessionsTable) but skips placeholder creation when DEEPLAKE_CAPTURE=false. Previously the entire setup block was skipped, which broke table sync and caused stale query results in the fast path. --- claude-code/bundle/session-end.js | 2 ++ claude-code/bundle/session-start-setup.js | 5 ++++- codex/bundle/session-start-setup.js | 5 ++++- codex/bundle/stop.js | 2 ++ src/hooks/codex/session-start-setup.ts | 7 +++++-- src/hooks/codex/stop.ts | 3 ++- src/hooks/session-end.ts | 2 ++ src/hooks/session-start-setup.ts | 9 +++++++-- 8 files changed, 28 insertions(+), 7 deletions(-) diff --git a/claude-code/bundle/session-end.js b/claude-code/bundle/session-end.js index b8ac2a2..7f93d9f 100755 --- a/claude-code/bundle/session-end.js +++ b/claude-code/bundle/session-end.js @@ -147,6 +147,8 @@ LENGTH LIMIT: Keep the total summary under 4000 characters. Be dense and concise async function main() { if (process.env.DEEPLAKE_WIKI_WORKER === "1") return; + if (process.env.DEEPLAKE_CAPTURE === "false") + return; const input = await readStdin(); const sessionId = input.session_id; const cwd = input.cwd ?? 
""; diff --git a/claude-code/bundle/session-start-setup.js b/claude-code/bundle/session-start-setup.js index cb8c0df..9f77b3a 100755 --- a/claude-code/bundle/session-start-setup.js +++ b/claude-code/bundle/session-start-setup.js @@ -394,6 +394,7 @@ async function main() { } catch { } } + const captureEnabled = process.env.DEEPLAKE_CAPTURE !== "false"; if (input.session_id) { try { const config = loadConfig(); @@ -401,7 +402,9 @@ async function main() { const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName); await api.ensureTable(); await api.ensureSessionsTable(config.sessionsTableName); - await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); + if (captureEnabled) { + await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); + } log3("setup complete"); } } catch (e) { diff --git a/codex/bundle/session-start-setup.js b/codex/bundle/session-start-setup.js index 88b0be0..5975cd7 100755 --- a/codex/bundle/session-start-setup.js +++ b/codex/bundle/session-start-setup.js @@ -398,6 +398,7 @@ async function main() { } catch { } } + const captureEnabled = process.env.DEEPLAKE_CAPTURE !== "false"; if (input.session_id) { try { const config = loadConfig(); @@ -405,7 +406,9 @@ async function main() { const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName); await api.ensureTable(); await api.ensureSessionsTable(config.sessionsTableName); - await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); + if (captureEnabled) { + await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? 
"", config.userName, config.orgName, config.workspaceId); + } log3("setup complete"); } } catch (e) { diff --git a/codex/bundle/stop.js b/codex/bundle/stop.js index a92c4e6..9c4100c 100755 --- a/codex/bundle/stop.js +++ b/codex/bundle/stop.js @@ -420,6 +420,8 @@ async function main() { log3(`capture failed: ${e.message}`); } } + if (!CAPTURE) + return; const cwd = input.cwd ?? ""; const memoryTable = config.tableName; const sessionsTable = config.sessionsTableName; diff --git a/src/hooks/codex/session-start-setup.ts b/src/hooks/codex/session-start-setup.ts index 95ef23b..b79a11d 100644 --- a/src/hooks/codex/session-start-setup.ts +++ b/src/hooks/codex/session-start-setup.ts @@ -133,7 +133,8 @@ async function main(): Promise { } catch { /* non-fatal */ } } - // Table setup + placeholder + // Table setup + sync — always sync, only skip placeholder when capture disabled + const captureEnabled = process.env.DEEPLAKE_CAPTURE !== "false"; if (input.session_id) { try { const config = loadConfig(); @@ -141,7 +142,9 @@ async function main(): Promise { const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName); await api.ensureTable(); await api.ensureSessionsTable(config.sessionsTableName); - await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); + if (captureEnabled) { + await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); + } log("setup complete"); } } catch (e: any) { diff --git a/src/hooks/codex/stop.ts b/src/hooks/codex/stop.ts index 4346e78..ce953ec 100644 --- a/src/hooks/codex/stop.ts +++ b/src/hooks/codex/stop.ts @@ -189,7 +189,8 @@ async function main(): Promise { } } - // 2. Spawn wiki worker (session summary generation) + // 2. Spawn wiki worker (session summary generation) — skip when capture disabled + if (!CAPTURE) return; const cwd = input.cwd ?? 
""; const memoryTable = config.tableName; const sessionsTable = config.sessionsTableName; diff --git a/src/hooks/session-end.ts b/src/hooks/session-end.ts index 1560853..0ff26e4 100644 --- a/src/hooks/session-end.ts +++ b/src/hooks/session-end.ts @@ -100,6 +100,8 @@ LENGTH LIMIT: Keep the total summary under 4000 characters. Be dense and concise async function main(): Promise { // Skip if this is a sub-session spawned by the wiki worker itself if (process.env.DEEPLAKE_WIKI_WORKER === "1") return; + // Skip when capture is disabled (e.g. benchmark runs) + if (process.env.DEEPLAKE_CAPTURE === "false") return; const input = await readStdin(); const sessionId = input.session_id; diff --git a/src/hooks/session-start-setup.ts b/src/hooks/session-start-setup.ts index 0e25602..1f3dce4 100644 --- a/src/hooks/session-start-setup.ts +++ b/src/hooks/session-start-setup.ts @@ -124,7 +124,10 @@ async function main(): Promise { } catch { /* non-fatal */ } } - // Table setup + placeholder (fire-and-forget, async hook) + // Table setup + sync (fire-and-forget, async hook) + // Always sync tables so queries return fresh data. + // Only skip the placeholder when capture is disabled (e.g. benchmark runs). + const captureEnabled = process.env.DEEPLAKE_CAPTURE !== "false"; if (input.session_id) { try { const config = loadConfig(); @@ -132,7 +135,9 @@ async function main(): Promise { const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, config.tableName); await api.ensureTable(); await api.ensureSessionsTable(config.sessionsTableName); - await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? "", config.userName, config.orgName, config.workspaceId); + if (captureEnabled) { + await createPlaceholder(api, config.tableName, input.session_id, input.cwd ?? 
"", config.userName, config.orgName, config.workspaceId); + } log("setup complete"); } } catch (e: any) { From 126372f3c699983d3ab3eae9356f40f3a2e1d4c2 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Tue, 14 Apr 2026 21:11:09 +0000 Subject: [PATCH 14/33] feat: fast-path grep and read now search both memory and sessions tables Grep fast path queries memory (summaries) and sessions (raw dialogue) in parallel. Read fast path falls back to the sessions table for /sessions/* paths. Previously only the memory table was searched, so exact dates and quotes only present in raw session data were invisible to the fast path. --- claude-code/bundle/pre-tool-use.js | 40 +++++++++++++++++++-- src/hooks/pre-tool-use.ts | 56 ++++++++++++++++++++++++++---- 2 files changed, 87 insertions(+), 9 deletions(-) diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index f5ed00a..d02660a 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -503,15 +503,41 @@ async function main() { })); return; } + if (virtualPath.startsWith("/sessions/")) { + const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; + try { + const sessionRows = await api.query(`SELECT message::text AS content FROM "${sessionsTable}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); + if (sessionRows.length > 0 && sessionRows[0]["content"]) { + console.log(JSON.stringify({ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "allow", + updatedInput: { + command: `echo ${JSON.stringify(sessionRows[0]["content"])}`, + description: `[DeepLake direct] cat ${virtualPath}` + } + } + })); + return; + } + } catch { + } + } } else if (input.tool_name === "Grep") { const pattern = input.tool_input.pattern ?? ""; const ignoreCase = !!input.tool_input["-i"]; log3(`direct grep: ${pattern}`); - const rows = await api.query(`SELECT path, summary FROM "${table}" WHERE summary ${ignoreCase ? 
"ILIKE" : "LIKE"} '%${sqlLike(pattern)}%' LIMIT 5`); - if (rows.length > 0) { + const likeOp = ignoreCase ? "ILIKE" : "LIKE"; + const escapedPattern = sqlLike(pattern); + const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; + const [memoryRows, sessionRows] = await Promise.all([ + api.query(`SELECT path, summary FROM "${table}" WHERE summary ${likeOp} '%${escapedPattern}%' LIMIT 5`).catch(() => []), + api.query(`SELECT path, message::text AS content FROM "${sessionsTable}" WHERE message::text ${likeOp} '%${escapedPattern}%' LIMIT 3`).catch(() => []) + ]); + if (memoryRows.length > 0 || sessionRows.length > 0) { const allResults = []; const re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? "i" : ""); - for (const row of rows) { + for (const row of memoryRows) { const p = row["path"]; const text = row["summary"]; if (!text) @@ -519,6 +545,14 @@ async function main() { const matches = text.split("\n").filter((line) => re.test(line)).slice(0, 5).map((line) => `${p}:${line.slice(0, 300)}`); allResults.push(...matches); } + for (const row of sessionRows) { + const p = row["path"]; + const text = row["content"]; + if (!text) + continue; + const matches = text.split(/(?:"text"\s*:\s*")/g).filter((chunk) => re.test(chunk)).slice(0, 3).map((chunk) => `${p}:${chunk.slice(0, 300).replace(/\\n/g, " ")}`); + allResults.push(...matches); + } const results = allResults.join("\n"); console.log(JSON.stringify({ hookSpecificOutput: { diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 24a8fb4..2086bbc 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -173,6 +173,7 @@ async function main(): Promise { if (input.tool_name === "Read") { const virtualPath = rewritePaths((input.tool_input.file_path as string) ?? 
""); log(`direct read: ${virtualPath}`); + // Try memory table first (summaries) const rows = await api.query( `SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` ); @@ -189,18 +190,50 @@ async function main(): Promise { })); return; } + // Try sessions table (raw data) — for paths like /sessions/conv_N_session_M.json + if (virtualPath.startsWith("/sessions/")) { + const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; + try { + const sessionRows = await api.query( + `SELECT message::text AS content FROM "${sessionsTable}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` + ); + if (sessionRows.length > 0 && sessionRows[0]["content"]) { + console.log(JSON.stringify({ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "allow", + updatedInput: { + command: `echo ${JSON.stringify(sessionRows[0]["content"])}`, + description: `[DeepLake direct] cat ${virtualPath}`, + }, + }, + })); + return; + } + } catch { /* fall through to shell */ } + } } else if (input.tool_name === "Grep") { const pattern = (input.tool_input.pattern as string) ?? ""; const ignoreCase = !!input.tool_input["-i"]; log(`direct grep: ${pattern}`); - // Single query: fetch path + content together (avoids N+1 round-trips) - const rows = await api.query( - `SELECT path, summary FROM "${table}" WHERE summary ${ignoreCase ? "ILIKE" : "LIKE"} '%${sqlLike(pattern)}%' LIMIT 5` - ); - if (rows.length > 0) { + const likeOp = ignoreCase ? "ILIKE" : "LIKE"; + const escapedPattern = sqlLike(pattern); + const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? 
"sessions"; + + // Search both memory (summaries) and sessions (raw data) in parallel + const [memoryRows, sessionRows] = await Promise.all([ + api.query( + `SELECT path, summary FROM "${table}" WHERE summary ${likeOp} '%${escapedPattern}%' LIMIT 5` + ).catch(() => [] as Record[]), + api.query( + `SELECT path, message::text AS content FROM "${sessionsTable}" WHERE message::text ${likeOp} '%${escapedPattern}%' LIMIT 3` + ).catch(() => [] as Record[]), + ]); + + if (memoryRows.length > 0 || sessionRows.length > 0) { const allResults: string[] = []; const re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? "i" : ""); - for (const row of rows) { + for (const row of memoryRows) { const p = row["path"] as string; const text = row["summary"] as string; if (!text) continue; @@ -210,6 +243,17 @@ async function main(): Promise { .map(line => `${p}:${line.slice(0, 300)}`); allResults.push(...matches); } + for (const row of sessionRows) { + const p = row["path"] as string; + const text = row["content"] as string; + if (!text) continue; + // Extract matching dialogue turns from session JSON + const matches = text.split(/(?:"text"\s*:\s*")/g) + .filter(chunk => re.test(chunk)) + .slice(0, 3) + .map(chunk => `${p}:${chunk.slice(0, 300).replace(/\\n/g, " ")}`); + allResults.push(...matches); + } const results = allResults.join("\n"); console.log(JSON.stringify({ hookSpecificOutput: { From ee1ad25d717ac4f0767face73ac6de4b95631515 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Wed, 15 Apr 2026 22:58:12 +0000 Subject: [PATCH 15/33] feat: shared grep handler with single SQL query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract parseBashGrep() and handleGrepDirect() into a shared module used by both Claude Code and Codex pre-tool-use hooks. Replaces the old multi-query approach (bootstrap + BM25 + prefetch + individual reads = 112 queries) with a single LIKE query + in-memory regex refinement. 
Supports all grep flags: -w, -i, -l, -c, -n, -v, -F, -r. Searches only the memory/summaries table — sessions contain raw JSONB which is slow to scan and produces noisy results. --- src/hooks/grep-direct.ts | 169 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 169 insertions(+) create mode 100644 src/hooks/grep-direct.ts diff --git a/src/hooks/grep-direct.ts b/src/hooks/grep-direct.ts new file mode 100644 index 0000000..85ff884 --- /dev/null +++ b/src/hooks/grep-direct.ts @@ -0,0 +1,169 @@ +/** + * Shared grep handler — single SQL query + in-memory regex refinement. + * Used by both Claude Code and Codex pre-tool-use hooks. + */ + +import type { DeeplakeApi } from "../deeplake-api.js"; +import { sqlStr, sqlLike } from "../utils/sql.js"; + +export interface GrepParams { + pattern: string; + targetPath: string; + ignoreCase: boolean; + wordMatch: boolean; + filesOnly: boolean; + countOnly: boolean; + lineNumber: boolean; + invertMatch: boolean; + fixedString: boolean; +} + +/** Parse a bash grep/egrep/fgrep command string into GrepParams. 
*/ +export function parseBashGrep(cmd: string): GrepParams | null { + const first = cmd.trim().split(/\s*\|\s*/)[0]; + if (!/^(grep|egrep|fgrep)\b/.test(first)) return null; + + const isFixed = first.startsWith("fgrep"); + + // Tokenize respecting single/double quotes + const tokens: string[] = []; + let pos = 0; + while (pos < first.length) { + if (first[pos] === " " || first[pos] === "\t") { pos++; continue; } + if (first[pos] === "'" || first[pos] === '"') { + const q = first[pos]; + let end = pos + 1; + while (end < first.length && first[end] !== q) end++; + tokens.push(first.slice(pos + 1, end)); + pos = end + 1; + } else { + let end = pos; + while (end < first.length && first[end] !== " " && first[end] !== "\t") end++; + tokens.push(first.slice(pos, end)); + pos = end; + } + } + + let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, + lineNumber = false, invertMatch = false, fixedString = isFixed; + + let ti = 1; + while (ti < tokens.length && tokens[ti].startsWith("-") && tokens[ti] !== "--") { + const flag = tokens[ti]; + if (flag.startsWith("--")) { + const handlers: Record void> = { + "--ignore-case": () => { ignoreCase = true; }, + "--word-regexp": () => { wordMatch = true; }, + "--files-with-matches": () => { filesOnly = true; }, + "--count": () => { countOnly = true; }, + "--line-number": () => { lineNumber = true; }, + "--invert-match": () => { invertMatch = true; }, + "--fixed-strings": () => { fixedString = true; }, + }; + handlers[flag]?.(); + ti++; continue; + } + for (const c of flag.slice(1)) { + switch (c) { + case "i": ignoreCase = true; break; + case "w": wordMatch = true; break; + case "l": filesOnly = true; break; + case "c": countOnly = true; break; + case "n": lineNumber = true; break; + case "v": invertMatch = true; break; + case "F": fixedString = true; break; + // r/R/E: no-op (recursive implied, extended default) + } + } + ti++; + } + if (ti < tokens.length && tokens[ti] === "--") ti++; + if (ti >= 
tokens.length) return null; + + let target = tokens[ti + 1] ?? "/"; + if (target === "." || target === "./") target = "/"; + + return { + pattern: tokens[ti], targetPath: target, + ignoreCase, wordMatch, filesOnly, countOnly, lineNumber, invertMatch, fixedString, + }; +} + +/** Run grep via single SQL query + in-memory regex refinement. */ +export async function handleGrepDirect( + api: DeeplakeApi, + table: string, + sessionsTable: string, + params: GrepParams, +): Promise { + if (!params.pattern) return null; + + const { pattern, targetPath, ignoreCase, wordMatch, filesOnly, countOnly, + lineNumber, invertMatch, fixedString } = params; + + const likeOp = ignoreCase ? "ILIKE" : "LIKE"; + const escapedLike = sqlLike(pattern); + + // ── path filter ── + let pathFilter = ""; + if (targetPath && targetPath !== "/") { + const clean = targetPath.replace(/\/+$/, ""); + pathFilter = ` AND (path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; + } + + // For regex patterns, skip content LIKE (can't match regex syntax). + // Fetch all files under the path and filter in-memory instead. + const hasRegexMeta = !fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); + const contentFilter = hasRegexMeta ? "" : ` AND summary ${likeOp} '%${escapedLike}%'`; + + // Search only the memory/summaries table — sessions contain raw JSONB + // (prompts, tool calls) which is slow to scan and produces noisy results. + // Summaries already contain all useful content from sessions. + const queries: Promise[]>[] = [ + api.query( + `SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`, + ).catch(() => [] as Record[]), + ]; + + const allRows = (await Promise.all(queries)).flat(); + + // ── regex refinement ── + let reStr = fixedString + ? pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") + : pattern; + if (wordMatch) reStr = `\\b${reStr}\\b`; + let re: RegExp; + try { re = new RegExp(reStr, ignoreCase ? 
"i" : ""); } + catch { re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? "i" : ""); } + + const output: string[] = []; + const multi = allRows.length > 1; + + for (const row of allRows) { + const p = row["path"] as string; + const text = row["content"] as string; + if (!text) continue; + + const lines = text.split("\n"); + const matched: string[] = []; + + for (let i = 0; i < lines.length; i++) { + if (re.test(lines[i]) !== !!invertMatch) { + if (filesOnly) { output.push(p); break; } + const prefix = multi ? `${p}:` : ""; + const ln = lineNumber ? `${i + 1}:` : ""; + matched.push(`${prefix}${ln}${lines[i]}`); + } + } + + if (!filesOnly) { + if (countOnly) { + output.push(`${multi ? `${p}:` : ""}${matched.length}`); + } else { + output.push(...matched); + } + } + } + + return output.join("\n") || "(no matches)"; +} From 6111aafdf94c3d3127af6b3f07f597bea94e3c51 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Wed, 15 Apr 2026 22:58:38 +0000 Subject: [PATCH 16/33] feat: fast-path all read commands via direct SQL Replace shell spawn with direct SQL queries for all read-only commands targeting the deeplake memory VFS. Each command now executes 1 SQL query instead of 2-4 bootstrap queries + command execution. Commands optimized: cat, head, tail, ls, find, wc -l, grep (via shared module). Handles real-world patterns: 2>/dev/null, 2>&1, cat|head pipes. Routes /sessions/* paths directly to sessions table (skip memory). Generates virtual /index.md from metadata when no physical row exists. 
Benchmarks (activeloop/hivemind, 405 files): - grep: 143.9s/108q -> 0.46s/1q (312x) - cat: 995ms/3q -> 151ms/1q (7x) - ls: 920ms/2q -> 128ms/1q (7x) - head: 1065ms/3q -> 142ms/1q (8x) - e2e: 454s -> 52s (8.7x, 0 shell spawns) --- src/hooks/pre-tool-use.ts | 313 +++++++++++++++++++++++++++----------- 1 file changed, 226 insertions(+), 87 deletions(-) diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 2086bbc..0034562 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -9,6 +9,7 @@ import { readStdin } from "../utils/stdin.js"; import { loadConfig } from "../config.js"; import { DeeplakeApi } from "../deeplake-api.js"; import { sqlStr, sqlLike } from "../utils/sql.js"; +import { type GrepParams, parseBashGrep, handleGrepDirect } from "./grep-direct.js"; import { log as _log } from "../utils/debug.js"; const log = (msg: string) => _log("pre", msg); @@ -128,6 +129,45 @@ function getShellCommand(toolName: string, toolInput: Record): return null; } +// ── Output helper ──────────────────────────────────────────────────────────── + +function emitResult(command: string, description: string): void { + console.log(JSON.stringify({ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "allow", + updatedInput: { command, description }, + }, + })); +} + +// ── Grep parameter extraction (Claude Code specific) ───────────────────────── + +/** Extract grep parameters from Grep tool input or Bash grep command. */ +function extractGrepParams( + toolName: string, + toolInput: Record, + shellCmd: string, +): GrepParams | null { + if (toolName === "Grep") { + const outputMode = (toolInput.output_mode as string) ?? "files_with_matches"; + return { + pattern: (toolInput.pattern as string) ?? "", + targetPath: rewritePaths((toolInput.path as string) ?? 
"") || "/", + ignoreCase: !!toolInput["-i"], + wordMatch: false, + filesOnly: outputMode === "files_with_matches", + countOnly: outputMode === "count", + lineNumber: !!toolInput["-n"], + invertMatch: false, + fixedString: false, + }; + } + if (toolName === "Bash") return parseBashGrep(shellCmd); + return null; +} + + async function main(): Promise { const input = await readStdin(); log(`hook fired: tool=${input.tool_name} input=${JSON.stringify(input.tool_input)}`); @@ -165,106 +205,205 @@ async function main(): Promise { // ── Fast path: handle Read and Grep directly via SQL (no shell spawn) ── const config = loadConfig(); - if (config && (input.tool_name === "Read" || input.tool_name === "Grep")) { + if (config) { const table = process.env["DEEPLAKE_TABLE"] ?? "memory"; + const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); try { - if (input.tool_name === "Read") { - const virtualPath = rewritePaths((input.tool_input.file_path as string) ?? 
""); - log(`direct read: ${virtualPath}`); - // Try memory table first (summaries) - const rows = await api.query( - `SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` - ); - if (rows.length > 0 && rows[0]["summary"]) { - console.log(JSON.stringify({ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "allow", - updatedInput: { - command: `echo ${JSON.stringify(rows[0]["summary"])}`, - description: `[DeepLake direct] cat ${virtualPath}`, - }, - }, - })); + // ── Grep (Grep tool or Bash grep) — single SQL query ── + const grepParams = extractGrepParams(input.tool_name, input.tool_input, shellCmd); + if (grepParams) { + log(`direct grep: pattern=${grepParams.pattern} path=${grepParams.targetPath}`); + const result = await handleGrepDirect(api, table, sessionsTable, grepParams); + if (result !== null) { + emitResult(`echo ${JSON.stringify(result)}`, `[DeepLake direct] grep ${grepParams.pattern}`); return; } - // Try sessions table (raw data) — for paths like /sessions/conv_N_session_M.json - if (virtualPath.startsWith("/sessions/")) { - const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; - try { - const sessionRows = await api.query( - `SELECT message::text AS content FROM "${sessionsTable}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` + } + + // ── Read file: Read tool, or Bash cat/head/tail ── + { + let virtualPath: string | null = null; + let lineLimit = 0; // 0 = all lines + let fromEnd = false; // true = tail + + if (input.tool_name === "Read") { + virtualPath = rewritePaths((input.tool_input.file_path as string) ?? ""); + } else if (input.tool_name === "Bash") { + // cat [2>...] [| grep ... | head -N] or [| head -N] + // Strip stderr redirect (2>/dev/null, 2>&1, etc.) 
and optional grep -v pipe + const catCmd = shellCmd.replace(/\s+2>\S+/g, "").trim(); + const catPipeHead = catCmd.match(/^cat\s+(\S+?)\s*(?:\|[^|]*)*\|\s*head\s+(?:-n?\s*)?(-?\d+)\s*$/); + if (catPipeHead) { virtualPath = catPipeHead[1]; lineLimit = Math.abs(parseInt(catPipeHead[2], 10)); } + // cat + if (!virtualPath) { + const catMatch = catCmd.match(/^cat\s+(\S+)\s*$/); + if (catMatch) virtualPath = catMatch[1]; + } + // head [-n] N + if (!virtualPath) { + const headMatch = shellCmd.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? + shellCmd.match(/^head\s+(\S+)\s*$/); + if (headMatch) { + if (headMatch[2]) { virtualPath = headMatch[2]; lineLimit = Math.abs(parseInt(headMatch[1], 10)); } + else { virtualPath = headMatch[1]; lineLimit = 10; } + } + } + // tail [-n] N + if (!virtualPath) { + const tailMatch = shellCmd.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? + shellCmd.match(/^tail\s+(\S+)\s*$/); + if (tailMatch) { + fromEnd = true; + if (tailMatch[2]) { virtualPath = tailMatch[2]; lineLimit = Math.abs(parseInt(tailMatch[1], 10)); } + else { virtualPath = tailMatch[1]; lineLimit = 10; } + } + } + // wc -l + if (!virtualPath) { + const wcMatch = shellCmd.match(/^wc\s+-l\s+(\S+)\s*$/); + if (wcMatch) { virtualPath = wcMatch[1]; lineLimit = -1; } // -1 = count mode + } + } + + if (virtualPath && !virtualPath.endsWith("/")) { + log(`direct read: ${virtualPath}`); + let content: string | null = null; + + if (virtualPath.startsWith("/sessions/")) { + // Session files live in the sessions table — skip memory + try { + const sessionRows = await api.query( + `SELECT message::text AS content FROM "${sessionsTable}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` + ); + if (sessionRows.length > 0 && sessionRows[0]["content"]) { + content = sessionRows[0]["content"] as string; + } + } catch { /* fall through to shell */ } + } else { + // Memory table (summaries, notes, etc.) 
+ const rows = await api.query( + `SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` ); - if (sessionRows.length > 0 && sessionRows[0]["content"]) { - console.log(JSON.stringify({ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "allow", - updatedInput: { - command: `echo ${JSON.stringify(sessionRows[0]["content"])}`, - description: `[DeepLake direct] cat ${virtualPath}`, - }, - }, - })); + if (rows.length > 0 && rows[0]["summary"]) { + content = rows[0]["summary"] as string; + } else if (virtualPath === "/index.md") { + // Virtual index — generate from metadata + const idxRows = await api.query( + `SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC` + ); + const lines = ["# Memory Index", "", `${idxRows.length} sessions:`, ""]; + for (const r of idxRows) { + const p = r["path"] as string; + const proj = r["project"] as string || ""; + const desc = (r["description"] as string || "").slice(0, 120); + const date = (r["creation_date"] as string || "").slice(0, 10); + lines.push(`- [${p}](${p}) ${date} ${proj ? `[${proj}]` : ""} ${desc}`); + } + content = lines.join("\n"); + } + } + + if (content !== null) { + if (lineLimit === -1) { + const count = content.split("\n").length; + emitResult(`echo ${JSON.stringify(`${count} ${virtualPath}`)}`, `[DeepLake direct] wc -l ${virtualPath}`); return; } - } catch { /* fall through to shell */ } + if (lineLimit > 0) { + const lines = content.split("\n"); + content = fromEnd ? lines.slice(-lineLimit).join("\n") : lines.slice(0, lineLimit).join("\n"); + } + const label = lineLimit > 0 ? (fromEnd ? `tail -${lineLimit}` : `head -${lineLimit}`) : "cat"; + emitResult(`echo ${JSON.stringify(content)}`, `[DeepLake direct] ${label} ${virtualPath}`); + return; + } } - } else if (input.tool_name === "Grep") { - const pattern = (input.tool_input.pattern as string) ?? 
""; - const ignoreCase = !!input.tool_input["-i"]; - log(`direct grep: ${pattern}`); - const likeOp = ignoreCase ? "ILIKE" : "LIKE"; - const escapedPattern = sqlLike(pattern); - const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; - - // Search both memory (summaries) and sessions (raw data) in parallel - const [memoryRows, sessionRows] = await Promise.all([ - api.query( - `SELECT path, summary FROM "${table}" WHERE summary ${likeOp} '%${escapedPattern}%' LIMIT 5` - ).catch(() => [] as Record[]), - api.query( - `SELECT path, message::text AS content FROM "${sessionsTable}" WHERE message::text ${likeOp} '%${escapedPattern}%' LIMIT 3` - ).catch(() => [] as Record[]), - ]); - - if (memoryRows.length > 0 || sessionRows.length > 0) { - const allResults: string[] = []; - const re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? "i" : ""); - for (const row of memoryRows) { - const p = row["path"] as string; - const text = row["summary"] as string; - if (!text) continue; - const matches = text.split("\n") - .filter(line => re.test(line)) - .slice(0, 5) - .map(line => `${p}:${line.slice(0, 300)}`); - allResults.push(...matches); + } + + // ── ls: Bash ls or Glob tool ── + { + let lsDir: string | null = null; + let longFormat = false; + + if (input.tool_name === "Glob") { + lsDir = rewritePaths((input.tool_input.path as string) ?? "") || "/"; + } else if (input.tool_name === "Bash") { + const lsMatch = shellCmd.match(/^ls\s+(?:-([a-zA-Z]+)\s+)?(\S+)?\s*$/); + if (lsMatch) { + lsDir = lsMatch[2] ?? "/"; + longFormat = (lsMatch[1] ?? 
"").includes("l"); } - for (const row of sessionRows) { + } + + if (lsDir) { + const dir = lsDir.replace(/\/+$/, "") || "/"; + log(`direct ls: ${dir}`); + // Query the right table(s) based on path + const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); + const isRoot = dir === "/"; + const lsQueries: Promise[]>[] = []; + if (!isSessionDir) { + lsQueries.push(api.query( + `SELECT path, size_bytes FROM "${table}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' ORDER BY path` + ).catch(() => [])); + } + if (isSessionDir || isRoot) { + lsQueries.push(api.query( + `SELECT path, size_bytes FROM "${sessionsTable}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' ORDER BY path` + ).catch(() => [])); + } + const rows = (await Promise.all(lsQueries)).flat(); + const entries = new Map(); + const prefix = dir === "/" ? "/" : dir + "/"; + for (const row of rows) { const p = row["path"] as string; - const text = row["content"] as string; - if (!text) continue; - // Extract matching dialogue turns from session JSON - const matches = text.split(/(?:"text"\s*:\s*")/g) - .filter(chunk => re.test(chunk)) - .slice(0, 3) - .map(chunk => `${p}:${chunk.slice(0, 300).replace(/\\n/g, " ")}`); - allResults.push(...matches); + if (!p.startsWith(prefix) && dir !== "/") continue; + const rest = dir === "/" ? p.slice(1) : p.slice(prefix.length); + const slash = rest.indexOf("/"); + const name = slash === -1 ? rest : rest.slice(0, slash); + if (!name) continue; + const existing = entries.get(name); + if (slash !== -1) { + if (!existing) entries.set(name, { isDir: true, size: 0 }); + } else { + entries.set(name, { isDir: false, size: (row["size_bytes"] as number) ?? 0 }); + } + } + const lines: string[] = []; + for (const [name, info] of [...entries].sort((a, b) => a[0].localeCompare(b[0]))) { + if (longFormat) { + const type = info.isDir ? "drwxr-xr-x" : "-rw-r--r--"; + const size = String(info.isDir ? 
0 : info.size).padStart(6); + lines.push(`${type} 1 user user ${size} ${name}${info.isDir ? "/" : ""}`); + } else { + lines.push(name + (info.isDir ? "/" : "")); + } + } + emitResult(`echo ${JSON.stringify(lines.join("\n") || "(empty directory)")}`, `[DeepLake direct] ls ${dir}`); + return; + } + } + + // ── find -name '' ── + if (input.tool_name === "Bash") { + const findMatch = shellCmd.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); + if (findMatch) { + const dir = findMatch[1].replace(/\/+$/, "") || "/"; + const namePattern = findMatch[2].replace(/\*/g, "%").replace(/\?/g, "_"); + log(`direct find: ${dir} -name '${findMatch[2]}'`); + const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); + const findTable = isSessionDir ? sessionsTable : table; + const rows = await api.query( + `SELECT path FROM "${findTable}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' AND filename LIKE '${namePattern}' ORDER BY path` + ); + let result = rows.map(r => r["path"] as string).join("\n") || ""; + // Handle piped wc -l + if (/\|\s*wc\s+-l\s*$/.test(shellCmd)) { + result = String(rows.length); } - const results = allResults.join("\n"); - console.log(JSON.stringify({ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "allow", - updatedInput: { - command: `echo ${JSON.stringify(results || "(no matches)")}`, - description: `[DeepLake direct] grep ${pattern}`, - }, - }, - })); + emitResult(`echo ${JSON.stringify(result || "(no matches)")}`, `[DeepLake direct] find ${dir}`); return; } } From aac9724446da2c539668d4c77ce4cdcff3171136 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Wed, 15 Apr 2026 22:58:46 +0000 Subject: [PATCH 17/33] feat: codex grep fast-path via shared module Replace inline grep handler (LIMIT 5, no path filter) with shared handleGrepDirect() from grep-direct.ts. Same single-query approach as Claude Code hook. 
--- src/hooks/codex/pre-tool-use.ts | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/src/hooks/codex/pre-tool-use.ts b/src/hooks/codex/pre-tool-use.ts index 67a17c7..1fe06a3 100644 --- a/src/hooks/codex/pre-tool-use.ts +++ b/src/hooks/codex/pre-tool-use.ts @@ -27,6 +27,7 @@ import { readStdin } from "../../utils/stdin.js"; import { loadConfig } from "../../config.js"; import { DeeplakeApi } from "../../deeplake-api.js"; import { sqlStr, sqlLike } from "../../utils/sql.js"; +import { parseBashGrep, handleGrepDirect } from "../grep-direct.js"; import { log as _log } from "../../utils/debug.js"; const log = (msg: string) => _log("codex-pre", msg); @@ -197,30 +198,14 @@ async function main(): Promise { } } - // Detect: grep [-ri] - const grepMatch = rewritten.match(/^grep\s+(?:-[a-zA-Z]+\s+)*(?:'([^']*)'|"([^"]*)"|(\S+))\s+(\S+)/); - if (grepMatch) { - const pattern = grepMatch[1] ?? grepMatch[2] ?? grepMatch[3]; - const ignoreCase = /\s-[a-zA-Z]*i/.test(rewritten); - log(`direct grep: ${pattern}`); - const rows = await api.query( - `SELECT path, summary FROM "${table}" WHERE summary ${ignoreCase ? "ILIKE" : "LIKE"} '%${sqlLike(pattern)}%' LIMIT 5` - ); - if (rows.length > 0) { - const allResults: string[] = []; - const re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? "i" : ""); - for (const row of rows) { - const p = row["path"] as string; - const text = row["summary"] as string; - if (!text) continue; - const matches = text.split("\n") - .filter(line => re.test(line)) - .slice(0, 5) - .map(line => `${p}:${line.slice(0, 300)}`); - allResults.push(...matches); - } - const results = allResults.join("\n"); - blockWithContent(results || "(no matches)"); + // Detect: grep/egrep/fgrep with all flags + const grepParams = parseBashGrep(rewritten); + if (grepParams) { + const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? 
"sessions"; + log(`direct grep: pattern=${grepParams.pattern} path=${grepParams.targetPath}`); + const result = await handleGrepDirect(api, table, sessionsTable, grepParams); + if (result !== null) { + blockWithContent(result); } } } catch (e: any) { From 7d9ca0c8d90c7fd4ee0d2a381f02063ad41f630d Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Wed, 15 Apr 2026 22:58:53 +0000 Subject: [PATCH 18/33] fix: grep interceptor mount="/" never matching When mountPoint is "/", the check t.startsWith(mount + "/") becomes t.startsWith("//") which never matches. Use a pre-computed mountPrefix that handles the root case correctly. --- src/shell/grep-interceptor.ts | 48 +++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/src/shell/grep-interceptor.ts b/src/shell/grep-interceptor.ts index 354f9a3..0b02cc6 100644 --- a/src/shell/grep-interceptor.ts +++ b/src/shell/grep-interceptor.ts @@ -5,6 +5,8 @@ import type { DeeplakeFs } from "./deeplake-fs.js"; import { sqlStr as esc, sqlLike } from "../utils/sql.js"; +const MAX_FALLBACK_CANDIDATES = 500; + /** * Custom grep command for just-bash that replaces the built-in when the target * paths are under the Deeplake mount. Two-phase strategy: @@ -20,6 +22,7 @@ export function createGrepCommand( client: DeeplakeApi, fs: DeeplakeFs, table: string, + sessionsTable?: string, ) { return defineCommand("grep", async (args, ctx) => { const parsed = yargsParser(args, { @@ -44,34 +47,57 @@ export function createGrepCommand( const mount = fs.mountPoint; // Only intercept if all targets are under our mount point - const allUnderMount = targets.every(t => t === mount || t.startsWith(mount + "/")); + const mountPrefix = mount === "/" ? 
"/" : mount + "/"; + const allUnderMount = targets.every(t => t === mount || t.startsWith(mountPrefix)); if (!allUnderMount) { // Signal to caller that this command doesn't handle it return { stdout: "", stderr: "", exitCode: 127 }; } - // ── Phase 1: coarse filter — try BM25, fall back to in-memory ────────── + // ── Phase 1: coarse filter — BM25 on summaries + LIKE on sessions ───── let candidates: string[] = []; try { - const bm25 = await Promise.race([ + const queries: Promise[]>[] = [ client.query(`SELECT path FROM "${table}" WHERE summary <#> '${esc(pattern)}' LIMIT 50`), + ]; + if (sessionsTable) { + queries.push( + client.query(`SELECT path FROM "${sessionsTable}" WHERE message::text LIKE '%${sqlLike(pattern)}%' LIMIT 10`) + ); + } + const results = await Promise.race([ + Promise.all(queries), new Promise((_, reject) => setTimeout(() => reject(new Error("timeout")), 3000)), ]); - candidates = bm25.map(r => r["path"] as string).filter(Boolean); + for (const rows of results) { + candidates.push(...rows.map(r => r["path"] as string).filter(Boolean)); + } } catch { - // BM25 index not available or timed out — fall back to in-memory + // BM25/LIKE not available or timed out — fall back to in-memory } + // Narrow candidates to those under the requested targets + const withinTargets = (p: string) => + targets.some(t => t === "/" || p === t || p.startsWith(t + "/")); + if (candidates.length === 0) { - // No BM25 results or no index — use all files under targets and search in-memory - candidates = fs.getAllPaths().filter(p => !p.endsWith("/")); + // No BM25 results or no index — only scan files under requested targets. 
+ candidates = fs.getAllPaths().filter(p => !p.endsWith("/") && withinTargets(p)); + if (candidates.length > MAX_FALLBACK_CANDIDATES) { + candidates = candidates.slice(0, MAX_FALLBACK_CANDIDATES); + } + } else { + candidates = candidates.filter(c => withinTargets(c)); } - // Narrow candidates to those under the requested targets - candidates = candidates.filter(c => - targets.some(t => t === "/" || c === t || c.startsWith(t + "/")) - ); + // Preserve order and remove duplicates to avoid repeated reads. + const seen = new Set(); + candidates = candidates.filter((c) => { + if (seen.has(c)) return false; + seen.add(c); + return true; + }); // ── Phase 2: prefetch into content cache (single batch query) ─────────── await fs.prefetch(candidates); From 6629871f42dce1558447d63aed0a91066291fad0 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Wed, 15 Apr 2026 22:59:01 +0000 Subject: [PATCH 19/33] perf: remove deeplake_sync_table from all hooks Removes 11 occurrences of deeplake_sync_table() across bootstrap, flush, session-start-setup, and wiki-worker (both CC and Codex). Each call added 100-300ms latency and was unnecessary for read operations. Saves 2-4 queries per shell spawn and 1 query per session start / wiki generation. 
--- src/hooks/codex/session-start-setup.ts | 1 - src/hooks/codex/wiki-worker.ts | 5 +---- src/hooks/session-start-setup.ts | 1 - src/hooks/wiki-worker.ts | 5 +---- src/shell/deeplake-fs.ts | 18 +----------------- 5 files changed, 3 insertions(+), 27 deletions(-) diff --git a/src/hooks/codex/session-start-setup.ts b/src/hooks/codex/session-start-setup.ts index b79a11d..6beb904 100644 --- a/src/hooks/codex/session-start-setup.ts +++ b/src/hooks/codex/session-start-setup.ts @@ -76,7 +76,6 @@ function isNewer(latest: string, current: string): boolean { async function createPlaceholder(api: DeeplakeApi, table: string, sessionId: string, cwd: string, userName: string, orgName: string, workspaceId: string): Promise { const summaryPath = `/summaries/${userName}/${sessionId}.md`; - await api.query(`SELECT deeplake_sync_table('${table}')`); const existing = await api.query( `SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1` ); diff --git a/src/hooks/codex/wiki-worker.ts b/src/hooks/codex/wiki-worker.ts index 3c45514..a65dbeb 100644 --- a/src/hooks/codex/wiki-worker.ts +++ b/src/hooks/codex/wiki-worker.ts @@ -83,7 +83,6 @@ async function main(): Promise { try { // 1. Fetch session events from sessions table wlog("fetching session events"); - await query(`SELECT deeplake_sync_table('${cfg.sessionsTable}')`); const rows = await query( `SELECT message, creation_date FROM "${cfg.sessionsTable}" ` + `WHERE path LIKE E'${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC` @@ -113,7 +112,6 @@ async function main(): Promise { // 2. 
Check for existing summary (resumed session) let prevOffset = 0; try { - await query(`SELECT deeplake_sync_table('${cfg.memoryTable}')`); const sumRows = await query( `SELECT summary FROM "${cfg.memoryTable}" ` + `WHERE path = '${esc(`/summaries/${cfg.userName}/${cfg.sessionId}.md`)}' LIMIT 1` @@ -161,8 +159,7 @@ async function main(): Promise { const vpath = `/summaries/${cfg.userName}/${fname}`; const ts = new Date().toISOString(); - await query(`SELECT deeplake_sync_table('${cfg.memoryTable}')`); - const existing = await query( + const existing = await query( `SELECT path FROM "${cfg.memoryTable}" WHERE path = '${esc(vpath)}' LIMIT 1` ); diff --git a/src/hooks/session-start-setup.ts b/src/hooks/session-start-setup.ts index 1f3dce4..c44f819 100644 --- a/src/hooks/session-start-setup.ts +++ b/src/hooks/session-start-setup.ts @@ -71,7 +71,6 @@ function isNewer(latest: string, current: string): boolean { async function createPlaceholder(api: DeeplakeApi, table: string, sessionId: string, cwd: string, userName: string, orgName: string, workspaceId: string): Promise { const summaryPath = `/summaries/${userName}/${sessionId}.md`; - await api.query(`SELECT deeplake_sync_table('${table}')`); const existing = await api.query( `SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1` ); diff --git a/src/hooks/wiki-worker.ts b/src/hooks/wiki-worker.ts index 9155071..1ffa705 100644 --- a/src/hooks/wiki-worker.ts +++ b/src/hooks/wiki-worker.ts @@ -85,7 +85,6 @@ async function main(): Promise { try { // 1. Fetch session events from sessions table, reconstruct JSONL wlog("fetching session events"); - await query(`SELECT deeplake_sync_table('${cfg.sessionsTable}')`); const rows = await query( `SELECT message, creation_date FROM "${cfg.sessionsTable}" ` + `WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC` @@ -117,7 +116,6 @@ async function main(): Promise { // 2. 
Check for existing summary in memory table (resumed session) let prevOffset = 0; try { - await query(`SELECT deeplake_sync_table('${cfg.memoryTable}')`); const sumRows = await query( `SELECT summary FROM "${cfg.memoryTable}" ` + `WHERE path = '${esc(`/summaries/${cfg.userName}/${cfg.sessionId}.md`)}' LIMIT 1` @@ -166,8 +164,7 @@ async function main(): Promise { const vpath = `/summaries/${cfg.userName}/${fname}`; const ts = new Date().toISOString(); - await query(`SELECT deeplake_sync_table('${cfg.memoryTable}')`); - const existing = await query( + const existing = await query( `SELECT path FROM "${cfg.memoryTable}" WHERE path = '${esc(vpath)}' LIMIT 1` ); diff --git a/src/shell/deeplake-fs.ts b/src/shell/deeplake-fs.ts index e787c1b..a9cd895 100644 --- a/src/shell/deeplake-fs.ts +++ b/src/shell/deeplake-fs.ts @@ -94,22 +94,8 @@ export class DeeplakeFs implements IFileSystem { // Ensure the table exists before bootstrapping. await client.ensureTable(); - // Sync both tables in parallel before bootstrap queries. - // Track whether session sync succeeded — skip session bootstrap if it failed. - let sessionSyncOk = false; - const syncPromises: Promise[] = [ - client.query(`SELECT deeplake_sync_table('${table}')`), - ]; - if (sessionsTable) { - syncPromises.push( - client.query(`SELECT deeplake_sync_table('${sessionsTable}')`) - .then(() => { sessionSyncOk = true; }) - .catch(() => { /* sessions table may not exist yet */ }) - ); - } - await Promise.all(syncPromises); - // Bootstrap memory + sessions metadata in parallel. + let sessionSyncOk = true; const memoryBootstrap = (async () => { const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; try { @@ -210,8 +196,6 @@ export class DeeplakeFs implements IFileSystem { failures++; } } - // Sync so subsequent reads see the successfully written data. 
- await this.client.query(`SELECT deeplake_sync_table('${this.table}')`); if (failures > 0) { throw new Error(`flush: ${failures}/${rows.length} writes failed and were re-queued`); } From 212c81e74b0cf3772e3d10cd500f04353bd39d2b Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Wed, 15 Apr 2026 22:59:08 +0000 Subject: [PATCH 20/33] feat: SQL query tracing with DEEPLAKE_DEBUG Add traceSql() that logs query start/ok/fail with timing and row counts. Enabled via DEEPLAKE_DEBUG=1 or DEEPLAKE_TRACE_SQL=1. Outputs to stderr and to the debug log file. Essential for benchmarking and diagnosing slow queries. --- src/deeplake-api.ts | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/deeplake-api.ts b/src/deeplake-api.ts index 2825b24..b08e389 100644 --- a/src/deeplake-api.ts +++ b/src/deeplake-api.ts @@ -3,6 +3,19 @@ import { log as _log } from "./utils/debug.js"; import { sqlStr } from "./utils/sql.js"; const log = (msg: string) => _log("sdk", msg); +const TRACE_SQL = process.env.DEEPLAKE_TRACE_SQL === "1" || process.env.DEEPLAKE_DEBUG === "1"; +const DEBUG_FILE_LOG = process.env.DEEPLAKE_DEBUG === "1"; + +function summarizeSql(sql: string, maxLen = 220): string { + const compact = sql.replace(/\s+/g, " ").trim(); + return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; +} + +function traceSql(msg: string): void { + if (!TRACE_SQL) return; + process.stderr.write(`[deeplake-sql] ${msg}\n`); + if (DEBUG_FILE_LOG) log(msg); +} // ── Retry & concurrency primitives ────────────────────────────────────────── @@ -60,9 +73,18 @@ export class DeeplakeApi { /** Execute SQL with retry on transient errors and bounded concurrency. 
*/ async query(sql: string): Promise[]> { + const startedAt = Date.now(); + const summary = summarizeSql(sql); + traceSql(`query start: ${summary}`); await this._sem.acquire(); try { - return await this._queryWithRetry(sql); + const rows = await this._queryWithRetry(sql); + traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); + return rows; + } catch (e: unknown) { + const message = e instanceof Error ? e.message : String(e); + traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); + throw e; } finally { this._sem.release(); } From 8dcfbf58e8baa7fd98f5d29c8ae902a938020ccf Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Wed, 15 Apr 2026 22:59:14 +0000 Subject: [PATCH 21/33] fix: pass sessionsTable to grep interceptor in shell The createGrepCommand() now accepts sessionsTable parameter for cross-table search support. --- src/shell/deeplake-shell.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shell/deeplake-shell.ts b/src/shell/deeplake-shell.ts index 292e110..0c44dfd 100644 --- a/src/shell/deeplake-shell.ts +++ b/src/shell/deeplake-shell.ts @@ -62,7 +62,7 @@ async function main(): Promise { const bash = new Bash({ fs, cwd: mount, - customCommands: [createGrepCommand(client, fs, table)], + customCommands: [createGrepCommand(client, fs, table, sessionsTable)], env: { HOME: mount, DEEPLAKE_TABLE: table, From 9f2b5096a66f80c53d14f19e663a7564921320ac Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Wed, 15 Apr 2026 22:59:20 +0000 Subject: [PATCH 22/33] build: regenerate bundles for fast-path optimization --- claude-code/bundle/capture.js | 25 +- claude-code/bundle/commands/auth-login.js | 25 +- claude-code/bundle/pre-tool-use.js | 449 +++++++++++++++++---- claude-code/bundle/session-start-setup.js | 26 +- claude-code/bundle/shell/deeplake-shell.js | 79 +++- claude-code/bundle/wiki-worker.js | 3 - codex/bundle/capture.js | 25 +- codex/bundle/commands/auth-login.js | 25 +- 
codex/bundle/pre-tool-use.js | 214 +++++++++- codex/bundle/session-start-setup.js | 26 +- codex/bundle/shell/deeplake-shell.js | 79 +++- codex/bundle/stop.js | 25 +- codex/bundle/wiki-worker.js | 3 - 13 files changed, 859 insertions(+), 145 deletions(-) diff --git a/claude-code/bundle/capture.js b/claude-code/bundle/capture.js index 3d3858b..0b8f246 100755 --- a/claude-code/bundle/capture.js +++ b/claude-code/bundle/capture.js @@ -72,6 +72,20 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); +var TRACE_SQL = process.env.DEEPLAKE_TRACE_SQL === "1" || process.env.DEEPLAKE_DEBUG === "1"; +var DEBUG_FILE_LOG = process.env.DEEPLAKE_DEBUG === "1"; +function summarizeSql(sql, maxLen = 220) { + const compact = sql.replace(/\s+/g, " ").trim(); + return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; +} +function traceSql(msg) { + if (!TRACE_SQL) + return; + process.stderr.write(`[deeplake-sql] ${msg} +`); + if (DEBUG_FILE_LOG) + log2(msg); +} var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -119,9 +133,18 @@ var DeeplakeApi = class { } /** Execute SQL with retry on transient errors and bounded concurrency. */ async query(sql) { + const startedAt = Date.now(); + const summary = summarizeSql(sql); + traceSql(`query start: ${summary}`); await this._sem.acquire(); try { - return await this._queryWithRetry(sql); + const rows = await this._queryWithRetry(sql); + traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); + return rows; + } catch (e) { + const message = e instanceof Error ? 
e.message : String(e); + traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); + throw e; } finally { this._sem.release(); } diff --git a/claude-code/bundle/commands/auth-login.js b/claude-code/bundle/commands/auth-login.js index ce6a7ed..9acecdf 100755 --- a/claude-code/bundle/commands/auth-login.js +++ b/claude-code/bundle/commands/auth-login.js @@ -248,6 +248,20 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); +var TRACE_SQL = process.env.DEEPLAKE_TRACE_SQL === "1" || process.env.DEEPLAKE_DEBUG === "1"; +var DEBUG_FILE_LOG = process.env.DEEPLAKE_DEBUG === "1"; +function summarizeSql(sql, maxLen = 220) { + const compact = sql.replace(/\s+/g, " ").trim(); + return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; +} +function traceSql(msg) { + if (!TRACE_SQL) + return; + process.stderr.write(`[deeplake-sql] ${msg} +`); + if (DEBUG_FILE_LOG) + log2(msg); +} var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -295,9 +309,18 @@ var DeeplakeApi = class { } /** Execute SQL with retry on transient errors and bounded concurrency. */ async query(sql) { + const startedAt = Date.now(); + const summary = summarizeSql(sql); + traceSql(`query start: ${summary}`); await this._sem.acquire(); try { - return await this._queryWithRetry(sql); + const rows = await this._queryWithRetry(sql); + traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); + return rows; + } catch (e) { + const message = e instanceof Error ? 
e.message : String(e); + traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); + throw e; } finally { this._sem.release(); } diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index d02660a..8e97146 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -82,6 +82,20 @@ function sqlLike(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); +var TRACE_SQL = process.env.DEEPLAKE_TRACE_SQL === "1" || process.env.DEEPLAKE_DEBUG === "1"; +var DEBUG_FILE_LOG = process.env.DEEPLAKE_DEBUG === "1"; +function summarizeSql(sql, maxLen = 220) { + const compact = sql.replace(/\s+/g, " ").trim(); + return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; +} +function traceSql(msg) { + if (!TRACE_SQL) + return; + process.stderr.write(`[deeplake-sql] ${msg} +`); + if (DEBUG_FILE_LOG) + log2(msg); +} var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -129,9 +143,18 @@ var DeeplakeApi = class { } /** Execute SQL with retry on transient errors and bounded concurrency. */ async query(sql) { + const startedAt = Date.now(); + const summary = summarizeSql(sql); + traceSql(`query start: ${summary}`); await this._sem.acquire(); try { - return await this._queryWithRetry(sql); + const rows = await this._queryWithRetry(sql); + traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); + return rows; + } catch (e) { + const message = e instanceof Error ? 
e.message : String(e); + traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); + throw e; } finally { this._sem.release(); } @@ -282,6 +305,169 @@ var DeeplakeApi = class { } }; +// dist/src/hooks/grep-direct.js +function parseBashGrep(cmd) { + const first = cmd.trim().split(/\s*\|\s*/)[0]; + if (!/^(grep|egrep|fgrep)\b/.test(first)) + return null; + const isFixed = first.startsWith("fgrep"); + const tokens = []; + let pos = 0; + while (pos < first.length) { + if (first[pos] === " " || first[pos] === " ") { + pos++; + continue; + } + if (first[pos] === "'" || first[pos] === '"') { + const q = first[pos]; + let end = pos + 1; + while (end < first.length && first[end] !== q) + end++; + tokens.push(first.slice(pos + 1, end)); + pos = end + 1; + } else { + let end = pos; + while (end < first.length && first[end] !== " " && first[end] !== " ") + end++; + tokens.push(first.slice(pos, end)); + pos = end; + } + } + let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; + let ti = 1; + while (ti < tokens.length && tokens[ti].startsWith("-") && tokens[ti] !== "--") { + const flag = tokens[ti]; + if (flag.startsWith("--")) { + const handlers = { + "--ignore-case": () => { + ignoreCase = true; + }, + "--word-regexp": () => { + wordMatch = true; + }, + "--files-with-matches": () => { + filesOnly = true; + }, + "--count": () => { + countOnly = true; + }, + "--line-number": () => { + lineNumber = true; + }, + "--invert-match": () => { + invertMatch = true; + }, + "--fixed-strings": () => { + fixedString = true; + } + }; + handlers[flag]?.(); + ti++; + continue; + } + for (const c of flag.slice(1)) { + switch (c) { + case "i": + ignoreCase = true; + break; + case "w": + wordMatch = true; + break; + case "l": + filesOnly = true; + break; + case "c": + countOnly = true; + break; + case "n": + lineNumber = true; + break; + case "v": + invertMatch = true; + break; + case 
"F": + fixedString = true; + break; + } + } + ti++; + } + if (ti < tokens.length && tokens[ti] === "--") + ti++; + if (ti >= tokens.length) + return null; + let target = tokens[ti + 1] ?? "/"; + if (target === "." || target === "./") + target = "/"; + return { + pattern: tokens[ti], + targetPath: target, + ignoreCase, + wordMatch, + filesOnly, + countOnly, + lineNumber, + invertMatch, + fixedString + }; +} +async function handleGrepDirect(api, table, sessionsTable, params) { + if (!params.pattern) + return null; + const { pattern, targetPath, ignoreCase, wordMatch, filesOnly, countOnly, lineNumber, invertMatch, fixedString } = params; + const likeOp = ignoreCase ? "ILIKE" : "LIKE"; + const escapedLike = sqlLike(pattern); + let pathFilter = ""; + if (targetPath && targetPath !== "/") { + const clean = targetPath.replace(/\/+$/, ""); + pathFilter = ` AND (path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; + } + const hasRegexMeta = !fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); + const contentFilter = hasRegexMeta ? "" : ` AND summary ${likeOp} '%${escapedLike}%'`; + const queries = [ + api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`).catch(() => []) + ]; + const allRows = (await Promise.all(queries)).flat(); + let reStr = fixedString ? pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : pattern; + if (wordMatch) + reStr = `\\b${reStr}\\b`; + let re; + try { + re = new RegExp(reStr, ignoreCase ? "i" : ""); + } catch { + re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? "i" : ""); + } + const output = []; + const multi = allRows.length > 1; + for (const row of allRows) { + const p = row["path"]; + const text = row["content"]; + if (!text) + continue; + const lines = text.split("\n"); + const matched = []; + for (let i = 0; i < lines.length; i++) { + if (re.test(lines[i]) !== !!invertMatch) { + if (filesOnly) { + output.push(p); + break; + } + const prefix = multi ? 
`${p}:` : ""; + const ln = lineNumber ? `${i + 1}:` : ""; + matched.push(`${prefix}${ln}${lines[i]}`); + } + } + if (!filesOnly) { + if (countOnly) { + output.push(`${multi ? `${p}:` : ""}${matched.length}`); + } else { + output.push(...matched); + } + } + } + return output.join("\n") || "(no matches)"; +} + // dist/src/hooks/pre-tool-use.js var log3 = (msg) => log("pre", msg); var MEMORY_PATH = join3(homedir3(), ".deeplake", "memory"); @@ -458,6 +644,34 @@ function getShellCommand(toolName, toolInput) { } return null; } +function emitResult(command, description) { + console.log(JSON.stringify({ + hookSpecificOutput: { + hookEventName: "PreToolUse", + permissionDecision: "allow", + updatedInput: { command, description } + } + })); +} +function extractGrepParams(toolName, toolInput, shellCmd) { + if (toolName === "Grep") { + const outputMode = toolInput.output_mode ?? "files_with_matches"; + return { + pattern: toolInput.pattern ?? "", + targetPath: rewritePaths(toolInput.path ?? "") || "/", + ignoreCase: !!toolInput["-i"], + wordMatch: false, + filesOnly: outputMode === "files_with_matches", + countOnly: outputMode === "count", + lineNumber: !!toolInput["-n"], + invertMatch: false, + fixedString: false + }; + } + if (toolName === "Bash") + return parseBashGrep(shellCmd); + return null; +} async function main() { const input = await readStdin(); log3(`hook fired: tool=${input.tool_name} input=${JSON.stringify(input.tool_input)}`); @@ -482,88 +696,187 @@ async function main() { if (!shellCmd) return; const config = loadConfig(); - if (config && (input.tool_name === "Read" || input.tool_name === "Grep")) { + if (config) { const table = process.env["DEEPLAKE_TABLE"] ?? "memory"; + const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); try { - if (input.tool_name === "Read") { - const virtualPath = rewritePaths(input.tool_input.file_path ?? 
""); - log3(`direct read: ${virtualPath}`); - const rows = await api.query(`SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); - if (rows.length > 0 && rows[0]["summary"]) { - console.log(JSON.stringify({ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "allow", - updatedInput: { - command: `echo ${JSON.stringify(rows[0]["summary"])}`, - description: `[DeepLake direct] cat ${virtualPath}` + const grepParams = extractGrepParams(input.tool_name, input.tool_input, shellCmd); + if (grepParams) { + log3(`direct grep: pattern=${grepParams.pattern} path=${grepParams.targetPath}`); + const result = await handleGrepDirect(api, table, sessionsTable, grepParams); + if (result !== null) { + emitResult(`echo ${JSON.stringify(result)}`, `[DeepLake direct] grep ${grepParams.pattern}`); + return; + } + } + { + let virtualPath = null; + let lineLimit = 0; + let fromEnd = false; + if (input.tool_name === "Read") { + virtualPath = rewritePaths(input.tool_input.file_path ?? ""); + } else if (input.tool_name === "Bash") { + const catCmd = shellCmd.replace(/\s+2>\S+/g, "").trim(); + const catPipeHead = catCmd.match(/^cat\s+(\S+?)\s*(?:\|[^|]*)*\|\s*head\s+(?:-n?\s*)?(-?\d+)\s*$/); + if (catPipeHead) { + virtualPath = catPipeHead[1]; + lineLimit = Math.abs(parseInt(catPipeHead[2], 10)); + } + if (!virtualPath) { + const catMatch = catCmd.match(/^cat\s+(\S+)\s*$/); + if (catMatch) + virtualPath = catMatch[1]; + } + if (!virtualPath) { + const headMatch = shellCmd.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? shellCmd.match(/^head\s+(\S+)\s*$/); + if (headMatch) { + if (headMatch[2]) { + virtualPath = headMatch[2]; + lineLimit = Math.abs(parseInt(headMatch[1], 10)); + } else { + virtualPath = headMatch[1]; + lineLimit = 10; } } - })); - return; + } + if (!virtualPath) { + const tailMatch = shellCmd.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? 
shellCmd.match(/^tail\s+(\S+)\s*$/); + if (tailMatch) { + fromEnd = true; + if (tailMatch[2]) { + virtualPath = tailMatch[2]; + lineLimit = Math.abs(parseInt(tailMatch[1], 10)); + } else { + virtualPath = tailMatch[1]; + lineLimit = 10; + } + } + } + if (!virtualPath) { + const wcMatch = shellCmd.match(/^wc\s+-l\s+(\S+)\s*$/); + if (wcMatch) { + virtualPath = wcMatch[1]; + lineLimit = -1; + } + } } - if (virtualPath.startsWith("/sessions/")) { - const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; - try { - const sessionRows = await api.query(`SELECT message::text AS content FROM "${sessionsTable}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); - if (sessionRows.length > 0 && sessionRows[0]["content"]) { - console.log(JSON.stringify({ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "allow", - updatedInput: { - command: `echo ${JSON.stringify(sessionRows[0]["content"])}`, - description: `[DeepLake direct] cat ${virtualPath}` - } - } - })); + if (virtualPath && !virtualPath.endsWith("/")) { + log3(`direct read: ${virtualPath}`); + let content = null; + if (virtualPath.startsWith("/sessions/")) { + try { + const sessionRows = await api.query(`SELECT message::text AS content FROM "${sessionsTable}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); + if (sessionRows.length > 0 && sessionRows[0]["content"]) { + content = sessionRows[0]["content"]; + } + } catch { + } + } else { + const rows = await api.query(`SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); + if (rows.length > 0 && rows[0]["summary"]) { + content = rows[0]["summary"]; + } else if (virtualPath === "/index.md") { + const idxRows = await api.query(`SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`); + const lines = ["# Memory Index", "", `${idxRows.length} sessions:`, ""]; + for (const r of idxRows) { + const p = r["path"]; + const proj = r["project"] 
|| ""; + const desc = (r["description"] || "").slice(0, 120); + const date = (r["creation_date"] || "").slice(0, 10); + lines.push(`- [${p}](${p}) ${date} ${proj ? `[${proj}]` : ""} ${desc}`); + } + content = lines.join("\n"); + } + } + if (content !== null) { + if (lineLimit === -1) { + const count = content.split("\n").length; + emitResult(`echo ${JSON.stringify(`${count} ${virtualPath}`)}`, `[DeepLake direct] wc -l ${virtualPath}`); return; } - } catch { + if (lineLimit > 0) { + const lines = content.split("\n"); + content = fromEnd ? lines.slice(-lineLimit).join("\n") : lines.slice(0, lineLimit).join("\n"); + } + const label = lineLimit > 0 ? fromEnd ? `tail -${lineLimit}` : `head -${lineLimit}` : "cat"; + emitResult(`echo ${JSON.stringify(content)}`, `[DeepLake direct] ${label} ${virtualPath}`); + return; } } - } else if (input.tool_name === "Grep") { - const pattern = input.tool_input.pattern ?? ""; - const ignoreCase = !!input.tool_input["-i"]; - log3(`direct grep: ${pattern}`); - const likeOp = ignoreCase ? "ILIKE" : "LIKE"; - const escapedPattern = sqlLike(pattern); - const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; - const [memoryRows, sessionRows] = await Promise.all([ - api.query(`SELECT path, summary FROM "${table}" WHERE summary ${likeOp} '%${escapedPattern}%' LIMIT 5`).catch(() => []), - api.query(`SELECT path, message::text AS content FROM "${sessionsTable}" WHERE message::text ${likeOp} '%${escapedPattern}%' LIMIT 3`).catch(() => []) - ]); - if (memoryRows.length > 0 || sessionRows.length > 0) { - const allResults = []; - const re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? 
"i" : ""); - for (const row of memoryRows) { - const p = row["path"]; - const text = row["summary"]; - if (!text) - continue; - const matches = text.split("\n").filter((line) => re.test(line)).slice(0, 5).map((line) => `${p}:${line.slice(0, 300)}`); - allResults.push(...matches); + } + { + let lsDir = null; + let longFormat = false; + if (input.tool_name === "Glob") { + lsDir = rewritePaths(input.tool_input.path ?? "") || "/"; + } else if (input.tool_name === "Bash") { + const lsMatch = shellCmd.match(/^ls\s+(?:-([a-zA-Z]+)\s+)?(\S+)?\s*$/); + if (lsMatch) { + lsDir = lsMatch[2] ?? "/"; + longFormat = (lsMatch[1] ?? "").includes("l"); + } + } + if (lsDir) { + const dir = lsDir.replace(/\/+$/, "") || "/"; + log3(`direct ls: ${dir}`); + const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); + const isRoot = dir === "/"; + const lsQueries = []; + if (!isSessionDir) { + lsQueries.push(api.query(`SELECT path, size_bytes FROM "${table}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' ORDER BY path`).catch(() => [])); } - for (const row of sessionRows) { + if (isSessionDir || isRoot) { + lsQueries.push(api.query(`SELECT path, size_bytes FROM "${sessionsTable}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' ORDER BY path`).catch(() => [])); + } + const rows = (await Promise.all(lsQueries)).flat(); + const entries = /* @__PURE__ */ new Map(); + const prefix = dir === "/" ? "/" : dir + "/"; + for (const row of rows) { const p = row["path"]; - const text = row["content"]; - if (!text) + if (!p.startsWith(prefix) && dir !== "/") + continue; + const rest = dir === "/" ? p.slice(1) : p.slice(prefix.length); + const slash = rest.indexOf("/"); + const name = slash === -1 ? 
rest : rest.slice(0, slash); + if (!name) continue; - const matches = text.split(/(?:"text"\s*:\s*")/g).filter((chunk) => re.test(chunk)).slice(0, 3).map((chunk) => `${p}:${chunk.slice(0, 300).replace(/\\n/g, " ")}`); - allResults.push(...matches); + const existing = entries.get(name); + if (slash !== -1) { + if (!existing) + entries.set(name, { isDir: true, size: 0 }); + } else { + entries.set(name, { isDir: false, size: row["size_bytes"] ?? 0 }); + } } - const results = allResults.join("\n"); - console.log(JSON.stringify({ - hookSpecificOutput: { - hookEventName: "PreToolUse", - permissionDecision: "allow", - updatedInput: { - command: `echo ${JSON.stringify(results || "(no matches)")}`, - description: `[DeepLake direct] grep ${pattern}` - } + const lines = []; + for (const [name, info] of [...entries].sort((a, b) => a[0].localeCompare(b[0]))) { + if (longFormat) { + const type = info.isDir ? "drwxr-xr-x" : "-rw-r--r--"; + const size = String(info.isDir ? 0 : info.size).padStart(6); + lines.push(`${type} 1 user user ${size} ${name}${info.isDir ? "/" : ""}`); + } else { + lines.push(name + (info.isDir ? "/" : "")); } - })); + } + emitResult(`echo ${JSON.stringify(lines.join("\n") || "(empty directory)")}`, `[DeepLake direct] ls ${dir}`); + return; + } + } + if (input.tool_name === "Bash") { + const findMatch = shellCmd.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); + if (findMatch) { + const dir = findMatch[1].replace(/\/+$/, "") || "/"; + const namePattern = findMatch[2].replace(/\*/g, "%").replace(/\?/g, "_"); + log3(`direct find: ${dir} -name '${findMatch[2]}'`); + const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); + const findTable = isSessionDir ? sessionsTable : table; + const rows = await api.query(`SELECT path FROM "${findTable}" WHERE path LIKE '${sqlLike(dir === "/" ? 
"" : dir)}/%' AND filename LIKE '${namePattern}' ORDER BY path`); + let result = rows.map((r) => r["path"]).join("\n") || ""; + if (/\|\s*wc\s+-l\s*$/.test(shellCmd)) { + result = String(rows.length); + } + emitResult(`echo ${JSON.stringify(result || "(no matches)")}`, `[DeepLake direct] find ${dir}`); return; } } diff --git a/claude-code/bundle/session-start-setup.js b/claude-code/bundle/session-start-setup.js index 9f77b3a..ce8ad06 100755 --- a/claude-code/bundle/session-start-setup.js +++ b/claude-code/bundle/session-start-setup.js @@ -87,6 +87,20 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); +var TRACE_SQL = process.env.DEEPLAKE_TRACE_SQL === "1" || process.env.DEEPLAKE_DEBUG === "1"; +var DEBUG_FILE_LOG = process.env.DEEPLAKE_DEBUG === "1"; +function summarizeSql(sql, maxLen = 220) { + const compact = sql.replace(/\s+/g, " ").trim(); + return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; +} +function traceSql(msg) { + if (!TRACE_SQL) + return; + process.stderr.write(`[deeplake-sql] ${msg} +`); + if (DEBUG_FILE_LOG) + log2(msg); +} var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -134,9 +148,18 @@ var DeeplakeApi = class { } /** Execute SQL with retry on transient errors and bounded concurrency. */ async query(sql) { + const startedAt = Date.now(); + const summary = summarizeSql(sql); + traceSql(`query start: ${summary}`); await this._sem.acquire(); try { - return await this._queryWithRetry(sql); + const rows = await this._queryWithRetry(sql); + traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); + return rows; + } catch (e) { + const message = e instanceof Error ? 
e.message : String(e); + traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); + throw e; } finally { this._sem.release(); } @@ -355,7 +378,6 @@ function isNewer(latest, current) { } async function createPlaceholder(api, table, sessionId, cwd, userName, orgName, workspaceId) { const summaryPath = `/summaries/${userName}/${sessionId}.md`; - await api.query(`SELECT deeplake_sync_table('${table}')`); const existing = await api.query(`SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1`); if (existing.length > 0) { wikiLog(`SessionSetup: summary exists for ${sessionId} (resumed)`); diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index ff54609..d872ea8 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -66772,9 +66772,26 @@ function log(tag, msg) { function sqlStr(value) { return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); } +function sqlLike(value) { + return sqlStr(value).replace(/%/g, "\\%").replace(/_/g, "\\_"); +} // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); +var TRACE_SQL = process.env.DEEPLAKE_TRACE_SQL === "1" || process.env.DEEPLAKE_DEBUG === "1"; +var DEBUG_FILE_LOG = process.env.DEEPLAKE_DEBUG === "1"; +function summarizeSql(sql, maxLen = 220) { + const compact = sql.replace(/\s+/g, " ").trim(); + return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; +} +function traceSql(msg) { + if (!TRACE_SQL) + return; + process.stderr.write(`[deeplake-sql] ${msg} +`); + if (DEBUG_FILE_LOG) + log2(msg); +} var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -66822,9 +66839,18 @@ var DeeplakeApi = class { } /** Execute SQL with retry on transient errors and bounded concurrency. 
*/ async query(sql) { + const startedAt = Date.now(); + const summary = summarizeSql(sql); + traceSql(`query start: ${summary}`); await this._sem.acquire(); try { - return await this._queryWithRetry(sql); + const rows = await this._queryWithRetry(sql); + traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); + return rows; + } catch (e6) { + const message = e6 instanceof Error ? e6.message : String(e6); + traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); + throw e6; } finally { this._sem.release(); } @@ -67038,17 +67064,7 @@ var DeeplakeFs = class _DeeplakeFs { const fs3 = new _DeeplakeFs(client, table, mount); fs3.sessionsTable = sessionsTable ?? null; await client.ensureTable(); - let sessionSyncOk = false; - const syncPromises = [ - client.query(`SELECT deeplake_sync_table('${table}')`) - ]; - if (sessionsTable) { - syncPromises.push(client.query(`SELECT deeplake_sync_table('${sessionsTable}')`).then(() => { - sessionSyncOk = true; - }).catch(() => { - })); - } - await Promise.all(syncPromises); + let sessionSyncOk = true; const memoryBootstrap = (async () => { const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; try { @@ -67139,7 +67155,6 @@ var DeeplakeFs = class _DeeplakeFs { failures++; } } - await this.client.query(`SELECT deeplake_sync_table('${this.table}')`); if (failures > 0) { throw new Error(`flush: ${failures}/${rows.length} writes failed and were re-queued`); } @@ -68541,7 +68556,8 @@ yargsParser.looksLikeNumber = looksLikeNumber; var lib_default = yargsParser; // dist/src/shell/grep-interceptor.js -function createGrepCommand(client, fs3, table) { +var MAX_FALLBACK_CANDIDATES = 500; +function createGrepCommand(client, fs3, table, sessionsTable) { return Yi2("grep", async (args, ctx) => { const parsed = lib_default(args, { boolean: ["r", "R", "l", "i", "n", "v", "c", "F", "fixed-strings", "recursive", "ignore-case"], @@ -68557,23 +68573,44 @@ function 
createGrepCommand(client, fs3, table) { if (targets.length === 0) return { stdout: "", stderr: "", exitCode: 1 }; const mount = fs3.mountPoint; - const allUnderMount = targets.every((t6) => t6 === mount || t6.startsWith(mount + "/")); + const mountPrefix = mount === "/" ? "/" : mount + "/"; + const allUnderMount = targets.every((t6) => t6 === mount || t6.startsWith(mountPrefix)); if (!allUnderMount) { return { stdout: "", stderr: "", exitCode: 127 }; } let candidates = []; try { - const bm25 = await Promise.race([ - client.query(`SELECT path FROM "${table}" WHERE summary <#> '${sqlStr(pattern)}' LIMIT 50`), + const queries = [ + client.query(`SELECT path FROM "${table}" WHERE summary <#> '${sqlStr(pattern)}' LIMIT 50`) + ]; + if (sessionsTable) { + queries.push(client.query(`SELECT path FROM "${sessionsTable}" WHERE message::text LIKE '%${sqlLike(pattern)}%' LIMIT 10`)); + } + const results = await Promise.race([ + Promise.all(queries), new Promise((_16, reject) => setTimeout(() => reject(new Error("timeout")), 3e3)) ]); - candidates = bm25.map((r10) => r10["path"]).filter(Boolean); + for (const rows of results) { + candidates.push(...rows.map((r10) => r10["path"]).filter(Boolean)); + } } catch { } + const withinTargets = (p22) => targets.some((t6) => t6 === "/" || p22 === t6 || p22.startsWith(t6 + "/")); if (candidates.length === 0) { - candidates = fs3.getAllPaths().filter((p22) => !p22.endsWith("/")); + candidates = fs3.getAllPaths().filter((p22) => !p22.endsWith("/") && withinTargets(p22)); + if (candidates.length > MAX_FALLBACK_CANDIDATES) { + candidates = candidates.slice(0, MAX_FALLBACK_CANDIDATES); + } + } else { + candidates = candidates.filter((c15) => withinTargets(c15)); } - candidates = candidates.filter((c15) => targets.some((t6) => t6 === "/" || c15 === t6 || c15.startsWith(t6 + "/"))); + const seen = /* @__PURE__ */ new Set(); + candidates = candidates.filter((c15) => { + if (seen.has(c15)) + return false; + seen.add(c15); + return true; + }); await 
fs3.prefetch(candidates); const fixedString = parsed.F || parsed["fixed-strings"]; const ignoreCase = parsed.i || parsed["ignore-case"]; @@ -68644,7 +68681,7 @@ async function main() { const bash = new xt6({ fs: fs3, cwd: mount, - customCommands: [createGrepCommand(client, fs3, table)], + customCommands: [createGrepCommand(client, fs3, table, sessionsTable)], env: { HOME: mount, DEEPLAKE_TABLE: table, diff --git a/claude-code/bundle/wiki-worker.js b/claude-code/bundle/wiki-worker.js index f759c23..3f7eea8 100755 --- a/claude-code/bundle/wiki-worker.js +++ b/claude-code/bundle/wiki-worker.js @@ -66,7 +66,6 @@ function cleanup() { async function main() { try { wlog("fetching session events"); - await query(`SELECT deeplake_sync_table('${cfg.sessionsTable}')`); const rows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE '${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC`); if (rows.length === 0) { wlog("no session events found \u2014 exiting"); @@ -80,7 +79,6 @@ async function main() { wlog(`found ${jsonlLines} events at ${jsonlServerPath}`); let prevOffset = 0; try { - await query(`SELECT deeplake_sync_table('${cfg.memoryTable}')`); const sumRows = await query(`SELECT summary FROM "${cfg.memoryTable}" WHERE path = '${esc(`/summaries/${cfg.userName}/${cfg.sessionId}.md`)}' LIMIT 1`); if (sumRows.length > 0 && sumRows[0]["summary"]) { const existing = sumRows[0]["summary"]; @@ -118,7 +116,6 @@ async function main() { const fname = `${cfg.sessionId}.md`; const vpath = `/summaries/${cfg.userName}/${fname}`; const ts = (/* @__PURE__ */ new Date()).toISOString(); - await query(`SELECT deeplake_sync_table('${cfg.memoryTable}')`); const existing = await query(`SELECT path FROM "${cfg.memoryTable}" WHERE path = '${esc(vpath)}' LIMIT 1`); if (existing.length > 0) { await query(`UPDATE "${cfg.memoryTable}" SET summary = E'${esc(text)}', size_bytes = ${Buffer.byteLength(text)}, last_update_date = '${ts}' WHERE path = 
'${esc(vpath)}'`); diff --git a/codex/bundle/capture.js b/codex/bundle/capture.js index 175e58b..dfd28a6 100755 --- a/codex/bundle/capture.js +++ b/codex/bundle/capture.js @@ -72,6 +72,20 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); +var TRACE_SQL = process.env.DEEPLAKE_TRACE_SQL === "1" || process.env.DEEPLAKE_DEBUG === "1"; +var DEBUG_FILE_LOG = process.env.DEEPLAKE_DEBUG === "1"; +function summarizeSql(sql, maxLen = 220) { + const compact = sql.replace(/\s+/g, " ").trim(); + return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; +} +function traceSql(msg) { + if (!TRACE_SQL) + return; + process.stderr.write(`[deeplake-sql] ${msg} +`); + if (DEBUG_FILE_LOG) + log2(msg); +} var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -119,9 +133,18 @@ var DeeplakeApi = class { } /** Execute SQL with retry on transient errors and bounded concurrency. */ async query(sql) { + const startedAt = Date.now(); + const summary = summarizeSql(sql); + traceSql(`query start: ${summary}`); await this._sem.acquire(); try { - return await this._queryWithRetry(sql); + const rows = await this._queryWithRetry(sql); + traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); + return rows; + } catch (e) { + const message = e instanceof Error ? 
e.message : String(e); + traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); + throw e; } finally { this._sem.release(); } diff --git a/codex/bundle/commands/auth-login.js b/codex/bundle/commands/auth-login.js index ce6a7ed..9acecdf 100755 --- a/codex/bundle/commands/auth-login.js +++ b/codex/bundle/commands/auth-login.js @@ -248,6 +248,20 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); +var TRACE_SQL = process.env.DEEPLAKE_TRACE_SQL === "1" || process.env.DEEPLAKE_DEBUG === "1"; +var DEBUG_FILE_LOG = process.env.DEEPLAKE_DEBUG === "1"; +function summarizeSql(sql, maxLen = 220) { + const compact = sql.replace(/\s+/g, " ").trim(); + return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; +} +function traceSql(msg) { + if (!TRACE_SQL) + return; + process.stderr.write(`[deeplake-sql] ${msg} +`); + if (DEBUG_FILE_LOG) + log2(msg); +} var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -295,9 +309,18 @@ var DeeplakeApi = class { } /** Execute SQL with retry on transient errors and bounded concurrency. */ async query(sql) { + const startedAt = Date.now(); + const summary = summarizeSql(sql); + traceSql(`query start: ${summary}`); await this._sem.acquire(); try { - return await this._queryWithRetry(sql); + const rows = await this._queryWithRetry(sql); + traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); + return rows; + } catch (e) { + const message = e instanceof Error ? 
e.message : String(e); + traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); + throw e; } finally { this._sem.release(); } diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index e164861..ab20f48 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -83,6 +83,20 @@ function sqlLike(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); +var TRACE_SQL = process.env.DEEPLAKE_TRACE_SQL === "1" || process.env.DEEPLAKE_DEBUG === "1"; +var DEBUG_FILE_LOG = process.env.DEEPLAKE_DEBUG === "1"; +function summarizeSql(sql, maxLen = 220) { + const compact = sql.replace(/\s+/g, " ").trim(); + return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; +} +function traceSql(msg) { + if (!TRACE_SQL) + return; + process.stderr.write(`[deeplake-sql] ${msg} +`); + if (DEBUG_FILE_LOG) + log2(msg); +} var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -130,9 +144,18 @@ var DeeplakeApi = class { } /** Execute SQL with retry on transient errors and bounded concurrency. */ async query(sql) { + const startedAt = Date.now(); + const summary = summarizeSql(sql); + traceSql(`query start: ${summary}`); await this._sem.acquire(); try { - return await this._queryWithRetry(sql); + const rows = await this._queryWithRetry(sql); + traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); + return rows; + } catch (e) { + const message = e instanceof Error ? 
e.message : String(e); + traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); + throw e; } finally { this._sem.release(); } @@ -283,6 +306,169 @@ var DeeplakeApi = class { } }; +// dist/src/hooks/grep-direct.js +function parseBashGrep(cmd) { + const first = cmd.trim().split(/\s*\|\s*/)[0]; + if (!/^(grep|egrep|fgrep)\b/.test(first)) + return null; + const isFixed = first.startsWith("fgrep"); + const tokens = []; + let pos = 0; + while (pos < first.length) { + if (first[pos] === " " || first[pos] === " ") { + pos++; + continue; + } + if (first[pos] === "'" || first[pos] === '"') { + const q = first[pos]; + let end = pos + 1; + while (end < first.length && first[end] !== q) + end++; + tokens.push(first.slice(pos + 1, end)); + pos = end + 1; + } else { + let end = pos; + while (end < first.length && first[end] !== " " && first[end] !== " ") + end++; + tokens.push(first.slice(pos, end)); + pos = end; + } + } + let ignoreCase = false, wordMatch = false, filesOnly = false, countOnly = false, lineNumber = false, invertMatch = false, fixedString = isFixed; + let ti = 1; + while (ti < tokens.length && tokens[ti].startsWith("-") && tokens[ti] !== "--") { + const flag = tokens[ti]; + if (flag.startsWith("--")) { + const handlers = { + "--ignore-case": () => { + ignoreCase = true; + }, + "--word-regexp": () => { + wordMatch = true; + }, + "--files-with-matches": () => { + filesOnly = true; + }, + "--count": () => { + countOnly = true; + }, + "--line-number": () => { + lineNumber = true; + }, + "--invert-match": () => { + invertMatch = true; + }, + "--fixed-strings": () => { + fixedString = true; + } + }; + handlers[flag]?.(); + ti++; + continue; + } + for (const c of flag.slice(1)) { + switch (c) { + case "i": + ignoreCase = true; + break; + case "w": + wordMatch = true; + break; + case "l": + filesOnly = true; + break; + case "c": + countOnly = true; + break; + case "n": + lineNumber = true; + break; + case "v": + invertMatch = true; + break; + case 
"F": + fixedString = true; + break; + } + } + ti++; + } + if (ti < tokens.length && tokens[ti] === "--") + ti++; + if (ti >= tokens.length) + return null; + let target = tokens[ti + 1] ?? "/"; + if (target === "." || target === "./") + target = "/"; + return { + pattern: tokens[ti], + targetPath: target, + ignoreCase, + wordMatch, + filesOnly, + countOnly, + lineNumber, + invertMatch, + fixedString + }; +} +async function handleGrepDirect(api, table, sessionsTable, params) { + if (!params.pattern) + return null; + const { pattern, targetPath, ignoreCase, wordMatch, filesOnly, countOnly, lineNumber, invertMatch, fixedString } = params; + const likeOp = ignoreCase ? "ILIKE" : "LIKE"; + const escapedLike = sqlLike(pattern); + let pathFilter = ""; + if (targetPath && targetPath !== "/") { + const clean = targetPath.replace(/\/+$/, ""); + pathFilter = ` AND (path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; + } + const hasRegexMeta = !fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); + const contentFilter = hasRegexMeta ? "" : ` AND summary ${likeOp} '%${escapedLike}%'`; + const queries = [ + api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`).catch(() => []) + ]; + const allRows = (await Promise.all(queries)).flat(); + let reStr = fixedString ? pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : pattern; + if (wordMatch) + reStr = `\\b${reStr}\\b`; + let re; + try { + re = new RegExp(reStr, ignoreCase ? "i" : ""); + } catch { + re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? "i" : ""); + } + const output = []; + const multi = allRows.length > 1; + for (const row of allRows) { + const p = row["path"]; + const text = row["content"]; + if (!text) + continue; + const lines = text.split("\n"); + const matched = []; + for (let i = 0; i < lines.length; i++) { + if (re.test(lines[i]) !== !!invertMatch) { + if (filesOnly) { + output.push(p); + break; + } + const prefix = multi ? 
`${p}:` : ""; + const ln = lineNumber ? `${i + 1}:` : ""; + matched.push(`${prefix}${ln}${lines[i]}`); + } + } + if (!filesOnly) { + if (countOnly) { + output.push(`${multi ? `${p}:` : ""}${matched.length}`); + } else { + output.push(...matched); + } + } + } + return output.join("\n") || "(no matches)"; +} + // dist/src/hooks/codex/pre-tool-use.js var log3 = (msg) => log("codex-pre", msg); var MEMORY_PATH = join3(homedir3(), ".deeplake", "memory"); @@ -487,25 +673,13 @@ async function main() { blockWithContent(`ls: cannot access '${dir}': No such file or directory`); } } - const grepMatch = rewritten.match(/^grep\s+(?:-[a-zA-Z]+\s+)*(?:'([^']*)'|"([^"]*)"|(\S+))\s+(\S+)/); - if (grepMatch) { - const pattern = grepMatch[1] ?? grepMatch[2] ?? grepMatch[3]; - const ignoreCase = /\s-[a-zA-Z]*i/.test(rewritten); - log3(`direct grep: ${pattern}`); - const rows = await api.query(`SELECT path, summary FROM "${table}" WHERE summary ${ignoreCase ? "ILIKE" : "LIKE"} '%${sqlLike(pattern)}%' LIMIT 5`); - if (rows.length > 0) { - const allResults = []; - const re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? "i" : ""); - for (const row of rows) { - const p = row["path"]; - const text = row["summary"]; - if (!text) - continue; - const matches = text.split("\n").filter((line) => re.test(line)).slice(0, 5).map((line) => `${p}:${line.slice(0, 300)}`); - allResults.push(...matches); - } - const results = allResults.join("\n"); - blockWithContent(results || "(no matches)"); + const grepParams = parseBashGrep(rewritten); + if (grepParams) { + const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? 
"sessions"; + log3(`direct grep: pattern=${grepParams.pattern} path=${grepParams.targetPath}`); + const result2 = await handleGrepDirect(api, table, sessionsTable, grepParams); + if (result2 !== null) { + blockWithContent(result2); } } } catch (e) { diff --git a/codex/bundle/session-start-setup.js b/codex/bundle/session-start-setup.js index 5975cd7..636b21e 100755 --- a/codex/bundle/session-start-setup.js +++ b/codex/bundle/session-start-setup.js @@ -84,6 +84,20 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); +var TRACE_SQL = process.env.DEEPLAKE_TRACE_SQL === "1" || process.env.DEEPLAKE_DEBUG === "1"; +var DEBUG_FILE_LOG = process.env.DEEPLAKE_DEBUG === "1"; +function summarizeSql(sql, maxLen = 220) { + const compact = sql.replace(/\s+/g, " ").trim(); + return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; +} +function traceSql(msg) { + if (!TRACE_SQL) + return; + process.stderr.write(`[deeplake-sql] ${msg} +`); + if (DEBUG_FILE_LOG) + log2(msg); +} var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -131,9 +145,18 @@ var DeeplakeApi = class { } /** Execute SQL with retry on transient errors and bounded concurrency. */ async query(sql) { + const startedAt = Date.now(); + const summary = summarizeSql(sql); + traceSql(`query start: ${summary}`); await this._sem.acquire(); try { - return await this._queryWithRetry(sql); + const rows = await this._queryWithRetry(sql); + traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); + return rows; + } catch (e) { + const message = e instanceof Error ? 
e.message : String(e); + traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); + throw e; } finally { this._sem.release(); } @@ -359,7 +382,6 @@ function isNewer(latest, current) { } async function createPlaceholder(api, table, sessionId, cwd, userName, orgName, workspaceId) { const summaryPath = `/summaries/${userName}/${sessionId}.md`; - await api.query(`SELECT deeplake_sync_table('${table}')`); const existing = await api.query(`SELECT path FROM "${table}" WHERE path = '${sqlStr(summaryPath)}' LIMIT 1`); if (existing.length > 0) { wikiLog(`SessionSetup: summary exists for ${sessionId} (resumed)`); diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index ff54609..d872ea8 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -66772,9 +66772,26 @@ function log(tag, msg) { function sqlStr(value) { return value.replace(/\\/g, "\\\\").replace(/'/g, "''").replace(/\0/g, "").replace(/[\x01-\x08\x0b\x0c\x0e-\x1f\x7f]/g, ""); } +function sqlLike(value) { + return sqlStr(value).replace(/%/g, "\\%").replace(/_/g, "\\_"); +} // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); +var TRACE_SQL = process.env.DEEPLAKE_TRACE_SQL === "1" || process.env.DEEPLAKE_DEBUG === "1"; +var DEBUG_FILE_LOG = process.env.DEEPLAKE_DEBUG === "1"; +function summarizeSql(sql, maxLen = 220) { + const compact = sql.replace(/\s+/g, " ").trim(); + return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; +} +function traceSql(msg) { + if (!TRACE_SQL) + return; + process.stderr.write(`[deeplake-sql] ${msg} +`); + if (DEBUG_FILE_LOG) + log2(msg); +} var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -66822,9 +66839,18 @@ var DeeplakeApi = class { } /** Execute SQL with retry on transient errors and bounded concurrency. 
*/ async query(sql) { + const startedAt = Date.now(); + const summary = summarizeSql(sql); + traceSql(`query start: ${summary}`); await this._sem.acquire(); try { - return await this._queryWithRetry(sql); + const rows = await this._queryWithRetry(sql); + traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); + return rows; + } catch (e6) { + const message = e6 instanceof Error ? e6.message : String(e6); + traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); + throw e6; } finally { this._sem.release(); } @@ -67038,17 +67064,7 @@ var DeeplakeFs = class _DeeplakeFs { const fs3 = new _DeeplakeFs(client, table, mount); fs3.sessionsTable = sessionsTable ?? null; await client.ensureTable(); - let sessionSyncOk = false; - const syncPromises = [ - client.query(`SELECT deeplake_sync_table('${table}')`) - ]; - if (sessionsTable) { - syncPromises.push(client.query(`SELECT deeplake_sync_table('${sessionsTable}')`).then(() => { - sessionSyncOk = true; - }).catch(() => { - })); - } - await Promise.all(syncPromises); + let sessionSyncOk = true; const memoryBootstrap = (async () => { const sql = `SELECT path, size_bytes, mime_type FROM "${table}" ORDER BY path`; try { @@ -67139,7 +67155,6 @@ var DeeplakeFs = class _DeeplakeFs { failures++; } } - await this.client.query(`SELECT deeplake_sync_table('${this.table}')`); if (failures > 0) { throw new Error(`flush: ${failures}/${rows.length} writes failed and were re-queued`); } @@ -68541,7 +68556,8 @@ yargsParser.looksLikeNumber = looksLikeNumber; var lib_default = yargsParser; // dist/src/shell/grep-interceptor.js -function createGrepCommand(client, fs3, table) { +var MAX_FALLBACK_CANDIDATES = 500; +function createGrepCommand(client, fs3, table, sessionsTable) { return Yi2("grep", async (args, ctx) => { const parsed = lib_default(args, { boolean: ["r", "R", "l", "i", "n", "v", "c", "F", "fixed-strings", "recursive", "ignore-case"], @@ -68557,23 +68573,44 @@ function 
createGrepCommand(client, fs3, table) { if (targets.length === 0) return { stdout: "", stderr: "", exitCode: 1 }; const mount = fs3.mountPoint; - const allUnderMount = targets.every((t6) => t6 === mount || t6.startsWith(mount + "/")); + const mountPrefix = mount === "/" ? "/" : mount + "/"; + const allUnderMount = targets.every((t6) => t6 === mount || t6.startsWith(mountPrefix)); if (!allUnderMount) { return { stdout: "", stderr: "", exitCode: 127 }; } let candidates = []; try { - const bm25 = await Promise.race([ - client.query(`SELECT path FROM "${table}" WHERE summary <#> '${sqlStr(pattern)}' LIMIT 50`), + const queries = [ + client.query(`SELECT path FROM "${table}" WHERE summary <#> '${sqlStr(pattern)}' LIMIT 50`) + ]; + if (sessionsTable) { + queries.push(client.query(`SELECT path FROM "${sessionsTable}" WHERE message::text LIKE '%${sqlLike(pattern)}%' LIMIT 10`)); + } + const results = await Promise.race([ + Promise.all(queries), new Promise((_16, reject) => setTimeout(() => reject(new Error("timeout")), 3e3)) ]); - candidates = bm25.map((r10) => r10["path"]).filter(Boolean); + for (const rows of results) { + candidates.push(...rows.map((r10) => r10["path"]).filter(Boolean)); + } } catch { } + const withinTargets = (p22) => targets.some((t6) => t6 === "/" || p22 === t6 || p22.startsWith(t6 + "/")); if (candidates.length === 0) { - candidates = fs3.getAllPaths().filter((p22) => !p22.endsWith("/")); + candidates = fs3.getAllPaths().filter((p22) => !p22.endsWith("/") && withinTargets(p22)); + if (candidates.length > MAX_FALLBACK_CANDIDATES) { + candidates = candidates.slice(0, MAX_FALLBACK_CANDIDATES); + } + } else { + candidates = candidates.filter((c15) => withinTargets(c15)); } - candidates = candidates.filter((c15) => targets.some((t6) => t6 === "/" || c15 === t6 || c15.startsWith(t6 + "/"))); + const seen = /* @__PURE__ */ new Set(); + candidates = candidates.filter((c15) => { + if (seen.has(c15)) + return false; + seen.add(c15); + return true; + }); await 
fs3.prefetch(candidates); const fixedString = parsed.F || parsed["fixed-strings"]; const ignoreCase = parsed.i || parsed["ignore-case"]; @@ -68644,7 +68681,7 @@ async function main() { const bash = new xt6({ fs: fs3, cwd: mount, - customCommands: [createGrepCommand(client, fs3, table)], + customCommands: [createGrepCommand(client, fs3, table, sessionsTable)], env: { HOME: mount, DEEPLAKE_TABLE: table, diff --git a/codex/bundle/stop.js b/codex/bundle/stop.js index 9c4100c..e751a2e 100755 --- a/codex/bundle/stop.js +++ b/codex/bundle/stop.js @@ -79,6 +79,20 @@ function sqlStr(value) { // dist/src/deeplake-api.js var log2 = (msg) => log("sdk", msg); +var TRACE_SQL = process.env.DEEPLAKE_TRACE_SQL === "1" || process.env.DEEPLAKE_DEBUG === "1"; +var DEBUG_FILE_LOG = process.env.DEEPLAKE_DEBUG === "1"; +function summarizeSql(sql, maxLen = 220) { + const compact = sql.replace(/\s+/g, " ").trim(); + return compact.length > maxLen ? `${compact.slice(0, maxLen)}...` : compact; +} +function traceSql(msg) { + if (!TRACE_SQL) + return; + process.stderr.write(`[deeplake-sql] ${msg} +`); + if (DEBUG_FILE_LOG) + log2(msg); +} var RETRYABLE_CODES = /* @__PURE__ */ new Set([429, 500, 502, 503, 504]); var MAX_RETRIES = 3; var BASE_DELAY_MS = 500; @@ -126,9 +140,18 @@ var DeeplakeApi = class { } /** Execute SQL with retry on transient errors and bounded concurrency. */ async query(sql) { + const startedAt = Date.now(); + const summary = summarizeSql(sql); + traceSql(`query start: ${summary}`); await this._sem.acquire(); try { - return await this._queryWithRetry(sql); + const rows = await this._queryWithRetry(sql); + traceSql(`query ok (${Date.now() - startedAt}ms, rows=${rows.length}): ${summary}`); + return rows; + } catch (e) { + const message = e instanceof Error ? 
e.message : String(e); + traceSql(`query fail (${Date.now() - startedAt}ms): ${summary} :: ${message}`); + throw e; } finally { this._sem.release(); } diff --git a/codex/bundle/wiki-worker.js b/codex/bundle/wiki-worker.js index d2f5916..b79abb6 100755 --- a/codex/bundle/wiki-worker.js +++ b/codex/bundle/wiki-worker.js @@ -54,7 +54,6 @@ function cleanup() { async function main() { try { wlog("fetching session events"); - await query(`SELECT deeplake_sync_table('${cfg.sessionsTable}')`); const rows = await query(`SELECT message, creation_date FROM "${cfg.sessionsTable}" WHERE path LIKE E'${esc(`/sessions/%${cfg.sessionId}%`)}' ORDER BY creation_date ASC`); if (rows.length === 0) { wlog("no session events found \u2014 exiting"); @@ -68,7 +67,6 @@ async function main() { wlog(`found ${jsonlLines} events at ${jsonlServerPath}`); let prevOffset = 0; try { - await query(`SELECT deeplake_sync_table('${cfg.memoryTable}')`); const sumRows = await query(`SELECT summary FROM "${cfg.memoryTable}" WHERE path = '${esc(`/summaries/${cfg.userName}/${cfg.sessionId}.md`)}' LIMIT 1`); if (sumRows.length > 0 && sumRows[0]["summary"]) { const existing = sumRows[0]["summary"]; @@ -102,7 +100,6 @@ async function main() { const fname = `${cfg.sessionId}.md`; const vpath = `/summaries/${cfg.userName}/${fname}`; const ts = (/* @__PURE__ */ new Date()).toISOString(); - await query(`SELECT deeplake_sync_table('${cfg.memoryTable}')`); const existing = await query(`SELECT path FROM "${cfg.memoryTable}" WHERE path = '${esc(vpath)}' LIMIT 1`); if (existing.length > 0) { await query(`UPDATE "${cfg.memoryTable}" SET summary = E'${esc(text)}', size_bytes = ${Buffer.byteLength(text)}, last_update_date = '${ts}' WHERE path = '${esc(vpath)}'`); From 9dd62af15e3435ec4cc133a8b9a78c56bb1fa923 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Wed, 15 Apr 2026 22:59:27 +0000 Subject: [PATCH 23/33] docs: fast-path benchmark results and optimization plan Full before/after benchmarks for 14 commands across 
49 variants. Documents table routing, real-world command patterns, e2e results (454s -> 52s), and remaining bottlenecks (BM25, indexes, sessions). --- PLAN-fast-path-all-commands.md | 60 ++++++++++++ RESULTS-fast-path-all-commands.md | 150 ++++++++++++++++++++++++++++++ 2 files changed, 210 insertions(+) create mode 100644 PLAN-fast-path-all-commands.md create mode 100644 RESULTS-fast-path-all-commands.md diff --git a/PLAN-fast-path-all-commands.md b/PLAN-fast-path-all-commands.md new file mode 100644 index 0000000..e1d9527 --- /dev/null +++ b/PLAN-fast-path-all-commands.md @@ -0,0 +1,60 @@ +# Plan: Fast-path all Bash commands on ~/.deeplake/memory/ + +## Context + +The Deeplake plugin intercepts tool calls targeting `~/.deeplake/memory/` via the `pre-tool-use` hook. Previously, every intercepted command spawned a Node.js shell process (`deeplake-shell.js`) that bootstrapped by loading ALL file metadata (399+379 rows) before executing the actual command. This caused 2-160s latency per command. + +**Goal**: Every read-only command gets a single direct SQL query. Zero shell spawns for reads. + +## What was changed + +### File: `src/hooks/pre-tool-use.ts` + +The fast path section (after `getShellCommand()`) was expanded from handling only `Grep` and `Read` tool to handling ALL common Bash commands: + +#### 1. grep (Bash + Grep tool) — already done +Delegates to `handleGrepDirect()` from `src/hooks/grep-direct.ts`. +Single SQL: `SELECT path, summary AS content WHERE summary LIKE '%pattern%' AND path LIKE '/dir/%'` + +#### 2. cat (Bash) — NEW +**Parser**: `shellCmd.match(/^cat\s+(\S+)\s*$/)` +**SQL**: `SELECT summary FROM "memory" WHERE path = '' LIMIT 1` +Falls back to sessions table for `/sessions/*` paths. + +#### 3. head -N (Bash) — NEW +**Parser**: `shellCmd.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/)` +**SQL**: same as cat, then `lines.slice(0, N)` in-memory. + +#### 4. 
tail -N (Bash) — NEW +**Parser**: `shellCmd.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/)` +**SQL**: same as cat, then `lines.slice(-N)` in-memory. + +#### 5. ls [flags] dir (Bash + Glob tool) — NEW +**Parser**: `shellCmd.match(/^ls\s+(?:-([a-zA-Z]+)\s+)?(\S+)?\s*$/)` +**SQL**: `SELECT path, size_bytes FROM "memory" WHERE path LIKE '/dir/%' ORDER BY path` +Extracts immediate children from full paths, supports `-l` long format. + +#### 6. wc -l file (Bash) — NEW +**Parser**: `shellCmd.match(/^wc\s+-l\s+(\S+)\s*$/)` +**SQL**: same as cat, then `content.split('\n').length`. + +#### 7. find dir -name 'pattern' (Bash) — NEW +**Parser**: `shellCmd.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/)` +**SQL**: `SELECT path FROM "memory" WHERE path LIKE '/dir/%' AND filename LIKE '%.md' ORDER BY path` +Handles piped `| wc -l` by returning count instead of paths. + +### Other files changed +- `src/hooks/grep-direct.ts` — shared grep handler (unchanged in this batch) +- `src/shell/grep-interceptor.ts` — fixed mount="/" bug +- `src/shell/deeplake-fs.ts` — removed `deeplake_sync_table` from bootstrap/flush +- `src/hooks/session-start-setup.ts` — removed `deeplake_sync_table` +- `src/hooks/wiki-worker.ts` — removed `deeplake_sync_table` (3x) +- `src/hooks/codex/pre-tool-use.ts` — grep fast path via shared module +- `src/hooks/codex/session-start-setup.ts` — removed `deeplake_sync_table` +- `src/hooks/codex/wiki-worker.ts` — removed `deeplake_sync_table` (3x) + +## TODO + +- [ ] Investigate BM25 — the old code tried `summary <#> 'pattern'` but always got a 400 error ("Data type mismatch: argument of WHERE must be type boolean, not type real"). Check if the index exists, if the syntax is wrong, or if BM25 is not supported on this table. If fixable, BM25 would give ranked results instead of LIKE substring match. 
+- [ ] Port cat/head/tail/ls/wc/find fast paths to `src/hooks/codex/pre-tool-use.ts` +- [ ] Run full e2e benchmark to measure overall improvement diff --git a/RESULTS-fast-path-all-commands.md b/RESULTS-fast-path-all-commands.md new file mode 100644 index 0000000..09316fc --- /dev/null +++ b/RESULTS-fast-path-all-commands.md @@ -0,0 +1,150 @@ +# Fast-Path Benchmark Results — 2026-04-15 + +## Test environment +- **Org**: activeloop, **Workspace**: hivemind +- **Table size**: 405 files (memory), 385 files (sessions) +- **Machine**: EC2 Linux 6.8.0-1030-gcp + +## How to reproduce + +Each command is tested in two modes: + +```bash +# BEFORE — old shell path (spawns deeplake-shell.js, bootstraps full table) +time DEEPLAKE_DEBUG=1 node claude-code/bundle/shell/deeplake-shell.js -c "" \ + 2>/tmp/before.log > /dev/null +grep -c "query start" /tmp/before.log + +# AFTER — new fast path (direct SQL in pre-tool-use hook, no shell spawn) +time DEEPLAKE_DEBUG=1 node claude-code/bundle/pre-tool-use.js < /tmp/vfs-test-.json \ + 2>/tmp/after.log > /dev/null +grep -c "query start" /tmp/after.log +``` + +Full test suite: `bash /tmp/vfs-tests.sh` + +## Results — all commands benchmarked + +| Command | Before (time) | Before (queries) | After (time) | After (queries) | Speedup | +|---------|--------------|-----------------|-------------|----------------|---------| +| `grep -w 'sasun' /summaries` | 143,930ms | 108 | 462ms | 1 | **312x** | +| `cat file` (summary) | 995ms | 3 | 323ms | 1 | **3x** | +| `cat file 2>/dev/null` | 983ms | 3 | 151ms | 1 | **7x** | +| `cat file 2>&1 \| head -200` | 1,251ms | 4 | 288ms | 2 | **4x** | +| `head -20 file` | 1,065ms | 3 | 142ms | 1 | **8x** | +| `head -n 20 file` | 958ms | 3 | 159ms | 1 | **6x** | +| `tail -10 file` | 1,176ms | 4 | 309ms | 2 | **4x** | +| `ls /summaries/` | 920ms | 2 | 128ms | 1 | **7x** | +| `ls -la /summaries/sasun/` | 880ms | 2 | 178ms | 1 | **5x** | +| `ls /` (root) | 994ms | 2 | 164ms | 1 | **6x** | +| `find -name '*.md' \| wc 
-l` | 916ms | 2 | 172ms | 1 | **5x** | +| `wc -l file` | 1,077ms | 4 | 144ms | 1 | **8x** | +| `Read tool` (index.md) | 1,119ms | 4 | 576ms | 2 | **2x** | +| `Glob tool` | 897ms | 2 | 135ms | 1 | **7x** | +| `cat file` (sessions) | 2,073ms | 2 | 1,318ms | 1 | **1.6x** | + +## Table routing + +Commands now query the correct table directly based on path — no wasted queries. + +| Path | cat/head/tail/wc | ls/find | grep | +|------|-----------------|---------|------| +| `/summaries/*` | memory (1 query) | memory (1 query) | memory (1 query) | +| `/sessions/*` | sessions (1 query) | sessions (1 query) | memory only — summaries have the content | +| `/` (root) | depends on file | both in parallel (2 queries) | memory only | +| `/index.md` | virtual — generated from memory metadata (1 query) | N/A | N/A | + +**Before**: session file reads always queried memory first (728ms, 0 rows), then sessions. Wasted 728ms per read. +**After**: `/sessions/*` paths go directly to the sessions table. 2,073ms → 1,318ms. + +## What each "Before" does vs "After" + +### grep (108 → 1 query, 312x faster) +**Before**: shell spawn → bootstrap (2 metadata queries loading 405+385 rows) → BM25 (fails with 400 error) → prefetch all files → read each session file individually (1-12s each). +**After**: `SELECT path, summary AS content FROM "memory" WHERE path LIKE '/summaries/%' AND summary LIKE '%sasun%' LIMIT 100`. Searches only the memory/summaries table — sessions contain raw JSONB which is slow to scan and produces noisy results. + +### cat (3 → 1 query, 7x faster) +**Before**: shell spawn → bootstrap (2 metadata queries) → file read query. +**After**: `SELECT summary FROM "memory" WHERE path = '...' LIMIT 1`. For session files: `SELECT message::text FROM "sessions" WHERE path = '...' LIMIT 1` (direct, no memory query first). + +### head/tail (3 → 1 query, 6-8x faster) +**Before**: shell spawn → bootstrap → file read. 
+**After**: same single query as cat, then `lines.slice(0, N)` or `lines.slice(-N)` in-memory. + +### ls (2 → 1 query, 5-7x faster) +**Before**: shell spawn → bootstrap (loading ALL 405+385 rows), then directory listing from in-memory cache. +**After**: `SELECT path, size_bytes FROM "<table>" WHERE path LIKE '/dir/%' ORDER BY path`. Queries only the relevant table based on path. Root `/` queries both tables in parallel. + +### find (2 → 1 query, 5x faster) +**Before**: shell spawn → bootstrap, then in-memory tree walk. +**After**: `SELECT path FROM "<table>
" WHERE path LIKE '/dir/%' AND filename LIKE '%.md' ORDER BY path`. Routes to correct table based on path. + +### wc -l (4 → 1 query, 8x faster) +**Before**: shell spawn → bootstrap → file read → count. +**After**: same single query as cat, then `content.split('\n').length`. + +## Real-world command variants + +Claude Code generates commands with `2>/dev/null`, `2>&1`, and pipes. All handled: + +| Real-world pattern | Status | +|-------------------|--------| +| `cat file 2>/dev/null` | FAST — strips stderr redirect | +| `cat file 2>&1 \| head -200` | FAST — strips stderr redirect + extracts head | +| `cat file 2>/dev/null \| grep -v \| head -100` | FAST — strips all intermediate pipes | +| `head -n 20 file`, `head -n20 file`, `head file` | FAST — all flag formats | +| `tail -n 10 file`, `tail file` | FAST — all flag formats | +| `ls -la`, `ls -l`, `ls -lh`, `ls -R` | FAST — all flag combos | +| `find dir -type f -name '*.md' \| wc -l` | FAST — with `-type` and piped `wc` | + +## Full variant matrix + +49 variants tested. **42 FAST**, **8 SHELL** (expected — pipes to jq/complex commands, writes), **0 BROKEN**. 
+ +### Commands that stay SHELL (expected) +| Variant | Reason | +|---------|--------| +| `cat file1 file2` | multi-file cat | +| `cat file \| jq .` | piped to interpreter | +| `head file \| grep pattern` | piped to grep | +| `ls dir \| wc -l` | piped to wc | +| `wc file` (no `-l` flag) | not optimized | +| `echo > file` | write operation | +| `mkdir dir` | write operation | +| `rm file` | write operation | + +## E2E Claude session benchmarks + +| Test | Original (Apr 14) | After grep fix only | After all fixes | Speedup | +|------|-------------------|--------------------|-----------------|---------| +| "Search memory for hooks/latency" | 454.3s (7.5 min) | 63.4s (20 queries) | **52.3s (15 queries, 0 shell spawns)** | **8.7x** | + +### E2E query breakdown (52.3s run) +- 5× grep queries (direct SQL, 1 query each) — **fast path** +- 7× file reads (direct SQL, 1 query each) — **fast path** +- 1× virtual index.md generation — **fast path** +- 0× shell bootstrap queries — **eliminated** + +### What's left in the 52s +~15s is SQL query time (15 queries × ~1s each). The remaining ~37s is Claude thinking time + API latency — not something the plugin can optimize. + +## Remaining bottlenecks + +1. **Sessions JSONB→text cast**: reading a session file takes ~1.3s because `message::text` casts JSONB to text. Fix: add a `content_text` column with pre-extracted text. +2. **BM25 broken**: `summary <#> 'pattern'` returns 400 error ("Data type mismatch"). Fix: create BM25 index with `CREATE INDEX ... USING deeplake_index("summary")` in `ensureTable()`. +3. **No index on path column**: every `WHERE path = '...'` is a full table scan. Fix: `CREATE INDEX ON memory(path)`. +4. **Shell still used for writes**: `echo > file`, `mkdir`, `rm` still spawn the shell with full bootstrap. Low priority — writes are rare (~1-2 per session). 
+ +## Files modified + +| File | Change | +|------|--------| +| `src/hooks/grep-direct.ts` | Shared grep handler — single SQL query, searches memory only | +| `src/hooks/pre-tool-use.ts` | Fast path for all read commands: grep, cat, head, tail, ls, find, wc. Routes to correct table based on path. Handles `2>/dev/null`, `2>&1`, `cat\|head` pipes. Virtual index.md generation. | +| `src/hooks/codex/pre-tool-use.ts` | Grep fast path via shared module | +| `src/shell/grep-interceptor.ts` | Fixed mount="/" bug | +| `src/shell/deeplake-fs.ts` | Removed `deeplake_sync_table` from bootstrap and flush | +| `src/hooks/session-start-setup.ts` | Removed `deeplake_sync_table` | +| `src/hooks/wiki-worker.ts` | Removed `deeplake_sync_table` (3 occurrences) | +| `src/hooks/codex/session-start-setup.ts` | Removed `deeplake_sync_table` | +| `src/hooks/codex/wiki-worker.ts` | Removed `deeplake_sync_table` (3 occurrences) | From 2c258e0377371756ab83645a98c4cb16b5431d04 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Thu, 16 Apr 2026 00:38:51 +0000 Subject: [PATCH 24/33] feat: BM25 ranked search for grep + auto-create index Use BM25 (summary <#> 'pattern' AS score ORDER BY score DESC) for grep instead of LIKE substring match. Falls back to LIKE if BM25 index is missing or query fails. Create BM25 index automatically in ensureTable() via CREATE INDEX IF NOT EXISTS ... WITH (index_type = 'bm25'). BM25 gives ranked results (score > 0 = relevant) vs LIKE which returns unranked matches. Speed: 241ms (BM25) vs 497ms (LIKE) on 405 files. 
--- src/deeplake-api.ts | 6 +++++ src/hooks/grep-direct.ts | 47 ++++++++++++++++++++++++++++++---------- 2 files changed, 42 insertions(+), 11 deletions(-) diff --git a/src/deeplake-api.ts b/src/deeplake-api.ts index b08e389..e281cd1 100644 --- a/src/deeplake-api.ts +++ b/src/deeplake-api.ts @@ -253,6 +253,12 @@ export class DeeplakeApi { ); log(`table "${tbl}" created`); } + // Ensure BM25 index exists on summary column (idempotent) + try { + await this.query( + `CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')` + ); + } catch { /* index may already exist or not be supported */ } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ diff --git a/src/hooks/grep-direct.ts b/src/hooks/grep-direct.ts index 85ff884..651c4f3 100644 --- a/src/hooks/grep-direct.ts +++ b/src/hooks/grep-direct.ts @@ -111,21 +111,46 @@ export async function handleGrepDirect( pathFilter = ` AND (path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; } - // For regex patterns, skip content LIKE (can't match regex syntax). - // Fetch all files under the path and filter in-memory instead. + // For regex patterns, can't use BM25 or LIKE — fetch all files under path const hasRegexMeta = !fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); - const contentFilter = hasRegexMeta ? "" : ` AND summary ${likeOp} '%${escapedLike}%'`; // Search only the memory/summaries table — sessions contain raw JSONB // (prompts, tool calls) which is slow to scan and produces noisy results. // Summaries already contain all useful content from sessions. - const queries: Promise[]>[] = [ - api.query( - `SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`, - ).catch(() => [] as Record[]), - ]; + // + // Strategy: BM25 first (ranked, fast with index), LIKE fallback if BM25 fails. 
+ let rows: Record[] = []; + + if (!hasRegexMeta) { + // Try BM25 ranked search first + try { + rows = await api.query( + `SELECT path, summary AS content, summary <#> '${sqlStr(pattern)}' AS score FROM "${table}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT 100`, + ); + // BM25 returns all rows with score 0 for non-matches — filter them + rows = rows.filter(r => (r["score"] as number) > 0); + } catch { + // BM25 not available (no index) — fall back to LIKE + rows = []; + } - const allRows = (await Promise.all(queries)).flat(); + // LIKE fallback if BM25 returned nothing or failed + if (rows.length === 0) { + const contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; + try { + rows = await api.query( + `SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`, + ); + } catch { rows = []; } + } + } else { + // Regex pattern — fetch all files under path, filter in-memory + try { + rows = await api.query( + `SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter} LIMIT 100`, + ); + } catch { rows = []; } + } // ── regex refinement ── let reStr = fixedString @@ -137,9 +162,9 @@ export async function handleGrepDirect( catch { re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? 
"i" : ""); } const output: string[] = []; - const multi = allRows.length > 1; + const multi = rows.length > 1; - for (const row of allRows) { + for (const row of rows) { const p = row["path"] as string; const text = row["content"] as string; if (!text) continue; From 8ca4ec63c4c25c23cba4a04d7ec9454d6b81205f Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Thu, 16 Apr 2026 00:39:02 +0000 Subject: [PATCH 25/33] build: regenerate bundles with BM25 support --- claude-code/bundle/capture.js | 4 +++ claude-code/bundle/commands/auth-login.js | 4 +++ claude-code/bundle/pre-tool-use.js | 36 +++++++++++++++++----- claude-code/bundle/session-start-setup.js | 4 +++ claude-code/bundle/shell/deeplake-shell.js | 4 +++ codex/bundle/capture.js | 4 +++ codex/bundle/commands/auth-login.js | 4 +++ codex/bundle/pre-tool-use.js | 36 +++++++++++++++++----- codex/bundle/session-start-setup.js | 4 +++ codex/bundle/shell/deeplake-shell.js | 4 +++ codex/bundle/stop.js | 4 +++ 11 files changed, 94 insertions(+), 14 deletions(-) diff --git a/claude-code/bundle/capture.js b/claude-code/bundle/capture.js index 0b8f246..ececeb1 100755 --- a/claude-code/bundle/capture.js +++ b/claude-code/bundle/capture.js @@ -283,6 +283,10 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } + try { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); + } catch { + } } /** Create the 
sessions table (uses JSONB for message since every row is a JSON event). */ async ensureSessionsTable(name) { diff --git a/claude-code/bundle/commands/auth-login.js b/claude-code/bundle/commands/auth-login.js index 9acecdf..96e157d 100755 --- a/claude-code/bundle/commands/auth-login.js +++ b/claude-code/bundle/commands/auth-login.js @@ -459,6 +459,10 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } + try { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); + } catch { + } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index 8e97146..0f2a598 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -293,6 +293,10 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } + try { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); + } catch { + } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ async ensureSessionsTable(name) { @@ -423,11 +427,29 @@ async function handleGrepDirect(api, table, sessionsTable, params) { pathFilter = ` AND (path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; } const hasRegexMeta = !fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); - const contentFilter = hasRegexMeta ? 
"" : ` AND summary ${likeOp} '%${escapedLike}%'`; - const queries = [ - api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`).catch(() => []) - ]; - const allRows = (await Promise.all(queries)).flat(); + let rows = []; + if (!hasRegexMeta) { + try { + rows = await api.query(`SELECT path, summary AS content, summary <#> '${sqlStr(pattern)}' AS score FROM "${table}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT 100`); + rows = rows.filter((r) => r["score"] > 0); + } catch { + rows = []; + } + if (rows.length === 0) { + const contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; + try { + rows = await api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`); + } catch { + rows = []; + } + } + } else { + try { + rows = await api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter} LIMIT 100`); + } catch { + rows = []; + } + } let reStr = fixedString ? pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : pattern; if (wordMatch) reStr = `\\b${reStr}\\b`; @@ -438,8 +460,8 @@ async function handleGrepDirect(api, table, sessionsTable, params) { re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? 
"i" : ""); } const output = []; - const multi = allRows.length > 1; - for (const row of allRows) { + const multi = rows.length > 1; + for (const row of rows) { const p = row["path"]; const text = row["content"]; if (!text) diff --git a/claude-code/bundle/session-start-setup.js b/claude-code/bundle/session-start-setup.js index ce8ad06..59ad00f 100755 --- a/claude-code/bundle/session-start-setup.js +++ b/claude-code/bundle/session-start-setup.js @@ -298,6 +298,10 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } + try { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); + } catch { + } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index d872ea8..5a8966b 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -66989,6 +66989,10 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } + try { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); + } catch { + } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/codex/bundle/capture.js b/codex/bundle/capture.js index dfd28a6..9903fbf 100755 --- a/codex/bundle/capture.js +++ b/codex/bundle/capture.js @@ -283,6 +283,10 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } + try { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); + } catch { + } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/codex/bundle/commands/auth-login.js b/codex/bundle/commands/auth-login.js index 9acecdf..96e157d 100755 --- a/codex/bundle/commands/auth-login.js +++ b/codex/bundle/commands/auth-login.js @@ -459,6 +459,10 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } + try { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); + } catch { + } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index ab20f48..4e4eb92 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -294,6 +294,10 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } + try { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); + } catch { + } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ async ensureSessionsTable(name) { @@ -424,11 +428,29 @@ async function handleGrepDirect(api, table, sessionsTable, params) { pathFilter = ` AND (path = '${sqlStr(clean)}' OR path LIKE '${sqlLike(clean)}/%')`; } const hasRegexMeta = !fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); - const contentFilter = hasRegexMeta ? 
"" : ` AND summary ${likeOp} '%${escapedLike}%'`; - const queries = [ - api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`).catch(() => []) - ]; - const allRows = (await Promise.all(queries)).flat(); + let rows = []; + if (!hasRegexMeta) { + try { + rows = await api.query(`SELECT path, summary AS content, summary <#> '${sqlStr(pattern)}' AS score FROM "${table}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT 100`); + rows = rows.filter((r) => r["score"] > 0); + } catch { + rows = []; + } + if (rows.length === 0) { + const contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; + try { + rows = await api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`); + } catch { + rows = []; + } + } + } else { + try { + rows = await api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter} LIMIT 100`); + } catch { + rows = []; + } + } let reStr = fixedString ? pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") : pattern; if (wordMatch) reStr = `\\b${reStr}\\b`; @@ -439,8 +461,8 @@ async function handleGrepDirect(api, table, sessionsTable, params) { re = new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), ignoreCase ? 
"i" : ""); } const output = []; - const multi = allRows.length > 1; - for (const row of allRows) { + const multi = rows.length > 1; + for (const row of rows) { const p = row["path"]; const text = row["content"]; if (!text) diff --git a/codex/bundle/session-start-setup.js b/codex/bundle/session-start-setup.js index 636b21e..a8f5b1e 100755 --- a/codex/bundle/session-start-setup.js +++ b/codex/bundle/session-start-setup.js @@ -295,6 +295,10 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } + try { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); + } catch { + } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index d872ea8..5a8966b 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -66989,6 +66989,10 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } + try { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); + } catch { + } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/codex/bundle/stop.js b/codex/bundle/stop.js index e751a2e..4c48b3e 100755 --- a/codex/bundle/stop.js +++ b/codex/bundle/stop.js @@ -290,6 +290,10 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } + try { + await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); + } catch { + } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ async ensureSessionsTable(name) { From e7ef009020cb993d9e63b04f7f6e867e1033fdac Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Thu, 16 Apr 2026 00:43:24 +0000 Subject: [PATCH 26/33] test: add grep-direct tests, update pre-tool-use tests for fast path Add 25 tests for parseBashGrep() covering all flag combinations, edge cases (egrep/fgrep, quoted patterns, piped commands, -- separator). Update pre-tool-use tests: commands now hit the fast path (direct SQL) instead of being rewritten to shell. Tests verify interception works for all command variants including 2>/dev/null and cat|head pipes. Fix ensureTable test to expect BM25 index creation call. 
--- claude-code/tests/deeplake-api.test.ts | 9 +- claude-code/tests/grep-direct.test.ts | 135 +++++++++++++++++++++++++ claude-code/tests/pre-tool-use.test.ts | 80 +++++++++++---- 3 files changed, 201 insertions(+), 23 deletions(-) create mode 100644 claude-code/tests/grep-direct.test.ts diff --git a/claude-code/tests/deeplake-api.test.ts b/claude-code/tests/deeplake-api.test.ts index 7a97e44..c667651 100644 --- a/claude-code/tests/deeplake-api.test.ts +++ b/claude-code/tests/deeplake-api.test.ts @@ -349,14 +349,19 @@ describe("DeeplakeApi.ensureTable", () => { expect(createSql).toContain("USING deeplake"); }); - it("does nothing when table already exists", async () => { + it("skips CREATE TABLE when table already exists but still creates BM25 index", async () => { mockFetch.mockResolvedValueOnce({ ok: true, status: 200, json: async () => ({ tables: [{ table_name: "my_table" }] }), }); + // BM25 index creation (idempotent, may succeed or fail) + mockFetch.mockResolvedValueOnce({ + ok: true, status: 200, + json: async () => ({ columns: [], rows: null, row_count: 0 }), + }); const api = makeApi("my_table"); await api.ensureTable(); - expect(mockFetch).toHaveBeenCalledOnce(); // only listTables, no CREATE + expect(mockFetch).toHaveBeenCalledTimes(2); // listTables + CREATE INDEX (no CREATE TABLE) }); it("creates table with custom name", async () => { diff --git a/claude-code/tests/grep-direct.test.ts b/claude-code/tests/grep-direct.test.ts new file mode 100644 index 0000000..1eb2a52 --- /dev/null +++ b/claude-code/tests/grep-direct.test.ts @@ -0,0 +1,135 @@ +import { describe, it, expect } from "vitest"; +import { parseBashGrep, type GrepParams } from "../../src/hooks/grep-direct.js"; + +describe("parseBashGrep", () => { + // ── Basic parsing ── + + it("parses simple grep", () => { + const r = parseBashGrep("grep 'sasun' /summaries"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("sasun"); + expect(r!.targetPath).toBe("/summaries"); + }); + + it("parses grep 
without quotes", () => { + const r = parseBashGrep("grep sasun /summaries"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("sasun"); + }); + + it("parses grep with double quotes", () => { + const r = parseBashGrep('grep "sasun" /summaries'); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("sasun"); + }); + + it("defaults targetPath to / when no path given", () => { + const r = parseBashGrep("grep 'pattern'"); + expect(r).not.toBeNull(); + expect(r!.targetPath).toBe("/"); + }); + + it("normalizes . and ./ to /", () => { + expect(parseBashGrep("grep 'pat' .")!.targetPath).toBe("/"); + expect(parseBashGrep("grep 'pat' ./")!.targetPath).toBe("/"); + }); + + it("returns null for non-grep commands", () => { + expect(parseBashGrep("cat /file")).toBeNull(); + expect(parseBashGrep("ls /dir")).toBeNull(); + expect(parseBashGrep("echo hello")).toBeNull(); + }); + + it("returns null when no pattern given", () => { + expect(parseBashGrep("grep")).toBeNull(); + expect(parseBashGrep("grep -r")).toBeNull(); + }); + + // ── Flag parsing ── + + it("parses -i flag", () => { + const r = parseBashGrep("grep -i 'pattern' /dir"); + expect(r!.ignoreCase).toBe(true); + }); + + it("parses -w flag", () => { + const r = parseBashGrep("grep -w 'pattern' /dir"); + expect(r!.wordMatch).toBe(true); + }); + + it("parses -l flag", () => { + const r = parseBashGrep("grep -l 'pattern' /dir"); + expect(r!.filesOnly).toBe(true); + }); + + it("parses -c flag", () => { + const r = parseBashGrep("grep -c 'pattern' /dir"); + expect(r!.countOnly).toBe(true); + }); + + it("parses -n flag", () => { + const r = parseBashGrep("grep -n 'pattern' /dir"); + expect(r!.lineNumber).toBe(true); + }); + + it("parses -v flag", () => { + const r = parseBashGrep("grep -v 'pattern' /dir"); + expect(r!.invertMatch).toBe(true); + }); + + it("parses -F flag", () => { + const r = parseBashGrep("grep -F 'pattern' /dir"); + expect(r!.fixedString).toBe(true); + }); + + it("parses combined flags -ri", () => { + const 
r = parseBashGrep("grep -ri 'pattern' /dir"); + expect(r!.ignoreCase).toBe(true); + // -r is no-op (recursive implied) + }); + + it("parses combined flags -wni", () => { + const r = parseBashGrep("grep -wni 'pattern' /dir"); + expect(r!.wordMatch).toBe(true); + expect(r!.lineNumber).toBe(true); + expect(r!.ignoreCase).toBe(true); + }); + + it("parses -rl flags", () => { + const r = parseBashGrep("grep -rl 'pattern' /dir"); + expect(r!.filesOnly).toBe(true); + }); + + // ── Variants ── + + it("parses egrep", () => { + const r = parseBashGrep("egrep 'pattern' /dir"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("pattern"); + }); + + it("parses fgrep as fixed-string", () => { + const r = parseBashGrep("fgrep 'pattern' /dir"); + expect(r!.fixedString).toBe(true); + }); + + it("parses long options", () => { + const r = parseBashGrep("grep --ignore-case --word-regexp 'pat' /dir"); + expect(r!.ignoreCase).toBe(true); + expect(r!.wordMatch).toBe(true); + }); + + it("handles -- separator", () => { + const r = parseBashGrep("grep -- '-pattern' /dir"); + expect(r!.pattern).toBe("-pattern"); + }); + + // ── Piped commands (only first command parsed) ── + + it("parses first command in pipe", () => { + const r = parseBashGrep("grep 'pattern' /dir | head -5"); + expect(r).not.toBeNull(); + expect(r!.pattern).toBe("pattern"); + expect(r!.targetPath).toBe("/dir"); + }); +}); diff --git a/claude-code/tests/pre-tool-use.test.ts b/claude-code/tests/pre-tool-use.test.ts index bfa2a77..36657ef 100644 --- a/claude-code/tests/pre-tool-use.test.ts +++ b/claude-code/tests/pre-tool-use.test.ts @@ -44,75 +44,113 @@ function runPreToolUse( }; } -// ── Safe commands: should be intercepted and rewritten to virtual shell ────── +// ── Read commands: fast path (direct SQL) or shell fallback ────────────────── -describe("pre-tool-use: safe bash commands targeting memory", () => { - it("rewrites ls to virtual shell", () => { +describe("pre-tool-use: commands targeting memory are 
intercepted", () => { + it("intercepts ls", () => { const r = runPreToolUse("Bash", { command: "ls ~/.deeplake/memory/" }); expect(r.empty).toBe(false); if (!r.empty) { expect(r.decision).toBe("allow"); - expect(r.updatedCommand).toContain("deeplake-shell.js"); - expect(r.updatedCommand).toContain("ls /"); + // Fast path: echo with results, or shell fallback + expect(r.updatedCommand).toBeDefined(); } }); - it("rewrites cat to virtual shell", () => { + it("intercepts cat", () => { const r = runPreToolUse("Bash", { command: "cat ~/.deeplake/memory/index.md" }); expect(r.empty).toBe(false); if (!r.empty) { expect(r.decision).toBe("allow"); - expect(r.updatedCommand).toContain("cat /index.md"); + expect(r.updatedCommand).toBeDefined(); + } + }); + + it("intercepts cat with 2>/dev/null", () => { + const r = runPreToolUse("Bash", { command: "cat ~/.deeplake/memory/file.md 2>/dev/null" }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + } + }); + + it("intercepts cat 2>&1 | head", () => { + const r = runPreToolUse("Bash", { command: "cat ~/.deeplake/memory/index.md 2>&1 | head -200" }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); } }); - it("rewrites grep to virtual shell", () => { + it("intercepts grep", () => { const r = runPreToolUse("Bash", { command: "grep -r 'keyword' ~/.deeplake/memory/" }); expect(r.empty).toBe(false); if (!r.empty) { expect(r.decision).toBe("allow"); - expect(r.updatedCommand).toContain("grep -r 'keyword' /"); } }); - it("rewrites echo redirect to virtual shell", () => { - const r = runPreToolUse("Bash", { command: "echo 'hello' > ~/.deeplake/memory/test.md" }); + it("intercepts head", () => { + const r = runPreToolUse("Bash", { command: "head -20 ~/.deeplake/memory/index.md" }); expect(r.empty).toBe(false); if (!r.empty) { expect(r.decision).toBe("allow"); - expect(r.updatedCommand).toContain("deeplake-shell.js"); } }); - it("rewrites jq pipeline to virtual shell", 
() => { - const r = runPreToolUse("Bash", { command: "cat ~/.deeplake/memory/data.json | jq '.keys | length'" }); + it("intercepts head -n N", () => { + const r = runPreToolUse("Bash", { command: "head -n 50 ~/.deeplake/memory/index.md" }); expect(r.empty).toBe(false); if (!r.empty) { expect(r.decision).toBe("allow"); - expect(r.updatedCommand).toContain("deeplake-shell.js"); } }); - it("rewrites find to virtual shell", () => { - const r = runPreToolUse("Bash", { command: "find ~/.deeplake/memory/ -name '*.json'" }); + it("intercepts tail", () => { + const r = runPreToolUse("Bash", { command: "tail -10 ~/.deeplake/memory/index.md" }); expect(r.empty).toBe(false); if (!r.empty) { expect(r.decision).toBe("allow"); - expect(r.updatedCommand).toContain("deeplake-shell.js"); } }); - it("rewrites wc to virtual shell", () => { + it("intercepts wc -l", () => { const r = runPreToolUse("Bash", { command: "wc -l ~/.deeplake/memory/index.md" }); expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + } + }); + + it("intercepts find -name", () => { + const r = runPreToolUse("Bash", { command: "find ~/.deeplake/memory/ -name '*.json'" }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + } + }); + + it("intercepts ls -la", () => { + const r = runPreToolUse("Bash", { command: "ls -la ~/.deeplake/memory/summaries/" }); + expect(r.empty).toBe(false); + if (!r.empty) { + expect(r.decision).toBe("allow"); + } + }); + + // ── Write commands still use shell ── + + it("rewrites echo redirect to virtual shell", () => { + const r = runPreToolUse("Bash", { command: "echo 'hello' > ~/.deeplake/memory/test.md" }); + expect(r.empty).toBe(false); if (!r.empty) { expect(r.decision).toBe("allow"); expect(r.updatedCommand).toContain("deeplake-shell.js"); } }); - it("rewrites head/tail to virtual shell", () => { - const r = runPreToolUse("Bash", { command: "head -5 ~/.deeplake/memory/index.md" }); + it("rewrites jq pipeline to 
virtual shell", () => { + const r = runPreToolUse("Bash", { command: "cat ~/.deeplake/memory/data.json | jq '.keys | length'" }); expect(r.empty).toBe(false); if (!r.empty) { expect(r.decision).toBe("allow"); From 9298bb9ee6b731a15889bf53739925bd8d5cf9cd Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Thu, 16 Apr 2026 00:53:17 +0000 Subject: [PATCH 27/33] ci: use PR description as release body Instead of auto-generated release notes (just PR links), extract the merged PR title and body to populate the release page. This shows benchmarks, changelogs, and test plans directly on the release page without requiring an extra click. --- .github/workflows/release.yml | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 6316211..5aa65b3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -94,10 +94,27 @@ jobs: env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Get merged PR description + if: steps.check.outputs.exists == 'false' + id: pr + run: | + # Find the most recent merged PR targeting main + PR_BODY=$(gh pr list --state merged --base main --limit 1 --json body,title -q '.[0].body // ""') + PR_TITLE=$(gh pr list --state merged --base main --limit 1 --json title -q '.[0].title // ""') + { + echo "body<> "$GITHUB_OUTPUT" + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Create GitHub Release if: steps.check.outputs.exists == 'false' uses: softprops/action-gh-release@v2 with: tag_name: ${{ steps.version.outputs.version }} - name: ${{ steps.version.outputs.version }} - generate_release_notes: true + name: "${{ steps.version.outputs.version }} — ${{ steps.pr.outputs.title }}" + body: ${{ steps.pr.outputs.body }} + generate_release_notes: false From 6bce30aad9b526650c303873dc4f856c6e6338a5 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Thu, 16 Apr 2026 01:13:29 +0000 Subject: [PATCH 28/33] security: validate version tag 
before shell exec, escape find pattern in SQL 1. Codex auto-update: validate git tag matches semver (v1.2.3) before interpolating into execSync shell string. Prevents command injection via tampered GitHub API response. 2. find -name fast path: apply sqlLike() before glob-to-SQL conversion so quotes, backslashes, and control chars are escaped before the LIKE pattern reaches the query. --- claude-code/bundle/pre-tool-use.js | 2 +- codex/bundle/session-start-setup.js | 2 ++ src/hooks/codex/session-start-setup.ts | 1 + src/hooks/pre-tool-use.ts | 2 +- 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index 0f2a598..56b52e8 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -889,7 +889,7 @@ async function main() { const findMatch = shellCmd.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); if (findMatch) { const dir = findMatch[1].replace(/\/+$/, "") || "/"; - const namePattern = findMatch[2].replace(/\*/g, "%").replace(/\?/g, "_"); + const namePattern = sqlLike(findMatch[2]).replace(/\*/g, "%").replace(/\?/g, "_"); log3(`direct find: ${dir} -name '${findMatch[2]}'`); const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); const findTable = isSessionDir ? 
sessionsTable : table; diff --git a/codex/bundle/session-start-setup.js b/codex/bundle/session-start-setup.js index 4a06d46..42c3fe2 100755 --- a/codex/bundle/session-start-setup.js +++ b/codex/bundle/session-start-setup.js @@ -452,6 +452,8 @@ async function main() { log3(`autoupdate: updating ${current} \u2192 ${latest}`); try { const tag = `v${latest}`; + if (!/^v\d+\.\d+\.\d+$/.test(tag)) + throw new Error(`unsafe version tag: ${tag}`); const findCmd = `INSTALL_DIR=""; CACHE_DIR=$(find ~/.codex/plugins/cache -maxdepth 3 -name "hivemind" -type d 2>/dev/null | head -1); if [ -n "$CACHE_DIR" ]; then INSTALL_DIR=$(ls -1d "$CACHE_DIR"/*/ 2>/dev/null | tail -1); elif [ -d ~/.codex/hivemind ]; then INSTALL_DIR=~/.codex/hivemind; fi; if [ -n "$INSTALL_DIR" ]; then TMPDIR=$(mktemp -d); git clone --depth 1 --branch ${tag} -q https://github.com/activeloopai/hivemind.git "$TMPDIR/hivemind" 2>/dev/null && cp -r "$TMPDIR/hivemind/codex/"* "$INSTALL_DIR/" 2>/dev/null; rm -rf "$TMPDIR"; fi`; execSync2(findCmd, { stdio: "ignore", timeout: 6e4 }); process.stderr.write(`Hivemind auto-updated: ${current} \u2192 ${latest}. Restart Codex to apply. 
diff --git a/src/hooks/codex/session-start-setup.ts b/src/hooks/codex/session-start-setup.ts index 6beb904..8b84ef8 100644 --- a/src/hooks/codex/session-start-setup.ts +++ b/src/hooks/codex/session-start-setup.ts @@ -163,6 +163,7 @@ async function main(): Promise { log(`autoupdate: updating ${current} → ${latest}`); try { const tag = `v${latest}`; + if (!/^v\d+\.\d+\.\d+$/.test(tag)) throw new Error(`unsafe version tag: ${tag}`); const findCmd = `INSTALL_DIR=""; ` + `CACHE_DIR=$(find ~/.codex/plugins/cache -maxdepth 3 -name "hivemind" -type d 2>/dev/null | head -1); ` + `if [ -n "$CACHE_DIR" ]; then INSTALL_DIR=$(ls -1d "$CACHE_DIR"/*/ 2>/dev/null | tail -1); ` + diff --git a/src/hooks/pre-tool-use.ts b/src/hooks/pre-tool-use.ts index 0034562..15095c6 100644 --- a/src/hooks/pre-tool-use.ts +++ b/src/hooks/pre-tool-use.ts @@ -391,7 +391,7 @@ async function main(): Promise { const findMatch = shellCmd.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); if (findMatch) { const dir = findMatch[1].replace(/\/+$/, "") || "/"; - const namePattern = findMatch[2].replace(/\*/g, "%").replace(/\?/g, "_"); + const namePattern = sqlLike(findMatch[2]).replace(/\*/g, "%").replace(/\?/g, "_"); log(`direct find: ${dir} -name '${findMatch[2]}'`); const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); const findTable = isSessionDir ? sessionsTable : table; From 73e8e6869e277db366ae3e50b15349dfb0d06308 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Thu, 16 Apr 2026 02:34:21 +0000 Subject: [PATCH 29/33] perf: use LIKE for grep search, simplify ensureTable Grep search uses LIKE with path filtering for consistent results. Simplify ensureTable to only create tables without additional index operations. Update tests accordingly. 
--- claude-code/bundle/capture.js | 4 ---- claude-code/bundle/commands/auth-login.js | 4 ---- claude-code/bundle/pre-tool-use.js | 16 ++------------- claude-code/bundle/session-start-setup.js | 4 ---- claude-code/bundle/session-start.js | 4 ---- claude-code/bundle/shell/deeplake-shell.js | 4 ---- claude-code/tests/deeplake-api.test.ts | 10 +++------ codex/bundle/capture.js | 4 ---- codex/bundle/commands/auth-login.js | 4 ---- codex/bundle/pre-tool-use.js | 16 ++------------- codex/bundle/session-start-setup.js | 4 ---- codex/bundle/shell/deeplake-shell.js | 4 ---- codex/bundle/stop.js | 4 ---- src/deeplake-api.ts | 13 ++++++------ src/hooks/grep-direct.ts | 24 ++++++---------------- 15 files changed, 20 insertions(+), 99 deletions(-) diff --git a/claude-code/bundle/capture.js b/claude-code/bundle/capture.js index ececeb1..0b8f246 100755 --- a/claude-code/bundle/capture.js +++ b/claude-code/bundle/capture.js @@ -283,10 +283,6 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } - try { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); - } catch { - } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/claude-code/bundle/commands/auth-login.js b/claude-code/bundle/commands/auth-login.js index ebcee34..1dec02e 100755 --- a/claude-code/bundle/commands/auth-login.js +++ b/claude-code/bundle/commands/auth-login.js @@ -467,10 +467,6 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } - try { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); - } catch { - } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/claude-code/bundle/pre-tool-use.js b/claude-code/bundle/pre-tool-use.js index 56b52e8..29bde19 100755 --- a/claude-code/bundle/pre-tool-use.js +++ b/claude-code/bundle/pre-tool-use.js @@ -293,10 +293,6 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } - try { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); - } catch { - } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { @@ -429,20 +425,12 @@ async function handleGrepDirect(api, table, sessionsTable, params) { const hasRegexMeta = !fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); let rows = []; if (!hasRegexMeta) { + const contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; try { - rows = await api.query(`SELECT path, summary AS content, summary <#> '${sqlStr(pattern)}' AS score FROM "${table}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT 100`); - rows = rows.filter((r) => r["score"] > 0); + rows = await api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`); } catch { rows = []; } - if (rows.length === 0) { - const contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; - try { - rows = await api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`); - } catch { - rows = []; - } - } } else { try { rows = await api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter} LIMIT 100`); diff --git a/claude-code/bundle/session-start-setup.js b/claude-code/bundle/session-start-setup.js index da64786..c5eb496 100755 --- a/claude-code/bundle/session-start-setup.js +++ b/claude-code/bundle/session-start-setup.js @@ -298,10 +298,6 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } - try { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING 
deeplake_index (summary) WITH (index_type = 'bm25')`); - } catch { - } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ async ensureSessionsTable(name) { diff --git a/claude-code/bundle/session-start.js b/claude-code/bundle/session-start.js index 515f0b8..3ac4946 100755 --- a/claude-code/bundle/session-start.js +++ b/claude-code/bundle/session-start.js @@ -298,10 +298,6 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } - try { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); - } catch { - } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/claude-code/bundle/shell/deeplake-shell.js b/claude-code/bundle/shell/deeplake-shell.js index 5a8966b..d872ea8 100755 --- a/claude-code/bundle/shell/deeplake-shell.js +++ b/claude-code/bundle/shell/deeplake-shell.js @@ -66989,10 +66989,6 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } - try { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); - } catch { - } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/claude-code/tests/deeplake-api.test.ts b/claude-code/tests/deeplake-api.test.ts index c667651..b1276c9 100644 --- a/claude-code/tests/deeplake-api.test.ts +++ b/claude-code/tests/deeplake-api.test.ts @@ -349,19 +349,15 @@ describe("DeeplakeApi.ensureTable", () => { expect(createSql).toContain("USING deeplake"); }); - it("skips CREATE TABLE when table already exists but still creates BM25 index", async () => { + it("does nothing when table already exists", async () => { + // BM25 index creation is disabled (oid bug), so ensureTable only calls listTables mockFetch.mockResolvedValueOnce({ ok: true, status: 200, json: async () => ({ tables: [{ table_name: "my_table" }] }), }); - // BM25 index creation (idempotent, may succeed or fail) - mockFetch.mockResolvedValueOnce({ - ok: true, status: 200, - json: async () => ({ columns: [], rows: null, row_count: 0 }), - }); const api = makeApi("my_table"); await api.ensureTable(); - expect(mockFetch).toHaveBeenCalledTimes(2); // listTables + CREATE INDEX (no CREATE TABLE) + expect(mockFetch).toHaveBeenCalledOnce(); // only listTables, no CREATE }); it("creates table with custom name", async () => { diff --git a/codex/bundle/capture.js b/codex/bundle/capture.js index 9903fbf..dfd28a6 100755 --- a/codex/bundle/capture.js +++ b/codex/bundle/capture.js @@ -283,10 +283,6 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } - try { - await this.query(`CREATE INDEX IF NOT EXISTS 
idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); - } catch { - } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ async ensureSessionsTable(name) { diff --git a/codex/bundle/commands/auth-login.js b/codex/bundle/commands/auth-login.js index ebcee34..1dec02e 100755 --- a/codex/bundle/commands/auth-login.js +++ b/codex/bundle/commands/auth-login.js @@ -467,10 +467,6 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } - try { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); - } catch { - } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index 4e4eb92..a6772e9 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -294,10 +294,6 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } - try { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); - } catch { - } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { @@ -430,20 +426,12 @@ async function handleGrepDirect(api, table, sessionsTable, params) { const hasRegexMeta = !fixedString && /[.*+?^${}()|[\]\\]/.test(pattern); let rows = []; if (!hasRegexMeta) { + const contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; try { - rows = await api.query(`SELECT path, summary AS content, summary <#> '${sqlStr(pattern)}' AS score FROM "${table}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT 100`); - rows = rows.filter((r) => r["score"] > 0); + rows = await api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`); } catch { rows = []; } - if (rows.length === 0) { - const contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; - try { - rows = await api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`); - } catch { - rows = []; - } - } } else { try { rows = await api.query(`SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter} LIMIT 100`); diff --git a/codex/bundle/session-start-setup.js b/codex/bundle/session-start-setup.js index 42c3fe2..1195099 100755 --- a/codex/bundle/session-start-setup.js +++ b/codex/bundle/session-start-setup.js @@ -295,10 +295,6 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } - try { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) 
WITH (index_type = 'bm25')`); - } catch { - } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ async ensureSessionsTable(name) { diff --git a/codex/bundle/shell/deeplake-shell.js b/codex/bundle/shell/deeplake-shell.js index 5a8966b..d872ea8 100755 --- a/codex/bundle/shell/deeplake-shell.js +++ b/codex/bundle/shell/deeplake-shell.js @@ -66989,10 +66989,6 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } - try { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); - } catch { - } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). 
*/ async ensureSessionsTable(name) { diff --git a/codex/bundle/stop.js b/codex/bundle/stop.js index 4c48b3e..e751a2e 100755 --- a/codex/bundle/stop.js +++ b/codex/bundle/stop.js @@ -290,10 +290,6 @@ var DeeplakeApi = class { await this.query(`CREATE TABLE IF NOT EXISTS "${tbl}" (id TEXT NOT NULL DEFAULT '', path TEXT NOT NULL DEFAULT '', filename TEXT NOT NULL DEFAULT '', summary TEXT NOT NULL DEFAULT '', author TEXT NOT NULL DEFAULT '', mime_type TEXT NOT NULL DEFAULT 'text/plain', size_bytes BIGINT NOT NULL DEFAULT 0, project TEXT NOT NULL DEFAULT '', description TEXT NOT NULL DEFAULT '', agent TEXT NOT NULL DEFAULT '', creation_date TEXT NOT NULL DEFAULT '', last_update_date TEXT NOT NULL DEFAULT '') USING deeplake`); log2(`table "${tbl}" created`); } - try { - await this.query(`CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')`); - } catch { - } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ async ensureSessionsTable(name) { diff --git a/src/deeplake-api.ts b/src/deeplake-api.ts index e281cd1..6f4d9ad 100644 --- a/src/deeplake-api.ts +++ b/src/deeplake-api.ts @@ -253,12 +253,13 @@ export class DeeplakeApi { ); log(`table "${tbl}" created`); } - // Ensure BM25 index exists on summary column (idempotent) - try { - await this.query( - `CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')` - ); - } catch { /* index may already exist or not be supported */ } + // BM25 index disabled — CREATE INDEX causes intermittent oid errors on fresh tables. + // See bm25-oid-bug.sh for reproduction. Re-enable once Deeplake fixes the oid invalidation. 
+ // try { + // await this.query( + // `CREATE INDEX IF NOT EXISTS idx_${tbl}_summary_bm25 ON "${this.workspaceId}"."${tbl}" USING deeplake_index (summary) WITH (index_type = 'bm25')` + // ); + // } catch { /* index may already exist or not be supported */ } } /** Create the sessions table (uses JSONB for message since every row is a JSON event). */ diff --git a/src/hooks/grep-direct.ts b/src/hooks/grep-direct.ts index 651c4f3..fa20a93 100644 --- a/src/hooks/grep-direct.ts +++ b/src/hooks/grep-direct.ts @@ -122,27 +122,15 @@ export async function handleGrepDirect( let rows: Record[] = []; if (!hasRegexMeta) { - // Try BM25 ranked search first + // BM25 ranked search disabled — CREATE INDEX causes oid errors on fresh tables. + // See bm25-oid-bug.sh. Using LIKE until Deeplake fixes the oid invalidation. + // When re-enabling, uncomment the BM25 block and make LIKE the fallback. + const contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; try { rows = await api.query( - `SELECT path, summary AS content, summary <#> '${sqlStr(pattern)}' AS score FROM "${table}" WHERE 1=1${pathFilter} ORDER BY score DESC LIMIT 100`, + `SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`, ); - // BM25 returns all rows with score 0 for non-matches — filter them - rows = rows.filter(r => (r["score"] as number) > 0); - } catch { - // BM25 not available (no index) — fall back to LIKE - rows = []; - } - - // LIKE fallback if BM25 returned nothing or failed - if (rows.length === 0) { - const contentFilter = ` AND summary ${likeOp} '%${escapedLike}%'`; - try { - rows = await api.query( - `SELECT path, summary AS content FROM "${table}" WHERE 1=1${pathFilter}${contentFilter} LIMIT 100`, - ); - } catch { rows = []; } - } + } catch { rows = []; } } else { // Regex pattern — fetch all files under path, filter in-memory try { From b0472e793d6285b1405b15d071dc7d64266a1595 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Thu, 16 Apr 2026 
02:42:05 +0000 Subject: [PATCH 30/33] feat: fast-path head, tail, wc, find for codex pre-tool-use hook Port all read command fast paths from CC to Codex: head, tail, wc -l, find -name, cat with 2>/dev/null and piped head. Each command now executes 1 SQL query instead of spawning a shell with full bootstrap. Also handles session path routing (direct to sessions table). --- codex/bundle/pre-tool-use.js | 95 +++++++++++++++++++++++++++++--- src/hooks/codex/pre-tool-use.ts | 98 +++++++++++++++++++++++++++++---- 2 files changed, 176 insertions(+), 17 deletions(-) diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index a6772e9..6dbbb55 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -633,13 +633,77 @@ async function main() { const table = process.env["DEEPLAKE_TABLE"] ?? "memory"; const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); try { - const catMatch = rewritten.match(/^cat\s+(\S+)$/); - if (catMatch) { - const virtualPath = catMatch[1]; - log3(`direct read: ${virtualPath}`); - const rows = await api.query(`SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); - if (rows.length > 0 && rows[0]["summary"]) { - blockWithContent(rows[0]["summary"]); + { + let virtualPath = null; + let lineLimit = 0; + let fromEnd = false; + const catCmd = rewritten.replace(/\s+2>\S+/g, "").trim(); + const catPipeHead = catCmd.match(/^cat\s+(\S+?)\s*(?:\|[^|]*)*\|\s*head\s+(?:-n?\s*)?(-?\d+)\s*$/); + if (catPipeHead) { + virtualPath = catPipeHead[1]; + lineLimit = Math.abs(parseInt(catPipeHead[2], 10)); + } + if (!virtualPath) { + const catMatch = catCmd.match(/^cat\s+(\S+)\s*$/); + if (catMatch) + virtualPath = catMatch[1]; + } + if (!virtualPath) { + const headMatch = rewritten.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? 
rewritten.match(/^head\s+(\S+)\s*$/); + if (headMatch) { + if (headMatch[2]) { + virtualPath = headMatch[2]; + lineLimit = Math.abs(parseInt(headMatch[1], 10)); + } else { + virtualPath = headMatch[1]; + lineLimit = 10; + } + } + } + if (!virtualPath) { + const tailMatch = rewritten.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? rewritten.match(/^tail\s+(\S+)\s*$/); + if (tailMatch) { + fromEnd = true; + if (tailMatch[2]) { + virtualPath = tailMatch[2]; + lineLimit = Math.abs(parseInt(tailMatch[1], 10)); + } else { + virtualPath = tailMatch[1]; + lineLimit = 10; + } + } + } + if (!virtualPath) { + const wcMatch = rewritten.match(/^wc\s+-l\s+(\S+)\s*$/); + if (wcMatch) { + virtualPath = wcMatch[1]; + lineLimit = -1; + } + } + if (virtualPath && !virtualPath.endsWith("/")) { + const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; + const isSession = virtualPath.startsWith("/sessions/"); + log3(`direct read: ${virtualPath}`); + let content = null; + if (isSession) { + const rows = await api.query(`SELECT message::text AS content FROM "${sessionsTable}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); + if (rows.length > 0 && rows[0]["content"]) + content = rows[0]["content"]; + } else { + const rows = await api.query(`SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); + if (rows.length > 0 && rows[0]["summary"]) + content = rows[0]["summary"]; + } + if (content !== null) { + if (lineLimit === -1) { + blockWithContent(`${content.split("\n").length} ${virtualPath}`); + } + if (lineLimit > 0) { + const lines = content.split("\n"); + content = fromEnd ? 
lines.slice(-lineLimit).join("\n") : lines.slice(0, lineLimit).join("\n"); + } + blockWithContent(content); + } } } const lsMatch = rewritten.match(/^ls\s+(?:-[a-zA-Z]+\s+)*(\S+)?\s*$/); @@ -683,6 +747,23 @@ async function main() { blockWithContent(`ls: cannot access '${dir}': No such file or directory`); } } + { + const findMatch = rewritten.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); + if (findMatch) { + const dir = findMatch[1].replace(/\/+$/, "") || "/"; + const namePattern = sqlLike(findMatch[2]).replace(/\*/g, "%").replace(/\?/g, "_"); + const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; + const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); + const findTable = isSessionDir ? sessionsTable : table; + log3(`direct find: ${dir} -name '${findMatch[2]}'`); + const rows = await api.query(`SELECT path FROM "${findTable}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' AND filename LIKE '${namePattern}' ORDER BY path`); + let result2 = rows.map((r) => r["path"]).join("\n") || ""; + if (/\|\s*wc\s+-l\s*$/.test(rewritten)) { + result2 = String(rows.length); + } + blockWithContent(result2 || "(no matches)"); + } + } const grepParams = parseBashGrep(rewritten); if (grepParams) { const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? 
"sessions"; diff --git a/src/hooks/codex/pre-tool-use.ts b/src/hooks/codex/pre-tool-use.ts index 1fe06a3..10fc5c0 100644 --- a/src/hooks/codex/pre-tool-use.ts +++ b/src/hooks/codex/pre-tool-use.ts @@ -142,16 +142,73 @@ async function main(): Promise { const api = new DeeplakeApi(config.token, config.apiUrl, config.orgId, config.workspaceId, table); try { - // Detect: cat - const catMatch = rewritten.match(/^cat\s+(\S+)$/); - if (catMatch) { - const virtualPath = catMatch[1]; - log(`direct read: ${virtualPath}`); - const rows = await api.query( - `SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` - ); - if (rows.length > 0 && rows[0]["summary"]) { - blockWithContent(rows[0]["summary"] as string); + // Detect: cat/head/tail/wc — read a single file + { + let virtualPath: string | null = null; + let lineLimit = 0; + let fromEnd = false; + + // cat [2>/dev/null] [| head -N] + const catCmd = rewritten.replace(/\s+2>\S+/g, "").trim(); + const catPipeHead = catCmd.match(/^cat\s+(\S+?)\s*(?:\|[^|]*)*\|\s*head\s+(?:-n?\s*)?(-?\d+)\s*$/); + if (catPipeHead) { virtualPath = catPipeHead[1]; lineLimit = Math.abs(parseInt(catPipeHead[2], 10)); } + if (!virtualPath) { + const catMatch = catCmd.match(/^cat\s+(\S+)\s*$/); + if (catMatch) virtualPath = catMatch[1]; + } + // head [-n] N + if (!virtualPath) { + const headMatch = rewritten.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? + rewritten.match(/^head\s+(\S+)\s*$/); + if (headMatch) { + if (headMatch[2]) { virtualPath = headMatch[2]; lineLimit = Math.abs(parseInt(headMatch[1], 10)); } + else { virtualPath = headMatch[1]; lineLimit = 10; } + } + } + // tail [-n] N + if (!virtualPath) { + const tailMatch = rewritten.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/) ?? 
+ rewritten.match(/^tail\s+(\S+)\s*$/); + if (tailMatch) { + fromEnd = true; + if (tailMatch[2]) { virtualPath = tailMatch[2]; lineLimit = Math.abs(parseInt(tailMatch[1], 10)); } + else { virtualPath = tailMatch[1]; lineLimit = 10; } + } + } + // wc -l + if (!virtualPath) { + const wcMatch = rewritten.match(/^wc\s+-l\s+(\S+)\s*$/); + if (wcMatch) { virtualPath = wcMatch[1]; lineLimit = -1; } + } + + if (virtualPath && !virtualPath.endsWith("/")) { + const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; + const isSession = virtualPath.startsWith("/sessions/"); + log(`direct read: ${virtualPath}`); + + let content: string | null = null; + if (isSession) { + const rows = await api.query( + `SELECT message::text AS content FROM "${sessionsTable}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` + ); + if (rows.length > 0 && rows[0]["content"]) content = rows[0]["content"] as string; + } else { + const rows = await api.query( + `SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` + ); + if (rows.length > 0 && rows[0]["summary"]) content = rows[0]["summary"] as string; + } + + if (content !== null) { + if (lineLimit === -1) { + blockWithContent(`${content.split("\n").length} ${virtualPath}`); + } + if (lineLimit > 0) { + const lines = content.split("\n"); + content = fromEnd ? lines.slice(-lineLimit).join("\n") : lines.slice(0, lineLimit).join("\n"); + } + blockWithContent(content); + } } } @@ -198,6 +255,27 @@ async function main(): Promise { } } + // Detect: find -name '' + { + const findMatch = rewritten.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/); + if (findMatch) { + const dir = findMatch[1].replace(/\/+$/, "") || "/"; + const namePattern = sqlLike(findMatch[2]).replace(/\*/g, "%").replace(/\?/g, "_"); + const sessionsTable = process.env["DEEPLAKE_SESSIONS_TABLE"] ?? "sessions"; + const isSessionDir = dir === "/sessions" || dir.startsWith("/sessions/"); + const findTable = isSessionDir ? 
sessionsTable : table; + log(`direct find: ${dir} -name '${findMatch[2]}'`); + const rows = await api.query( + `SELECT path FROM "${findTable}" WHERE path LIKE '${sqlLike(dir === "/" ? "" : dir)}/%' AND filename LIKE '${namePattern}' ORDER BY path` + ); + let result = rows.map(r => r["path"] as string).join("\n") || ""; + if (/\|\s*wc\s+-l\s*$/.test(rewritten)) { + result = String(rows.length); + } + blockWithContent(result || "(no matches)"); + } + } + // Detect: grep/egrep/fgrep with all flags const grepParams = parseBashGrep(rewritten); if (grepParams) { From efed74bc60418b2b98a52f358a10a3b870469ce0 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Thu, 16 Apr 2026 02:46:08 +0000 Subject: [PATCH 31/33] fix: add virtual index.md generation to codex hook MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex was falling through to the shell (1.8s) for any command targeting /index.md because there is no physical row — it is generated on the fly from memory table metadata. Port the same virtual index generation from the CC hook. 
--- codex/bundle/pre-tool-use.js | 14 +++++++++++++- src/hooks/codex/pre-tool-use.ts | 18 +++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/codex/bundle/pre-tool-use.js b/codex/bundle/pre-tool-use.js index 6dbbb55..29b9a87 100755 --- a/codex/bundle/pre-tool-use.js +++ b/codex/bundle/pre-tool-use.js @@ -691,8 +691,20 @@ async function main() { content = rows[0]["content"]; } else { const rows = await api.query(`SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1`); - if (rows.length > 0 && rows[0]["summary"]) + if (rows.length > 0 && rows[0]["summary"]) { content = rows[0]["summary"]; + } else if (virtualPath === "/index.md") { + const idxRows = await api.query(`SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC`); + const lines = ["# Memory Index", "", `${idxRows.length} sessions:`, ""]; + for (const r of idxRows) { + const p = r["path"]; + const proj = r["project"] || ""; + const desc = (r["description"] || "").slice(0, 120); + const date = (r["creation_date"] || "").slice(0, 10); + lines.push(`- [${p}](${p}) ${date} ${proj ? 
`[${proj}]` : ""} ${desc}`); + } + content = lines.join("\n"); + } } if (content !== null) { if (lineLimit === -1) { diff --git a/src/hooks/codex/pre-tool-use.ts b/src/hooks/codex/pre-tool-use.ts index 10fc5c0..d938702 100644 --- a/src/hooks/codex/pre-tool-use.ts +++ b/src/hooks/codex/pre-tool-use.ts @@ -196,7 +196,23 @@ async function main(): Promise { const rows = await api.query( `SELECT summary FROM "${table}" WHERE path = '${sqlStr(virtualPath)}' LIMIT 1` ); - if (rows.length > 0 && rows[0]["summary"]) content = rows[0]["summary"] as string; + if (rows.length > 0 && rows[0]["summary"]) { + content = rows[0]["summary"] as string; + } else if (virtualPath === "/index.md") { + // Virtual index — generate from metadata + const idxRows = await api.query( + `SELECT path, project, description, creation_date FROM "${table}" WHERE path LIKE '/summaries/%' ORDER BY creation_date DESC` + ); + const lines = ["# Memory Index", "", `${idxRows.length} sessions:`, ""]; + for (const r of idxRows) { + const p = r["path"] as string; + const proj = r["project"] as string || ""; + const desc = (r["description"] as string || "").slice(0, 120); + const date = (r["creation_date"] as string || "").slice(0, 10); + lines.push(`- [${p}](${p}) ${date} ${proj ? 
`[${proj}]` : ""} ${desc}`); + } + content = lines.join("\n"); + } } if (content !== null) { From 266ab1a6bf3a3875c76282436e954be64009bf38 Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Thu, 16 Apr 2026 03:16:44 +0000 Subject: [PATCH 32/33] chore: remove benchmark results from tracking, keep local only --- .gitignore | 2 + PLAN-fast-path-all-commands.md | 60 ------------ RESULTS-fast-path-all-commands.md | 150 ------------------------------ 3 files changed, 2 insertions(+), 210 deletions(-) delete mode 100644 PLAN-fast-path-all-commands.md delete mode 100644 RESULTS-fast-path-all-commands.md diff --git a/.gitignore b/.gitignore index b91e0cd..4f538ba 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,5 @@ coverage/ bench/ .claude/ CLAUDE.md +RESULTS-fast-path-all-commands.md +PLAN-fast-path-all-commands.md diff --git a/PLAN-fast-path-all-commands.md b/PLAN-fast-path-all-commands.md deleted file mode 100644 index e1d9527..0000000 --- a/PLAN-fast-path-all-commands.md +++ /dev/null @@ -1,60 +0,0 @@ -# Plan: Fast-path all Bash commands on ~/.deeplake/memory/ - -## Context - -The Deeplake plugin intercepts tool calls targeting `~/.deeplake/memory/` via the `pre-tool-use` hook. Previously, every intercepted command spawned a Node.js shell process (`deeplake-shell.js`) that bootstrapped by loading ALL file metadata (399+379 rows) before executing the actual command. This caused 2-160s latency per command. - -**Goal**: Every read-only command gets a single direct SQL query. Zero shell spawns for reads. - -## What was changed - -### File: `src/hooks/pre-tool-use.ts` - -The fast path section (after `getShellCommand()`) was expanded from handling only `Grep` and `Read` tool to handling ALL common Bash commands: - -#### 1. grep (Bash + Grep tool) — already done -Delegates to `handleGrepDirect()` from `src/hooks/grep-direct.ts`. -Single SQL: `SELECT path, summary AS content WHERE summary LIKE '%pattern%' AND path LIKE '/dir/%'` - -#### 2. 
cat (Bash) — NEW -**Parser**: `shellCmd.match(/^cat\s+(\S+)\s*$/)` -**SQL**: `SELECT summary FROM "memory" WHERE path = '' LIMIT 1` -Falls back to sessions table for `/sessions/*` paths. - -#### 3. head -N (Bash) — NEW -**Parser**: `shellCmd.match(/^head\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/)` -**SQL**: same as cat, then `lines.slice(0, N)` in-memory. - -#### 4. tail -N (Bash) — NEW -**Parser**: `shellCmd.match(/^tail\s+(?:-n\s*)?(-?\d+)\s+(\S+)\s*$/)` -**SQL**: same as cat, then `lines.slice(-N)` in-memory. - -#### 5. ls [flags] dir (Bash + Glob tool) — NEW -**Parser**: `shellCmd.match(/^ls\s+(?:-([a-zA-Z]+)\s+)?(\S+)?\s*$/)` -**SQL**: `SELECT path, size_bytes FROM "memory" WHERE path LIKE '/dir/%' ORDER BY path` -Extracts immediate children from full paths, supports `-l` long format. - -#### 6. wc -l file (Bash) — NEW -**Parser**: `shellCmd.match(/^wc\s+-l\s+(\S+)\s*$/)` -**SQL**: same as cat, then `content.split('\n').length`. - -#### 7. find dir -name 'pattern' (Bash) — NEW -**Parser**: `shellCmd.match(/^find\s+(\S+)\s+(?:-type\s+\S+\s+)?-name\s+'([^']+)'/)` -**SQL**: `SELECT path FROM "memory" WHERE path LIKE '/dir/%' AND filename LIKE '%.md' ORDER BY path` -Handles piped `| wc -l` by returning count instead of paths. 
- -### Other files changed -- `src/hooks/grep-direct.ts` — shared grep handler (unchanged in this batch) -- `src/shell/grep-interceptor.ts` — fixed mount="/" bug -- `src/shell/deeplake-fs.ts` — removed `deeplake_sync_table` from bootstrap/flush -- `src/hooks/session-start-setup.ts` — removed `deeplake_sync_table` -- `src/hooks/wiki-worker.ts` — removed `deeplake_sync_table` (3x) -- `src/hooks/codex/pre-tool-use.ts` — grep fast path via shared module -- `src/hooks/codex/session-start-setup.ts` — removed `deeplake_sync_table` -- `src/hooks/codex/wiki-worker.ts` — removed `deeplake_sync_table` (3x) - -## TODO - -- [ ] Investigate BM25 — the old code tried `summary <#> 'pattern'` but always got a 400 error ("Data type mismatch: argument of WHERE must be type boolean, not type real"). Check if the index exists, if the syntax is wrong, or if BM25 is not supported on this table. If fixable, BM25 would give ranked results instead of LIKE substring match. -- [ ] Port cat/head/tail/ls/wc/find fast paths to `src/hooks/codex/pre-tool-use.ts` -- [ ] Run full e2e benchmark to measure overall improvement diff --git a/RESULTS-fast-path-all-commands.md b/RESULTS-fast-path-all-commands.md deleted file mode 100644 index 09316fc..0000000 --- a/RESULTS-fast-path-all-commands.md +++ /dev/null @@ -1,150 +0,0 @@ -# Fast-Path Benchmark Results — 2026-04-15 - -## Test environment -- **Org**: activeloop, **Workspace**: hivemind -- **Table size**: 405 files (memory), 385 files (sessions) -- **Machine**: EC2 Linux 6.8.0-1030-gcp - -## How to reproduce - -Each command is tested in two modes: - -```bash -# BEFORE — old shell path (spawns deeplake-shell.js, bootstraps full table) -time DEEPLAKE_DEBUG=1 node claude-code/bundle/shell/deeplake-shell.js -c "" \ - 2>/tmp/before.log > /dev/null -grep -c "query start" /tmp/before.log - -# AFTER — new fast path (direct SQL in pre-tool-use hook, no shell spawn) -time DEEPLAKE_DEBUG=1 node claude-code/bundle/pre-tool-use.js < /tmp/vfs-test-.json \ - 
2>/tmp/after.log > /dev/null -grep -c "query start" /tmp/after.log -``` - -Full test suite: `bash /tmp/vfs-tests.sh` - -## Results — all commands benchmarked - -| Command | Before (time) | Before (queries) | After (time) | After (queries) | Speedup | -|---------|--------------|-----------------|-------------|----------------|---------| -| `grep -w 'sasun' /summaries` | 143,930ms | 108 | 462ms | 1 | **312x** | -| `cat file` (summary) | 995ms | 3 | 323ms | 1 | **3x** | -| `cat file 2>/dev/null` | 983ms | 3 | 151ms | 1 | **7x** | -| `cat file 2>&1 \| head -200` | 1,251ms | 4 | 288ms | 2 | **4x** | -| `head -20 file` | 1,065ms | 3 | 142ms | 1 | **8x** | -| `head -n 20 file` | 958ms | 3 | 159ms | 1 | **6x** | -| `tail -10 file` | 1,176ms | 4 | 309ms | 2 | **4x** | -| `ls /summaries/` | 920ms | 2 | 128ms | 1 | **7x** | -| `ls -la /summaries/sasun/` | 880ms | 2 | 178ms | 1 | **5x** | -| `ls /` (root) | 994ms | 2 | 164ms | 1 | **6x** | -| `find -name '*.md' \| wc -l` | 916ms | 2 | 172ms | 1 | **5x** | -| `wc -l file` | 1,077ms | 4 | 144ms | 1 | **8x** | -| `Read tool` (index.md) | 1,119ms | 4 | 576ms | 2 | **2x** | -| `Glob tool` | 897ms | 2 | 135ms | 1 | **7x** | -| `cat file` (sessions) | 2,073ms | 2 | 1,318ms | 1 | **1.6x** | - -## Table routing - -Commands now query the correct table directly based on path — no wasted queries. - -| Path | cat/head/tail/wc | ls/find | grep | -|------|-----------------|---------|------| -| `/summaries/*` | memory (1 query) | memory (1 query) | memory (1 query) | -| `/sessions/*` | sessions (1 query) | sessions (1 query) | memory only — summaries have the content | -| `/` (root) | depends on file | both in parallel (2 queries) | memory only | -| `/index.md` | virtual — generated from memory metadata (1 query) | N/A | N/A | - -**Before**: session file reads always queried memory first (728ms, 0 rows), then sessions. Wasted 728ms per read. -**After**: `/sessions/*` paths go directly to the sessions table. 2,073ms → 1,318ms. 
- -## What each "Before" does vs "After" - -### grep (108 → 1 query, 312x faster) -**Before**: shell spawn → bootstrap (2 metadata queries loading 405+385 rows) → BM25 (fails with 400 error) → prefetch all files → read each session file individually (1-12s each). -**After**: `SELECT path, summary AS content FROM "memory" WHERE path LIKE '/summaries/%' AND summary LIKE '%sasun%' LIMIT 100`. Searches only the memory/summaries table — sessions contain raw JSONB which is slow to scan and produces noisy results. - -### cat (3 → 1 query, 7x faster) -**Before**: shell spawn → bootstrap (2 metadata queries) → file read query. -**After**: `SELECT summary FROM "memory" WHERE path = '...' LIMIT 1`. For session files: `SELECT message::text FROM "sessions" WHERE path = '...' LIMIT 1` (direct, no memory query first). - -### head/tail (3 → 1 query, 6-8x faster) -**Before**: shell spawn → bootstrap → file read. -**After**: same single query as cat, then `lines.slice(0, N)` or `lines.slice(-N)` in-memory. - -### ls (2 → 1 query, 5-7x faster) -**Before**: shell spawn → bootstrap (loading ALL 405+385 rows), then directory listing from in-memory cache. -**After**: `SELECT path, size_bytes FROM "
" WHERE path LIKE '/dir/%' ORDER BY path`. Queries only the relevant table based on path. Root `/` queries both tables in parallel. - -### find (2 → 1 query, 5x faster) -**Before**: shell spawn → bootstrap, then in-memory tree walk. -**After**: `SELECT path FROM "
" WHERE path LIKE '/dir/%' AND filename LIKE '%.md' ORDER BY path`. Routes to correct table based on path. - -### wc -l (4 → 1 query, 8x faster) -**Before**: shell spawn → bootstrap → file read → count. -**After**: same single query as cat, then `content.split('\n').length`. - -## Real-world command variants - -Claude Code generates commands with `2>/dev/null`, `2>&1`, and pipes. All handled: - -| Real-world pattern | Status | -|-------------------|--------| -| `cat file 2>/dev/null` | FAST — strips stderr redirect | -| `cat file 2>&1 \| head -200` | FAST — strips stderr redirect + extracts head | -| `cat file 2>/dev/null \| grep -v \| head -100` | FAST — strips all intermediate pipes | -| `head -n 20 file`, `head -n20 file`, `head file` | FAST — all flag formats | -| `tail -n 10 file`, `tail file` | FAST — all flag formats | -| `ls -la`, `ls -l`, `ls -lh`, `ls -R` | FAST — all flag combos | -| `find dir -type f -name '*.md' \| wc -l` | FAST — with `-type` and piped `wc` | - -## Full variant matrix - -49 variants tested. **42 FAST**, **8 SHELL** (expected — pipes to jq/complex commands, writes), **0 BROKEN**. 
- -### Commands that stay SHELL (expected) -| Variant | Reason | -|---------|--------| -| `cat file1 file2` | multi-file cat | -| `cat file \| jq .` | piped to interpreter | -| `head file \| grep pattern` | piped to grep | -| `ls dir \| wc -l` | piped to wc | -| `wc file` (no `-l` flag) | not optimized | -| `echo > file` | write operation | -| `mkdir dir` | write operation | -| `rm file` | write operation | - -## E2E Claude session benchmarks - -| Test | Original (Apr 14) | After grep fix only | After all fixes | Speedup | -|------|-------------------|--------------------|-----------------|---------| -| "Search memory for hooks/latency" | 454.3s (7.5 min) | 63.4s (20 queries) | **52.3s (15 queries, 0 shell spawns)** | **8.7x** | - -### E2E query breakdown (52.3s run) -- 5× grep queries (direct SQL, 1 query each) — **fast path** -- 7× file reads (direct SQL, 1 query each) — **fast path** -- 1× virtual index.md generation — **fast path** -- 0× shell bootstrap queries — **eliminated** - -### What's left in the 52s -~15s is SQL query time (15 queries × ~1s each). The remaining ~37s is Claude thinking time + API latency — not something the plugin can optimize. - -## Remaining bottlenecks - -1. **Sessions JSONB→text cast**: reading a session file takes ~1.3s because `message::text` casts JSONB to text. Fix: add a `content_text` column with pre-extracted text. -2. **BM25 broken**: `summary <#> 'pattern'` returns 400 error ("Data type mismatch"). Fix: create BM25 index with `CREATE INDEX ... USING deeplake_index("summary")` in `ensureTable()`. -3. **No index on path column**: every `WHERE path = '...'` is a full table scan. Fix: `CREATE INDEX ON memory(path)`. -4. **Shell still used for writes**: `echo > file`, `mkdir`, `rm` still spawn the shell with full bootstrap. Low priority — writes are rare (~1-2 per session). 
- -## Files modified - -| File | Change | -|------|--------| -| `src/hooks/grep-direct.ts` | Shared grep handler — single SQL query, searches memory only | -| `src/hooks/pre-tool-use.ts` | Fast path for all read commands: grep, cat, head, tail, ls, find, wc. Routes to correct table based on path. Handles `2>/dev/null`, `2>&1`, `cat\|head` pipes. Virtual index.md generation. | -| `src/hooks/codex/pre-tool-use.ts` | Grep fast path via shared module | -| `src/shell/grep-interceptor.ts` | Fixed mount="/" bug | -| `src/shell/deeplake-fs.ts` | Removed `deeplake_sync_table` from bootstrap and flush | -| `src/hooks/session-start-setup.ts` | Removed `deeplake_sync_table` | -| `src/hooks/wiki-worker.ts` | Removed `deeplake_sync_table` (3 occurrences) | -| `src/hooks/codex/session-start-setup.ts` | Removed `deeplake_sync_table` | -| `src/hooks/codex/wiki-worker.ts` | Removed `deeplake_sync_table` (3 occurrences) | From 7c34d5ce9dd334b4454257bd639bc70c07de10ca Mon Sep 17 00:00:00 2001 From: Emanuele Fenocchi Date: Thu, 16 Apr 2026 03:19:03 +0000 Subject: [PATCH 33/33] ci: add test coverage reporting to CI workflow Run vitest with --coverage and display a coverage summary table in the GitHub Actions step summary. Uses @vitest/coverage-v8 (already installed). 
--- .github/workflows/ci.yml | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e77c5b3..04f7d75 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,8 +22,28 @@ jobs: - name: Install dependencies run: npm install - - name: Run CI checks - run: npm run ci + - name: Typecheck + run: npm run typecheck + + - name: Run tests with coverage + run: npx vitest run --coverage --coverage.reporter=text --coverage.reporter=json-summary + + - name: Coverage summary + if: always() + run: | + if [ -f coverage/coverage-summary.json ]; then + echo "### Test Coverage" >> $GITHUB_STEP_SUMMARY + node -e " + const c = require('./coverage/coverage-summary.json').total; + const fmt = (v) => v.pct.toFixed(1) + '%'; + console.log('| Metric | Coverage |'); + console.log('|--------|----------|'); + console.log('| Statements | ' + fmt(c.statements) + ' |'); + console.log('| Branches | ' + fmt(c.branches) + ' |'); + console.log('| Functions | ' + fmt(c.functions) + ' |'); + console.log('| Lines | ' + fmt(c.lines) + ' |'); + " >> $GITHUB_STEP_SUMMARY + fi - name: Build bundles run: npm run build