import { write } from "./helpers" import { QuartzEmitterPlugin } from "../types" import { FilePath, FullSlug, joinSegments, QUARTZ } from "../../util/path" import { ReadTimeResults } from "reading-time" import { GlobalConfiguration } from "../../cfg" import { spawn } from "child_process" const DEFAULT_MODEL_ID = "onnx-community/Qwen3-Embedding-0.6B-ONNX" const defaults: GlobalConfiguration["semanticSearch"] = { enable: true, model: DEFAULT_MODEL_ID, aot: false, dims: 1024, dtype: "fp32", shardSizeRows: 1024, hnsw: { M: 16, efConstruction: 200 }, chunking: { chunkSize: 512, chunkOverlap: 128, noChunking: false, }, vllm: { enable: false, vllmUrl: process.env.VLLM_URL || process.env.VLLM_EMBED_URL || "http://127.0.0.1:8000/v1/embeddings", concurrency: parseInt(process.env.VLLM_CONCURRENCY || "8", 10), batchSize: parseInt(process.env.VLLM_BATCH_SIZE || "64", 10), }, } type ContentDetails = { slug: string title: string filePath: FilePath content: string readingTime?: Partial } /** * Check if uv is installed */ function checkUvInstalled(): Promise { return new Promise((resolve) => { const proc = spawn("uv", ["--version"], { shell: true }) proc.on("error", () => resolve(false)) proc.on("close", (code) => resolve(code === 0)) }) } /** * Run the Python embedding build script using uv * Script uses PEP 723 inline metadata for dependency management */ function runEmbedBuild( jsonlPath: string, outDir: string, opts: { model: string dtype: string dims: number shardSizeRows: number chunking: { chunkSize: number; chunkOverlap: number; noChunking: boolean } vllm: { enable: boolean; vllmUrl?: string; concurrency: number; batchSize: number } }, ): Promise { return new Promise((resolve, reject) => { const scriptPath = joinSegments(QUARTZ, "embed_build.py") const args = [ "run", scriptPath, "--jsonl", jsonlPath, "--model", opts.model, "--out", outDir, "--dtype", opts.dtype, "--dims", String(opts.dims), "--shard-size", String(opts.shardSizeRows), "--chunk-size", String(opts.chunking.chunkSize), "--chunk-overlap", String(opts.chunking.chunkOverlap), ] if (opts.chunking.noChunking) { args.push("--no-chunking") } if (opts.vllm.enable) { args.push("--use-vllm") if (opts.vllm.vllmUrl) { args.push("--vllm-url", opts.vllm.vllmUrl) } args.push("--concurrency", String(opts.vllm.concurrency)) args.push("--batch-size", String(opts.vllm.batchSize)) } console.log("\nRunning embedding generation:") console.log(` uv ${args.join(" ")}`) const env = { ...process.env } if (opts.vllm.enable && !env.USE_VLLM) { env.USE_VLLM = "1" } const proc = spawn("uv", args, { stdio: "inherit", shell: true, env, }) proc.on("error", (err) => { reject(new Error(`Failed to spawn uv: ${err.message}`)) }) proc.on("close", (code) => { if (code === 0) { console.log("Embedding generation completed successfully") resolve() } else { reject(new Error(`embed_build.py exited with code ${code}`)) } }) }) } export const SemanticIndex: QuartzEmitterPlugin> = ( opts, ) => { const merged = { ...defaults, ...opts } const o = { enable: merged.enable!, model: merged.model!, aot: merged.aot!, dims: merged.dims!, dtype: merged.dtype!, shardSizeRows: merged.shardSizeRows!, hnsw: { M: merged.hnsw?.M ?? defaults.hnsw!.M!, efConstruction: merged.hnsw?.efConstruction ?? defaults.hnsw!.efConstruction!, efSearch: merged.hnsw?.efSearch, }, chunking: { chunkSize: merged.chunking?.chunkSize ?? defaults.chunking!.chunkSize!, chunkOverlap: merged.chunking?.chunkOverlap ?? defaults.chunking!.chunkOverlap!, noChunking: merged.chunking?.noChunking ?? defaults.chunking!.noChunking!, }, vllm: { enable: merged.vllm?.enable ?? defaults.vllm!.enable!, vllmUrl: merged.vllm?.vllmUrl ?? defaults.vllm!.vllmUrl, concurrency: merged.vllm?.concurrency ?? defaults.vllm!.concurrency!, batchSize: merged.vllm?.batchSize ?? defaults.vllm!.batchSize!, }, } if (!o.model) { throw new Error("Semantic search requires a model identifier") } return { name: "SemanticIndex", getQuartzComponents() { return [] }, async *partialEmit() {}, async *emit(ctx, content, _resources) { if (!o.enable) return const docs: ContentDetails[] = [] for (const [_, file] of content) { const slug = file.data.slug! const title = file.data.frontmatter?.title ?? slug const text = file.data.text if (text) { docs.push({ slug, title, filePath: file.data.filePath!, content: text, readingTime: file.data.readingTime, }) } } // Emit JSONL with the exact text used for embeddings const jsonl = docs .map((d) => ({ slug: d.slug, title: d.title, text: d.content })) .map((o) => JSON.stringify(o)) .join("\n") const jsonlSlug = "embeddings-text" as FullSlug yield write({ ctx, slug: jsonlSlug, ext: ".jsonl", content: jsonl, }) // If aot is false, run the embedding generation script if (!o.aot) { console.log("\nGenerating embeddings (aot=false)...") // Check for uv const hasUv = await checkUvInstalled() if (!hasUv) { throw new Error( "uv is required for embedding generation. Install it from https://docs.astral.sh/uv/", ) } const jsonlPath = joinSegments(ctx.argv.output, "embeddings-text.jsonl") const outDir = joinSegments(ctx.argv.output, "embeddings") try { await runEmbedBuild(jsonlPath, outDir, o) } catch (err) { const message = err instanceof Error ? err.message : String(err) throw new Error(`Embedding generation failed: ${message}`) } } else { console.log( "\nSkipping embedding generation (aot=true). Expecting pre-generated embeddings in public/embeddings/", ) } }, externalResources(_ctx) { return {} }, } }