forked from github/quartz
feat: semantic search (1/n)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
This commit is contained in:
235
quartz/plugins/emitters/semantic.ts
Normal file
235
quartz/plugins/emitters/semantic.ts
Normal file
@@ -0,0 +1,235 @@
|
||||
import { write } from "./helpers"
|
||||
import { QuartzEmitterPlugin } from "../types"
|
||||
import { FilePath, FullSlug, joinSegments, QUARTZ } from "../../util/path"
|
||||
import { ReadTimeResults } from "reading-time"
|
||||
import { GlobalConfiguration } from "../../cfg"
|
||||
import { spawn } from "child_process"
|
||||
|
||||
const DEFAULT_MODEL_ID = "onnx-community/Qwen3-Embedding-0.6B-ONNX"
|
||||
|
||||
const defaults: GlobalConfiguration["semanticSearch"] = {
|
||||
enable: true,
|
||||
model: DEFAULT_MODEL_ID,
|
||||
aot: false,
|
||||
dims: 1024,
|
||||
dtype: "fp32",
|
||||
shardSizeRows: 1024,
|
||||
hnsw: { M: 16, efConstruction: 200 },
|
||||
chunking: {
|
||||
chunkSize: 512,
|
||||
chunkOverlap: 128,
|
||||
noChunking: false,
|
||||
},
|
||||
vllm: {
|
||||
enable: false,
|
||||
vllmUrl:
|
||||
process.env.VLLM_URL || process.env.VLLM_EMBED_URL || "http://127.0.0.1:8000/v1/embeddings",
|
||||
concurrency: parseInt(process.env.VLLM_CONCURRENCY || "8", 10),
|
||||
batchSize: parseInt(process.env.VLLM_BATCH_SIZE || "64", 10),
|
||||
},
|
||||
}
|
||||
|
||||
type ContentDetails = {
|
||||
slug: string
|
||||
title: string
|
||||
filePath: FilePath
|
||||
content: string
|
||||
readingTime?: Partial<ReadTimeResults>
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if uv is installed
|
||||
*/
|
||||
function checkUvInstalled(): Promise<boolean> {
|
||||
return new Promise((resolve) => {
|
||||
const proc = spawn("uv", ["--version"], { shell: true })
|
||||
proc.on("error", () => resolve(false))
|
||||
proc.on("close", (code) => resolve(code === 0))
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Run the Python embedding build script using uv
|
||||
* Script uses PEP 723 inline metadata for dependency management
|
||||
*/
|
||||
function runEmbedBuild(
|
||||
jsonlPath: string,
|
||||
outDir: string,
|
||||
opts: {
|
||||
model: string
|
||||
dtype: string
|
||||
dims: number
|
||||
shardSizeRows: number
|
||||
chunking: { chunkSize: number; chunkOverlap: number; noChunking: boolean }
|
||||
vllm: { enable: boolean; vllmUrl?: string; concurrency: number; batchSize: number }
|
||||
},
|
||||
): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const scriptPath = joinSegments(QUARTZ, "embed_build.py")
|
||||
const args = [
|
||||
"run",
|
||||
scriptPath,
|
||||
"--jsonl",
|
||||
jsonlPath,
|
||||
"--model",
|
||||
opts.model,
|
||||
"--out",
|
||||
outDir,
|
||||
"--dtype",
|
||||
opts.dtype,
|
||||
"--dims",
|
||||
String(opts.dims),
|
||||
"--shard-size",
|
||||
String(opts.shardSizeRows),
|
||||
"--chunk-size",
|
||||
String(opts.chunking.chunkSize),
|
||||
"--chunk-overlap",
|
||||
String(opts.chunking.chunkOverlap),
|
||||
]
|
||||
|
||||
if (opts.chunking.noChunking) {
|
||||
args.push("--no-chunking")
|
||||
}
|
||||
|
||||
if (opts.vllm.enable) {
|
||||
args.push("--use-vllm")
|
||||
if (opts.vllm.vllmUrl) {
|
||||
args.push("--vllm-url", opts.vllm.vllmUrl)
|
||||
}
|
||||
args.push("--concurrency", String(opts.vllm.concurrency))
|
||||
args.push("--batch-size", String(opts.vllm.batchSize))
|
||||
}
|
||||
|
||||
console.log("\nRunning embedding generation:")
|
||||
console.log(` uv ${args.join(" ")}`)
|
||||
|
||||
const env = { ...process.env }
|
||||
if (opts.vllm.enable && !env.USE_VLLM) {
|
||||
env.USE_VLLM = "1"
|
||||
}
|
||||
|
||||
const proc = spawn("uv", args, {
|
||||
stdio: "inherit",
|
||||
shell: true,
|
||||
env,
|
||||
})
|
||||
|
||||
proc.on("error", (err) => {
|
||||
reject(new Error(`Failed to spawn uv: ${err.message}`))
|
||||
})
|
||||
|
||||
proc.on("close", (code) => {
|
||||
if (code === 0) {
|
||||
console.log("Embedding generation completed successfully")
|
||||
resolve()
|
||||
} else {
|
||||
reject(new Error(`embed_build.py exited with code ${code}`))
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
export const SemanticIndex: QuartzEmitterPlugin<Partial<GlobalConfiguration["semanticSearch"]>> = (
|
||||
opts,
|
||||
) => {
|
||||
const merged = { ...defaults, ...opts }
|
||||
const o = {
|
||||
enable: merged.enable!,
|
||||
model: merged.model!,
|
||||
aot: merged.aot!,
|
||||
dims: merged.dims!,
|
||||
dtype: merged.dtype!,
|
||||
shardSizeRows: merged.shardSizeRows!,
|
||||
hnsw: {
|
||||
M: merged.hnsw?.M ?? defaults.hnsw!.M!,
|
||||
efConstruction: merged.hnsw?.efConstruction ?? defaults.hnsw!.efConstruction!,
|
||||
efSearch: merged.hnsw?.efSearch,
|
||||
},
|
||||
chunking: {
|
||||
chunkSize: merged.chunking?.chunkSize ?? defaults.chunking!.chunkSize!,
|
||||
chunkOverlap: merged.chunking?.chunkOverlap ?? defaults.chunking!.chunkOverlap!,
|
||||
noChunking: merged.chunking?.noChunking ?? defaults.chunking!.noChunking!,
|
||||
},
|
||||
vllm: {
|
||||
enable: merged.vllm?.enable ?? defaults.vllm!.enable!,
|
||||
vllmUrl: merged.vllm?.vllmUrl ?? defaults.vllm!.vllmUrl,
|
||||
concurrency: merged.vllm?.concurrency ?? defaults.vllm!.concurrency!,
|
||||
batchSize: merged.vllm?.batchSize ?? defaults.vllm!.batchSize!,
|
||||
},
|
||||
}
|
||||
|
||||
if (!o.model) {
|
||||
throw new Error("Semantic search requires a model identifier")
|
||||
}
|
||||
|
||||
return {
|
||||
name: "SemanticIndex",
|
||||
getQuartzComponents() {
|
||||
return []
|
||||
},
|
||||
async *partialEmit() {},
|
||||
async *emit(ctx, content, _resources) {
|
||||
if (!o.enable) return
|
||||
|
||||
const docs: ContentDetails[] = []
|
||||
for (const [_, file] of content) {
|
||||
const slug = file.data.slug!
|
||||
const title = file.data.frontmatter?.title ?? slug
|
||||
const text = file.data.text
|
||||
if (text) {
|
||||
docs.push({
|
||||
slug,
|
||||
title,
|
||||
filePath: file.data.filePath!,
|
||||
content: text,
|
||||
readingTime: file.data.readingTime,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Emit JSONL with the exact text used for embeddings
|
||||
const jsonl = docs
|
||||
.map((d) => ({ slug: d.slug, title: d.title, text: d.content }))
|
||||
.map((o) => JSON.stringify(o))
|
||||
.join("\n")
|
||||
|
||||
const jsonlSlug = "embeddings-text" as FullSlug
|
||||
yield write({
|
||||
ctx,
|
||||
slug: jsonlSlug,
|
||||
ext: ".jsonl",
|
||||
content: jsonl,
|
||||
})
|
||||
|
||||
// If aot is false, run the embedding generation script
|
||||
if (!o.aot) {
|
||||
console.log("\nGenerating embeddings (aot=false)...")
|
||||
|
||||
// Check for uv
|
||||
const hasUv = await checkUvInstalled()
|
||||
if (!hasUv) {
|
||||
throw new Error(
|
||||
"uv is required for embedding generation. Install it from https://docs.astral.sh/uv/",
|
||||
)
|
||||
}
|
||||
|
||||
const jsonlPath = joinSegments(ctx.argv.output, "embeddings-text.jsonl")
|
||||
const outDir = joinSegments(ctx.argv.output, "embeddings")
|
||||
|
||||
try {
|
||||
await runEmbedBuild(jsonlPath, outDir, o)
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : String(err)
|
||||
throw new Error(`Embedding generation failed: ${message}`)
|
||||
}
|
||||
} else {
|
||||
console.log(
|
||||
"\nSkipping embedding generation (aot=true). Expecting pre-generated embeddings in public/embeddings/",
|
||||
)
|
||||
}
|
||||
},
|
||||
externalResources(_ctx) {
|
||||
return {}
|
||||
},
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user