Compare commits
14 Commits
feat/seman
...
active-fol
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
51bfc8f9e3 | ||
|
|
86a30ad150 | ||
|
|
45d2ef8690 | ||
|
|
0ecb859d2d | ||
|
|
2fdc8129b6 | ||
|
|
8bc6cb9061 | ||
|
|
af5773f0e4 | ||
|
|
4260214a07 | ||
|
|
0c4386dce1 | ||
|
|
08c861707b | ||
|
|
1377004fca | ||
|
|
519d56c132 | ||
|
|
52460f376f | ||
|
|
b4805a1031 |
4
.github/workflows/build-preview.yaml
vendored
4
.github/workflows/build-preview.yaml
vendored
@@ -16,7 +16,7 @@ jobs:
|
|||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Setup Node
|
- name: Setup Node
|
||||||
uses: actions/setup-node@v5
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: 22
|
node-version: 22
|
||||||
|
|
||||||
@@ -37,7 +37,7 @@ jobs:
|
|||||||
run: npx quartz build -d docs -v
|
run: npx quartz build -d docs -v
|
||||||
|
|
||||||
- name: Upload build artifact
|
- name: Upload build artifact
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v5
|
||||||
with:
|
with:
|
||||||
name: preview-build
|
name: preview-build
|
||||||
path: public
|
path: public
|
||||||
|
|||||||
4
.github/workflows/ci.yaml
vendored
4
.github/workflows/ci.yaml
vendored
@@ -24,7 +24,7 @@ jobs:
|
|||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
|
|
||||||
- name: Setup Node
|
- name: Setup Node
|
||||||
uses: actions/setup-node@v5
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: 22
|
node-version: 22
|
||||||
|
|
||||||
@@ -57,7 +57,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
fetch-depth: 0
|
fetch-depth: 0
|
||||||
- name: Setup Node
|
- name: Setup Node
|
||||||
uses: actions/setup-node@v5
|
uses: actions/setup-node@v6
|
||||||
with:
|
with:
|
||||||
node-version: 22
|
node-version: 22
|
||||||
- name: Get package version
|
- name: Get package version
|
||||||
|
|||||||
2
.github/workflows/deploy-preview.yaml
vendored
2
.github/workflows/deploy-preview.yaml
vendored
@@ -18,7 +18,7 @@ jobs:
|
|||||||
name: Deploy Preview to Cloudflare Pages
|
name: Deploy Preview to Cloudflare Pages
|
||||||
steps:
|
steps:
|
||||||
- name: Download build artifact
|
- name: Download build artifact
|
||||||
uses: actions/download-artifact@v5
|
uses: actions/download-artifact@v6
|
||||||
id: preview-build-artifact
|
id: preview-build-artifact
|
||||||
with:
|
with:
|
||||||
name: preview-build
|
name: preview-build
|
||||||
|
|||||||
4
.github/workflows/docker-build-push.yaml
vendored
4
.github/workflows/docker-build-push.yaml
vendored
@@ -25,7 +25,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
fetch-depth: 1
|
fetch-depth: 1
|
||||||
- name: Inject slug/short variables
|
- name: Inject slug/short variables
|
||||||
uses: rlespinasse/github-slug-action@v5.2.0
|
uses: rlespinasse/github-slug-action@v5.3.0
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v3
|
uses: docker/setup-qemu-action@v3
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
@@ -37,7 +37,7 @@ jobs:
|
|||||||
network=host
|
network=host
|
||||||
- name: Install cosign
|
- name: Install cosign
|
||||||
if: github.event_name != 'pull_request'
|
if: github.event_name != 'pull_request'
|
||||||
uses: sigstore/cosign-installer@v3.10.0
|
uses: sigstore/cosign-installer@v4.0.0
|
||||||
- name: Login to GitHub Container Registry
|
- name: Login to GitHub Container Registry
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@v3
|
||||||
if: github.event_name != 'pull_request'
|
if: github.event_name != 'pull_request'
|
||||||
|
|||||||
Binary file not shown.
File diff suppressed because one or more lines are too long
Binary file not shown.
1
index.d.ts
vendored
1
index.d.ts
vendored
@@ -13,4 +13,3 @@ interface CustomEventMap {
|
|||||||
|
|
||||||
type ContentIndex = Record<FullSlug, ContentDetails>
|
type ContentIndex = Record<FullSlug, ContentDetails>
|
||||||
declare const fetchData: Promise<ContentIndex>
|
declare const fetchData: Promise<ContentIndex>
|
||||||
declare const semanticCfg: import("./quartz/cfg").GlobalConfiguration["semanticSearch"]
|
|
||||||
|
|||||||
1093
package-lock.json
generated
1093
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
18
package.json
18
package.json
@@ -37,7 +37,6 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@clack/prompts": "^0.11.0",
|
"@clack/prompts": "^0.11.0",
|
||||||
"@floating-ui/dom": "^1.7.4",
|
"@floating-ui/dom": "^1.7.4",
|
||||||
"@huggingface/transformers": "^3.7.5",
|
|
||||||
"@myriaddreamin/rehype-typst": "^0.6.0",
|
"@myriaddreamin/rehype-typst": "^0.6.0",
|
||||||
"@napi-rs/simple-git": "0.1.22",
|
"@napi-rs/simple-git": "0.1.22",
|
||||||
"@tweenjs/tween.js": "^25.0.0",
|
"@tweenjs/tween.js": "^25.0.0",
|
||||||
@@ -61,11 +60,10 @@
|
|||||||
"mdast-util-to-hast": "^13.2.0",
|
"mdast-util-to-hast": "^13.2.0",
|
||||||
"mdast-util-to-string": "^4.0.0",
|
"mdast-util-to-string": "^4.0.0",
|
||||||
"micromorph": "^0.4.5",
|
"micromorph": "^0.4.5",
|
||||||
"minimatch": "^10.0.3",
|
"minimatch": "^10.1.1",
|
||||||
"onnxruntime-web": "^1.23.0",
|
"pixi.js": "^8.14.0",
|
||||||
"pixi.js": "^8.13.2",
|
|
||||||
"preact": "^10.27.2",
|
"preact": "^10.27.2",
|
||||||
"preact-render-to-string": "^6.6.1",
|
"preact-render-to-string": "^6.6.3",
|
||||||
"pretty-bytes": "^7.1.0",
|
"pretty-bytes": "^7.1.0",
|
||||||
"pretty-time": "^1.1.0",
|
"pretty-time": "^1.1.0",
|
||||||
"reading-time": "^1.5.0",
|
"reading-time": "^1.5.0",
|
||||||
@@ -95,7 +93,7 @@
|
|||||||
"unified": "^11.0.5",
|
"unified": "^11.0.5",
|
||||||
"unist-util-visit": "^5.0.0",
|
"unist-util-visit": "^5.0.0",
|
||||||
"vfile": "^6.0.3",
|
"vfile": "^6.0.3",
|
||||||
"workerpool": "^9.3.4",
|
"workerpool": "^10.0.0",
|
||||||
"ws": "^8.18.3",
|
"ws": "^8.18.3",
|
||||||
"yargs": "^18.0.0"
|
"yargs": "^18.0.0"
|
||||||
},
|
},
|
||||||
@@ -103,14 +101,14 @@
|
|||||||
"@types/d3": "^7.4.3",
|
"@types/d3": "^7.4.3",
|
||||||
"@types/hast": "^3.0.4",
|
"@types/hast": "^3.0.4",
|
||||||
"@types/js-yaml": "^4.0.9",
|
"@types/js-yaml": "^4.0.9",
|
||||||
"@types/node": "^24.6.0",
|
"@types/node": "^24.10.0",
|
||||||
"@types/pretty-time": "^1.1.5",
|
"@types/pretty-time": "^1.1.5",
|
||||||
"@types/source-map-support": "^0.5.10",
|
"@types/source-map-support": "^0.5.10",
|
||||||
"@types/ws": "^8.18.1",
|
"@types/ws": "^8.18.1",
|
||||||
"@types/yargs": "^17.0.33",
|
"@types/yargs": "^17.0.34",
|
||||||
"esbuild": "^0.25.10",
|
"esbuild": "^0.25.12",
|
||||||
"prettier": "^3.6.2",
|
"prettier": "^3.6.2",
|
||||||
"tsx": "^4.20.6",
|
"tsx": "^4.20.6",
|
||||||
"typescript": "^5.9.2"
|
"typescript": "^5.9.3"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,18 +1,6 @@
|
|||||||
import { GlobalConfiguration, QuartzConfig } from "./quartz/cfg"
|
import { QuartzConfig } from "./quartz/cfg"
|
||||||
import * as Plugin from "./quartz/plugins"
|
import * as Plugin from "./quartz/plugins"
|
||||||
|
|
||||||
const semanticSearch: GlobalConfiguration["semanticSearch"] = {
|
|
||||||
enable: true,
|
|
||||||
model: "onnx-community/embeddinggemma-300m-ONNX",
|
|
||||||
aot: true,
|
|
||||||
dims: 768,
|
|
||||||
dtype: "fp32",
|
|
||||||
shardSizeRows: 1024,
|
|
||||||
hnsw: { M: 16, efConstruction: 200 },
|
|
||||||
chunking: { chunkSize: 256, chunkOverlap: 64 },
|
|
||||||
vllm: { enable: true, concurrency: 16, batchSize: 128 },
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Quartz 4 Configuration
|
* Quartz 4 Configuration
|
||||||
*
|
*
|
||||||
@@ -64,7 +52,6 @@ const config: QuartzConfig = {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
semanticSearch,
|
|
||||||
},
|
},
|
||||||
plugins: {
|
plugins: {
|
||||||
transformers: [
|
transformers: [
|
||||||
@@ -97,7 +84,6 @@ const config: QuartzConfig = {
|
|||||||
enableSiteMap: true,
|
enableSiteMap: true,
|
||||||
enableRSS: true,
|
enableRSS: true,
|
||||||
}),
|
}),
|
||||||
Plugin.SemanticIndex(semanticSearch),
|
|
||||||
Plugin.Assets(),
|
Plugin.Assets(),
|
||||||
Plugin.Static(),
|
Plugin.Static(),
|
||||||
Plugin.Favicon(),
|
Plugin.Favicon(),
|
||||||
|
|||||||
@@ -78,34 +78,6 @@ export interface GlobalConfiguration {
|
|||||||
* Region Codes: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
|
* Region Codes: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
|
||||||
*/
|
*/
|
||||||
locale: ValidLocale
|
locale: ValidLocale
|
||||||
/** Semantic search configuration */
|
|
||||||
semanticSearch?: {
|
|
||||||
enable: boolean
|
|
||||||
model: string
|
|
||||||
aot: boolean
|
|
||||||
dtype: "fp32" | "fp16"
|
|
||||||
dims: number
|
|
||||||
shardSizeRows: number
|
|
||||||
manifestUrl?: string
|
|
||||||
manifestBaseUrl?: string
|
|
||||||
disableCache?: boolean
|
|
||||||
hnsw: {
|
|
||||||
M: number
|
|
||||||
efConstruction: number
|
|
||||||
efSearch?: number
|
|
||||||
}
|
|
||||||
chunking: {
|
|
||||||
chunkSize: number
|
|
||||||
chunkOverlap: number
|
|
||||||
noChunking?: boolean
|
|
||||||
}
|
|
||||||
vllm?: {
|
|
||||||
enable: boolean
|
|
||||||
vllmUrl?: string
|
|
||||||
concurrency: number
|
|
||||||
batchSize: number
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface QuartzConfig {
|
export interface QuartzConfig {
|
||||||
|
|||||||
@@ -7,12 +7,10 @@ import { i18n } from "../i18n"
|
|||||||
|
|
||||||
export interface SearchOptions {
|
export interface SearchOptions {
|
||||||
enablePreview: boolean
|
enablePreview: boolean
|
||||||
includeButton: boolean
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const defaultOptions: SearchOptions = {
|
const defaultOptions: SearchOptions = {
|
||||||
enablePreview: true,
|
enablePreview: true,
|
||||||
includeButton: true,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
export default ((userOpts?: Partial<SearchOptions>) => {
|
export default ((userOpts?: Partial<SearchOptions>) => {
|
||||||
@@ -31,54 +29,19 @@ export default ((userOpts?: Partial<SearchOptions>) => {
|
|||||||
</svg>
|
</svg>
|
||||||
<p>{i18n(cfg.locale).components.search.title}</p>
|
<p>{i18n(cfg.locale).components.search.title}</p>
|
||||||
</button>
|
</button>
|
||||||
<search class="search-container">
|
<div class="search-container">
|
||||||
<form class="search-space">
|
<div class="search-space">
|
||||||
<div class="input-container">
|
<input
|
||||||
<input
|
autocomplete="off"
|
||||||
autocomplete="off"
|
class="search-bar"
|
||||||
class="search-bar"
|
name="search"
|
||||||
name="search"
|
type="text"
|
||||||
type="text"
|
aria-label={searchPlaceholder}
|
||||||
aria-label={searchPlaceholder}
|
placeholder={searchPlaceholder}
|
||||||
placeholder={searchPlaceholder}
|
/>
|
||||||
/>
|
<div class="search-layout" data-preview={opts.enablePreview}></div>
|
||||||
<div class="search-mode-toggle" role="radiogroup" aria-label="Search mode">
|
</div>
|
||||||
<button
|
</div>
|
||||||
type="button"
|
|
||||||
class="mode-option"
|
|
||||||
data-mode="lexical"
|
|
||||||
aria-pressed="true"
|
|
||||||
aria-label="Full-text search"
|
|
||||||
>
|
|
||||||
<svg viewBox="0 0 20 20" role="img" aria-hidden="true">
|
|
||||||
<g fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round">
|
|
||||||
<path d="M4 6h12M4 10h8M4 14h6" />
|
|
||||||
</g>
|
|
||||||
</svg>
|
|
||||||
<span class="sr-only">Full-text</span>
|
|
||||||
</button>
|
|
||||||
<button
|
|
||||||
type="button"
|
|
||||||
class="mode-option"
|
|
||||||
data-mode="semantic"
|
|
||||||
aria-pressed="false"
|
|
||||||
aria-label="Semantic search"
|
|
||||||
>
|
|
||||||
<svg viewBox="0 0 20 20" role="img" aria-hidden="true">
|
|
||||||
<g fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round">
|
|
||||||
<circle cx="5.2" cy="10" r="2.4" />
|
|
||||||
<circle cx="14.8" cy="4.8" r="2.1" />
|
|
||||||
<circle cx="14.8" cy="15.2" r="2.1" />
|
|
||||||
<path d="M7.1 8.7l5.2-2.4M7.1 11.3l5.2 2.4M14.8 6.9v6.2" />
|
|
||||||
</g>
|
|
||||||
</svg>
|
|
||||||
<span class="sr-only">Semantic</span>
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<output class="search-layout" data-preview={opts.enablePreview} />
|
|
||||||
</form>
|
|
||||||
</search>
|
|
||||||
</div>
|
</div>
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import { visit } from "unist-util-visit"
|
|||||||
import { Root, Element, ElementContent } from "hast"
|
import { Root, Element, ElementContent } from "hast"
|
||||||
import { GlobalConfiguration } from "../cfg"
|
import { GlobalConfiguration } from "../cfg"
|
||||||
import { i18n } from "../i18n"
|
import { i18n } from "../i18n"
|
||||||
|
import { styleText } from "util"
|
||||||
|
|
||||||
interface RenderComponents {
|
interface RenderComponents {
|
||||||
head: QuartzComponent
|
head: QuartzComponent
|
||||||
@@ -25,7 +26,6 @@ const headerRegex = new RegExp(/h[1-6]/)
|
|||||||
export function pageResources(
|
export function pageResources(
|
||||||
baseDir: FullSlug | RelativeURL,
|
baseDir: FullSlug | RelativeURL,
|
||||||
staticResources: StaticResources,
|
staticResources: StaticResources,
|
||||||
cfg?: GlobalConfiguration,
|
|
||||||
): StaticResources {
|
): StaticResources {
|
||||||
const contentIndexPath = joinSegments(baseDir, "static/contentIndex.json")
|
const contentIndexPath = joinSegments(baseDir, "static/contentIndex.json")
|
||||||
const contentIndexScript = `const fetchData = fetch("${contentIndexPath}").then(data => data.json())`
|
const contentIndexScript = `const fetchData = fetch("${contentIndexPath}").then(data => data.json())`
|
||||||
@@ -49,12 +49,6 @@ export function pageResources(
|
|||||||
spaPreserve: true,
|
spaPreserve: true,
|
||||||
script: contentIndexScript,
|
script: contentIndexScript,
|
||||||
},
|
},
|
||||||
{
|
|
||||||
loadTime: "beforeDOMReady",
|
|
||||||
contentType: "inline",
|
|
||||||
spaPreserve: true,
|
|
||||||
script: `const semanticCfg = ${JSON.stringify(cfg?.semanticSearch ?? {})};`,
|
|
||||||
},
|
|
||||||
...staticResources.js,
|
...staticResources.js,
|
||||||
],
|
],
|
||||||
additionalHead: staticResources.additionalHead,
|
additionalHead: staticResources.additionalHead,
|
||||||
@@ -75,6 +69,7 @@ function renderTranscludes(
|
|||||||
cfg: GlobalConfiguration,
|
cfg: GlobalConfiguration,
|
||||||
slug: FullSlug,
|
slug: FullSlug,
|
||||||
componentData: QuartzComponentProps,
|
componentData: QuartzComponentProps,
|
||||||
|
visited: Set<FullSlug>,
|
||||||
) {
|
) {
|
||||||
// process transcludes in componentData
|
// process transcludes in componentData
|
||||||
visit(root, "element", (node, _index, _parent) => {
|
visit(root, "element", (node, _index, _parent) => {
|
||||||
@@ -83,6 +78,30 @@ function renderTranscludes(
|
|||||||
if (classNames.includes("transclude")) {
|
if (classNames.includes("transclude")) {
|
||||||
const inner = node.children[0] as Element
|
const inner = node.children[0] as Element
|
||||||
const transcludeTarget = (inner.properties["data-slug"] ?? slug) as FullSlug
|
const transcludeTarget = (inner.properties["data-slug"] ?? slug) as FullSlug
|
||||||
|
if (visited.has(transcludeTarget)) {
|
||||||
|
console.warn(
|
||||||
|
styleText(
|
||||||
|
"yellow",
|
||||||
|
`Warning: Skipping circular transclusion: ${slug} -> ${transcludeTarget}`,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
node.children = [
|
||||||
|
{
|
||||||
|
type: "element",
|
||||||
|
tagName: "p",
|
||||||
|
properties: { style: "color: var(--secondary);" },
|
||||||
|
children: [
|
||||||
|
{
|
||||||
|
type: "text",
|
||||||
|
value: `Circular transclusion detected: ${transcludeTarget}`,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
return
|
||||||
|
}
|
||||||
|
visited.add(transcludeTarget)
|
||||||
|
|
||||||
const page = componentData.allFiles.find((f) => f.slug === transcludeTarget)
|
const page = componentData.allFiles.find((f) => f.slug === transcludeTarget)
|
||||||
if (!page) {
|
if (!page) {
|
||||||
return
|
return
|
||||||
@@ -203,7 +222,8 @@ export function renderPage(
|
|||||||
// make a deep copy of the tree so we don't remove the transclusion references
|
// make a deep copy of the tree so we don't remove the transclusion references
|
||||||
// for the file cached in contentMap in build.ts
|
// for the file cached in contentMap in build.ts
|
||||||
const root = clone(componentData.tree) as Root
|
const root = clone(componentData.tree) as Root
|
||||||
renderTranscludes(root, cfg, slug, componentData)
|
const visited = new Set<FullSlug>([slug])
|
||||||
|
renderTranscludes(root, cfg, slug, componentData, visited)
|
||||||
|
|
||||||
// set componentData.tree to the edited html that has transclusions rendered
|
// set componentData.tree to the edited html that has transclusions rendered
|
||||||
componentData.tree = root
|
componentData.tree = root
|
||||||
|
|||||||
@@ -111,6 +111,10 @@ function createFolderNode(
|
|||||||
const folderPath = node.slug
|
const folderPath = node.slug
|
||||||
folderContainer.dataset.folderpath = folderPath
|
folderContainer.dataset.folderpath = folderPath
|
||||||
|
|
||||||
|
if (currentSlug === folderPath) {
|
||||||
|
folderContainer.classList.add("active")
|
||||||
|
}
|
||||||
|
|
||||||
if (opts.folderClickBehavior === "link") {
|
if (opts.folderClickBehavior === "link") {
|
||||||
// Replace button with link for link behavior
|
// Replace button with link for link behavior
|
||||||
const button = titleContainer.querySelector(".folder-button") as HTMLElement
|
const button = titleContainer.querySelector(".folder-button") as HTMLElement
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import FlexSearch, { DefaultDocumentSearchResults, Id } from "flexsearch"
|
import FlexSearch, { DefaultDocumentSearchResults } from "flexsearch"
|
||||||
import { ContentDetails } from "../../plugins/emitters/contentIndex"
|
import { ContentDetails } from "../../plugins/emitters/contentIndex"
|
||||||
import { SemanticClient, type SemanticResult } from "./semantic.inline"
|
import { registerEscapeHandler, removeAllChildren } from "./util"
|
||||||
import { registerEscapeHandler, removeAllChildren, fetchCanonical } from "./util"
|
|
||||||
import { FullSlug, normalizeRelativeURLs, resolveRelative } from "../../util/path"
|
import { FullSlug, normalizeRelativeURLs, resolveRelative } from "../../util/path"
|
||||||
|
|
||||||
interface Item {
|
interface Item {
|
||||||
@@ -15,46 +14,43 @@ interface Item {
|
|||||||
|
|
||||||
// Can be expanded with things like "term" in the future
|
// Can be expanded with things like "term" in the future
|
||||||
type SearchType = "basic" | "tags"
|
type SearchType = "basic" | "tags"
|
||||||
type SearchMode = "lexical" | "semantic"
|
let searchType: SearchType = "basic"
|
||||||
const SEARCH_MODE_STORAGE_KEY = "quartz:search:mode"
|
|
||||||
|
|
||||||
const loadStoredSearchMode = (): SearchMode | null => {
|
|
||||||
if (typeof window === "undefined") {
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const stored = window.localStorage.getItem(SEARCH_MODE_STORAGE_KEY)
|
|
||||||
return stored === "lexical" || stored === "semantic" ? stored : null
|
|
||||||
} catch (err) {
|
|
||||||
console.warn("[Search] failed to read stored search mode:", err)
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const persistSearchMode = (mode: SearchMode) => {
|
|
||||||
if (typeof window === "undefined") {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
window.localStorage.setItem(SEARCH_MODE_STORAGE_KEY, mode)
|
|
||||||
} catch (err) {
|
|
||||||
console.warn("[Search] failed to persist search mode:", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let searchMode: SearchMode = "lexical"
|
|
||||||
let currentSearchTerm: string = ""
|
let currentSearchTerm: string = ""
|
||||||
let rawSearchTerm: string = ""
|
const encoder = (str: string) => {
|
||||||
let semantic: SemanticClient | null = null
|
return str
|
||||||
let semanticReady = false
|
.toLowerCase()
|
||||||
let semanticInitFailed = false
|
.split(/\s+/)
|
||||||
type SimilarityResult = { item: Item; similarity: number }
|
.filter((token) => token.length > 0)
|
||||||
let chunkMetadata: Record<string, { parentSlug: string; chunkId: number }> = {}
|
}
|
||||||
let manifestIds: string[] = []
|
|
||||||
|
|
||||||
|
let index = new FlexSearch.Document<Item>({
|
||||||
|
encode: encoder,
|
||||||
|
document: {
|
||||||
|
id: "id",
|
||||||
|
tag: "tags",
|
||||||
|
index: [
|
||||||
|
{
|
||||||
|
field: "title",
|
||||||
|
tokenize: "forward",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
field: "content",
|
||||||
|
tokenize: "forward",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
field: "tags",
|
||||||
|
tokenize: "forward",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
const p = new DOMParser()
|
||||||
|
const fetchContentCache: Map<FullSlug, Element[]> = new Map()
|
||||||
const contextWindowWords = 30
|
const contextWindowWords = 30
|
||||||
|
const numSearchResults = 8
|
||||||
|
const numTagResults = 5
|
||||||
|
|
||||||
const tokenizeTerm = (term: string) => {
|
const tokenizeTerm = (term: string) => {
|
||||||
const tokens = term.split(/\s+/).filter((t) => t.trim() !== "")
|
const tokens = term.split(/\s+/).filter((t) => t.trim() !== "")
|
||||||
const tokenLen = tokens.length
|
const tokenLen = tokens.length
|
||||||
@@ -112,102 +108,6 @@ function highlight(searchTerm: string, text: string, trim?: boolean) {
|
|||||||
}`
|
}`
|
||||||
}
|
}
|
||||||
|
|
||||||
// To be used with search and everything else with flexsearch
|
|
||||||
const encoder = (str: string) =>
|
|
||||||
str
|
|
||||||
.toLowerCase()
|
|
||||||
.split(/\s+/)
|
|
||||||
.filter((token) => token.length > 0)
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get parent document slug for a chunk ID
|
|
||||||
*/
|
|
||||||
function getParentSlug(slug: string): string {
|
|
||||||
const meta = chunkMetadata[slug]
|
|
||||||
return meta ? meta.parentSlug : slug
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Aggregate semantic search results from chunks to documents using RRF
|
|
||||||
* @param results Raw semantic results (chunk-level)
|
|
||||||
* @param slugToDocIndex Map from document slug to index in idDataMap
|
|
||||||
* @returns Object with rrfScores (for ranking) and maxScores (for display)
|
|
||||||
*/
|
|
||||||
function aggregateChunkResults(
|
|
||||||
results: SemanticResult[],
|
|
||||||
slugToDocIndex: Map<FullSlug, number>,
|
|
||||||
): { rrfScores: Map<number, number>; maxScores: Map<number, number> } {
|
|
||||||
// Group chunks by parent document
|
|
||||||
const docChunks = new Map<string, Array<{ score: number }>>()
|
|
||||||
|
|
||||||
results.forEach(({ id, score }) => {
|
|
||||||
// id is an index into manifestIds (the chunk IDs from embeddings)
|
|
||||||
const chunkSlug = manifestIds[id]
|
|
||||||
if (!chunkSlug) return
|
|
||||||
|
|
||||||
// Get parent document slug
|
|
||||||
const parentSlug = getParentSlug(chunkSlug)
|
|
||||||
|
|
||||||
if (!docChunks.has(parentSlug)) {
|
|
||||||
docChunks.set(parentSlug, [])
|
|
||||||
}
|
|
||||||
|
|
||||||
docChunks.get(parentSlug)!.push({ score })
|
|
||||||
})
|
|
||||||
|
|
||||||
// Apply RRF for ranking and track max similarity for display
|
|
||||||
const rrfScores = new Map<number, number>()
|
|
||||||
const maxScores = new Map<number, number>()
|
|
||||||
const RRF_K = 60
|
|
||||||
|
|
||||||
for (const [parentSlug, chunks] of docChunks) {
|
|
||||||
const docIdx = slugToDocIndex.get(parentSlug as FullSlug)
|
|
||||||
if (typeof docIdx !== "number") continue
|
|
||||||
|
|
||||||
// Sort chunks by score descending to assign per-document ranks
|
|
||||||
chunks.sort((a, b) => b.score - a.score)
|
|
||||||
|
|
||||||
// RRF formula: sum(1 / (k + rank)) across all chunks, using per-document ranks
|
|
||||||
const rrfScore = chunks.reduce((sum, _, rank) => sum + 1.0 / (RRF_K + rank), 0)
|
|
||||||
|
|
||||||
// Max similarity score for display (original 0-1 range)
|
|
||||||
const maxScore = chunks[0].score
|
|
||||||
|
|
||||||
rrfScores.set(docIdx, rrfScore)
|
|
||||||
maxScores.set(docIdx, maxScore)
|
|
||||||
}
|
|
||||||
|
|
||||||
return { rrfScores, maxScores }
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize the FlexSearch Document instance with the appropriate configuration
|
|
||||||
const index = new FlexSearch.Document<Item>({
|
|
||||||
tokenize: "forward",
|
|
||||||
encode: encoder,
|
|
||||||
document: {
|
|
||||||
id: "id",
|
|
||||||
tag: "tags",
|
|
||||||
index: [
|
|
||||||
{
|
|
||||||
field: "title",
|
|
||||||
tokenize: "forward",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
field: "content",
|
|
||||||
tokenize: "forward",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
field: "tags",
|
|
||||||
tokenize: "forward",
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
})
|
|
||||||
|
|
||||||
const p = new DOMParser()
|
|
||||||
const fetchContentCache: Map<FullSlug, Element[]> = new Map()
|
|
||||||
const numSearchResults = 10
|
|
||||||
const numTagResults = 10
|
|
||||||
function highlightHTML(searchTerm: string, el: HTMLElement) {
|
function highlightHTML(searchTerm: string, el: HTMLElement) {
|
||||||
const p = new DOMParser()
|
const p = new DOMParser()
|
||||||
const tokenizedTerms = tokenizeTerm(searchTerm)
|
const tokenizedTerms = tokenizeTerm(searchTerm)
|
||||||
@@ -249,11 +149,7 @@ function highlightHTML(searchTerm: string, el: HTMLElement) {
|
|||||||
return html.body
|
return html.body
|
||||||
}
|
}
|
||||||
|
|
||||||
async function setupSearch(
|
async function setupSearch(searchElement: Element, currentSlug: FullSlug, data: ContentIndex) {
|
||||||
searchElement: HTMLDivElement,
|
|
||||||
currentSlug: FullSlug,
|
|
||||||
data: ContentIndex,
|
|
||||||
) {
|
|
||||||
const container = searchElement.querySelector(".search-container") as HTMLElement
|
const container = searchElement.querySelector(".search-container") as HTMLElement
|
||||||
if (!container) return
|
if (!container) return
|
||||||
|
|
||||||
@@ -268,183 +164,12 @@ async function setupSearch(
|
|||||||
const searchLayout = searchElement.querySelector(".search-layout") as HTMLElement
|
const searchLayout = searchElement.querySelector(".search-layout") as HTMLElement
|
||||||
if (!searchLayout) return
|
if (!searchLayout) return
|
||||||
|
|
||||||
const searchSpace = searchElement?.querySelector(".search-space") as HTMLFormElement
|
|
||||||
if (!searchSpace) return
|
|
||||||
|
|
||||||
// Create semantic search progress bar
|
|
||||||
const progressBar = document.createElement("div")
|
|
||||||
progressBar.className = "semantic-search-progress"
|
|
||||||
progressBar.style.cssText = `
|
|
||||||
position: absolute;
|
|
||||||
bottom: 0;
|
|
||||||
left: 0;
|
|
||||||
height: 2px;
|
|
||||||
width: 0;
|
|
||||||
background: var(--secondary);
|
|
||||||
transition: width 0.3s ease, opacity 0.3s ease;
|
|
||||||
opacity: 0;
|
|
||||||
z-index: 9999;
|
|
||||||
`
|
|
||||||
searchBar.parentElement?.appendChild(progressBar)
|
|
||||||
|
|
||||||
const startSemanticProgress = () => {
|
|
||||||
progressBar.style.opacity = "1"
|
|
||||||
progressBar.style.width = "0"
|
|
||||||
setTimeout(() => {
|
|
||||||
progressBar.style.width = "100%"
|
|
||||||
}, 10)
|
|
||||||
}
|
|
||||||
|
|
||||||
const completeSemanticProgress = () => {
|
|
||||||
progressBar.style.opacity = "0"
|
|
||||||
setTimeout(() => {
|
|
||||||
progressBar.style.width = "0"
|
|
||||||
}, 300)
|
|
||||||
}
|
|
||||||
|
|
||||||
const resetProgressBar = () => {
|
|
||||||
progressBar.style.opacity = "0"
|
|
||||||
progressBar.style.width = "0"
|
|
||||||
}
|
|
||||||
|
|
||||||
const idDataMap = Object.keys(data) as FullSlug[]
|
const idDataMap = Object.keys(data) as FullSlug[]
|
||||||
const slugToIndex = new Map<FullSlug, number>()
|
|
||||||
idDataMap.forEach((slug, idx) => slugToIndex.set(slug, idx))
|
|
||||||
const modeToggle = searchSpace.querySelector(".search-mode-toggle") as HTMLDivElement | null
|
|
||||||
const modeButtons = modeToggle
|
|
||||||
? Array.from(modeToggle.querySelectorAll<HTMLButtonElement>(".mode-option"))
|
|
||||||
: []
|
|
||||||
|
|
||||||
const appendLayout = (el: HTMLElement) => {
|
const appendLayout = (el: HTMLElement) => {
|
||||||
searchLayout.appendChild(el)
|
searchLayout.appendChild(el)
|
||||||
}
|
}
|
||||||
|
|
||||||
const enablePreview = searchLayout.dataset.preview === "true"
|
const enablePreview = searchLayout.dataset.preview === "true"
|
||||||
if (!semantic && !semanticInitFailed) {
|
|
||||||
const client = new SemanticClient(semanticCfg)
|
|
||||||
try {
|
|
||||||
await client.ensureReady()
|
|
||||||
semantic = client
|
|
||||||
semanticReady = true
|
|
||||||
|
|
||||||
// Load chunk metadata and IDs from manifest
|
|
||||||
try {
|
|
||||||
const manifestUrl = "/embeddings/manifest.json"
|
|
||||||
const res = await fetch(manifestUrl)
|
|
||||||
if (res.ok) {
|
|
||||||
const manifest = await res.json()
|
|
||||||
chunkMetadata = manifest.chunkMetadata || {}
|
|
||||||
manifestIds = manifest.ids || []
|
|
||||||
console.debug(
|
|
||||||
`[Search] Loaded manifest: ${manifestIds.length} chunks, ${Object.keys(chunkMetadata).length} chunked documents`,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
} catch (err) {
|
|
||||||
console.warn("[Search] failed to load chunk metadata:", err)
|
|
||||||
chunkMetadata = {}
|
|
||||||
manifestIds = []
|
|
||||||
}
|
|
||||||
} catch (err) {
|
|
||||||
console.warn("[SemanticClient] initialization failed:", err)
|
|
||||||
client.dispose()
|
|
||||||
semantic = null
|
|
||||||
semanticReady = false
|
|
||||||
semanticInitFailed = true
|
|
||||||
}
|
|
||||||
} else if (semantic && !semanticReady) {
|
|
||||||
try {
|
|
||||||
await semantic.ensureReady()
|
|
||||||
semanticReady = true
|
|
||||||
} catch (err) {
|
|
||||||
console.warn("[SemanticClient] became unavailable:", err)
|
|
||||||
semantic.dispose()
|
|
||||||
semantic = null
|
|
||||||
semanticReady = false
|
|
||||||
semanticInitFailed = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
const storedMode = loadStoredSearchMode()
|
|
||||||
if (storedMode === "semantic") {
|
|
||||||
if (semanticReady) {
|
|
||||||
searchMode = storedMode
|
|
||||||
}
|
|
||||||
} else if (storedMode === "lexical") {
|
|
||||||
searchMode = storedMode
|
|
||||||
}
|
|
||||||
if (!semanticReady && searchMode === "semantic") {
|
|
||||||
searchMode = "lexical"
|
|
||||||
}
|
|
||||||
let searchSeq = 0
|
|
||||||
let runSearchTimer: number | null = null
|
|
||||||
let lastInputAt = 0
|
|
||||||
searchLayout.dataset.mode = searchMode
|
|
||||||
|
|
||||||
const updateModeUI = (mode: SearchMode) => {
|
|
||||||
modeButtons.forEach((button) => {
|
|
||||||
const btnMode = (button.dataset.mode as SearchMode) ?? "lexical"
|
|
||||||
const isActive = btnMode === mode
|
|
||||||
button.classList.toggle("active", isActive)
|
|
||||||
button.setAttribute("aria-pressed", String(isActive))
|
|
||||||
})
|
|
||||||
if (modeToggle) {
|
|
||||||
modeToggle.dataset.mode = mode
|
|
||||||
}
|
|
||||||
searchLayout.dataset.mode = mode
|
|
||||||
}
|
|
||||||
|
|
||||||
const computeDebounceDelay = (term: string): number => {
|
|
||||||
const trimmed = term.trim()
|
|
||||||
const lastTerm = currentSearchTerm
|
|
||||||
const isExtension =
|
|
||||||
lastTerm.length > 0 && trimmed.length > lastTerm.length && trimmed.startsWith(lastTerm)
|
|
||||||
const isRetraction = lastTerm.length > trimmed.length
|
|
||||||
const isReplacement =
|
|
||||||
lastTerm.length > 0 && !trimmed.startsWith(lastTerm) && !lastTerm.startsWith(trimmed)
|
|
||||||
const baseFullQueryDelay = 200
|
|
||||||
const semanticPenalty = searchMode === "semantic" ? 60 : 0
|
|
||||||
|
|
||||||
if (isExtension && trimmed.length > 2) {
|
|
||||||
return baseFullQueryDelay + semanticPenalty
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isReplacement && trimmed.length > 3) {
|
|
||||||
return Math.max(90, baseFullQueryDelay - 80)
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isRetraction) {
|
|
||||||
return 90
|
|
||||||
}
|
|
||||||
|
|
||||||
return baseFullQueryDelay + (searchMode === "semantic" ? 40 : 0)
|
|
||||||
}
|
|
||||||
|
|
||||||
const triggerSearchWithMode = (mode: SearchMode) => {
|
|
||||||
if (mode === "semantic" && !semanticReady) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if (searchMode === mode) return
|
|
||||||
searchMode = mode
|
|
||||||
updateModeUI(mode)
|
|
||||||
persistSearchMode(searchMode)
|
|
||||||
if (rawSearchTerm.trim() !== "") {
|
|
||||||
searchLayout.classList.add("display-results")
|
|
||||||
const token = ++searchSeq
|
|
||||||
void runSearch(rawSearchTerm, token)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
updateModeUI(searchMode)
|
|
||||||
|
|
||||||
modeButtons.forEach((button) => {
|
|
||||||
const btnMode = (button.dataset.mode as SearchMode) ?? "lexical"
|
|
||||||
if (btnMode === "semantic") {
|
|
||||||
button.disabled = !semanticReady
|
|
||||||
button.setAttribute("aria-disabled", String(!semanticReady))
|
|
||||||
}
|
|
||||||
const handler = () => triggerSearchWithMode(btnMode)
|
|
||||||
button.addEventListener("click", handler)
|
|
||||||
window.addCleanup(() => button.removeEventListener("click", handler))
|
|
||||||
})
|
|
||||||
let preview: HTMLDivElement | undefined = undefined
|
let preview: HTMLDivElement | undefined = undefined
|
||||||
let previewInner: HTMLDivElement | undefined = undefined
|
let previewInner: HTMLDivElement | undefined = undefined
|
||||||
const results = document.createElement("div")
|
const results = document.createElement("div")
|
||||||
@@ -466,23 +191,20 @@ async function setupSearch(
|
|||||||
removeAllChildren(preview)
|
removeAllChildren(preview)
|
||||||
}
|
}
|
||||||
searchLayout.classList.remove("display-results")
|
searchLayout.classList.remove("display-results")
|
||||||
|
searchType = "basic" // reset search type after closing
|
||||||
searchButton.focus()
|
searchButton.focus()
|
||||||
resetProgressBar()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function showSearch(type: SearchType) {
|
function showSearch(searchTypeNew: SearchType) {
|
||||||
|
searchType = searchTypeNew
|
||||||
|
if (sidebar) sidebar.style.zIndex = "1"
|
||||||
container.classList.add("active")
|
container.classList.add("active")
|
||||||
if (type === "tags") {
|
|
||||||
searchBar.value = "#"
|
|
||||||
rawSearchTerm = "#"
|
|
||||||
}
|
|
||||||
searchBar.focus()
|
searchBar.focus()
|
||||||
}
|
}
|
||||||
|
|
||||||
let currentHover: HTMLInputElement | null = null
|
let currentHover: HTMLInputElement | null = null
|
||||||
|
|
||||||
async function shortcutHandler(e: HTMLElementEventMap["keydown"]) {
|
async function shortcutHandler(e: HTMLElementEventMap["keydown"]) {
|
||||||
if ((e.key === "/" || e.key === "k") && (e.ctrlKey || e.metaKey) && !e.shiftKey) {
|
if (e.key === "k" && (e.ctrlKey || e.metaKey) && !e.shiftKey) {
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
const searchBarOpen = container.classList.contains("active")
|
const searchBarOpen = container.classList.contains("active")
|
||||||
searchBarOpen ? hideSearch() : showSearch("basic")
|
searchBarOpen ? hideSearch() : showSearch("basic")
|
||||||
@@ -492,6 +214,9 @@ async function setupSearch(
|
|||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
const searchBarOpen = container.classList.contains("active")
|
const searchBarOpen = container.classList.contains("active")
|
||||||
searchBarOpen ? hideSearch() : showSearch("tags")
|
searchBarOpen ? hideSearch() : showSearch("tags")
|
||||||
|
|
||||||
|
// add "#" prefix for tag search
|
||||||
|
searchBar.value = "#"
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -501,29 +226,20 @@ async function setupSearch(
|
|||||||
|
|
||||||
// If search is active, then we will render the first result and display accordingly
|
// If search is active, then we will render the first result and display accordingly
|
||||||
if (!container.classList.contains("active")) return
|
if (!container.classList.contains("active")) return
|
||||||
if (e.key === "Enter") {
|
if (e.key === "Enter" && !e.isComposing) {
|
||||||
// If result has focus, navigate to that one, otherwise pick first result
|
// If result has focus, navigate to that one, otherwise pick first result
|
||||||
let anchor: HTMLAnchorElement | undefined
|
|
||||||
if (results.contains(document.activeElement)) {
|
if (results.contains(document.activeElement)) {
|
||||||
anchor = document.activeElement as HTMLAnchorElement
|
const active = document.activeElement as HTMLInputElement
|
||||||
if (anchor.classList.contains("no-match")) return
|
if (active.classList.contains("no-match")) return
|
||||||
await displayPreview(anchor)
|
await displayPreview(active)
|
||||||
e.preventDefault()
|
active.click()
|
||||||
anchor.click()
|
|
||||||
} else {
|
} else {
|
||||||
anchor = document.getElementsByClassName("result-card")[0] as HTMLAnchorElement
|
const anchor = document.getElementsByClassName("result-card")[0] as HTMLInputElement | null
|
||||||
if (!anchor || anchor.classList.contains("no-match")) return
|
if (!anchor || anchor.classList.contains("no-match")) return
|
||||||
await displayPreview(anchor)
|
await displayPreview(anchor)
|
||||||
e.preventDefault()
|
|
||||||
anchor.click()
|
anchor.click()
|
||||||
}
|
}
|
||||||
if (anchor !== undefined)
|
} else if (e.key === "ArrowUp" || (e.shiftKey && e.key === "Tab")) {
|
||||||
window.spaNavigate(new URL(new URL(anchor.href).pathname, window.location.toString()))
|
|
||||||
} else if (
|
|
||||||
e.key === "ArrowUp" ||
|
|
||||||
(e.shiftKey && e.key === "Tab") ||
|
|
||||||
(e.ctrlKey && e.key === "p")
|
|
||||||
) {
|
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
if (results.contains(document.activeElement)) {
|
if (results.contains(document.activeElement)) {
|
||||||
// If an element in results-container already has focus, focus previous one
|
// If an element in results-container already has focus, focus previous one
|
||||||
@@ -536,7 +252,7 @@ async function setupSearch(
|
|||||||
if (prevResult) currentHover = prevResult
|
if (prevResult) currentHover = prevResult
|
||||||
await displayPreview(prevResult)
|
await displayPreview(prevResult)
|
||||||
}
|
}
|
||||||
} else if (e.key === "ArrowDown" || e.key === "Tab" || (e.ctrlKey && e.key === "n")) {
|
} else if (e.key === "ArrowDown" || e.key === "Tab") {
|
||||||
e.preventDefault()
|
e.preventDefault()
|
||||||
// The results should already been focused, so we need to find the next one.
|
// The results should already been focused, so we need to find the next one.
|
||||||
// The activeElement is the search bar, so we need to find the first result and focus it.
|
// The activeElement is the search bar, so we need to find the first result and focus it.
|
||||||
@@ -553,33 +269,25 @@ async function setupSearch(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const formatForDisplay = (term: string, id: number, renderType: SearchType) => {
|
const formatForDisplay = (term: string, id: number) => {
|
||||||
const slug = idDataMap[id]
|
const slug = idDataMap[id]
|
||||||
|
|
||||||
// Check if query contains title words (for boosting exact matches)
|
|
||||||
const queryTokens = tokenizeTerm(term)
|
|
||||||
const titleTokens = tokenizeTerm(data[slug].title ?? "")
|
|
||||||
const titleMatch = titleTokens.some((t) => queryTokens.includes(t))
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
id,
|
id,
|
||||||
slug,
|
slug,
|
||||||
title: renderType === "tags" ? data[slug].title : highlight(term, data[slug].title ?? ""),
|
title: searchType === "tags" ? data[slug].title : highlight(term, data[slug].title ?? ""),
|
||||||
content: highlight(term, data[slug].content ?? "", true),
|
content: highlight(term, data[slug].content ?? "", true),
|
||||||
tags: highlightTags(term, data[slug].tags, renderType),
|
tags: highlightTags(term.substring(1), data[slug].tags),
|
||||||
titleMatch, // Add title match flag for boosting
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function highlightTags(term: string, tags: string[], renderType: SearchType) {
|
function highlightTags(term: string, tags: string[]) {
|
||||||
if (!tags || renderType !== "tags") {
|
if (!tags || searchType !== "tags") {
|
||||||
return []
|
return []
|
||||||
}
|
}
|
||||||
|
|
||||||
const tagTerm = term.toLowerCase()
|
|
||||||
return tags
|
return tags
|
||||||
.map((tag) => {
|
.map((tag) => {
|
||||||
if (tag.toLowerCase().includes(tagTerm)) {
|
if (tag.toLowerCase().includes(term.toLowerCase())) {
|
||||||
return `<li><p class="match-tag">#${tag}</p></li>`
|
return `<li><p class="match-tag">#${tag}</p></li>`
|
||||||
} else {
|
} else {
|
||||||
return `<li><p>#${tag}</p></li>`
|
return `<li><p>#${tag}</p></li>`
|
||||||
@@ -592,40 +300,24 @@ async function setupSearch(
|
|||||||
return new URL(resolveRelative(currentSlug, slug), location.toString())
|
return new URL(resolveRelative(currentSlug, slug), location.toString())
|
||||||
}
|
}
|
||||||
|
|
||||||
const resultToHTML = ({ item, percent }: { item: Item; percent: number | null }) => {
|
const resultToHTML = ({ slug, title, content, tags }: Item) => {
|
||||||
const { slug, title, content, tags, target } = item
|
|
||||||
const htmlTags = tags.length > 0 ? `<ul class="tags">${tags.join("")}</ul>` : ``
|
const htmlTags = tags.length > 0 ? `<ul class="tags">${tags.join("")}</ul>` : ``
|
||||||
const itemTile = document.createElement("a")
|
const itemTile = document.createElement("a")
|
||||||
const titleContent = target ? highlight(currentSearchTerm, target) : title
|
|
||||||
const subscript = target ? `<b>${slug}</b>` : ``
|
|
||||||
let percentLabel = "—"
|
|
||||||
let percentAttr = ""
|
|
||||||
if (percent !== null && Number.isFinite(percent)) {
|
|
||||||
const bounded = Math.max(0, Math.min(100, percent))
|
|
||||||
percentLabel = `${bounded.toFixed(1)}%`
|
|
||||||
percentAttr = bounded.toFixed(3)
|
|
||||||
}
|
|
||||||
itemTile.classList.add("result-card")
|
itemTile.classList.add("result-card")
|
||||||
itemTile.id = slug
|
itemTile.id = slug
|
||||||
itemTile.href = resolveUrl(slug).toString()
|
itemTile.href = resolveUrl(slug).toString()
|
||||||
itemTile.innerHTML = `<hgroup>
|
itemTile.innerHTML = `
|
||||||
<h3>${titleContent}</h3>
|
<h3 class="card-title">${title}</h3>
|
||||||
${subscript}${htmlTags}
|
${htmlTags}
|
||||||
${searchMode === "semantic" ? `<span class="result-likelihood" title="match likelihood"> ${percentLabel}</span>` : ""}
|
<p class="card-description">${content}</p>
|
||||||
${enablePreview && window.innerWidth > 600 ? "" : `<p>${content}</p>`}
|
`
|
||||||
</hgroup>`
|
itemTile.addEventListener("click", (event) => {
|
||||||
if (percentAttr) itemTile.dataset.scorePercent = percentAttr
|
if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) return
|
||||||
else delete itemTile.dataset.scorePercent
|
hideSearch()
|
||||||
|
})
|
||||||
|
|
||||||
const handler = (evt: MouseEvent) => {
|
const handler = (event: MouseEvent) => {
|
||||||
if (evt.altKey || evt.ctrlKey || evt.metaKey || evt.shiftKey) return
|
if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) return
|
||||||
const anchor = evt.currentTarget as HTMLAnchorElement | null
|
|
||||||
if (!anchor) return
|
|
||||||
evt.preventDefault()
|
|
||||||
const href = anchor.getAttribute("href")
|
|
||||||
if (!href) return
|
|
||||||
const url = new URL(href, window.location.toString())
|
|
||||||
window.spaNavigate(url)
|
|
||||||
hideSearch()
|
hideSearch()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -643,22 +335,15 @@ async function setupSearch(
|
|||||||
return itemTile
|
return itemTile
|
||||||
}
|
}
|
||||||
|
|
||||||
async function displayResults(finalResults: SimilarityResult[]) {
|
async function displayResults(finalResults: Item[]) {
|
||||||
removeAllChildren(results)
|
removeAllChildren(results)
|
||||||
if (finalResults.length === 0) {
|
if (finalResults.length === 0) {
|
||||||
results.innerHTML = `<a class="result-card no-match">
|
results.innerHTML = `<a class="result-card no-match">
|
||||||
<h3>No results.</h3>
|
<h3>No results.</h3>
|
||||||
<p>Try another search term?</p>
|
<p>Try another search term?</p>
|
||||||
</a>`
|
</a>`
|
||||||
currentHover = null
|
|
||||||
} else {
|
} else {
|
||||||
const decorated = finalResults.map(({ item, similarity }) => {
|
results.append(...finalResults.map(resultToHTML))
|
||||||
if (!Number.isFinite(similarity)) return { item, percent: null }
|
|
||||||
const bounded = Math.max(-1, Math.min(1, similarity))
|
|
||||||
const percent = ((bounded + 1) / 2) * 100
|
|
||||||
return { item, percent }
|
|
||||||
})
|
|
||||||
results.append(...decorated.map(resultToHTML))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (finalResults.length === 0 && preview) {
|
if (finalResults.length === 0 && preview) {
|
||||||
@@ -678,8 +363,8 @@ async function setupSearch(
|
|||||||
return fetchContentCache.get(slug) as Element[]
|
return fetchContentCache.get(slug) as Element[]
|
||||||
}
|
}
|
||||||
|
|
||||||
const targetUrl = resolveUrl(slug)
|
const targetUrl = resolveUrl(slug).toString()
|
||||||
const contents = await fetchCanonical(targetUrl)
|
const contents = await fetch(targetUrl)
|
||||||
.then((res) => res.text())
|
.then((res) => res.text())
|
||||||
.then((contents) => {
|
.then((contents) => {
|
||||||
if (contents === undefined) {
|
if (contents === undefined) {
|
||||||
@@ -709,296 +394,73 @@ async function setupSearch(
|
|||||||
const highlights = [...preview.getElementsByClassName("highlight")].sort(
|
const highlights = [...preview.getElementsByClassName("highlight")].sort(
|
||||||
(a, b) => b.innerHTML.length - a.innerHTML.length,
|
(a, b) => b.innerHTML.length - a.innerHTML.length,
|
||||||
)
|
)
|
||||||
if (highlights.length > 0) {
|
highlights[0]?.scrollIntoView({ block: "start" })
|
||||||
const highlight = highlights[0]
|
|
||||||
const container = preview
|
|
||||||
if (container && highlight) {
|
|
||||||
// Get the relative positions
|
|
||||||
const containerRect = container.getBoundingClientRect()
|
|
||||||
const highlightRect = highlight.getBoundingClientRect()
|
|
||||||
// Calculate the scroll position relative to the container
|
|
||||||
const relativeTop = highlightRect.top - containerRect.top + container.scrollTop - 20 // 20px buffer
|
|
||||||
// Smoothly scroll the container
|
|
||||||
container.scrollTo({
|
|
||||||
top: relativeTop,
|
|
||||||
behavior: "smooth",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function runSearch(rawTerm: string, token: number) {
|
async function onType(e: HTMLElementEventMap["input"]) {
|
||||||
if (!searchLayout || !index) return
|
if (!searchLayout || !index) return
|
||||||
const trimmed = rawTerm.trim()
|
currentSearchTerm = (e.target as HTMLInputElement).value
|
||||||
if (trimmed === "") {
|
searchLayout.classList.toggle("display-results", currentSearchTerm !== "")
|
||||||
removeAllChildren(results)
|
searchType = currentSearchTerm.startsWith("#") ? "tags" : "basic"
|
||||||
if (preview) {
|
|
||||||
removeAllChildren(preview)
|
|
||||||
}
|
|
||||||
currentHover = null
|
|
||||||
searchLayout.classList.remove("display-results")
|
|
||||||
resetProgressBar()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const modeForRanking: SearchMode = searchMode
|
let searchResults: DefaultDocumentSearchResults<Item>
|
||||||
const initialType: SearchType = trimmed.startsWith("#") ? "tags" : "basic"
|
if (searchType === "tags") {
|
||||||
let workingType: SearchType = initialType
|
currentSearchTerm = currentSearchTerm.substring(1).trim()
|
||||||
let highlightTerm = trimmed
|
const separatorIndex = currentSearchTerm.indexOf(" ")
|
||||||
let tagTerm = ""
|
if (separatorIndex != -1) {
|
||||||
let searchResults: DefaultDocumentSearchResults<Item> = []
|
// search by title and content index and then filter by tag (implemented in flexsearch)
|
||||||
|
const tag = currentSearchTerm.substring(0, separatorIndex)
|
||||||
if (initialType === "tags") {
|
const query = currentSearchTerm.substring(separatorIndex + 1).trim()
|
||||||
tagTerm = trimmed.substring(1).trim()
|
searchResults = await index.searchAsync({
|
||||||
const separatorIndex = tagTerm.indexOf(" ")
|
query: query,
|
||||||
if (separatorIndex !== -1) {
|
// return at least 10000 documents, so it is enough to filter them by tag (implemented in flexsearch)
|
||||||
const tag = tagTerm.substring(0, separatorIndex).trim()
|
|
||||||
const query = tagTerm.substring(separatorIndex + 1).trim()
|
|
||||||
const results = await index.searchAsync({
|
|
||||||
query,
|
|
||||||
limit: Math.max(numSearchResults, 10000),
|
limit: Math.max(numSearchResults, 10000),
|
||||||
index: ["title", "content"],
|
index: ["title", "content"],
|
||||||
tag: { tags: tag },
|
tag: { tags: tag },
|
||||||
})
|
})
|
||||||
if (token !== searchSeq) return
|
for (let searchResult of searchResults) {
|
||||||
searchResults = Object.values(results)
|
searchResult.result = searchResult.result.slice(0, numSearchResults)
|
||||||
workingType = "basic"
|
}
|
||||||
highlightTerm = query
|
// set search type to basic and remove tag from term for proper highlightning and scroll
|
||||||
|
searchType = "basic"
|
||||||
|
currentSearchTerm = query
|
||||||
} else {
|
} else {
|
||||||
const results = await index.searchAsync({
|
// default search by tags index
|
||||||
query: tagTerm,
|
searchResults = await index.searchAsync({
|
||||||
|
query: currentSearchTerm,
|
||||||
limit: numSearchResults,
|
limit: numSearchResults,
|
||||||
index: ["tags"],
|
index: ["tags"],
|
||||||
})
|
})
|
||||||
if (token !== searchSeq) return
|
|
||||||
searchResults = Object.values(results)
|
|
||||||
highlightTerm = tagTerm
|
|
||||||
}
|
}
|
||||||
} else {
|
} else if (searchType === "basic") {
|
||||||
const results = await index.searchAsync({
|
searchResults = await index.searchAsync({
|
||||||
query: highlightTerm,
|
query: currentSearchTerm,
|
||||||
limit: numSearchResults,
|
limit: numSearchResults,
|
||||||
index: ["title", "content"],
|
index: ["title", "content"],
|
||||||
})
|
})
|
||||||
if (token !== searchSeq) return
|
|
||||||
searchResults = Object.values(results)
|
|
||||||
}
|
|
||||||
|
|
||||||
const coerceIds = (hit?: DefaultDocumentSearchResults<Item>[number]): number[] => {
|
|
||||||
if (!hit) return []
|
|
||||||
return hit.result
|
|
||||||
.map((value: Id) => {
|
|
||||||
if (typeof value === "number") {
|
|
||||||
return value
|
|
||||||
}
|
|
||||||
const parsed = Number.parseInt(String(value), 10)
|
|
||||||
return Number.isNaN(parsed) ? null : parsed
|
|
||||||
})
|
|
||||||
.filter((value): value is number => value !== null)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const getByField = (field: string): number[] => {
|
const getByField = (field: string): number[] => {
|
||||||
const hit = searchResults.find((x) => x.field === field)
|
const results = searchResults.filter((x) => x.field === field)
|
||||||
return coerceIds(hit)
|
return results.length === 0 ? [] : ([...results[0].result] as number[])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// order titles ahead of content
|
||||||
const allIds: Set<number> = new Set([
|
const allIds: Set<number> = new Set([
|
||||||
...getByField("title"),
|
...getByField("title"),
|
||||||
...getByField("content"),
|
...getByField("content"),
|
||||||
...getByField("tags"),
|
...getByField("tags"),
|
||||||
])
|
])
|
||||||
|
const finalResults = [...allIds].map((id) => formatForDisplay(currentSearchTerm, id))
|
||||||
currentSearchTerm = highlightTerm
|
await displayResults(finalResults)
|
||||||
|
|
||||||
const candidateItems = new Map<string, Item>()
|
|
||||||
const ensureItem = (id: number): Item | null => {
|
|
||||||
const slug = idDataMap[id]
|
|
||||||
if (!slug) return null
|
|
||||||
const cached = candidateItems.get(slug)
|
|
||||||
if (cached) return cached
|
|
||||||
const item = formatForDisplay(highlightTerm, id, workingType)
|
|
||||||
if (item) {
|
|
||||||
candidateItems.set(slug, item)
|
|
||||||
return item
|
|
||||||
}
|
|
||||||
return null
|
|
||||||
}
|
|
||||||
|
|
||||||
const baseIndices: number[] = []
|
|
||||||
for (const id of allIds) {
|
|
||||||
const item = ensureItem(id)
|
|
||||||
if (!item) continue
|
|
||||||
const idx = slugToIndex.get(item.slug)
|
|
||||||
if (typeof idx === "number") {
|
|
||||||
baseIndices.push(idx)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let semanticIds: number[] = []
|
|
||||||
const semanticSimilarity = new Map<number, number>()
|
|
||||||
|
|
||||||
const integrateIds = (ids: number[]) => {
|
|
||||||
ids.forEach((docId) => {
|
|
||||||
ensureItem(docId)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
const orchestrator = semanticReady && semantic ? semantic : null
|
|
||||||
|
|
||||||
const resolveSimilarity = (item: Item): number => {
|
|
||||||
const semanticHit = semanticSimilarity.get(item.id)
|
|
||||||
return semanticHit ?? Number.NaN
|
|
||||||
}
|
|
||||||
|
|
||||||
const render = async () => {
|
|
||||||
if (token !== searchSeq) return
|
|
||||||
const useSemantic = semanticReady && semanticIds.length > 0
|
|
||||||
const weights =
|
|
||||||
modeForRanking === "semantic" && useSemantic
|
|
||||||
? { base: 0.3, semantic: 1.0 }
|
|
||||||
: { base: 1.0, semantic: useSemantic ? 0.3 : 0 }
|
|
||||||
const rrf = new Map<string, number>()
|
|
||||||
const push = (ids: number[], weight: number, applyTitleBoost: boolean = false) => {
|
|
||||||
if (!ids.length || weight <= 0) return
|
|
||||||
ids.forEach((docId, rank) => {
|
|
||||||
const slug = idDataMap[docId]
|
|
||||||
if (!slug) return
|
|
||||||
const item = ensureItem(docId)
|
|
||||||
if (!item) return
|
|
||||||
|
|
||||||
// Apply title boost for FlexSearch results (1.5x boost for exact title matches)
|
|
||||||
let effectiveWeight = weight
|
|
||||||
if (applyTitleBoost && item.titleMatch) {
|
|
||||||
effectiveWeight *= 1.5
|
|
||||||
}
|
|
||||||
|
|
||||||
const prev = rrf.get(slug) ?? 0
|
|
||||||
rrf.set(slug, prev + effectiveWeight / (1 + rank))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
push(baseIndices, weights.base, true) // FlexSearch with title boost
|
|
||||||
push(semanticIds, weights.semantic, false) // Semantic without boost
|
|
||||||
|
|
||||||
const rankedEntries = Array.from(candidateItems.values())
|
|
||||||
.map((item) => ({ item, score: rrf.get(item.slug) ?? 0 }))
|
|
||||||
.sort((a, b) => b.score - a.score)
|
|
||||||
.slice(0, numSearchResults)
|
|
||||||
|
|
||||||
const displayEntries: SimilarityResult[] = []
|
|
||||||
for (const entry of rankedEntries) {
|
|
||||||
const similarity = resolveSimilarity(entry.item)
|
|
||||||
displayEntries.push({ item: entry.item, similarity })
|
|
||||||
}
|
|
||||||
|
|
||||||
await displayResults(displayEntries)
|
|
||||||
}
|
|
||||||
|
|
||||||
await render()
|
|
||||||
|
|
||||||
if (workingType === "tags" || !orchestrator || !semanticReady || highlightTerm.length < 2) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
const showProgress = modeForRanking === "semantic"
|
|
||||||
if (showProgress) {
|
|
||||||
startSemanticProgress()
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
const { semantic: semRes } = await orchestrator.search(
|
|
||||||
highlightTerm,
|
|
||||||
numSearchResults * 3, // Request more chunks to ensure good document coverage
|
|
||||||
)
|
|
||||||
if (token !== searchSeq) {
|
|
||||||
if (showProgress) completeSemanticProgress()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Aggregate chunk results to document level using RRF
|
|
||||||
const { rrfScores: semRrfScores, maxScores: semMaxScores } = aggregateChunkResults(
|
|
||||||
semRes,
|
|
||||||
slugToIndex,
|
|
||||||
)
|
|
||||||
|
|
||||||
// Use RRF scores for ranking
|
|
||||||
semanticIds = Array.from(semRrfScores.entries())
|
|
||||||
.sort((a, b) => b[1] - a[1])
|
|
||||||
.slice(0, numSearchResults)
|
|
||||||
.map(([docIdx]) => docIdx)
|
|
||||||
|
|
||||||
// Use max chunk similarity for display (0-1 range)
|
|
||||||
semanticSimilarity.clear()
|
|
||||||
semMaxScores.forEach((score, docIdx) => {
|
|
||||||
semanticSimilarity.set(docIdx, score)
|
|
||||||
})
|
|
||||||
|
|
||||||
integrateIds(semanticIds)
|
|
||||||
if (showProgress) completeSemanticProgress()
|
|
||||||
} catch (err) {
|
|
||||||
console.warn("[SemanticClient] search failed:", err)
|
|
||||||
if (showProgress) completeSemanticProgress()
|
|
||||||
orchestrator.dispose()
|
|
||||||
semantic = null
|
|
||||||
semanticReady = false
|
|
||||||
semanticInitFailed = true
|
|
||||||
if (searchMode === "semantic") {
|
|
||||||
searchMode = "lexical"
|
|
||||||
updateModeUI(searchMode)
|
|
||||||
}
|
|
||||||
modeButtons.forEach((button) => {
|
|
||||||
if ((button.dataset.mode as SearchMode) === "semantic") {
|
|
||||||
button.disabled = true
|
|
||||||
button.setAttribute("aria-disabled", "true")
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
await render()
|
|
||||||
}
|
|
||||||
|
|
||||||
function onType(e: HTMLElementEventMap["input"]) {
|
|
||||||
if (!searchLayout || !index) return
|
|
||||||
rawSearchTerm = (e.target as HTMLInputElement).value
|
|
||||||
const hasQuery = rawSearchTerm.trim() !== ""
|
|
||||||
searchLayout.classList.toggle("display-results", hasQuery)
|
|
||||||
const term = rawSearchTerm
|
|
||||||
const token = ++searchSeq
|
|
||||||
if (runSearchTimer !== null) {
|
|
||||||
window.clearTimeout(runSearchTimer)
|
|
||||||
runSearchTimer = null
|
|
||||||
}
|
|
||||||
if (!hasQuery) {
|
|
||||||
void runSearch("", token)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
const now = performance.now()
|
|
||||||
lastInputAt = now
|
|
||||||
const delay = computeDebounceDelay(term)
|
|
||||||
const scheduledAt = lastInputAt
|
|
||||||
runSearchTimer = window.setTimeout(() => {
|
|
||||||
if (scheduledAt !== lastInputAt) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
runSearchTimer = null
|
|
||||||
void runSearch(term, token)
|
|
||||||
}, delay)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
document.addEventListener("keydown", shortcutHandler)
|
document.addEventListener("keydown", shortcutHandler)
|
||||||
window.addCleanup(() => document.removeEventListener("keydown", shortcutHandler))
|
window.addCleanup(() => document.removeEventListener("keydown", shortcutHandler))
|
||||||
const openHandler = () => showSearch("basic")
|
searchButton.addEventListener("click", () => showSearch("basic"))
|
||||||
searchButton.addEventListener("click", openHandler)
|
window.addCleanup(() => searchButton.removeEventListener("click", () => showSearch("basic")))
|
||||||
window.addCleanup(() => searchButton.removeEventListener("click", openHandler))
|
|
||||||
searchBar.addEventListener("input", onType)
|
searchBar.addEventListener("input", onType)
|
||||||
window.addCleanup(() => searchBar.removeEventListener("input", onType))
|
window.addCleanup(() => searchBar.removeEventListener("input", onType))
|
||||||
window.addCleanup(() => {
|
|
||||||
if (runSearchTimer !== null) {
|
|
||||||
window.clearTimeout(runSearchTimer)
|
|
||||||
runSearchTimer = null
|
|
||||||
}
|
|
||||||
resetProgressBar()
|
|
||||||
})
|
|
||||||
|
|
||||||
registerEscapeHandler(container, hideSearch)
|
registerEscapeHandler(container, hideSearch)
|
||||||
await fillDocument(data)
|
await fillDocument(data)
|
||||||
@@ -1006,17 +468,17 @@ async function setupSearch(
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Fills flexsearch document with data
|
* Fills flexsearch document with data
|
||||||
|
* @param index index to fill
|
||||||
* @param data data to fill index with
|
* @param data data to fill index with
|
||||||
*/
|
*/
|
||||||
let indexPopulated = false
|
let indexPopulated = false
|
||||||
async function fillDocument(data: ContentIndex) {
|
async function fillDocument(data: ContentIndex) {
|
||||||
if (indexPopulated) return
|
if (indexPopulated) return
|
||||||
let id = 0
|
let id = 0
|
||||||
const promises = []
|
const promises: Array<Promise<unknown>> = []
|
||||||
for (const [slug, fileData] of Object.entries<ContentDetails>(data)) {
|
for (const [slug, fileData] of Object.entries<ContentDetails>(data)) {
|
||||||
promises.push(
|
promises.push(
|
||||||
//@ts-ignore
|
index.addAsync(id++, {
|
||||||
index.addAsync({
|
|
||||||
id,
|
id,
|
||||||
slug: slug as FullSlug,
|
slug: slug as FullSlug,
|
||||||
title: fileData.title,
|
title: fileData.title,
|
||||||
@@ -1024,7 +486,6 @@ async function fillDocument(data: ContentIndex) {
|
|||||||
tags: fileData.tags,
|
tags: fileData.tags,
|
||||||
}),
|
}),
|
||||||
)
|
)
|
||||||
id++
|
|
||||||
}
|
}
|
||||||
|
|
||||||
await Promise.all(promises)
|
await Promise.all(promises)
|
||||||
@@ -1034,9 +495,7 @@ async function fillDocument(data: ContentIndex) {
|
|||||||
document.addEventListener("nav", async (e: CustomEventMap["nav"]) => {
|
document.addEventListener("nav", async (e: CustomEventMap["nav"]) => {
|
||||||
const currentSlug = e.detail.url
|
const currentSlug = e.detail.url
|
||||||
const data = await fetchData
|
const data = await fetchData
|
||||||
const searchElement = document.getElementsByClassName(
|
const searchElement = document.getElementsByClassName("search")
|
||||||
"search",
|
|
||||||
) as HTMLCollectionOf<HTMLDivElement>
|
|
||||||
for (const element of searchElement) {
|
for (const element of searchElement) {
|
||||||
await setupSearch(element, currentSlug, data)
|
await setupSearch(element, currentSlug, data)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,182 +0,0 @@
|
|||||||
export type SemanticResult = { id: number; score: number }
|
|
||||||
|
|
||||||
type ProgressMessage = {
|
|
||||||
type: "progress"
|
|
||||||
loadedRows: number
|
|
||||||
totalRows: number
|
|
||||||
}
|
|
||||||
|
|
||||||
type ReadyMessage = { type: "ready" }
|
|
||||||
|
|
||||||
type ResultMessage = {
|
|
||||||
type: "search-result"
|
|
||||||
seq: number
|
|
||||||
semantic: SemanticResult[]
|
|
||||||
}
|
|
||||||
|
|
||||||
type ErrorMessage = { type: "error"; seq?: number; message: string }
|
|
||||||
|
|
||||||
type SearchPayload = {
|
|
||||||
semantic: SemanticResult[]
|
|
||||||
}
|
|
||||||
|
|
||||||
type PendingResolver = {
|
|
||||||
resolve: (payload: SearchPayload) => void
|
|
||||||
reject: (err: Error) => void
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Main-thread client for the semantic-search web worker.
 *
 * Unless `cfg.enable === false`, the constructor spawns `/semantic.worker.js` and posts an
 * `init` message to it. `ensureReady()` resolves once the worker has posted `ready` and throws
 * the recorded failure otherwise. Every `search()` call is tagged with a sequence number so the
 * matching `search-result` / `error` reply settles the right promise.
 */
export class SemanticClient {
  /** Resolves (never rejects) once initialization has succeeded, failed, or been abandoned. */
  private ready: Promise<void>
  private resolveReady!: () => void
  private worker: Worker | null = null
  /** In-flight searches keyed by the sequence number sent to the worker. */
  private pending = new Map<number, PendingResolver>()
  private seq = 0
  private disposed = false
  private readySettled = false
  /** True only after the worker has posted `ready`. */
  private configured = false
  /** Most recent fatal error; surfaced by `ensureReady()` and `search()`. */
  private lastError: Error | null = null

  constructor(private cfg?: any) {
    this.ready = new Promise((resolve) => {
      // Idempotent resolver: `ready` may be settled from several failure/success paths.
      this.resolveReady = () => {
        if (this.readySettled) return
        this.readySettled = true
        resolve()
      }
    })

    if (this.cfg?.enable === false) {
      this.lastError = new Error("semantic search disabled by configuration")
      this.resolveReady()
      return
    }

    this.boot()
  }

  /** Creates the worker, wires its handlers and kicks off initialization. */
  private boot() {
    try {
      this.worker = new Worker("/semantic.worker.js", { type: "module" })
    } catch (err) {
      this.handleFatal(err)
      return
    }
    this.setupWorker()
    this.startInit()
  }

  /** Installs the message and error handlers on the current worker. */
  private setupWorker() {
    const worker = this.worker
    if (!worker) return

    worker.onmessage = (
      event: MessageEvent<ProgressMessage | ReadyMessage | ResultMessage | ErrorMessage>,
    ) => {
      const msg = event.data
      if (msg.type === "progress") {
        // Progress updates during initialization - can be logged if needed
        return
      }
      if (msg.type === "ready") {
        this.configured = true
        this.lastError = null
        this.resolveReady()
        return
      }
      if (msg.type === "search-result") {
        const waiter = this.pending.get(msg.seq)
        if (waiter) {
          this.pending.delete(msg.seq)
          waiter.resolve({ semantic: msg.semantic ?? [] })
        }
        return
      }
      if (msg.type === "error") {
        if (typeof msg.seq !== "number") {
          // An error without a sequence number is not tied to one search: treat it as fatal.
          this.handleFatal(msg.message)
          return
        }
        const waiter = this.pending.get(msg.seq)
        if (waiter) {
          this.pending.delete(msg.seq)
          waiter.reject(new Error(msg.message))
        }
      }
    }

    // Without these handlers a worker script that fails to load, or throws outside of its own
    // error reporting, would leave `ready` and every in-flight search pending forever.
    worker.onerror = (event: { message?: string; error?: unknown }) => {
      const cause =
        event.error instanceof Error ? event.error : event.message || "semantic worker crashed"
      this.handleFatal(cause)
    }
    worker.onmessageerror = () => {
      this.handleFatal(new Error("semantic worker sent an undeserializable message"))
    }
  }

  /** Posts the `init` message, falling back to the default manifest location. */
  private startInit() {
    if (!this.worker) return
    const manifestUrl =
      typeof this.cfg?.manifestUrl === "string" && this.cfg.manifestUrl.length > 0
        ? this.cfg.manifestUrl
        : "/embeddings/manifest.json"
    const disableCache = Boolean(this.cfg?.disableCache)
    const baseUrl =
      typeof this.cfg?.manifestBaseUrl === "string" ? this.cfg.manifestBaseUrl : undefined
    this.worker.postMessage({
      type: "init",
      cfg: this.cfg,
      manifestUrl,
      baseUrl,
      disableCache,
    })
  }

  /**
   * Rejects every in-flight search with `err`.
   * When `fatal` is set the error is also recorded and `ready` is settled so that
   * `ensureReady()` callers observe the failure instead of waiting.
   */
  private rejectAll(err: Error, fatal = false) {
    const waiters = [...this.pending.values()]
    this.pending.clear()
    for (const waiter of waiters) {
      waiter.reject(err)
    }
    if (fatal) {
      this.lastError = err
      this.configured = false
      if (!this.readySettled) {
        this.resolveReady()
      }
    }
  }

  /** Records an unrecoverable failure, fails all waiters and tears the worker down. */
  private handleFatal(err: unknown) {
    const error = err instanceof Error ? err : new Error(String(err))
    console.error("[SemanticClient] initialization failure:", error)
    this.rejectAll(error, true)
    if (this.worker) {
      this.worker.postMessage({ type: "reset" })
      this.worker.terminate()
      this.worker = null
    }
  }

  /**
   * Waits for initialization to finish.
   * @throws the recorded failure (or a generic error) when the worker is not usable.
   */
  async ensureReady(): Promise<void> {
    await this.ready
    if (!this.configured) {
      throw this.lastError ?? new Error("semantic search unavailable")
    }
  }

  /**
   * Sends a search request to the worker.
   * @param text query text forwarded to the worker
   * @param k result count forwarded to the worker
   * @returns the `semantic` results posted back for this request
   * @throws if the client is disposed, initialization failed, or the worker reports an error
   */
  async search(text: string, k: number): Promise<SearchPayload> {
    if (this.disposed) {
      throw new Error("semantic client has been disposed")
    }
    await this.ensureReady()
    const worker = this.worker
    if (!worker || !this.configured) {
      throw this.lastError ?? new Error("worker unavailable")
    }
    return new Promise<SearchPayload>((resolve, reject) => {
      const seq = ++this.seq
      this.pending.set(seq, { resolve, reject })
      try {
        worker.postMessage({ type: "search", text, k, seq })
      } catch (err) {
        // Do not leave a resolver behind for a request that was never delivered.
        this.pending.delete(seq)
        reject(err instanceof Error ? err : new Error(String(err)))
      }
    })
  }

  /** Rejects in-flight searches, terminates the worker and unblocks pending `ensureReady()` calls. */
  dispose() {
    if (this.disposed) return
    this.disposed = true
    const disposedError = new Error("semantic client disposed")
    this.rejectAll(disposedError)
    if (this.worker) {
      this.worker.postMessage({ type: "reset" })
      this.worker.terminate()
    }
    this.worker = null
    this.configured = false
    if (!this.readySettled) {
      // Initialization will never complete now; settle `ready` so awaiting callers get an error.
      this.lastError = this.lastError ?? disposedError
      this.resolveReady()
    }
  }
}
|
|
||||||
@@ -133,12 +133,16 @@ button.desktop-explorer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.folder-outer {
|
.folder-outer {
|
||||||
|
visibility: collapse;
|
||||||
display: grid;
|
display: grid;
|
||||||
grid-template-rows: 0fr;
|
grid-template-rows: 0fr;
|
||||||
transition: grid-template-rows 0.3s ease-in-out;
|
transition-property: grid-template-rows, visibility;
|
||||||
|
transition-duration: 0.3s;
|
||||||
|
transition-timing-function: ease-in-out;
|
||||||
}
|
}
|
||||||
|
|
||||||
.folder-outer.open {
|
.folder-outer.open {
|
||||||
|
visibility: visible;
|
||||||
grid-template-rows: 1fr;
|
grid-template-rows: 1fr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -25,6 +25,7 @@
|
|||||||
& > p {
|
& > p {
|
||||||
display: inline;
|
display: inline;
|
||||||
color: var(--gray);
|
color: var(--gray);
|
||||||
|
text-wrap: unset;
|
||||||
}
|
}
|
||||||
|
|
||||||
& svg {
|
& svg {
|
||||||
@@ -77,97 +78,16 @@
|
|||||||
margin-bottom: 2em;
|
margin-bottom: 2em;
|
||||||
}
|
}
|
||||||
|
|
||||||
& > .input-container {
|
& > input {
|
||||||
align-items: center;
|
|
||||||
gap: 0.5rem;
|
|
||||||
display: flex;
|
|
||||||
flex-wrap: wrap;
|
|
||||||
position: relative;
|
|
||||||
box-sizing: border-box;
|
box-sizing: border-box;
|
||||||
|
padding: 0.5em 1em;
|
||||||
|
font-family: var(--bodyFont);
|
||||||
|
color: var(--dark);
|
||||||
|
font-size: 1.1em;
|
||||||
|
border: 1px solid var(--lightgray);
|
||||||
|
|
||||||
.search-bar {
|
&:focus {
|
||||||
flex: 1 1 auto;
|
outline: none;
|
||||||
min-width: 0;
|
|
||||||
box-sizing: border-box;
|
|
||||||
padding: 0.5em 1em;
|
|
||||||
font-family: var(--bodyFont);
|
|
||||||
color: var(--dark);
|
|
||||||
font-size: 1.1em;
|
|
||||||
border: none;
|
|
||||||
background: transparent;
|
|
||||||
|
|
||||||
&:focus {
|
|
||||||
outline: none;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
.semantic-search-progress {
|
|
||||||
position: absolute;
|
|
||||||
bottom: 0;
|
|
||||||
left: 0;
|
|
||||||
right: 0;
|
|
||||||
height: 2px;
|
|
||||||
background-color: var(--secondary);
|
|
||||||
width: 0;
|
|
||||||
opacity: 0;
|
|
||||||
transition:
|
|
||||||
width 0.3s ease,
|
|
||||||
opacity 0.2s ease;
|
|
||||||
pointer-events: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
.search-mode-toggle {
|
|
||||||
display: inline-flex;
|
|
||||||
align-items: center;
|
|
||||||
border-radius: 9999px;
|
|
||||||
height: 1.4rem;
|
|
||||||
background-color: color-mix(in srgb, var(--darkgray) 12%, transparent);
|
|
||||||
margin-right: 1rem;
|
|
||||||
|
|
||||||
.mode-option {
|
|
||||||
border: none;
|
|
||||||
background: transparent;
|
|
||||||
font: inherit;
|
|
||||||
color: var(--gray);
|
|
||||||
border-radius: 9999px;
|
|
||||||
cursor: pointer;
|
|
||||||
transition:
|
|
||||||
background-color 0.2s ease,
|
|
||||||
color 0.2s ease;
|
|
||||||
display: inline-flex;
|
|
||||||
align-items: center;
|
|
||||||
justify-content: center;
|
|
||||||
width: 1.5rem;
|
|
||||||
height: 1.5rem;
|
|
||||||
position: relative;
|
|
||||||
|
|
||||||
&:focus-visible {
|
|
||||||
outline: 2px solid var(--tertiary);
|
|
||||||
outline-offset: 2px;
|
|
||||||
}
|
|
||||||
|
|
||||||
&.active {
|
|
||||||
background-color: var(--secondary);
|
|
||||||
color: var(--light);
|
|
||||||
}
|
|
||||||
|
|
||||||
svg {
|
|
||||||
width: 18px;
|
|
||||||
height: 18px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.sr-only {
|
|
||||||
position: absolute;
|
|
||||||
width: 1px;
|
|
||||||
height: 1px;
|
|
||||||
padding: 0;
|
|
||||||
margin: -1px;
|
|
||||||
overflow: hidden;
|
|
||||||
clip: rect(0, 0, 0, 0);
|
|
||||||
white-space: nowrap;
|
|
||||||
border: 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,542 +0,0 @@
|
|||||||
# /// script
|
|
||||||
# requires-python = ">=3.11"
|
|
||||||
# dependencies = [
|
|
||||||
# "langchain-text-splitters",
|
|
||||||
# "numpy",
|
|
||||||
# "openai",
|
|
||||||
# "sentence-transformers",
|
|
||||||
# "tiktoken",
|
|
||||||
# ]
|
|
||||||
# ///
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import os, json, argparse, hashlib, math, random, logging
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from functools import lru_cache
|
|
||||||
from collections.abc import Iterable
|
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
||||||
|
|
||||||
import tiktoken, numpy as np
|
|
||||||
|
|
||||||
from openai import OpenAI
|
|
||||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
DEFAULT_VLLM_URL = os.environ.get("VLLM_URL") or os.environ.get("VLLM_EMBED_URL") or "http://127.0.0.1:8000/v1"
|
|
||||||
|
|
||||||
|
|
||||||
def resolve_vllm_base_url(url: str) -> str:
    """Normalize a vLLM endpoint into its OpenAI-compatible ``/v1`` base URL.

    Trailing slashes are dropped, one trailing ``/embeddings`` segment is removed,
    and ``/v1`` is appended when the result does not already end with it.

    Raises:
        ValueError: if ``url`` is empty.
    """
    if not url:
        raise ValueError("vLLM URL must be non-empty")

    endpoint_suffix = "/embeddings"
    base = url.rstrip("/")
    if base.endswith(endpoint_suffix):
        base = base[: -len(endpoint_suffix)]

    return base if base.endswith("/v1") else f"{base}/v1"
|
|
||||||
|
|
||||||
|
|
||||||
def load_jsonl(fp: str) -> Iterable[dict]:
    """Lazily yield one parsed JSON value per non-blank line of the UTF-8 file ``fp``."""
    with open(fp, "r", encoding="utf-8") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        yield from (json.loads(entry) for entry in stripped_lines if entry)
|
|
||||||
|
|
||||||
|
|
||||||
def l2_normalize_rows(x: np.ndarray) -> np.ndarray:
    """Scale each row of the 2-D array ``x`` ([N, D]) to unit L2 norm.

    Rows whose norm is exactly zero are divided by 1 and therefore stay all-zero.
    """
    row_norms = np.linalg.norm(x, ord=2, axis=1, keepdims=True)
    # Adding the boolean mask turns a zero norm into 1 and leaves every other norm unchanged.
    return x / (row_norms + (row_norms == 0))
|
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=1)
def get_tiktoken_encoder():
    """Return the tiktoken ``o200k_base`` encoding (GPT-4o tokenizer).

    The result is memoized by ``lru_cache``; change the encoding name here to use another one.
    """
    encoding_name = "o200k_base"
    return tiktoken.get_encoding(encoding_name)
|
|
||||||
|
|
||||||
|
|
||||||
def count_tokens(text: str) -> int:
    """Return how many tokens ``text`` encodes to with the shared tiktoken encoder."""
    return len(get_tiktoken_encoder().encode(text))
|
|
||||||
|
|
||||||
|
|
||||||
def get_text_splitter(chunk_size: int, overlap: int):
    """Build a recursive text splitter whose size budget is measured in tokens.

    Args:
        chunk_size: Maximum number of tokens per chunk.
        overlap: Number of tokens shared between consecutive chunks.

    Returns:
        A ``RecursiveCharacterTextSplitter`` that splits on paragraph, line, sentence
        and word boundaries (in that order) and measures length with the shared
        tiktoken encoder.
    """
    encoder = get_tiktoken_encoder()
    return RecursiveCharacterTextSplitter(
        # ``length_function`` counts tokens, so both limits are passed in token units as-is.
        # Scaling them by a chars-per-token factor would let chunks grow to roughly four
        # times the requested token budget.
        chunk_size=chunk_size,
        chunk_overlap=overlap,
        separators=["\n\n", "\n", ". ", " ", ""],
        length_function=lambda t: len(encoder.encode(t)),
        is_separator_regex=False,
    )
|
|
||||||
|
|
||||||
|
|
||||||
def chunk_document(
    doc: dict, max_tokens: int = 512, overlap_tokens: int = 128, min_chunk_size: int = 100
) -> list[dict]:
    """
    Split a document into chunk records when it is longer than ``max_tokens``.

    Args:
        doc: {'slug': str, 'title': str, 'text': str}; 'title' falls back to the slug
        max_tokens: Token budget above which the document is split
        overlap_tokens: Overlap handed to the text splitter
        min_chunk_size: Chunks with fewer tokens than this are discarded

    Returns:
        A single record (``is_chunked`` False, slug unchanged) when the text fits,
        otherwise one record per retained chunk with slug ``<slug>#chunk<i>``.
    """
    body = doc["text"]
    fits_in_one_chunk = count_tokens(body) <= max_tokens

    def as_record(slug, chunk_text: str, index: int, chunked: bool) -> dict:
        # Every record carries the parent's title and slug alongside its own text.
        return {
            "slug": slug,
            "title": doc.get("title", doc["slug"]),
            "text": chunk_text,
            "chunk_id": index,
            "parent_slug": doc["slug"],
            "is_chunked": chunked,
        }

    if fits_in_one_chunk:
        return [as_record(doc["slug"], body, 0, False)]

    pieces = get_text_splitter(max_tokens, overlap_tokens).split_text(body)
    # Drop fragments that are too small to be useful on their own.
    retained = [piece for piece in pieces if count_tokens(piece) >= min_chunk_size]

    records: list[dict] = []
    for index, piece in enumerate(retained):
        records.append(as_record(f"{doc['slug']}#chunk{index}", piece, index, True))
    return records
|
|
||||||
|
|
||||||
|
|
||||||
def write_shards(vectors: np.ndarray, shard_size: int, dtype: str, out_dir: Path) -> list[dict]:
    """Write ``vectors`` ([rows, dims]) to ``out_dir`` as ``vectors-NNN.bin`` shards.

    Each shard holds up to ``shard_size`` consecutive rows, stored row-major as float16
    when ``dtype == "fp16"`` and float32 otherwise.

    Returns:
        One manifest entry per shard: public path, row count, starting row, byte length,
        SHA-256 of the payload and the byte stride of a single row.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    total_rows, dims = vectors.shape
    storage_dtype = np.dtype(np.float16 if dtype == "fp16" else np.float32)
    row_stride = dims * storage_dtype.itemsize

    entries: list[dict] = []
    for shard_index, first_row in enumerate(range(0, total_rows, shard_size)):
        # Slicing clamps at the end of the array, so the last shard may be shorter.
        block = vectors[first_row : first_row + shard_size]
        raw = block.astype(storage_dtype, copy=False).tobytes(order="C")
        target = out_dir / f"vectors-{shard_index:03d}.bin"
        target.write_bytes(raw)
        entries.append(
            {
                "path": f"/embeddings/{target.name}",
                "rows": int(block.shape[0]),
                "rowOffset": first_row,
                "byteLength": len(raw),
                "sha256": hashlib.sha256(raw).hexdigest(),
                "byteStride": row_stride,
            },
        )
    return entries
|
|
||||||
|
|
||||||
|
|
||||||
def write_hnsw_graph(levels: list[list[list[int]]], rows: int, out_path: Path) -> tuple[list[dict], str]:
    """Serialize per-level adjacency lists to ``out_path`` in a CSR-like binary layout.

    For every level two uint32 arrays are written back to back: ``indptr`` (``rows + 1``
    cumulative neighbor counts) followed by ``indices`` (all neighbor ids concatenated).
    Nodes missing from a level's list are written with no neighbors.

    Returns:
        ``(meta, sha256)`` where ``meta`` holds, per level, the byte offset, element count
        and byte length of both arrays, and ``sha256`` is the hex digest of the whole file.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)
    hasher = hashlib.sha256()
    layout: list[dict] = []
    cursor = 0

    with open(out_path, "wb") as sink:

        def emit(array: np.ndarray) -> dict:
            # Append one array to the file, fold it into the digest and describe its location.
            nonlocal cursor
            raw = array.tobytes(order="C")
            section = {
                "offset": cursor,
                "elements": int(array.shape[0]),
                "byteLength": len(raw),
            }
            sink.write(raw)
            hasher.update(raw)
            cursor += len(raw)
            return section

        for level_number, level in enumerate(levels):
            adjacency = [level[node] if node < len(level) else [] for node in range(rows)]

            degrees = np.array([len(neighbors) for neighbors in adjacency], dtype=np.uint32)
            indptr = np.zeros(rows + 1, dtype=np.uint32)
            indptr[1:] = np.cumsum(degrees, dtype=np.uint32)

            flat_edges = [edge for neighbors in adjacency for edge in neighbors]
            indices = np.asarray(flat_edges, dtype=np.uint32)

            indptr_section = emit(indptr)
            indices_section = emit(indices)
            layout.append(
                {
                    "level": level_number,
                    "indptr": indptr_section,
                    "indices": indices_section,
                },
            )

    return layout, hasher.hexdigest()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def embed_vllm(
    texts: list[str],
    model_id: str,
    vllm_url: str,
    batch_size: int = 64,
    concurrency: int = 8,
) -> np.ndarray:
    """Embed ``texts`` through a vLLM server exposing the OpenAI-compatible API.

    The requested model is checked against the server's model list; when it is not served,
    a warning is logged and the first served model is used instead. Document-side prefixes
    are applied for E5 and embeddinggemma models. Requests are sent in batches of
    ``batch_size``, with up to ``concurrency`` batches in flight.

    Args:
        texts: Documents to embed, in output order.
        model_id: Model name to request from the server.
        vllm_url: Server URL; normalized with ``resolve_vllm_base_url``.
        batch_size: Number of texts per request (must be >= 1).
        concurrency: Maximum number of concurrent requests (values below 1 are treated as 1).

    Returns:
        float32 array of shape [len(texts), D] as returned by the server (not re-normalized
        here); an empty (0, 0) array when ``texts`` is empty.

    Raises:
        ValueError: if ``batch_size`` is not positive.
        RuntimeError: if the model list cannot be fetched or the server serves no models.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")
    if not texts:
        # Nothing to embed: avoid the network round-trips and np.stack() failing on an empty list.
        return np.zeros((0, 0), dtype=np.float32)

    base_url = resolve_vllm_base_url(vllm_url)
    api_key = os.environ.get("VLLM_API_KEY") or os.environ.get("OPENAI_API_KEY") or "not-set"
    client = OpenAI(base_url=base_url, api_key=api_key, timeout=300)

    def list_available_models() -> list[str]:
        # Walk the paginated model listing, using the last id of each page as the cursor.
        models: list[str] = []
        page = client.models.list()
        models.extend(model.id for model in page.data)
        while getattr(page, "has_more", False) and page.data:
            cursor = page.data[-1].id
            page = client.models.list(after=cursor)
            models.extend(model.id for model in page.data)
        return models

    try:
        available_models = list_available_models()
    except Exception as exc:
        raise RuntimeError(f"failed to query {base_url}/models: {exc}") from exc

    if model_id not in available_models:
        if not available_models:
            # There is no model to fall back to; fail with a clear message instead of IndexError.
            raise RuntimeError(f"vLLM at {base_url} reports no served models; cannot embed with '{model_id}'")
        suggestions = ", ".join(sorted(available_models)) if available_models else "<none>"
        logger.warning(
            "model '%s' not served by vLLM at %s. Available models: %s. Use the first model, results may differ during semantic search (you can omit this message if your weights is a ONNX checkpoint of the same model.)", model_id, base_url, suggestions,
        )
        model_id = available_models[0]

    # Apply model-specific prefixes for documents (asymmetric search)
    model_lower = model_id.lower()
    if "e5" in model_lower:
        # E5 models: use "passage:" prefix for documents
        prefixed = [f"passage: {t}" for t in texts]
    elif "qwen" in model_lower and "embedding" in model_lower:
        # Qwen3-Embedding: documents use plain text (no prefix)
        prefixed = texts
    elif "embeddinggemma" in model_lower:
        # embeddinggemma: use "title: none | text:" prefix for documents
        prefixed = [f"title: none | text: {t}" for t in texts]
    else:
        # Default: no prefix for unknown models
        prefixed = texts

    print(
        "Embedding"
        f" {len(prefixed)} texts with vLLM"
        f" (model={model_id}, batch_size={batch_size}, concurrency={concurrency})",
    )

    # Create batches, each tagged with the index of its first text so order can be restored.
    batches = [(start, prefixed[start : start + batch_size]) for start in range(0, len(prefixed), batch_size)]

    # Function to send a single batch request
    def send_batch(batch_info: tuple[int, list[str]]) -> tuple[int, list[np.ndarray]]:
        idx, batch = batch_info
        response = client.embeddings.create(model=model_id, input=batch)
        embeddings = [np.asarray(item.embedding, dtype=np.float32) for item in response.data]
        return (idx, embeddings)

    # Send batches concurrently (or sequentially if only 1 batch)
    results: dict[int, list[np.ndarray]] = {}
    if len(batches) == 1:
        # Single batch - no need for threading
        idx, embeddings = send_batch(batches[0])
        results[idx] = embeddings
    else:
        # Multiple batches - use concurrent requests
        # ThreadPoolExecutor rejects max_workers < 1, so clamp instead of crashing.
        with ThreadPoolExecutor(max_workers=max(1, concurrency)) as executor:
            futures = {executor.submit(send_batch, batch_info): batch_info[0] for batch_info in batches}
            completed = 0
            for future in as_completed(futures):
                idx, embeddings = future.result()
                results[idx] = embeddings
                completed += 1
                if completed % max(1, len(batches) // 10) == 0 or completed == len(batches):
                    print(f"  Completed {completed}/{len(batches)} batches ({completed * 100 // len(batches)}%)")

    # Reconstruct in order
    out: list[np.ndarray] = []
    for i in sorted(results.keys()):
        out.extend(results[i])

    return np.stack(out, axis=0)
|
|
||||||
|
|
||||||
|
|
||||||
def embed_hf(texts: list[str], model_id: str, device: str) -> np.ndarray:
    """Embed ``texts`` locally with sentence-transformers and return float32 vectors.

    Document-side prefixes are chosen from the model name (asymmetric search):
    ``passage: `` for E5 models, ``title: none | text: `` for embeddinggemma, and no
    prefix for Qwen embedding models or unknown models. Encoding is requested with
    ``normalize_embeddings=True``.
    """
    # Imported lazily so the dependency is only needed on this code path.
    from sentence_transformers import SentenceTransformer

    encoder = SentenceTransformer(model_id, device=device)

    name = model_id.lower()
    if "e5" in name:
        doc_prefix = "passage: "
    elif "qwen" in name and "embedding" in name:
        # Qwen3-Embedding: documents use plain text
        doc_prefix = ""
    elif "embeddinggemma" in name:
        doc_prefix = "title: none | text: "
    else:
        doc_prefix = ""

    # Without a prefix the caller's list is passed through unchanged.
    prefixed = [f"{doc_prefix}{t}" for t in texts] if doc_prefix else texts

    encode_options = {
        "batch_size": 64,
        "normalize_embeddings": True,
        "convert_to_numpy": True,
        "show_progress_bar": True,
    }
    embedded = encoder.encode(prefixed, **encode_options)
    return embedded.astype(np.float32, copy=False)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """CLI entry point: chunk the emitted JSONL, embed it, and write the embedding bundle.

    Steps: read ``--jsonl``; optionally split documents into token-bounded chunks; embed
    with vLLM (``--use-vllm``) or sentence-transformers; truncate or zero-pad to ``--dims``;
    L2-normalize; write vector shards, an HNSW graph (``hnsw.bin``) and ``manifest.json``
    into ``--out``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--jsonl", default="public/embeddings-text.jsonl")
    ap.add_argument("--model", default=os.environ.get("SEM_MODEL", "intfloat/multilingual-e5-large"))
    ap.add_argument("--dims", type=int, default=int(os.environ.get("SEM_DIMS", "1024")))
    ap.add_argument("--dtype", choices=["fp16", "fp32"], default=os.environ.get("SEM_DTYPE", "fp32"))
    ap.add_argument("--shard-size", type=int, default=int(os.environ.get("SEM_SHARD", "1024")))
    ap.add_argument("--out", default="public/embeddings")
    ap.add_argument("--use-vllm", action="store_true", default=bool(os.environ.get("USE_VLLM", "")))
    ap.add_argument(
        "--vllm-url",
        default=DEFAULT_VLLM_URL,
        help="Base URL for the vLLM OpenAI-compatible server (accepts either /v1 or /v1/embeddings)",
    )
    ap.add_argument("--chunk-size", type=int, default=512, help="Max tokens per chunk")
    ap.add_argument("--chunk-overlap", type=int, default=128, help="Overlap tokens between chunks")
    ap.add_argument("--no-chunking", action="store_true", help="Disable chunking (embed full docs)")
    ap.add_argument(
        "--concurrency",
        type=int,
        default=int(os.environ.get("VLLM_CONCURRENCY", "8")),
        help="Number of concurrent requests to vLLM (default: 8)",
    )
    ap.add_argument(
        "--batch-size",
        type=int,
        default=int(os.environ.get("VLLM_BATCH_SIZE", "64")),
        help="Batch size for vLLM requests (default: 64)",
    )
    args = ap.parse_args()

    recs = list(load_jsonl(args.jsonl))
    if not recs:
        # Report the path that was actually read, which may differ from the default.
        print(f"No input found in {args.jsonl}; run the site build first to emit JSONL.")
        return

    # Apply chunking
    if args.no_chunking:
        chunks = recs
        chunk_metadata = {}
        print(f"Chunking disabled. Processing {len(chunks)} full documents")
    else:
        chunks = []
        chunk_metadata = {}
        for rec in recs:
            doc_chunks = chunk_document(rec, max_tokens=args.chunk_size, overlap_tokens=args.chunk_overlap)
            chunks.extend(doc_chunks)
            # Build chunk metadata map (only real chunks need a pointer back to their parent)
            for chunk in doc_chunks:
                if chunk["is_chunked"]:
                    chunk_metadata[chunk["slug"]] = {
                        "parentSlug": chunk["parent_slug"],
                        "chunkId": chunk["chunk_id"],
                    }
        chunked_count = sum(1 for c in chunks if c.get("is_chunked", False))
        print(f"Chunked {len(recs)} documents into {len(chunks)} chunks ({chunked_count} chunked, {len(chunks) - chunked_count} unchanged)")
        print(f"  Chunk size: {args.chunk_size} tokens, overlap: {args.chunk_overlap} tokens")

    if not chunks:
        # Chunk filtering can discard everything; the embedding backends cannot handle an empty batch.
        print(f"No embeddable chunks produced from {args.jsonl}; nothing to write.")
        return

    ids = [c["slug"] for c in chunks]
    titles = [c.get("title", c["slug"]) for c in chunks]
    texts = [c["text"] for c in chunks]

    if args.use_vllm:
        vecs = embed_vllm(
            texts,
            args.model,
            args.vllm_url,
            batch_size=args.batch_size,
            concurrency=args.concurrency,
        )
    else:
        device = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu"
        vecs = embed_hf(texts, args.model, device)

    # Coerce dims: truncate extra components or zero-pad missing ones.
    if vecs.shape[1] != args.dims:
        if vecs.shape[1] > args.dims:
            vecs = vecs[:, : args.dims]
        else:
            vecs = np.pad(vecs, ((0, 0), (0, args.dims - vecs.shape[1])))
    # Always re-normalize: the manifest below declares "normalized": True and the HNSW build
    # scores neighbors with a plain dot product, but vectors coming back from the vLLM path
    # are used as returned by the server.
    vecs = l2_normalize_rows(vecs.astype(np.float32, copy=False))

    out_dir = Path(args.out)
    shards = write_shards(vecs, args.shard_size, args.dtype, out_dir)

    # Build a lightweight HNSW graph and store it in a compact binary layout
    def hnsw_build(data: np.ndarray, M: int = 16, efC: int = 200, seed: int = 0) -> dict:
        rng = random.Random(seed)
        N, D = data.shape
        levels: list[list[list[int]]] = []  # levels[L][i] = neighbors of node i at level L

        # random level assignment using 1/e distribution
        node_levels = []
        for _ in range(N):
            lvl = 0
            while rng.random() < 1 / math.e:
                lvl += 1
            node_levels.append(lvl)
        max_level = max(node_levels) if N > 0 else 0
        for _ in range(max_level + 1):
            levels.append([[] for _ in range(N)])

        def sim(i: int, j: int) -> float:
            # Dot product of two rows of ``data``.
            return float((data[i] * data[j]).sum())

        # Node 0 is the fixed entry point; -1 marks an empty index.
        entry = 0 if N > 0 else -1

        def search_layer(q: int, ep: int, ef: int, L: int) -> list[int]:
            # Best-first expansion from ``ep`` over level ``L``; returns visited nodes by descending similarity.
            if ep < 0:
                return []
            visited = set()
            cand: list[tuple[float, int]] = []
            top: list[tuple[float, int]] = []

            def push(node: int):
                if node in visited:
                    return
                visited.add(node)
                cand.append((sim(q, node), node))

            push(ep)
            while cand:
                cand.sort(reverse=True)
                s, v = cand.pop(0)
                if len(top) >= ef and s <= top[-1][0]:
                    break
                top.append((s, v))
                for u in levels[L][v]:
                    push(u)
            top.sort(reverse=True)
            return [n for _, n in top]

        for i in range(N):
            if i == 0:
                continue
            lvl = node_levels[i]
            ep = entry
            # Greedy descent through the levels above the node's own level.
            for L in range(max_level, lvl, -1):
                c = search_layer(i, ep, 1, L)
                if c:
                    ep = c[0]
            # Link the node on its own level and every level below it.
            for L in range(min(max_level, lvl), -1, -1):
                W = search_layer(i, ep, efC, L)
                # Select top M by similarity
                neigh = sorted(((sim(i, j), j) for j in W if j != i), reverse=True)[:M]
                for _, e in neigh:
                    if e not in levels[L][i]:
                        levels[L][i].append(e)
                    if i not in levels[L][e]:
                        levels[L][e].append(i)

        # trim neighbors to M
        for L in range(len(levels)):
            for i in range(N):
                if len(levels[L][i]) > M:
                    # keep top M by sim
                    nb = levels[L][i]
                    nb = sorted(nb, key=lambda j: sim(i, j), reverse=True)[:M]
                    levels[L][i] = nb

        return {
            "M": M,
            "efConstruction": efC,
            "entryPoint": entry,
            "maxLevel": max_level,
            "levels": levels,
        }

    hnsw = hnsw_build(vecs, M=16, efC=200)
    hnsw_meta, hnsw_sha = write_hnsw_graph(hnsw["levels"], int(vecs.shape[0]), out_dir / "hnsw.bin")

    manifest = {
        "version": 2,
        "dims": args.dims,
        "dtype": args.dtype,
        "normalized": True,
        "rows": int(vecs.shape[0]),
        "shardSizeRows": args.shard_size,
        "vectors": {
            "dtype": args.dtype,
            "rows": int(vecs.shape[0]),
            "dims": args.dims,
            "shards": shards,
        },
        "ids": ids,
        "titles": titles,
        "chunkMetadata": chunk_metadata,
        "hnsw": {
            "M": hnsw["M"],
            "efConstruction": hnsw["efConstruction"],
            "entryPoint": hnsw["entryPoint"],
            "maxLevel": hnsw["maxLevel"],
            "graph": {
                "path": "/embeddings/hnsw.bin",
                "sha256": hnsw_sha,
                "levels": hnsw_meta,
            },
        },
    }
    (out_dir / "manifest.json").write_text(json.dumps(manifest, ensure_ascii=False), encoding="utf-8")
    print(f"Wrote {len(shards)} vector shard(s), HNSW graph, and manifest to {out_dir}")


if __name__ == "__main__":
    main()
|
|
||||||
@@ -27,6 +27,8 @@ import lt from "./locales/lt-LT"
|
|||||||
import fi from "./locales/fi-FI"
|
import fi from "./locales/fi-FI"
|
||||||
import no from "./locales/nb-NO"
|
import no from "./locales/nb-NO"
|
||||||
import id from "./locales/id-ID"
|
import id from "./locales/id-ID"
|
||||||
|
import kk from "./locales/kk-KZ"
|
||||||
|
import he from "./locales/he-IL"
|
||||||
|
|
||||||
export const TRANSLATIONS = {
|
export const TRANSLATIONS = {
|
||||||
"en-US": enUs,
|
"en-US": enUs,
|
||||||
@@ -78,6 +80,8 @@ export const TRANSLATIONS = {
|
|||||||
"fi-FI": fi,
|
"fi-FI": fi,
|
||||||
"nb-NO": no,
|
"nb-NO": no,
|
||||||
"id-ID": id,
|
"id-ID": id,
|
||||||
|
"kk-KZ": kk,
|
||||||
|
"he-IL": he,
|
||||||
} as const
|
} as const
|
||||||
|
|
||||||
export const defaultTranslation = "en-US"
|
export const defaultTranslation = "en-US"
|
||||||
|
|||||||
88
quartz/i18n/locales/he-IL.ts
Normal file
88
quartz/i18n/locales/he-IL.ts
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
import { Translation } from "./definition"
|
||||||
|
|
||||||
|
export default {
|
||||||
|
propertyDefaults: {
|
||||||
|
title: "ללא כותרת",
|
||||||
|
description: "לא סופק תיאור",
|
||||||
|
},
|
||||||
|
direction: "rtl" as const,
|
||||||
|
components: {
|
||||||
|
callout: {
|
||||||
|
note: "הערה",
|
||||||
|
abstract: "תקציר",
|
||||||
|
info: "מידע",
|
||||||
|
todo: "לעשות",
|
||||||
|
tip: "טיפ",
|
||||||
|
success: "הצלחה",
|
||||||
|
question: "שאלה",
|
||||||
|
warning: "אזהרה",
|
||||||
|
failure: "כשלון",
|
||||||
|
danger: "סכנה",
|
||||||
|
bug: "באג",
|
||||||
|
example: "דוגמה",
|
||||||
|
quote: "ציטוט",
|
||||||
|
},
|
||||||
|
backlinks: {
|
||||||
|
title: "קישורים חוזרים",
|
||||||
|
noBacklinksFound: "לא נמצאו קישורים חוזרים",
|
||||||
|
},
|
||||||
|
themeToggle: {
|
||||||
|
lightMode: "מצב בהיר",
|
||||||
|
darkMode: "מצב כהה",
|
||||||
|
},
|
||||||
|
readerMode: {
|
||||||
|
title: "מצב קריאה",
|
||||||
|
},
|
||||||
|
explorer: {
|
||||||
|
title: "סייר",
|
||||||
|
},
|
||||||
|
footer: {
|
||||||
|
createdWith: "נוצר באמצעות",
|
||||||
|
},
|
||||||
|
graph: {
|
||||||
|
title: "מבט גרף",
|
||||||
|
},
|
||||||
|
recentNotes: {
|
||||||
|
title: "הערות אחרונות",
|
||||||
|
seeRemainingMore: ({ remaining }) => `עיין ב ${remaining} נוספים →`,
|
||||||
|
},
|
||||||
|
transcludes: {
|
||||||
|
transcludeOf: ({ targetSlug }) => `מצוטט מ ${targetSlug}`,
|
||||||
|
linkToOriginal: "קישור למקורי",
|
||||||
|
},
|
||||||
|
search: {
|
||||||
|
title: "חיפוש",
|
||||||
|
searchBarPlaceholder: "חפשו משהו",
|
||||||
|
},
|
||||||
|
tableOfContents: {
|
||||||
|
title: "תוכן עניינים",
|
||||||
|
},
|
||||||
|
contentMeta: {
|
||||||
|
readingTime: ({ minutes }) => `${minutes} דקות קריאה`,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
pages: {
|
||||||
|
rss: {
|
||||||
|
recentNotes: "הערות אחרונות",
|
||||||
|
lastFewNotes: ({ count }) => `${count} הערות אחרונות`,
|
||||||
|
},
|
||||||
|
error: {
|
||||||
|
title: "לא נמצא",
|
||||||
|
notFound: "העמוד הזה פרטי או לא קיים.",
|
||||||
|
home: "חזרה לעמוד הבית",
|
||||||
|
},
|
||||||
|
folderContent: {
|
||||||
|
folder: "תיקייה",
|
||||||
|
itemsUnderFolder: ({ count }) =>
|
||||||
|
count === 1 ? "פריט אחד תחת תיקייה זו." : `${count} פריטים תחת תיקייה זו.`,
|
||||||
|
},
|
||||||
|
tagContent: {
|
||||||
|
tag: "תגית",
|
||||||
|
tagIndex: "מפתח התגיות",
|
||||||
|
itemsUnderTag: ({ count }) =>
|
||||||
|
count === 1 ? "פריט אחד עם תגית זו." : `${count} פריטים עם תגית זו.`,
|
||||||
|
showingFirst: ({ count }) => `מראה את ה-${count} תגיות הראשונות.`,
|
||||||
|
totalTags: ({ count }) => `${count} תגיות נמצאו סך הכל.`,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
} as const satisfies Translation
|
||||||
@@ -8,7 +8,7 @@ export default {
|
|||||||
components: {
|
components: {
|
||||||
callout: {
|
callout: {
|
||||||
note: "Nota",
|
note: "Nota",
|
||||||
abstract: "Astratto",
|
abstract: "Abstract",
|
||||||
info: "Info",
|
info: "Info",
|
||||||
todo: "Da fare",
|
todo: "Da fare",
|
||||||
tip: "Consiglio",
|
tip: "Consiglio",
|
||||||
@@ -17,7 +17,7 @@ export default {
|
|||||||
warning: "Attenzione",
|
warning: "Attenzione",
|
||||||
failure: "Errore",
|
failure: "Errore",
|
||||||
danger: "Pericolo",
|
danger: "Pericolo",
|
||||||
bug: "Bug",
|
bug: "Problema",
|
||||||
example: "Esempio",
|
example: "Esempio",
|
||||||
quote: "Citazione",
|
quote: "Citazione",
|
||||||
},
|
},
|
||||||
@@ -43,10 +43,11 @@ export default {
|
|||||||
},
|
},
|
||||||
recentNotes: {
|
recentNotes: {
|
||||||
title: "Note recenti",
|
title: "Note recenti",
|
||||||
seeRemainingMore: ({ remaining }) => `Vedi ${remaining} altro →`,
|
seeRemainingMore: ({ remaining }) =>
|
||||||
|
remaining === 1 ? "Vedi 1 altra →" : `Vedi altre ${remaining} →`,
|
||||||
},
|
},
|
||||||
transcludes: {
|
transcludes: {
|
||||||
transcludeOf: ({ targetSlug }) => `Transclusione di ${targetSlug}`,
|
transcludeOf: ({ targetSlug }) => `Inclusione di ${targetSlug}`,
|
||||||
linkToOriginal: "Link all'originale",
|
linkToOriginal: "Link all'originale",
|
||||||
},
|
},
|
||||||
search: {
|
search: {
|
||||||
@@ -54,16 +55,16 @@ export default {
|
|||||||
searchBarPlaceholder: "Cerca qualcosa",
|
searchBarPlaceholder: "Cerca qualcosa",
|
||||||
},
|
},
|
||||||
tableOfContents: {
|
tableOfContents: {
|
||||||
title: "Tabella dei contenuti",
|
title: "Indice",
|
||||||
},
|
},
|
||||||
contentMeta: {
|
contentMeta: {
|
||||||
readingTime: ({ minutes }) => `${minutes} minuti`,
|
readingTime: ({ minutes }) => (minutes === 1 ? "1 minuto" : `${minutes} minuti`),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
pages: {
|
pages: {
|
||||||
rss: {
|
rss: {
|
||||||
recentNotes: "Note recenti",
|
recentNotes: "Note recenti",
|
||||||
lastFewNotes: ({ count }) => `Ultime ${count} note`,
|
lastFewNotes: ({ count }) => (count === 1 ? "Ultima nota" : `Ultime ${count} note`),
|
||||||
},
|
},
|
||||||
error: {
|
error: {
|
||||||
title: "Non trovato",
|
title: "Non trovato",
|
||||||
@@ -80,8 +81,9 @@ export default {
|
|||||||
tagIndex: "Indice etichette",
|
tagIndex: "Indice etichette",
|
||||||
itemsUnderTag: ({ count }) =>
|
itemsUnderTag: ({ count }) =>
|
||||||
count === 1 ? "1 oggetto con questa etichetta." : `${count} oggetti con questa etichetta.`,
|
count === 1 ? "1 oggetto con questa etichetta." : `${count} oggetti con questa etichetta.`,
|
||||||
showingFirst: ({ count }) => `Prime ${count} etichette.`,
|
showingFirst: ({ count }) => (count === 1 ? "Prima etichetta." : `Prime ${count} etichette.`),
|
||||||
totalTags: ({ count }) => `Trovate ${count} etichette totali.`,
|
totalTags: ({ count }) =>
|
||||||
|
count === 1 ? "Trovata 1 etichetta in totale." : `Trovate ${count} etichette totali.`,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
} as const satisfies Translation
|
} as const satisfies Translation
|
||||||
|
|||||||
87
quartz/i18n/locales/kk-KZ.ts
Normal file
87
quartz/i18n/locales/kk-KZ.ts
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
import { Translation } from "./definition"
|
||||||
|
|
||||||
|
/**
 * Kazakh (kk-KZ) UI strings.
 *
 * Callout titles are kept distinct from one another so that two different
 * callout types never render with the same heading.
 */
export default {
  propertyDefaults: {
    title: "Атаусыз",
    description: "Сипаттама берілмеген",
  },
  components: {
    callout: {
      // "Ескертпе" (note/remark) is used here so it does not collide with warning ("Ескерту").
      note: "Ескертпе",
      abstract: "Аннотация",
      info: "Ақпарат",
      todo: "Істеу керек",
      tip: "Кеңес",
      success: "Сәттілік",
      question: "Сұрақ",
      warning: "Ескерту",
      failure: "Қате",
      danger: "Қауіп",
      // "Ақау" (defect) is used here so it does not collide with failure ("Қате").
      bug: "Ақау",
      example: "Мысал",
      quote: "Дәйексөз",
    },
    backlinks: {
      title: "Артқа сілтемелер",
      noBacklinksFound: "Артқа сілтемелер табылмады",
    },
    themeToggle: {
      lightMode: "Жарық режимі",
      darkMode: "Қараңғы режим",
    },
    readerMode: {
      title: "Оқу режимі",
    },
    explorer: {
      title: "Зерттеуші",
    },
    footer: {
      createdWith: "Құрастырылған құрал:",
    },
    graph: {
      title: "Граф көрінісі",
    },
    recentNotes: {
      title: "Соңғы жазбалар",
      seeRemainingMore: ({ remaining }) => `Тағы ${remaining} жазбаны қарау →`,
    },
    transcludes: {
      transcludeOf: ({ targetSlug }) => `${targetSlug} кірістіру`,
      linkToOriginal: "Бастапқыға сілтеме",
    },
    search: {
      title: "Іздеу",
      searchBarPlaceholder: "Бірдеңе іздеу",
    },
    tableOfContents: {
      title: "Мазмұны",
    },
    contentMeta: {
      readingTime: ({ minutes }) => `${minutes} мин оқу`,
    },
  },
  pages: {
    rss: {
      recentNotes: "Соңғы жазбалар",
      lastFewNotes: ({ count }) => `Соңғы ${count} жазба`,
    },
    error: {
      title: "Табылмады",
      notFound: "Бұл бет жеке немесе жоқ болуы мүмкін.",
      home: "Басты бетке оралу",
    },
    folderContent: {
      folder: "Қалта",
      // The noun form is the same after any numeral, so no singular/plural split is needed.
      itemsUnderFolder: ({ count }) => `Бұл қалтада ${count} элемент бар.`,
    },
    tagContent: {
      tag: "Тег",
      tagIndex: "Тегтер индексі",
      // Same wording for every count, so one template covers count === 1 as well.
      itemsUnderTag: ({ count }) => `Бұл тегпен ${count} элемент.`,
      showingFirst: ({ count }) => `Алғашқы ${count} тег көрсетілуде.`,
      totalTags: ({ count }) => `Барлығы ${count} тег табылды.`,
    },
  },
} as const satisfies Translation
|
||||||
@@ -40,7 +40,7 @@ export const NotFoundPage: QuartzEmitterPlugin = () => {
|
|||||||
description: notFound,
|
description: notFound,
|
||||||
frontmatter: { title: notFound, tags: [] },
|
frontmatter: { title: notFound, tags: [] },
|
||||||
})
|
})
|
||||||
const externalResources = pageResources(path, resources, ctx.cfg.configuration)
|
const externalResources = pageResources(path, resources)
|
||||||
const componentData: QuartzComponentProps = {
|
const componentData: QuartzComponentProps = {
|
||||||
ctx,
|
ctx,
|
||||||
fileData: vfile.data,
|
fileData: vfile.data,
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
import { FullSlug, joinSegments } from "../../util/path"
|
import { FullSlug, joinSegments } from "../../util/path"
|
||||||
import { QuartzEmitterPlugin } from "../types"
|
import { QuartzEmitterPlugin } from "../types"
|
||||||
import path from "path"
|
|
||||||
import fs from "node:fs/promises"
|
|
||||||
import { globby } from "globby"
|
|
||||||
|
|
||||||
// @ts-ignore
|
// @ts-ignore
|
||||||
import spaRouterScript from "../../components/scripts/spa.inline"
|
import spaRouterScript from "../../components/scripts/spa.inline"
|
||||||
@@ -19,7 +16,7 @@ import {
|
|||||||
processGoogleFonts,
|
processGoogleFonts,
|
||||||
} from "../../util/theme"
|
} from "../../util/theme"
|
||||||
import { Features, transform } from "lightningcss"
|
import { Features, transform } from "lightningcss"
|
||||||
import { transform as transpile, build as bundle } from "esbuild"
|
import { transform as transpile } from "esbuild"
|
||||||
import { write } from "./helpers"
|
import { write } from "./helpers"
|
||||||
|
|
||||||
type ComponentResources = {
|
type ComponentResources = {
|
||||||
@@ -360,47 +357,7 @@ export const ComponentResources: QuartzEmitterPlugin = () => {
|
|||||||
ext: ".js",
|
ext: ".js",
|
||||||
content: postscript,
|
content: postscript,
|
||||||
})
|
})
|
||||||
|
|
||||||
// Bundle all worker files
|
|
||||||
const workerFiles = await globby(["quartz/**/*.worker.ts"])
|
|
||||||
for (const src of workerFiles) {
|
|
||||||
const result = await bundle({
|
|
||||||
entryPoints: [src],
|
|
||||||
bundle: true,
|
|
||||||
minify: true,
|
|
||||||
platform: "browser",
|
|
||||||
format: "esm",
|
|
||||||
write: false,
|
|
||||||
})
|
|
||||||
const code = result.outputFiles[0].text
|
|
||||||
const name = path.basename(src).replace(/\.ts$/, "")
|
|
||||||
yield write({ ctx, slug: name as FullSlug, ext: ".js", content: code })
|
|
||||||
}
|
|
||||||
},
|
|
||||||
async *partialEmit(ctx, _content, _resources, changeEvents) {
|
|
||||||
// Handle worker file changes in incremental builds
|
|
||||||
for (const changeEvent of changeEvents) {
|
|
||||||
if (!/\.worker\.ts$/.test(changeEvent.path)) continue
|
|
||||||
if (changeEvent.type === "delete") {
|
|
||||||
const name = path.basename(changeEvent.path).replace(/\.ts$/, "")
|
|
||||||
const dest = joinSegments(ctx.argv.output, `${name}.js`)
|
|
||||||
try {
|
|
||||||
await fs.unlink(dest)
|
|
||||||
} catch {}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
const result = await bundle({
|
|
||||||
entryPoints: [changeEvent.path],
|
|
||||||
bundle: true,
|
|
||||||
minify: true,
|
|
||||||
platform: "browser",
|
|
||||||
format: "esm",
|
|
||||||
write: false,
|
|
||||||
})
|
|
||||||
const code = result.outputFiles[0].text
|
|
||||||
const name = path.basename(changeEvent.path).replace(/\.ts$/, "")
|
|
||||||
yield write({ ctx, slug: name as FullSlug, ext: ".js", content: code })
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
|
async *partialEmit() {},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ async function processContent(
|
|||||||
) {
|
) {
|
||||||
const slug = fileData.slug!
|
const slug = fileData.slug!
|
||||||
const cfg = ctx.cfg.configuration
|
const cfg = ctx.cfg.configuration
|
||||||
const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
|
const externalResources = pageResources(pathToRoot(slug), resources)
|
||||||
const componentData: QuartzComponentProps = {
|
const componentData: QuartzComponentProps = {
|
||||||
ctx,
|
ctx,
|
||||||
fileData,
|
fileData,
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ async function* processFolderInfo(
|
|||||||
const slug = joinSegments(folder, "index") as FullSlug
|
const slug = joinSegments(folder, "index") as FullSlug
|
||||||
const [tree, file] = folderContent
|
const [tree, file] = folderContent
|
||||||
const cfg = ctx.cfg.configuration
|
const cfg = ctx.cfg.configuration
|
||||||
const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
|
const externalResources = pageResources(pathToRoot(slug), resources)
|
||||||
const componentData: QuartzComponentProps = {
|
const componentData: QuartzComponentProps = {
|
||||||
ctx,
|
ctx,
|
||||||
fileData: file.data,
|
fileData: file.data,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
export { ContentPage } from "./contentPage"
|
export { ContentPage } from "./contentPage"
|
||||||
export { TagPage } from "./tagPage"
|
export { TagPage } from "./tagPage"
|
||||||
export { FolderPage } from "./folderPage"
|
export { FolderPage } from "./folderPage"
|
||||||
export { ContentIndex } from "./contentIndex"
|
export { ContentIndex as ContentIndex } from "./contentIndex"
|
||||||
export { AliasRedirects } from "./aliases"
|
export { AliasRedirects } from "./aliases"
|
||||||
export { Assets } from "./assets"
|
export { Assets } from "./assets"
|
||||||
export { Static } from "./static"
|
export { Static } from "./static"
|
||||||
@@ -10,4 +10,3 @@ export { ComponentResources } from "./componentResources"
|
|||||||
export { NotFoundPage } from "./404"
|
export { NotFoundPage } from "./404"
|
||||||
export { CNAME } from "./cname"
|
export { CNAME } from "./cname"
|
||||||
export { CustomOgImages } from "./ogImage"
|
export { CustomOgImages } from "./ogImage"
|
||||||
export { SemanticIndex } from "./semantic"
|
|
||||||
|
|||||||
@@ -1,235 +0,0 @@
|
|||||||
import { write } from "./helpers"
|
|
||||||
import { QuartzEmitterPlugin } from "../types"
|
|
||||||
import { FilePath, FullSlug, joinSegments, QUARTZ } from "../../util/path"
|
|
||||||
import { ReadTimeResults } from "reading-time"
|
|
||||||
import { GlobalConfiguration } from "../../cfg"
|
|
||||||
import { spawn } from "child_process"
|
|
||||||
|
|
||||||
const DEFAULT_MODEL_ID = "onnx-community/Qwen3-Embedding-0.6B-ONNX"
|
|
||||||
|
|
||||||
/**
 * Read a positive integer from the environment variable `name`.
 *
 * Returns `fallback` when the variable is unset, empty, not a base-10
 * integer, or not greater than zero, so a malformed value never reaches the
 * embedding script as `NaN`.
 */
function envPositiveInt(name: string, fallback: number): number {
  const parsed = Number.parseInt(process.env[name] ?? "", 10)
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback
}

/**
 * Default semantic search options; user-supplied options are layered on top
 * of these in `SemanticIndex`.
 *
 * The vLLM endpoint, concurrency and batch size can be overridden through the
 * `VLLM_URL` / `VLLM_EMBED_URL`, `VLLM_CONCURRENCY` and `VLLM_BATCH_SIZE`
 * environment variables.
 */
const defaults: GlobalConfiguration["semanticSearch"] = {
  enable: true,
  model: DEFAULT_MODEL_ID,
  aot: false,
  dims: 1024,
  dtype: "fp32",
  shardSizeRows: 1024,
  hnsw: { M: 16, efConstruction: 200 },
  chunking: {
    chunkSize: 512,
    chunkOverlap: 128,
    noChunking: false,
  },
  vllm: {
    enable: false,
    vllmUrl:
      process.env.VLLM_URL || process.env.VLLM_EMBED_URL || "http://127.0.0.1:8000/v1/embeddings",
    concurrency: envPositiveInt("VLLM_CONCURRENCY", 8),
    batchSize: envPositiveInt("VLLM_BATCH_SIZE", 64),
  },
}
|
|
||||||
|
|
||||||
type ContentDetails = {
|
|
||||||
slug: string
|
|
||||||
title: string
|
|
||||||
filePath: FilePath
|
|
||||||
content: string
|
|
||||||
readingTime?: Partial<ReadTimeResults>
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * Check if uv is installed.
 *
 * Spawns `uv --version` and resolves to `true` only when the process exits
 * with code 0. Resolves to `false` when the process cannot be spawned (for
 * example because `uv` is not on PATH) or exits with a non-zero code; the
 * returned promise never rejects.
 */
function checkUvInstalled(): Promise<boolean> {
  return new Promise((resolve) => {
    // No `shell: true`: the command and arguments are static, and passing an
    // argument array together with the shell option is deprecated in Node.
    // NOTE(review): assumes `uv` is a real executable (not a .cmd shim) on Windows.
    const proc = spawn("uv", ["--version"], { stdio: "ignore" })
    proc.on("error", () => resolve(false))
    proc.on("close", (code) => resolve(code === 0))
  })
}
|
|
||||||
|
|
||||||
/**
 * Run the Python embedding build script using uv.
 * Script uses PEP 723 inline metadata for dependency management.
 *
 * @param jsonlPath path of the JSONL file passed to the script via `--jsonl`
 * @param outDir directory passed to the script via `--out`
 * @param opts model, dtype, dimension, sharding, chunking and vLLM settings
 *   that are forwarded as command-line flags
 * @returns a promise that resolves when the script exits with code 0 and
 *   rejects when `uv` cannot be spawned, the script exits non-zero, or the
 *   script is terminated by a signal
 */
function runEmbedBuild(
  jsonlPath: string,
  outDir: string,
  opts: {
    model: string
    dtype: string
    dims: number
    shardSizeRows: number
    chunking: { chunkSize: number; chunkOverlap: number; noChunking: boolean }
    vllm: { enable: boolean; vllmUrl?: string; concurrency: number; batchSize: number }
  },
): Promise<void> {
  return new Promise((resolve, reject) => {
    const scriptPath = joinSegments(QUARTZ, "embed_build.py")
    const args = [
      "run",
      scriptPath,
      "--jsonl",
      jsonlPath,
      "--model",
      opts.model,
      "--out",
      outDir,
      "--dtype",
      opts.dtype,
      "--dims",
      String(opts.dims),
      "--shard-size",
      String(opts.shardSizeRows),
      "--chunk-size",
      String(opts.chunking.chunkSize),
      "--chunk-overlap",
      String(opts.chunking.chunkOverlap),
    ]

    if (opts.chunking.noChunking) {
      args.push("--no-chunking")
    }

    if (opts.vllm.enable) {
      args.push("--use-vllm")
      if (opts.vllm.vllmUrl) {
        args.push("--vllm-url", opts.vllm.vllmUrl)
      }
      args.push("--concurrency", String(opts.vllm.concurrency))
      args.push("--batch-size", String(opts.vllm.batchSize))
    }

    console.log("\nRunning embedding generation:")
    console.log(`  uv ${args.join(" ")}`)

    // Pass the current environment through, flagging vLLM usage unless the
    // caller already set USE_VLLM themselves.
    const env = { ...process.env }
    if (opts.vllm.enable && !env.USE_VLLM) {
      env.USE_VLLM = "1"
    }

    // Spawn without a shell so every argument reaches uv verbatim: with
    // `shell: true` the arguments are joined unescaped into one command line,
    // which breaks on paths containing spaces and lets configuration values
    // (model id, URL) inject shell syntax.
    const proc = spawn("uv", args, {
      stdio: "inherit",
      env,
    })

    proc.on("error", (err) => {
      reject(new Error(`Failed to spawn uv: ${err.message}`))
    })

    proc.on("close", (code, signal) => {
      if (code === 0) {
        console.log("Embedding generation completed successfully")
        resolve()
      } else if (code === null && signal) {
        // A null exit code means the child was killed by a signal.
        reject(new Error(`embed_build.py was terminated by signal ${signal}`))
      } else {
        reject(new Error(`embed_build.py exited with code ${code}`))
      }
    })
  })
}
|
|
||||||
|
|
||||||
/**
 * Emitter that writes `embeddings-text.jsonl` (one `{ slug, title, text }`
 * record per file that has text) and, unless `aot` is set, runs the
 * embedding build script over it with output going to `<output>/embeddings`.
 *
 * User options are layered over `defaults`; nested `hnsw`, `chunking` and
 * `vllm` settings fall back field by field.
 */
export const SemanticIndex: QuartzEmitterPlugin<Partial<GlobalConfiguration["semanticSearch"]>> = (
  opts,
) => {
  const merged = { ...defaults, ...opts }
  const { hnsw, chunking, vllm } = merged
  const o = {
    enable: merged.enable!,
    model: merged.model!,
    aot: merged.aot!,
    dims: merged.dims!,
    dtype: merged.dtype!,
    shardSizeRows: merged.shardSizeRows!,
    hnsw: {
      M: hnsw?.M ?? defaults.hnsw!.M!,
      efConstruction: hnsw?.efConstruction ?? defaults.hnsw!.efConstruction!,
      efSearch: hnsw?.efSearch,
    },
    chunking: {
      chunkSize: chunking?.chunkSize ?? defaults.chunking!.chunkSize!,
      chunkOverlap: chunking?.chunkOverlap ?? defaults.chunking!.chunkOverlap!,
      noChunking: chunking?.noChunking ?? defaults.chunking!.noChunking!,
    },
    vllm: {
      enable: vllm?.enable ?? defaults.vllm!.enable!,
      vllmUrl: vllm?.vllmUrl ?? defaults.vllm!.vllmUrl,
      concurrency: vllm?.concurrency ?? defaults.vllm!.concurrency!,
      batchSize: vllm?.batchSize ?? defaults.vllm!.batchSize!,
    },
  }

  if (!o.model) {
    throw new Error("Semantic search requires a model identifier")
  }

  return {
    name: "SemanticIndex",
    getQuartzComponents() {
      return []
    },
    async *partialEmit() {},
    async *emit(ctx, content, _resources) {
      if (!o.enable) return

      // Serialise one JSON record per file that actually has text; this is the
      // exact text used for embeddings.
      const records: string[] = []
      for (const [, file] of content) {
        const { slug, frontmatter, text } = file.data
        if (!text) continue
        records.push(JSON.stringify({ slug: slug!, title: frontmatter?.title ?? slug!, text }))
      }

      yield write({
        ctx,
        slug: "embeddings-text" as FullSlug,
        ext: ".jsonl",
        content: records.join("\n"),
      })

      if (o.aot) {
        console.log(
          "\nSkipping embedding generation (aot=true). Expecting pre-generated embeddings in public/embeddings/",
        )
        return
      }

      // aot is false: generate the embeddings now.
      console.log("\nGenerating embeddings (aot=false)...")

      const uvAvailable = await checkUvInstalled()
      if (!uvAvailable) {
        throw new Error(
          "uv is required for embedding generation. Install it from https://docs.astral.sh/uv/",
        )
      }

      const jsonlPath = joinSegments(ctx.argv.output, "embeddings-text.jsonl")
      const outDir = joinSegments(ctx.argv.output, "embeddings")

      try {
        await runEmbedBuild(jsonlPath, outDir, o)
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err)
        throw new Error(`Embedding generation failed: ${message}`)
      }
    },
    externalResources(_ctx) {
      return {}
    },
  }
}
|
|
||||||
@@ -73,7 +73,7 @@ async function processTagPage(
|
|||||||
const slug = joinSegments("tags", tag) as FullSlug
|
const slug = joinSegments("tags", tag) as FullSlug
|
||||||
const [tree, file] = tagContent
|
const [tree, file] = tagContent
|
||||||
const cfg = ctx.cfg.configuration
|
const cfg = ctx.cfg.configuration
|
||||||
const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
|
const externalResources = pageResources(pathToRoot(slug), resources)
|
||||||
const componentData: QuartzComponentProps = {
|
const componentData: QuartzComponentProps = {
|
||||||
ctx,
|
ctx,
|
||||||
fileData: file.data,
|
fileData: file.data,
|
||||||
|
|||||||
@@ -103,7 +103,6 @@ export const FrontMatter: QuartzTransformerPlugin<Partial<Options>> = (userOpts)
|
|||||||
const created = coalesceAliases(data, ["created", "date"])
|
const created = coalesceAliases(data, ["created", "date"])
|
||||||
if (created) {
|
if (created) {
|
||||||
data.created = created
|
data.created = created
|
||||||
data.modified ||= created // if modified is not set, use created
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const modified = coalesceAliases(data, [
|
const modified = coalesceAliases(data, [
|
||||||
@@ -113,6 +112,8 @@ export const FrontMatter: QuartzTransformerPlugin<Partial<Options>> = (userOpts)
|
|||||||
"last-modified",
|
"last-modified",
|
||||||
])
|
])
|
||||||
if (modified) data.modified = modified
|
if (modified) data.modified = modified
|
||||||
|
data.modified ||= created // if modified is not set, use created
|
||||||
|
|
||||||
const published = coalesceAliases(data, ["published", "publishDate", "date"])
|
const published = coalesceAliases(data, ["published", "publishDate", "date"])
|
||||||
if (published) data.published = published
|
if (published) data.published = published
|
||||||
|
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ export const CrawlLinks: QuartzTransformerPlugin<Partial<Options>> = (userOpts)
|
|||||||
) {
|
) {
|
||||||
let dest = node.properties.href as RelativeURL
|
let dest = node.properties.href as RelativeURL
|
||||||
const classes = (node.properties.className ?? []) as string[]
|
const classes = (node.properties.className ?? []) as string[]
|
||||||
const isExternal = isAbsoluteUrl(dest)
|
const isExternal = isAbsoluteUrl(dest, { httpOnly: false })
|
||||||
classes.push(isExternal ? "external" : "internal")
|
classes.push(isExternal ? "external" : "internal")
|
||||||
|
|
||||||
if (isExternal && opts.externalLinkIcon) {
|
if (isExternal && opts.externalLinkIcon) {
|
||||||
@@ -99,7 +99,9 @@ export const CrawlLinks: QuartzTransformerPlugin<Partial<Options>> = (userOpts)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// don't process external links or intra-document anchors
|
// don't process external links or intra-document anchors
|
||||||
const isInternal = !(isAbsoluteUrl(dest) || dest.startsWith("#"))
|
const isInternal = !(
|
||||||
|
isAbsoluteUrl(dest, { httpOnly: false }) || dest.startsWith("#")
|
||||||
|
)
|
||||||
if (isInternal) {
|
if (isInternal) {
|
||||||
dest = node.properties.href = transformLink(
|
dest = node.properties.href = transformLink(
|
||||||
file.data.slug!,
|
file.data.slug!,
|
||||||
@@ -145,7 +147,7 @@ export const CrawlLinks: QuartzTransformerPlugin<Partial<Options>> = (userOpts)
|
|||||||
node.properties.loading = "lazy"
|
node.properties.loading = "lazy"
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isAbsoluteUrl(node.properties.src)) {
|
if (!isAbsoluteUrl(node.properties.src, { httpOnly: false })) {
|
||||||
let dest = node.properties.src as RelativeURL
|
let dest = node.properties.src as RelativeURL
|
||||||
dest = node.properties.src = transformLink(
|
dest = node.properties.src = transformLink(
|
||||||
file.data.slug!,
|
file.data.slug!,
|
||||||
|
|||||||
@@ -123,13 +123,22 @@
|
|||||||
transform: rotateZ(-90deg);
|
transform: rotateZ(-90deg);
|
||||||
}
|
}
|
||||||
|
|
||||||
.callout-content > :first-child {
|
.callout-content {
|
||||||
transition:
|
& > * {
|
||||||
height 0.1s cubic-bezier(0.02, 0.01, 0.47, 1),
|
transition:
|
||||||
margin 0.1s cubic-bezier(0.02, 0.01, 0.47, 1);
|
height 0.1s cubic-bezier(0.02, 0.01, 0.47, 1),
|
||||||
overflow-y: clip;
|
margin 0.1s cubic-bezier(0.02, 0.01, 0.47, 1),
|
||||||
height: 0;
|
padding 0.1s cubic-bezier(0.02, 0.01, 0.47, 1);
|
||||||
margin-top: -1rem;
|
overflow-y: clip;
|
||||||
|
height: 0;
|
||||||
|
margin-bottom: 0;
|
||||||
|
margin-top: 0;
|
||||||
|
padding-bottom: 0;
|
||||||
|
padding-top: 0;
|
||||||
|
}
|
||||||
|
& > :first-child {
|
||||||
|
margin-top: -1rem;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,548 +0,0 @@
|
|||||||
// Unified semantic search worker: handles data loading and query execution
|
|
||||||
import { env, pipeline } from "@huggingface/transformers"
|
|
||||||
import "onnxruntime-web/webgpu"
|
|
||||||
import "onnxruntime-web/wasm"
|
|
||||||
|
|
||||||
export {}
|
|
||||||
|
|
||||||
type VectorShardMeta = {
|
|
||||||
path: string
|
|
||||||
rows: number
|
|
||||||
rowOffset: number
|
|
||||||
byteLength: number
|
|
||||||
sha256?: string
|
|
||||||
byteStride: number
|
|
||||||
}
|
|
||||||
|
|
||||||
type LevelSection = {
|
|
||||||
level: number
|
|
||||||
indptr: { offset: number; elements: number; byteLength: number }
|
|
||||||
indices: { offset: number; elements: number; byteLength: number }
|
|
||||||
}
|
|
||||||
|
|
||||||
type ChunkMetadata = {
|
|
||||||
parentSlug: string
|
|
||||||
chunkId: number
|
|
||||||
}
|
|
||||||
|
|
||||||
type Manifest = {
|
|
||||||
version: number
|
|
||||||
dims: number
|
|
||||||
dtype: string
|
|
||||||
normalized: boolean
|
|
||||||
rows: number
|
|
||||||
shardSizeRows: number
|
|
||||||
vectors: {
|
|
||||||
dtype: string
|
|
||||||
rows: number
|
|
||||||
dims: number
|
|
||||||
shards: VectorShardMeta[]
|
|
||||||
}
|
|
||||||
ids: string[]
|
|
||||||
titles?: string[]
|
|
||||||
chunkMetadata?: Record<string, ChunkMetadata>
|
|
||||||
hnsw: {
|
|
||||||
M: number
|
|
||||||
efConstruction: number
|
|
||||||
entryPoint: number
|
|
||||||
maxLevel: number
|
|
||||||
graph: {
|
|
||||||
path: string
|
|
||||||
sha256?: string
|
|
||||||
levels: LevelSection[]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
type InitMessage = {
|
|
||||||
type: "init"
|
|
||||||
cfg: any
|
|
||||||
manifestUrl: string
|
|
||||||
baseUrl?: string
|
|
||||||
disableCache?: boolean
|
|
||||||
}
|
|
||||||
|
|
||||||
type SearchMessage = { type: "search"; text: string; k: number; seq: number }
|
|
||||||
type ResetMessage = { type: "reset" }
|
|
||||||
|
|
||||||
type WorkerMessage = InitMessage | SearchMessage | ResetMessage
|
|
||||||
|
|
||||||
type ReadyMessage = { type: "ready" }
|
|
||||||
|
|
||||||
type ProgressMessage = {
|
|
||||||
type: "progress"
|
|
||||||
loadedRows: number
|
|
||||||
totalRows: number
|
|
||||||
}
|
|
||||||
|
|
||||||
type SearchHit = { id: number; score: number }
|
|
||||||
|
|
||||||
type SearchResultMessage = {
|
|
||||||
type: "search-result"
|
|
||||||
seq: number
|
|
||||||
semantic: SearchHit[]
|
|
||||||
}
|
|
||||||
|
|
||||||
type ErrorMessage = { type: "error"; seq?: number; message: string }
|
|
||||||
|
|
||||||
type WorkerState = "idle" | "loading" | "ready" | "error"
|
|
||||||
|
|
||||||
// IndexedDB configuration
|
|
||||||
const DB_NAME = "semantic-search-cache"
|
|
||||||
const STORE_NAME = "assets"
|
|
||||||
const DB_VERSION = 1
|
|
||||||
const hasIndexedDB = typeof indexedDB !== "undefined"
|
|
||||||
const supportsSharedArrayBuffer = typeof SharedArrayBuffer !== "undefined"
|
|
||||||
|
|
||||||
// State
|
|
||||||
let state: WorkerState = "idle"
|
|
||||||
let manifest: Manifest | null = null
|
|
||||||
let cfg: any = null
|
|
||||||
let vectorsView: Float32Array | null = null
|
|
||||||
let dims = 0
|
|
||||||
let rows = 0
|
|
||||||
let classifier: any = null
|
|
||||||
let envConfigured = false
|
|
||||||
let entryPoint = -1
|
|
||||||
let maxLevel = 0
|
|
||||||
let efDefault = 128
|
|
||||||
let levelGraph: { indptr: Uint32Array; indices: Uint32Array }[] = []
|
|
||||||
let abortController: AbortController | null = null
|
|
||||||
let dbPromise: Promise<IDBDatabase> | null = null
|
|
||||||
|
|
||||||
// IndexedDB helpers
|
|
||||||
/**
 * Open (and lazily create) the IndexedDB database used as the asset cache.
 *
 * The open request is shared through the module-level `dbPromise`, so
 * concurrent callers reuse one connection. If opening fails, the cached
 * promise is cleared so that a later call can retry instead of receiving the
 * same rejection forever.
 *
 * @returns a promise for the opened database; rejects when IndexedDB is
 *   unavailable or the open request fails
 */
function openDatabase(): Promise<IDBDatabase> {
  if (!hasIndexedDB) {
    return Promise.reject(new Error("indexedDB unavailable"))
  }
  if (!dbPromise) {
    const pending = new Promise<IDBDatabase>((resolve, reject) => {
      const req = indexedDB.open(DB_NAME, DB_VERSION)
      req.onupgradeneeded = () => {
        // First open (or version bump): make sure the object store exists.
        const db = req.result
        if (!db.objectStoreNames.contains(STORE_NAME)) {
          db.createObjectStore(STORE_NAME)
        }
      }
      req.onsuccess = () => resolve(req.result)
      req.onerror = () => reject(req.error ?? new Error("failed to open cache store"))
    })
    dbPromise = pending
    // Drop the memoised promise on failure; the caller still observes the
    // rejection through `pending` itself.
    pending.catch(() => {
      if (dbPromise === pending) {
        dbPromise = null
      }
    })
  }
  return dbPromise
}
|
|
||||||
|
|
||||||
/**
 * Look up a cached binary asset by its key.
 *
 * @param hash key the asset was stored under
 * @returns the cached bytes, or `null` when IndexedDB is unavailable, nothing
 *   is stored under `hash`, or the stored value is not binary data
 * @throws when the database cannot be opened or the read request fails
 */
async function readAsset(hash: string): Promise<ArrayBuffer | null> {
  if (!hasIndexedDB) {
    return null
  }
  const db = await openDatabase()
  return new Promise((resolve, reject) => {
    const tx = db.transaction(STORE_NAME, "readonly")
    const store = tx.objectStore(STORE_NAME)
    const req = store.get(hash)
    req.onsuccess = () => {
      const value = req.result
      if (value instanceof ArrayBuffer) {
        resolve(value)
      } else if (ArrayBuffer.isView(value) && value.buffer instanceof ArrayBuffer) {
        // A view may cover only part of its backing buffer; return exactly the
        // bytes the view spans rather than the whole underlying buffer.
        const { buffer, byteOffset, byteLength } = value
        resolve(
          byteOffset === 0 && byteLength === buffer.byteLength
            ? buffer
            : buffer.slice(byteOffset, byteOffset + byteLength),
        )
      } else if (value && value.buffer instanceof ArrayBuffer) {
        // Non-view objects that merely expose a `.buffer` are passed through as before.
        resolve(value.buffer as ArrayBuffer)
      } else {
        resolve(null)
      }
    }
    req.onerror = () => reject(req.error ?? new Error("failed to read cached asset"))
  })
}
|
|
||||||
|
|
||||||
/**
 * Store a binary asset in the cache under `hash`.
 *
 * Does nothing when IndexedDB is unavailable. The returned promise settles
 * with the transaction rather than the individual `put` request, because a
 * request can succeed while the transaction still aborts at commit time.
 *
 * @param hash key to store the asset under
 * @param buffer bytes to store
 * @throws when the database cannot be opened or the transaction fails/aborts
 */
async function writeAsset(hash: string, buffer: ArrayBuffer): Promise<void> {
  if (!hasIndexedDB) {
    return
  }
  const db = await openDatabase()
  await new Promise<void>((resolve, reject) => {
    const tx = db.transaction(STORE_NAME, "readwrite")
    const fail = () => reject(tx.error ?? new Error("failed to cache asset"))
    tx.oncomplete = () => resolve()
    // Request errors bubble to the transaction, so these two handlers also
    // cover a failing `put`.
    tx.onerror = fail
    tx.onabort = fail
    tx.objectStore(STORE_NAME).put(buffer, hash)
  })
}
|
|
||||||
|
|
||||||
/**
 * Turn an asset path into an absolute URL string.
 *
 * Paths that already start with `http://` or `https://` are returned
 * untouched; anything else is resolved against `baseUrl`, or against the
 * worker's own origin when no base is given.
 */
function toAbsolute(path: string, baseUrl?: string): string {
  const alreadyAbsolute = /^https?:\/\//.test(path)
  if (alreadyAbsolute) return path
  return new URL(path, baseUrl ?? self.location.origin).toString()
}
|
|
||||||
|
|
||||||
/**
 * Fetch a binary asset, going through the IndexedDB cache when possible.
 *
 * The cache is consulted and updated only when caching is not disabled, a
 * `sha` key is supplied and IndexedDB exists. Cache read and write failures
 * are deliberately ignored so the network path always remains available.
 *
 * @param path URL to fetch
 * @param disableCache when true, bypass the cache entirely
 * @param sha cache key for this asset
 * @returns the asset bytes
 * @throws when the HTTP response is not OK (or the fetch itself fails)
 */
async function fetchBinary(
  path: string,
  disableCache: boolean,
  sha?: string,
): Promise<ArrayBuffer> {
  // Defined only when the cache may be used for this request.
  const cacheKey = !disableCache && sha && hasIndexedDB ? sha : undefined

  if (cacheKey !== undefined) {
    try {
      const hit = await readAsset(cacheKey)
      if (hit) return hit
    } catch {
      // fall through to network fetch on cache errors
    }
  }

  const response = await fetch(path, { signal: abortController?.signal ?? undefined })
  if (!response.ok) {
    throw new Error(`failed to fetch ${path}: ${response.status} ${response.statusText}`)
  }
  const bytes = await response.arrayBuffer()

  if (cacheKey !== undefined) {
    try {
      await writeAsset(cacheKey, bytes)
    } catch {
      // ignore cache write failures
    }
  }

  return bytes
}
|
|
||||||
|
|
||||||
/**
 * Downloads every vector shard listed in the manifest and copies each one into
 * a single `Float32Array` of `rows * dims` elements.
 *
 * The array is backed by a `SharedArrayBuffer` when
 * `supportsSharedArrayBuffer` is true and by ordinary memory otherwise. After
 * each shard a `progress` message is posted through `self.postMessage`.
 *
 * @param manifest - index manifest describing rows, dims and shards
 * @param baseUrl - base used to resolve shard paths
 * @param disableCache - forwarded (as a boolean) to `fetchBinary`
 * @returns the filled buffer and the row count reached by the last shard
 * @throws when the manifest dtype is not `fp32` or a shard has the wrong length
 */
async function populateVectors(
  manifest: Manifest,
  baseUrl: string | undefined,
  disableCache: boolean | undefined,
): Promise<{ buffer: Float32Array; rowsLoaded: number }> {
  if (manifest.vectors.dtype !== "fp32") {
    throw new Error(`unsupported embedding dtype '${manifest.vectors.dtype}', regenerate with fp32`)
  }
  const rows = manifest.rows
  const dims = manifest.dims
  const totalElements = rows * dims
  // SharedArrayBuffer is sized in bytes, but Float32Array(n) is sized in
  // elements; passing the byte count to the latter over-allocates fourfold.
  const buffer = supportsSharedArrayBuffer
    ? new Float32Array(new SharedArrayBuffer(totalElements * Float32Array.BYTES_PER_ELEMENT))
    : new Float32Array(totalElements)
  let loadedRows = 0
  for (const shard of manifest.vectors.shards) {
    const absolute = toAbsolute(shard.path, baseUrl)
    const payload = await fetchBinary(absolute, Boolean(disableCache), shard.sha256)
    const view = new Float32Array(payload)
    if (view.length !== shard.rows * dims) {
      throw new Error(
        `shard ${shard.path} has mismatched length (expected ${shard.rows * dims}, got ${view.length})`,
      )
    }
    // Each shard lands at its own row offset inside the combined buffer.
    buffer.set(view, shard.rowOffset * dims)
    loadedRows = Math.min(rows, shard.rowOffset + shard.rows)
    const progress: ProgressMessage = {
      type: "progress",
      loadedRows,
      totalRows: rows,
    }
    self.postMessage(progress)
  }
  return { buffer, rowsLoaded: loadedRows }
}
|
|
||||||
|
|
||||||
/**
 * Downloads the graph file referenced by `manifest.hnsw.graph`.
 *
 * @param manifest - index manifest holding the graph path and checksum
 * @param baseUrl - base used to resolve the graph path
 * @param disableCache - forwarded (as a boolean) to `fetchBinary`
 * @returns the raw graph bytes
 */
async function populateGraph(
  manifest: Manifest,
  baseUrl: string | undefined,
  disableCache: boolean | undefined,
): Promise<ArrayBuffer> {
  const graph = manifest.hnsw.graph
  const url = toAbsolute(graph.path, baseUrl)
  const bytes = await fetchBinary(url, Boolean(disableCache), graph.sha256)
  return bytes
}
|
|
||||||
|
|
||||||
/**
 * One-time setup of the transformers.js `env` object.
 *
 * Disallows local models, allows remote models, and points the ONNX wasm
 * backend at the jsDelivr copy of the package matching `env.version`.
 * Later calls are no-ops once `envConfigured` has been set.
 *
 * @throws when `env.backends.onnx.wasm` is not available
 */
function configureRuntimeEnv() {
  if (!envConfigured) {
    env.allowLocalModels = false
    env.allowRemoteModels = true

    const onnxWasm = env.backends?.onnx?.wasm
    if (!onnxWasm) {
      throw new Error("transformers.js ONNX runtime backend unavailable")
    }

    onnxWasm.wasmPaths = `https://cdn.jsdelivr.net/npm/@huggingface/transformers@${env.version}/dist/`
    envConfigured = true
  }
}
|
|
||||||
|
|
||||||
/**
 * Lazily creates the feature-extraction pipeline and stores it in `classifier`.
 *
 * Does nothing when a pipeline already exists. The dtype comes from the
 * configuration when it is a non-empty string and defaults to `"fp32"`; the
 * value actually used is written back to the configuration.
 *
 * @throws when no model identifier is configured, or when the worker
 *   configuration was replaced or cleared while the model was loading
 */
async function ensureEncoder() {
  if (classifier) return
  // Pin the configuration this load belongs to; `cfg` can be reassigned while
  // the pipeline download below is awaited.
  const activeCfg = cfg
  if (!activeCfg?.model) {
    throw new Error("semantic worker missing model identifier")
  }
  configureRuntimeEnv()
  const dtype =
    typeof activeCfg.dtype === "string" && activeCfg.dtype.length > 0 ? activeCfg.dtype : "fp32"
  const pipelineOpts: Record<string, unknown> = {
    device: "wasm",
    dtype,
    local_files_only: false,
  }
  const loaded = await pipeline("feature-extraction", activeCfg.model, pipelineOpts)
  if (cfg !== activeCfg) {
    // Storing this encoder would attach a model loaded for an old
    // configuration to whatever configuration is active now.
    throw new Error("semantic worker configuration changed while the model was loading")
  }
  classifier = loaded
  activeCfg.dtype = dtype
}
|
|
||||||
|
|
||||||
/**
 * Returns the stored vector for row `id` as a view into `vectorsView`
 * (no copy is made).
 *
 * @param id - row index of the vector
 * @returns a `dims`-long subarray starting at `id * dims`
 * @throws when no vector buffer has been loaded
 */
function vectorSlice(id: number): Float32Array {
  if (vectorsView) {
    const offset = id * dims
    return vectorsView.subarray(offset, offset + dims)
  }
  throw new Error("vector buffer not configured")
}
|
|
||||||
|
|
||||||
/**
 * Dot product of the first `dims` components of `a` and `b`, accumulated in
 * index order.
 */
function dot(a: Float32Array, b: Float32Array): number {
  let acc = 0
  let i = 0
  while (i < dims) {
    acc += a[i] * b[i]
    i += 1
  }
  return acc
}
|
|
||||||
|
|
||||||
/**
 * Looks up the neighbor ids of `node` on graph level `level`.
 *
 * Each level stores its adjacency as an `indptr`/`indices` pair: the neighbors
 * of node `n` are `indices[indptr[n] .. indptr[n + 1])`. An unknown level or a
 * node outside the `indptr` range yields an empty array.
 *
 * @returns a view into the level's `indices` array, or an empty `Uint32Array`
 */
function neighborsFor(level: number, node: number): Uint32Array {
  const layer = levelGraph[level]
  if (layer) {
    const { indptr, indices } = layer
    const outOfRange = node < 0 || node + 1 >= indptr.length
    if (!outOfRange) {
      return indices.subarray(indptr[node], indptr[node + 1])
    }
  }
  return new Uint32Array()
}
|
|
||||||
|
|
||||||
/**
 * Inserts `item` into `arr`, which is kept ordered from highest to lowest
 * score. The array is modified in place; an item whose score ties with
 * existing entries is placed after them.
 */
function insertSortedDescending(arr: SearchHit[], item: SearchHit) {
  // Scan from the tail for the last entry that is not strictly worse.
  for (let pos = arr.length - 1; pos >= 0; pos--) {
    if (!(arr[pos].score < item.score)) {
      arr.splice(pos + 1, 0, item)
      return
    }
  }
  arr.unshift(item)
}
|
|
||||||
|
|
||||||
/**
 * Exhaustive search: scores every stored vector against `query` with `dot` and
 * keeps the `k` highest-scoring rows.
 *
 * @param query - query vector
 * @param k - number of hits wanted; fractional values are rounded down
 * @returns hits ordered from highest to lowest score; empty when no vectors are
 *   loaded or when `k` is below 1 or not a number
 */
function bruteForceSearch(query: Float32Array, k: number): SearchHit[] {
  const limit = Math.floor(k)
  // `!(limit >= 1)` also rejects NaN, which would otherwise reach the
  // "compare with the current worst hit" branch on an empty list.
  if (!vectorsView || !(limit >= 1)) return []
  const hits: SearchHit[] = []
  for (let id = 0; id < rows; id++) {
    const score = dot(query, vectorSlice(id))
    if (hits.length < limit) {
      insertSortedDescending(hits, { id, score })
    } else if (score > hits[hits.length - 1].score) {
      insertSortedDescending(hits, { id, score })
      // The list held exactly `limit` entries, so one drop restores the bound.
      hits.pop()
    }
  }
  return hits
}
|
|
||||||
|
|
||||||
/**
 * Graph-based nearest-neighbor search over the loaded level graphs; a higher
 * dot product means a better match.
 *
 * Falls back to `bruteForceSearch` when the manifest, vectors, entry point or
 * graph are missing. Otherwise it works in two phases:
 *  1. on every level above 0, hop greedily to whichever neighbor scores
 *     better than the current node until no hop improves the score;
 *  2. on level 0, expand candidates best-first while keeping at most `ef`
 *     results, where `ef = max(efDefault, k * 10)`.
 *
 * @param query - query vector
 * @param k - number of hits to return
 * @returns up to `k` hits ordered from highest to lowest score
 */
function hnswSearch(query: Float32Array, k: number): SearchHit[] {
  if (!manifest || !vectorsView || entryPoint < 0 || levelGraph.length === 0) {
    return bruteForceSearch(query, k)
  }

  const ef = Math.max(efDefault, k * 10)

  // Phase 1: greedy descent through the upper levels.
  let nearest = entryPoint
  let nearestScore = dot(query, vectorSlice(nearest))
  for (let level = maxLevel; level > 0; level--) {
    let improved: boolean
    do {
      improved = false
      for (const candidate of neighborsFor(level, nearest)) {
        if (candidate >= rows) continue
        const score = dot(query, vectorSlice(candidate))
        if (score > nearestScore) {
          nearestScore = score
          nearest = candidate
          improved = true
        }
      }
    } while (improved)
  }

  // Phase 2: best-first expansion on level 0, seeded with the node found above.
  const seen = new Set<number>([nearest])
  const frontier: SearchHit[] = [{ id: nearest, score: nearestScore }]
  const best: SearchHit[] = [{ id: nearest, score: nearestScore }]

  while (frontier.length > 0) {
    const current = frontier.shift()!
    // Stop once the result list is full and the best open candidate is already
    // worse than the worst kept result.
    if (best.length >= ef && current.score < best[best.length - 1].score) {
      break
    }
    for (const candidate of neighborsFor(0, current.id)) {
      if (candidate >= rows || seen.has(candidate)) continue
      seen.add(candidate)
      const hit = { id: candidate, score: dot(query, vectorSlice(candidate)) }
      insertSortedDescending(frontier, hit)
      const admits = best.length < ef || hit.score > best[best.length - 1].score
      if (!admits) continue
      insertSortedDescending(best, hit)
      if (best.length > ef) {
        best.pop()
      }
    }
  }

  best.sort((a, b) => b.score - a.score)
  return best.slice(0, k)
}
|
|
||||||
|
|
||||||
/**
 * Encodes `text` into a `dims`-long vector with the loaded pipeline, using
 * mean pooling and normalization.
 *
 * Some model families expect a prefix that differs between queries and
 * documents; it is chosen from the lower-cased model name:
 *  - names containing "e5": `query: ` / `passage: `
 *  - names containing "qwen" and "embedding": an instruction block for queries,
 *    plain text for documents
 *  - names containing "embeddinggemma": `task: search result | query: ` /
 *    `title: none | text: `
 *
 * @param text - text to embed
 * @param isQuery - true to use the query-side prefix
 * @returns the embedding; components the model did not supply are 0
 */
async function embed(text: string, isQuery: boolean = false): Promise<Float32Array> {
  await ensureEncoder()

  let input = text
  if (cfg?.model) {
    const name = cfg.model.toLowerCase()
    if (name.includes("e5")) {
      input = isQuery ? `query: ${text}` : `passage: ${text}`
    } else if (name.includes("qwen") && name.includes("embedding")) {
      // Only queries carry the task instruction; documents stay as-is.
      if (isQuery) {
        const task = "Given a web search query, retrieve relevant passages that answer the query"
        input = `Instruct: ${task}\nQuery: ${text}`
      }
    } else if (name.includes("embeddinggemma")) {
      input = isQuery ? `task: search result | query: ${text}` : `title: none | text: ${text}`
    }
  }

  const out = await classifier(input, { pooling: "mean", normalize: true })
  const values = Array.from(out?.data ?? out) as number[]
  const vec = new Float32Array(dims)
  for (let i = 0; i < vec.length; i++) {
    vec[i] = values[i] ?? 0
  }
  return vec
}
|
|
||||||
|
|
||||||
/**
 * Initializes the worker: fetches the manifest, loads all vector shards and the
 * graph file, builds the per-level adjacency views, then switches `state` to
 * `"ready"` and posts a `ready` message.
 *
 * Everything is loaded into locals first and written to module state in one
 * step at the end. After each await the call checks that it is still the
 * active initialization, so a run that was superseded (its controller was
 * replaced or cleared) can neither overwrite newer state nor mark the worker
 * as failed.
 *
 * @param msg - init request carrying the config, manifest URL, base URL and
 *   cache flag
 * @throws when the worker is already loading or ready, when the manifest
 *   cannot be fetched or has a dtype other than `fp32`, when loading the
 *   vectors or graph fails, or when this run was superseded
 */
async function handleInit(msg: InitMessage) {
  if (state === "loading" || state === "ready") {
    throw new Error("worker already initialized or loading")
  }

  state = "loading"
  abortController?.abort()
  const controller = new AbortController()
  abortController = controller

  // This run stays current for as long as its controller is still the
  // module-level one and has not been aborted.
  const assertCurrent = () => {
    if (abortController !== controller || controller.signal.aborted) {
      throw new Error("semantic worker initialization was cancelled")
    }
  }

  try {
    const manifestUrl = toAbsolute(msg.manifestUrl, msg.baseUrl)
    const response = await fetch(manifestUrl, { signal: controller.signal })
    if (!response.ok) {
      throw new Error(
        `failed to fetch manifest ${manifestUrl}: ${response.status} ${response.statusText}`,
      )
    }
    const loadedManifest = (await response.json()) as Manifest
    assertCurrent()

    if (loadedManifest.vectors.dtype !== "fp32") {
      throw new Error(
        `unsupported embedding dtype '${loadedManifest.vectors.dtype}', regenerate with fp32`,
      )
    }

    const { buffer: vectorBuffer } = await populateVectors(
      loadedManifest,
      msg.baseUrl,
      msg.disableCache,
    )
    assertCurrent()

    const graphBuffer = await populateGraph(loadedManifest, msg.baseUrl, msg.disableCache)
    assertCurrent()

    // Each level's adjacency arrays are views into the one graph buffer.
    const levels = loadedManifest.hnsw.graph.levels.map((level) => {
      const indptr = new Uint32Array(graphBuffer, level.indptr.offset, level.indptr.elements)
      const indices = new Uint32Array(graphBuffer, level.indices.offset, level.indices.elements)
      return { indptr, indices }
    })

    // Commit everything at once now that all pieces loaded successfully.
    cfg = msg.cfg
    manifest = loadedManifest
    dims = loadedManifest.dims
    rows = loadedManifest.rows
    vectorsView = vectorBuffer
    entryPoint = loadedManifest.hnsw.entryPoint
    maxLevel = loadedManifest.hnsw.maxLevel
    efDefault = Math.max(64, loadedManifest.hnsw.M * 4)
    levelGraph = levels

    state = "ready"
    const ready: ReadyMessage = { type: "ready" }
    self.postMessage(ready)
  } catch (err) {
    // Only the run that still owns the controller may flag the worker as
    // failed; a superseded run must leave the newer state untouched.
    if (abortController === controller) {
      state = "error"
    }
    throw err
  }
}
|
|
||||||
|
|
||||||
/**
 * Answers a search request: embeds the query text, runs `hnswSearch` for at
 * least one hit, and posts a `search-result` message carrying the request's
 * `seq`.
 *
 * @param msg - search request with the query text, hit count `k` and `seq`
 * @throws when the worker is not in the `"ready"` state or has no manifest or
 *   vectors loaded
 */
async function handleSearch(msg: SearchMessage) {
  if (state !== "ready") {
    throw new Error("worker not ready for search")
  }
  const configured = manifest && vectorsView
  if (!configured) {
    throw new Error("semantic worker not configured")
  }

  const embedding = await embed(msg.text, true)
  const hits = hnswSearch(embedding, Math.max(1, msg.k))
  const reply: SearchResultMessage = { type: "search-result", seq: msg.seq, semantic: hits }
  self.postMessage(reply)
}
|
|
||||||
|
|
||||||
/**
 * Returns the worker to its initial idle condition: aborts whatever the
 * current controller governs and clears the loaded manifest, configuration,
 * vectors, encoder and graph data.
 */
function handleReset() {
  // Cancel in-flight work first, then drop the controller.
  abortController?.abort()
  abortController = null

  // Lifecycle and configuration.
  state = "idle"
  cfg = null
  manifest = null

  // Encoder.
  classifier = null
  envConfigured = false

  // Vector storage.
  vectorsView = null
  rows = 0
  dims = 0

  // Graph.
  entryPoint = -1
  maxLevel = 0
  levelGraph = []
}
|
|
||||||
|
|
||||||
// Entry point for messages from the main thread. `reset` is handled
// synchronously; `init` and `search` run asynchronously and report any failure
// back as an `error` message (search errors also carry the request's `seq`).
self.onmessage = (event: MessageEvent<WorkerMessage>) => {
  const data = event.data
  const describe = (err: unknown): string => (err instanceof Error ? err.message : String(err))

  switch (data.type) {
    case "reset":
      handleReset()
      return

    case "init":
      void handleInit(data).catch((err: unknown) => {
        const message: ErrorMessage = { type: "error", message: describe(err) }
        self.postMessage(message)
      })
      return

    case "search":
      void handleSearch(data).catch((err: unknown) => {
        const message: ErrorMessage = { type: "error", seq: data.seq, message: describe(err) }
        self.postMessage(message)
      })
      return
  }
}
|
|
||||||
Reference in New Issue
Block a user