Compare commits

..

3 Commits

Author SHA1 Message Date
Aaron Pham
eb8a4cce18 revert: redundant changes
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2025-10-05 20:04:19 -04:00
Aaron Pham
68682a8fe3 chore: revert vault specific branch
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2025-10-05 19:58:20 -04:00
Aaron Pham
f533902c75 feat: semantic search (1/n)
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
2025-10-05 19:50:52 -04:00
96 changed files with 3459 additions and 5100 deletions

View File

@@ -11,17 +11,17 @@ jobs:
runs-on: ubuntu-latest
name: Build Preview
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Setup Node
uses: actions/setup-node@v6
uses: actions/setup-node@v5
with:
node-version: 22
- name: Cache dependencies
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
@@ -37,7 +37,7 @@ jobs:
run: npx quartz build -d docs -v
- name: Upload build artifact
uses: actions/upload-artifact@v6
uses: actions/upload-artifact@v4
with:
name: preview-build
path: public

View File

@@ -19,17 +19,17 @@ jobs:
permissions:
contents: write
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Setup Node
uses: actions/setup-node@v6
uses: actions/setup-node@v5
with:
node-version: 22
- name: Cache dependencies
uses: actions/cache@v5
uses: actions/cache@v4
with:
path: ~/.npm
key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }}
@@ -53,11 +53,11 @@ jobs:
permissions:
contents: write
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@v5
with:
fetch-depth: 0
- name: Setup Node
uses: actions/setup-node@v6
uses: actions/setup-node@v5
with:
node-version: 22
- name: Get package version

View File

@@ -18,7 +18,7 @@ jobs:
name: Deploy Preview to Cloudflare Pages
steps:
- name: Download build artifact
uses: actions/download-artifact@v7
uses: actions/download-artifact@v5
id: preview-build-artifact
with:
name: preview-build

View File

@@ -21,11 +21,11 @@ jobs:
echo "OWNER_LOWERCASE=${OWNER,,}" >> ${GITHUB_ENV}
env:
OWNER: "${{ github.repository_owner }}"
- uses: actions/checkout@v6
- uses: actions/checkout@v5
with:
fetch-depth: 1
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v5.4.0
uses: rlespinasse/github-slug-action@v5.2.0
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
@@ -37,7 +37,7 @@ jobs:
network=host
- name: Install cosign
if: github.event_name != 'pull_request'
uses: sigstore/cosign-installer@v4.0.0
uses: sigstore/cosign-installer@v3.10.0
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
if: github.event_name != 'pull_request'

16
.gitignore vendored
View File

@@ -9,19 +9,3 @@ tsconfig.tsbuildinfo
private/
.replit
replit.nix
erl_crash.dump
# content/ is generated by the export script; only keep the placeholder
content/*
!content/.gitkeep
# static/ox-hugo/ is populated by ox-hugo during export
static/ox-hugo/
# Elixir/Mix build artifacts for the pipeline project
scripts/pipeline/_build/
scripts/pipeline/deps/
scripts/pipeline/erl_crash.dump
# Test helpers (not needed in production)
scripts/test.bib
scripts/test_pipeline.exs
/org-garden/deps/
/org-garden/_build/
/org-garden/result

254
AGENTS.md
View File

@@ -1,254 +0,0 @@
# AGENTS.md - Coding Agent Instructions
This document provides essential information for AI coding agents working in this repository.
## Project Overview
**Quartz** is a static site generator for publishing digital gardens and notes as websites.
Built with TypeScript, Preact, and unified/remark/rehype for markdown processing.
| Stack | Technology |
| ------------- | ----------------------------------------- |
| Language | TypeScript 5.x (strict mode) |
| Runtime | Node.js >=22 (v22.16.0 pinned) |
| Package Mgr | npm >=10.9.2 |
| Module System | ES Modules (`"type": "module"`) |
| UI Framework | Preact 10.x (JSX with `react-jsx` pragma) |
| Build Tool | esbuild |
| Styling | SCSS via esbuild-sass-plugin |
## Environment
This is a Nix project. Use the provided `flake.nix` to enter a dev shell with Node.js 22 and npm:
```bash
nix develop
```
All `npm` commands below must be run inside the dev shell.
## Build, Lint, and Test Commands
```bash
# Type check and format check (CI validation)
npm run check
# Auto-format code with Prettier
npm run format
# Run all tests
npm run test
# Run a single test file
npx tsx --test quartz/util/path.test.ts
# Run tests matching a pattern (use --test-name-pattern)
npx tsx --test --test-name-pattern="typeguards" quartz/util/path.test.ts
# Build the static site
npx quartz build
# Build and serve with hot reload
npx quartz build --serve
# Profile build performance
npm run profile
```
### Test Files Location
Tests use Node.js native test runner via `tsx`. Test files follow the `*.test.ts` pattern:
- `quartz/util/path.test.ts`
- `quartz/util/fileTrie.test.ts`
- `quartz/components/scripts/search.test.ts`
## Code Style Guidelines
### Prettier Configuration (`.prettierrc`)
```json
{
"printWidth": 100,
"tabWidth": 2,
"semi": false,
"trailingComma": "all",
"quoteProps": "as-needed"
}
```
**No ESLint** - only Prettier for formatting. Run `npm run format` before committing.
### TypeScript Configuration
- **Strict mode enabled** (`strict: true`)
- `noUnusedLocals: true` - no unused variables
- `noUnusedParameters: true` - no unused function parameters
- JSX configured for Preact (`jsxImportSource: "preact"`)
### Import Conventions
```typescript
// 1. External packages first
import { PluggableList } from "unified"
import { visit } from "unist-util-visit"
// 2. Internal utilities/types (relative paths)
import { QuartzTransformerPlugin } from "../types"
import { FilePath, slugifyFilePath } from "../../util/path"
import { i18n } from "../../i18n"
```
### Naming Conventions
| Element | Convention | Example |
| ---------------- | ------------ | ----------------------------------- |
| Files (utils) | camelCase | `path.ts`, `fileTrie.ts` |
| Files (comps) | PascalCase | `TableOfContents.tsx`, `Search.tsx` |
| Types/Interfaces | PascalCase | `QuartzComponent`, `FullSlug` |
| Type Guards | `is*` prefix | `isFilePath()`, `isFullSlug()` |
| Constants | UPPER_CASE | `QUARTZ`, `UPSTREAM_NAME` |
| Options types | `Options` | `interface Options { ... }` |
### Branded Types Pattern
This codebase uses branded types for type-safe path handling:
```typescript
type SlugLike<T> = string & { __brand: T }
export type FilePath = SlugLike<"filepath">
export type FullSlug = SlugLike<"full">
export type SimpleSlug = SlugLike<"simple">
// Always validate with type guards before using
export function isFilePath(s: string): s is FilePath { ... }
```
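For illustration, a minimal sketch of what such a guard could look like, reusing the `SlugLike`/`FilePath` types from the block above — the validation rule here is an assumption, not the repository's actual logic:

```typescript
// Sketch only — the real isFilePath() in quartz/util/path.ts may enforce
// different invariants (e.g. a specific extension list).
export function isFilePath(s: string): s is FilePath {
  return s.length > 0 && !s.startsWith("/") && s.includes(".")
}

// Narrow an untrusted string before handing it to path utilities:
const raw: string = "notes/example.md"
if (isFilePath(raw)) {
  const fp: FilePath = raw // safe: raw is FilePath inside this branch
}
```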
### Component Pattern (Preact)
Components use a factory function pattern with attached static properties:
```typescript
export default ((userOpts?: Partial<Options>) => {
const opts: Options = { ...defaultOptions, ...userOpts }
const ComponentName: QuartzComponent = ({ cfg, displayClass }: QuartzComponentProps) => {
return <div class={classNames(displayClass, "component-name")}>...</div>
}
ComponentName.css = style // SCSS styles
ComponentName.afterDOMLoaded = script // Client-side JS
return ComponentName
}) satisfies QuartzComponentConstructor
```
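Instantiation typically happens in `quartz.layout.ts`, where the factory is called with user options and the returned component is wired into a layout slot. A hedged sketch (both `ComponentName` and `someOption` are hypothetical):

```typescript
// quartz.layout.ts (sketch)
import ComponentName from "./quartz/components/ComponentName"

export const sharedPageComponents = {
  // ...other layout slots...
  footer: ComponentName({ someOption: true }), // someOption is hypothetical
}
```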
### Plugin Pattern
Three plugin types: transformers, filters, and emitters.
```typescript
export const PluginName: QuartzTransformerPlugin<Partial<Options>> = (userOpts) => {
const opts = { ...defaultOptions, ...userOpts }
return {
name: "PluginName",
markdownPlugins(ctx) { return [...] },
htmlPlugins(ctx) { return [...] },
externalResources(ctx) { return { js: [], css: [] } },
}
}
```
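As a concrete (hypothetical) transformer following this shape, using `unist-util-visit` from the import conventions above — the plugin itself is not in this repository:

```typescript
import { visit } from "unist-util-visit"
import { QuartzTransformerPlugin } from "./types"

interface Options {
  enabled: boolean
}
const defaultOptions: Options = { enabled: true }

// Hypothetical example: lowercases every markdown text node.
export const LowercaseText: QuartzTransformerPlugin<Partial<Options>> = (userOpts) => {
  const opts = { ...defaultOptions, ...userOpts }
  return {
    name: "LowercaseText",
    markdownPlugins() {
      return [
        () => (tree: any) => {
          if (!opts.enabled) return
          visit(tree, "text", (node: any) => {
            node.value = node.value.toLowerCase()
          })
        },
      ]
    },
  }
}
```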
### Testing Pattern
Use Node.js native test runner with `assert`:
```typescript
import test, { describe, beforeEach } from "node:test"
import assert from "node:assert"
describe("FeatureName", () => {
test("should do something", () => {
assert.strictEqual(actual, expected)
assert.deepStrictEqual(actualObj, expectedObj)
assert(condition) // truthy assertion
assert(!condition) // falsy assertion
})
})
```
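A concrete sketch of a test in this style — the import path and the guard's exact acceptance rules are assumptions:

```typescript
import test, { describe } from "node:test"
import assert from "node:assert"
// Hypothetical import path; adjust to wherever the test file lives.
import { isFilePath } from "./path"

describe("isFilePath", () => {
  test("accepts a relative markdown path", () => {
    // assumes the guard accepts extension-bearing relative paths
    assert(isFilePath("notes/example.md"))
  })
  test("rejects the empty string", () => {
    assert(!isFilePath(""))
  })
})
```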
### Error Handling
- Use `try/catch` for critical operations (file I/O, parsing)
- Custom `trace` utility for error reporting with stack traces
- `process.exit(1)` for fatal errors
- `console.warn()` for non-fatal issues
### Async Patterns
- Prefer `async/await` over raw promises
- Use async generators (`async *emit()`) for streaming file output
- Use `async-mutex` for concurrent build protection
## Project Structure
```
quartz/
├── bootstrap-cli.mjs # CLI entry point
├── build.ts # Build orchestration
├── cfg.ts # Configuration types
├── components/ # Preact UI components
│ ├── *.tsx # Components
│ ├── scripts/ # Client-side scripts (*.inline.ts)
│ └── styles/ # Component SCSS
├── plugins/
│ ├── transformers/ # Markdown AST transformers
│ ├── filters/ # Content filters
│ ├── emitters/ # Output generators
│ └── types.ts # Plugin type definitions
├── processors/ # Build pipeline (parse/filter/emit)
├── util/ # Utility functions
└── i18n/ # Internationalization (30+ locales)
```
## Branch Workflow
This is a fork of [jackyzha0/quartz](https://github.com/jackyzha0/quartz) with org-roam customizations.
| Branch | Purpose |
| ----------- | ------------------------------------------------ |
| `main` | Clean mirror of upstream quartz — no custom code |
| `org-roam` | Default branch — all customizations live here |
| `feature/*` | Short-lived branches off `org-roam` |
### Pulling Upstream Updates
```bash
git checkout main
git fetch upstream
git merge upstream/main
git checkout org-roam
git merge main
# Resolve conflicts if any, then commit
```
### Working on Features
```bash
git checkout org-roam
git checkout -b feature/my-feature
# ... work ...
git checkout org-roam
git merge feature/my-feature
git branch -d feature/my-feature
```
**Merge direction:** `upstream → main → org-roam → feature/*`
## Important Notes
- **Client-side scripts**: Use `.inline.ts` suffix, bundled via esbuild (see the sketch after this list)
- **Isomorphic code**: `quartz/util/path.ts` must not use Node.js APIs
- **Incremental builds**: Plugins can implement `partialEmit` for efficiency
- **Markdown flavors**: Supports Obsidian (`ofm.ts`) and Roam (`roam.ts`) syntax
- **Pipeline build artifacts**: `scripts/pipeline/_build/` and `scripts/pipeline/deps/`
are gitignored — run `mix deps.get` inside `scripts/pipeline/` after a fresh clone
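
A minimal `.inline.ts` sketch, assuming the standard Quartz pattern of re-running setup on the custom `nav` event fired during client-side navigation (the selector below is hypothetical):

```typescript
// example.inline.ts (sketch)
document.addEventListener("nav", () => {
  // re-query the DOM after every client-side navigation
  const el = document.querySelector(".component-name") // hypothetical selector
  el?.addEventListener("click", () => console.log("clicked"))
})
```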

View File

@@ -1,96 +1,14 @@
# Quartz v4 — org-roam edition
# Quartz v4
> "[One] who works with the door open gets all kinds of interruptions, but [they] also occasionally gets clues as to what the world is and what might be important." — Richard Hamming
> [One] who works with the door open gets all kinds of interruptions, but [they] also occasionally gets clues as to what the world is and what might be important. — Richard Hamming
Quartz is a set of tools that helps you publish your [digital garden](https://jzhao.xyz/posts/networked-thought) and notes as a website for free.
Quartz v4 features a from-the-ground-up rewrite focusing on end-user extensibility and ease-of-use.
This fork adds first-class support for [org-roam](https://www.orgroam.com/) notes via [ox-hugo](https://ox-hugo.scripter.co/).
🔗 Upstream documentation: https://quartz.jzhao.xyz/
🔗 Read the documentation and get started: https://quartz.jzhao.xyz/
[Join the Discord Community](https://discord.gg/cRFFHYye7t)
## Quick Start
### Prerequisites
This project uses Nix. Enter the development shell, which provides Node.js 22, Elixir, and Emacs with ox-hugo:
```bash
nix develop
```
All commands below must be run inside this shell.
```bash
npm install
```
### Building from org-roam notes
Your org-roam notes live in a separate directory. Point `NOTES_DIR` at it:
```bash
# Export notes to content/ and build the site
NOTES_DIR=/path/to/notes npm run build:notes
# Export, build, and serve with hot reload
NOTES_DIR=/path/to/notes npm run serve:notes
# Export only (wipes content/ and re-exports all .org files)
NOTES_DIR=/path/to/notes npm run export
```
The export pipeline runs in four phases:
1. **Wipe** `content/` clean
2. **Export** every `.org` file via `emacs --batch` + ox-hugo → Markdown
3. **Transform** — post-process the Markdown (citation resolution, etc.)
4. **Index** — generate a fallback `index.md` if none was exported
#### Citations (org-citar → Zotero links)
org-citar references (`[cite:@key]`) are resolved to clickable Zotero links.
With Zotero running and the [Better BibTeX](https://retorque.re/zotero-better-bibtex/)
plugin installed, no extra configuration is needed — the pipeline detects it
automatically and links directly to the PDF in your library.
```bash
# Use a local .bib file as fallback when Zotero is not running
BIBTEX_FILE=/path/to/refs.bib NOTES_DIR=/path/to/notes npm run export
# Control warning verbosity for unresolved keys
CITATION_MODE=strict NOTES_DIR=/path/to/notes npm run export
```
| Env var | Default | Purpose |
| --------------- | ------------------------ | ----------------------------------------- |
| `BIBTEX_FILE` | — | Path to `.bib` file for citation fallback |
| `ZOTERO_URL` | `http://localhost:23119` | Zotero Better BibTeX base URL |
| `CITATION_MODE` | `warn` | `silent` / `warn` / `strict` |
### Building without org-roam notes
If you manage `content/` directly with Markdown files:
```bash
# Build the site
npx quartz build
# Build and serve with hot reload
npx quartz build --serve
```
The site is generated in `public/`. When serving, visit http://localhost:8080.
### Development
```bash
npm run check # type check + format check
npm run format # auto-format with Prettier
npm run test # run tests
```
## Sponsors
<p align="center">

View File

@@ -36,7 +36,6 @@ This part of the configuration concerns anything that can affect the whole site.
- `{provider: 'clarity', projectId: '<your-clarity-id-code>' }`: use [Microsoft Clarity](https://clarity.microsoft.com/). The project id can be found on top of the overview page.
- `{ provider: 'matomo', siteId: '<your-matomo-id-code>', host: 'matomo.example.com' }`: use [Matomo](https://matomo.org/), without protocol.
- `{ provider: 'vercel' }`: use [Vercel Web Analytics](https://vercel.com/docs/concepts/analytics).
- `{ provider: 'rybbit', siteId: 'my-rybbit-id' }` (managed) or `{ provider: 'rybbit', siteId: 'my-rybbit-id', host: 'my-rybbit-domain.com' }` (self-hosted): use [Rybbit](https://rybbit.com)
- `locale`: used for [[i18n]] and date formatting
- `baseUrl`: this is used for sitemaps and RSS feeds that require an absolute URL to know where the canonical 'home' of your site lives. This is normally the deployed URL of your site (e.g. `quartz.jzhao.xyz` for this site). Do not include the protocol (i.e. `https://`) or any leading or trailing slashes.
- This should also include the subpath if you are [[hosting]] on GitHub pages without a custom domain. For example, if my repository is `jackyzha0/quartz`, GitHub pages would deploy to `https://jackyzha0.github.io/quartz` and the `baseUrl` would be `jackyzha0.github.io/quartz`.

BIN
docs/embeddings/hnsw.bin Normal file

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

View File

@@ -5,7 +5,3 @@ You can run the below one-liner to run Quartz in Docker.
```sh
docker run --rm -itp 8080:8080 -p 3001:3001 -v ./content:/usr/src/app/content $(docker build -q .)
```
> [!warning] Not to be used for production
> Serve mode is intended for local previews only.
> For production workloads, see the page on [[hosting]].

View File

@@ -162,7 +162,7 @@ You can access the tags of a file by `node.data.tags`.
Component.Explorer({
filterFn: (node) => {
// exclude files with the tag "explorerexclude"
return node.data?.tags?.includes("explorerexclude") !== true
return node.data.tags?.includes("explorerexclude") !== true
},
})
```
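Another `filterFn` variant (not part of this diff) that hides a folder rather than a tag, assuming `node.name` carries the file or folder name:

```typescript
Component.Explorer({
  filterFn: (node) => {
    // hypothetical variant: hide a folder named "private" entirely
    return node.name !== "private"
  },
})
```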

View File

@@ -8,7 +8,7 @@ By default, Quartz only fetches previews for pages inside your vault due to [COR
When [[creating components|creating your own components]], you can include this `popover-hint` class to also include it in the popover.
Similar to Obsidian, [[quartz-layout-desktop.png|images referenced using wikilinks]] can also be viewed as popups.
Similar to Obsidian, [[quartz layout.png|images referenced using wikilinks]] can also be viewed as popups.
## Configuration

126
flake.lock generated
View File

@@ -1,126 +0,0 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"flake-utils_2": {
"inputs": {
"systems": "systems_2"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1771008912,
"narHash": "sha256-gf2AmWVTs8lEq7z/3ZAsgnZDhWIckkb+ZnAo5RzSxJg=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "a82ccc39b39b621151d6732718e3e250109076fa",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs_2": {
"locked": {
"lastModified": 1771369470,
"narHash": "sha256-0NBlEBKkN3lufyvFegY4TYv5mCNHbi5OmBDrzihbBMQ=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "0182a361324364ae3f436a63005877674cf45efb",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"org-garden": {
"inputs": {
"flake-utils": "flake-utils_2",
"nixpkgs": "nixpkgs_2"
},
"locked": {
"path": "./org-garden",
"type": "path"
},
"original": {
"path": "./org-garden",
"type": "path"
},
"parent": []
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs",
"org-garden": "org-garden"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"systems_2": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

View File

@@ -1,47 +0,0 @@
{
description = "Quartz org-roam org notes to website";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
flake-utils.url = "github:numtide/flake-utils";
org-garden.url = "path:./org-garden";
};
outputs = { self, nixpkgs, flake-utils, org-garden }:
flake-utils.lib.eachDefaultSystem (system:
let
pkgs = import nixpkgs { inherit system; };
# Re-export org-garden's packages
orgGardenPkgs = org-garden.packages.${system};
# Convenience aliases
orgGardenApp = orgGardenPkgs.default;
in
{
# All packages come from org-garden
packages = orgGardenPkgs // {
default = orgGardenApp;
};
# Apps
apps = {
default = { type = "app"; program = "${orgGardenApp}/bin/org-garden"; };
org-garden = { type = "app"; program = "${orgGardenApp}/bin/org-garden"; };
};
# Dev shell for working on the repo
devShells.default = pkgs.mkShell {
buildInputs = [
pkgs.nodejs_22
pkgs.elixir
];
shellHook = ''
echo "Node $(node --version) / npm $(npm --version)"
elixir --version 2>/dev/null | head -1 || true
'';
};
});
}

1
index.d.ts vendored
View File

@@ -13,3 +13,4 @@ interface CustomEventMap {
type ContentIndex = Record<FullSlug, ContentDetails>
declare const fetchData: Promise<ContentIndex>
declare const semanticCfg: import("./quartz/cfg").GlobalConfiguration["semanticSearch"]
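
These ambient declarations let inline client scripts read build-time globals without imports. A hedged usage sketch — the `enabled` field is an assumption about the `semanticSearch` config shape:

```typescript
// some-script.inline.ts (sketch)
async function maybeSemanticSearch() {
  // `enabled` is a hypothetical field on the semanticSearch config
  if ((semanticCfg as any)?.enabled) {
    const index = await fetchData // ContentIndex resolved at runtime
    // ...rank results against the index...
  }
}
```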

Binary file not shown.

Before: 8.2 KiB image

View File

@@ -1,16 +0,0 @@
:PROPERTIES:
:ID: emt-madrid
:END:
#+title: EMT Madrid (urban bus)
Empresa Municipal de Transportes (EMT) operates the urban bus network
within the municipality of Madrid — around 200 lines.
* Notable lines
- *Line 27* — connects Embajadores with Barrio de la Concepción, one of the
oldest routes in the network.
- *Line 34* — Argüelles to Carabanchel, crossing the city centre via Gran Vía.
- *Búho (owl) lines* — night buses running from Cibeles from midnight to 6 am.
* See also
- [[id:madrid-transport][Madrid Public Transport]]

View File

@@ -1,13 +0,0 @@
#+title: Example: Citation Reference
This file demonstrates how org-citar citations pass through ox-hugo into
markdown, where the pipeline transform resolves them.
The methodology described in [cite:@podlovics2021journalArticle] provides a
useful framework for analysis.
Multiple citations can appear together:
[cite:@podlovics2021journalArticle;@petersen2022book]
Older bare-cite style (org-roam v1 / older citar) also works:
cite:@podlovics2021journalArticle

View File

@@ -1,33 +0,0 @@
:PROPERTIES:
:ID: example-images
:END:
#+title: Example: Image References
This note demonstrates the three image reference scenarios that the pipeline
must handle.
* Scenario 1: External image (URL)
An image hosted on the web — ox-hugo passes the URL through as-is and no
local file handling is needed.
#+attr_html: :link "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSkzsTuLOt8esM6enoKwkzqA52G3p9hldlf2g&s"
[[file:quartz-logo-external.png]]
* Scenario 2: Local image (same notes directory)
An image sitting next to this .org file inside the notes directory.
ox-hugo copies files referenced with a relative path into the Hugo =static/=
assets tree automatically.
#+CAPTION: Quartz logo (local, same notes dir)
[[file:quartz-logo.png]]
* Scenario 3: External image (outside notes directory)
An image that lives outside the notes directory entirely — for example a
shared assets folder or a system path. ox-hugo still copies it into =static/=
and rewrites the reference.
#+CAPTION: Quartz logo (outside notes dir)
[[file:../notes-external/external-location-image.png]]

View File

@@ -1,17 +0,0 @@
:PROPERTIES:
:ID: madrid-transport
:END:
#+title: Madrid Public Transport
Madrid has one of the most extensive public transport networks in Europe,
operated primarily by [[id:crtm][Consorcio Regional de Transportes de Madrid]] (CRTM).
* Modes
- [[id:metro-madrid][Metro de Madrid]] — 13 lines, ~300 km of track
- [[id:emt-madrid][EMT Bus]] — urban buses within the city
- Cercanías — suburban rail run by Renfe
- Interurbano — regional buses to the wider Community of Madrid
* Ticketing
A single [[https://www.crtm.es][tarjeta transporte]] (transport card) works across all modes.
The Multi card covers zones A–C and is topped up at any metro station.

View File

@@ -1,18 +0,0 @@
:PROPERTIES:
:ID: metro-madrid
:END:
#+title: Metro de Madrid
The Madrid Metro is the main rapid transit network in the city, opened in 1919.
It is the oldest metro in the Iberian Peninsula, predating Barcelona's.
* Key Lines
| Line | Name | Colour | Terminals |
|------+-----------------+--------+------------------------------|
| L1 | Pinar de Chamartín–Valdecarros | Blue | Pinar de Chamartín / Valdecarros |
| L6 | Circular | Grey | Circular (loop) |
| L10 | — | Dark blue | Hospital Infanta Sofía / Tres Olivos |
* See also
- [[id:madrid-transport][Madrid Public Transport]]
- [[id:sol-interchange][Sol interchange]]

View File

@@ -1,12 +0,0 @@
:PROPERTIES:
:ID: sol-interchange
:END:
#+title: Sol (interchange)
Sol is the busiest interchange station in the Madrid Metro, sitting beneath
Puerta del Sol in the city centre.
Lines serving Sol: [[id:metro-madrid][L1]], L2, L3.
It also connects to the Cercanías hub underneath, making it the de-facto
zero point of Madrid's public transport.

Binary file not shown.

Before: 8.2 KiB image

View File

@@ -1,22 +0,0 @@
:PROPERTIES:
:ID: crtm
:END:
#+title: CRTM — Consorcio Regional de Transportes de Madrid
The CRTM is the regional authority that coordinates public transport across
the Community of Madrid. It does not operate services directly but sets
fares, zones, and integration policy.
* Fare zones
| Zone | Coverage |
|-------+-----------------------------|
| A | Municipality of Madrid |
| B1 | Inner ring municipalities |
| B2 | Outer ring municipalities |
| B3 | Further suburban area |
| C1–C2 | Commuter belt |
* Related
- [[id:madrid-transport][Madrid Public Transport]]
- [[id:metro-madrid][Metro de Madrid]]
- [[id:emt-madrid][EMT Madrid]]

View File

@@ -1,19 +0,0 @@
:PROPERTIES:
:ID: m30
:END:
#+title: M-30
The M-30 is Madrid's innermost ring road, circling the city centre at a
radius of roughly 5 km from Puerta del Sol.
It runs mostly underground through the Madrid Río tunnel section along the
Manzanares river, built during the 2004–2007 renovation that reclaimed the
riverbank as a public park.
* Key junctions
- Nudo Norte — connects to A-1 (Burgos) and A-6 (La Coruña)
- Nudo Sur — connects to A-4 (Cádiz) and A-42 (Toledo)
* See also
- [[id:crtm][CRTM]]
- [[id:madrid-transport][Madrid Public Transport]]

View File

@@ -1,10 +0,0 @@
{
"$schema": "https://opencode.ai/config.json",
"mcp": {
"nixos": {
"type": "local",
"command": ["mcp-nixos"],
"enabled": true
}
}
}

61
org-garden/flake.lock generated
View File

@@ -1,61 +0,0 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1771369470,
"narHash": "sha256-0NBlEBKkN3lufyvFegY4TYv5mCNHbi5OmBDrzihbBMQ=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "0182a361324364ae3f436a63005877674cf45efb",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

View File

@@ -1,147 +0,0 @@
{
description = "Org-garden org-roam to website publishing pipeline";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
flake-utils.url = "github:numtide/flake-utils";
};
outputs = { self, nixpkgs, flake-utils }:
flake-utils.lib.eachDefaultSystem (system:
let
pkgs = import nixpkgs { inherit system; };
fs = pkgs.lib.fileset;
# =========================================================================
# Emacs with ox-hugo
# =========================================================================
# Needed at runtime by the escript (export calls `emacs --batch` with ox-hugo)
emacsWithOxHugo = (pkgs.emacsPackagesFor pkgs.emacs-nox).emacsWithPackages
(epkgs: [ epkgs.ox-hugo ]);
# =========================================================================
# Elixir Pipeline
# =========================================================================
# Pre-fetched Hex/Mix dependencies
mixDeps = pkgs.beamPackages.fetchMixDeps {
pname = "org-garden-mix-deps";
version = "0.1.0";
src = fs.toSource {
root = ./.;
fileset = fs.unions [
./mix.exs
./mix.lock
];
};
sha256 = "sha256-si7JAomY1HZ33m6ihUJP5i6PO39CE1clYvuMtn0CbPU=";
};
# Compiled org-garden escript
orgGardenEscript = pkgs.beamPackages.mixRelease {
pname = "org-garden";
version = "0.1.0";
src = fs.toSource {
root = ./.;
fileset = fs.unions [
./mix.exs
./mix.lock
./lib
];
};
escriptBinName = "org_garden";
mixFodDeps = mixDeps;
stripDebug = true;
};
# =========================================================================
# Quartz (fetched from upstream, patched)
# =========================================================================
# Pin to specific upstream commit
quartzVersion = "4.5.2";
quartzRev = "ec00a40aefca73596ab76e3ebe3a8e1129b43688";
# Fetch upstream Quartz source
quartzSrc = pkgs.fetchFromGitHub {
owner = "jackyzha0";
repo = "quartz";
rev = quartzRev;
hash = "sha256-HdtQB5+SRWiypOvAJuJa3Nodl4JHehp2Mz6Rj5gOG0w=";
};
# Apply our patches to Quartz
quartzPatched = pkgs.runCommand "quartz-patched-${quartzVersion}" {
src = quartzSrc;
} ''
cp -r $src $out
chmod -R u+w $out
cd $out
patch -p1 < ${./patches/01-glob-gitignore.patch}
patch -p1 < ${./patches/02-build-gitignore.patch}
patch -p1 < ${./patches/03-static-hugo.patch}
patch -p1 < ${./patches/04-oxhugofm-figure.patch}
'';
# Pre-fetch Quartz npm dependencies
quartzDeps = pkgs.buildNpmPackage {
pname = "org-garden-quartz-deps";
version = quartzVersion;
src = quartzPatched;
npmDepsHash = "sha256-7u+VlIx44B3/ivM9vLMIOn+e4TL4eS6B682vhS+Ikb4=";
dontBuild = true;
installPhase = ''
mkdir -p $out
cp -r node_modules $out/node_modules
'';
};
# =========================================================================
# Combined Application
# =========================================================================
# Wrapped org-garden with Quartz bundled
orgGardenApp = pkgs.writeShellApplication {
name = "org-garden";
runtimeInputs = [ emacsWithOxHugo pkgs.inotify-tools pkgs.nodejs_22 ];
text = ''
# Set up Quartz working directory
QUARTZ_WORK=$(mktemp -d)
trap 'rm -rf "$QUARTZ_WORK"' EXIT
# Copy patched Quartz source
cp -r ${quartzPatched}/. "$QUARTZ_WORK/"
chmod -R u+w "$QUARTZ_WORK"
# Copy default config files
cp ${./quartz-config/quartz.config.ts} "$QUARTZ_WORK/"
cp ${./quartz-config/quartz.layout.ts} "$QUARTZ_WORK/"
cp ${./quartz-config/globals.d.ts} "$QUARTZ_WORK/"
cp ${./quartz-config/index.d.ts} "$QUARTZ_WORK/"
# Link pre-built node_modules
ln -s ${quartzDeps}/node_modules "$QUARTZ_WORK/node_modules"
export QUARTZ_PATH="$QUARTZ_WORK"
export NODE_PATH="${pkgs.nodejs_22}/bin/node"
exec ${orgGardenEscript}/bin/org_garden "$@"
'';
};
in
{
packages.default = orgGardenApp;
packages.escript = orgGardenEscript;
packages.quartz-patched = quartzPatched;
devShells.default = pkgs.mkShell {
buildInputs = [
pkgs.elixir
pkgs.inotify-tools
emacsWithOxHugo
pkgs.nodejs_22
];
};
});
}

View File

@@ -1,189 +0,0 @@
defmodule OrgGarden do
@moduledoc """
Org-roam to website publishing pipeline.
Orchestrates:
1. Org → Markdown export (via Emacs + ox-hugo)
2. Markdown transforms (citations, etc.)
3. Markdown → HTML + serving (via Quartz)
## Usage
opts = %{
zotero_url: "http://localhost:23119",
bibtex_file: System.get_env("BIBTEX_FILE"),
citation_mode: :warn # :silent | :warn | :strict
}
# Batch: all .md files in a directory
OrgGarden.run(content_dir, [OrgGarden.Transforms.Citations], opts)
# Targeted: specific files only
OrgGarden.run_on_files(["content/foo.md"], [OrgGarden.Transforms.Citations], opts)
# With pre-initialized transforms (for watch mode, avoids re-init)
initialized = OrgGarden.init_transforms([OrgGarden.Transforms.Citations], opts)
OrgGarden.run_on_files_with(["content/foo.md"], initialized, opts)
"""
require Logger
@type transform :: module()
@type initialized_transform :: {module(), term()}
@type opts :: map()
@doc "One-shot build: org files → static site"
def build(notes_dir, opts \\ []) do
OrgGarden.CLI.handle_build([notes_dir | opts_to_args(opts)])
end
@doc "Development server: watch + live reload"
def serve(notes_dir, opts \\ []) do
OrgGarden.CLI.handle_serve([notes_dir | opts_to_args(opts)])
end
@doc "Export only: org files → markdown (no Quartz)"
def export(notes_dir, opts \\ []) do
OrgGarden.CLI.handle_export([notes_dir | opts_to_args(opts)])
end
defp opts_to_args(opts) do
Enum.flat_map(opts, fn
{:output, v} -> ["--output", v]
{:port, v} -> ["--port", to_string(v)]
{:ws_port, v} -> ["--ws-port", to_string(v)]
{:watch, true} -> ["--watch"]
{:watch, false} -> []
_ -> []
end)
end
@doc """
Initialize transform modules. Returns a list of `{module, state}` tuples.
Call this once and reuse the result with `run_on_files_with/3` to avoid
re-initializing transforms on every file change (e.g., in watch mode).
"""
@spec init_transforms([transform()], opts()) :: [initialized_transform()]
def init_transforms(transforms, opts) do
Enum.map(transforms, fn mod ->
state = mod.init(opts)
{mod, state}
end)
end
@doc """
Tear down previously initialized transforms, releasing any resources.
"""
@spec teardown_transforms([initialized_transform()]) :: :ok
def teardown_transforms(initialized) do
Enum.each(initialized, fn {mod, state} ->
if function_exported?(mod, :teardown, 1) do
mod.teardown(state)
end
end)
:ok
end
@doc """
Run all transforms over every `.md` file under `content_dir`.
Initializes and tears down transforms automatically.
Returns `{:ok, stats}` where stats maps each transform to a count of files it changed.
"""
@spec run(String.t(), [transform()], opts()) :: {:ok, map()}
def run(content_dir, transforms, opts \\ %{}) do
md_files =
content_dir
|> Path.join("**/*.md")
|> Path.wildcard()
if md_files == [] do
Logger.warning("OrgGarden: no .md files found in #{content_dir}")
{:ok, %{}}
else
Logger.info(
"OrgGarden: processing #{length(md_files)} markdown files " <>
"with #{length(transforms)} transform(s)"
)
initialized = init_transforms(transforms, opts)
stats = apply_transforms(md_files, initialized, opts)
teardown_transforms(initialized)
{:ok, stats}
end
end
@doc """
Run all transforms over specific `.md` files only.
Initializes and tears down transforms automatically.
Files that don't exist are silently skipped.
"""
@spec run_on_files([String.t()], [transform()], opts()) :: {:ok, map()}
def run_on_files(file_paths, transforms, opts \\ %{}) do
existing = Enum.filter(file_paths, &File.exists?/1)
if existing == [] do
Logger.debug("OrgGarden: no files to process")
{:ok, %{}}
else
Logger.info("OrgGarden: processing #{length(existing)} file(s)")
initialized = init_transforms(transforms, opts)
stats = apply_transforms(existing, initialized, opts)
teardown_transforms(initialized)
{:ok, stats}
end
end
@doc """
Run pre-initialized transforms over specific `.md` files.
Does NOT call `init` or `teardown` — the caller manages the transform
lifecycle. Use this in watch mode to avoid re-initializing on every change.
"""
@spec run_on_files_with([String.t()], [initialized_transform()], opts()) :: {:ok, map()}
def run_on_files_with(file_paths, initialized, opts) do
existing = Enum.filter(file_paths, &File.exists?/1)
if existing == [] do
Logger.debug("OrgGarden: no files to process")
{:ok, %{}}
else
stats = apply_transforms(existing, initialized, opts)
{:ok, stats}
end
end
# -------------------------------------------------------------------
# Private
# -------------------------------------------------------------------
defp apply_transforms(md_files, initialized, opts) do
Enum.reduce(md_files, %{}, fn path, acc ->
original = File.read!(path)
{transformed, file_stats} =
Enum.reduce(initialized, {original, %{}}, fn {mod, state}, {content, fstats} ->
result = mod.apply(content, state, opts)
changed = result != content
{result,
Map.update(
fstats,
mod,
if(changed, do: 1, else: 0),
&(&1 + if(changed, do: 1, else: 0))
)}
end)
if transformed != original do
File.write!(path, transformed)
Logger.debug("OrgGarden: updated #{Path.relative_to_cwd(path)}")
end
Map.merge(acc, file_stats, fn _k, a, b -> a + b end)
end)
end
end

View File

@@ -1,14 +0,0 @@
defmodule OrgGarden.Application do
@moduledoc false
use Application
@impl true
def start(_type, _args) do
children = [
{Finch, name: OrgGarden.Finch}
]
opts = [strategy: :one_for_one, name: OrgGarden.AppSupervisor]
Supervisor.start_link(children, opts)
end
end

View File

@@ -1,375 +0,0 @@
defmodule OrgGarden.CLI do
@moduledoc """
Escript entry point for the org-garden pipeline.
## Commands
org-garden serve <notes-dir> [--port 8080] [--ws-port 3001]
org-garden build <notes-dir> [--output <path>]
org-garden export <notes-dir> [--watch]
### serve
Development server with watch + live reload. Starts both the org→md
watcher and Quartz in serve mode.
### build
One-shot build for CI/production. Exports org files, runs transforms,
then builds static site with Quartz.
### export
Just export org→md (current pipeline behavior). Use --watch for
incremental re-export on file changes.
## Arguments
notes-dir Path to the directory containing `.org` notes (required).
Also accepts the `NOTES_DIR` env var.
## Options
--output <path> Output root directory (used as ox-hugo base dir).
Defaults to the `OUTPUT_DIR` env var, or the current
working directory.
--content-dir <p> Output directory for exported Markdown. Defaults to
`<output>/content`.
--port <n> HTTP server port (default: 8080). Only for `serve`.
--ws-port <n> WebSocket hot reload port (default: 3001). Only for `serve`.
--watch After initial batch, watch notes-dir for changes and
incrementally re-export affected files. Only for `export`.
## Environment Variables
BIBTEX_FILE Path to a `.bib` file used as citation fallback.
ZOTERO_URL Zotero Better BibTeX base URL (default: http://localhost:23119).
CITATION_MODE silent | warn (default) | strict.
QUARTZ_PATH Path to quartz directory (required for serve/build).
NODE_PATH Node.js executable (default: node).
"""
require Logger
@transforms [OrgGarden.Transforms.Citations]
def main(argv) do
Application.ensure_all_started(:org_garden)
case argv do
["serve" | rest] -> handle_serve(rest)
["build" | rest] -> handle_build(rest)
["export" | rest] -> handle_export(rest)
# Legacy: treat bare args as export command for backward compatibility
[_ | _] -> handle_export(argv)
_ -> abort("Usage: org-garden <serve|build|export> <notes-dir> [options]")
end
end
# ---------------------------------------------------------------------------
# Command: serve
# ---------------------------------------------------------------------------
def handle_serve(argv) do
require_quartz_env()
{notes_dir, output_dir, content_dir, opts} = parse_serve_args(argv)
pipeline_opts = build_pipeline_opts()
# Initial batch export
wipe(content_dir)
export_all(notes_dir, output_dir)
run_pipeline(content_dir, pipeline_opts)
generate_index(content_dir)
IO.puts("==> Starting development server...")
{:ok, _pid} =
OrgGarden.Supervisor.start_link(
notes_dir: notes_dir,
output_dir: output_dir,
content_dir: content_dir,
pipeline_opts: pipeline_opts,
transforms: @transforms,
port: opts[:port] || 8080,
ws_port: opts[:ws_port] || 3001
)
IO.puts("==> Server running at http://localhost:#{opts[:port] || 8080}")
IO.puts("==> Watching #{notes_dir} for changes (Ctrl+C to stop)")
Process.sleep(:infinity)
end
defp parse_serve_args(argv) do
{opts, positional, _invalid} =
OptionParser.parse(argv,
strict: [
output: :string,
content_dir: :string,
port: :integer,
ws_port: :integer
]
)
notes_dir = extract_notes_dir(positional, "serve")
output_dir = extract_output_dir(opts)
content_dir = extract_content_dir(opts, output_dir)
{notes_dir, output_dir, content_dir, opts}
end
# ---------------------------------------------------------------------------
# Command: build
# ---------------------------------------------------------------------------
def handle_build(argv) do
quartz_path = require_quartz_env()
{notes_dir, output_dir, content_dir, _opts} = parse_build_args(argv)
pipeline_opts = build_pipeline_opts()
# Full batch export
wipe(content_dir)
export_all(notes_dir, output_dir)
run_pipeline(content_dir, pipeline_opts)
generate_index(content_dir)
node_path = System.get_env("NODE_PATH", "node")
IO.puts("==> Building static site with Quartz...")
{output, status} =
System.cmd(
node_path,
[
Path.join(quartz_path, "quartz/bootstrap-cli.mjs"),
"build",
"--directory",
content_dir,
"--output",
Path.join(output_dir, "public")
],
cd: quartz_path,
stderr_to_stdout: true
)
IO.puts(output)
if status != 0 do
abort("Quartz build failed with status #{status}")
end
IO.puts("==> Build complete. Output: #{Path.join(output_dir, "public")}")
end
defp parse_build_args(argv) do
{opts, positional, _invalid} =
OptionParser.parse(argv,
strict: [output: :string, content_dir: :string]
)
notes_dir = extract_notes_dir(positional, "build")
output_dir = extract_output_dir(opts)
content_dir = extract_content_dir(opts, output_dir)
{notes_dir, output_dir, content_dir, opts}
end
# ---------------------------------------------------------------------------
# Command: export (original pipeline behavior)
# ---------------------------------------------------------------------------
def handle_export(argv) do
{notes_dir, output_dir, content_dir, watch?} = parse_export_args(argv)
pipeline_opts = build_pipeline_opts()
# Phase 1-4: full batch export
wipe(content_dir)
export_all(notes_dir, output_dir)
run_pipeline(content_dir, pipeline_opts)
generate_index(content_dir)
md_count =
content_dir
|> Path.join("**/*.md")
|> Path.wildcard()
|> length()
IO.puts("==> Done. #{md_count} markdown files in #{content_dir}")
# Phase 5: optional watch mode
if watch? do
IO.puts("==> Watching #{notes_dir} for .org changes... (Ctrl+C to stop)")
{:ok, _pid} =
OrgGarden.Watcher.start_link(
notes_dir: notes_dir,
output_dir: output_dir,
content_dir: content_dir,
pipeline_opts: pipeline_opts,
transforms: @transforms
)
Process.sleep(:infinity)
end
end
defp parse_export_args(argv) do
{opts, positional, _invalid} =
OptionParser.parse(argv,
strict: [output: :string, content_dir: :string, watch: :boolean]
)
notes_dir = extract_notes_dir(positional, "export")
output_dir = extract_output_dir(opts)
content_dir = extract_content_dir(opts, output_dir)
watch? = Keyword.get(opts, :watch, false)
{notes_dir, output_dir, content_dir, watch?}
end
# ---------------------------------------------------------------------------
# Shared argument extraction
# ---------------------------------------------------------------------------
defp extract_notes_dir(positional, command) do
notes_dir =
case positional do
[dir | _] ->
dir
[] ->
System.get_env("NOTES_DIR") ||
abort("Usage: org-garden #{command} <notes-dir> [options]")
end
notes_dir = Path.expand(notes_dir)
unless File.dir?(notes_dir) do
abort("Error: notes directory does not exist: #{notes_dir}")
end
notes_dir
end
defp extract_output_dir(opts) do
(opts[:output] || System.get_env("OUTPUT_DIR") || File.cwd!())
|> Path.expand()
end
defp extract_content_dir(opts, output_dir) do
(opts[:content_dir] || Path.join(output_dir, "content"))
|> Path.expand()
end
# ---------------------------------------------------------------------------
# Phase 1: Wipe content/
# ---------------------------------------------------------------------------
defp wipe(content_dir) do
IO.puts("==> Wiping #{content_dir}")
File.mkdir_p!(content_dir)
content_dir
|> File.ls!()
|> Enum.reject(&(&1 == ".gitkeep"))
|> Enum.each(fn entry ->
Path.join(content_dir, entry) |> File.rm_rf!()
end)
end
# ---------------------------------------------------------------------------
# Phase 2: Export org files via Emacs + ox-hugo
# ---------------------------------------------------------------------------
defp export_all(notes_dir, output_dir) do
IO.puts("==> Exporting org files from #{notes_dir}")
case OrgGarden.Export.export_all(notes_dir, output_dir) do
{:ok, 0} ->
IO.puts("No .org files found in #{notes_dir}")
System.halt(0)
{:ok, count} ->
IO.puts(" exported #{count} file(s)")
{:error, failures} ->
IO.puts(:stderr, "\nFailed to export #{length(failures)} file(s):")
Enum.each(failures, fn {f, {:error, reason}} ->
IO.puts(:stderr, " #{f}: #{inspect(reason)}")
end)
System.halt(1)
end
end
# ---------------------------------------------------------------------------
# Phase 3: Markdown transformation pipeline
# ---------------------------------------------------------------------------
defp run_pipeline(content_dir, pipeline_opts) do
IO.puts("==> Running markdown pipeline")
{:ok, stats} = OrgGarden.run(content_dir, @transforms, pipeline_opts)
Enum.each(stats, fn {mod, count} ->
IO.puts(" #{inspect(mod)}: #{count} file(s) modified")
end)
end
# ---------------------------------------------------------------------------
# Phase 4: Generate default index.md if none was exported
# ---------------------------------------------------------------------------
defp generate_index(content_dir) do
IO.puts("==> Generating index")
OrgGarden.Index.generate(content_dir)
end
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
defp require_quartz_env do
case System.get_env("QUARTZ_PATH") do
nil ->
abort("""
Error: QUARTZ_PATH environment variable not set.
The 'serve' and 'build' commands require Quartz to be available.
Use the wrapper scripts that set up the environment:
nix run .#notes -- <notes-dir> # for serve
nix run .#build -- <notes-dir> # for build
Or set QUARTZ_PATH manually to point to a quartz-org-roam checkout
with node_modules installed.
For export-only mode (no Quartz), use:
org-garden export <notes-dir> [--watch]
""")
path ->
unless File.exists?(Path.join(path, "quartz/bootstrap-cli.mjs")) do
abort("Error: QUARTZ_PATH=#{path} does not contain quartz/bootstrap-cli.mjs")
end
path
end
end
defp build_pipeline_opts do
%{
zotero_url: System.get_env("ZOTERO_URL", "http://localhost:23119"),
bibtex_file: System.get_env("BIBTEX_FILE"),
citation_mode:
case System.get_env("CITATION_MODE", "warn") do
"silent" -> :silent
"strict" -> :strict
_ -> :warn
end
}
end
defp abort(message) do
IO.puts(:stderr, message)
System.halt(1)
end
end

View File

@@ -1,135 +0,0 @@
defmodule OrgGarden.Export do
@moduledoc """
Org-to-Markdown export via Emacs batch + ox-hugo.
Provides both single-file and batch export, plus a helper to compute
the expected `.md` output path for a given `.org` source file.
"""
require Logger
@doc """
Export a single `.org` file to Markdown via `emacs --batch` + ox-hugo.
Returns `{:ok, exit_code}` with the emacs exit code (0 = success),
or `{:error, reason}` if the command could not be executed.
"""
@spec export_file(String.t(), String.t(), String.t()) :: {:ok, non_neg_integer()} | {:error, term()}
def export_file(orgfile, notes_dir, output_dir) do
section =
orgfile
|> Path.dirname()
|> Path.relative_to(notes_dir)
# ox-hugo requires static/ to exist for image asset copying
File.mkdir_p!(Path.join(output_dir, "static"))
{output, exit_code} =
System.cmd(
"emacs",
[
"--batch",
"--eval", "(require 'ox-hugo)",
"--eval", """
(org-cite-register-processor 'passthrough
:export-citation
(lambda (citation _style _backend _info)
(let ((keys (mapcar (lambda (ref)
(concat "@" (org-element-property :key ref)))
(org-cite-get-references citation))))
(format "[cite:%s]" (string-join keys ";")))))
""",
"--eval", "(setq org-cite-export-processors '((t passthrough)))",
"--eval", ~s[(setq org-hugo-base-dir "#{output_dir}")],
"--eval", ~s[(setq org-hugo-default-section-directory "#{section}")],
"--visit", orgfile,
"--funcall", "org-hugo-export-to-md"
],
stderr_to_stdout: true
)
filtered =
output
|> String.split("\n")
|> Enum.reject(&String.match?(&1, ~r/^Loading|^ad-handle|^For information/))
|> Enum.join("\n")
if filtered != "", do: Logger.info("emacs: #{filtered}")
if exit_code == 0 do
{:ok, exit_code}
else
{:error, {:emacs_exit, exit_code, filtered}}
end
rescue
e -> {:error, e}
end
@doc """
Export all `.org` files found under `notes_dir`.
Returns `{:ok, count}` where `count` is the number of successfully
exported files, or `{:error, failures}` if any files failed.
"""
@spec export_all(String.t(), String.t()) :: {:ok, non_neg_integer()} | {:error, list()}
def export_all(notes_dir, output_dir) do
org_files =
Path.join(notes_dir, "**/*.org")
|> Path.wildcard()
if org_files == [] do
Logger.warning("No .org files found in #{notes_dir}")
{:ok, 0}
else
Logger.info("Exporting #{length(org_files)} org file(s) from #{notes_dir}")
results =
Enum.map(org_files, fn orgfile ->
IO.puts(" exporting: #{orgfile}")
{orgfile, export_file(orgfile, notes_dir, output_dir)}
end)
failures =
Enum.filter(results, fn
{_, {:ok, _}} -> false
{_, {:error, _}} -> true
end)
if failures == [] do
{:ok, length(results)}
else
{:error, failures}
end
end
end
@doc """
Compute the expected `.md` path for a given `.org` file.
Uses the same section-mapping logic as ox-hugo: the relative directory
of the `.org` file within `notes_dir` becomes the section directory
under `content_dir`.
## Examples
iex> OrgGarden.Export.expected_md_path("/notes/bus/emt.org", "/notes", "/out/content")
"/out/content/bus/emt.md"
iex> OrgGarden.Export.expected_md_path("/notes/top-level.org", "/notes", "/out/content")
"/out/content/top-level.md"
"""
@spec expected_md_path(String.t(), String.t(), String.t()) :: String.t()
def expected_md_path(orgfile, notes_dir, content_dir) do
section =
orgfile
|> Path.dirname()
|> Path.relative_to(notes_dir)
basename = Path.basename(orgfile, ".org") <> ".md"
case section do
"." -> Path.join(content_dir, basename)
_ -> Path.join([content_dir, section, basename])
end
end
end

View File

@@ -1,83 +0,0 @@
defmodule OrgGarden.Index do
@moduledoc """
Generates a fallback `index.md` in the content directory if none was
exported from an `.org` file.
The generated index lists all markdown pages alphabetically with links.
"""
@doc """
Generate `content_dir/index.md` if it does not already exist.
If an `index.md` was already created by ox-hugo (from an `index.org`),
it is left untouched.
"""
@spec generate(String.t()) :: :ok
def generate(content_dir) do
index_path = Path.join(content_dir, "index.md")
unless File.exists?(index_path) do
IO.puts(" generating default index.md")
pages =
Path.join(content_dir, "**/*.md")
|> Path.wildcard()
|> Enum.map(fn path ->
slug = Path.relative_to(path, content_dir) |> Path.rootname()
title =
path
|> File.read!()
|> then(fn content ->
case Regex.run(~r/^title\s*=\s*"(.+)"/m, content) do
[_, t] -> t
_ -> slug
end
end)
{slug, title}
end)
|> Enum.sort_by(fn {_, title} -> title end)
|> Enum.map(fn {slug, title} -> "- [#{title}](#{slug})" end)
|> Enum.join("\n")
File.write!(index_path, """
---
title: Index
---
#{pages}
""")
end
:ok
end
@doc """
Regenerate the index by removing any previously generated one first.
Only removes the index if it was generated by us (contains `title: Index`).
User-exported index files (from `index.org`) are left untouched.
"""
@spec regenerate(String.t()) :: :ok
def regenerate(content_dir) do
index_path = Path.join(content_dir, "index.md")
if File.exists?(index_path) do
content = File.read!(index_path)
if generated_index?(content) do
File.rm!(index_path)
end
end
generate(content_dir)
end
defp generated_index?(content) do
# Our generated index uses "title: Index" in YAML frontmatter.
# ox-hugo uses TOML frontmatter (title = "..."), so this won't
# match user-exported files.
String.contains?(content, "title: Index")
end
end

View File

@@ -1,118 +0,0 @@
defmodule OrgGarden.Quartz do
@moduledoc """
Manages Quartz Node.js process as an Erlang Port.
Required environment:
- QUARTZ_PATH: path to quartz repo (with node_modules)
- NODE_PATH: path to node executable (default: "node")
Starts Quartz in serve mode (`npx quartz build --serve`) and forwards
all stdout/stderr output to the Logger with a `[quartz]` prefix.
If Quartz exits, this GenServer will stop, which triggers the supervisor
to restart the entire supervision tree (strategy: :one_for_all).
"""
use GenServer
require Logger
defstruct [:port, :quartz_path, :content_dir, :http_port, :ws_port]
# -------------------------------------------------------------------
# Client API
# -------------------------------------------------------------------
@doc """
Start the Quartz process as a linked GenServer.
## Options
* `:content_dir` — directory where markdown files are located (required)
* `:port` — HTTP server port (default: 8080)
* `:ws_port` — WebSocket hot reload port (default: 3001)
"""
def start_link(opts) do
GenServer.start_link(__MODULE__, opts, name: __MODULE__)
end
# -------------------------------------------------------------------
# GenServer callbacks
# -------------------------------------------------------------------
@impl true
def init(opts) do
quartz_path =
System.get_env("QUARTZ_PATH") ||
raise "QUARTZ_PATH environment variable not set"
node_path = System.get_env("NODE_PATH", "node")
content_dir = Keyword.fetch!(opts, :content_dir)
http_port = Keyword.get(opts, :port, 8080)
ws_port = Keyword.get(opts, :ws_port, 3001)
cli_path = Path.join(quartz_path, "quartz/bootstrap-cli.mjs")
unless File.exists?(cli_path) do
raise "Quartz CLI not found at #{cli_path}. Check QUARTZ_PATH."
end
args = [
cli_path,
"build",
"--serve",
"--directory", content_dir,
"--port", to_string(http_port),
"--wsPort", to_string(ws_port)
]
Logger.info("[quartz] Starting: #{node_path} #{Enum.join(args, " ")}")
Logger.info("[quartz] Working directory: #{quartz_path}")
port =
Port.open({:spawn_executable, node_path}, [
:binary,
:exit_status,
:stderr_to_stdout,
args: args,
cd: quartz_path,
env: [{~c"NODE_NO_WARNINGS", ~c"1"}]
])
state = %__MODULE__{
port: port,
quartz_path: quartz_path,
content_dir: content_dir,
http_port: http_port,
ws_port: ws_port
}
{:ok, state}
end
@impl true
def handle_info({port, {:data, data}}, %{port: port} = state) do
data
|> String.split("\n", trim: true)
|> Enum.each(&Logger.info("[quartz] #{&1}"))
{:noreply, state}
end
@impl true
def handle_info({port, {:exit_status, status}}, %{port: port} = state) do
Logger.error("[quartz] Process exited with status #{status}")
{:stop, {:quartz_exit, status}, state}
end
@impl true
def terminate(_reason, %{port: port}) when is_port(port) do
# Attempt graceful shutdown
Port.close(port)
:ok
rescue
_ -> :ok
end
def terminate(_reason, _state), do: :ok
end

View File

@@ -1,178 +0,0 @@
defmodule OrgGarden.Resolvers.BibTeX do
@moduledoc """
Resolves citation keys from a local BibTeX (.bib) file.
Configured via the `BIBTEX_FILE` environment variable, or passed directly
as `opts.bibtex_file`. The file is parsed once at init time and the
resulting entry map is reused for all lookups.
Supports extracting: author last names, year, title, DOI, URL.
BibTeX entry format parsed:
@type{citationkey,
author = {Last, First and Last2, First2},
year = {2021},
title = {Some Title},
doi = {10.xxxx/yyyy},
url = {https://example.com},
}
Returns `{:ok, %{label: "Author, Year", url: "..."}}` or `:error`.
"""
require Logger
# ------------------------------------------------------------------
# Public API
# ------------------------------------------------------------------
@doc """
Parse a .bib file and return a map of `%{citation_key => entry_map}`.
Returns `{:ok, entries}` or `{:error, reason}`.
"""
@spec load(String.t()) :: {:ok, map()} | {:error, term()}
def load(path) do
case File.read(path) do
{:ok, content} ->
entries = parse_entries(content)
Logger.info("BibTeX: loaded #{map_size(entries)} entries from #{path}")
{:ok, entries}
{:error, reason} ->
{:error, reason}
end
end
@doc """
Resolve a citation key from pre-loaded BibTeX entries.
"""
@spec resolve(String.t(), map()) :: {:ok, map()} | :error
def resolve(key, entries) do
case Map.fetch(entries, key) do
{:ok, entry} ->
label = build_label(entry)
url = build_url(entry)
{:ok, %{label: label, url: url}}
:error ->
:error
end
end
# ------------------------------------------------------------------
# Parsing
# ------------------------------------------------------------------
# Match @type{key, ...fields...}
# We handle nested braces by scanning character by character after
# finding the opening, rather than relying on a single regex.
@entry_header ~r/@\w+\s*\{\s*([^,\s]+)\s*,/
defp parse_entries(content) do
# Split on "@" boundaries, then parse each chunk
content
|> String.split(~r/(?=@\w+\s*\{)/, trim: true)
|> Enum.reduce(%{}, fn chunk, acc ->
case Regex.run(@entry_header, chunk) do
[_, key] ->
fields = parse_fields(chunk)
Map.put(acc, String.trim(key), fields)
_ ->
acc
end
end)
end
# Extract key = {value} or key = "value" pairs from an entry block.
# Handles simple single-depth braces; good enough for common fields.
@field_regex ~r/(\w+)\s*=\s*(?:\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}|"([^"]*)")/
defp parse_fields(chunk) do
@field_regex
|> Regex.scan(chunk)
|> Enum.reduce(%{}, fn match, acc ->
field_name = Enum.at(match, 1) |> String.downcase()
# Value is in capture group 2 (braces) or 3 (quotes)
value =
case {Enum.at(match, 2, ""), Enum.at(match, 3, "")} do
{"", q} -> q
{b, _} -> b
end
Map.put(acc, field_name, String.trim(value))
end)
end
# ------------------------------------------------------------------
# Label & URL building
# ------------------------------------------------------------------
defp build_label(entry) do
author_part =
entry
|> Map.get("author", "")
|> parse_authors()
|> format_authors()
year = Map.get(entry, "year", Map.get(entry, "date", ""))
year = extract_year(year)
if year && author_part != "", do: "#{author_part}, #{year}", else: author_part
end
defp parse_authors(""), do: []
defp parse_authors(author_str) do
author_str
|> String.split(" and ", trim: true)
|> Enum.map(&extract_last_name/1)
|> Enum.reject(&(&1 == ""))
end
# Handles "Last, First" and "First Last" formats
defp extract_last_name(name) do
name = String.trim(name)
cond do
String.contains?(name, ",") ->
name |> String.split(",") |> List.first() |> String.trim()
String.contains?(name, " ") ->
name |> String.split(" ") |> List.last() |> String.trim()
true ->
name
end
end
defp format_authors([]), do: "Unknown"
defp format_authors([single]), do: single
defp format_authors([first | rest]), do: "#{first} & #{List.last(rest)}"
defp extract_year(""), do: nil
defp extract_year(str) do
case Regex.run(~r/\b(\d{4})\b/, str) do
[_, year] -> year
_ -> nil
end
end
defp build_url(entry) do
cond do
doi = Map.get(entry, "doi", "") |> non_empty() ->
"https://doi.org/#{doi}"
url = Map.get(entry, "url", "") |> non_empty() ->
url
true ->
nil
end
end
defp non_empty(""), do: nil
defp non_empty(v), do: v
end
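For reference, the deleted resolver was exercised like this (a minimal sketch; the notes.bib path and the smith2021 key are illustrative, not from the repo):

{:ok, entries} = OrgGarden.Resolvers.BibTeX.load("notes.bib")

case OrgGarden.Resolvers.BibTeX.resolve("smith2021", entries) do
  {:ok, %{label: label, url: url}} ->
    # e.g. {"Smith & Jones, 2021", "https://doi.org/10.1234/abcd"}
    {label, url}

  :error ->
    # unknown key -- callers fall through to the next resolver in the chain
    :not_found
end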

View File

@@ -1,18 +0,0 @@
defmodule OrgGarden.Resolvers.DOI do
@moduledoc """
Last-resort citation resolver — always succeeds.
If the citation key looks like a DOI (starts with "10."), returns a
`https://doi.org/...` link. Otherwise returns the key itself as a
plain label with no URL.
"""
@spec resolve(String.t()) :: {:ok, map()}
def resolve(key) do
if String.starts_with?(key, "10.") do
{:ok, %{label: key, url: "https://doi.org/#{key}"}}
else
{:ok, %{label: key, url: nil}}
end
end
end
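Because it always returns {:ok, ...}, this fallback is total; a quick illustration (keys hypothetical):

OrgGarden.Resolvers.DOI.resolve("10.1000/xyz123")
#=> {:ok, %{label: "10.1000/xyz123", url: "https://doi.org/10.1000/xyz123"}}

OrgGarden.Resolvers.DOI.resolve("smith2021")
#=> {:ok, %{label: "smith2021", url: nil}}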

View File

@@ -1,182 +0,0 @@
defmodule OrgGarden.Resolvers.Zotero do
@moduledoc """
Resolves citation keys via Zotero Better BibTeX's JSON-RPC API.
Requires Zotero to be running with the Better BibTeX plugin installed.
Default endpoint: http://localhost:23119/better-bibtex/json-rpc
Resolution strategy:
1. Search by citation key via `item.search`
2. If found, try to get a PDF attachment link (zotero://open-pdf/...)
3. Fall back to zotero://select/items/@key
Returns `{:ok, %{label: "Author, Year", url: "zotero://..."}}` or `:error`.
"""
require Logger
@rpc_path "/better-bibtex/json-rpc"
@doc """
Attempt to resolve `key` against a running Zotero instance.
`base_url` defaults to `http://localhost:23119`.
"""
@spec resolve(String.t(), String.t()) :: {:ok, map()} | :error
def resolve(key, base_url \\ "http://localhost:23119") do
url = base_url <> @rpc_path
payload =
Jason.encode!(%{
jsonrpc: "2.0",
method: "item.search",
params: [
[["citationKey", "is", key]]
],
id: 1
})
case Req.post(url,
body: payload,
headers: [{"content-type", "application/json"}],
receive_timeout: 5_000,
finch: OrgGarden.Finch
) do
{:ok, %{status: 200, body: body}} ->
parse_response(body, key, base_url)
{:ok, %{status: status}} ->
Logger.debug("Zotero: unexpected HTTP #{status} for key #{key}")
:error
{:error, reason} ->
Logger.debug("Zotero: connection failed for key #{key}: #{inspect(reason)}")
:error
other ->
Logger.debug("Zotero: unexpected result for key #{key}: #{inspect(other)}")
:error
end
rescue
e ->
Logger.debug("Zotero: exception resolving key #{key}: #{inspect(e)}")
:error
end
# ------------------------------------------------------------------
# Private helpers
# ------------------------------------------------------------------
defp parse_response(%{"result" => [item | _]}, key, base_url) do
label = build_label(item)
url = resolve_url(item, key, base_url)
{:ok, %{label: label, url: url}}
end
defp parse_response(%{"result" => []}, key, _base_url) do
Logger.debug("Zotero: no item found for key #{key}")
:error
end
defp parse_response(%{"error" => err}, key, _base_url) do
Logger.debug("Zotero: RPC error for key #{key}: #{inspect(err)}")
:error
end
defp parse_response(body, key, _base_url) do
Logger.debug("Zotero: unexpected response shape for key #{key}: #{inspect(body)}")
:error
end
defp fetch_pdf_url(key, base_url) do
payload =
Jason.encode!(%{
jsonrpc: "2.0",
method: "item.attachments",
params: [key],
id: 2
})
case Req.post(base_url <> @rpc_path,
body: payload,
headers: [{"content-type", "application/json"}],
receive_timeout: 5_000,
finch: OrgGarden.Finch
) do
{:ok, %{status: 200, body: %{"result" => attachments}}} when is_list(attachments) ->
attachments
|> Enum.find_value(fn att ->
open = Map.get(att, "open", "")
path = Map.get(att, "path", "")
if String.ends_with?(path, ".pdf"), do: open, else: nil
end)
_ ->
nil
end
rescue
_ -> nil
end
# CSL-JSON format: authors are under "author" with "family"/"given" keys.
# Year is under "issued" -> "date-parts" -> [[year, month, day]].
defp build_label(item) do
authors = Map.get(item, "author", [])
year = extract_year(item)
author_part =
case authors do
[] ->
"Unknown"
[single] ->
Map.get(single, "family", Map.get(single, "literal", "Unknown"))
[first | rest] ->
first_name = Map.get(first, "family", Map.get(first, "literal", "Unknown"))
last_name =
rest
|> List.last()
|> then(&Map.get(&1, "family", Map.get(&1, "literal", "Unknown")))
"#{first_name} & #{last_name}"
end
if year, do: "#{author_part}, #{year}", else: author_part
end
# "issued": {"date-parts": [["2021", 2, 3]]}
defp extract_year(item) do
case get_in(item, ["issued", "date-parts"]) do
[[year | _] | _] -> to_string(year)
_ -> nil
end
end
defp resolve_url(item, key, base_url) do
# Prefer zotero://open-pdf/... for items with a PDF attachment.
# Fall back to zotero://select/library/items/KEY to open the item in Zotero.
# The "id" field is a URI like "http://zotero.org/users/123/items/ABCD1234".
pdf_url = fetch_pdf_url(key, base_url)
if pdf_url do
pdf_url
else
item_key =
item
|> Map.get("id", "")
|> String.split("/")
|> List.last()
|> non_empty()
if item_key do
"zotero://select/library/items/#{item_key}"
else
"zotero://select/items/@#{key}"
end
end
end
defp non_empty(nil), do: nil
defp non_empty(""), do: nil
defp non_empty(v), do: v
end
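Assuming a Zotero instance with Better BibTeX is running locally (citation key hypothetical), resolution looked like:

case OrgGarden.Resolvers.Zotero.resolve("smith2021") do
  {:ok, %{label: label, url: url}} ->
    # label like "Smith & Jones, 2021"; url is a zotero:// open-pdf or select link
    {label, url}

  :error ->
    # Zotero unreachable, RPC error, or key not found
    :try_next_resolver
end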

View File

@@ -1,40 +0,0 @@
defmodule OrgGarden.Supervisor do
@moduledoc """
Supervises development server components.
Strategy: :one_for_all
If either child fails, restart both to ensure consistent state.
Children:
1. OrgGarden.Watcher - watches .org files for changes
2. OrgGarden.Quartz - runs Quartz Node.js server
## Usage
OrgGarden.Supervisor.start_link(
notes_dir: "/path/to/notes",
output_dir: "/path/to/output",
content_dir: "/path/to/output/content",
pipeline_opts: %{zotero_url: "...", ...},
transforms: [OrgGarden.Transforms.Citations],
port: 8080,
ws_port: 3001
)
"""
use Supervisor
def start_link(opts) do
Supervisor.start_link(__MODULE__, opts, name: __MODULE__)
end
@impl true
def init(opts) do
children = [
{OrgGarden.Watcher,
Keyword.take(opts, [:notes_dir, :output_dir, :content_dir, :pipeline_opts, :transforms])},
{OrgGarden.Quartz, Keyword.take(opts, [:content_dir, :port, :ws_port])}
]
Supervisor.init(children, strategy: :one_for_all)
end
end

View File

@@ -1,48 +0,0 @@
defmodule OrgGarden.Transform do
@moduledoc """
Behaviour that all markdown transform modules must implement.
## Callbacks
- `init/1` — called once before processing; returns transform-specific state.
Default implementation returns the opts map unchanged.
- `apply/3` — called per .md file; returns the (possibly modified) content.
- `teardown/1` — optional cleanup after all files are processed.
## Example
defmodule MyTransform do
@behaviour OrgGarden.Transform
@impl true
def init(opts), do: %{some_state: opts[:value]}
@impl true
def apply(content, state, _opts) do
String.replace(content, "foo", state.some_state)
end
end
"""
@doc "One-time initialisation. Returns opaque state passed to apply/3."
@callback init(opts :: map()) :: term()
@doc "Transform file content. Returns the (possibly modified) content string."
@callback apply(content :: String.t(), state :: term(), opts :: map()) :: String.t()
@doc "Optional cleanup after all files are processed."
@callback teardown(state :: term()) :: :ok
@optional_callbacks teardown: 1
defmacro __using__(_) do
quote do
@behaviour OrgGarden.Transform
@impl OrgGarden.Transform
def init(opts), do: opts
defoverridable init: 1
end
end
end
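A minimal transform built on the __using__ default (module name and substitution invented for illustration) relies on the injected init/1 and only implements apply/3:

defmodule MyGarden.Transforms.Shout do
  use OrgGarden.Transform

  @impl OrgGarden.Transform
  def apply(content, _state, _opts) do
    # init/1 defaulted to returning opts unchanged; state is unused here
    String.replace(content, "TODO", "**TODO**")
  end
end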

View File

@@ -1,231 +0,0 @@
defmodule OrgGarden.Transforms.Citations do
@moduledoc """
Markdown transform: resolves org-citar citation keys to hyperlinks.
## Recognised citation syntax (as output by ox-hugo from org-citar)
[cite:@key] → org-cite / citar standard (most common)
[cite:@key1;@key2] → multiple citations
cite:key → older roam-style bare cite syntax
## Resolution chain (in order)
1. Zotero (live instance via Better BibTeX JSON-RPC) — preferred
2. BibTeX file (BIBTEX_FILE env var) — fallback
3. DOI / bare key — always succeeds
## Modes (opts.citation_mode)
:silent — silently use DOI/bare-key fallback when Zotero+BibTeX fail
:warn — (default) emit a Logger.warning for unresolved keys
:strict — raise on unresolved keys (aborts pipeline)
## Format
Resolved citations are rendered as:
[Label](url) when a URL is available
[Label] when no URL could be determined (bare key fallback)
Multiple semicolon-separated keys become space-separated links:
[cite:@a;@b] → [Author A, 2020](url_a) [Author B, 2019](url_b)
## init/1 callback
Loads the BibTeX file (if configured) once before processing begins,
and probes Zotero availability, emitting warnings as appropriate.
"""
@behaviour OrgGarden.Transform
require Logger
alias OrgGarden.Resolvers.Zotero
alias OrgGarden.Resolvers.BibTeX
alias OrgGarden.Resolvers.DOI
# Match [cite:@key] and [cite:@key1;@key2;...] (org-cite / citar style)
@cite_bracket_regex ~r/\[cite:(@[^\]]+)\]/
# Match bare cite:key or cite:@key (older roam style, no brackets, optional @ prefix)
@cite_bare_regex ~r/(?<![(\[])cite:@?([a-zA-Z0-9_:-]+)/
# ------------------------------------------------------------------
# OrgGarden callbacks
# ------------------------------------------------------------------
@doc """
Called once before processing any files. Loads BibTeX, probes Zotero.
Returns a state map passed to every `apply/3` call.
"""
def init(opts) do
bibtex_entries = load_bibtex(opts)
zotero_available = probe_zotero(opts)
if not zotero_available and bibtex_entries == %{} do
Logger.warning(
"Citations: neither Zotero nor a BibTeX file is available. " <>
"All citations will fall back to bare-key rendering. " <>
"Set BIBTEX_FILE env var or start Zotero with Better BibTeX to resolve citations."
)
end
%{
bibtex_entries: bibtex_entries,
zotero_available: zotero_available,
zotero_url: Map.get(opts, :zotero_url, "http://localhost:23119"),
citation_mode: Map.get(opts, :citation_mode, :warn)
}
end
@doc """
Apply citation resolution to a single markdown file's content.
"""
def apply(content, state, _opts) do
content
|> resolve_bracket_citations(state)
|> resolve_bare_citations(state)
end
# ------------------------------------------------------------------
# Resolution passes
# ------------------------------------------------------------------
defp resolve_bracket_citations(content, state) do
Regex.replace(@cite_bracket_regex, content, fn _full, keys_str ->
keys_str
|> String.split(";")
|> Enum.map(&String.trim/1)
|> Enum.map(fn "@" <> key -> key end)
|> Enum.map(&resolve_key(&1, state))
|> Enum.join(" ")
end)
end
defp resolve_bare_citations(content, state) do
Regex.replace(@cite_bare_regex, content, fn _full, key ->
resolve_key(key, state)
end)
end
# ------------------------------------------------------------------
# Single-key resolution chain
# ------------------------------------------------------------------
defp resolve_key(key, state) do
info =
with :error <- try_zotero(key, state),
:error <- try_bibtex(key, state) do
handle_unresolved(key, state)
else
{:ok, citation_info} -> citation_info
end
format_result(info)
end
defp try_zotero(_key, %{zotero_available: false}), do: :error
defp try_zotero(key, %{zotero_url: url}) do
Zotero.resolve(key, url)
end
defp try_bibtex(_key, %{bibtex_entries: entries}) when map_size(entries) == 0, do: :error
defp try_bibtex(key, %{bibtex_entries: entries}) do
BibTeX.resolve(key, entries)
end
defp handle_unresolved(key, %{citation_mode: mode}) do
case mode do
:strict ->
raise "Citations: could not resolve citation key '#{key}' and mode is :strict"
:warn ->
Logger.warning("Citations: unresolved citation key '#{key}' — using bare-key fallback")
{:ok, result} = DOI.resolve(key)
result
:silent ->
{:ok, result} = DOI.resolve(key)
result
end
end
defp format_result(%{label: label, url: nil}), do: "[#{label}]"
defp format_result(%{label: label, url: url}), do: "[#{label}](#{url})"
# ------------------------------------------------------------------
# Init helpers
# ------------------------------------------------------------------
defp load_bibtex(opts) do
path = Map.get(opts, :bibtex_file) || System.get_env("BIBTEX_FILE")
cond do
is_nil(path) ->
Logger.debug("Citations: BIBTEX_FILE not set — BibTeX resolver disabled")
%{}
not File.exists?(path) ->
Logger.warning("Citations: BIBTEX_FILE=#{path} does not exist — BibTeX resolver disabled")
%{}
true ->
case BibTeX.load(path) do
{:ok, entries} -> entries
{:error, reason} ->
Logger.warning("Citations: failed to load BibTeX file #{path}: #{inspect(reason)}")
%{}
end
end
end
defp probe_zotero(opts) do
url = Map.get(opts, :zotero_url, "http://localhost:23119")
# Use a no-op JSON-RPC call to probe availability.
# /better-bibtex/cayw is intentionally avoided — it blocks waiting for
# user interaction and never returns without a pick.
payload =
Jason.encode!(%{
jsonrpc: "2.0",
method: "item.search",
params: [[[]]],
id: 0
})
result =
try do
Req.post(url <> "/better-bibtex/json-rpc",
body: payload,
headers: [{"content-type", "application/json"}],
receive_timeout: 3_000,
finch: OrgGarden.Finch
)
rescue
e -> {:error, e}
end
case result do
{:ok, %{status: 200}} ->
Logger.info("Citations: Zotero Better BibTeX is available at #{url}")
true
{:ok, %{status: status}} ->
Logger.warning(
"Citations: Zotero responded HTTP #{status} at #{url}" <>
"is Better BibTeX installed?"
)
false
_ ->
Logger.warning(
"Citations: Zotero not reachable at #{url}" <>
"start Zotero with Better BibTeX or set BIBTEX_FILE as fallback"
)
false
end
end
end
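Putting the chain together (a sketch; assumes neither Zotero nor a BibTeX file is available, so every key lands in the DOI/bare-key fallback):

state = OrgGarden.Transforms.Citations.init(%{citation_mode: :silent})

OrgGarden.Transforms.Citations.apply("See [cite:@10.1000/xyz123].", state, %{})
#=> "See [10.1000/xyz123](https://doi.org/10.1000/xyz123)."

OrgGarden.Transforms.Citations.apply("See cite:smith2021.", state, %{})
#=> "See [smith2021]."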

View File

@@ -1,236 +0,0 @@
defmodule OrgGarden.Watcher do
@moduledoc """
File-watching GenServer that detects `.org` file changes and triggers
incremental export + transform for only the affected files.
Uses the `file_system` package (inotify on Linux, fsevents on macOS)
to watch the notes directory. Events are debounced per-file (500ms)
to coalesce rapid writes (e.g., Emacs auto-save).
## Lifecycle
Started dynamically by `OrgGarden.CLI` after the initial batch export.
Transforms are initialized once at startup and reused across all
incremental rebuilds to avoid repeated Zotero probes and BibTeX loads.
## Usage
OrgGarden.Watcher.start_link(
notes_dir: "/path/to/notes",
output_dir: "/path/to/output",
content_dir: "/path/to/output/content",
pipeline_opts: %{zotero_url: "...", ...},
transforms: [OrgGarden.Transforms.Citations]
)
"""
use GenServer
require Logger
@debounce_ms 500
# -------------------------------------------------------------------
# Client API
# -------------------------------------------------------------------
@doc """
Start the watcher as a linked process.
## Options
* `:notes_dir` — directory to watch for `.org` changes (required)
* `:output_dir` — ox-hugo base dir (required)
* `:content_dir` — directory where `.md` files are written (required)
* `:pipeline_opts` — opts map passed to transforms (required)
* `:transforms` — list of transform modules (default: `[OrgGarden.Transforms.Citations]`)
"""
def start_link(opts) do
GenServer.start_link(__MODULE__, opts, name: __MODULE__)
end
# -------------------------------------------------------------------
# GenServer callbacks
# -------------------------------------------------------------------
@impl true
def init(opts) do
notes_dir = Keyword.fetch!(opts, :notes_dir)
output_dir = Keyword.fetch!(opts, :output_dir)
content_dir = Keyword.fetch!(opts, :content_dir)
pipeline_opts = Keyword.fetch!(opts, :pipeline_opts)
transforms = Keyword.get(opts, :transforms, [OrgGarden.Transforms.Citations])
# Initialize transforms once — reused for all incremental rebuilds
initialized_transforms = OrgGarden.init_transforms(transforms, pipeline_opts)
# Start the file system watcher
{:ok, watcher_pid} = FileSystem.start_link(dirs: [notes_dir], recursive: true)
FileSystem.subscribe(watcher_pid)
Logger.info("Watcher: monitoring #{notes_dir} for .org changes")
{:ok,
%{
notes_dir: notes_dir,
output_dir: output_dir,
content_dir: content_dir,
pipeline_opts: pipeline_opts,
watcher_pid: watcher_pid,
initialized_transforms: initialized_transforms,
pending: %{}
}}
end
@impl true
def handle_info({:file_event, _pid, {path, events}}, state) do
path = to_string(path)
if org_file?(path) and not temporary_file?(path) do
event_type = classify_events(events)
Logger.debug("Watcher: #{event_type} event for #{path}")
{:noreply, schedule_debounce(path, event_type, state)}
else
{:noreply, state}
end
end
@impl true
def handle_info({:file_event, _pid, :stop}, state) do
Logger.warning("Watcher: file system monitor stopped unexpectedly")
{:stop, :watcher_stopped, state}
end
@impl true
def handle_info({:debounced, path, event_type}, state) do
state = %{state | pending: Map.delete(state.pending, path)}
case event_type do
:deleted ->
handle_delete(path, state)
_created_or_modified ->
handle_change(path, state)
end
{:noreply, state}
end
@impl true
def terminate(_reason, state) do
OrgGarden.teardown_transforms(state.initialized_transforms)
:ok
end
# -------------------------------------------------------------------
# Event handling
# -------------------------------------------------------------------
defp handle_change(orgfile, state) do
%{
notes_dir: notes_dir,
output_dir: output_dir,
content_dir: content_dir,
pipeline_opts: pipeline_opts,
initialized_transforms: initialized_transforms
} = state
md_path = OrgGarden.Export.expected_md_path(orgfile, notes_dir, content_dir)
IO.puts("==> Changed: #{Path.relative_to(orgfile, notes_dir)}")
case OrgGarden.Export.export_file(orgfile, notes_dir, output_dir) do
{:ok, _} ->
IO.puts(" exported: #{Path.relative_to(md_path, content_dir)}")
{:ok, stats} = OrgGarden.run_on_files_with([md_path], initialized_transforms, pipeline_opts)
Enum.each(stats, fn {mod, count} ->
if count > 0, do: IO.puts(" #{inspect(mod)}: #{count} file(s) modified")
end)
regenerate_index(content_dir)
IO.puts("==> Done")
{:error, reason} ->
Logger.error("Watcher: export failed for #{orgfile}: #{inspect(reason)}")
end
end
defp handle_delete(orgfile, state) do
%{notes_dir: notes_dir, content_dir: content_dir} = state
md_path = OrgGarden.Export.expected_md_path(orgfile, notes_dir, content_dir)
IO.puts("==> Deleted: #{Path.relative_to(orgfile, notes_dir)}")
if File.exists?(md_path) do
File.rm!(md_path)
IO.puts(" removed: #{Path.relative_to(md_path, content_dir)}")
# Clean up empty parent directories left behind
cleanup_empty_dirs(Path.dirname(md_path), content_dir)
end
regenerate_index(content_dir)
IO.puts("==> Done")
end
# -------------------------------------------------------------------
# Index generation
# -------------------------------------------------------------------
defp regenerate_index(content_dir) do
OrgGarden.Index.regenerate(content_dir)
end
# -------------------------------------------------------------------
# Helpers
# -------------------------------------------------------------------
defp schedule_debounce(path, event_type, state) do
# Cancel any existing timer for this path
case Map.get(state.pending, path) do
nil -> :ok
old_ref -> Process.cancel_timer(old_ref)
end
ref = Process.send_after(self(), {:debounced, path, event_type}, @debounce_ms)
%{state | pending: Map.put(state.pending, path, ref)}
end
defp org_file?(path), do: String.ends_with?(path, ".org")
defp temporary_file?(path) do
basename = Path.basename(path)
# Emacs creates temp files like .#file.org and #file.org#
String.starts_with?(basename, ".#") or
(String.starts_with?(basename, "#") and String.ends_with?(basename, "#"))
end
defp classify_events(events) do
cond do
:removed in events or :deleted in events -> :deleted
:created in events -> :created
:modified in events or :changed in events -> :modified
# renamed can mean moved into or out of the watched dir; treat it as
# :modified -- a file that is gone simply fails the re-export with a
# logged error instead of crashing the watcher
:renamed in events -> :modified
true -> :modified
end
end
defp cleanup_empty_dirs(dir, stop_at) do
dir = Path.expand(dir)
stop_at = Path.expand(stop_at)
if dir != stop_at and File.dir?(dir) do
case File.ls!(dir) do
[] ->
File.rmdir!(dir)
cleanup_empty_dirs(Path.dirname(dir), stop_at)
_ ->
:ok
end
end
end
end

View File

@@ -1,34 +0,0 @@
defmodule OrgGarden.MixProject do
use Mix.Project
def project do
[
app: :org_garden,
version: "0.1.0",
elixir: "~> 1.17",
start_permanent: Mix.env() == :prod,
deps: deps(),
escript: escript()
]
end
def application do
[
extra_applications: [:logger],
mod: {OrgGarden.Application, []}
]
end
defp escript do
[main_module: OrgGarden.CLI]
end
defp deps do
[
{:finch, "~> 0.19"},
{:req, "~> 0.5"},
{:jason, "~> 1.4"},
{:file_system, "~> 1.0"}
]
end
end

View File

@@ -1,12 +0,0 @@
%{
"file_system": {:hex, :file_system, "1.1.1", "31864f4685b0148f25bd3fbef2b1228457c0c89024ad67f7a81a3ffbc0bbad3a", [:mix], [], "hexpm", "7a15ff97dfe526aeefb090a7a9d3d03aa907e100e262a0f8f7746b78f8f87a5d"},
"finch": {:hex, :finch, "0.21.0", "b1c3b2d48af02d0c66d2a9ebfb5622be5c5ecd62937cf79a88a7f98d48a8290c", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "87dc6e169794cb2570f75841a19da99cfde834249568f2a5b121b809588a4377"},
"hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"},
"jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"},
"mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], [], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"},
"mint": {:hex, :mint, "1.7.1", "113fdb2b2f3b59e47c7955971854641c61f378549d73e829e1768de90fc1abf1", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "fceba0a4d0f24301ddee3024ae116df1c3f4bb7a563a731f45fdfeb9d39a231b"},
"nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"},
"nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"},
"req": {:hex, :req, "0.5.17", "0096ddd5b0ed6f576a03dde4b158a0c727215b15d2795e59e0916c6971066ede", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "0b8bc6ffdfebbc07968e59d3ff96d52f2202d0536f10fef4dc11dc02a2a43e39"},
"telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"},
}

View File

@@ -1,19 +0,0 @@
diff --git a/quartz/util/glob.ts b/quartz/util/glob.ts
index 7a71160..91fbaa7 100644
--- a/quartz/util/glob.ts
+++ b/quartz/util/glob.ts
@@ -10,12 +10,13 @@ export async function glob(
pattern: string,
cwd: string,
ignorePatterns: string[],
+ respectGitignore: boolean = true,
): Promise<FilePath[]> {
const fps = (
await globby(pattern, {
cwd,
ignore: ignorePatterns,
- gitignore: true,
+ gitignore: respectGitignore,
})
).map(toPosixPath)
return fps as FilePath[]

View File

@@ -1,13 +0,0 @@
diff --git a/quartz/build.ts b/quartz/build.ts
index b98f4a8..3166a06 100644
--- a/quartz/build.ts
+++ b/quartz/build.ts
@@ -71,7 +71,7 @@ async function buildQuartz(argv: Argv, mut: Mutex, clientRefresh: () => void) {
console.log(`Cleaned output directory \`${output}\` in ${perf.timeSince("clean")}`)
perf.addEvent("glob")
- const allFiles = await glob("**/*.*", argv.directory, cfg.configuration.ignorePatterns)
+ const allFiles = await glob("**/*.*", argv.directory, cfg.configuration.ignorePatterns, false)
const markdownPaths = allFiles.filter((fp) => fp.endsWith(".md")).sort()
console.log(
`Found ${markdownPaths.length} input files from \`${argv.directory}\` in ${perf.timeSince("glob")}`,

View File

@@ -1,34 +0,0 @@
diff --git a/quartz/plugins/emitters/static.ts b/quartz/plugins/emitters/static.ts
index 0b45290..8b34049 100644
--- a/quartz/plugins/emitters/static.ts
+++ b/quartz/plugins/emitters/static.ts
@@ -7,6 +7,7 @@ import { dirname } from "path"
export const Static: QuartzEmitterPlugin = () => ({
name: "Static",
async *emit({ argv, cfg }) {
+ // Copy Quartz's own internal static assets (quartz/static/) → output/static/
const staticPath = joinSegments(QUARTZ, "static")
const fps = await glob("**", staticPath, cfg.configuration.ignorePatterns)
const outputStaticPath = joinSegments(argv.output, "static")
@@ -18,6 +19,21 @@ export const Static: QuartzEmitterPlugin = () => ({
await fs.promises.copyFile(src, dest)
yield dest
}
+
+ // Copy user-facing static assets (static/) → output/ preserving paths.
+ // This mirrors Hugo's convention: static/ox-hugo/foo.png is served at /ox-hugo/foo.png,
+ // which matches the src="/ox-hugo/..." paths that ox-hugo writes into exported markdown.
+ const userStaticPath = "static"
+ if (fs.existsSync(userStaticPath)) {
+ const userFps = await glob("**", userStaticPath, cfg.configuration.ignorePatterns, false)
+ for (const fp of userFps) {
+ const src = joinSegments(userStaticPath, fp) as FilePath
+ const dest = joinSegments(argv.output, fp) as FilePath
+ await fs.promises.mkdir(dirname(dest), { recursive: true })
+ await fs.promises.copyFile(src, dest)
+ yield dest
+ }
+ }
},
async *partialEmit() {},
})

View File

@@ -1,44 +0,0 @@
diff --git a/quartz/plugins/transformers/oxhugofm.ts b/quartz/plugins/transformers/oxhugofm.ts
index 303566e..4fb5e2c 100644
--- a/quartz/plugins/transformers/oxhugofm.ts
+++ b/quartz/plugins/transformers/oxhugofm.ts
@@ -27,7 +27,10 @@ const defaultOptions: Options = {
const relrefRegex = new RegExp(/\[([^\]]+)\]\(\{\{< relref "([^"]+)" >\}\}\)/, "g")
const predefinedHeadingIdRegex = new RegExp(/(.*) {#(?:.*)}/, "g")
const hugoShortcodeRegex = new RegExp(/{{(.*)}}/, "g")
-const figureTagRegex = new RegExp(/< ?figure src="(.*)" ?>/, "g")
+// Matches the full Hugo {{< figure src="..." ... >}} shortcode and captures src.
+// Must run before the generic shortcode stripper to avoid partial-match issues
+// with captions that contain HTML (e.g. <span class="figure-number">).
+const figureShortcodeRegex = new RegExp(/{{<\s*figure\b[^}]*\bsrc="([^"]*)"[^}]*>}}/, "g")
// \\\\\( -> matches \\(
// (.+?) -> Lazy match for capturing the equation
// \\\\\) -> matches \\)
@@ -70,19 +73,19 @@ export const OxHugoFlavouredMarkdown: QuartzTransformerPlugin<Partial<Options>>
})
}
- if (opts.removeHugoShortcode) {
+ if (opts.replaceFigureWithMdImg) {
src = src.toString()
- src = src.replaceAll(hugoShortcodeRegex, (_value, ...capture) => {
- const [scContent] = capture
- return scContent
+ src = src.replaceAll(figureShortcodeRegex, (_value, ...capture) => {
+ const [imgSrc] = capture
+ return `![](${imgSrc})`
})
}
- if (opts.replaceFigureWithMdImg) {
+ if (opts.removeHugoShortcode) {
src = src.toString()
- src = src.replaceAll(figureTagRegex, (_value, ...capture) => {
- const [src] = capture
- return `![](${src})`
+ src = src.replaceAll(hugoShortcodeRegex, (_value, ...capture) => {
+ const [scContent] = capture
+ return scContent
})
}

View File

@@ -1,17 +0,0 @@
export declare global {
interface Document {
addEventListener<K extends keyof CustomEventMap>(
type: K,
listener: (this: Document, ev: CustomEventMap[K]) => void,
): void
removeEventListener<K extends keyof CustomEventMap>(
type: K,
listener: (this: Document, ev: CustomEventMap[K]) => void,
): void
dispatchEvent<K extends keyof CustomEventMap>(ev: CustomEventMap[K] | UIEvent): void
}
interface Window {
spaNavigate(url: URL, isBack: boolean = false)
addCleanup(fn: (...args: any[]) => void)
}
}

View File

@@ -1,15 +0,0 @@
declare module "*.scss" {
const content: string
export = content
}
// dom custom event
interface CustomEventMap {
prenav: CustomEvent<{}>
nav: CustomEvent<{ url: FullSlug }>
themechange: CustomEvent<{ theme: "light" | "dark" }>
readermodechange: CustomEvent<{ mode: "on" | "off" }>
}
type ContentIndex = Record<FullSlug, ContentDetails>
declare const fetchData: Promise<ContentIndex>

View File

@@ -1,101 +0,0 @@
import { QuartzConfig } from "./quartz/cfg"
import * as Plugin from "./quartz/plugins"
/**
* Quartz 4 Configuration
*
* See https://quartz.jzhao.xyz/configuration for more information.
*/
const config: QuartzConfig = {
configuration: {
pageTitle: "Quartz 4",
pageTitleSuffix: "",
enableSPA: true,
enablePopovers: true,
analytics: {
provider: "plausible",
},
locale: "en-US",
baseUrl: "quartz.jzhao.xyz",
ignorePatterns: ["private", "templates", ".obsidian"],
defaultDateType: "modified",
theme: {
fontOrigin: "googleFonts",
cdnCaching: true,
typography: {
header: "Schibsted Grotesk",
body: "Source Sans Pro",
code: "IBM Plex Mono",
},
colors: {
lightMode: {
light: "#faf8f8",
lightgray: "#e5e5e5",
gray: "#b8b8b8",
darkgray: "#4e4e4e",
dark: "#2b2b2b",
secondary: "#284b63",
tertiary: "#84a59d",
highlight: "rgba(143, 159, 169, 0.15)",
textHighlight: "#fff23688",
},
darkMode: {
light: "#161618",
lightgray: "#393639",
gray: "#646464",
darkgray: "#d4d4d4",
dark: "#ebebec",
secondary: "#7b97aa",
tertiary: "#84a59d",
highlight: "rgba(143, 159, 169, 0.15)",
textHighlight: "#b3aa0288",
},
},
},
},
plugins: {
transformers: [
Plugin.FrontMatter({ delimiters: "+++", language: "toml" }),
Plugin.CreatedModifiedDate({
priority: ["frontmatter", "git", "filesystem"],
}),
Plugin.SyntaxHighlighting({
theme: {
light: "github-light",
dark: "github-dark",
},
keepBackground: false,
}),
// OxHugoFlavouredMarkdown must come before GitHubFlavoredMarkdown.
// Note: not compatible with ObsidianFlavoredMarkdown — use one or the other.
// If ox-hugo exports TOML frontmatter, change FrontMatter to:
// Plugin.FrontMatter({ delims: "+++", language: "toml" })
Plugin.OxHugoFlavouredMarkdown(),
Plugin.GitHubFlavoredMarkdown(),
Plugin.TableOfContents(),
Plugin.CrawlLinks({ markdownLinkResolution: "shortest" }),
Plugin.Description(),
Plugin.Latex({ renderEngine: "katex" }),
],
filters: [Plugin.RemoveDrafts()],
emitters: [
Plugin.AliasRedirects(),
Plugin.ComponentResources(),
Plugin.ContentPage(),
Plugin.FolderPage(),
Plugin.TagPage(),
Plugin.ContentIndex({
enableSiteMap: true,
enableRSS: true,
}),
Plugin.Assets(),
Plugin.Static(),
Plugin.Favicon(),
Plugin.NotFoundPage(),
// Comment out CustomOgImages to speed up build time
Plugin.CustomOgImages(),
],
},
}
export default config

View File

@@ -1,68 +0,0 @@
import { PageLayout, SharedLayout } from "./quartz/cfg"
import * as Component from "./quartz/components"
// components shared across all pages
export const sharedPageComponents: SharedLayout = {
head: Component.Head(),
header: [],
afterBody: [],
footer: Component.Footer({
links: {
GitHub: "https://github.com/jackyzha0/quartz",
"Discord Community": "https://discord.gg/cRFFHYye7t",
},
}),
}
// components for pages that display a single page (e.g. a single note)
export const defaultContentPageLayout: PageLayout = {
beforeBody: [
Component.ConditionalRender({
component: Component.Breadcrumbs(),
condition: (page) => page.fileData.slug !== "index",
}),
Component.ArticleTitle(),
Component.ContentMeta(),
Component.TagList(),
],
left: [
Component.PageTitle(),
Component.MobileOnly(Component.Spacer()),
Component.Flex({
components: [
{
Component: Component.Search(),
grow: true,
},
{ Component: Component.Darkmode() },
{ Component: Component.ReaderMode() },
],
}),
Component.Explorer(),
],
right: [
Component.Graph(),
Component.DesktopOnly(Component.TableOfContents()),
Component.Backlinks(),
],
}
// components for pages that display lists of pages (e.g. tags or folders)
export const defaultListPageLayout: PageLayout = {
beforeBody: [Component.Breadcrumbs(), Component.ArticleTitle(), Component.ContentMeta()],
left: [
Component.PageTitle(),
Component.MobileOnly(Component.Spacer()),
Component.Flex({
components: [
{
Component: Component.Search(),
grow: true,
},
{ Component: Component.Darkmode() },
],
}),
Component.Explorer(),
],
right: [],
}

package-lock.json (generated; 2091 lines changed)

File diff suppressed because it is too large.

View File

@@ -17,10 +17,7 @@
"check": "tsc --noEmit && npx prettier . --check",
"format": "npx prettier . --write",
"test": "tsx --test",
"profile": "0x -D prof ./quartz/bootstrap-cli.mjs build --concurrency=1",
"export": "elixir scripts/export.exs",
"build:notes": "elixir scripts/export.exs && npx quartz build",
"serve:notes": "elixir scripts/export.exs && npx quartz build --serve"
"profile": "0x -D prof ./quartz/bootstrap-cli.mjs build --concurrency=1"
},
"engines": {
"npm": ">=10.9.2",
@@ -40,33 +37,35 @@
"dependencies": {
"@clack/prompts": "^0.11.0",
"@floating-ui/dom": "^1.7.4",
"@huggingface/transformers": "^3.7.5",
"@myriaddreamin/rehype-typst": "^0.6.0",
"@napi-rs/simple-git": "0.1.22",
"@tweenjs/tween.js": "^25.0.0",
"ansi-truncate": "^1.4.0",
"async-mutex": "^0.5.0",
"chokidar": "^5.0.0",
"chokidar": "^4.0.3",
"cli-spinner": "^0.2.10",
"d3": "^7.9.0",
"esbuild-sass-plugin": "^3.6.0",
"esbuild-sass-plugin": "^3.3.1",
"flexsearch": "^0.8.205",
"github-slugger": "^2.0.0",
"globby": "^16.1.0",
"globby": "^15.0.0",
"gray-matter": "^4.0.3",
"hast-util-to-html": "^9.0.5",
"hast-util-to-jsx-runtime": "^2.3.6",
"hast-util-to-string": "^3.0.1",
"is-absolute-url": "^5.0.0",
"js-yaml": "^4.1.1",
"lightningcss": "^1.31.1",
"js-yaml": "^4.1.0",
"lightningcss": "^1.30.2",
"mdast-util-find-and-replace": "^3.0.2",
"mdast-util-to-hast": "^13.2.1",
"mdast-util-to-hast": "^13.2.0",
"mdast-util-to-string": "^4.0.0",
"micromorph": "^0.4.5",
"minimatch": "^10.1.1",
"pixi.js": "^8.15.0",
"preact": "^10.28.2",
"preact-render-to-string": "^6.6.5",
"minimatch": "^10.0.3",
"onnxruntime-web": "^1.23.0",
"pixi.js": "^8.13.2",
"preact": "^10.27.2",
"preact-render-to-string": "^6.6.1",
"pretty-bytes": "^7.1.0",
"pretty-time": "^1.1.0",
"reading-time": "^1.5.0",
@@ -86,32 +85,32 @@
"remark-rehype": "^11.1.2",
"remark-smartypants": "^3.0.2",
"rfdc": "^1.4.1",
"satori": "^0.19.1",
"satori": "^0.18.3",
"serve-handler": "^6.1.6",
"sharp": "^0.34.5",
"sharp": "^0.34.4",
"shiki": "^1.26.2",
"source-map-support": "^0.5.21",
"to-vfile": "^8.0.0",
"toml": "^3.0.0",
"unified": "^11.0.5",
"unist-util-visit": "^5.1.0",
"unist-util-visit": "^5.0.0",
"vfile": "^6.0.3",
"workerpool": "^10.0.1",
"ws": "^8.19.0",
"workerpool": "^9.3.4",
"ws": "^8.18.3",
"yargs": "^18.0.0"
},
"devDependencies": {
"@types/d3": "^7.4.3",
"@types/hast": "^3.0.4",
"@types/js-yaml": "^4.0.9",
"@types/node": "^25.0.10",
"@types/node": "^24.6.0",
"@types/pretty-time": "^1.1.5",
"@types/source-map-support": "^0.5.10",
"@types/ws": "^8.18.1",
"@types/yargs": "^17.0.35",
"esbuild": "^0.27.2",
"prettier": "^3.8.1",
"tsx": "^4.21.0",
"typescript": "^5.9.3"
"@types/yargs": "^17.0.33",
"esbuild": "^0.25.10",
"prettier": "^3.6.2",
"tsx": "^4.20.6",
"typescript": "^5.9.2"
}
}

View File

@@ -1,6 +1,18 @@
import { QuartzConfig } from "./quartz/cfg"
import { GlobalConfiguration, QuartzConfig } from "./quartz/cfg"
import * as Plugin from "./quartz/plugins"
const semanticSearch: GlobalConfiguration["semanticSearch"] = {
enable: true,
model: "onnx-community/embeddinggemma-300m-ONNX",
aot: true,
dims: 768,
dtype: "fp32",
shardSizeRows: 1024,
hnsw: { M: 16, efConstruction: 200 },
chunking: { chunkSize: 256, chunkOverlap: 64 },
vllm: { enable: true, concurrency: 16, batchSize: 128 },
}
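// note (assumed semantics; see the GlobalConfiguration type in quartz/cfg.ts):
// dims must match the model's embedding width (embeddinggemma-300m emits
// 768-dim vectors); shardSizeRows and the HNSW M/efConstruction knobs trade
// index size and build time for recall.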
/**
* Quartz 4 Configuration
*
@@ -52,10 +64,11 @@ const config: QuartzConfig = {
},
},
},
semanticSearch,
},
plugins: {
transformers: [
Plugin.FrontMatter({ delimiters: "+++", language: "toml" }),
Plugin.FrontMatter(),
Plugin.CreatedModifiedDate({
priority: ["frontmatter", "git", "filesystem"],
}),
@@ -66,11 +79,7 @@ const config: QuartzConfig = {
},
keepBackground: false,
}),
// OxHugoFlavouredMarkdown must come before GitHubFlavoredMarkdown.
// Note: not compatible with ObsidianFlavoredMarkdown — use one or the other.
// If ox-hugo exports TOML frontmatter, change FrontMatter to:
// Plugin.FrontMatter({ delims: "+++", language: "toml" })
Plugin.OxHugoFlavouredMarkdown(),
Plugin.ObsidianFlavoredMarkdown({ enableInHtmlEmbed: false }),
Plugin.GitHubFlavoredMarkdown(),
Plugin.TableOfContents(),
Plugin.CrawlLinks({ markdownLinkResolution: "shortest" }),
@@ -88,6 +97,7 @@ const config: QuartzConfig = {
enableSiteMap: true,
enableRSS: true,
}),
Plugin.SemanticIndex(semanticSearch),
Plugin.Assets(),
Plugin.Static(),
Plugin.Favicon(),

View File

@@ -71,7 +71,7 @@ async function buildQuartz(argv: Argv, mut: Mutex, clientRefresh: () => void) {
console.log(`Cleaned output directory \`${output}\` in ${perf.timeSince("clean")}`)
perf.addEvent("glob")
const allFiles = await glob("**/*.*", argv.directory, cfg.configuration.ignorePatterns, false)
const allFiles = await glob("**/*.*", argv.directory, cfg.configuration.ignorePatterns)
const markdownPaths = allFiles.filter((fp) => fp.endsWith(".md")).sort()
console.log(
`Found ${markdownPaths.length} input files from \`${argv.directory}\` in ${perf.timeSince("glob")}`,
@@ -143,7 +143,6 @@ async function startWatching(
}
const watcher = chokidar.watch(".", {
awaitWriteFinish: { stabilityThreshold: 250 },
persistent: true,
cwd: argv.directory,
ignoreInitial: true,

View File

@@ -50,11 +50,6 @@ export type Analytics =
| {
provider: "vercel"
}
| {
provider: "rybbit"
siteId: string
host?: string
}
export interface GlobalConfiguration {
pageTitle: string
@@ -83,6 +78,34 @@ export interface GlobalConfiguration {
* Region Codes: https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
*/
locale: ValidLocale
/** Semantic search configuration */
semanticSearch?: {
enable: boolean
model: string
aot: boolean
dtype: "fp32" | "fp16"
dims: number
shardSizeRows: number
manifestUrl?: string
manifestBaseUrl?: string
disableCache?: boolean
hnsw: {
M: number
efConstruction: number
efSearch?: number
}
chunking: {
chunkSize: number
chunkOverlap: number
noChunking?: boolean
}
vllm?: {
enable: boolean
vllmUrl?: string
concurrency: number
batchSize: number
}
}
}
export interface QuartzConfig {

View File

@@ -7,8 +7,8 @@ import fs from "fs"
export function escapePath(fp) {
return fp
.replace(/\\ /g, " ") // unescape spaces
.replace(/^"(.*)"$/, "$1")
.replace(/^'(.*)'$/, "$1")
.replace(/^".*"$/, "$1")
.replace(/^'.*"$/, "$1")
.trim()
}

View File

@@ -7,10 +7,12 @@ import { i18n } from "../i18n"
export interface SearchOptions {
enablePreview: boolean
includeButton: boolean
}
const defaultOptions: SearchOptions = {
enablePreview: true,
includeButton: true,
}
export default ((userOpts?: Partial<SearchOptions>) => {
@@ -29,19 +31,54 @@ export default ((userOpts?: Partial<SearchOptions>) => {
</svg>
<p>{i18n(cfg.locale).components.search.title}</p>
</button>
<div class="search-container">
<div class="search-space">
<input
autocomplete="off"
class="search-bar"
name="search"
type="text"
aria-label={searchPlaceholder}
placeholder={searchPlaceholder}
/>
<div class="search-layout" data-preview={opts.enablePreview}></div>
</div>
</div>
<search class="search-container">
<form class="search-space">
<div class="input-container">
<input
autocomplete="off"
class="search-bar"
name="search"
type="text"
aria-label={searchPlaceholder}
placeholder={searchPlaceholder}
/>
<div class="search-mode-toggle" role="radiogroup" aria-label="Search mode">
<button
type="button"
class="mode-option"
data-mode="lexical"
aria-pressed="true"
aria-label="Full-text search"
>
<svg viewBox="0 0 20 20" role="img" aria-hidden="true">
<g fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round">
<path d="M4 6h12M4 10h8M4 14h6" />
</g>
</svg>
<span class="sr-only">Full-text</span>
</button>
<button
type="button"
class="mode-option"
data-mode="semantic"
aria-pressed="false"
aria-label="Semantic search"
>
<svg viewBox="0 0 20 20" role="img" aria-hidden="true">
<g fill="none" stroke="currentColor" stroke-width="1.5" stroke-linecap="round">
<circle cx="5.2" cy="10" r="2.4" />
<circle cx="14.8" cy="4.8" r="2.1" />
<circle cx="14.8" cy="15.2" r="2.1" />
<path d="M7.1 8.7l5.2-2.4M7.1 11.3l5.2 2.4M14.8 6.9v6.2" />
</g>
</svg>
<span class="sr-only">Semantic</span>
</button>
</div>
</div>
<output class="search-layout" data-preview={opts.enablePreview} />
</form>
</search>
</div>
)
}

View File

@@ -9,7 +9,6 @@ import { visit } from "unist-util-visit"
import { Root, Element, ElementContent } from "hast"
import { GlobalConfiguration } from "../cfg"
import { i18n } from "../i18n"
import { styleText } from "util"
interface RenderComponents {
head: QuartzComponent
@@ -26,6 +25,7 @@ const headerRegex = new RegExp(/h[1-6]/)
export function pageResources(
baseDir: FullSlug | RelativeURL,
staticResources: StaticResources,
cfg?: GlobalConfiguration,
): StaticResources {
const contentIndexPath = joinSegments(baseDir, "static/contentIndex.json")
const contentIndexScript = `const fetchData = fetch("${contentIndexPath}").then(data => data.json())`
@@ -49,6 +49,12 @@ export function pageResources(
spaPreserve: true,
script: contentIndexScript,
},
{
loadTime: "beforeDOMReady",
contentType: "inline",
spaPreserve: true,
script: `const semanticCfg = ${JSON.stringify(cfg?.semanticSearch ?? {})};`,
},
...staticResources.js,
],
additionalHead: staticResources.additionalHead,
@@ -69,7 +75,6 @@ function renderTranscludes(
cfg: GlobalConfiguration,
slug: FullSlug,
componentData: QuartzComponentProps,
visited: Set<FullSlug>,
) {
// process transcludes in componentData
visit(root, "element", (node, _index, _parent) => {
@@ -78,30 +83,6 @@ function renderTranscludes(
if (classNames.includes("transclude")) {
const inner = node.children[0] as Element
const transcludeTarget = (inner.properties["data-slug"] ?? slug) as FullSlug
if (visited.has(transcludeTarget)) {
console.warn(
styleText(
"yellow",
`Warning: Skipping circular transclusion: ${slug} -> ${transcludeTarget}`,
),
)
node.children = [
{
type: "element",
tagName: "p",
properties: { style: "color: var(--secondary);" },
children: [
{
type: "text",
value: `Circular transclusion detected: ${transcludeTarget}`,
},
],
},
]
return
}
visited.add(transcludeTarget)
const page = componentData.allFiles.find((f) => f.slug === transcludeTarget)
if (!page) {
return
@@ -222,8 +203,7 @@ export function renderPage(
// make a deep copy of the tree so we don't remove the transclusion references
// for the file cached in contentMap in build.ts
const root = clone(componentData.tree) as Root
const visited = new Set<FullSlug>([slug])
renderTranscludes(root, cfg, slug, componentData, visited)
renderTranscludes(root, cfg, slug, componentData)
// set componentData.tree to the edited html that has transclusions rendered
componentData.tree = root
@@ -294,7 +274,7 @@ export function renderPage(
</body>
{pageResources.js
.filter((resource) => resource.loadTime === "afterDOMReady")
.map((res) => JSResourceToScriptElement(res, true))}
.map((res) => JSResourceToScriptElement(res))}
</html>
)

View File

@@ -111,10 +111,6 @@ function createFolderNode(
const folderPath = node.slug
folderContainer.dataset.folderpath = folderPath
if (currentSlug === folderPath) {
folderContainer.classList.add("active")
}
if (opts.folderClickBehavior === "link") {
// Replace button with link for link behavior
const button = titleContainer.querySelector(".folder-button") as HTMLElement

View File

@@ -29,31 +29,17 @@ class DiagramPanZoom {
const mouseDownHandler = this.onMouseDown.bind(this)
const mouseMoveHandler = this.onMouseMove.bind(this)
const mouseUpHandler = this.onMouseUp.bind(this)
// Touch drag events
const touchStartHandler = this.onTouchStart.bind(this)
const touchMoveHandler = this.onTouchMove.bind(this)
const touchEndHandler = this.onTouchEnd.bind(this)
const resizeHandler = this.resetTransform.bind(this)
this.container.addEventListener("mousedown", mouseDownHandler)
document.addEventListener("mousemove", mouseMoveHandler)
document.addEventListener("mouseup", mouseUpHandler)
this.container.addEventListener("touchstart", touchStartHandler, { passive: false })
document.addEventListener("touchmove", touchMoveHandler, { passive: false })
document.addEventListener("touchend", touchEndHandler)
window.addEventListener("resize", resizeHandler)
this.cleanups.push(
() => this.container.removeEventListener("mousedown", mouseDownHandler),
() => document.removeEventListener("mousemove", mouseMoveHandler),
() => document.removeEventListener("mouseup", mouseUpHandler),
() => this.container.removeEventListener("touchstart", touchStartHandler),
() => document.removeEventListener("touchmove", touchMoveHandler),
() => document.removeEventListener("touchend", touchEndHandler),
() => window.removeEventListener("resize", resizeHandler),
)
}
@@ -113,30 +99,6 @@ class DiagramPanZoom {
this.container.style.cursor = "grab"
}
private onTouchStart(e: TouchEvent) {
if (e.touches.length !== 1) return
this.isDragging = true
const touch = e.touches[0]
this.startPan = { x: touch.clientX - this.currentPan.x, y: touch.clientY - this.currentPan.y }
}
private onTouchMove(e: TouchEvent) {
if (!this.isDragging || e.touches.length !== 1) return
e.preventDefault() // Prevent scrolling
const touch = e.touches[0]
this.currentPan = {
x: touch.clientX - this.startPan.x,
y: touch.clientY - this.startPan.y,
}
this.updateTransform()
}
private onTouchEnd() {
this.isDragging = false
}
private zoom(delta: number) {
const newScale = Math.min(Math.max(this.scale + delta, this.MIN_SCALE), this.MAX_SCALE)
@@ -158,15 +120,11 @@ class DiagramPanZoom {
}
private resetTransform() {
const svg = this.content.querySelector("svg")!
const rect = svg.getBoundingClientRect()
const width = rect.width / this.scale
const height = rect.height / this.scale
this.scale = 1
const svg = this.content.querySelector("svg")!
this.currentPan = {
x: (this.container.clientWidth - width) / 2,
y: (this.container.clientHeight - height) / 2,
x: svg.getBoundingClientRect().width / 2,
y: svg.getBoundingClientRect().height / 2,
}
this.updateTransform()
}

View File

@@ -1,6 +1,7 @@
import FlexSearch, { DefaultDocumentSearchResults } from "flexsearch"
import FlexSearch, { DefaultDocumentSearchResults, Id } from "flexsearch"
import { ContentDetails } from "../../plugins/emitters/contentIndex"
import { registerEscapeHandler, removeAllChildren } from "./util"
import { SemanticClient, type SemanticResult } from "./semantic.inline"
import { registerEscapeHandler, removeAllChildren, fetchCanonical } from "./util"
import { FullSlug, normalizeRelativeURLs, resolveRelative } from "../../util/path"
interface Item {
@@ -14,81 +15,46 @@ interface Item {
// Can be expanded with things like "term" in the future
type SearchType = "basic" | "tags"
let searchType: SearchType = "basic"
let currentSearchTerm: string = ""
const encoder = (str: string): string[] => {
const tokens: string[] = []
let bufferStart = -1
let bufferEnd = -1
const lower = str.toLowerCase()
type SearchMode = "lexical" | "semantic"
const SEARCH_MODE_STORAGE_KEY = "quartz:search:mode"
let i = 0
for (const char of lower) {
const code = char.codePointAt(0)!
const isCJK =
(code >= 0x3040 && code <= 0x309f) ||
(code >= 0x30a0 && code <= 0x30ff) ||
(code >= 0x4e00 && code <= 0x9fff) ||
(code >= 0xac00 && code <= 0xd7af) ||
(code >= 0x20000 && code <= 0x2a6df)
const isWhitespace = code === 32 || code === 9 || code === 10 || code === 13
if (isCJK) {
if (bufferStart !== -1) {
tokens.push(lower.slice(bufferStart, bufferEnd))
bufferStart = -1
}
tokens.push(char)
} else if (isWhitespace) {
if (bufferStart !== -1) {
tokens.push(lower.slice(bufferStart, bufferEnd))
bufferStart = -1
}
} else {
if (bufferStart === -1) bufferStart = i
bufferEnd = i + char.length
}
i += char.length
const loadStoredSearchMode = (): SearchMode | null => {
if (typeof window === "undefined") {
return null
}
if (bufferStart !== -1) {
tokens.push(lower.slice(bufferStart))
try {
const stored = window.localStorage.getItem(SEARCH_MODE_STORAGE_KEY)
return stored === "lexical" || stored === "semantic" ? stored : null
} catch (err) {
console.warn("[Search] failed to read stored search mode:", err)
return null
}
return tokens
}
let index = new FlexSearch.Document<Item>({
encode: encoder,
document: {
id: "id",
tag: "tags",
index: [
{
field: "title",
tokenize: "forward",
},
{
field: "content",
tokenize: "forward",
},
{
field: "tags",
tokenize: "forward",
},
],
},
})
const persistSearchMode = (mode: SearchMode) => {
if (typeof window === "undefined") {
return
}
try {
window.localStorage.setItem(SEARCH_MODE_STORAGE_KEY, mode)
} catch (err) {
console.warn("[Search] failed to persist search mode:", err)
}
}
let searchMode: SearchMode = "lexical"
let currentSearchTerm: string = ""
let rawSearchTerm: string = ""
let semantic: SemanticClient | null = null
let semanticReady = false
let semanticInitFailed = false
type SimilarityResult = { item: Item; similarity: number }
let chunkMetadata: Record<string, { parentSlug: string; chunkId: number }> = {}
let manifestIds: string[] = []
const p = new DOMParser()
const fetchContentCache: Map<FullSlug, Element[]> = new Map()
const contextWindowWords = 30
const numSearchResults = 8
const numTagResults = 5
const tokenizeTerm = (term: string) => {
const tokens = term.split(/\s+/).filter((t) => t.trim() !== "")
const tokenLen = tokens.length
@@ -146,6 +112,102 @@ function highlight(searchTerm: string, text: string, trim?: boolean) {
}`
}
// To be used with search and everything else with flexsearch
const encoder = (str: string) =>
str
.toLowerCase()
.split(/\s+/)
.filter((token) => token.length > 0)
/**
* Get parent document slug for a chunk ID
*/
function getParentSlug(slug: string): string {
const meta = chunkMetadata[slug]
return meta ? meta.parentSlug : slug
}
/**
* Aggregate semantic search results from chunks to documents using RRF
* @param results Raw semantic results (chunk-level)
* @param slugToDocIndex Map from document slug to index in idDataMap
* @returns Object with rrfScores (for ranking) and maxScores (for display)
*/
function aggregateChunkResults(
results: SemanticResult[],
slugToDocIndex: Map<FullSlug, number>,
): { rrfScores: Map<number, number>; maxScores: Map<number, number> } {
// Group chunks by parent document
const docChunks = new Map<string, Array<{ score: number }>>()
results.forEach(({ id, score }) => {
// id is an index into manifestIds (the chunk IDs from embeddings)
const chunkSlug = manifestIds[id]
if (!chunkSlug) return
// Get parent document slug
const parentSlug = getParentSlug(chunkSlug)
if (!docChunks.has(parentSlug)) {
docChunks.set(parentSlug, [])
}
docChunks.get(parentSlug)!.push({ score })
})
// Apply RRF for ranking and track max similarity for display
const rrfScores = new Map<number, number>()
const maxScores = new Map<number, number>()
const RRF_K = 60
for (const [parentSlug, chunks] of docChunks) {
const docIdx = slugToDocIndex.get(parentSlug as FullSlug)
if (typeof docIdx !== "number") continue
// Sort chunks by score descending to assign per-document ranks
chunks.sort((a, b) => b.score - a.score)
// RRF formula: sum(1 / (k + rank)) across all chunks, using per-document ranks
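// e.g. a document matched by two chunks scores 1/(60+0) + 1/(60+1) ~= 0.033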
const rrfScore = chunks.reduce((sum, _, rank) => sum + 1.0 / (RRF_K + rank), 0)
// Max similarity score for display (original 0-1 range)
const maxScore = chunks[0].score
rrfScores.set(docIdx, rrfScore)
maxScores.set(docIdx, maxScore)
}
return { rrfScores, maxScores }
}
// Initialize the FlexSearch Document instance with the appropriate configuration
const index = new FlexSearch.Document<Item>({
tokenize: "forward",
encode: encoder,
document: {
id: "id",
tag: "tags",
index: [
{
field: "title",
tokenize: "forward",
},
{
field: "content",
tokenize: "forward",
},
{
field: "tags",
tokenize: "forward",
},
],
},
})
const p = new DOMParser()
const fetchContentCache: Map<FullSlug, Element[]> = new Map()
const numSearchResults = 10
const numTagResults = 10
function highlightHTML(searchTerm: string, el: HTMLElement) {
const p = new DOMParser()
const tokenizedTerms = tokenizeTerm(searchTerm)
@@ -187,7 +249,11 @@ function highlightHTML(searchTerm: string, el: HTMLElement) {
return html.body
}
async function setupSearch(searchElement: Element, currentSlug: FullSlug, data: ContentIndex) {
async function setupSearch(
searchElement: HTMLDivElement,
currentSlug: FullSlug,
data: ContentIndex,
) {
const container = searchElement.querySelector(".search-container") as HTMLElement
if (!container) return
@@ -202,12 +268,183 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
const searchLayout = searchElement.querySelector(".search-layout") as HTMLElement
if (!searchLayout) return
const searchSpace = searchElement?.querySelector(".search-space") as HTMLFormElement
if (!searchSpace) return
// Create semantic search progress bar
const progressBar = document.createElement("div")
progressBar.className = "semantic-search-progress"
progressBar.style.cssText = `
position: absolute;
bottom: 0;
left: 0;
height: 2px;
width: 0;
background: var(--secondary);
transition: width 0.3s ease, opacity 0.3s ease;
opacity: 0;
z-index: 9999;
`
searchBar.parentElement?.appendChild(progressBar)
const startSemanticProgress = () => {
progressBar.style.opacity = "1"
progressBar.style.width = "0"
setTimeout(() => {
progressBar.style.width = "100%"
}, 10)
}
const completeSemanticProgress = () => {
progressBar.style.opacity = "0"
setTimeout(() => {
progressBar.style.width = "0"
}, 300)
}
const resetProgressBar = () => {
progressBar.style.opacity = "0"
progressBar.style.width = "0"
}
const idDataMap = Object.keys(data) as FullSlug[]
const slugToIndex = new Map<FullSlug, number>()
idDataMap.forEach((slug, idx) => slugToIndex.set(slug, idx))
const modeToggle = searchSpace.querySelector(".search-mode-toggle") as HTMLDivElement | null
const modeButtons = modeToggle
? Array.from(modeToggle.querySelectorAll<HTMLButtonElement>(".mode-option"))
: []
const appendLayout = (el: HTMLElement) => {
searchLayout.appendChild(el)
}
const enablePreview = searchLayout.dataset.preview === "true"
if (!semantic && !semanticInitFailed) {
const client = new SemanticClient(semanticCfg)
try {
await client.ensureReady()
semantic = client
semanticReady = true
// Load chunk metadata and IDs from manifest
try {
const manifestUrl = "/embeddings/manifest.json"
const res = await fetch(manifestUrl)
if (res.ok) {
const manifest = await res.json()
chunkMetadata = manifest.chunkMetadata || {}
manifestIds = manifest.ids || []
console.debug(
`[Search] Loaded manifest: ${manifestIds.length} chunks, ${Object.keys(chunkMetadata).length} chunked documents`,
)
}
} catch (err) {
console.warn("[Search] failed to load chunk metadata:", err)
chunkMetadata = {}
manifestIds = []
}
} catch (err) {
console.warn("[SemanticClient] initialization failed:", err)
client.dispose()
semantic = null
semanticReady = false
semanticInitFailed = true
}
} else if (semantic && !semanticReady) {
try {
await semantic.ensureReady()
semanticReady = true
} catch (err) {
console.warn("[SemanticClient] became unavailable:", err)
semantic.dispose()
semantic = null
semanticReady = false
semanticInitFailed = true
}
}
const storedMode = loadStoredSearchMode()
if (storedMode === "semantic") {
if (semanticReady) {
searchMode = storedMode
}
} else if (storedMode === "lexical") {
searchMode = storedMode
}
if (!semanticReady && searchMode === "semantic") {
searchMode = "lexical"
}
let searchSeq = 0
let runSearchTimer: number | null = null
let lastInputAt = 0
searchLayout.dataset.mode = searchMode
const updateModeUI = (mode: SearchMode) => {
modeButtons.forEach((button) => {
const btnMode = (button.dataset.mode as SearchMode) ?? "lexical"
const isActive = btnMode === mode
button.classList.toggle("active", isActive)
button.setAttribute("aria-pressed", String(isActive))
})
if (modeToggle) {
modeToggle.dataset.mode = mode
}
searchLayout.dataset.mode = mode
}
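// Adaptive debounce (a tuning heuristic, not a measurement): extending the query
// waits the full delay (plus a penalty in semantic mode), deleting characters
// re-runs quickly (90ms), and replacing the query falls in between.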
const computeDebounceDelay = (term: string): number => {
const trimmed = term.trim()
const lastTerm = currentSearchTerm
const isExtension =
lastTerm.length > 0 && trimmed.length > lastTerm.length && trimmed.startsWith(lastTerm)
const isRetraction = lastTerm.length > trimmed.length
const isReplacement =
lastTerm.length > 0 && !trimmed.startsWith(lastTerm) && !lastTerm.startsWith(trimmed)
const baseFullQueryDelay = 200
const semanticPenalty = searchMode === "semantic" ? 60 : 0
if (isExtension && trimmed.length > 2) {
return baseFullQueryDelay + semanticPenalty
}
if (isReplacement && trimmed.length > 3) {
return Math.max(90, baseFullQueryDelay - 80)
}
if (isRetraction) {
return 90
}
return baseFullQueryDelay + (searchMode === "semantic" ? 40 : 0)
}
const triggerSearchWithMode = (mode: SearchMode) => {
if (mode === "semantic" && !semanticReady) {
return
}
if (searchMode === mode) return
searchMode = mode
updateModeUI(mode)
persistSearchMode(searchMode)
if (rawSearchTerm.trim() !== "") {
searchLayout.classList.add("display-results")
const token = ++searchSeq
void runSearch(rawSearchTerm, token)
}
}
updateModeUI(searchMode)
modeButtons.forEach((button) => {
const btnMode = (button.dataset.mode as SearchMode) ?? "lexical"
if (btnMode === "semantic") {
button.disabled = !semanticReady
button.setAttribute("aria-disabled", String(!semanticReady))
}
const handler = () => triggerSearchWithMode(btnMode)
button.addEventListener("click", handler)
window.addCleanup(() => button.removeEventListener("click", handler))
})
let preview: HTMLDivElement | undefined = undefined
let previewInner: HTMLDivElement | undefined = undefined
const results = document.createElement("div")
@@ -229,20 +466,23 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
removeAllChildren(preview)
}
searchLayout.classList.remove("display-results")
searchType = "basic" // reset search type after closing
searchButton.focus()
resetProgressBar()
}
function showSearch(searchTypeNew: SearchType) {
searchType = searchTypeNew
if (sidebar) sidebar.style.zIndex = "1"
function showSearch(type: SearchType) {
container.classList.add("active")
if (type === "tags") {
searchBar.value = "#"
rawSearchTerm = "#"
}
searchBar.focus()
}
let currentHover: HTMLInputElement | null = null
async function shortcutHandler(e: HTMLElementEventMap["keydown"]) {
if (e.key === "k" && (e.ctrlKey || e.metaKey) && !e.shiftKey) {
if ((e.key === "/" || e.key === "k") && (e.ctrlKey || e.metaKey) && !e.shiftKey) {
e.preventDefault()
const searchBarOpen = container.classList.contains("active")
searchBarOpen ? hideSearch() : showSearch("basic")
@@ -252,9 +492,6 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
e.preventDefault()
const searchBarOpen = container.classList.contains("active")
searchBarOpen ? hideSearch() : showSearch("tags")
// add "#" prefix for tag search
searchBar.value = "#"
return
}
@@ -264,20 +501,29 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
// If search is active, render the first (or focused) result and navigate accordingly
if (!container.classList.contains("active")) return
if (e.key === "Enter" && !e.isComposing) {
if (e.key === "Enter") {
// If result has focus, navigate to that one, otherwise pick first result
let anchor: HTMLAnchorElement | undefined
if (results.contains(document.activeElement)) {
const active = document.activeElement as HTMLInputElement
if (active.classList.contains("no-match")) return
await displayPreview(active)
active.click()
anchor = document.activeElement as HTMLAnchorElement
if (anchor.classList.contains("no-match")) return
await displayPreview(anchor)
e.preventDefault()
anchor.click()
} else {
const anchor = document.getElementsByClassName("result-card")[0] as HTMLInputElement | null
anchor = document.getElementsByClassName("result-card")[0] as HTMLAnchorElement
if (!anchor || anchor.classList.contains("no-match")) return
await displayPreview(anchor)
e.preventDefault()
anchor.click()
}
} else if (e.key === "ArrowUp" || (e.shiftKey && e.key === "Tab")) {
if (anchor !== undefined)
window.spaNavigate(new URL(new URL(anchor.href).pathname, window.location.toString()))
} else if (
e.key === "ArrowUp" ||
(e.shiftKey && e.key === "Tab") ||
(e.ctrlKey && e.key === "p")
) {
e.preventDefault()
if (results.contains(document.activeElement)) {
// If an element in results-container already has focus, focus previous one
@@ -290,7 +536,7 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
if (prevResult) currentHover = prevResult
await displayPreview(prevResult)
}
} else if (e.key === "ArrowDown" || e.key === "Tab") {
} else if (e.key === "ArrowDown" || e.key === "Tab" || (e.ctrlKey && e.key === "n")) {
e.preventDefault()
// If a result already has focus, find and focus the next one; otherwise the
// activeElement is still the search bar, so find and focus the first result.
@@ -307,25 +553,33 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
}
}
const formatForDisplay = (term: string, id: number) => {
const formatForDisplay = (term: string, id: number, renderType: SearchType) => {
const slug = idDataMap[id]
// Check whether the query shares any token with the title (used to boost title matches)
const queryTokens = tokenizeTerm(term)
const titleTokens = tokenizeTerm(data[slug].title ?? "")
const titleMatch = titleTokens.some((t) => queryTokens.includes(t))
return {
id,
slug,
title: searchType === "tags" ? data[slug].title : highlight(term, data[slug].title ?? ""),
title: renderType === "tags" ? data[slug].title : highlight(term, data[slug].title ?? ""),
content: highlight(term, data[slug].content ?? "", true),
tags: highlightTags(term.substring(1), data[slug].tags),
tags: highlightTags(term, data[slug].tags, renderType),
titleMatch, // Add title match flag for boosting
}
}
function highlightTags(term: string, tags: string[]) {
if (!tags || searchType !== "tags") {
function highlightTags(term: string, tags: string[], renderType: SearchType) {
if (!tags || renderType !== "tags") {
return []
}
const tagTerm = term.toLowerCase()
return tags
.map((tag) => {
if (tag.toLowerCase().includes(term.toLowerCase())) {
if (tag.toLowerCase().includes(tagTerm)) {
return `<li><p class="match-tag">#${tag}</p></li>`
} else {
return `<li><p>#${tag}</p></li>`
@@ -338,24 +592,40 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
return new URL(resolveRelative(currentSlug, slug), location.toString())
}
const resultToHTML = ({ slug, title, content, tags }: Item) => {
const resultToHTML = ({ item, percent }: { item: Item; percent: number | null }) => {
const { slug, title, content, tags, target } = item
const htmlTags = tags.length > 0 ? `<ul class="tags">${tags.join("")}</ul>` : ``
const itemTile = document.createElement("a")
const titleContent = target ? highlight(currentSearchTerm, target) : title
const subscript = target ? `<b>${slug}</b>` : ``
let percentLabel = "—"
let percentAttr = ""
if (percent !== null && Number.isFinite(percent)) {
const bounded = Math.max(0, Math.min(100, percent))
percentLabel = `${bounded.toFixed(1)}%`
percentAttr = bounded.toFixed(3)
}
itemTile.classList.add("result-card")
itemTile.id = slug
itemTile.href = resolveUrl(slug).toString()
itemTile.innerHTML = `
<h3 class="card-title">${title}</h3>
${htmlTags}
<p class="card-description">${content}</p>
`
itemTile.addEventListener("click", (event) => {
if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) return
hideSearch()
})
itemTile.innerHTML = `<hgroup>
<h3>${titleContent}</h3>
${subscript}${htmlTags}
${searchMode === "semantic" ? `<span class="result-likelihood" title="match likelihood">&nbsp;${percentLabel}</span>` : ""}
${enablePreview && window.innerWidth > 600 ? "" : `<p>${content}</p>`}
</hgroup>`
if (percentAttr) itemTile.dataset.scorePercent = percentAttr
else delete itemTile.dataset.scorePercent
const handler = (event: MouseEvent) => {
if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) return
const handler = (evt: MouseEvent) => {
if (evt.altKey || evt.ctrlKey || evt.metaKey || evt.shiftKey) return
const anchor = evt.currentTarget as HTMLAnchorElement | null
if (!anchor) return
evt.preventDefault()
const href = anchor.getAttribute("href")
if (!href) return
const url = new URL(href, window.location.toString())
window.spaNavigate(url)
hideSearch()
}
@@ -373,15 +643,22 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
return itemTile
}
async function displayResults(finalResults: Item[]) {
async function displayResults(finalResults: SimilarityResult[]) {
removeAllChildren(results)
if (finalResults.length === 0) {
results.innerHTML = `<a class="result-card no-match">
<h3>No results.</h3>
<p>Try another search term?</p>
</a>`
currentHover = null
} else {
results.append(...finalResults.map(resultToHTML))
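// Map similarity from [-1, 1] linearly onto [0, 100]%:
// percent = (sim + 1) / 2 * 100, e.g. sim 0.42 -> 71.0%.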
const decorated = finalResults.map(({ item, similarity }) => {
if (!Number.isFinite(similarity)) return { item, percent: null }
const bounded = Math.max(-1, Math.min(1, similarity))
const percent = ((bounded + 1) / 2) * 100
return { item, percent }
})
results.append(...decorated.map(resultToHTML))
}
if (finalResults.length === 0 && preview) {
@@ -401,8 +678,8 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
return fetchContentCache.get(slug) as Element[]
}
const targetUrl = resolveUrl(slug).toString()
const contents = await fetch(targetUrl)
const targetUrl = resolveUrl(slug)
const contents = await fetchCanonical(targetUrl)
.then((res) => res.text())
.then((contents) => {
if (contents === undefined) {
@@ -432,73 +709,296 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
const highlights = [...preview.getElementsByClassName("highlight")].sort(
(a, b) => b.innerHTML.length - a.innerHTML.length,
)
highlights[0]?.scrollIntoView({ block: "start" })
if (highlights.length > 0) {
const highlight = highlights[0]
const container = preview
if (container && highlight) {
// Get the relative positions
const containerRect = container.getBoundingClientRect()
const highlightRect = highlight.getBoundingClientRect()
// Calculate the scroll position relative to the container
const relativeTop = highlightRect.top - containerRect.top + container.scrollTop - 20 // 20px buffer
// Smoothly scroll the container
container.scrollTo({
top: relativeTop,
behavior: "smooth",
})
}
}
}
async function onType(e: HTMLElementEventMap["input"]) {
async function runSearch(rawTerm: string, token: number) {
if (!searchLayout || !index) return
currentSearchTerm = (e.target as HTMLInputElement).value
searchLayout.classList.toggle("display-results", currentSearchTerm !== "")
searchType = currentSearchTerm.startsWith("#") ? "tags" : "basic"
const trimmed = rawTerm.trim()
if (trimmed === "") {
removeAllChildren(results)
if (preview) {
removeAllChildren(preview)
}
currentHover = null
searchLayout.classList.remove("display-results")
resetProgressBar()
return
}
let searchResults: DefaultDocumentSearchResults<Item>
if (searchType === "tags") {
currentSearchTerm = currentSearchTerm.substring(1).trim()
const separatorIndex = currentSearchTerm.indexOf(" ")
if (separatorIndex != -1) {
// search by title and content index and then filter by tag (implemented in flexsearch)
const tag = currentSearchTerm.substring(0, separatorIndex)
const query = currentSearchTerm.substring(separatorIndex + 1).trim()
searchResults = await index.searchAsync({
query: query,
// request at least 10000 documents so enough remain after tag filtering (implemented in flexsearch)
const modeForRanking: SearchMode = searchMode
const initialType: SearchType = trimmed.startsWith("#") ? "tags" : "basic"
let workingType: SearchType = initialType
let highlightTerm = trimmed
let tagTerm = ""
let searchResults: DefaultDocumentSearchResults<Item> = []
if (initialType === "tags") {
tagTerm = trimmed.substring(1).trim()
const separatorIndex = tagTerm.indexOf(" ")
if (separatorIndex !== -1) {
const tag = tagTerm.substring(0, separatorIndex).trim()
const query = tagTerm.substring(separatorIndex + 1).trim()
const results = await index.searchAsync({
query,
limit: Math.max(numSearchResults, 10000),
index: ["title", "content"],
tag: { tags: tag },
})
for (let searchResult of searchResults) {
searchResult.result = searchResult.result.slice(0, numSearchResults)
}
// set search type to basic and remove tag from term for proper highlighting and scroll
searchType = "basic"
currentSearchTerm = query
if (token !== searchSeq) return
searchResults = Object.values(results)
workingType = "basic"
highlightTerm = query
} else {
// default search by tags index
searchResults = await index.searchAsync({
query: currentSearchTerm,
const results = await index.searchAsync({
query: tagTerm,
limit: numSearchResults,
index: ["tags"],
})
if (token !== searchSeq) return
searchResults = Object.values(results)
highlightTerm = tagTerm
}
} else if (searchType === "basic") {
searchResults = await index.searchAsync({
query: currentSearchTerm,
} else {
const results = await index.searchAsync({
query: highlightTerm,
limit: numSearchResults,
index: ["title", "content"],
})
if (token !== searchSeq) return
searchResults = Object.values(results)
}
const coerceIds = (hit?: DefaultDocumentSearchResults<Item>[number]): number[] => {
if (!hit) return []
return hit.result
.map((value: Id) => {
if (typeof value === "number") {
return value
}
const parsed = Number.parseInt(String(value), 10)
return Number.isNaN(parsed) ? null : parsed
})
.filter((value): value is number => value !== null)
}
const getByField = (field: string): number[] => {
const results = searchResults.filter((x) => x.field === field)
return results.length === 0 ? [] : ([...results[0].result] as number[])
const hit = searchResults.find((x) => x.field === field)
return coerceIds(hit)
}
// order titles ahead of content
const allIds: Set<number> = new Set([
...getByField("title"),
...getByField("content"),
...getByField("tags"),
])
const finalResults = [...allIds].map((id) => formatForDisplay(currentSearchTerm, id))
await displayResults(finalResults)
currentSearchTerm = highlightTerm
const candidateItems = new Map<string, Item>()
const ensureItem = (id: number): Item | null => {
const slug = idDataMap[id]
if (!slug) return null
const cached = candidateItems.get(slug)
if (cached) return cached
const item = formatForDisplay(highlightTerm, id, workingType)
if (item) {
candidateItems.set(slug, item)
return item
}
return null
}
const baseIndices: number[] = []
for (const id of allIds) {
const item = ensureItem(id)
if (!item) continue
const idx = slugToIndex.get(item.slug)
if (typeof idx === "number") {
baseIndices.push(idx)
}
}
let semanticIds: number[] = []
const semanticSimilarity = new Map<number, number>()
const integrateIds = (ids: number[]) => {
ids.forEach((docId) => {
ensureItem(docId)
})
}
const orchestrator = semanticReady && semantic ? semantic : null
const resolveSimilarity = (item: Item): number => {
const semanticHit = semanticSimilarity.get(item.id)
return semanticHit ?? Number.NaN
}
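// Weighted reciprocal-rank fusion: each source contributes weight / (1 + rank)
// per document. e.g. in semantic mode, a doc ranked 1st lexically and 3rd
// semantically scores 0.3/1 + 1.0/3 ≈ 0.63.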
const render = async () => {
if (token !== searchSeq) return
const useSemantic = semanticReady && semanticIds.length > 0
const weights =
modeForRanking === "semantic" && useSemantic
? { base: 0.3, semantic: 1.0 }
: { base: 1.0, semantic: useSemantic ? 0.3 : 0 }
const rrf = new Map<string, number>()
const push = (ids: number[], weight: number, applyTitleBoost: boolean = false) => {
if (!ids.length || weight <= 0) return
ids.forEach((docId, rank) => {
const slug = idDataMap[docId]
if (!slug) return
const item = ensureItem(docId)
if (!item) return
// Apply title boost for FlexSearch results (1.5x when the query shares a title token)
let effectiveWeight = weight
if (applyTitleBoost && item.titleMatch) {
effectiveWeight *= 1.5
}
const prev = rrf.get(slug) ?? 0
rrf.set(slug, prev + effectiveWeight / (1 + rank))
})
}
push(baseIndices, weights.base, true) // FlexSearch with title boost
push(semanticIds, weights.semantic, false) // Semantic without boost
const rankedEntries = Array.from(candidateItems.values())
.map((item) => ({ item, score: rrf.get(item.slug) ?? 0 }))
.sort((a, b) => b.score - a.score)
.slice(0, numSearchResults)
const displayEntries: SimilarityResult[] = []
for (const entry of rankedEntries) {
const similarity = resolveSimilarity(entry.item)
displayEntries.push({ item: entry.item, similarity })
}
await displayResults(displayEntries)
}
await render()
if (workingType === "tags" || !orchestrator || !semanticReady || highlightTerm.length < 2) {
return
}
const showProgress = modeForRanking === "semantic"
if (showProgress) {
startSemanticProgress()
}
try {
const { semantic: semRes } = await orchestrator.search(
highlightTerm,
numSearchResults * 3, // Request more chunks to ensure good document coverage
)
if (token !== searchSeq) {
if (showProgress) completeSemanticProgress()
return
}
// Aggregate chunk results to document level using RRF
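// A document hit by several chunks accumulates 1/(1+rank) across them for
// ranking, while its displayed likelihood uses only the best single chunk's
// similarity (see below).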
const { rrfScores: semRrfScores, maxScores: semMaxScores } = aggregateChunkResults(
semRes,
slugToIndex,
)
// Use RRF scores for ranking
semanticIds = Array.from(semRrfScores.entries())
.sort((a, b) => b[1] - a[1])
.slice(0, numSearchResults)
.map(([docIdx]) => docIdx)
// Use max chunk similarity for display (0-1 range)
semanticSimilarity.clear()
semMaxScores.forEach((score, docIdx) => {
semanticSimilarity.set(docIdx, score)
})
integrateIds(semanticIds)
if (showProgress) completeSemanticProgress()
} catch (err) {
console.warn("[SemanticClient] search failed:", err)
if (showProgress) completeSemanticProgress()
orchestrator.dispose()
semantic = null
semanticReady = false
semanticInitFailed = true
if (searchMode === "semantic") {
searchMode = "lexical"
updateModeUI(searchMode)
}
modeButtons.forEach((button) => {
if ((button.dataset.mode as SearchMode) === "semantic") {
button.disabled = true
button.setAttribute("aria-disabled", "true")
}
})
}
await render()
}
function onType(e: HTMLElementEventMap["input"]) {
if (!searchLayout || !index) return
rawSearchTerm = (e.target as HTMLInputElement).value
const hasQuery = rawSearchTerm.trim() !== ""
searchLayout.classList.toggle("display-results", hasQuery)
const term = rawSearchTerm
const token = ++searchSeq
if (runSearchTimer !== null) {
window.clearTimeout(runSearchTimer)
runSearchTimer = null
}
if (!hasQuery) {
void runSearch("", token)
return
}
const now = performance.now()
lastInputAt = now
const delay = computeDebounceDelay(term)
const scheduledAt = lastInputAt
runSearchTimer = window.setTimeout(() => {
if (scheduledAt !== lastInputAt) {
return
}
runSearchTimer = null
void runSearch(term, token)
}, delay)
}
document.addEventListener("keydown", shortcutHandler)
window.addCleanup(() => document.removeEventListener("keydown", shortcutHandler))
searchButton.addEventListener("click", () => showSearch("basic"))
window.addCleanup(() => searchButton.removeEventListener("click", () => showSearch("basic")))
const openHandler = () => showSearch("basic")
searchButton.addEventListener("click", openHandler)
window.addCleanup(() => searchButton.removeEventListener("click", openHandler))
searchBar.addEventListener("input", onType)
window.addCleanup(() => searchBar.removeEventListener("input", onType))
window.addCleanup(() => {
if (runSearchTimer !== null) {
window.clearTimeout(runSearchTimer)
runSearchTimer = null
}
resetProgressBar()
})
registerEscapeHandler(container, hideSearch)
await fillDocument(data)
@@ -506,17 +1006,17 @@ async function setupSearch(searchElement: Element, currentSlug: FullSlug, data:
/**
* Fills the module-level flexsearch document with data
* @param data data to fill the index with
*/
let indexPopulated = false
async function fillDocument(data: ContentIndex) {
if (indexPopulated) return
let id = 0
const promises: Array<Promise<unknown>> = []
const promises = []
for (const [slug, fileData] of Object.entries<ContentDetails>(data)) {
promises.push(
index.addAsync(id++, {
//@ts-ignore
index.addAsync({
id,
slug: slug as FullSlug,
title: fileData.title,
@@ -524,6 +1024,7 @@ async function fillDocument(data: ContentIndex) {
tags: fileData.tags,
}),
)
id++
}
await Promise.all(promises)
@@ -533,7 +1034,9 @@ async function fillDocument(data: ContentIndex) {
document.addEventListener("nav", async (e: CustomEventMap["nav"]) => {
const currentSlug = e.detail.url
const data = await fetchData
const searchElement = document.getElementsByClassName("search")
const searchElement = document.getElementsByClassName(
"search",
) as HTMLCollectionOf<HTMLDivElement>
for (const element of searchElement) {
await setupSearch(element, currentSlug, data)
}

View File

@@ -1,163 +0,0 @@
import test, { describe } from "node:test"
import assert from "node:assert"
// Inline the encoder function from search.inline.ts for testing
const encoder = (str: string): string[] => {
const tokens: string[] = []
let bufferStart = -1
let bufferEnd = -1
const lower = str.toLowerCase()
let i = 0
for (const char of lower) {
const code = char.codePointAt(0)!
const isCJK =
(code >= 0x3040 && code <= 0x309f) ||
(code >= 0x30a0 && code <= 0x30ff) ||
(code >= 0x4e00 && code <= 0x9fff) ||
(code >= 0xac00 && code <= 0xd7af) ||
(code >= 0x20000 && code <= 0x2a6df)
const isWhitespace = code === 32 || code === 9 || code === 10 || code === 13
if (isCJK) {
if (bufferStart !== -1) {
tokens.push(lower.slice(bufferStart, bufferEnd))
bufferStart = -1
}
tokens.push(char)
} else if (isWhitespace) {
if (bufferStart !== -1) {
tokens.push(lower.slice(bufferStart, bufferEnd))
bufferStart = -1
}
} else {
if (bufferStart === -1) bufferStart = i
bufferEnd = i + char.length
}
i += char.length
}
if (bufferStart !== -1) {
tokens.push(lower.slice(bufferStart))
}
return tokens
}
describe("search encoder", () => {
describe("English text", () => {
test("should tokenize simple English words", () => {
const result = encoder("hello world")
assert.deepStrictEqual(result, ["hello", "world"])
})
test("should handle multiple spaces", () => {
const result = encoder("hello world")
assert.deepStrictEqual(result, ["hello", "world"])
})
test("should handle tabs and newlines", () => {
const result = encoder("hello\tworld\ntest")
assert.deepStrictEqual(result, ["hello", "world", "test"])
})
test("should lowercase all text", () => {
const result = encoder("Hello WORLD Test")
assert.deepStrictEqual(result, ["hello", "world", "test"])
})
})
describe("CJK text", () => {
test("should tokenize Japanese Hiragana character by character", () => {
const result = encoder("こんにちは")
assert.deepStrictEqual(result, ["こ", "ん", "に", "ち", "は"])
})
test("should tokenize Japanese Katakana character by character", () => {
const result = encoder("コントロール")
assert.deepStrictEqual(result, ["コ", "ン", "ト", "ロ", "ー", "ル"])
})
test("should tokenize Japanese Kanji character by character", () => {
const result = encoder("日本語")
assert.deepStrictEqual(result, ["日", "本", "語"])
})
test("should tokenize Korean Hangul character by character", () => {
const result = encoder("안녕하세요")
assert.deepStrictEqual(result, ["안", "녕", "하", "세", "요"])
})
test("should tokenize Chinese characters character by character", () => {
const result = encoder("你好世界")
assert.deepStrictEqual(result, ["你", "好", "世", "界"])
})
test("should handle mixed Hiragana/Katakana/Kanji", () => {
const result = encoder("て以来")
assert.deepStrictEqual(result, ["て", "以", "来"])
})
})
describe("Mixed CJK and English", () => {
test("should handle Japanese with English words", () => {
const result = encoder("hello 世界")
assert.deepStrictEqual(result, ["hello", "世", "界"])
})
test("should handle English with Japanese words", () => {
const result = encoder("世界 hello world")
assert.deepStrictEqual(result, ["世", "界", "hello", "world"])
})
test("should handle complex mixed content", () => {
const result = encoder("これはtest文章です")
assert.deepStrictEqual(result, ["こ", "れ", "は", "test", "文", "章", "で", "す"])
})
test("should handle mixed Korean and English", () => {
const result = encoder("hello 안녕 world")
assert.deepStrictEqual(result, ["hello", "안", "녕", "world"])
})
test("should handle mixed Chinese and English", () => {
const result = encoder("你好 world")
assert.deepStrictEqual(result, ["你", "好", "world"])
})
})
describe("Edge cases", () => {
test("should handle empty string", () => {
const result = encoder("")
assert.deepStrictEqual(result, [])
})
test("should handle only whitespace", () => {
const result = encoder(" \t\n ")
assert.deepStrictEqual(result, [])
})
test("should handle single character", () => {
const result = encoder("a")
assert.deepStrictEqual(result, ["a"])
})
test("should handle single CJK character", () => {
const result = encoder("あ")
assert.deepStrictEqual(result, ["あ"])
})
test("should handle CJK with trailing whitespace", () => {
const result = encoder("日本語 ")
assert.deepStrictEqual(result, ["日", "本", "語"])
})
test("should handle English with trailing whitespace", () => {
const result = encoder("hello ")
assert.deepStrictEqual(result, ["hello"])
})
})
})

View File

@@ -0,0 +1,182 @@
export type SemanticResult = { id: number; score: number }
type ProgressMessage = {
type: "progress"
loadedRows: number
totalRows: number
}
type ReadyMessage = { type: "ready" }
type ResultMessage = {
type: "search-result"
seq: number
semantic: SemanticResult[]
}
type ErrorMessage = { type: "error"; seq?: number; message: string }
type SearchPayload = {
semantic: SemanticResult[]
}
type PendingResolver = {
resolve: (payload: SearchPayload) => void
reject: (err: Error) => void
}
export class SemanticClient {
private ready: Promise<void>
private resolveReady!: () => void
private worker: Worker | null = null
private pending = new Map<number, PendingResolver>()
private seq = 0
private disposed = false
private readySettled = false
private configured = false
private lastError: Error | null = null
constructor(private cfg?: any) {
this.ready = new Promise((resolve) => {
this.resolveReady = () => {
if (this.readySettled) return
this.readySettled = true
resolve()
}
})
if (this.cfg?.enable === false) {
this.lastError = new Error("semantic search disabled by configuration")
this.resolveReady()
return
}
this.boot()
}
private boot() {
try {
this.worker = new Worker("/semantic.worker.js", { type: "module" })
} catch (err) {
this.handleFatal(err)
return
}
this.setupWorker()
this.startInit()
}
private setupWorker() {
if (!this.worker) return
this.worker.onmessage = (
event: MessageEvent<ProgressMessage | ReadyMessage | ResultMessage | ErrorMessage>,
) => {
const msg = event.data
if (msg.type === "progress") {
// Progress updates during initialization - can be logged if needed
return
}
if (msg.type === "ready") {
this.configured = true
this.lastError = null
this.resolveReady()
return
}
if (msg.type === "search-result") {
const pending = this.pending.get(msg.seq)
if (pending) {
this.pending.delete(msg.seq)
pending.resolve({ semantic: msg.semantic ?? [] })
}
return
}
if (msg.type === "error") {
if (typeof msg.seq === "number") {
const pending = this.pending.get(msg.seq)
if (pending) {
this.pending.delete(msg.seq)
pending.reject(new Error(msg.message))
}
} else {
this.handleFatal(msg.message)
}
}
}
}
private startInit() {
if (!this.worker) return
const manifestUrl =
typeof this.cfg?.manifestUrl === "string" && this.cfg.manifestUrl.length > 0
? this.cfg.manifestUrl
: "/embeddings/manifest.json"
const disableCache = Boolean(this.cfg?.disableCache)
const baseUrl =
typeof this.cfg?.manifestBaseUrl === "string" ? this.cfg.manifestBaseUrl : undefined
this.worker.postMessage({
type: "init",
cfg: this.cfg,
manifestUrl,
baseUrl,
disableCache,
})
}
private rejectAll(err: Error, fatal = false) {
for (const [id, pending] of this.pending.entries()) {
pending.reject(err)
this.pending.delete(id)
}
if (fatal) {
this.lastError = err
this.configured = false
if (!this.readySettled) {
this.resolveReady()
}
}
}
private handleFatal(err: unknown) {
const error = err instanceof Error ? err : new Error(String(err))
console.error("[SemanticClient] initialization failure:", error)
this.rejectAll(error, true)
if (this.worker) {
this.worker.postMessage({ type: "reset" })
this.worker.terminate()
this.worker = null
}
}
async ensureReady() {
await this.ready
if (!this.configured) {
throw this.lastError ?? new Error("semantic search unavailable")
}
}
async search(text: string, k: number): Promise<SearchPayload> {
if (this.disposed) {
throw new Error("semantic client has been disposed")
}
await this.ensureReady()
if (!this.worker || !this.configured) {
throw this.lastError ?? new Error("worker unavailable")
}
return new Promise<SearchPayload>((resolve, reject) => {
const seq = ++this.seq
this.pending.set(seq, { resolve, reject })
this.worker?.postMessage({ type: "search", text, k, seq })
})
}
dispose() {
if (this.disposed) return
this.disposed = true
this.rejectAll(new Error("semantic client disposed"))
if (this.worker) {
this.worker.postMessage({ type: "reset" })
this.worker.terminate()
}
this.worker = null
this.configured = false
}
}
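// Minimal usage sketch (hypothetical call site; cfg fields mirror those read above):
//   const client = new SemanticClient({ manifestUrl: "/embeddings/manifest.json" })
//   await client.ensureReady() // resolves once the worker reports ready; throws on boot failure
//   const { semantic } = await client.search("graph embeddings", 10)
//   semantic.forEach(({ id, score }) => console.log(id, score))
//   client.dispose() // terminates the worker and rejects any in-flight searches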

View File

@@ -115,9 +115,9 @@ async function _navigate(url: URL, isBack: boolean = false) {
}
// now, patch head, re-executing scripts
const elementsToRemove = document.head.querySelectorAll(":not([data-persist])")
const elementsToRemove = document.head.querySelectorAll(":not([spa-preserve])")
elementsToRemove.forEach((el) => el.remove())
const elementsToAdd = html.head.querySelectorAll(":not([data-persist])")
const elementsToAdd = html.head.querySelectorAll(":not([spa-preserve])")
elementsToAdd.forEach((el) => document.head.appendChild(el))
// delay setting the url until now

View File

@@ -5,7 +5,7 @@
background: none;
border: none;
width: 20px;
height: 32px;
height: 20px;
margin: 0;
text-align: inherit;
flex-shrink: 0;

View File

@@ -6,7 +6,6 @@
& > :not(.sidebar.left:has(.explorer)) {
transition: transform 300ms ease-in-out;
}
&.lock-scroll > :not(.sidebar.left:has(.explorer)) {
transform: translateX(100dvw);
transition: transform 300ms ease-in-out;
@@ -34,10 +33,8 @@
min-height: 1.2rem;
flex: 0 1 auto;
&.collapsed {
flex: 0 1 1.2rem;
& .fold {
transform: rotateZ(-90deg);
}
@@ -121,10 +118,7 @@ button.desktop-explorer {
list-style: none;
margin: 0;
padding: 0;
&.explorer-ul {
overscroll-behavior: contain;
}
overscroll-behavior: contain;
& li > a {
color: var(--dark);
@@ -139,16 +133,12 @@ button.desktop-explorer {
}
.folder-outer {
visibility: collapse;
display: grid;
grid-template-rows: 0fr;
transition-property: grid-template-rows, visibility;
transition-duration: 0.3s;
transition-timing-function: ease-in-out;
transition: grid-template-rows 0.3s ease-in-out;
}
.folder-outer.open {
visibility: visible;
grid-template-rows: 1fr;
}
@@ -275,8 +265,6 @@ li:has(> .folder-outer:not(.open)) > .folder-container > svg {
.mobile-no-scroll {
@media all and ($mobile) {
.explorer-content > .explorer-ul {
overscroll-behavior: contain;
}
overscroll-behavior: none;
}
}

View File

@@ -65,6 +65,7 @@ pre {
overflow: hidden;
& > .mermaid-content {
padding: 2rem;
position: relative;
transform-origin: 0 0;
transition: transform 0.1s ease;

View File

@@ -5,7 +5,7 @@
background: none;
border: none;
width: 20px;
height: 32px;
height: 20px;
margin: 0;
text-align: inherit;
flex-shrink: 0;

View File

@@ -25,7 +25,6 @@
& > p {
display: inline;
color: var(--gray);
text-wrap: unset;
}
& svg {
@@ -78,16 +77,97 @@
margin-bottom: 2em;
}
& > input {
& > .input-container {
align-items: center;
gap: 0.5rem;
display: flex;
flex-wrap: wrap;
position: relative;
box-sizing: border-box;
padding: 0.5em 1em;
font-family: var(--bodyFont);
color: var(--dark);
font-size: 1.1em;
border: 1px solid var(--lightgray);
&:focus {
outline: none;
.search-bar {
flex: 1 1 auto;
min-width: 0;
box-sizing: border-box;
padding: 0.5em 1em;
font-family: var(--bodyFont);
color: var(--dark);
font-size: 1.1em;
border: none;
background: transparent;
&:focus {
outline: none;
}
}
.semantic-search-progress {
position: absolute;
bottom: 0;
left: 0;
right: 0;
height: 2px;
background-color: var(--secondary);
width: 0;
opacity: 0;
transition:
width 0.3s ease,
opacity 0.2s ease;
pointer-events: none;
}
.search-mode-toggle {
display: inline-flex;
align-items: center;
border-radius: 9999px;
height: 1.4rem;
background-color: color-mix(in srgb, var(--darkgray) 12%, transparent);
margin-right: 1rem;
.mode-option {
border: none;
background: transparent;
font: inherit;
color: var(--gray);
border-radius: 9999px;
cursor: pointer;
transition:
background-color 0.2s ease,
color 0.2s ease;
display: inline-flex;
align-items: center;
justify-content: center;
width: 1.5rem;
height: 1.5rem;
position: relative;
&:focus-visible {
outline: 2px solid var(--tertiary);
outline-offset: 2px;
}
&.active {
background-color: var(--secondary);
color: var(--light);
}
svg {
width: 18px;
height: 18px;
}
.sr-only {
position: absolute;
width: 1px;
height: 1px;
padding: 0;
margin: -1px;
overflow: hidden;
clip: rect(0, 0, 0, 0);
white-space: nowrap;
border: 0;
}
}
}
}

542
quartz/embed_build.py Normal file
View File

@@ -0,0 +1,542 @@
# /// script
# requires-python = ">=3.11"
# dependencies = [
# "langchain-text-splitters",
# "numpy",
# "openai",
# "sentence-transformers",
# "tiktoken",
# ]
# ///
from __future__ import annotations
import os, json, argparse, hashlib, math, random, logging
from pathlib import Path
from functools import lru_cache
from collections.abc import Iterable
from concurrent.futures import ThreadPoolExecutor, as_completed
import tiktoken, numpy as np
from openai import OpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter
logger = logging.getLogger(__name__)
DEFAULT_VLLM_URL = os.environ.get("VLLM_URL") or os.environ.get("VLLM_EMBED_URL") or "http://127.0.0.1:8000/v1"
def resolve_vllm_base_url(url: str) -> str:
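"""Normalize a vLLM URL to its OpenAI-compatible /v1 base.
e.g. "http://host:8000"               -> "http://host:8000/v1"
     "http://host:8000/v1/embeddings" -> "http://host:8000/v1"
"""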
if not url:
raise ValueError("vLLM URL must be non-empty")
trimmed = url.rstrip("/")
if trimmed.endswith("/v1/embeddings"):
trimmed = trimmed[: -len("/embeddings")]
elif trimmed.endswith("/embeddings"):
trimmed = trimmed[: trimmed.rfind("/")]
if not trimmed.endswith("/v1"):
trimmed = f"{trimmed}/v1"
return trimmed
def load_jsonl(fp: str) -> Iterable[dict]:
with open(fp, "r", encoding="utf-8") as f:
for line in f:
line = line.strip()
if not line:
continue
yield json.loads(line)
def l2_normalize_rows(x: np.ndarray) -> np.ndarray:
# x: [N, D]
norms = np.linalg.norm(x, ord=2, axis=1, keepdims=True)
norms[norms == 0] = 1.0
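# zero-norm rows divide by 1 instead, so all-zero vectors pass through unchanged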
return x / norms
@lru_cache(maxsize=1)
def get_tiktoken_encoder():
# Get the o200k_base tokenizer (GPT-4o family), cached across calls;
# change this if you want a different encoding.
return tiktoken.get_encoding("o200k_base")
def count_tokens(text: str) -> int:
# Count tokens using o200k_base encoding
encoder = get_tiktoken_encoder()
return len(encoder.encode(text))
def get_text_splitter(chunk_size: int, overlap: int):
encoder = get_tiktoken_encoder()
return RecursiveCharacterTextSplitter(
chunk_size=chunk_size, # measured in tokens, since length_function counts tokens
chunk_overlap=overlap,
separators=["\n\n", "\n", ". ", " ", ""],
length_function=lambda t: len(encoder.encode(t)),
is_separator_regex=False,
)
def chunk_document(
doc: dict, max_tokens: int = 512, overlap_tokens: int = 128, min_chunk_size: int = 100
) -> list[dict]:
"""
Chunk a document if it exceeds max_tokens
Args:
doc: {'slug': str, 'title': str, 'text': str}
max_tokens: Maximum tokens per chunk
overlap_tokens: Overlap between chunks
min_chunk_size: Minimum chunk size (avoid tiny chunks)
Returns:
List of chunk dicts with metadata
"""
text = doc["text"]
token_count = count_tokens(text)
# No chunking needed
if token_count <= max_tokens:
return [
{
"slug": doc["slug"],
"title": doc.get("title", doc["slug"]),
"text": text,
"chunk_id": 0,
"parent_slug": doc["slug"],
"is_chunked": False,
}
]
# Apply chunking
splitter = get_text_splitter(max_tokens, overlap_tokens)
raw_chunks = splitter.split_text(text)
# Filter out tiny chunks
valid_chunks = [c for c in raw_chunks if count_tokens(c) >= min_chunk_size]
return [
{
"slug": f"{doc['slug']}#chunk{i}",
"title": doc.get("title", doc["slug"]),
"text": chunk,
"chunk_id": i,
"parent_slug": doc["slug"],
"is_chunked": True,
}
for i, chunk in enumerate(valid_chunks)
]
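# Example: with the defaults (512-token chunks, 128-token overlap), a ~1300-token
# note becomes a few overlapping chunks slugged "<slug>#chunk0", "<slug>#chunk1", ...,
# all sharing the same parent_slug for later aggregation.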
def write_shards(vectors: np.ndarray, shard_size: int, dtype: str, out_dir: Path) -> list[dict]:
out_dir.mkdir(parents=True, exist_ok=True)
rows, dims = vectors.shape
shards_meta: list[dict] = []
np_dtype = np.float16 if dtype == "fp16" else np.float32
bytes_per_value = np.dtype(np_dtype).itemsize
row_offset = 0
for si, start in enumerate(range(0, rows, shard_size)):
end = min(start + shard_size, rows)
shard = vectors[start:end] # [n, dims]
bin_path = out_dir / f"vectors-{si:03d}.bin"
payload = shard.astype(np_dtype, copy=False).tobytes(order="C")
digest = hashlib.sha256(payload).hexdigest()
with open(bin_path, "wb") as f:
f.write(payload)
shard_rows = int(shard.shape[0])
shards_meta.append(
{
"path": f"/embeddings/{bin_path.name}",
"rows": shard_rows,
"rowOffset": row_offset,
"byteLength": len(payload),
"sha256": digest,
"byteStride": dims * bytes_per_value,
},
)
row_offset += shard_rows
return shards_meta
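# e.g. 2500 rows with shard_size=1024 yields vectors-000.bin (rows 1024, rowOffset 0),
# vectors-001.bin (1024, 1024), and vectors-002.bin (452, 2048)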
def write_hnsw_graph(levels: list[list[list[int]]], rows: int, out_path: Path) -> tuple[list[dict], str]:
out_path.parent.mkdir(parents=True, exist_ok=True)
offset = 0
meta: list[dict] = []
digest = hashlib.sha256()
with open(out_path, "wb") as f:
for lvl in levels:
indptr = np.zeros(rows + 1, dtype=np.uint32)
edge_accum: list[int] = []
for idx in range(rows):
neighbors = lvl[idx] if idx < len(lvl) else []
indptr[idx + 1] = indptr[idx] + len(neighbors)
edge_accum.extend(neighbors)
indptr_bytes = indptr.tobytes(order="C")
indptr_offset = offset
f.write(indptr_bytes)
digest.update(indptr_bytes)
offset += len(indptr_bytes)
if edge_accum:
indices = np.asarray(edge_accum, dtype=np.uint32)
indices_bytes = indices.tobytes(order="C")
else:
indices = np.zeros(0, dtype=np.uint32)
indices_bytes = indices.tobytes(order="C")
indices_offset = offset
f.write(indices_bytes)
digest.update(indices_bytes)
offset += len(indices_bytes)
meta.append(
{
"level": len(meta),
"indptr": {
"offset": indptr_offset,
"elements": int(indptr.shape[0]),
"byteLength": len(indptr_bytes),
},
"indices": {
"offset": indices_offset,
"elements": int(indices.shape[0]),
"byteLength": len(indices_bytes),
},
},
)
return meta, digest.hexdigest()
def embed_vllm(
texts: list[str],
model_id: str,
vllm_url: str,
batch_size: int = 64,
concurrency: int = 8,
) -> np.ndarray:
base_url = resolve_vllm_base_url(vllm_url)
api_key = os.environ.get("VLLM_API_KEY") or os.environ.get("OPENAI_API_KEY") or "not-set"
client = OpenAI(base_url=base_url, api_key=api_key, timeout=300)
def list_available_models() -> list[str]:
models: list[str] = []
page = client.models.list()
models.extend(model.id for model in page.data)
while getattr(page, "has_more", False) and page.data:
cursor = page.data[-1].id
page = client.models.list(after=cursor)
models.extend(model.id for model in page.data)
return models
try:
available_models = list_available_models()
except Exception as exc:
raise RuntimeError(f"failed to query {base_url}/models: {exc}") from exc
if not available_models:
raise RuntimeError(f"no models are served by vLLM at {base_url}")
if model_id not in available_models:
suggestions = ", ".join(sorted(available_models)) if available_models else "<none>"
logger.warning(
"model '%s' is not served by vLLM at %s. Available models: %s. "
"Falling back to the first available model; semantic search results may differ. "
"(You can ignore this warning if your weights are an ONNX checkpoint of the same model.)",
model_id, base_url, suggestions,
)
model_id = available_models[0]
# Apply model-specific prefixes for documents (asymmetric search)
model_lower = model_id.lower()
if "e5" in model_lower:
# E5 models: use "passage:" prefix for documents
prefixed = [f"passage: {t}" for t in texts]
elif "qwen" in model_lower and "embedding" in model_lower:
# Qwen3-Embedding: documents use plain text (no prefix)
prefixed = texts
elif "embeddinggemma" in model_lower:
# embeddinggemma: use "title: none | text:" prefix for documents
prefixed = [f"title: none | text: {t}" for t in texts]
else:
# Default: no prefix for unknown models
prefixed = texts
print(
"Embedding"
f" {len(prefixed)} texts with vLLM"
f" (model={model_id}, batch_size={batch_size}, concurrency={concurrency})",
)
# Create batches
batches = []
for i in range(0, len(prefixed), batch_size):
batch = prefixed[i : i + batch_size]
batches.append((i, batch))
# Function to send a single batch request
def send_batch(batch_info: tuple[int, list[str]]) -> tuple[int, list[np.ndarray]]:
idx, batch = batch_info
response = client.embeddings.create(model=model_id, input=batch)
embeddings = [np.asarray(item.embedding, dtype=np.float32) for item in response.data]
return (idx, embeddings)
# Send batches concurrently (or sequentially if only 1 batch)
results: dict[int, list[np.ndarray]] = {}
if len(batches) == 1:
# Single batch - no need for threading
idx, embeddings = send_batch(batches[0])
results[idx] = embeddings
else:
# Multiple batches - use concurrent requests
with ThreadPoolExecutor(max_workers=concurrency) as executor:
futures = {executor.submit(send_batch, batch_info): batch_info[0] for batch_info in batches}
completed = 0
for future in as_completed(futures):
idx, embeddings = future.result()
results[idx] = embeddings
completed += 1
if completed % max(1, len(batches) // 10) == 0 or completed == len(batches):
print(f" Completed {completed}/{len(batches)} batches ({completed * 100 // len(batches)}%)")
# Reconstruct in order
out: list[np.ndarray] = []
for i in sorted(results.keys()):
out.extend(results[i])
return np.stack(out, axis=0)
def embed_hf(texts: list[str], model_id: str, device: str) -> np.ndarray:
# Prefer sentence-transformers for E5 and similar embed models
from sentence_transformers import SentenceTransformer
model = SentenceTransformer(model_id, device=device)
# Apply model-specific prefixes for documents (asymmetric search)
model_lower = model_id.lower()
if "e5" in model_lower:
# E5 models: use "passage:" prefix for documents
prefixed = [f"passage: {t}" for t in texts]
elif "qwen" in model_lower and "embedding" in model_lower:
# Qwen3-Embedding: documents use plain text (no prefix)
prefixed = texts
elif "embeddinggemma" in model_lower:
# embeddinggemma: use "title: none | text:" prefix for documents
prefixed = [f"title: none | text: {t}" for t in texts]
else:
# Default: no prefix for unknown models
prefixed = texts
vecs = model.encode(
prefixed,
batch_size=64,
normalize_embeddings=True,
convert_to_numpy=True,
show_progress_bar=True,
)
return vecs.astype(np.float32, copy=False)
def main():
ap = argparse.ArgumentParser()
ap.add_argument("--jsonl", default="public/embeddings-text.jsonl")
ap.add_argument("--model", default=os.environ.get("SEM_MODEL", "intfloat/multilingual-e5-large"))
ap.add_argument("--dims", type=int, default=int(os.environ.get("SEM_DIMS", "1024")))
ap.add_argument("--dtype", choices=["fp16", "fp32"], default=os.environ.get("SEM_DTYPE", "fp32"))
ap.add_argument("--shard-size", type=int, default=int(os.environ.get("SEM_SHARD", "1024")))
ap.add_argument("--out", default="public/embeddings")
ap.add_argument("--use-vllm", action="store_true", default=bool(os.environ.get("USE_VLLM", "")))
ap.add_argument(
"--vllm-url",
default=DEFAULT_VLLM_URL,
help="Base URL for the vLLM OpenAI-compatible server (accepts either /v1 or /v1/embeddings)",
)
ap.add_argument("--chunk-size", type=int, default=512, help="Max tokens per chunk")
ap.add_argument("--chunk-overlap", type=int, default=128, help="Overlap tokens between chunks")
ap.add_argument("--no-chunking", action="store_true", help="Disable chunking (embed full docs)")
ap.add_argument(
"--concurrency",
type=int,
default=int(os.environ.get("VLLM_CONCURRENCY", "8")),
help="Number of concurrent requests to vLLM (default: 8)",
)
ap.add_argument(
"--batch-size",
type=int,
default=int(os.environ.get("VLLM_BATCH_SIZE", "64")),
help="Batch size for vLLM requests (default: 64)",
)
args = ap.parse_args()
recs = list(load_jsonl(args.jsonl))
if not recs:
print("No input found in public/embeddings-text.jsonl; run the site build first to emit JSONL.")
return
# Apply chunking
if args.no_chunking:
chunks = recs
chunk_metadata = {}
print(f"Chunking disabled. Processing {len(chunks)} full documents")
else:
chunks = []
chunk_metadata = {}
for rec in recs:
doc_chunks = chunk_document(rec, max_tokens=args.chunk_size, overlap_tokens=args.chunk_overlap)
chunks.extend(doc_chunks)
# Build chunk metadata map
for chunk in doc_chunks:
if chunk["is_chunked"]:
chunk_metadata[chunk["slug"]] = {
"parentSlug": chunk["parent_slug"],
"chunkId": chunk["chunk_id"],
}
chunked_count = sum(1 for c in chunks if c.get("is_chunked", False))
print(f"Chunked {len(recs)} documents into {len(chunks)} chunks ({chunked_count} chunked, {len(chunks) - chunked_count} unchanged)")
print(f" Chunk size: {args.chunk_size} tokens, overlap: {args.chunk_overlap} tokens")
ids = [c["slug"] for c in chunks]
titles = [c.get("title", c["slug"]) for c in chunks]
texts = [c["text"] for c in chunks]
if args.use_vllm:
vecs = embed_vllm(
texts,
args.model,
args.vllm_url,
batch_size=args.batch_size,
concurrency=args.concurrency,
)
else:
device = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES") else "cpu"
vecs = embed_hf(texts, args.model, device)
# Coerce dims and re-normalize
if vecs.shape[1] != args.dims:
if vecs.shape[1] > args.dims:
vecs = vecs[:, : args.dims]
else:
vecs = np.pad(vecs, ((0, 0), (0, args.dims - vecs.shape[1])))
vecs = l2_normalize_rows(vecs.astype(np.float32, copy=False))
out_dir = Path(args.out)
shards = write_shards(vecs, args.shard_size, args.dtype, out_dir)
# Build a lightweight HNSW graph and store it in a compact binary layout
def hnsw_build(data: np.ndarray, M: int = 16, efC: int = 200, seed: int = 0) -> dict:
rng = random.Random(seed)
N, D = data.shape
levels: list[list[list[int]]] = [] # levels[L][i] = neighbors of node i at level L
# random level assignment using 1/e distribution
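# i.e. a geometric distribution with continuation probability 1/e, so
# P(level >= L) = e^-L and the expected max level grows like ln(N)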
node_levels = []
for _ in range(N):
lvl = 0
while rng.random() < 1 / math.e:
lvl += 1
node_levels.append(lvl)
max_level = max(node_levels) if N > 0 else 0
for _ in range(max_level + 1):
levels.append([[] for _ in range(N)])
def sim(i: int, j: int) -> float:
return float((data[i] * data[j]).sum())
entry = 0 if N > 0 else -1
def search_layer(q: int, ep: int, ef: int, L: int) -> list[int]:
if ep < 0:
return []
visited = set()
cand: list[tuple[float, int]] = []
top: list[tuple[float, int]] = []
def push(node: int):
if node in visited:
return
visited.add(node)
cand.append((sim(q, node), node))
push(ep)
while cand:
cand.sort(reverse=True)
s, v = cand.pop(0)
if len(top) >= ef and s <= top[-1][0]:
break
top.append((s, v))
for u in levels[L][v]:
push(u)
top.sort(reverse=True)
return [n for _, n in top]
for i in range(N):
if i == 0:
continue
lvl = node_levels[i]
ep = entry
for L in range(max_level, lvl, -1):
c = search_layer(i, ep, 1, L)
if c:
ep = c[0]
for L in range(min(max_level, lvl), -1, -1):
W = search_layer(i, ep, efC, L)
# Select top M by similarity
neigh = sorted(((sim(i, j), j) for j in W if j != i), reverse=True)[:M]
for _, e in neigh:
if e not in levels[L][i]:
levels[L][i].append(e)
if i not in levels[L][e]:
levels[L][e].append(i)
# trim neighbors to M
for L in range(len(levels)):
for i in range(N):
if len(levels[L][i]) > M:
# keep top M by sim
nb = levels[L][i]
nb = sorted(nb, key=lambda j: sim(i, j), reverse=True)[:M]
levels[L][i] = nb
return {
"M": M,
"efConstruction": efC,
"entryPoint": entry,
"maxLevel": max_level,
"levels": levels,
}
hnsw = hnsw_build(vecs, M=16, efC=200)
hnsw_meta, hnsw_sha = write_hnsw_graph(hnsw["levels"], int(vecs.shape[0]), out_dir / "hnsw.bin")
manifest = {
"version": 2,
"dims": args.dims,
"dtype": args.dtype,
"normalized": True,
"rows": int(vecs.shape[0]),
"shardSizeRows": args.shard_size,
"vectors": {
"dtype": args.dtype,
"rows": int(vecs.shape[0]),
"dims": args.dims,
"shards": shards,
},
"ids": ids,
"titles": titles,
"chunkMetadata": chunk_metadata,
"hnsw": {
"M": hnsw["M"],
"efConstruction": hnsw["efConstruction"],
"entryPoint": hnsw["entryPoint"],
"maxLevel": hnsw["maxLevel"],
"graph": {
"path": "/embeddings/hnsw.bin",
"sha256": hnsw_sha,
"levels": hnsw_meta,
},
},
}
(out_dir / "manifest.json").write_text(json.dumps(manifest, ensure_ascii=False), encoding="utf-8")
print(f"Wrote {len(shards)} vector shard(s), HNSW graph, and manifest to {out_dir}")
if __name__ == "__main__":
main()

View File

@@ -27,8 +27,6 @@ import lt from "./locales/lt-LT"
import fi from "./locales/fi-FI"
import no from "./locales/nb-NO"
import id from "./locales/id-ID"
import kk from "./locales/kk-KZ"
import he from "./locales/he-IL"
export const TRANSLATIONS = {
"en-US": enUs,
@@ -80,8 +78,6 @@ export const TRANSLATIONS = {
"fi-FI": fi,
"nb-NO": no,
"id-ID": id,
"kk-KZ": kk,
"he-IL": he,
} as const
export const defaultTranslation = "en-US"

View File

@@ -1,88 +0,0 @@
import { Translation } from "./definition"
export default {
propertyDefaults: {
title: "ללא כותרת",
description: "לא סופק תיאור",
},
direction: "rtl" as const,
components: {
callout: {
note: "הערה",
abstract: "תקציר",
info: "מידע",
todo: "לעשות",
tip: "טיפ",
success: "הצלחה",
question: "שאלה",
warning: "אזהרה",
failure: "כשלון",
danger: "סכנה",
bug: "באג",
example: "דוגמה",
quote: "ציטוט",
},
backlinks: {
title: "קישורים חוזרים",
noBacklinksFound: "לא נמצאו קישורים חוזרים",
},
themeToggle: {
lightMode: "מצב בהיר",
darkMode: "מצב כהה",
},
readerMode: {
title: "מצב קריאה",
},
explorer: {
title: "סייר",
},
footer: {
createdWith: "נוצר באמצעות",
},
graph: {
title: "מבט גרף",
},
recentNotes: {
title: "הערות אחרונות",
seeRemainingMore: ({ remaining }) => `עיין ב ${remaining} נוספים →`,
},
transcludes: {
transcludeOf: ({ targetSlug }) => `מצוטט מ ${targetSlug}`,
linkToOriginal: "קישור למקורי",
},
search: {
title: "חיפוש",
searchBarPlaceholder: "חפשו משהו",
},
tableOfContents: {
title: "תוכן עניינים",
},
contentMeta: {
readingTime: ({ minutes }) => `${minutes} דקות קריאה`,
},
},
pages: {
rss: {
recentNotes: "הערות אחרונות",
lastFewNotes: ({ count }) => `${count} הערות אחרונות`,
},
error: {
title: "לא נמצא",
notFound: "העמוד הזה פרטי או לא קיים.",
home: "חזרה לעמוד הבית",
},
folderContent: {
folder: "תיקייה",
itemsUnderFolder: ({ count }) =>
count === 1 ? "פריט אחד תחת תיקייה זו." : `${count} פריטים תחת תיקייה זו.`,
},
tagContent: {
tag: "תגית",
tagIndex: "מפתח התגיות",
itemsUnderTag: ({ count }) =>
count === 1 ? "פריט אחד עם תגית זו." : `${count} פריטים עם תגית זו.`,
showingFirst: ({ count }) => `מראה את ה-${count} תגיות הראשונות.`,
totalTags: ({ count }) => `${count} תגיות נמצאו סך הכל.`,
},
},
} as const satisfies Translation

View File

@@ -8,7 +8,7 @@ export default {
components: {
callout: {
note: "Nota",
abstract: "Abstract",
abstract: "Astratto",
info: "Info",
todo: "Da fare",
tip: "Consiglio",
@@ -17,7 +17,7 @@ export default {
warning: "Attenzione",
failure: "Errore",
danger: "Pericolo",
bug: "Problema",
bug: "Bug",
example: "Esempio",
quote: "Citazione",
},
@@ -43,11 +43,10 @@ export default {
},
recentNotes: {
title: "Note recenti",
seeRemainingMore: ({ remaining }) =>
remaining === 1 ? "Vedi 1 altra →" : `Vedi altre ${remaining}`,
seeRemainingMore: ({ remaining }) => `Vedi ${remaining} altro →`,
},
transcludes: {
transcludeOf: ({ targetSlug }) => `Inclusione di ${targetSlug}`,
transcludeOf: ({ targetSlug }) => `Transclusione di ${targetSlug}`,
linkToOriginal: "Link all'originale",
},
search: {
@@ -55,16 +54,16 @@ export default {
searchBarPlaceholder: "Cerca qualcosa",
},
tableOfContents: {
title: "Indice",
title: "Tabella dei contenuti",
},
contentMeta: {
readingTime: ({ minutes }) => (minutes === 1 ? "1 minuto" : `${minutes} minuti`),
readingTime: ({ minutes }) => `${minutes} minuti`,
},
},
pages: {
rss: {
recentNotes: "Note recenti",
lastFewNotes: ({ count }) => (count === 1 ? "Ultima nota" : `Ultime ${count} note`),
lastFewNotes: ({ count }) => `Ultime ${count} note`,
},
error: {
title: "Non trovato",
@@ -81,9 +80,8 @@ export default {
tagIndex: "Indice etichette",
itemsUnderTag: ({ count }) =>
count === 1 ? "1 oggetto con questa etichetta." : `${count} oggetti con questa etichetta.`,
showingFirst: ({ count }) => (count === 1 ? "Prima etichetta." : `Prime ${count} etichette.`),
totalTags: ({ count }) =>
count === 1 ? "Trovata 1 etichetta in totale." : `Trovate ${count} etichette totali.`,
showingFirst: ({ count }) => `Prime ${count} etichette.`,
totalTags: ({ count }) => `Trovate ${count} etichette totali.`,
},
},
} as const satisfies Translation

View File

@@ -1,87 +0,0 @@
import { Translation } from "./definition"
export default {
propertyDefaults: {
title: "Атаусыз",
description: "Сипаттама берілмеген",
},
components: {
callout: {
note: "Ескерту",
abstract: "Аннотация",
info: "Ақпарат",
todo: "Істеу керек",
tip: "Кеңес",
success: "Сәттілік",
question: "Сұрақ",
warning: "Ескерту",
failure: "Қате",
danger: "Қауіп",
bug: "Қате",
example: "Мысал",
quote: "Дәйексөз",
},
backlinks: {
title: "Артқа сілтемелер",
noBacklinksFound: "Артқа сілтемелер табылмады",
},
themeToggle: {
lightMode: "Жарық режимі",
darkMode: "Қараңғы режим",
},
readerMode: {
title: "Оқу режимі",
},
explorer: {
title: "Зерттеуші",
},
footer: {
createdWith: "Құрастырылған құрал:",
},
graph: {
title: "Граф көрінісі",
},
recentNotes: {
title: "Соңғы жазбалар",
seeRemainingMore: ({ remaining }) => `Тағы ${remaining} жазбаны қарау →`,
},
transcludes: {
transcludeOf: ({ targetSlug }) => `${targetSlug} кірістіру`,
linkToOriginal: "Бастапқыға сілтеме",
},
search: {
title: "Іздеу",
searchBarPlaceholder: "Бірдеңе іздеу",
},
tableOfContents: {
title: "Мазмұны",
},
contentMeta: {
readingTime: ({ minutes }) => `${minutes} мин оқу`,
},
},
pages: {
rss: {
recentNotes: "Соңғы жазбалар",
lastFewNotes: ({ count }) => `Соңғы ${count} жазба`,
},
error: {
title: "Табылмады",
notFound: "Бұл бет жеке немесе жоқ болуы мүмкін.",
home: "Басты бетке оралу",
},
folderContent: {
folder: "Қалта",
itemsUnderFolder: ({ count }) =>
count === 1 ? "Бұл қалтада 1 элемент бар." : `Бұл қалтада ${count} элемент бар.`,
},
tagContent: {
tag: "Тег",
tagIndex: "Тегтер индексі",
itemsUnderTag: ({ count }) =>
count === 1 ? "Бұл тегпен 1 элемент." : `Бұл тегпен ${count} элемент.`,
showingFirst: ({ count }) => `Алғашқы ${count} тег көрсетілуде.`,
totalTags: ({ count }) => `Барлығы ${count} тег табылды.`,
},
},
} as const satisfies Translation

View File

@@ -3,83 +3,85 @@ import { Translation } from "./definition"
export default {
propertyDefaults: {
title: "Không có tiêu đề",
description: "Không có mô tả",
description: "Không có mô tả được cung cấp",
},
components: {
callout: {
note: "Ghi chú",
abstract: "Tổng quan",
note: "Ghi Chú",
abstract: "Tóm Tắt",
info: "Thông tin",
todo: "Cần phải làm",
tip: "Gợi ý",
success: "Thành công",
question: "Câu hỏi",
warning: "Cảnh báo",
failure: "Thất bại",
danger: "Nguy hiểm",
todo: "Cần Làm",
tip: "Gợi Ý",
success: "Thành Công",
question: "Nghi Vấn",
warning: "Cảnh Báo",
failure: "Thất Bại",
danger: "Nguy Hiểm",
bug: "Lỗi",
example: "Ví dụ",
quote: "Trích dẫn",
example: "Ví Dụ",
quote: "Trích Dẫn",
},
backlinks: {
title: "Liên kết ngược",
noBacklinksFound: "Không có liên kết ngược nào",
title: "Liên Kết Ngược",
noBacklinksFound: "Không có liên kết ngược được tìm thấy",
},
themeToggle: {
lightMode: "Chế độ sáng",
darkMode: "Chế độ tối",
lightMode: "Sáng",
darkMode: "Tối",
},
readerMode: {
title: "Chế độ đọc",
},
explorer: {
title: "Nội dung",
title: "Trong bài này",
},
footer: {
createdWith: "Được tạo bằng",
createdWith: "Được tạo bởi",
},
graph: {
title: "Sơ đồ",
title: "Biểu Đồ",
},
recentNotes: {
title: "Ghi chú gần đây",
seeRemainingMore: ({ remaining }) => `Xem thêm ${remaining} ghi chú`,
title: "Bài viết gần đây",
seeRemainingMore: ({ remaining }) => `Xem ${remaining} thêm`,
},
transcludes: {
transcludeOf: ({ targetSlug }) => `Trích dẫn toàn bộ từ ${targetSlug}`,
linkToOriginal: "Xem trang gốc",
transcludeOf: ({ targetSlug }) => `Bao gồm ${targetSlug}`,
linkToOriginal: "Liên Kết Gốc",
},
search: {
title: "Tìm",
title: "Tìm Kiếm",
searchBarPlaceholder: "Tìm kiếm thông tin",
},
tableOfContents: {
title: "Mục lục",
title: "Bảng Nội Dung",
},
contentMeta: {
readingTime: ({ minutes }) => `${minutes} phút đọc`,
readingTime: ({ minutes }) => `đọc ${minutes} phút`,
},
},
pages: {
rss: {
recentNotes: "Ghi chú gần đây",
lastFewNotes: ({ count }) => `${count} Trang gần đây`,
recentNotes: "Những bài gần đây",
lastFewNotes: ({ count }) => `${count} Bài gần đây`,
},
error: {
title: "Không tìm thấy",
notFound: "Trang này riêng tư hoặc không tồn tại.",
home: "Về trang chủ",
title: "Không Tìm Thấy",
notFound: "Trang này được bảo mật hoặc không tồn tại.",
home: "Trở về trang chủ",
},
folderContent: {
folder: "Thư mục",
itemsUnderFolder: ({ count }) => `${count} trang trong thư mục này.`,
folder: "Thư Mục",
itemsUnderFolder: ({ count }) =>
count === 1 ? "1 mục trong thư mục này." : `${count} mục trong thư mục này.`,
},
tagContent: {
tag: "Thẻ",
tagIndex: "Danh sách thẻ",
itemsUnderTag: ({ count }) => `${count} trang gắn thẻ này.`,
showingFirst: ({ count }) => `Đang hiển thị ${count} trang đầu tiên.`,
totalTags: ({ count }) => `Có tổng cộng ${count} thẻ.`,
tagIndex: "Thẻ Mục Lục",
itemsUnderTag: ({ count }) =>
count === 1 ? "1 mục gắn thẻ này." : `${count} mục gắn thẻ này.`,
showingFirst: ({ count }) => `Hiển thị trước ${count} thẻ.`,
totalTags: ({ count }) => `Tìm thấy ${count} thẻ tổng cộng.`,
},
},
} as const satisfies Translation

View File

@@ -40,7 +40,7 @@ export const NotFoundPage: QuartzEmitterPlugin = () => {
description: notFound,
frontmatter: { title: notFound, tags: [] },
})
const externalResources = pageResources(path, resources)
const externalResources = pageResources(path, resources, ctx.cfg.configuration)
const componentData: QuartzComponentProps = {
ctx,
fileData: vfile.data,

View File

@@ -1,5 +1,8 @@
import { FullSlug, joinSegments } from "../../util/path"
import { QuartzEmitterPlugin } from "../types"
import path from "path"
import fs from "node:fs/promises"
import { globby } from "globby"
// @ts-ignore
import spaRouterScript from "../../components/scripts/spa.inline"
@@ -16,7 +19,7 @@ import {
processGoogleFonts,
} from "../../util/theme"
import { Features, transform } from "lightningcss"
import { transform as transpile } from "esbuild"
import { transform as transpile, build as bundle } from "esbuild"
import { write } from "./helpers"
type ComponentResources = {
@@ -241,16 +244,6 @@ function addGlobalPageResources(ctx: BuildCtx, componentResources: ComponentReso
vercelInsightsScript.defer = true
document.head.appendChild(vercelInsightsScript)
`)
} else if (cfg.analytics?.provider === "rybbit") {
componentResources.afterDOMLoaded.push(`
const rybbitScript = document.createElement("script");
rybbitScript.src = "${cfg.analytics.host ?? "https://app.rybbit.io"}/api/script.js";
rybbitScript.setAttribute("data-site-id", "${cfg.analytics.siteId}");
rybbitScript.async = true;
rybbitScript.defer = true;
document.head.appendChild(rybbitScript);
`)
}
if (cfg.enableSPA) {
@@ -367,7 +360,47 @@ export const ComponentResources: QuartzEmitterPlugin = () => {
ext: ".js",
content: postscript,
})
// Bundle all worker files
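// (each entry is bundled in-memory via esbuild's write: false option and the
// resulting text is emitted as <name>.js at the output root, so a file named
// semantic.worker.ts becomes /semantic.worker.js; presumably that flat path
// is what client code passes to `new Worker(...)`, though the consuming
// script is not part of this diff)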
const workerFiles = await globby(["quartz/**/*.worker.ts"])
for (const src of workerFiles) {
const result = await bundle({
entryPoints: [src],
bundle: true,
minify: true,
platform: "browser",
format: "esm",
write: false,
})
const code = result.outputFiles[0].text
const name = path.basename(src).replace(/\.ts$/, "")
yield write({ ctx, slug: name as FullSlug, ext: ".js", content: code })
}
},
async *partialEmit(ctx, _content, _resources, changeEvents) {
// Handle worker file changes in incremental builds
for (const changeEvent of changeEvents) {
if (!/\.worker\.ts$/.test(changeEvent.path)) continue
if (changeEvent.type === "delete") {
const name = path.basename(changeEvent.path).replace(/\.ts$/, "")
const dest = joinSegments(ctx.argv.output, `${name}.js`)
try {
await fs.unlink(dest)
} catch {}
continue
}
const result = await bundle({
entryPoints: [changeEvent.path],
bundle: true,
minify: true,
platform: "browser",
format: "esm",
write: false,
})
const code = result.outputFiles[0].text
const name = path.basename(changeEvent.path).replace(/\.ts$/, "")
yield write({ ctx, slug: name as FullSlug, ext: ".js", content: code })
}
},
async *partialEmit() {},
}
}

View File

@@ -25,7 +25,7 @@ async function processContent(
) {
const slug = fileData.slug!
const cfg = ctx.cfg.configuration
const externalResources = pageResources(pathToRoot(slug), resources)
const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
const componentData: QuartzComponentProps = {
ctx,
fileData,

View File

@@ -38,7 +38,7 @@ async function* processFolderInfo(
const slug = joinSegments(folder, "index") as FullSlug
const [tree, file] = folderContent
const cfg = ctx.cfg.configuration
const externalResources = pageResources(pathToRoot(slug), resources)
const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
const componentData: QuartzComponentProps = {
ctx,
fileData: file.data,

View File

@@ -1,7 +1,7 @@
export { ContentPage } from "./contentPage"
export { TagPage } from "./tagPage"
export { FolderPage } from "./folderPage"
export { ContentIndex as ContentIndex } from "./contentIndex"
export { ContentIndex } from "./contentIndex"
export { AliasRedirects } from "./aliases"
export { Assets } from "./assets"
export { Static } from "./static"
@@ -10,3 +10,4 @@ export { ComponentResources } from "./componentResources"
export { NotFoundPage } from "./404"
export { CNAME } from "./cname"
export { CustomOgImages } from "./ogImage"
export { SemanticIndex } from "./semantic"

View File

@@ -0,0 +1,235 @@
import { write } from "./helpers"
import { QuartzEmitterPlugin } from "../types"
import { FilePath, FullSlug, joinSegments, QUARTZ } from "../../util/path"
import { ReadTimeResults } from "reading-time"
import { GlobalConfiguration } from "../../cfg"
import { spawn } from "child_process"
const DEFAULT_MODEL_ID = "onnx-community/Qwen3-Embedding-0.6B-ONNX"
const defaults: GlobalConfiguration["semanticSearch"] = {
enable: true,
model: DEFAULT_MODEL_ID,
aot: false,
dims: 1024,
dtype: "fp32",
shardSizeRows: 1024,
hnsw: { M: 16, efConstruction: 200 },
chunking: {
chunkSize: 512,
chunkOverlap: 128,
noChunking: false,
},
vllm: {
enable: false,
vllmUrl:
process.env.VLLM_URL || process.env.VLLM_EMBED_URL || "http://127.0.0.1:8000/v1/embeddings",
concurrency: parseInt(process.env.VLLM_CONCURRENCY || "8", 10),
batchSize: parseInt(process.env.VLLM_BATCH_SIZE || "64", 10),
},
}
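// A minimal sketch of how a site might override these defaults from
// quartz.config.ts. The option names mirror `defaults` above; the exact
// registration site (Plugin.SemanticIndex among the emitters) is an
// assumption based on the export added to quartz/plugins/emitters/index.ts:
//
//   emitters: [
//     // ...existing emitters...
//     Plugin.SemanticIndex({
//       model: "onnx-community/Qwen3-Embedding-0.6B-ONNX",
//       dims: 1024,
//       chunking: { chunkSize: 256, chunkOverlap: 64, noChunking: false },
//     }),
//   ]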
type ContentDetails = {
slug: string
title: string
filePath: FilePath
content: string
readingTime?: Partial<ReadTimeResults>
}
/**
* Check if uv is installed
*/
function checkUvInstalled(): Promise<boolean> {
return new Promise((resolve) => {
const proc = spawn("uv", ["--version"], { shell: true })
proc.on("error", () => resolve(false))
proc.on("close", (code) => resolve(code === 0))
})
}
/**
* Run the Python embedding build script using uv
* Script uses PEP 723 inline metadata for dependency management
*/
function runEmbedBuild(
jsonlPath: string,
outDir: string,
opts: {
model: string
dtype: string
dims: number
shardSizeRows: number
chunking: { chunkSize: number; chunkOverlap: number; noChunking: boolean }
vllm: { enable: boolean; vllmUrl?: string; concurrency: number; batchSize: number }
},
): Promise<void> {
return new Promise((resolve, reject) => {
const scriptPath = joinSegments(QUARTZ, "embed_build.py")
const args = [
"run",
scriptPath,
"--jsonl",
jsonlPath,
"--model",
opts.model,
"--out",
outDir,
"--dtype",
opts.dtype,
"--dims",
String(opts.dims),
"--shard-size",
String(opts.shardSizeRows),
"--chunk-size",
String(opts.chunking.chunkSize),
"--chunk-overlap",
String(opts.chunking.chunkOverlap),
]
if (opts.chunking.noChunking) {
args.push("--no-chunking")
}
if (opts.vllm.enable) {
args.push("--use-vllm")
if (opts.vllm.vllmUrl) {
args.push("--vllm-url", opts.vllm.vllmUrl)
}
args.push("--concurrency", String(opts.vllm.concurrency))
args.push("--batch-size", String(opts.vllm.batchSize))
}
console.log("\nRunning embedding generation:")
console.log(` uv ${args.join(" ")}`)
const env = { ...process.env }
if (opts.vllm.enable && !env.USE_VLLM) {
env.USE_VLLM = "1"
}
const proc = spawn("uv", args, {
stdio: "inherit",
shell: true,
env,
})
proc.on("error", (err) => {
reject(new Error(`Failed to spawn uv: ${err.message}`))
})
proc.on("close", (code) => {
if (code === 0) {
console.log("Embedding generation completed successfully")
resolve()
} else {
reject(new Error(`embed_build.py exited with code ${code}`))
}
})
})
}
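// For context: PEP 723 inline metadata is a commented TOML block at the top
// of the Python script itself, which `uv run` reads to resolve dependencies
// without a separate requirements file. A hypothetical header for
// embed_build.py (the real dependency list is not shown in this diff):
//
//   # /// script
//   # requires-python = ">=3.11"
//   # dependencies = ["numpy", "onnxruntime"]
//   # ///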
export const SemanticIndex: QuartzEmitterPlugin<Partial<GlobalConfiguration["semanticSearch"]>> = (
opts,
) => {
const merged = { ...defaults, ...opts }
const o = {
enable: merged.enable!,
model: merged.model!,
aot: merged.aot!,
dims: merged.dims!,
dtype: merged.dtype!,
shardSizeRows: merged.shardSizeRows!,
hnsw: {
M: merged.hnsw?.M ?? defaults.hnsw!.M!,
efConstruction: merged.hnsw?.efConstruction ?? defaults.hnsw!.efConstruction!,
efSearch: merged.hnsw?.efSearch,
},
chunking: {
chunkSize: merged.chunking?.chunkSize ?? defaults.chunking!.chunkSize!,
chunkOverlap: merged.chunking?.chunkOverlap ?? defaults.chunking!.chunkOverlap!,
noChunking: merged.chunking?.noChunking ?? defaults.chunking!.noChunking!,
},
vllm: {
enable: merged.vllm?.enable ?? defaults.vllm!.enable!,
vllmUrl: merged.vllm?.vllmUrl ?? defaults.vllm!.vllmUrl,
concurrency: merged.vllm?.concurrency ?? defaults.vllm!.concurrency!,
batchSize: merged.vllm?.batchSize ?? defaults.vllm!.batchSize!,
},
}
if (!o.model) {
throw new Error("Semantic search requires a model identifier")
}
return {
name: "SemanticIndex",
getQuartzComponents() {
return []
},
async *partialEmit() {},
async *emit(ctx, content, _resources) {
if (!o.enable) return
const docs: ContentDetails[] = []
for (const [_, file] of content) {
const slug = file.data.slug!
const title = file.data.frontmatter?.title ?? slug
const text = file.data.text
if (text) {
docs.push({
slug,
title,
filePath: file.data.filePath!,
content: text,
readingTime: file.data.readingTime,
})
}
}
// Emit JSONL with the exact text used for embeddings
const jsonl = docs
.map((d) => ({ slug: d.slug, title: d.title, text: d.content }))
.map((o) => JSON.stringify(o))
.join("\n")
const jsonlSlug = "embeddings-text" as FullSlug
yield write({
ctx,
slug: jsonlSlug,
ext: ".jsonl",
content: jsonl,
})
// If aot is false, run the embedding generation script
if (!o.aot) {
console.log("\nGenerating embeddings (aot=false)...")
// Check for uv
const hasUv = await checkUvInstalled()
if (!hasUv) {
throw new Error(
"uv is required for embedding generation. Install it from https://docs.astral.sh/uv/",
)
}
const jsonlPath = joinSegments(ctx.argv.output, "embeddings-text.jsonl")
const outDir = joinSegments(ctx.argv.output, "embeddings")
try {
await runEmbedBuild(jsonlPath, outDir, o)
} catch (err) {
const message = err instanceof Error ? err.message : String(err)
throw new Error(`Embedding generation failed: ${message}`)
}
} else {
console.log(
"\nSkipping embedding generation (aot=true). Expecting pre-generated embeddings in public/embeddings/",
)
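// Based on the argument list assembled in runEmbedBuild above, pre-generating
// embeddings for aot mode amounts to roughly (a sketch; flag values depend on
// your configuration):
//   uv run quartz/embed_build.py --jsonl public/embeddings-text.jsonl \
//     --out public/embeddings --model <model-id> --dtype fp32 --dims 1024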
}
},
externalResources(_ctx) {
return {}
},
}
}

View File

@@ -7,7 +7,6 @@ import { dirname } from "path"
export const Static: QuartzEmitterPlugin = () => ({
name: "Static",
async *emit({ argv, cfg }) {
// Copy Quartz's own internal static assets (quartz/static/) → output/static/
const staticPath = joinSegments(QUARTZ, "static")
const fps = await glob("**", staticPath, cfg.configuration.ignorePatterns)
const outputStaticPath = joinSegments(argv.output, "static")
@@ -19,21 +18,6 @@ export const Static: QuartzEmitterPlugin = () => ({
await fs.promises.copyFile(src, dest)
yield dest
}
// Copy user-facing static assets (static/) → output/ preserving paths.
// This mirrors Hugo's convention: static/ox-hugo/foo.png is served at /ox-hugo/foo.png,
// which matches the src="/ox-hugo/..." paths that ox-hugo writes into exported markdown.
const userStaticPath = "static"
if (fs.existsSync(userStaticPath)) {
const userFps = await glob("**", userStaticPath, cfg.configuration.ignorePatterns, false)
for (const fp of userFps) {
const src = joinSegments(userStaticPath, fp) as FilePath
const dest = joinSegments(argv.output, fp) as FilePath
await fs.promises.mkdir(dirname(dest), { recursive: true })
await fs.promises.copyFile(src, dest)
yield dest
}
}
},
async *partialEmit() {},
})

View File

@@ -73,7 +73,7 @@ async function processTagPage(
const slug = joinSegments("tags", tag) as FullSlug
const [tree, file] = tagContent
const cfg = ctx.cfg.configuration
const externalResources = pageResources(pathToRoot(slug), resources)
const externalResources = pageResources(pathToRoot(slug), resources, ctx.cfg.configuration)
const componentData: QuartzComponentProps = {
ctx,
fileData: file.data,

View File

@@ -23,16 +23,7 @@ export const Citations: QuartzTransformerPlugin<Partial<Options>> = (userOpts) =
name: "Citations",
htmlPlugins(ctx) {
const plugins: PluggableList = []
// per default, rehype-citations only supports en-US
// see: https://github.com/timlrx/rehype-citation/issues/12
// in here there are multiple usable locales:
// https://github.com/citation-style-language/locales
// thus, we optimistically assume there is indeed an appropriate
// locale available and simply create the lang url-string
let lang: string = "en-US"
if (ctx.cfg.configuration.locale !== "en-US") {
lang = `https://raw.githubusercontent.com/citation-style-language/locales/refs/heads/master/locales-${ctx.cfg.configuration.locale}.xml`
}
// Add rehype-citation to the list of plugins
plugins.push([
rehypeCitation,
@@ -41,7 +32,7 @@ export const Citations: QuartzTransformerPlugin<Partial<Options>> = (userOpts) =
suppressBibliography: opts.suppressBibliography,
linkCitations: opts.linkCitations,
csl: opts.csl,
lang,
lang: ctx.cfg.configuration.locale ?? "en-US",
},
])

View File

@@ -103,6 +103,7 @@ export const FrontMatter: QuartzTransformerPlugin<Partial<Options>> = (userOpts)
const created = coalesceAliases(data, ["created", "date"])
if (created) {
data.created = created
data.modified ||= created // if modified is not set, use created
}
const modified = coalesceAliases(data, [
@@ -112,8 +113,6 @@ export const FrontMatter: QuartzTransformerPlugin<Partial<Options>> = (userOpts)
"last-modified",
])
if (modified) data.modified = modified
data.modified ||= created // if modified is not set, use created
const published = coalesceAliases(data, ["published", "publishDate", "date"])
if (published) data.published = published

View File

@@ -17,10 +17,8 @@ interface Options {
typstOptions: TypstOptions
}
// mathjax macros
export type Args = boolean | number | string | null
interface MacroType {
[key: string]: string | Args[]
[key: string]: string
}
export const Latex: QuartzTransformerPlugin<Partial<Options>> = (opts) => {
@@ -39,20 +37,11 @@ export const Latex: QuartzTransformerPlugin<Partial<Options>> = (opts) => {
case "typst": {
return [[rehypeTypst, opts?.typstOptions ?? {}]]
}
default:
case "mathjax": {
return [
[
rehypeMathjax,
{
...(opts?.mathJaxOptions ?? {}),
tex: {
...(opts?.mathJaxOptions?.tex ?? {}),
macros,
},
},
],
]
return [[rehypeMathjax, { macros, ...(opts?.mathJaxOptions ?? {}) }]]
}
default: {
return [[rehypeMathjax, { macros, ...(opts?.mathJaxOptions ?? {}) }]]
}
}
},

View File

@@ -57,7 +57,7 @@ export const CrawlLinks: QuartzTransformerPlugin<Partial<Options>> = (userOpts)
) {
let dest = node.properties.href as RelativeURL
const classes = (node.properties.className ?? []) as string[]
const isExternal = isAbsoluteUrl(dest, { httpOnly: false })
const isExternal = isAbsoluteUrl(dest)
classes.push(isExternal ? "external" : "internal")
if (isExternal && opts.externalLinkIcon) {
@@ -99,9 +99,7 @@ export const CrawlLinks: QuartzTransformerPlugin<Partial<Options>> = (userOpts)
}
// don't process external links or intra-document anchors
const isInternal = !(
isAbsoluteUrl(dest, { httpOnly: false }) || dest.startsWith("#")
)
const isInternal = !(isAbsoluteUrl(dest) || dest.startsWith("#"))
if (isInternal) {
dest = node.properties.href = transformLink(
file.data.slug!,
@@ -147,7 +145,7 @@ export const CrawlLinks: QuartzTransformerPlugin<Partial<Options>> = (userOpts)
node.properties.loading = "lazy"
}
if (!isAbsoluteUrl(node.properties.src, { httpOnly: false })) {
if (!isAbsoluteUrl(node.properties.src)) {
let dest = node.properties.src as RelativeURL
dest = node.properties.src = transformLink(
file.data.slug!,

View File

@@ -27,10 +27,7 @@ const defaultOptions: Options = {
const relrefRegex = new RegExp(/\[([^\]]+)\]\(\{\{< relref "([^"]+)" >\}\}\)/, "g")
const predefinedHeadingIdRegex = new RegExp(/(.*) {#(?:.*)}/, "g")
const hugoShortcodeRegex = new RegExp(/{{(.*)}}/, "g")
// Matches the full Hugo {{< figure src="..." ... >}} shortcode and captures src.
// Must run before the generic shortcode stripper to avoid partial-match issues
// with captions that contain HTML (e.g. <span class="figure-number">).
const figureShortcodeRegex = new RegExp(/{{<\s*figure\b[^}]*\bsrc="([^"]*)"[^}]*>}}/, "g")
const figureTagRegex = new RegExp(/< ?figure src="(.*)" ?>/, "g")
// \\\\\( -> matches \\(
// (.+?) -> Lazy match for capturing the equation
// \\\\\) -> matches \\)
@@ -73,14 +70,6 @@ export const OxHugoFlavouredMarkdown: QuartzTransformerPlugin<Partial<Options>>
})
}
if (opts.replaceFigureWithMdImg) {
src = src.toString()
src = src.replaceAll(figureShortcodeRegex, (_value, ...capture) => {
const [imgSrc] = capture
return `![](${imgSrc})`
})
}
if (opts.removeHugoShortcode) {
src = src.toString()
src = src.replaceAll(hugoShortcodeRegex, (_value, ...capture) => {
@@ -89,6 +78,14 @@ export const OxHugoFlavouredMarkdown: QuartzTransformerPlugin<Partial<Options>>
})
}
if (opts.replaceFigureWithMdImg) {
src = src.toString()
src = src.replaceAll(figureTagRegex, (_value, ...capture) => {
const [src] = capture
return `![](${src})`
})
}
if (opts.replaceOrgLatex) {
src = src.toString()
src = src.replaceAll(inlineLatexRegex, (_value, ...capture) => {

View File

@@ -9,10 +9,6 @@ html {
text-size-adjust: none;
overflow-x: hidden;
width: 100vw;
@media all and ($mobile) {
scroll-padding-top: 4rem;
}
}
body {
@@ -45,17 +41,13 @@ ul,
.katex,
.math,
.typst-doc,
g[class~="typst-text"] {
.typst-doc * {
color: var(--darkgray);
fill: var(--darkgray);
overflow-wrap: break-word;
text-wrap: pretty;
}
path[class~="typst-shape"] {
stroke: var(--darkgray);
}
.math {
&.math-display {
text-align: center;

View File

@@ -123,22 +123,13 @@
transform: rotateZ(-90deg);
}
.callout-content {
& > * {
transition:
height 0.1s cubic-bezier(0.02, 0.01, 0.47, 1),
margin 0.1s cubic-bezier(0.02, 0.01, 0.47, 1),
padding 0.1s cubic-bezier(0.02, 0.01, 0.47, 1);
overflow-y: clip;
height: 0;
margin-bottom: 0;
margin-top: 0;
padding-bottom: 0;
padding-top: 0;
}
& > :first-child {
margin-top: -1rem;
}
.callout-content > :first-child {
transition:
height 0.1s cubic-bezier(0.02, 0.01, 0.47, 1),
margin 0.1s cubic-bezier(0.02, 0.01, 0.47, 1);
overflow-y: clip;
height: 0;
margin-top: -1rem;
}
}
}

View File

@@ -10,13 +10,12 @@ export async function glob(
pattern: string,
cwd: string,
ignorePatterns: string[],
respectGitignore: boolean = true,
): Promise<FilePath[]> {
const fps = (
await globby(pattern, {
cwd,
ignore: ignorePatterns,
gitignore: respectGitignore,
gitignore: true,
})
).map(toPosixPath)
return fps as FilePath[]

View File

@@ -26,10 +26,9 @@ export type CSSResource = {
export function JSResourceToScriptElement(resource: JSResource, preserve?: boolean): JSX.Element {
const scriptType = resource.moduleType ?? "application/javascript"
const spaPreserve = preserve ?? resource.spaPreserve
if (resource.contentType === "external") {
return (
<script key={resource.src} src={resource.src} type={scriptType} data-persist={spaPreserve} />
<script key={resource.src} src={resource.src} type={scriptType} spa-preserve={spaPreserve} />
)
} else {
const content = resource.script
@@ -37,7 +36,7 @@ export function JSResourceToScriptElement(resource: JSResource, preserve?: boole
<script
key={randomUUID()}
type={scriptType}
data-persist={spaPreserve}
spa-preserve={spaPreserve}
dangerouslySetInnerHTML={{ __html: content }}
></script>
)
@@ -55,7 +54,7 @@ export function CSSResourceToStyleElement(resource: CSSResource, preserve?: bool
href={resource.content}
rel="stylesheet"
type="text/css"
data-persist={spaPreserve}
spa-preserve={spaPreserve}
/>
)
}

View File

@@ -0,0 +1,548 @@
// Unified semantic search worker: handles data loading and query execution
import { env, pipeline } from "@huggingface/transformers"
import "onnxruntime-web/webgpu"
import "onnxruntime-web/wasm"
export {}
type VectorShardMeta = {
path: string
rows: number
rowOffset: number
byteLength: number
sha256?: string
byteStride: number
}
type LevelSection = {
level: number
indptr: { offset: number; elements: number; byteLength: number }
indices: { offset: number; elements: number; byteLength: number }
}
type ChunkMetadata = {
parentSlug: string
chunkId: number
}
type Manifest = {
version: number
dims: number
dtype: string
normalized: boolean
rows: number
shardSizeRows: number
vectors: {
dtype: string
rows: number
dims: number
shards: VectorShardMeta[]
}
ids: string[]
titles?: string[]
chunkMetadata?: Record<string, ChunkMetadata>
hnsw: {
M: number
efConstruction: number
entryPoint: number
maxLevel: number
graph: {
path: string
sha256?: string
levels: LevelSection[]
}
}
}
type InitMessage = {
type: "init"
cfg: any
manifestUrl: string
baseUrl?: string
disableCache?: boolean
}
type SearchMessage = { type: "search"; text: string; k: number; seq: number }
type ResetMessage = { type: "reset" }
type WorkerMessage = InitMessage | SearchMessage | ResetMessage
type ReadyMessage = { type: "ready" }
type ProgressMessage = {
type: "progress"
loadedRows: number
totalRows: number
}
type SearchHit = { id: number; score: number }
type SearchResultMessage = {
type: "search-result"
seq: number
semantic: SearchHit[]
}
type ErrorMessage = { type: "error"; seq?: number; message: string }
type WorkerState = "idle" | "loading" | "ready" | "error"
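// Expected client-side flow, as a sketch (the Search component that drives
// this worker is not part of this diff, and the worker URL assumes the
// *.worker.ts bundling convention added in componentResources):
//
//   const worker = new Worker("/semantic.worker.js", { type: "module" })
//   worker.postMessage({ type: "init", cfg, manifestUrl: "embeddings/manifest.json" })
//   worker.onmessage = (e) => {
//     // e.data is ReadyMessage | ProgressMessage | SearchResultMessage | ErrorMessage
//   }
//   worker.postMessage({ type: "search", text: "graph views", k: 10, seq: 1 })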
// IndexedDB configuration
const DB_NAME = "semantic-search-cache"
const STORE_NAME = "assets"
const DB_VERSION = 1
const hasIndexedDB = typeof indexedDB !== "undefined"
const supportsSharedArrayBuffer = typeof SharedArrayBuffer !== "undefined"
// State
let state: WorkerState = "idle"
let manifest: Manifest | null = null
let cfg: any = null
let vectorsView: Float32Array | null = null
let dims = 0
let rows = 0
let classifier: any = null
let envConfigured = false
let entryPoint = -1
let maxLevel = 0
let efDefault = 128
let levelGraph: { indptr: Uint32Array; indices: Uint32Array }[] = []
let abortController: AbortController | null = null
let dbPromise: Promise<IDBDatabase> | null = null
// IndexedDB helpers
function openDatabase(): Promise<IDBDatabase> {
if (!hasIndexedDB) {
return Promise.reject(new Error("indexedDB unavailable"))
}
if (!dbPromise) {
dbPromise = new Promise((resolve, reject) => {
const req = indexedDB.open(DB_NAME, DB_VERSION)
req.onupgradeneeded = () => {
const db = req.result
if (!db.objectStoreNames.contains(STORE_NAME)) {
db.createObjectStore(STORE_NAME)
}
}
req.onsuccess = () => resolve(req.result)
req.onerror = () => reject(req.error ?? new Error("failed to open cache store"))
})
}
return dbPromise
}
async function readAsset(hash: string): Promise<ArrayBuffer | null> {
if (!hasIndexedDB) {
return null
}
const db = await openDatabase()
return new Promise((resolve, reject) => {
const tx = db.transaction(STORE_NAME, "readonly")
const store = tx.objectStore(STORE_NAME)
const req = store.get(hash)
req.onsuccess = () => {
const value = req.result
if (value instanceof ArrayBuffer) {
resolve(value)
} else if (value && value.buffer instanceof ArrayBuffer) {
resolve(value.buffer as ArrayBuffer)
} else {
resolve(null)
}
}
req.onerror = () => reject(req.error ?? new Error("failed to read cached asset"))
})
}
async function writeAsset(hash: string, buffer: ArrayBuffer): Promise<void> {
if (!hasIndexedDB) {
return
}
const db = await openDatabase()
await new Promise<void>((resolve, reject) => {
const tx = db.transaction(STORE_NAME, "readwrite")
const store = tx.objectStore(STORE_NAME)
const req = store.put(buffer, hash)
req.onsuccess = () => resolve()
req.onerror = () => reject(req.error ?? new Error("failed to cache asset"))
})
}
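// Cache entries are keyed by the content sha256 recorded in the manifest, so
// regenerated shards hash to new keys and stale entries simply stop being
// read; nothing here evicts them.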
function toAbsolute(path: string, baseUrl?: string): string {
if (path.startsWith("http://") || path.startsWith("https://")) {
return path
}
const base = baseUrl ?? self.location.origin
return new URL(path, base).toString()
}
async function fetchBinary(
path: string,
disableCache: boolean,
sha?: string,
): Promise<ArrayBuffer> {
if (!disableCache && sha && hasIndexedDB) {
try {
const cached = await readAsset(sha)
if (cached) {
return cached
}
} catch {
// fall through to network fetch on cache errors
}
}
const res = await fetch(path, { signal: abortController?.signal ?? undefined })
if (!res.ok) {
throw new Error(`failed to fetch ${path}: ${res.status} ${res.statusText}`)
}
const payload = await res.arrayBuffer()
if (!disableCache && sha && hasIndexedDB) {
try {
await writeAsset(sha, payload)
} catch {
// ignore cache write failures
}
}
return payload
}
async function populateVectors(
manifest: Manifest,
baseUrl: string | undefined,
disableCache: boolean | undefined,
): Promise<{ buffer: Float32Array; rowsLoaded: number }> {
if (manifest.vectors.dtype !== "fp32") {
throw new Error(`unsupported embedding dtype '${manifest.vectors.dtype}', regenerate with fp32`)
}
const rows = manifest.rows
const dims = manifest.dims
const totalBytes = rows * dims * Float32Array.BYTES_PER_ELEMENT
const buffer = supportsSharedArrayBuffer
? new Float32Array(new SharedArrayBuffer(totalBytes))
: new Float32Array(totalBytes)
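// (SharedArrayBuffer is only available in cross-origin-isolated pages, i.e.
// when the site is served with COOP/COEP headers, hence the plain
// ArrayBuffer fallback)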
let loadedRows = 0
for (const shard of manifest.vectors.shards) {
const absolute = toAbsolute(shard.path, baseUrl)
const payload = await fetchBinary(absolute, Boolean(disableCache), shard.sha256)
const view = new Float32Array(payload)
if (view.length !== shard.rows * dims) {
throw new Error(
`shard ${shard.path} has mismatched length (expected ${shard.rows * dims}, got ${view.length})`,
)
}
buffer.set(view, shard.rowOffset * dims)
loadedRows = Math.min(rows, shard.rowOffset + shard.rows)
const progress: ProgressMessage = {
type: "progress",
loadedRows,
totalRows: rows,
}
self.postMessage(progress)
}
return { buffer, rowsLoaded: loadedRows }
}
async function populateGraph(
manifest: Manifest,
baseUrl: string | undefined,
disableCache: boolean | undefined,
): Promise<ArrayBuffer> {
const graphMeta = manifest.hnsw.graph
const absolute = toAbsolute(graphMeta.path, baseUrl)
return await fetchBinary(absolute, Boolean(disableCache), graphMeta.sha256)
}
function configureRuntimeEnv() {
if (envConfigured) return
env.allowLocalModels = false
env.allowRemoteModels = true
const wasmBackend = env.backends?.onnx?.wasm
if (!wasmBackend) {
throw new Error("transformers.js ONNX runtime backend unavailable")
}
const cdnBase = `https://cdn.jsdelivr.net/npm/@huggingface/transformers@${env.version}/dist/`
wasmBackend.wasmPaths = cdnBase
envConfigured = true
}
async function ensureEncoder() {
if (classifier) return
if (!cfg?.model) {
throw new Error("semantic worker missing model identifier")
}
configureRuntimeEnv()
const dtype = typeof cfg?.dtype === "string" && cfg.dtype.length > 0 ? cfg.dtype : "fp32"
const pipelineOpts: Record<string, unknown> = {
device: "wasm",
dtype,
local_files_only: false,
}
classifier = await pipeline("feature-extraction", cfg.model, pipelineOpts)
cfg.dtype = dtype
}
function vectorSlice(id: number): Float32Array {
if (!vectorsView) {
throw new Error("vector buffer not configured")
}
const start = id * dims
const end = start + dims
return vectorsView.subarray(start, end)
}
function dot(a: Float32Array, b: Float32Array): number {
let s = 0
for (let i = 0; i < dims; i++) {
s += a[i] * b[i]
}
return s
}
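// The manifest records vectors as normalized and embed() below requests
// normalize: true, so this dot product acts as cosine similarity: a higher
// score means a closer match.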
function neighborsFor(level: number, node: number): Uint32Array {
const meta = levelGraph[level]
if (!meta) return new Uint32Array()
const { indptr, indices } = meta
if (node < 0 || node + 1 >= indptr.length) return new Uint32Array()
const start = indptr[node]
const end = indptr[node + 1]
return indices.subarray(start, end)
}
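// indptr/indices form a CSR-style adjacency list per level: the neighbors of
// node n occupy indices[indptr[n] .. indptr[n + 1]).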
function insertSortedDescending(arr: SearchHit[], item: SearchHit) {
let idx = arr.length
while (idx > 0 && arr[idx - 1].score < item.score) {
idx -= 1
}
arr.splice(idx, 0, item)
}
function bruteForceSearch(query: Float32Array, k: number): SearchHit[] {
if (!vectorsView) return []
const hits: SearchHit[] = []
for (let id = 0; id < rows; id++) {
const score = dot(query, vectorSlice(id))
if (hits.length < k) {
insertSortedDescending(hits, { id, score })
} else if (score > hits[hits.length - 1].score) {
insertSortedDescending(hits, { id, score })
hits.length = k
}
}
return hits
}
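// Greedy HNSW search (Malkov & Yashunin): descend the upper layers by
// hill-climbing toward the query, then run a best-first expansion over the
// base layer with a dynamic candidate list of size ef.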
function hnswSearch(query: Float32Array, k: number): SearchHit[] {
if (!manifest || !vectorsView || entryPoint < 0 || levelGraph.length === 0) {
return bruteForceSearch(query, k)
}
const ef = Math.max(efDefault, k * 10)
let ep = entryPoint
let epScore = dot(query, vectorSlice(ep))
for (let level = maxLevel; level > 0; level--) {
let changed = true
while (changed) {
changed = false
const neigh = neighborsFor(level, ep)
for (let i = 0; i < neigh.length; i++) {
const candidate = neigh[i]
if (candidate >= rows) continue
const score = dot(query, vectorSlice(candidate))
if (score > epScore) {
epScore = score
ep = candidate
changed = true
}
}
}
}
const visited = new Set<number>()
const candidateQueue: SearchHit[] = []
const best: SearchHit[] = []
insertSortedDescending(candidateQueue, { id: ep, score: epScore })
insertSortedDescending(best, { id: ep, score: epScore })
visited.add(ep)
while (candidateQueue.length > 0) {
const current = candidateQueue.shift()!
const worstBest = best.length >= ef ? best[best.length - 1].score : -Infinity
if (current.score < worstBest && best.length >= ef) {
break
}
const neigh = neighborsFor(0, current.id)
for (let i = 0; i < neigh.length; i++) {
const candidate = neigh[i]
if (candidate >= rows || visited.has(candidate)) continue
visited.add(candidate)
const score = dot(query, vectorSlice(candidate))
const hit = { id: candidate, score }
insertSortedDescending(candidateQueue, hit)
if (best.length < ef || score > best[best.length - 1].score) {
insertSortedDescending(best, hit)
if (best.length > ef) {
best.pop()
}
}
}
}
best.sort((a, b) => b.score - a.score)
return best.slice(0, k)
}
async function embed(text: string, isQuery: boolean = false): Promise<Float32Array> {
await ensureEncoder()
// Apply model-specific prefixes for asymmetric search
let prefixedText = text
if (cfg?.model) {
const modelName = cfg.model.toLowerCase()
switch (true) {
case modelName.includes("e5"): {
// E5 models require query: or passage: prefix
prefixedText = isQuery ? `query: ${text}` : `passage: ${text}`
break
}
case modelName.includes("qwen") && modelName.includes("embedding"): {
// Qwen3-Embedding requires task instruction for queries only
if (isQuery) {
const task = "Given a web search query, retrieve relevant passages that answer the query"
prefixedText = `Instruct: ${task}\nQuery: ${text}`
}
// Documents use plain text (no prefix)
break
}
case modelName.includes("embeddinggemma"): {
// embeddinggemma requires specific prefixes
prefixedText = isQuery
? `task: search result | query: ${text}`
: `title: none | text: ${text}`
break
}
default:
break
}
}
const out = await classifier(prefixedText, { pooling: "mean", normalize: true })
const data = Array.from(out?.data ?? out) as number[]
const vec = new Float32Array(dims)
for (let i = 0; i < dims; i++) vec[i] = data[i] ?? 0
return vec
}
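// Example: with an E5-family model, embed("hnsw graphs", true) encodes
// "query: hnsw graphs", while the corresponding documents are assumed to have
// been embedded with the matching "passage: " prefix on the Python side
// (embed_build.py is not shown in this diff).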
async function handleInit(msg: InitMessage) {
if (state === "loading" || state === "ready") {
throw new Error("worker already initialized or loading")
}
state = "loading"
abortController?.abort()
abortController = new AbortController()
try {
cfg = msg.cfg
const manifestUrl = toAbsolute(msg.manifestUrl, msg.baseUrl)
const response = await fetch(manifestUrl, { signal: abortController.signal })
if (!response.ok) {
throw new Error(
`failed to fetch manifest ${manifestUrl}: ${response.status} ${response.statusText}`,
)
}
manifest = (await response.json()) as Manifest
if (manifest.vectors.dtype !== "fp32") {
throw new Error(
`unsupported embedding dtype '${manifest.vectors.dtype}', regenerate with fp32`,
)
}
dims = manifest.dims
rows = manifest.rows
const { buffer: vectorBuffer } = await populateVectors(manifest, msg.baseUrl, msg.disableCache)
vectorsView = vectorBuffer
const graphBuffer = await populateGraph(manifest, msg.baseUrl, msg.disableCache)
entryPoint = manifest.hnsw.entryPoint
maxLevel = manifest.hnsw.maxLevel
efDefault = Math.max(64, manifest.hnsw.M * 4)
levelGraph = manifest.hnsw.graph.levels.map((level) => {
const indptr = new Uint32Array(graphBuffer, level.indptr.offset, level.indptr.elements)
const indices = new Uint32Array(graphBuffer, level.indices.offset, level.indices.elements)
return { indptr, indices }
})
state = "ready"
const ready: ReadyMessage = { type: "ready" }
self.postMessage(ready)
} catch (err) {
state = "error"
throw err
}
}
async function handleSearch(msg: SearchMessage) {
if (state !== "ready") {
throw new Error("worker not ready for search")
}
if (!manifest || !vectorsView) {
throw new Error("semantic worker not configured")
}
const queryVec = await embed(msg.text, true)
const semanticHits = hnswSearch(queryVec, Math.max(1, msg.k))
const message: SearchResultMessage = {
type: "search-result",
seq: msg.seq,
semantic: semanticHits,
}
self.postMessage(message)
}
function handleReset() {
abortController?.abort()
abortController = null
state = "idle"
manifest = null
cfg = null
vectorsView = null
dims = 0
rows = 0
classifier = null
envConfigured = false
levelGraph = []
entryPoint = -1
maxLevel = 0
}
self.onmessage = (event: MessageEvent<WorkerMessage>) => {
const data = event.data
if (data.type === "reset") {
handleReset()
return
}
if (data.type === "init") {
void handleInit(data).catch((err: unknown) => {
const message: ErrorMessage = {
type: "error",
message: err instanceof Error ? err.message : String(err),
}
self.postMessage(message)
})
return
}
if (data.type === "search") {
void handleSearch(data).catch((err: unknown) => {
const message: ErrorMessage = {
type: "error",
seq: data.seq,
message: err instanceof Error ? err.message : String(err),
}
self.postMessage(message)
})
}
}