feat(pipeline): refactor into its own project

This commit is contained in:
Ignacio Ballesteros
2026-02-20 17:54:12 +01:00
parent 0ea5808cd2
commit dc348185a7
17 changed files with 490 additions and 358 deletions

107
AGENTS.md
View File

@@ -244,113 +244,6 @@ git branch -d feature/my-feature
**Merge direction:** `upstream → main → org-roam → feature/*` **Merge direction:** `upstream → main → org-roam → feature/*`
## Org-Roam Workflow
Notes live in a **separate directory** outside this repo. The export pipeline
converts them to Markdown via ox-hugo, applies post-processing transforms, then
Quartz builds the site.
### Tooling
The dev shell (`nix develop`) provides:
- `nodejs_22` — Quartz build
- `elixir` — runs the export script and pipeline
- `emacs` + `ox-hugo` — performs the org → markdown conversion
### Export and build
```bash
# Export only (wipes content/, exports all .org files, runs pipeline)
NOTES_DIR=/path/to/notes npm run export
# Export then build the site
NOTES_DIR=/path/to/notes npm run build:notes
# Positional arg also works
elixir scripts/export.exs /path/to/notes
```
Optional env vars for the pipeline:
| Var | Default | Purpose |
| --------------- | ------------------------ | ----------------------------------------- |
| `BIBTEX_FILE` | — | Path to `.bib` file for citation fallback |
| `ZOTERO_URL` | `http://localhost:23119` | Zotero Better BibTeX base URL |
| `CITATION_MODE` | `warn` | `silent` / `warn` / `strict` |
### Export pipeline phases
`scripts/export.exs` runs four phases in sequence:
1. **Wipe** `content/` (preserving `.gitkeep`)
2. **Export** each `.org` file via `emacs --batch` + `ox-hugo` → `content/**/*.md`
3. **Pipeline** — run Elixir transform modules over every `.md` file
4. **Index** — generate a fallback `content/index.md` if none was exported
The export uses TOML frontmatter (`+++`) and per-file mode (not per-subtree).
### Markdown pipeline (`scripts/pipeline/`)
A standalone Mix project that post-processes `content/*.md` after ox-hugo.
It is compiled automatically on first run; subsequent runs use the `_build/`
cache and are fast.
**Architecture:**
```
scripts/pipeline/
├── mix.exs # deps: req, jason
└── lib/
├── pipeline.ex # Generic runner (fold transforms over .md files)
├── pipeline/
│ ├── application.ex # OTP app — starts Finch HTTP pool
│ ├── transform.ex # Behaviour: init/1, apply/3, teardown/1
│ ├── transforms/
│ │ └── citations.ex # Resolves cite:key → [Label](url)
│ └── resolvers/
│ ├── zotero.ex # JSON-RPC to Zotero Better BibTeX
│ ├── bibtex.ex # Parses local .bib file
│ └── doi.ex # Bare-key fallback (always succeeds)
```
**Adding a new transform:**
1. Create `scripts/pipeline/lib/pipeline/transforms/my_transform.ex`
2. Implement the `Pipeline.Transform` behaviour (`init/1`, `apply/3`)
3. Append the module to `transforms` in `scripts/export.exs`
```elixir
transforms = [
Pipeline.Transforms.Citations,
Pipeline.Transforms.MyTransform, # new
]
```
### Citation resolution (`Pipeline.Transforms.Citations`)
Handles org-citar syntax that passes through ox-hugo unchanged:
| Syntax | Example |
| ---------------- | -------------------- |
| org-cite / citar | `[cite:@key]` |
| multiple keys | `[cite:@key1;@key2]` |
| bare (legacy) | `cite:key` |
Resolution chain (first success wins):
1. **Zotero** — JSON-RPC to `localhost:23119/better-bibtex/json-rpc`
- Calls `item.search` to find the item, then `item.attachments` to get
the PDF link (`zotero://open-pdf/library/items/KEY`)
- Falls back to `zotero://select/library/items/KEY` if no PDF attachment
- Probe uses a JSON-RPC call, **not** `/better-bibtex/cayw`
(that endpoint blocks waiting for interactive input)
2. **BibTeX** — parses `BIBTEX_FILE`; extracts authors, year, DOI/URL
3. **DOI fallback** — always succeeds; renders bare key or `https://doi.org/...`
**Zotero JSON-RPC gotcha:** `Req 0.5` does not allow combining `:finch` and
`:connect_options` in the same call. Use `:receive_timeout` only.
## Important Notes ## Important Notes
- **Client-side scripts**: Use `.inline.ts` suffix, bundled via esbuild - **Client-side scripts**: Use `.inline.ts` suffix, bundled via esbuild

67
flake.lock generated
View File

@@ -18,6 +18,24 @@
"type": "github" "type": "github"
} }
}, },
"flake-utils_2": {
"inputs": {
"systems": "systems_2"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": { "nixpkgs": {
"locked": { "locked": {
"lastModified": 1771008912, "lastModified": 1771008912,
@@ -34,10 +52,42 @@
"type": "github" "type": "github"
} }
}, },
"nixpkgs_2": {
"locked": {
"lastModified": 1771369470,
"narHash": "sha256-0NBlEBKkN3lufyvFegY4TYv5mCNHbi5OmBDrzihbBMQ=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "0182a361324364ae3f436a63005877674cf45efb",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"pipeline": {
"inputs": {
"flake-utils": "flake-utils_2",
"nixpkgs": "nixpkgs_2"
},
"locked": {
"path": "./pipeline",
"type": "path"
},
"original": {
"path": "./pipeline",
"type": "path"
},
"parent": []
},
"root": { "root": {
"inputs": { "inputs": {
"flake-utils": "flake-utils", "flake-utils": "flake-utils",
"nixpkgs": "nixpkgs" "nixpkgs": "nixpkgs",
"pipeline": "pipeline"
} }
}, },
"systems": { "systems": {
@@ -54,6 +104,21 @@
"repo": "default", "repo": "default",
"type": "github" "type": "github"
} }
},
"systems_2": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
} }
}, },
"root": "root", "root": "root",

View File

@@ -4,22 +4,30 @@
inputs = { inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
flake-utils.url = "github:numtide/flake-utils"; flake-utils.url = "github:numtide/flake-utils";
pipeline.url = "path:./pipeline";
}; };
outputs = { self, nixpkgs, flake-utils }: outputs = { self, nixpkgs, flake-utils, pipeline }:
flake-utils.lib.eachDefaultSystem (system: flake-utils.lib.eachDefaultSystem (system:
let let
pkgs = import nixpkgs { inherit system; }; pkgs = import nixpkgs { inherit system; };
fs = pkgs.lib.fileset;
# Emacs with ox-hugo — shared between devShell and buildApp pipelineApp = pipeline.packages.${system}.default;
emacsWithOxHugo = (pkgs.emacsPackagesFor pkgs.emacs-nox).emacsWithPackages
(epkgs: [ epkgs.ox-hugo ]);
# Pre-fetched npm dependency tree (node_modules) # Pre-fetched npm dependency tree (node_modules).
# src is filtered to only package.json + package-lock.json so that
# edits to Quartz source files do not invalidate this derivation.
quartzDeps = pkgs.buildNpmPackage { quartzDeps = pkgs.buildNpmPackage {
pname = "quartz-deps"; pname = "quartz-deps";
version = "4.5.2"; version = "4.5.2";
src = ./.; src = fs.toSource {
root = ./.;
fileset = fs.unions [
./package.json
./package-lock.json
];
};
npmDepsHash = "sha256-7u+VlIx44B3/ivM9vLMIOn+e4TL4eS6B682vhS+Ikb4="; npmDepsHash = "sha256-7u+VlIx44B3/ivM9vLMIOn+e4TL4eS6B682vhS+Ikb4=";
dontBuild = true; dontBuild = true;
installPhase = '' installPhase = ''
@@ -28,18 +36,10 @@
''; '';
}; };
# Pre-fetched Hex/Mix dependencies for scripts/pipeline
pipelineMixDeps = pkgs.beamPackages.fetchMixDeps {
pname = "pipeline-mix-deps";
version = "0.1.0";
src = ./scripts/pipeline;
sha256 = "sha256-E79X+nUy86G1Jrwv3T7dXekoGv8Hd14ZgJSKWjvlmAw=";
};
# The build application wrapper script # The build application wrapper script
buildApp = pkgs.writeShellApplication { buildApp = pkgs.writeShellApplication {
name = "build"; name = "build";
runtimeInputs = [ pkgs.nodejs_22 pkgs.elixir emacsWithOxHugo ]; runtimeInputs = [ pkgs.nodejs_22 ];
text = '' text = ''
NOTES_DIR="''${1:?Usage: build <path-to-notes-dir>}" NOTES_DIR="''${1:?Usage: build <path-to-notes-dir>}"
NOTES_DIR=$(realpath "$NOTES_DIR") NOTES_DIR=$(realpath "$NOTES_DIR")
@@ -54,15 +54,10 @@
# Drop in pre-built node_modules # Drop in pre-built node_modules
ln -s ${quartzDeps}/node_modules "$WORK/repo/node_modules" ln -s ${quartzDeps}/node_modules "$WORK/repo/node_modules"
# Drop in pre-fetched Mix deps so mix compile runs offline # Run the pre-compiled pipeline escript (org → md, citations transform)
cp -r ${pipelineMixDeps} "$WORK/repo/scripts/pipeline/deps" ${pipelineApp}/bin/pipeline "$NOTES_DIR" \
chmod -R u+w "$WORK/repo/scripts/pipeline/deps" --output "$WORK/repo" \
--content-dir "$WORK/repo/content"
# ox-hugo requires static/ to exist before it can copy image assets
mkdir -p "$WORK/repo/static"
# Run the export pipeline (org → md, citations transform)
NOTES_DIR="$NOTES_DIR" elixir "$WORK/repo/scripts/export.exs"
# Build the static site from within the repo copy so relative paths # Build the static site from within the repo copy so relative paths
# (e.g. ./package.json in constants.js) resolve correctly. # (e.g. ./package.json in constants.js) resolve correctly.
@@ -79,19 +74,18 @@
buildInputs = [ buildInputs = [
pkgs.nodejs_22 pkgs.nodejs_22
pkgs.elixir pkgs.elixir
emacsWithOxHugo
pkgs.mcp-nixos pkgs.mcp-nixos
]; ];
shellHook = '' shellHook = ''
echo "Node $(node --version) / npm $(npm --version)" echo "Node $(node --version) / npm $(npm --version)"
elixir --version 2>/dev/null | head -1 || true elixir --version 2>/dev/null | head -1 || true
echo "Emacs $(emacs --version | head -1)"
''; '';
}; };
packages.default = buildApp; packages.default = buildApp;
packages.build = buildApp; packages.build = buildApp;
packages.pipeline = pipelineApp;
apps.default = { type = "app"; program = "${buildApp}/bin/build"; }; apps.default = { type = "app"; program = "${buildApp}/bin/build"; };
apps.build = { type = "app"; program = "${buildApp}/bin/build"; }; apps.build = { type = "app"; program = "${buildApp}/bin/build"; };

View File

@@ -7,7 +7,7 @@ The methodology described in [cite:@podlovics2021journalArticle] provides a
useful framework for analysis. useful framework for analysis.
Multiple citations can appear together: Multiple citations can appear together:
[cite:@podlovics2021journalArticle] [cite:@podlovics2021journalArticle;@petersen2022book]
Older bare-cite style (org-roam v1 / older citar) also works: Older bare-cite style (org-roam v1 / older citar) also works:
cite:podlovics2021journalArticle cite:@podlovics2021journalArticle

61
pipeline/flake.lock generated Normal file
View File

@@ -0,0 +1,61 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1771369470,
"narHash": "sha256-0NBlEBKkN3lufyvFegY4TYv5mCNHbi5OmBDrzihbBMQ=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "0182a361324364ae3f436a63005877674cf45efb",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

69
pipeline/flake.nix Normal file
View File

@@ -0,0 +1,69 @@
{
  description = "Org-roam export pipeline Elixir escript";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };

  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = import nixpkgs { inherit system; };
        # Shorthand for the fileset library used to filter derivation sources.
        fs = pkgs.lib.fileset;

        # Emacs with ox-hugo — needed at runtime by the pipeline escript
        # (export_org_files calls `emacs --batch` with ox-hugo).
        emacsWithOxHugo = (pkgs.emacsPackagesFor pkgs.emacs-nox).emacsWithPackages
          (epkgs: [ epkgs.ox-hugo ]);

        # Pre-fetched Hex/Mix dependencies (fixed-output derivation).
        # src is filtered to mix.exs + mix.lock so source edits don't
        # invalidate this derivation.
        mixDeps = pkgs.beamPackages.fetchMixDeps {
          pname = "pipeline-mix-deps";
          version = "0.1.0";
          src = fs.toSource {
            root = ./.;
            fileset = fs.unions [
              ./mix.exs
              ./mix.lock
            ];
          };
          # NOTE(review): this hash must be refreshed whenever mix.lock changes.
          sha256 = "sha256-E79X+nUy86G1Jrwv3T7dXekoGv8Hd14ZgJSKWjvlmAw=";
        };

        # Compiled pipeline escript (without runtime wrappers).
        pipelineEscript = pkgs.beamPackages.mixRelease {
          pname = "pipeline";
          version = "0.1.0";
          src = ./.;
          escriptBinName = "pipeline";
          mixFodDeps = mixDeps;
          stripDebug = true;
        };

        # Wrapped pipeline that puts emacs (with ox-hugo) on PATH so
        # the escript's System.cmd("emacs", ...) calls succeed.
        pipelineApp = pkgs.writeShellApplication {
          name = "pipeline";
          runtimeInputs = [ emacsWithOxHugo ];
          text = ''
            exec ${pipelineEscript}/bin/pipeline "$@"
          '';
        };
      in
      {
        # Default package is the emacs-wrapped escript; `escript` exposes the
        # bare build for callers that supply their own emacs on PATH.
        packages.default = pipelineApp;
        packages.escript = pipelineEscript;

        # Dev shell: elixir for mix work plus the same emacs used at runtime.
        devShells.default = pkgs.mkShell {
          buildInputs = [
            pkgs.elixir
            emacsWithOxHugo
          ];
        };
      });
}

View File

@@ -0,0 +1,261 @@
defmodule Pipeline.CLI do
  @moduledoc """
  Escript entry point for the org-roam export pipeline.

  Runs four phases in sequence:

    1. Wipe `content/` (preserving `.gitkeep`)
    2. Export each `.org` file via `emacs --batch` + ox-hugo → `content/**/*.md`
    3. Run Elixir transform modules over every `.md` file
    4. Generate a fallback `content/index.md` if none was exported

  ## Usage

      pipeline <notes-dir> [--output <path>]

  Arguments:

      notes-dir          Path to the directory containing `.org` notes (required).
                         Also accepts the `NOTES_DIR` env var.

  Options:

      --output <path>    Output root directory (used as ox-hugo base dir).
                         Defaults to the `OUTPUT_DIR` env var, or the current
                         working directory.
      --content-dir <p>  Output directory for exported Markdown. Defaults to
                         `<output>/content`.

  Optional env vars:

      BIBTEX_FILE    Path to a `.bib` file used as citation fallback.
      ZOTERO_URL     Zotero Better BibTeX base URL (default: http://localhost:23119).
      CITATION_MODE  silent | warn (default) | strict.
  """

  require Logger

  # Escript entry point. Starts the :pipeline OTP application first so the
  # transforms' runtime services are up, then runs the four phases in order.
  # The markdown count is taken *after* generate_index/1, so a freshly
  # generated index.md is included in the reported total.
  def main(argv) do
    Application.ensure_all_started(:pipeline)
    {notes_dir, output_dir, content_dir} = parse_args(argv)
    wipe(content_dir)
    export_org_files(notes_dir, output_dir)
    run_pipeline(content_dir)
    generate_index(content_dir)

    md_count =
      content_dir
      |> Path.join("**/*.md")
      |> Path.wildcard()
      |> length()

    IO.puts("==> Done. #{md_count} markdown files in #{content_dir}")
  end

  # ---------------------------------------------------------------------------
  # Argument parsing
  # ---------------------------------------------------------------------------

  # Resolves {notes_dir, output_dir, content_dir} (all absolute paths) from
  # argv and env vars. The positional argument wins over NOTES_DIR; --output
  # wins over OUTPUT_DIR. Aborts (exit 1) when no notes dir is given or it
  # does not exist.
  # NOTE(review): unrecognized switches end up in the discarded _invalid list
  # and are silently ignored — a typo like `--ouput` is never reported.
  defp parse_args(argv) do
    {opts, positional, _invalid} =
      OptionParser.parse(argv,
        strict: [output: :string, content_dir: :string]
      )

    notes_dir =
      case positional do
        [dir | _] ->
          dir

        [] ->
          System.get_env("NOTES_DIR") ||
            abort("Usage: pipeline <notes-dir> [--output <path>]")
      end

    notes_dir = Path.expand(notes_dir)

    unless File.dir?(notes_dir) do
      abort("Error: notes directory does not exist: #{notes_dir}")
    end

    output_dir =
      (opts[:output] || System.get_env("OUTPUT_DIR") || File.cwd!())
      |> Path.expand()

    content_dir =
      (opts[:content_dir] || Path.join(output_dir, "content"))
      |> Path.expand()

    {notes_dir, output_dir, content_dir}
  end

  # ---------------------------------------------------------------------------
  # Phase 1: Wipe content/
  # ---------------------------------------------------------------------------

  # Recursively deletes every entry under content_dir except `.gitkeep`,
  # creating the directory first so a fresh checkout also works.
  defp wipe(content_dir) do
    IO.puts("==> Wiping #{content_dir}")
    File.mkdir_p!(content_dir)

    content_dir
    |> File.ls!()
    |> Enum.reject(&(&1 == ".gitkeep"))
    |> Enum.each(fn entry ->
      Path.join(content_dir, entry) |> File.rm_rf!()
    end)
  end

  # ---------------------------------------------------------------------------
  # Phase 2: Export org files via Emacs + ox-hugo
  # ---------------------------------------------------------------------------

  # Exports every `**/*.org` under notes_dir to Markdown, one `emacs --batch`
  # process per file. A custom org-cite "passthrough" processor re-emits
  # citations verbatim as `[cite:@key;...]` so the Citations transform can
  # resolve them in phase 3. Each file's section is its directory relative
  # to notes_dir.
  #
  # Exits the whole program: with 0 when there is nothing to export (later
  # phases are skipped), with 1 when any emacs invocation fails.
  # NOTE(review): output_dir/section are interpolated into elisp string
  # literals unescaped — a path containing `"` or backslashes would break
  # the --eval forms.
  defp export_org_files(notes_dir, output_dir) do
    IO.puts("==> Exporting org files from #{notes_dir}")

    # ox-hugo requires static/ to exist for image asset copying
    File.mkdir_p!(Path.join(output_dir, "static"))

    org_files =
      Path.join(notes_dir, "**/*.org")
      |> Path.wildcard()

    if org_files == [] do
      IO.puts("No .org files found in #{notes_dir}")
      System.halt(0)
    end

    results =
      Enum.map(org_files, fn orgfile ->
        IO.puts(" exporting: #{orgfile}")

        section =
          orgfile
          |> Path.dirname()
          |> Path.relative_to(notes_dir)

        {output, exit_code} =
          System.cmd(
            "emacs",
            [
              "--batch",
              "--eval", "(require 'ox-hugo)",
              "--eval", """
              (org-cite-register-processor 'passthrough
              :export-citation
              (lambda (citation _style _backend _info)
              (let ((keys (mapcar (lambda (ref)
              (concat "@" (org-element-property :key ref)))
              (org-cite-get-references citation))))
              (format "[cite:%s]" (string-join keys ";")))))
              """,
              "--eval", "(setq org-cite-export-processors '((t passthrough)))",
              "--eval", ~s[(setq org-hugo-base-dir "#{output_dir}")],
              "--eval", ~s[(setq org-hugo-default-section-directory "#{section}")],
              "--visit", orgfile,
              "--funcall", "org-hugo-export-to-md"
            ],
            stderr_to_stdout: true
          )

        # Drop routine emacs startup noise; surface everything else.
        filtered =
          output
          |> String.split("\n")
          |> Enum.reject(&String.match?(&1, ~r/^Loading|^ad-handle|^For information/))
          |> Enum.join("\n")

        if filtered != "", do: IO.puts(filtered)
        {orgfile, exit_code}
      end)

    failures = Enum.filter(results, fn {_, code} -> code != 0 end)

    if failures != [] do
      IO.puts(:stderr, "\nFailed to export #{length(failures)} file(s):")
      Enum.each(failures, fn {f, code} -> IO.puts(:stderr, " [exit #{code}] #{f}") end)
      System.halt(1)
    end
  end

  # ---------------------------------------------------------------------------
  # Phase 3: Markdown transformation pipeline
  # ---------------------------------------------------------------------------

  # Folds the configured transform modules over every .md file under
  # content_dir via Pipeline.run/3. Resolver configuration comes from env
  # vars; unknown CITATION_MODE values fall back to :warn. Prints a
  # per-transform modified-file count, or exits 1 on pipeline error.
  defp run_pipeline(content_dir) do
    IO.puts("==> Running markdown pipeline")

    pipeline_opts = %{
      zotero_url: System.get_env("ZOTERO_URL", "http://localhost:23119"),
      bibtex_file: System.get_env("BIBTEX_FILE"),
      citation_mode:
        case System.get_env("CITATION_MODE", "warn") do
          "silent" -> :silent
          "strict" -> :strict
          _ -> :warn
        end
    }

    transforms = [Pipeline.Transforms.Citations]

    case Pipeline.run(content_dir, transforms, pipeline_opts) do
      {:ok, stats} ->
        Enum.each(stats, fn {mod, count} ->
          IO.puts(" #{inspect(mod)}: #{count} file(s) modified")
        end)

      {:error, reason} ->
        IO.puts(:stderr, "Pipeline error: #{inspect(reason)}")
        System.halt(1)
    end
  end

  # ---------------------------------------------------------------------------
  # Phase 4: Generate default index.md if none was exported
  # ---------------------------------------------------------------------------

  # Writes content/index.md listing every exported page as a Markdown link,
  # sorted by title — but only when the export did not produce an index.md.
  # Titles are read from each file's TOML frontmatter (`title = "..."`),
  # falling back to the page slug when absent.
  # NOTE(review): the generated frontmatter uses `---` delimiters while the
  # exported files use TOML `+++` — confirm the site builder accepts both.
  defp generate_index(content_dir) do
    index_path = Path.join(content_dir, "index.md")

    unless File.exists?(index_path) do
      IO.puts("==> Generating default index.md")

      pages =
        Path.join(content_dir, "**/*.md")
        |> Path.wildcard()
        |> Enum.map(fn path ->
          slug = Path.relative_to(path, content_dir) |> Path.rootname()

          title =
            path
            |> File.read!()
            |> then(fn content ->
              case Regex.run(~r/^title\s*=\s*"(.+)"/m, content) do
                [_, t] -> t
                _ -> slug
              end
            end)

          {slug, title}
        end)
        |> Enum.sort_by(fn {_, title} -> title end)
        |> Enum.map(fn {slug, title} -> "- [#{title}](#{slug})" end)
        |> Enum.join("\n")

      File.write!(index_path, """
      ---
      title: Index
      ---
      #{pages}
      """)
    end
  end

  # ---------------------------------------------------------------------------
  # Helpers
  # ---------------------------------------------------------------------------

  # Prints message to stderr and exits the VM with status 1. Never returns.
  defp abort(message) do
    IO.puts(:stderr, message)
    System.halt(1)
  end
end

View File

@@ -48,8 +48,8 @@ defmodule Pipeline.Transforms.Citations do
# Match [cite:@key] and [cite:@key1;@key2;...] (org-cite / citar style) # Match [cite:@key] and [cite:@key1;@key2;...] (org-cite / citar style)
@cite_bracket_regex ~r/\[cite:(@[^\]]+)\]/ @cite_bracket_regex ~r/\[cite:(@[^\]]+)\]/
# Match bare cite:key (older roam style, no brackets, no @ prefix) # Match bare cite:key or cite:@key (older roam style, no brackets, optional @ prefix)
@cite_bare_regex ~r/(?<![(\[])cite:([a-zA-Z0-9_:-]+)/ @cite_bare_regex ~r/(?<![(\[])cite:@?([a-zA-Z0-9_:-]+)/
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Pipeline callbacks # Pipeline callbacks

View File

@@ -5,21 +5,27 @@ defmodule Pipeline.MixProject do
[ [
app: :pipeline, app: :pipeline,
version: "0.1.0", version: "0.1.0",
elixir: "~> 1.15", elixir: "~> 1.17",
start_permanent: Mix.env() == :prod, start_permanent: Mix.env() == :prod,
deps: deps() deps: deps(),
escript: escript()
] ]
end end
def application do def application do
[ [
extra_applications: [:logger, :inets, :ssl], extra_applications: [:logger],
mod: {Pipeline.Application, []} mod: {Pipeline.Application, []}
] ]
end end
defp escript do
[main_module: Pipeline.CLI]
end
defp deps do defp deps do
[ [
{:finch, "~> 0.19"},
{:req, "~> 0.5"}, {:req, "~> 0.5"},
{:jason, "~> 1.4"} {:jason, "~> 1.4"}
] ]

View File

@@ -1,217 +0,0 @@
#!/usr/bin/env elixir
# Export org-roam notes (per-file) to content/ via ox-hugo,
# then run the markdown transformation pipeline (citations, etc.).
#
# Usage:
#   NOTES_DIR=~/notes elixir scripts/export.exs
#   elixir scripts/export.exs /path/to/notes
#
# Optional env vars:
#   BIBTEX_FILE — path to a .bib file used as citation fallback
#   ZOTERO_URL — Zotero Better BibTeX base URL (default: http://localhost:23119)
#   CITATION_MODE — silent | warn (default) | strict
#
# The positional argument takes precedence over the NOTES_DIR env var.

# ---------------------------------------------------------------------------
# Load the pipeline Mix project so its modules are available in this script.
# ---------------------------------------------------------------------------
repo_root = __DIR__ |> Path.join("..") |> Path.expand()
pipeline_dir = Path.join(repo_root, "scripts/pipeline")

# Compile and load the pipeline project's modules into this runtime.
# Mix.install is NOT used here because we have a local Mix project — instead
# we compile it and push its beam files onto the code path.
#
# This runs `mix deps.get` + `mix compile` the first time; subsequent runs
# use the compiled artifacts from _build/ (fast, same as Mix caching).
# The bare {_, 0} matches crash the script if either command exits non-zero.
{_, 0} =
  System.cmd("mix", ["deps.get", "--quiet"],
    cd: pipeline_dir,
    env: [{"MIX_ENV", "prod"}],
    into: IO.stream()
  )

{_, 0} =
  System.cmd("mix", ["compile", "--quiet"],
    cd: pipeline_dir,
    env: [{"MIX_ENV", "prod"}],
    into: IO.stream()
  )

# Add compiled beam files to the load path so we can call pipeline modules.
pipeline_build = Path.join(pipeline_dir, "_build/prod/lib")

pipeline_build
|> File.ls!()
|> Enum.each(fn app ->
  ebin = Path.join([pipeline_build, app, "ebin"])
  if File.dir?(ebin), do: Code.prepend_path(ebin)
end)

# Start the pipeline OTP application (which starts Finch for HTTP).
Application.ensure_all_started(:pipeline)

# ---------------------------------------------------------------------------
# Argument / env resolution
# ---------------------------------------------------------------------------
notes_dir =
  case System.argv() do
    [dir | _] -> dir
    [] ->
      System.get_env("NOTES_DIR") ||
        (IO.puts(:stderr, "Usage: NOTES_DIR=/path/to/notes elixir scripts/export.exs")
         System.halt(1))
  end

notes_dir = Path.expand(notes_dir)
content_dir = Path.join(repo_root, "content")

unless File.dir?(notes_dir) do
  IO.puts(:stderr, "Error: notes directory does not exist: #{notes_dir}")
  System.halt(1)
end

# ---------------------------------------------------------------------------
# Phase 1: Wipe content/
# ---------------------------------------------------------------------------
# Delete everything under content/ except .gitkeep so stale exports vanish.
IO.puts("==> Wiping #{content_dir}")

content_dir
|> File.ls!()
|> Enum.reject(&(&1 == ".gitkeep"))
|> Enum.each(fn entry ->
  Path.join(content_dir, entry) |> File.rm_rf!()
end)

# ---------------------------------------------------------------------------
# Phase 2: Export org files via Emacs + ox-hugo
# ---------------------------------------------------------------------------
# One `emacs --batch` process per .org file; the section is the file's
# directory relative to the notes root.
# NOTE(review): repo_root/section are interpolated into elisp strings
# unescaped — paths containing `"` would break the --eval forms.
IO.puts("==> Exporting org files from #{notes_dir}")

org_files =
  Path.join(notes_dir, "**/*.org")
  |> Path.wildcard()

if org_files == [] do
  IO.puts("No .org files found in #{notes_dir}")
  System.halt(0)
end

results =
  Enum.map(org_files, fn orgfile ->
    IO.puts(" exporting: #{orgfile}")

    section =
      orgfile
      |> Path.dirname()
      |> Path.relative_to(notes_dir)

    {output, exit_code} =
      System.cmd(
        "emacs",
        [
          "--batch",
          "--eval", "(require 'ox-hugo)",
          "--eval", ~s[(setq org-hugo-base-dir "#{repo_root}")],
          "--eval", ~s[(setq org-hugo-default-section-directory "#{section}")],
          "--visit", orgfile,
          "--funcall", "org-hugo-export-to-md"
        ],
        stderr_to_stdout: true
      )

    # Drop routine emacs startup noise; surface everything else.
    filtered =
      output
      |> String.split("\n")
      |> Enum.reject(&String.match?(&1, ~r/^Loading|^ad-handle|^For information/))
      |> Enum.join("\n")

    if filtered != "", do: IO.puts(filtered)
    {orgfile, exit_code}
  end)

failures = Enum.filter(results, fn {_, code} -> code != 0 end)

if failures != [] do
  IO.puts(:stderr, "\nFailed to export #{length(failures)} file(s):")
  Enum.each(failures, fn {f, code} -> IO.puts(:stderr, " [exit #{code}] #{f}") end)
  System.halt(1)
end

# ---------------------------------------------------------------------------
# Phase 3: Markdown transformation pipeline
# ---------------------------------------------------------------------------
# Fold the transform modules over every exported .md file; resolver config
# comes from env vars, with unknown CITATION_MODE values treated as :warn.
IO.puts("==> Running markdown pipeline")

pipeline_opts = %{
  zotero_url: System.get_env("ZOTERO_URL", "http://localhost:23119"),
  bibtex_file: System.get_env("BIBTEX_FILE"),
  citation_mode:
    case System.get_env("CITATION_MODE", "warn") do
      "silent" -> :silent
      "strict" -> :strict
      _ -> :warn
    end
}

transforms = [Pipeline.Transforms.Citations]

case Pipeline.run(content_dir, transforms, pipeline_opts) do
  {:ok, stats} ->
    Enum.each(stats, fn {mod, count} ->
      IO.puts(" #{inspect(mod)}: #{count} file(s) modified")
    end)

  {:error, reason} ->
    IO.puts(:stderr, "Pipeline error: #{inspect(reason)}")
    System.halt(1)
end

# ---------------------------------------------------------------------------
# Phase 4: Generate default index.md if none was exported
# ---------------------------------------------------------------------------
# NOTE(review): md_count is computed before the index may be generated below,
# so the final "Done" line can under-report by one file when a default
# index.md is written.
md_count =
  Path.join(content_dir, "**/*.md")
  |> Path.wildcard()
  |> length()

index_path = Path.join(content_dir, "index.md")

unless File.exists?(index_path) do
  IO.puts("==> Generating default index.md")

  # Build "- [Title](slug)" lines for every page, sorted by title; the title
  # comes from TOML frontmatter (`title = "..."`) with the slug as fallback.
  pages =
    Path.join(content_dir, "**/*.md")
    |> Path.wildcard()
    |> Enum.map(fn path ->
      slug = Path.relative_to(path, content_dir) |> Path.rootname()

      title =
        path
        |> File.read!()
        |> then(fn content ->
          case Regex.run(~r/^title\s*=\s*"(.+)"/m, content) do
            [_, t] -> t
            _ -> slug
          end
        end)

      {slug, title}
    end)
    |> Enum.sort_by(fn {_, title} -> title end)
    |> Enum.map(fn {slug, title} -> "- [#{title}](#{slug})" end)
    |> Enum.join("\n")

  File.write!(index_path, """
  ---
  title: Index
  ---
  #{pages}
  """)
end

IO.puts("==> Done. #{md_count} markdown files in #{content_dir}")