From dc348185a797c4ff79f0ca35da88f3c9db015bf7 Mon Sep 17 00:00:00 2001 From: Ignacio Ballesteros Date: Fri, 20 Feb 2026 17:54:12 +0100 Subject: [PATCH] feat(pipeline): refactor into its own project --- AGENTS.md | 107 ------- flake.lock | 67 ++++- flake.nix | 46 ++- notes/example-citation.org | 4 +- pipeline/flake.lock | 61 ++++ pipeline/flake.nix | 69 +++++ .../pipeline => pipeline}/lib/pipeline.ex | 0 .../lib/pipeline/application.ex | 0 pipeline/lib/pipeline/cli.ex | 261 ++++++++++++++++++ .../lib/pipeline/resolvers/bibtex.ex | 0 .../lib/pipeline/resolvers/doi.ex | 0 .../lib/pipeline/resolvers/zotero.ex | 0 .../lib/pipeline/transform.ex | 0 .../lib/pipeline/transforms/citations.ex | 4 +- {scripts/pipeline => pipeline}/mix.exs | 12 +- {scripts/pipeline => pipeline}/mix.lock | 0 scripts/export.exs | 217 --------------- 17 files changed, 490 insertions(+), 358 deletions(-) create mode 100644 pipeline/flake.lock create mode 100644 pipeline/flake.nix rename {scripts/pipeline => pipeline}/lib/pipeline.ex (100%) rename {scripts/pipeline => pipeline}/lib/pipeline/application.ex (100%) create mode 100644 pipeline/lib/pipeline/cli.ex rename {scripts/pipeline => pipeline}/lib/pipeline/resolvers/bibtex.ex (100%) rename {scripts/pipeline => pipeline}/lib/pipeline/resolvers/doi.ex (100%) rename {scripts/pipeline => pipeline}/lib/pipeline/resolvers/zotero.ex (100%) rename {scripts/pipeline => pipeline}/lib/pipeline/transform.ex (100%) rename {scripts/pipeline => pipeline}/lib/pipeline/transforms/citations.ex (97%) rename {scripts/pipeline => pipeline}/mix.exs (64%) rename {scripts/pipeline => pipeline}/mix.lock (100%) delete mode 100644 scripts/export.exs diff --git a/AGENTS.md b/AGENTS.md index 69a38d07b..8979248d9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -244,113 +244,6 @@ git branch -d feature/my-feature **Merge direction:** `upstream → main → org-roam → feature/*` -## Org-Roam Workflow - -Notes live in a **separate directory** outside this repo. 
The export pipeline -converts them to Markdown via ox-hugo, applies post-processing transforms, then -Quartz builds the site. - -### Tooling - -The dev shell (`nix develop`) provides: - -- `nodejs_22` — Quartz build -- `elixir` — runs the export script and pipeline -- `emacs` + `ox-hugo` — performs the org → markdown conversion - -### Export and build - -```bash -# Export only (wipes content/, exports all .org files, runs pipeline) -NOTES_DIR=/path/to/notes npm run export - -# Export then build the site -NOTES_DIR=/path/to/notes npm run build:notes - -# Positional arg also works -elixir scripts/export.exs /path/to/notes -``` - -Optional env vars for the pipeline: - -| Var | Default | Purpose | -| --------------- | ------------------------ | ----------------------------------------- | -| `BIBTEX_FILE` | — | Path to `.bib` file for citation fallback | -| `ZOTERO_URL` | `http://localhost:23119` | Zotero Better BibTeX base URL | -| `CITATION_MODE` | `warn` | `silent` / `warn` / `strict` | - -### Export pipeline phases - -`scripts/export.exs` runs four phases in sequence: - -1. **Wipe** `content/` (preserving `.gitkeep`) -2. **Export** each `.org` file via `emacs --batch` + `ox-hugo` → `content/**/*.md` -3. **Pipeline** — run Elixir transform modules over every `.md` file -4. **Index** — generate a fallback `content/index.md` if none was exported - -The export uses TOML frontmatter (`+++`) and per-file mode (not per-subtree). - -### Markdown pipeline (`scripts/pipeline/`) - -A standalone Mix project that post-processes `content/*.md` after ox-hugo. -It is compiled automatically on first run; subsequent runs use the `_build/` -cache and are fast. 
- -**Architecture:** - -``` -scripts/pipeline/ -├── mix.exs # deps: req, jason -└── lib/ - ├── pipeline.ex # Generic runner (fold transforms over .md files) - ├── pipeline/ - │ ├── application.ex # OTP app — starts Finch HTTP pool - │ ├── transform.ex # Behaviour: init/1, apply/3, teardown/1 - │ ├── transforms/ - │ │ └── citations.ex # Resolves cite:key → [Label](url) - │ └── resolvers/ - │ ├── zotero.ex # JSON-RPC to Zotero Better BibTeX - │ ├── bibtex.ex # Parses local .bib file - │ └── doi.ex # Bare-key fallback (always succeeds) -``` - -**Adding a new transform:** - -1. Create `scripts/pipeline/lib/pipeline/transforms/my_transform.ex` -2. Implement the `Pipeline.Transform` behaviour (`init/1`, `apply/3`) -3. Append the module to `transforms` in `scripts/export.exs` - -```elixir -transforms = [ - Pipeline.Transforms.Citations, - Pipeline.Transforms.MyTransform, # new -] -``` - -### Citation resolution (`Pipeline.Transforms.Citations`) - -Handles org-citar syntax that passes through ox-hugo unchanged: - -| Syntax | Example | -| ---------------- | -------------------- | -| org-cite / citar | `[cite:@key]` | -| multiple keys | `[cite:@key1;@key2]` | -| bare (legacy) | `cite:key` | - -Resolution chain (first success wins): - -1. **Zotero** — JSON-RPC to `localhost:23119/better-bibtex/json-rpc` - - Calls `item.search` to find the item, then `item.attachments` to get - the PDF link (`zotero://open-pdf/library/items/KEY`) - - Falls back to `zotero://select/library/items/KEY` if no PDF attachment - - Probe uses a JSON-RPC call, **not** `/better-bibtex/cayw` - (that endpoint blocks waiting for interactive input) -2. **BibTeX** — parses `BIBTEX_FILE`; extracts authors, year, DOI/URL -3. **DOI fallback** — always succeeds; renders bare key or `https://doi.org/...` - -**Zotero JSON-RPC gotcha:** `Req 0.5` does not allow combining `:finch` and -`:connect_options` in the same call. Use `:receive_timeout` only. 
- ## Important Notes - **Client-side scripts**: Use `.inline.ts` suffix, bundled via esbuild diff --git a/flake.lock b/flake.lock index 43f833f07..8b4f3d8a8 100644 --- a/flake.lock +++ b/flake.lock @@ -18,6 +18,24 @@ "type": "github" } }, + "flake-utils_2": { + "inputs": { + "systems": "systems_2" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, "nixpkgs": { "locked": { "lastModified": 1771008912, @@ -34,10 +52,42 @@ "type": "github" } }, + "nixpkgs_2": { + "locked": { + "lastModified": 1771369470, + "narHash": "sha256-0NBlEBKkN3lufyvFegY4TYv5mCNHbi5OmBDrzihbBMQ=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "0182a361324364ae3f436a63005877674cf45efb", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "pipeline": { + "inputs": { + "flake-utils": "flake-utils_2", + "nixpkgs": "nixpkgs_2" + }, + "locked": { + "path": "./pipeline", + "type": "path" + }, + "original": { + "path": "./pipeline", + "type": "path" + }, + "parent": [] + }, "root": { "inputs": { "flake-utils": "flake-utils", - "nixpkgs": "nixpkgs" + "nixpkgs": "nixpkgs", + "pipeline": "pipeline" } }, "systems": { @@ -54,6 +104,21 @@ "repo": "default", "type": "github" } + }, + "systems_2": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } } }, "root": "root", diff --git a/flake.nix b/flake.nix index 3dd4666a6..df915fb01 100644 --- a/flake.nix +++ b/flake.nix @@ -4,22 
+4,30 @@ inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; flake-utils.url = "github:numtide/flake-utils"; + pipeline.url = "path:./pipeline"; }; - outputs = { self, nixpkgs, flake-utils }: + outputs = { self, nixpkgs, flake-utils, pipeline }: flake-utils.lib.eachDefaultSystem (system: let pkgs = import nixpkgs { inherit system; }; + fs = pkgs.lib.fileset; - # Emacs with ox-hugo — shared between devShell and buildApp - emacsWithOxHugo = (pkgs.emacsPackagesFor pkgs.emacs-nox).emacsWithPackages - (epkgs: [ epkgs.ox-hugo ]); + pipelineApp = pipeline.packages.${system}.default; - # Pre-fetched npm dependency tree (node_modules) + # Pre-fetched npm dependency tree (node_modules). + # src is filtered to only package.json + package-lock.json so that + # edits to Quartz source files do not invalidate this derivation. quartzDeps = pkgs.buildNpmPackage { pname = "quartz-deps"; version = "4.5.2"; - src = ./.; + src = fs.toSource { + root = ./.; + fileset = fs.unions [ + ./package.json + ./package-lock.json + ]; + }; npmDepsHash = "sha256-7u+VlIx44B3/ivM9vLMIOn+e4TL4eS6B682vhS+Ikb4="; dontBuild = true; installPhase = '' @@ -28,18 +36,10 @@ ''; }; - # Pre-fetched Hex/Mix dependencies for scripts/pipeline - pipelineMixDeps = pkgs.beamPackages.fetchMixDeps { - pname = "pipeline-mix-deps"; - version = "0.1.0"; - src = ./scripts/pipeline; - sha256 = "sha256-E79X+nUy86G1Jrwv3T7dXekoGv8Hd14ZgJSKWjvlmAw="; - }; - # The build application wrapper script buildApp = pkgs.writeShellApplication { name = "build"; - runtimeInputs = [ pkgs.nodejs_22 pkgs.elixir emacsWithOxHugo ]; + runtimeInputs = [ pkgs.nodejs_22 ]; text = '' NOTES_DIR="''${1:?Usage: build }" NOTES_DIR=$(realpath "$NOTES_DIR") @@ -54,15 +54,10 @@ # Drop in pre-built node_modules ln -s ${quartzDeps}/node_modules "$WORK/repo/node_modules" - # Drop in pre-fetched Mix deps so mix compile runs offline - cp -r ${pipelineMixDeps} "$WORK/repo/scripts/pipeline/deps" - chmod -R u+w "$WORK/repo/scripts/pipeline/deps" - - # 
ox-hugo requires static/ to exist before it can copy image assets - mkdir -p "$WORK/repo/static" - - # Run the export pipeline (org → md, citations transform) - NOTES_DIR="$NOTES_DIR" elixir "$WORK/repo/scripts/export.exs" + # Run the pre-compiled pipeline escript (org → md, citations transform) + ${pipelineApp}/bin/pipeline "$NOTES_DIR" \ + --output "$WORK/repo" \ + --content-dir "$WORK/repo/content" # Build the static site from within the repo copy so relative paths # (e.g. ./package.json in constants.js) resolve correctly. @@ -79,19 +74,18 @@ buildInputs = [ pkgs.nodejs_22 pkgs.elixir - emacsWithOxHugo pkgs.mcp-nixos ]; shellHook = '' echo "Node $(node --version) / npm $(npm --version)" elixir --version 2>/dev/null | head -1 || true - echo "Emacs $(emacs --version | head -1)" ''; }; packages.default = buildApp; packages.build = buildApp; + packages.pipeline = pipelineApp; apps.default = { type = "app"; program = "${buildApp}/bin/build"; }; apps.build = { type = "app"; program = "${buildApp}/bin/build"; }; diff --git a/notes/example-citation.org b/notes/example-citation.org index bca1480f9..72abf53c4 100644 --- a/notes/example-citation.org +++ b/notes/example-citation.org @@ -7,7 +7,7 @@ The methodology described in [cite:@podlovics2021journalArticle] provides a useful framework for analysis. 
Multiple citations can appear together: -[cite:@podlovics2021journalArticle] +[cite:@podlovics2021journalArticle;@petersen2022book] Older bare-cite style (org-roam v1 / older citar) also works: -cite:podlovics2021journalArticle +cite:@podlovics2021journalArticle diff --git a/pipeline/flake.lock b/pipeline/flake.lock new file mode 100644 index 000000000..3c3a5a856 --- /dev/null +++ b/pipeline/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1771369470, + "narHash": "sha256-0NBlEBKkN3lufyvFegY4TYv5mCNHbi5OmBDrzihbBMQ=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "0182a361324364ae3f436a63005877674cf45efb", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/pipeline/flake.nix b/pipeline/flake.nix new file mode 100644 index 000000000..a851a7ada --- /dev/null +++ b/pipeline/flake.nix @@ -0,0 +1,69 @@ +{ + description = "Org-roam export pipeline — Elixir escript"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + 
}; + + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = import nixpkgs { inherit system; }; + fs = pkgs.lib.fileset; + + # Emacs with ox-hugo — needed at runtime by the pipeline escript + # (export_org_files calls `emacs --batch` with ox-hugo). + emacsWithOxHugo = (pkgs.emacsPackagesFor pkgs.emacs-nox).emacsWithPackages + (epkgs: [ epkgs.ox-hugo ]); + + # Pre-fetched Hex/Mix dependencies. + # src is filtered to mix.exs + mix.lock so source edits don't + # invalidate this derivation. + mixDeps = pkgs.beamPackages.fetchMixDeps { + pname = "pipeline-mix-deps"; + version = "0.1.0"; + src = fs.toSource { + root = ./.; + fileset = fs.unions [ + ./mix.exs + ./mix.lock + ]; + }; + sha256 = "sha256-E79X+nUy86G1Jrwv3T7dXekoGv8Hd14ZgJSKWjvlmAw="; + }; + + # Compiled pipeline escript (without runtime wrappers). + pipelineEscript = pkgs.beamPackages.mixRelease { + pname = "pipeline"; + version = "0.1.0"; + src = ./.; + + escriptBinName = "pipeline"; + mixFodDeps = mixDeps; + + stripDebug = true; + }; + + # Wrapped pipeline that puts emacs (with ox-hugo) on PATH so + # the escript's System.cmd("emacs", ...) calls succeed. 
+        pipelineApp = pkgs.writeShellApplication {
+          name = "pipeline";
+          runtimeInputs = [ emacsWithOxHugo ];
+          text = ''
+            exec ${pipelineEscript}/bin/pipeline "$@"
+          '';
+        };
+      in
+      {
+        packages.default = pipelineApp;
+        packages.escript = pipelineEscript;
+
+        devShells.default = pkgs.mkShell {
+          buildInputs = [
+            pkgs.elixir
+            emacsWithOxHugo
+          ];
+        };
+      });
+}
diff --git a/scripts/pipeline/lib/pipeline.ex b/pipeline/lib/pipeline.ex
similarity index 100%
rename from scripts/pipeline/lib/pipeline.ex
rename to pipeline/lib/pipeline.ex
diff --git a/scripts/pipeline/lib/pipeline/application.ex b/pipeline/lib/pipeline/application.ex
similarity index 100%
rename from scripts/pipeline/lib/pipeline/application.ex
rename to pipeline/lib/pipeline/application.ex
diff --git a/pipeline/lib/pipeline/cli.ex b/pipeline/lib/pipeline/cli.ex
new file mode 100644
index 000000000..4978c0ec0
--- /dev/null
+++ b/pipeline/lib/pipeline/cli.ex
@@ -0,0 +1,300 @@
+defmodule Pipeline.CLI do
+  @moduledoc """
+  Escript entry point for the org-roam export pipeline.
+
+  Runs four phases in sequence:
+
+    1. Wipe `content/` (preserving `.gitkeep`)
+    2. Export each `.org` file via `emacs --batch` + ox-hugo → `content/**/*.md`
+    3. Run Elixir transform modules over every `.md` file
+    4. Generate a fallback `content/index.md` if none was exported
+
+  ## Usage
+
+      pipeline NOTES_DIR [--output DIR] [--content-dir DIR]
+
+  Arguments:
+    NOTES_DIR          Path to the directory containing `.org` notes (required).
+                       Also accepts the `NOTES_DIR` env var.
+
+  Options:
+    --output DIR       Output root directory (used as ox-hugo base dir).
+                       Defaults to the `OUTPUT_DIR` env var, or the current
+                       working directory.
+    --content-dir DIR  Output directory for exported Markdown. Defaults to
+                       `content` under the output root.
+                       NOTE(review): only the output root is handed to
+                       ox-hugo, so a custom value presumably has to match
+                       the directory ox-hugo writes to; confirm.
+
+  Optional env vars:
+    BIBTEX_FILE     Path to a `.bib` file used as citation fallback.
+    ZOTERO_URL      Zotero Better BibTeX base URL (default: http://localhost:23119).
+    CITATION_MODE   silent | warn (default) | strict.
+  """
+
+  require Logger
+
+  @usage "Usage: pipeline NOTES_DIR [--output DIR] [--content-dir DIR]"
+
+  # Emacs Lisp passed via --eval: registers an org-cite export processor
+  # that re-emits each citation as `[cite:@key1;@key2]`.
+  @cite_passthrough """
+  (org-cite-register-processor 'passthrough
+    :export-citation
+    (lambda (citation _style _backend _info)
+      (let ((keys (mapcar (lambda (ref)
+                            (concat "@" (org-element-property :key ref)))
+                          (org-cite-get-references citation))))
+        (format "[cite:%s]" (string-join keys ";")))))
+  """
+
+  @doc """
+  Runs the four export phases for the notes directory named in `argv`.
+
+  Halts the VM with status 1 on a usage error, a failed Emacs export or a
+  pipeline error, and with status 0 when no `.org` files are found.
+  """
+  @spec main([String.t()]) :: :ok
+  def main(argv) do
+    # Assert the boot result so a broken OTP app fails here, not mid-run.
+    {:ok, _started} = Application.ensure_all_started(:pipeline)
+
+    {notes_dir, output_dir, content_dir} = parse_args(argv)
+
+    wipe(content_dir)
+    export_org_files(notes_dir, output_dir)
+    run_pipeline(content_dir)
+    generate_index(content_dir)
+
+    md_count =
+      content_dir
+      |> Path.join("**/*.md")
+      |> Path.wildcard()
+      |> length()
+
+    IO.puts("==> Done. #{md_count} markdown files in #{content_dir}")
+  end
+
+  # ---------------------------------------------------------------------------
+  # Argument parsing
+  # ---------------------------------------------------------------------------
+
+  # Resolves {notes_dir, output_dir, content_dir} as expanded paths from argv
+  # and the NOTES_DIR / OUTPUT_DIR env vars; aborts on invalid input.
+  defp parse_args(argv) do
+    {opts, positional, invalid} =
+      OptionParser.parse(argv, strict: [output: :string, content_dir: :string])
+
+    # Reject what strict parsing could not accept, so a typo such as
+    # `--ouput` is reported instead of silently dropped.
+    if invalid != [] do
+      switches = Enum.map_join(invalid, ", ", fn {switch, _value} -> switch end)
+      abort("Error: invalid option(s): #{switches}\n#{@usage}")
+    end
+
+    notes_dir =
+      case positional do
+        [dir | _] -> dir
+        [] -> System.get_env("NOTES_DIR") || abort(@usage)
+      end
+
+    notes_dir = Path.expand(notes_dir)
+
+    if not File.dir?(notes_dir) do
+      abort("Error: notes directory does not exist: #{notes_dir}")
+    end
+
+    output_dir =
+      Path.expand(opts[:output] || System.get_env("OUTPUT_DIR") || File.cwd!())
+
+    content_dir = Path.expand(opts[:content_dir] || Path.join(output_dir, "content"))
+
+    {notes_dir, output_dir, content_dir}
+  end
+
+  # ---------------------------------------------------------------------------
+  # Phase 1: Wipe content/
+  # ---------------------------------------------------------------------------
+
+  # Creates content_dir if needed, then deletes every entry except .gitkeep.
+  defp wipe(content_dir) do
+    IO.puts("==> Wiping #{content_dir}")
+    File.mkdir_p!(content_dir)
+
+    content_dir
+    |> File.ls!()
+    |> Enum.reject(&(&1 == ".gitkeep"))
+    |> Enum.each(&File.rm_rf!(Path.join(content_dir, &1)))
+  end
+
+  # ---------------------------------------------------------------------------
+  # Phase 2: Export org files via Emacs + ox-hugo
+  # ---------------------------------------------------------------------------
+
+  # Exports every .org file under notes_dir; halts with status 0 when there
+  # are none and with status 1 when any Emacs run exits non-zero.
+  defp export_org_files(notes_dir, output_dir) do
+    IO.puts("==> Exporting org files from #{notes_dir}")
+
+    # ox-hugo requires static/ to exist for image asset copying
+    File.mkdir_p!(Path.join(output_dir, "static"))
+
+    org_files = notes_dir |> Path.join("**/*.org") |> Path.wildcard()
+
+    if org_files == [] do
+      IO.puts("No .org files found in #{notes_dir}")
+      System.halt(0)
+    end
+
+    # System.cmd/3 raises a bare :enoent when the program is missing;
+    # check first so the user gets an actionable message.
+    if System.find_executable("emacs") == nil do
+      abort("Error: emacs executable not found on PATH")
+    end
+
+    failures =
+      org_files
+      |> Enum.map(&export_org_file(&1, notes_dir, output_dir))
+      |> Enum.reject(fn {_file, exit_code} -> exit_code == 0 end)
+
+    if failures != [] do
+      IO.puts(:stderr, "\nFailed to export #{length(failures)} file(s):")
+      Enum.each(failures, fn {f, code} -> IO.puts(:stderr, " [exit #{code}] #{f}") end)
+      System.halt(1)
+    end
+  end
+
+  # Runs one batch Emacs export and echoes its output minus startup noise.
+  # Returns {orgfile, exit_code}.
+  defp export_org_file(orgfile, notes_dir, output_dir) do
+    IO.puts(" exporting: #{orgfile}")
+
+    # The note's directory relative to notes_dir becomes its Hugo section.
+    section = orgfile |> Path.dirname() |> Path.relative_to(notes_dir)
+
+    {output, exit_code} =
+      System.cmd("emacs", emacs_args(orgfile, output_dir, section), stderr_to_stdout: true)
+
+    filtered =
+      output
+      |> String.split("\n")
+      |> Enum.reject(&String.match?(&1, ~r/^Loading|^ad-handle|^For information/))
+      |> Enum.join("\n")
+
+    if filtered != "", do: IO.puts(filtered)
+
+    {orgfile, exit_code}
+  end
+
+  # Builds the argument list for one `emacs --batch` ox-hugo export.
+  defp emacs_args(orgfile, output_dir, section) do
+    [
+      "--batch",
+      "--eval", "(require 'ox-hugo)",
+      "--eval", @cite_passthrough,
+      "--eval", "(setq org-cite-export-processors '((t passthrough)))",
+      "--eval", "(setq org-hugo-base-dir #{elisp_string(output_dir)})",
+      "--eval", "(setq org-hugo-default-section-directory #{elisp_string(section)})",
+      "--visit", orgfile,
+      "--funcall", "org-hugo-export-to-md"
+    ]
+  end
+
+  # Renders a path as an Emacs Lisp string literal. Backslashes and double
+  # quotes are escaped so they cannot terminate or corrupt the literal.
+  defp elisp_string(value) do
+    escaped = String.replace(value, ["\\", "\""], fn char -> "\\" <> char end)
+    ~s("#{escaped}")
+  end
+
+  # ---------------------------------------------------------------------------
+  # Phase 3: Markdown transformation pipeline
+  # ---------------------------------------------------------------------------
+
+  # Runs the transform list over content_dir with options read from the
+  # environment; halts with status 1 when Pipeline.run/3 returns an error.
+  defp run_pipeline(content_dir) do
+    IO.puts("==> Running markdown pipeline")
+
+    pipeline_opts = %{
+      zotero_url: System.get_env("ZOTERO_URL", "http://localhost:23119"),
+      bibtex_file: System.get_env("BIBTEX_FILE"),
+      citation_mode: citation_mode(System.get_env("CITATION_MODE", "warn"))
+    }
+
+    transforms = [Pipeline.Transforms.Citations]
+
+    case Pipeline.run(content_dir, transforms, pipeline_opts) do
+      {:ok, stats} ->
+        Enum.each(stats, fn {mod, count} ->
+          IO.puts(" #{inspect(mod)}: #{count} file(s) modified")
+        end)
+
+      {:error, reason} ->
+        IO.puts(:stderr, "Pipeline error: #{inspect(reason)}")
+        System.halt(1)
+    end
+  end
+
+  # Maps CITATION_MODE to an atom; unrecognised values fall back to :warn.
+  defp citation_mode("silent"), do: :silent
+  defp citation_mode("strict"), do: :strict
+  defp citation_mode(_other), do: :warn
+
+  # ---------------------------------------------------------------------------
+  # Phase 4: Generate default index.md if none was exported
+  # ---------------------------------------------------------------------------
+
+  # Writes index.md listing every exported page sorted by title, unless an
+  # index.md already exists in content_dir.
+  defp generate_index(content_dir) do
+    index_path = Path.join(content_dir, "index.md")
+
+    if not File.exists?(index_path) do
+      IO.puts("==> Generating default index.md")
+
+      pages =
+        content_dir
+        |> Path.join("**/*.md")
+        |> Path.wildcard()
+        |> Enum.map(&page_entry(&1, content_dir))
+        |> Enum.sort_by(fn {_slug, title} -> title end)
+        |> Enum.map_join("\n", fn {slug, title} -> "- [#{title}](#{slug})" end)
+
+      File.write!(index_path, """
+      ---
+      title: Index
+      ---
+
+      #{pages}
+      """)
+    end
+  end
+
+  # Returns {slug, title}: the slug is the path relative to content_dir
+  # without extension; the title is read from a `title = "..."` line in
+  # the file, falling back to the slug.
+  defp page_entry(path, content_dir) do
+    slug = path |> Path.relative_to(content_dir) |> Path.rootname()
+
+    title =
+      case Regex.run(~r/^title\s*=\s*"(.+)"/m, File.read!(path)) do
+        [_, t] -> t
+        _ -> slug
+      end
+
+    {slug, title}
+  end
+
+  # ---------------------------------------------------------------------------
+  # Helpers
+  # ---------------------------------------------------------------------------
+
+  # Prints message to stderr and stops the VM with exit status 1.
+  @spec abort(String.t()) :: no_return()
+  defp abort(message) do
+    IO.puts(:stderr, message)
+    System.halt(1)
+  end
+end
diff --git a/scripts/pipeline/lib/pipeline/resolvers/bibtex.ex b/pipeline/lib/pipeline/resolvers/bibtex.ex
similarity index 100%
rename from scripts/pipeline/lib/pipeline/resolvers/bibtex.ex
rename to pipeline/lib/pipeline/resolvers/bibtex.ex
diff --git a/scripts/pipeline/lib/pipeline/resolvers/doi.ex b/pipeline/lib/pipeline/resolvers/doi.ex
similarity index 100%
rename from scripts/pipeline/lib/pipeline/resolvers/doi.ex
rename to pipeline/lib/pipeline/resolvers/doi.ex
diff --git a/scripts/pipeline/lib/pipeline/resolvers/zotero.ex b/pipeline/lib/pipeline/resolvers/zotero.ex
similarity index 100%
rename from scripts/pipeline/lib/pipeline/resolvers/zotero.ex
rename to pipeline/lib/pipeline/resolvers/zotero.ex
diff --git a/scripts/pipeline/lib/pipeline/transform.ex b/pipeline/lib/pipeline/transform.ex
similarity index 100%
rename from scripts/pipeline/lib/pipeline/transform.ex
rename to pipeline/lib/pipeline/transform.ex
diff --git a/scripts/pipeline/lib/pipeline/transforms/citations.ex b/pipeline/lib/pipeline/transforms/citations.ex
similarity index 97%
rename from scripts/pipeline/lib/pipeline/transforms/citations.ex
rename to pipeline/lib/pipeline/transforms/citations.ex
index 67316db64..fcae38672 100644
--- a/scripts/pipeline/lib/pipeline/transforms/citations.ex
+++ b/pipeline/lib/pipeline/transforms/citations.ex
@@ -48,8 +48,8 @@ defmodule Pipeline.Transforms.Citations do
   # Match [cite:@key] and [cite:@key1;@key2;...] (org-cite / citar style)
   @cite_bracket_regex ~r/\[cite:(@[^\]]+)\]/
-  # Match bare cite:key (older roam style, no brackets, no @ prefix)
-  @cite_bare_regex ~r/(? 
1.15", + elixir: "~> 1.17", start_permanent: Mix.env() == :prod, - deps: deps() + deps: deps(), + escript: escript() ] end def application do [ - extra_applications: [:logger, :inets, :ssl], + extra_applications: [:logger], mod: {Pipeline.Application, []} ] end + defp escript do + [main_module: Pipeline.CLI] + end + defp deps do [ + {:finch, "~> 0.19"}, {:req, "~> 0.5"}, {:jason, "~> 1.4"} ] diff --git a/scripts/pipeline/mix.lock b/pipeline/mix.lock similarity index 100% rename from scripts/pipeline/mix.lock rename to pipeline/mix.lock diff --git a/scripts/export.exs b/scripts/export.exs deleted file mode 100644 index 2a3b3ae0a..000000000 --- a/scripts/export.exs +++ /dev/null @@ -1,217 +0,0 @@ -#!/usr/bin/env elixir -# Export org-roam notes (per-file) to content/ via ox-hugo, -# then run the markdown transformation pipeline (citations, etc.). -# -# Usage: -# NOTES_DIR=~/notes elixir scripts/export.exs -# elixir scripts/export.exs /path/to/notes -# -# Optional env vars: -# BIBTEX_FILE — path to a .bib file used as citation fallback -# ZOTERO_URL — Zotero Better BibTeX base URL (default: http://localhost:23119) -# CITATION_MODE — silent | warn (default) | strict -# -# The positional argument takes precedence over the NOTES_DIR env var. - -# --------------------------------------------------------------------------- -# Load the pipeline Mix project so its modules are available in this script. -# --------------------------------------------------------------------------- -repo_root = __DIR__ |> Path.join("..") |> Path.expand() -pipeline_dir = Path.join(repo_root, "scripts/pipeline") - -# Compile and load the pipeline project's modules into this runtime. -# Mix.install is NOT used here because we have a local Mix project — instead -# we compile it and push its beam files onto the code path. -# -# This runs `mix deps.get` + `mix compile` the first time; subsequent runs -# use the compiled artifacts from _build/ (fast, same as Mix caching). 
-{_, 0} = - System.cmd("mix", ["deps.get", "--quiet"], - cd: pipeline_dir, - env: [{"MIX_ENV", "prod"}], - into: IO.stream() - ) - -{_, 0} = - System.cmd("mix", ["compile", "--quiet"], - cd: pipeline_dir, - env: [{"MIX_ENV", "prod"}], - into: IO.stream() - ) - -# Add compiled beam files to the load path so we can call pipeline modules. -pipeline_build = Path.join(pipeline_dir, "_build/prod/lib") - -pipeline_build -|> File.ls!() -|> Enum.each(fn app -> - ebin = Path.join([pipeline_build, app, "ebin"]) - if File.dir?(ebin), do: Code.prepend_path(ebin) -end) - -# Start the pipeline OTP application (which starts Finch for HTTP). -Application.ensure_all_started(:pipeline) - -# --------------------------------------------------------------------------- -# Argument / env resolution -# --------------------------------------------------------------------------- - -notes_dir = - case System.argv() do - [dir | _] -> dir - [] -> - System.get_env("NOTES_DIR") || - (IO.puts(:stderr, "Usage: NOTES_DIR=/path/to/notes elixir scripts/export.exs") - System.halt(1)) - end - -notes_dir = Path.expand(notes_dir) -content_dir = Path.join(repo_root, "content") - -unless File.dir?(notes_dir) do - IO.puts(:stderr, "Error: notes directory does not exist: #{notes_dir}") - System.halt(1) -end - -# --------------------------------------------------------------------------- -# Phase 1: Wipe content/ -# --------------------------------------------------------------------------- -IO.puts("==> Wiping #{content_dir}") - -content_dir -|> File.ls!() -|> Enum.reject(&(&1 == ".gitkeep")) -|> Enum.each(fn entry -> - Path.join(content_dir, entry) |> File.rm_rf!() -end) - -# --------------------------------------------------------------------------- -# Phase 2: Export org files via Emacs + ox-hugo -# --------------------------------------------------------------------------- -IO.puts("==> Exporting org files from #{notes_dir}") - -org_files = - Path.join(notes_dir, "**/*.org") - |> Path.wildcard() - -if 
org_files == [] do - IO.puts("No .org files found in #{notes_dir}") - System.halt(0) -end - -results = - Enum.map(org_files, fn orgfile -> - IO.puts(" exporting: #{orgfile}") - - section = - orgfile - |> Path.dirname() - |> Path.relative_to(notes_dir) - - {output, exit_code} = - System.cmd( - "emacs", - [ - "--batch", - "--eval", "(require 'ox-hugo)", - "--eval", ~s[(setq org-hugo-base-dir "#{repo_root}")], - "--eval", ~s[(setq org-hugo-default-section-directory "#{section}")], - "--visit", orgfile, - "--funcall", "org-hugo-export-to-md" - ], - stderr_to_stdout: true - ) - - filtered = - output - |> String.split("\n") - |> Enum.reject(&String.match?(&1, ~r/^Loading|^ad-handle|^For information/)) - |> Enum.join("\n") - - if filtered != "", do: IO.puts(filtered) - - {orgfile, exit_code} - end) - -failures = Enum.filter(results, fn {_, code} -> code != 0 end) - -if failures != [] do - IO.puts(:stderr, "\nFailed to export #{length(failures)} file(s):") - Enum.each(failures, fn {f, code} -> IO.puts(:stderr, " [exit #{code}] #{f}") end) - System.halt(1) -end - -# --------------------------------------------------------------------------- -# Phase 3: Markdown transformation pipeline -# --------------------------------------------------------------------------- -IO.puts("==> Running markdown pipeline") - -pipeline_opts = %{ - zotero_url: System.get_env("ZOTERO_URL", "http://localhost:23119"), - bibtex_file: System.get_env("BIBTEX_FILE"), - citation_mode: - case System.get_env("CITATION_MODE", "warn") do - "silent" -> :silent - "strict" -> :strict - _ -> :warn - end -} - -transforms = [Pipeline.Transforms.Citations] - -case Pipeline.run(content_dir, transforms, pipeline_opts) do - {:ok, stats} -> - Enum.each(stats, fn {mod, count} -> - IO.puts(" #{inspect(mod)}: #{count} file(s) modified") - end) - - {:error, reason} -> - IO.puts(:stderr, "Pipeline error: #{inspect(reason)}") - System.halt(1) -end - -# 
--------------------------------------------------------------------------- -# Phase 4: Generate default index.md if none was exported -# --------------------------------------------------------------------------- -md_count = - Path.join(content_dir, "**/*.md") - |> Path.wildcard() - |> length() - -index_path = Path.join(content_dir, "index.md") - -unless File.exists?(index_path) do - IO.puts("==> Generating default index.md") - - pages = - Path.join(content_dir, "**/*.md") - |> Path.wildcard() - |> Enum.map(fn path -> - slug = Path.relative_to(path, content_dir) |> Path.rootname() - - title = - path - |> File.read!() - |> then(fn content -> - case Regex.run(~r/^title\s*=\s*"(.+)"/m, content) do - [_, t] -> t - _ -> slug - end - end) - - {slug, title} - end) - |> Enum.sort_by(fn {_, title} -> title end) - |> Enum.map(fn {slug, title} -> "- [#{title}](#{slug})" end) - |> Enum.join("\n") - - File.write!(index_path, """ - --- - title: Index - --- - - #{pages} - """) -end - -IO.puts("==> Done. #{md_count} markdown files in #{content_dir}")