Files
quartz/scripts/export.exs
Ignacio Ballesteros 511b003da8 Add Elixir markdown pipeline with org-citar citation resolution
Introduces scripts/pipeline/, a Mix project that runs as a post-export
transformation pass over content/*.md before Quartz builds the site.

Pipeline (scripts/export.exs phase 3):
- Compiles and loads the Mix project at export time (cached after first run)
- Applies a list of Transform modules sequentially over all .md files
- Only rewrites files that were actually changed

Citations transform (Pipeline.Transforms.Citations):
- Resolves [cite:@key] and bare cite:key syntax produced by ox-hugo/citar
- Resolution chain: Zotero BBT JSON-RPC → BibTeX file → DOI/bare-key fallback
- Zotero probe uses a no-op JSON-RPC call (cayw endpoint blocks indefinitely)
- Zotero resolver fetches PDF attachments via item.attachments, producing
  zotero://open-pdf/... links; falls back to zotero://select/library/items/...
- BibTeX resolver parses .bib files with a simple regex parser (no deps)
- DOI resolver is the always-succeeding last resort

Configuration via env vars:
  BIBTEX_FILE   — path to .bib file for fallback resolution
  ZOTERO_URL    — Zotero base URL (default: http://localhost:23119)
  CITATION_MODE — silent | warn (default) | strict

Adding future transforms requires only implementing Pipeline.Transform
behaviour and appending the module to the transforms list in export.exs.
2026-02-20 10:00:11 +01:00

218 lines
6.3 KiB
Elixir

#!/usr/bin/env elixir
# Export org-roam notes (per-file) to content/ via ox-hugo,
# then run the markdown transformation pipeline (citations, etc.).
#
# Usage:
# NOTES_DIR=~/notes elixir scripts/export.exs
# elixir scripts/export.exs /path/to/notes
#
# Optional env vars:
# BIBTEX_FILE — path to a .bib file used as citation fallback
# ZOTERO_URL — Zotero Better BibTeX base URL (default: http://localhost:23119)
# CITATION_MODE — silent | warn (default) | strict
#
# The positional argument takes precedence over the NOTES_DIR env var.
# ---------------------------------------------------------------------------
# Load the pipeline Mix project so its modules are available in this script.
# ---------------------------------------------------------------------------
# Absolute path of the repository root: the parent of this script's directory.
repo_root = __DIR__ |> Path.join("..") |> Path.expand()
pipeline_dir = Path.join(repo_root, "scripts/pipeline")

# Compile and load the pipeline project's modules into this runtime.
# Mix.install is NOT used here because we have a local Mix project — instead
# we compile it and push its beam files onto the code path.
#
# Both mix tasks are invoked on every run, in MIX_ENV=prod, with their output
# streamed to this script's stdout. Per the original note, runs after the
# first are fast because Mix reuses the artifacts already in _build/.
Enum.each(["deps.get", "compile"], fn mix_task ->
  # Assertive match: any non-zero exit status aborts the script right here
  # with a MatchError instead of continuing with a half-built project.
  {_, 0} =
    System.cmd("mix", [mix_task, "--quiet"],
      cd: pipeline_dir,
      env: [{"MIX_ENV", "prod"}],
      into: IO.stream()
    )
end)

# Add every compiled application's ebin/ directory to the load path so the
# pipeline modules (and their dependencies) can be called from this script.
pipeline_build = Path.join(pipeline_dir, "_build/prod/lib")

pipeline_build
|> File.ls!()
|> Enum.map(&Path.join([pipeline_build, &1, "ebin"]))
|> Enum.filter(&File.dir?/1)
|> Enum.each(&Code.prepend_path/1)

# Start the pipeline OTP application (which, per the original note, starts
# Finch for HTTP). The result is matched so that a failed start stops the
# script now — before content/ is wiped and the Emacs exports are run —
# rather than surfacing later as an obscure error inside the pipeline.
{:ok, _started_apps} = Application.ensure_all_started(:pipeline)
# ---------------------------------------------------------------------------
# Argument / env resolution
# ---------------------------------------------------------------------------
# Resolve the notes directory. A positional CLI argument wins; otherwise the
# NOTES_DIR environment variable is used; with neither, print usage and exit 1.
requested_dir =
  case {System.argv(), System.get_env("NOTES_DIR")} do
    {[cli_dir | _ignored], _env} ->
      cli_dir

    {[], nil} ->
      IO.puts(:stderr, "Usage: NOTES_DIR=/path/to/notes elixir scripts/export.exs")
      System.halt(1)

    {[], env_dir} ->
      env_dir
  end

notes_dir = Path.expand(requested_dir)

# Exported markdown is written under <repo_root>/content.
content_dir = Path.join(repo_root, "content")

# Bail out early when the resolved notes path is not an existing directory.
if not File.dir?(notes_dir) do
  IO.puts(:stderr, "Error: notes directory does not exist: #{notes_dir}")
  System.halt(1)
end
# ---------------------------------------------------------------------------
# Phase 1: Wipe content/
# ---------------------------------------------------------------------------
IO.puts("==> Wiping #{content_dir}")

# Make sure content/ exists before listing it: File.ls!/1 raises on a missing
# directory, which would abort the export on a checkout where content/ has
# not been created yet. mkdir_p! is a no-op when the directory already exists.
File.mkdir_p!(content_dir)

# Remove every top-level entry (files and directories, recursively) except
# the .gitkeep placeholder.
content_dir
|> File.ls!()
|> Enum.reject(&(&1 == ".gitkeep"))
|> Enum.each(&File.rm_rf!(Path.join(content_dir, &1)))
# ---------------------------------------------------------------------------
# Phase 2: Export org files via Emacs + ox-hugo
# ---------------------------------------------------------------------------
IO.puts("==> Exporting org files from #{notes_dir}")

# Every .org file below notes_dir, at any depth.
org_files = notes_dir |> Path.join("**/*.org") |> Path.wildcard()

if org_files == [] do
  IO.puts("No .org files found in #{notes_dir}")
  System.halt(0)
end

# Renders a value as an Emacs Lisp string literal. Backslashes and double
# quotes are escaped (backslashes first, so the escapes added for quotes are
# not doubled) — without this, a path containing either character would
# terminate the literal early and corrupt the --eval form.
elisp_string = fn value ->
  escaped =
    value
    |> String.replace("\\", "\\\\")
    |> String.replace("\"", "\\\"")

  ~s["#{escaped}"]
end

# Emacs output lines dropped from the per-file log.
noise_pattern = ~r/^Loading|^ad-handle|^For information/

# Export each file with its own batch Emacs process; collect {file, exit_code}.
results =
  Enum.map(org_files, fn orgfile ->
    IO.puts(" exporting: #{orgfile}")

    # Directory of the org file relative to notes_dir; passed to ox-hugo as
    # the section directory.
    section =
      orgfile
      |> Path.dirname()
      |> Path.relative_to(notes_dir)

    {output, exit_code} =
      System.cmd(
        "emacs",
        [
          "--batch",
          "--eval", "(require 'ox-hugo)",
          "--eval", "(setq org-hugo-base-dir #{elisp_string.(repo_root)})",
          "--eval", "(setq org-hugo-default-section-directory #{elisp_string.(section)})",
          "--visit", orgfile,
          "--funcall", "org-hugo-export-to-md"
        ],
        stderr_to_stdout: true
      )

    filtered =
      output
      |> String.split("\n")
      |> Enum.reject(&String.match?(&1, noise_pattern))
      |> Enum.join("\n")

    if filtered != "", do: IO.puts(filtered)

    {orgfile, exit_code}
  end)

# Any non-zero exit status counts as a failed export; report them all and
# stop with exit status 1.
failures = Enum.reject(results, &match?({_, 0}, &1))

if failures != [] do
  IO.puts(:stderr, "\nFailed to export #{length(failures)} file(s):")
  Enum.each(failures, fn {f, code} -> IO.puts(:stderr, " [exit #{code}] #{f}") end)
  System.halt(1)
end
# ---------------------------------------------------------------------------
# Phase 3: Markdown transformation pipeline
# ---------------------------------------------------------------------------
IO.puts("==> Running markdown pipeline")

# Map CITATION_MODE onto an atom. An unset variable defaults to "warn".
# Unrecognised values still fall back to :warn, but are reported on stderr so
# a typo (e.g. "Strict") does not silently weaken the requested mode.
citation_mode =
  case System.get_env("CITATION_MODE", "warn") do
    "silent" ->
      :silent

    "warn" ->
      :warn

    "strict" ->
      :strict

    other ->
      IO.puts(
        :stderr,
        "Warning: unrecognised CITATION_MODE #{inspect(other)}; falling back to \"warn\""
      )

      :warn
  end

# Options handed to Pipeline.run/3. bibtex_file is nil when BIBTEX_FILE is unset.
pipeline_opts = %{
  zotero_url: System.get_env("ZOTERO_URL", "http://localhost:23119"),
  bibtex_file: System.get_env("BIBTEX_FILE"),
  citation_mode: citation_mode
}

# Transform modules passed to the pipeline; append new ones to this list.
transforms = [Pipeline.Transforms.Citations]

case Pipeline.run(content_dir, transforms, pipeline_opts) do
  {:ok, stats} ->
    # stats is enumerated as {module, count} pairs, one report line each.
    Enum.each(stats, fn {mod, count} ->
      IO.puts(" #{inspect(mod)}: #{count} file(s) modified")
    end)

  {:error, reason} ->
    IO.puts(:stderr, "Pipeline error: #{inspect(reason)}")
    System.halt(1)
end
# ---------------------------------------------------------------------------
# Phase 4: Generate default index.md if none was exported
# ---------------------------------------------------------------------------
index_path = Path.join(content_dir, "index.md")
markdown_glob = Path.join(content_dir, "**/*.md")

# Only synthesise an index when the export did not produce one itself.
if not File.exists?(index_path) do
  IO.puts("==> Generating default index.md")

  # One "- [title](slug)" bullet per markdown file, sorted by title.
  pages =
    markdown_glob
    |> Path.wildcard()
    |> Enum.map(fn path ->
      # Link target: path relative to content/, without the file extension.
      slug = path |> Path.relative_to(content_dir) |> Path.rootname()

      # Use the first `title = "..."` line found in the file as the title;
      # fall back to the slug when there is none.
      title =
        case Regex.run(~r/^title\s*=\s*"(.+)"/m, File.read!(path)) do
          [_, captured] -> captured
          _ -> slug
        end

      {slug, title}
    end)
    |> Enum.sort_by(fn {_slug, title} -> title end)
    |> Enum.map_join("\n", fn {slug, title} -> "- [#{title}](#{slug})" end)

  File.write!(index_path, """
  ---
  title: Index
  ---
  #{pages}
  """)
end

# Count after the optional index generation so the summary includes a freshly
# written index.md instead of under-reporting by one.
md_count = markdown_glob |> Path.wildcard() |> length()

IO.puts("==> Done. #{md_count} markdown files in #{content_dir}")