Add Elixir markdown pipeline with org-citar citation resolution
Introduces scripts/pipeline/, a Mix project that runs as a post-export
transformation pass over content/*.md before Quartz builds the site.

Pipeline (scripts/export.exs phase 3):
- Compiles and loads the Mix project at export time (cached after first run)
- Applies a list of Transform modules sequentially over all .md files
- Only rewrites files that were actually changed

Citations transform (Pipeline.Transforms.Citations):
- Resolves [cite:@key] and bare cite:key syntax produced by ox-hugo/citar
- Resolution chain: Zotero BBT JSON-RPC → BibTeX file → DOI/bare-key fallback
- Zotero probe uses a no-op JSON-RPC call (cayw endpoint blocks indefinitely)
- Zotero resolver fetches PDF attachments via item.attachments, producing
  zotero://open-pdf/... links; falls back to zotero://select/library/items/...
- BibTeX resolver parses .bib files with a simple regex parser (no deps)
- DOI resolver is the always-succeeding last resort

Configuration via env vars:
  BIBTEX_FILE   — path to .bib file for fallback resolution
  ZOTERO_URL    — Zotero base URL (default: http://localhost:23119)
  CITATION_MODE — silent | warn (default) | strict

Adding future transforms requires only implementing the Pipeline.Transform
behaviour and appending the module to the transforms list in export.exs.
This commit is contained in:
@@ -1,22 +1,71 @@
|
||||
#!/usr/bin/env elixir
|
||||
# Export org-roam notes (per-file) to content/ via ox-hugo.
|
||||
# Export org-roam notes (per-file) to content/ via ox-hugo,
|
||||
# then run the markdown transformation pipeline (citations, etc.).
|
||||
#
|
||||
# Usage:
|
||||
# NOTES_DIR=~/notes elixir scripts/export.exs
|
||||
# elixir scripts/export.exs /path/to/notes
|
||||
#
|
||||
# Optional env vars:
|
||||
# BIBTEX_FILE — path to a .bib file used as citation fallback
|
||||
# ZOTERO_URL — Zotero Better BibTeX base URL (default: http://localhost:23119)
|
||||
# CITATION_MODE — silent | warn (default) | strict
|
||||
#
|
||||
# The positional argument takes precedence over the NOTES_DIR env var.
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Load the pipeline Mix project so its modules are available in this script.
|
||||
# ---------------------------------------------------------------------------
|
||||
repo_root = __DIR__ |> Path.join("..") |> Path.expand()
|
||||
pipeline_dir = Path.join(repo_root, "scripts/pipeline")
|
||||
|
||||
# Compile and load the pipeline project's modules into this runtime.
|
||||
# Mix.install is NOT used here because we have a local Mix project — instead
|
||||
# we compile it and push its beam files onto the code path.
|
||||
#
|
||||
# `mix deps.get` + `mix compile` are invoked on every run; after the first run
|
||||
# they reuse the compiled artifacts from _build/ (fast, same as Mix caching).
|
||||
{_, 0} =
|
||||
System.cmd("mix", ["deps.get", "--quiet"],
|
||||
cd: pipeline_dir,
|
||||
env: [{"MIX_ENV", "prod"}],
|
||||
into: IO.stream()
|
||||
)
|
||||
|
||||
{_, 0} =
|
||||
System.cmd("mix", ["compile", "--quiet"],
|
||||
cd: pipeline_dir,
|
||||
env: [{"MIX_ENV", "prod"}],
|
||||
into: IO.stream()
|
||||
)
|
||||
|
||||
# Add compiled beam files to the load path so we can call pipeline modules.
|
||||
pipeline_build = Path.join(pipeline_dir, "_build/prod/lib")
|
||||
|
||||
pipeline_build
|
||||
|> File.ls!()
|
||||
|> Enum.each(fn app ->
|
||||
ebin = Path.join([pipeline_build, app, "ebin"])
|
||||
if File.dir?(ebin), do: Code.prepend_path(ebin)
|
||||
end)
|
||||
|
||||
# Start the pipeline OTP application (which starts Finch for HTTP).
|
||||
Application.ensure_all_started(:pipeline)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Argument / env resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
notes_dir =
|
||||
case System.argv() do
|
||||
[dir | _] -> dir
|
||||
[] ->
|
||||
System.get_env("NOTES_DIR") ||
|
||||
(IO.puts(:stderr, "Usage: NOTES_DIR=/path/to/notes elixir scripts/export.exs"); System.halt(1))
|
||||
(IO.puts(:stderr, "Usage: NOTES_DIR=/path/to/notes elixir scripts/export.exs")
|
||||
System.halt(1))
|
||||
end
|
||||
|
||||
notes_dir = Path.expand(notes_dir)
|
||||
repo_root = __DIR__ |> Path.join("..") |> Path.expand()
|
||||
content_dir = Path.join(repo_root, "content")
|
||||
|
||||
unless File.dir?(notes_dir) do
|
||||
@@ -24,7 +73,9 @@ unless File.dir?(notes_dir) do
|
||||
System.halt(1)
|
||||
end
|
||||
|
||||
# Wipe content/, preserving .gitkeep
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 1: Wipe content/
|
||||
# ---------------------------------------------------------------------------
|
||||
IO.puts("==> Wiping #{content_dir}")
|
||||
|
||||
content_dir
|
||||
@@ -34,7 +85,9 @@ content_dir
|
||||
Path.join(content_dir, entry) |> File.rm_rf!()
|
||||
end)
|
||||
|
||||
# Collect all .org files
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 2: Export org files via Emacs + ox-hugo
|
||||
# ---------------------------------------------------------------------------
|
||||
IO.puts("==> Exporting org files from #{notes_dir}")
|
||||
|
||||
org_files =
|
||||
@@ -46,12 +99,10 @@ if org_files == [] do
|
||||
System.halt(0)
|
||||
end
|
||||
|
||||
# Export each file via emacs --batch
|
||||
results =
|
||||
Enum.map(org_files, fn orgfile ->
|
||||
IO.puts(" exporting: #{orgfile}")
|
||||
|
||||
# Mirror the notes subdirectory structure under content/
|
||||
section =
|
||||
orgfile
|
||||
|> Path.dirname()
|
||||
@@ -71,7 +122,6 @@ results =
|
||||
stderr_to_stdout: true
|
||||
)
|
||||
|
||||
# Filter noisy emacs startup lines, same as the shell script
|
||||
filtered =
|
||||
output
|
||||
|> String.split("\n")
|
||||
@@ -91,12 +141,43 @@ if failures != [] do
|
||||
System.halt(1)
|
||||
end
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 3: Markdown transformation pipeline
|
||||
# ---------------------------------------------------------------------------
|
||||
IO.puts("==> Running markdown pipeline")
|
||||
|
||||
pipeline_opts = %{
|
||||
zotero_url: System.get_env("ZOTERO_URL", "http://localhost:23119"),
|
||||
bibtex_file: System.get_env("BIBTEX_FILE"),
|
||||
citation_mode:
|
||||
case System.get_env("CITATION_MODE", "warn") do
|
||||
"silent" -> :silent
|
||||
"strict" -> :strict
|
||||
_ -> :warn
|
||||
end
|
||||
}
|
||||
|
||||
transforms = [Pipeline.Transforms.Citations]
|
||||
|
||||
case Pipeline.run(content_dir, transforms, pipeline_opts) do
|
||||
{:ok, stats} ->
|
||||
Enum.each(stats, fn {mod, count} ->
|
||||
IO.puts(" #{inspect(mod)}: #{count} file(s) modified")
|
||||
end)
|
||||
|
||||
{:error, reason} ->
|
||||
IO.puts(:stderr, "Pipeline error: #{inspect(reason)}")
|
||||
System.halt(1)
|
||||
end
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 4: Generate default index.md if none was exported
|
||||
# ---------------------------------------------------------------------------
|
||||
md_count =
|
||||
Path.join(content_dir, "**/*.md")
|
||||
|> Path.wildcard()
|
||||
|> length()
|
||||
|
||||
# Generate a default index.md if none was exported
|
||||
index_path = Path.join(content_dir, "index.md")
|
||||
|
||||
unless File.exists?(index_path) do
|
||||
|
||||
Reference in New Issue
Block a user