Files
quartz-org-roam/scripts/pipeline/lib/pipeline/transforms/citations.ex
Ignacio Ballesteros 511b003da8 Add Elixir markdown pipeline with org-citar citation resolution
Introduces scripts/pipeline/, a Mix project that runs as a post-export
transformation pass over content/*.md before Quartz builds the site.

Pipeline (scripts/export.exs phase 3):
- Compiles and loads the Mix project at export time (cached after first run)
- Applies a list of Transform modules sequentially over all .md files
- Only rewrites files that were actually changed

Citations transform (Pipeline.Transforms.Citations):
- Resolves [cite:@key] and bare cite:key syntax produced by ox-hugo/citar
- Resolution chain: Zotero BBT JSON-RPC → BibTeX file → DOI/bare-key fallback
- Zotero probe uses a no-op JSON-RPC call (cayw endpoint blocks indefinitely)
- Zotero resolver fetches PDF attachments via item.attachments, producing
  zotero://open-pdf/... links; falls back to zotero://select/library/items/...
- BibTeX resolver parses .bib files with a simple regex parser (no deps)
- DOI resolver is the always-succeeding last resort

Configuration via env vars:
  BIBTEX_FILE   — path to .bib file for fallback resolution
  ZOTERO_URL    — Zotero base URL (default: http://localhost:23119)
  CITATION_MODE — silent | warn (default) | strict

Adding future transforms requires only implementing Pipeline.Transform
behaviour and appending the module to the transforms list in export.exs.
2026-02-20 10:00:11 +01:00

232 lines
6.9 KiB
Elixir

defmodule Pipeline.Transforms.Citations do
@moduledoc """
Markdown transform: resolves org-citar citation keys to hyperlinks.
## Recognised citation syntax (as output by ox-hugo from org-citar)
[cite:@key] → org-cite / citar standard (most common)
[cite:@key1;@key2] → multiple citations
cite:key → older roam-style bare cite syntax
## Resolution chain (in order)
1. Zotero (live instance via Better BibTeX JSON-RPC) — preferred
2. BibTeX file (BIBTEX_FILE env var) — fallback
3. DOI / bare key — always succeeds
## Modes (opts.citation_mode)
:silent — silently use DOI/bare-key fallback when Zotero+BibTeX fail
:warn — (default) emit a Logger.warning for unresolved keys
:strict — raise on unresolved keys (aborts pipeline)
## Format
Resolved citations are rendered as:
[Label](url) when a URL is available
[Label] when no URL could be determined (bare key fallback)
Multiple semicolon-separated keys become space-separated links:
[cite:@a;@b] → [Author A, 2020](url_a) [Author B, 2019](url_b)
## init/1 callback
Loads the BibTeX file (if configured) once before processing begins,
and probes Zotero availability, emitting warnings as appropriate.
"""
@behaviour Pipeline.Transform
require Logger
alias Pipeline.Resolvers.Zotero
alias Pipeline.Resolvers.BibTeX
alias Pipeline.Resolvers.DOI
# Match [cite:@key] and [cite:@key1;@key2;...] (org-cite / citar style)
@cite_bracket_regex ~r/\[cite:(@[^\]]+)\]/
# Match bare cite:key (older roam style, no brackets, no @ prefix)
@cite_bare_regex ~r/(?<![(\[])cite:([a-zA-Z0-9_:-]+)/
# ------------------------------------------------------------------
# Pipeline callbacks
# ------------------------------------------------------------------
@doc """
Called once before processing any files. Loads BibTeX, probes Zotero.
Returns a state map passed to every `apply/3` call.
"""
def init(opts) do
bibtex_entries = load_bibtex(opts)
zotero_available = probe_zotero(opts)
if not zotero_available and bibtex_entries == %{} do
Logger.warning(
"Citations: neither Zotero nor a BibTeX file is available. " <>
"All citations will fall back to bare-key rendering. " <>
"Set BIBTEX_FILE env var or start Zotero with Better BibTeX to resolve citations."
)
end
%{
bibtex_entries: bibtex_entries,
zotero_available: zotero_available,
zotero_url: Map.get(opts, :zotero_url, "http://localhost:23119"),
citation_mode: Map.get(opts, :citation_mode, :warn)
}
end
@doc """
Apply citation resolution to a single markdown file's content.
"""
def apply(content, state, _opts) do
content
|> resolve_bracket_citations(state)
|> resolve_bare_citations(state)
end
# ------------------------------------------------------------------
# Resolution passes
# ------------------------------------------------------------------
defp resolve_bracket_citations(content, state) do
Regex.replace(@cite_bracket_regex, content, fn _full, keys_str ->
keys_str
|> String.split(";")
|> Enum.map(&String.trim/1)
|> Enum.map(fn "@" <> key -> key end)
|> Enum.map(&resolve_key(&1, state))
|> Enum.join(" ")
end)
end
defp resolve_bare_citations(content, state) do
Regex.replace(@cite_bare_regex, content, fn _full, key ->
resolve_key(key, state)
end)
end
# ------------------------------------------------------------------
# Single-key resolution chain
# ------------------------------------------------------------------
defp resolve_key(key, state) do
info =
with :error <- try_zotero(key, state),
:error <- try_bibtex(key, state) do
handle_unresolved(key, state)
else
{:ok, citation_info} -> citation_info
end
format_result(info)
end
defp try_zotero(_key, %{zotero_available: false}), do: :error
defp try_zotero(key, %{zotero_url: url}) do
Zotero.resolve(key, url)
end
defp try_bibtex(_key, %{bibtex_entries: entries}) when map_size(entries) == 0, do: :error
defp try_bibtex(key, %{bibtex_entries: entries}) do
BibTeX.resolve(key, entries)
end
defp handle_unresolved(key, %{citation_mode: mode}) do
case mode do
:strict ->
raise "Citations: could not resolve citation key '#{key}' and mode is :strict"
:warn ->
Logger.warning("Citations: unresolved citation key '#{key}' — using bare-key fallback")
{:ok, result} = DOI.resolve(key)
result
:silent ->
{:ok, result} = DOI.resolve(key)
result
end
end
defp format_result(%{label: label, url: nil}), do: "[#{label}]"
defp format_result(%{label: label, url: url}), do: "[#{label}](#{url})"
# ------------------------------------------------------------------
# Init helpers
# ------------------------------------------------------------------
defp load_bibtex(opts) do
path = Map.get(opts, :bibtex_file) || System.get_env("BIBTEX_FILE")
cond do
is_nil(path) ->
Logger.debug("Citations: BIBTEX_FILE not set — BibTeX resolver disabled")
%{}
not File.exists?(path) ->
Logger.warning("Citations: BIBTEX_FILE=#{path} does not exist — BibTeX resolver disabled")
%{}
true ->
case BibTeX.load(path) do
{:ok, entries} -> entries
{:error, reason} ->
Logger.warning("Citations: failed to load BibTeX file #{path}: #{inspect(reason)}")
%{}
end
end
end
defp probe_zotero(opts) do
url = Map.get(opts, :zotero_url, "http://localhost:23119")
# Use a no-op JSON-RPC call to probe availability.
# /better-bibtex/cayw is intentionally avoided — it blocks waiting for
# user interaction and never returns without a pick.
payload =
Jason.encode!(%{
jsonrpc: "2.0",
method: "item.search",
params: [[[]]],
id: 0
})
result =
try do
Req.post(url <> "/better-bibtex/json-rpc",
body: payload,
headers: [{"content-type", "application/json"}],
receive_timeout: 3_000,
finch: Pipeline.Finch
)
rescue
e -> {:error, e}
end
case result do
{:ok, %{status: 200}} ->
Logger.info("Citations: Zotero Better BibTeX is available at #{url}")
true
{:ok, %{status: status}} ->
Logger.warning(
"Citations: Zotero responded HTTP #{status} at #{url}" <>
"is Better BibTeX installed?"
)
false
_ ->
Logger.warning(
"Citations: Zotero not reachable at #{url}" <>
"start Zotero with Better BibTeX or set BIBTEX_FILE as fallback"
)
false
end
end
end