Add Elixir markdown pipeline with org-citar citation resolution
Introduces scripts/pipeline/, a Mix project that runs as a post-export transformation pass over content/*.md before Quartz builds the site. Pipeline (scripts/export.exs phase 3): - Compiles and loads the Mix project at export time (cached after first run) - Applies a list of Transform modules sequentially over all .md files - Only rewrites files that were actually changed Citations transform (Pipeline.Transforms.Citations): - Resolves [cite:@key] and bare cite:key syntax produced by ox-hugo/citar - Resolution chain: Zotero BBT JSON-RPC → BibTeX file → DOI/bare-key fallback - Zotero probe uses a no-op JSON-RPC call (cayw endpoint blocks indefinitely) - Zotero resolver fetches PDF attachments via item.attachments, producing zotero://open-pdf/... links; falls back to zotero://select/library/items/... - BibTeX resolver parses .bib files with a simple regex parser (no deps) - DOI resolver is the always-succeeding last resort Configuration via env vars: BIBTEX_FILE — path to .bib file for fallback resolution ZOTERO_URL — Zotero base URL (default: http://localhost:23119) CITATION_MODE — silent | warn (default) | strict Adding future transforms requires only implementing Pipeline.Transform behaviour and appending the module to the transforms list in export.exs.
This commit is contained in:
83
scripts/pipeline/lib/pipeline.ex
Normal file
83
scripts/pipeline/lib/pipeline.ex
Normal file
@@ -0,0 +1,83 @@
|
||||
defmodule Pipeline do
|
||||
@moduledoc """
|
||||
Post-export markdown transformation pipeline.
|
||||
|
||||
Applies a list of transform modules sequentially over every .md file
|
||||
in a content directory. Each transform module must implement:
|
||||
|
||||
apply(content :: String.t(), opts :: map()) :: String.t()
|
||||
|
||||
Transforms are applied in the order given. A file is rewritten only
|
||||
when at least one transform mutates its content (checked via equality).
|
||||
|
||||
## Usage
|
||||
|
||||
opts = %{
|
||||
zotero_url: "http://localhost:23119",
|
||||
bibtex_file: System.get_env("BIBTEX_FILE"),
|
||||
citation_mode: :warn # :silent | :warn | :strict
|
||||
}
|
||||
|
||||
Pipeline.run(content_dir, [Pipeline.Transforms.Citations], opts)
|
||||
"""
|
||||
|
||||
require Logger
|
||||
|
||||
@type transform :: module()
|
||||
@type opts :: map()
|
||||
|
||||
@doc """
|
||||
Run all transforms over every .md file under `content_dir`.
|
||||
Returns `{:ok, stats}` where stats maps each transform to a count of files it changed.
|
||||
"""
|
||||
@spec run(String.t(), [transform()], opts()) :: {:ok, map()}
|
||||
def run(content_dir, transforms, opts \\ %{}) do
|
||||
md_files =
|
||||
content_dir
|
||||
|> Path.join("**/*.md")
|
||||
|> Path.wildcard()
|
||||
|
||||
if md_files == [] do
|
||||
Logger.warning("Pipeline: no .md files found in #{content_dir}")
|
||||
{:ok, %{}}
|
||||
else
|
||||
Logger.info("Pipeline: processing #{length(md_files)} markdown files with #{length(transforms)} transform(s)")
|
||||
|
||||
# Initialise transforms (allows them to perform setup such as loading a .bib file).
|
||||
# Each transform module must implement the Pipeline.Transform behaviour.
|
||||
initialized =
|
||||
Enum.map(transforms, fn mod ->
|
||||
state = mod.init(opts)
|
||||
{mod, state}
|
||||
end)
|
||||
|
||||
stats =
|
||||
Enum.reduce(md_files, %{}, fn path, acc ->
|
||||
original = File.read!(path)
|
||||
|
||||
{transformed, file_stats} =
|
||||
Enum.reduce(initialized, {original, %{}}, fn {mod, state}, {content, fstats} ->
|
||||
result = mod.apply(content, state, opts)
|
||||
changed = result != content
|
||||
{result, Map.update(fstats, mod, (if changed, do: 1, else: 0), &(&1 + (if changed, do: 1, else: 0)))}
|
||||
end)
|
||||
|
||||
if transformed != original do
|
||||
File.write!(path, transformed)
|
||||
Logger.debug("Pipeline: updated #{Path.relative_to_cwd(path)}")
|
||||
end
|
||||
|
||||
Map.merge(acc, file_stats, fn _k, a, b -> a + b end)
|
||||
end)
|
||||
|
||||
Enum.each(initialized, fn {mod, state} ->
|
||||
# teardown/1 is optional in the behaviour
|
||||
if function_exported?(mod, :teardown, 1) do
|
||||
mod.teardown(state)
|
||||
end
|
||||
end)
|
||||
|
||||
{:ok, stats}
|
||||
end
|
||||
end
|
||||
end
|
||||
14
scripts/pipeline/lib/pipeline/application.ex
Normal file
14
scripts/pipeline/lib/pipeline/application.ex
Normal file
@@ -0,0 +1,14 @@
|
||||
defmodule Pipeline.Application do
|
||||
@moduledoc false
|
||||
use Application
|
||||
|
||||
@impl true
|
||||
def start(_type, _args) do
|
||||
children = [
|
||||
{Finch, name: Pipeline.Finch}
|
||||
]
|
||||
|
||||
opts = [strategy: :one_for_one, name: Pipeline.Supervisor]
|
||||
Supervisor.start_link(children, opts)
|
||||
end
|
||||
end
|
||||
178
scripts/pipeline/lib/pipeline/resolvers/bibtex.ex
Normal file
178
scripts/pipeline/lib/pipeline/resolvers/bibtex.ex
Normal file
@@ -0,0 +1,178 @@
|
||||
defmodule Pipeline.Resolvers.BibTeX do
|
||||
@moduledoc """
|
||||
Resolves citation keys from a local BibTeX (.bib) file.
|
||||
|
||||
Configured via the `BIBTEX_FILE` environment variable, or passed directly
|
||||
as `opts.bibtex_file`. The file is parsed once at init time and the
|
||||
resulting entry map is reused for all lookups.
|
||||
|
||||
Supports extracting: author last names, year, title, DOI, URL.
|
||||
|
||||
BibTeX entry format parsed:
|
||||
|
||||
@type{citationkey,
|
||||
author = {Last, First and Last2, First2},
|
||||
year = {2021},
|
||||
title = {Some Title},
|
||||
doi = {10.xxxx/yyyy},
|
||||
url = {https://example.com},
|
||||
}
|
||||
|
||||
Returns `{:ok, %{label: "Author, Year", url: "..."}}` or `:error`.
|
||||
"""
|
||||
|
||||
require Logger
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Public API
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@doc """
|
||||
Parse a .bib file and return a map of `%{citation_key => entry_map}`.
|
||||
Returns `{:ok, entries}` or `{:error, reason}`.
|
||||
"""
|
||||
@spec load(String.t()) :: {:ok, map()} | {:error, term()}
|
||||
def load(path) do
|
||||
case File.read(path) do
|
||||
{:ok, content} ->
|
||||
entries = parse_entries(content)
|
||||
Logger.info("BibTeX: loaded #{map_size(entries)} entries from #{path}")
|
||||
{:ok, entries}
|
||||
|
||||
{:error, reason} ->
|
||||
{:error, reason}
|
||||
end
|
||||
end
|
||||
|
||||
@doc """
|
||||
Resolve a citation key from pre-loaded BibTeX entries.
|
||||
"""
|
||||
@spec resolve(String.t(), map()) :: {:ok, map()} | :error
|
||||
def resolve(key, entries) do
|
||||
case Map.fetch(entries, key) do
|
||||
{:ok, entry} ->
|
||||
label = build_label(entry)
|
||||
url = build_url(entry)
|
||||
{:ok, %{label: label, url: url}}
|
||||
|
||||
:error ->
|
||||
:error
|
||||
end
|
||||
end
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Parsing
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
# Match @type{key, ...fields...}
|
||||
# We handle nested braces by scanning character by character after
|
||||
# finding the opening, rather than relying on a single regex.
|
||||
@entry_header ~r/@\w+\s*\{\s*([^,\s]+)\s*,/
|
||||
|
||||
defp parse_entries(content) do
|
||||
# Split on "@" boundaries, then parse each chunk
|
||||
content
|
||||
|> String.split(~r/(?=@\w+\s*\{)/, trim: true)
|
||||
|> Enum.reduce(%{}, fn chunk, acc ->
|
||||
case Regex.run(@entry_header, chunk) do
|
||||
[_, key] ->
|
||||
fields = parse_fields(chunk)
|
||||
Map.put(acc, String.trim(key), fields)
|
||||
|
||||
_ ->
|
||||
acc
|
||||
end
|
||||
end)
|
||||
end
|
||||
|
||||
# Extract key = {value} or key = "value" pairs from an entry block.
|
||||
# Handles simple single-depth braces; good enough for common fields.
|
||||
@field_regex ~r/(\w+)\s*=\s*(?:\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}|"([^"]*)")/
|
||||
|
||||
defp parse_fields(chunk) do
|
||||
@field_regex
|
||||
|> Regex.scan(chunk)
|
||||
|> Enum.reduce(%{}, fn match, acc ->
|
||||
field_name = Enum.at(match, 1) |> String.downcase()
|
||||
# Value is in capture group 2 (braces) or 3 (quotes)
|
||||
value =
|
||||
case {Enum.at(match, 2, ""), Enum.at(match, 3, "")} do
|
||||
{"", q} -> q
|
||||
{b, _} -> b
|
||||
end
|
||||
|
||||
Map.put(acc, field_name, String.trim(value))
|
||||
end)
|
||||
end
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Label & URL building
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
defp build_label(entry) do
|
||||
author_part =
|
||||
entry
|
||||
|> Map.get("author", "")
|
||||
|> parse_authors()
|
||||
|> format_authors()
|
||||
|
||||
year = Map.get(entry, "year", Map.get(entry, "date", ""))
|
||||
year = extract_year(year)
|
||||
|
||||
if year && author_part != "", do: "#{author_part}, #{year}", else: author_part
|
||||
end
|
||||
|
||||
defp parse_authors(""), do: []
|
||||
|
||||
defp parse_authors(author_str) do
|
||||
author_str
|
||||
|> String.split(" and ", trim: true)
|
||||
|> Enum.map(&extract_last_name/1)
|
||||
|> Enum.reject(&(&1 == ""))
|
||||
end
|
||||
|
||||
# Handles "Last, First" and "First Last" formats
|
||||
defp extract_last_name(name) do
|
||||
name = String.trim(name)
|
||||
|
||||
cond do
|
||||
String.contains?(name, ",") ->
|
||||
name |> String.split(",") |> List.first() |> String.trim()
|
||||
|
||||
String.contains?(name, " ") ->
|
||||
name |> String.split(" ") |> List.last() |> String.trim()
|
||||
|
||||
true ->
|
||||
name
|
||||
end
|
||||
end
|
||||
|
||||
defp format_authors([]), do: "Unknown"
|
||||
defp format_authors([single]), do: single
|
||||
defp format_authors([first | rest]), do: "#{first} & #{List.last(rest)}"
|
||||
|
||||
defp extract_year(""), do: nil
|
||||
|
||||
defp extract_year(str) do
|
||||
case Regex.run(~r/\b(\d{4})\b/, str) do
|
||||
[_, year] -> year
|
||||
_ -> nil
|
||||
end
|
||||
end
|
||||
|
||||
defp build_url(entry) do
|
||||
cond do
|
||||
doi = Map.get(entry, "doi", "") |> non_empty() ->
|
||||
"https://doi.org/#{doi}"
|
||||
|
||||
url = Map.get(entry, "url", "") |> non_empty() ->
|
||||
url
|
||||
|
||||
true ->
|
||||
nil
|
||||
end
|
||||
end
|
||||
|
||||
defp non_empty(""), do: nil
|
||||
defp non_empty(v), do: v
|
||||
end
|
||||
18
scripts/pipeline/lib/pipeline/resolvers/doi.ex
Normal file
18
scripts/pipeline/lib/pipeline/resolvers/doi.ex
Normal file
@@ -0,0 +1,18 @@
|
||||
defmodule Pipeline.Resolvers.DOI do
|
||||
@moduledoc """
|
||||
Last-resort citation resolver — always succeeds.
|
||||
|
||||
If the citation key looks like a DOI (starts with "10."), returns a
|
||||
`https://doi.org/...` link. Otherwise returns the key itself as a
|
||||
plain label with no URL.
|
||||
"""
|
||||
|
||||
@spec resolve(String.t()) :: {:ok, map()}
|
||||
def resolve(key) do
|
||||
if String.starts_with?(key, "10.") do
|
||||
{:ok, %{label: key, url: "https://doi.org/#{key}"}}
|
||||
else
|
||||
{:ok, %{label: key, url: nil}}
|
||||
end
|
||||
end
|
||||
end
|
||||
182
scripts/pipeline/lib/pipeline/resolvers/zotero.ex
Normal file
182
scripts/pipeline/lib/pipeline/resolvers/zotero.ex
Normal file
@@ -0,0 +1,182 @@
|
||||
defmodule Pipeline.Resolvers.Zotero do
|
||||
@moduledoc """
|
||||
Resolves citation keys via Zotero Better BibTeX's JSON-RPC API.
|
||||
|
||||
Requires Zotero to be running with the Better BibTeX plugin installed.
|
||||
Default endpoint: http://localhost:23119/better-bibtex/json-rpc
|
||||
|
||||
Resolution strategy:
|
||||
1. Search by citation key via `item.search`
|
||||
2. If found, try to get a PDF attachment link (zotero://open-pdf/...)
|
||||
3. Fall back to zotero://select/items/@key
|
||||
|
||||
Returns `{:ok, %{label: "Author, Year", url: "zotero://..."}}` or `:error`.
|
||||
"""
|
||||
|
||||
require Logger
|
||||
|
||||
@rpc_path "/better-bibtex/json-rpc"
|
||||
|
||||
@doc """
|
||||
Attempt to resolve `key` against a running Zotero instance.
|
||||
`base_url` defaults to `http://localhost:23119`.
|
||||
"""
|
||||
@spec resolve(String.t(), String.t()) :: {:ok, map()} | :error
|
||||
def resolve(key, base_url \\ "http://localhost:23119") do
|
||||
url = base_url <> @rpc_path
|
||||
|
||||
payload =
|
||||
Jason.encode!(%{
|
||||
jsonrpc: "2.0",
|
||||
method: "item.search",
|
||||
params: [
|
||||
[["citationKey", "is", key]]
|
||||
],
|
||||
id: 1
|
||||
})
|
||||
|
||||
case Req.post(url,
|
||||
body: payload,
|
||||
headers: [{"content-type", "application/json"}],
|
||||
receive_timeout: 5_000,
|
||||
finch: Pipeline.Finch
|
||||
) do
|
||||
{:ok, %{status: 200, body: body}} ->
|
||||
parse_response(body, key, base_url)
|
||||
|
||||
{:ok, %{status: status}} ->
|
||||
Logger.debug("Zotero: unexpected HTTP #{status} for key #{key}")
|
||||
:error
|
||||
|
||||
{:error, reason} ->
|
||||
Logger.debug("Zotero: connection failed for key #{key}: #{inspect(reason)}")
|
||||
:error
|
||||
|
||||
other ->
|
||||
Logger.debug("Zotero: unexpected result for key #{key}: #{inspect(other)}")
|
||||
:error
|
||||
end
|
||||
rescue
|
||||
e ->
|
||||
Logger.debug("Zotero: exception resolving key #{key}: #{inspect(e)}")
|
||||
:error
|
||||
end
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Private helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
defp parse_response(%{"result" => [item | _]}, key, base_url) do
|
||||
label = build_label(item)
|
||||
url = resolve_url(item, key, base_url)
|
||||
{:ok, %{label: label, url: url}}
|
||||
end
|
||||
|
||||
defp parse_response(%{"result" => []}, key, _base_url) do
|
||||
Logger.debug("Zotero: no item found for key #{key}")
|
||||
:error
|
||||
end
|
||||
|
||||
defp parse_response(%{"error" => err}, key, _base_url) do
|
||||
Logger.debug("Zotero: RPC error for key #{key}: #{inspect(err)}")
|
||||
:error
|
||||
end
|
||||
|
||||
defp parse_response(body, key, _base_url) do
|
||||
Logger.debug("Zotero: unexpected response shape for key #{key}: #{inspect(body)}")
|
||||
:error
|
||||
end
|
||||
|
||||
defp fetch_pdf_url(key, base_url) do
|
||||
payload =
|
||||
Jason.encode!(%{
|
||||
jsonrpc: "2.0",
|
||||
method: "item.attachments",
|
||||
params: [key],
|
||||
id: 2
|
||||
})
|
||||
|
||||
case Req.post(base_url <> @rpc_path,
|
||||
body: payload,
|
||||
headers: [{"content-type", "application/json"}],
|
||||
receive_timeout: 5_000,
|
||||
finch: Pipeline.Finch
|
||||
) do
|
||||
{:ok, %{status: 200, body: %{"result" => attachments}}} when is_list(attachments) ->
|
||||
attachments
|
||||
|> Enum.find_value(fn att ->
|
||||
open = Map.get(att, "open", "")
|
||||
path = Map.get(att, "path", "")
|
||||
if String.ends_with?(path, ".pdf"), do: open, else: nil
|
||||
end)
|
||||
|
||||
_ ->
|
||||
nil
|
||||
end
|
||||
rescue
|
||||
_ -> nil
|
||||
end
|
||||
|
||||
# CSL-JSON format: authors are under "author" with "family"/"given" keys.
|
||||
# Year is under "issued" -> "date-parts" -> [[year, month, day]].
|
||||
defp build_label(item) do
|
||||
authors = Map.get(item, "author", [])
|
||||
year = extract_year(item)
|
||||
|
||||
author_part =
|
||||
case authors do
|
||||
[] ->
|
||||
"Unknown"
|
||||
|
||||
[single] ->
|
||||
Map.get(single, "family", Map.get(single, "literal", "Unknown"))
|
||||
|
||||
[first | rest] ->
|
||||
first_name = Map.get(first, "family", Map.get(first, "literal", "Unknown"))
|
||||
last_name =
|
||||
rest
|
||||
|> List.last()
|
||||
|> then(&Map.get(&1, "family", Map.get(&1, "literal", "Unknown")))
|
||||
|
||||
"#{first_name} & #{last_name}"
|
||||
end
|
||||
|
||||
if year, do: "#{author_part}, #{year}", else: author_part
|
||||
end
|
||||
|
||||
# "issued": {"date-parts": [["2021", 2, 3]]}
|
||||
defp extract_year(item) do
|
||||
case get_in(item, ["issued", "date-parts"]) do
|
||||
[[year | _] | _] -> to_string(year)
|
||||
_ -> nil
|
||||
end
|
||||
end
|
||||
|
||||
defp resolve_url(item, key, base_url) do
|
||||
# Prefer zotero://open-pdf/... for items with a PDF attachment.
|
||||
# Fall back to zotero://select/library/items/KEY to open the item in Zotero.
|
||||
# The "id" field is a URI like "http://zotero.org/users/123/items/ABCD1234".
|
||||
pdf_url = fetch_pdf_url(key, base_url)
|
||||
|
||||
if pdf_url do
|
||||
pdf_url
|
||||
else
|
||||
item_key =
|
||||
item
|
||||
|> Map.get("id", "")
|
||||
|> String.split("/")
|
||||
|> List.last()
|
||||
|> non_empty()
|
||||
|
||||
if item_key do
|
||||
"zotero://select/library/items/#{item_key}"
|
||||
else
|
||||
"zotero://select/items/@#{key}"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
defp non_empty(nil), do: nil
|
||||
defp non_empty(""), do: nil
|
||||
defp non_empty(v), do: v
|
||||
end
|
||||
48
scripts/pipeline/lib/pipeline/transform.ex
Normal file
48
scripts/pipeline/lib/pipeline/transform.ex
Normal file
@@ -0,0 +1,48 @@
|
||||
defmodule Pipeline.Transform do
|
||||
@moduledoc """
|
||||
Behaviour that all markdown transform modules must implement.
|
||||
|
||||
## Callbacks
|
||||
|
||||
- `init/1` — called once before processing; returns transform-specific state.
|
||||
Default implementation returns the opts map unchanged.
|
||||
- `apply/3` — called per .md file; returns the (possibly modified) content.
|
||||
- `teardown/1` — optional cleanup after all files are processed.
|
||||
|
||||
## Example
|
||||
|
||||
defmodule MyTransform do
|
||||
@behaviour Pipeline.Transform
|
||||
|
||||
@impl true
|
||||
def init(opts), do: %{some_state: opts[:value]}
|
||||
|
||||
@impl true
|
||||
def apply(content, state, _opts) do
|
||||
String.replace(content, "foo", state.some_state)
|
||||
end
|
||||
end
|
||||
"""
|
||||
|
||||
@doc "One-time initialisation. Returns opaque state passed to apply/3."
|
||||
@callback init(opts :: map()) :: term()
|
||||
|
||||
@doc "Transform file content. Returns the (possibly modified) content string."
|
||||
@callback apply(content :: String.t(), state :: term(), opts :: map()) :: String.t()
|
||||
|
||||
@doc "Optional cleanup after all files are processed."
|
||||
@callback teardown(state :: term()) :: :ok
|
||||
|
||||
@optional_callbacks teardown: 1
|
||||
|
||||
defmacro __using__(_) do
|
||||
quote do
|
||||
@behaviour Pipeline.Transform
|
||||
|
||||
@impl Pipeline.Transform
|
||||
def init(opts), do: opts
|
||||
|
||||
defoverridable init: 1
|
||||
end
|
||||
end
|
||||
end
|
||||
231
scripts/pipeline/lib/pipeline/transforms/citations.ex
Normal file
231
scripts/pipeline/lib/pipeline/transforms/citations.ex
Normal file
@@ -0,0 +1,231 @@
|
||||
defmodule Pipeline.Transforms.Citations do
|
||||
@moduledoc """
|
||||
Markdown transform: resolves org-citar citation keys to hyperlinks.
|
||||
|
||||
## Recognised citation syntax (as output by ox-hugo from org-citar)
|
||||
|
||||
[cite:@key] → org-cite / citar standard (most common)
|
||||
[cite:@key1;@key2] → multiple citations
|
||||
cite:key → older roam-style bare cite syntax
|
||||
|
||||
## Resolution chain (in order)
|
||||
|
||||
1. Zotero (live instance via Better BibTeX JSON-RPC) — preferred
|
||||
2. BibTeX file (BIBTEX_FILE env var) — fallback
|
||||
3. DOI / bare key — always succeeds
|
||||
|
||||
## Modes (opts.citation_mode)
|
||||
|
||||
:silent — silently use DOI/bare-key fallback when Zotero+BibTeX fail
|
||||
:warn — (default) emit a Logger.warning for unresolved keys
|
||||
:strict — raise on unresolved keys (aborts pipeline)
|
||||
|
||||
## Format
|
||||
|
||||
Resolved citations are rendered as:
|
||||
|
||||
[Label](url) when a URL is available
|
||||
[Label] when no URL could be determined (bare key fallback)
|
||||
|
||||
Multiple semicolon-separated keys become space-separated links:
|
||||
|
||||
[cite:@a;@b] → [Author A, 2020](url_a) [Author B, 2019](url_b)
|
||||
|
||||
## init/1 callback
|
||||
|
||||
Loads the BibTeX file (if configured) once before processing begins,
|
||||
and probes Zotero availability, emitting warnings as appropriate.
|
||||
"""
|
||||
|
||||
@behaviour Pipeline.Transform
|
||||
|
||||
require Logger
|
||||
|
||||
alias Pipeline.Resolvers.Zotero
|
||||
alias Pipeline.Resolvers.BibTeX
|
||||
alias Pipeline.Resolvers.DOI
|
||||
|
||||
# Match [cite:@key] and [cite:@key1;@key2;...] (org-cite / citar style)
|
||||
@cite_bracket_regex ~r/\[cite:(@[^\]]+)\]/
|
||||
|
||||
# Match bare cite:key (older roam style, no brackets, no @ prefix)
|
||||
@cite_bare_regex ~r/(?<![(\[])cite:([a-zA-Z0-9_:-]+)/
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Pipeline callbacks
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
@doc """
|
||||
Called once before processing any files. Loads BibTeX, probes Zotero.
|
||||
Returns a state map passed to every `apply/3` call.
|
||||
"""
|
||||
def init(opts) do
|
||||
bibtex_entries = load_bibtex(opts)
|
||||
zotero_available = probe_zotero(opts)
|
||||
|
||||
if not zotero_available and bibtex_entries == %{} do
|
||||
Logger.warning(
|
||||
"Citations: neither Zotero nor a BibTeX file is available. " <>
|
||||
"All citations will fall back to bare-key rendering. " <>
|
||||
"Set BIBTEX_FILE env var or start Zotero with Better BibTeX to resolve citations."
|
||||
)
|
||||
end
|
||||
|
||||
%{
|
||||
bibtex_entries: bibtex_entries,
|
||||
zotero_available: zotero_available,
|
||||
zotero_url: Map.get(opts, :zotero_url, "http://localhost:23119"),
|
||||
citation_mode: Map.get(opts, :citation_mode, :warn)
|
||||
}
|
||||
end
|
||||
|
||||
@doc """
|
||||
Apply citation resolution to a single markdown file's content.
|
||||
"""
|
||||
def apply(content, state, _opts) do
|
||||
content
|
||||
|> resolve_bracket_citations(state)
|
||||
|> resolve_bare_citations(state)
|
||||
end
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Resolution passes
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
defp resolve_bracket_citations(content, state) do
|
||||
Regex.replace(@cite_bracket_regex, content, fn _full, keys_str ->
|
||||
keys_str
|
||||
|> String.split(";")
|
||||
|> Enum.map(&String.trim/1)
|
||||
|> Enum.map(fn "@" <> key -> key end)
|
||||
|> Enum.map(&resolve_key(&1, state))
|
||||
|> Enum.join(" ")
|
||||
end)
|
||||
end
|
||||
|
||||
defp resolve_bare_citations(content, state) do
|
||||
Regex.replace(@cite_bare_regex, content, fn _full, key ->
|
||||
resolve_key(key, state)
|
||||
end)
|
||||
end
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Single-key resolution chain
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
defp resolve_key(key, state) do
|
||||
info =
|
||||
with :error <- try_zotero(key, state),
|
||||
:error <- try_bibtex(key, state) do
|
||||
handle_unresolved(key, state)
|
||||
else
|
||||
{:ok, citation_info} -> citation_info
|
||||
end
|
||||
|
||||
format_result(info)
|
||||
end
|
||||
|
||||
defp try_zotero(_key, %{zotero_available: false}), do: :error
|
||||
|
||||
defp try_zotero(key, %{zotero_url: url}) do
|
||||
Zotero.resolve(key, url)
|
||||
end
|
||||
|
||||
defp try_bibtex(_key, %{bibtex_entries: entries}) when map_size(entries) == 0, do: :error
|
||||
|
||||
defp try_bibtex(key, %{bibtex_entries: entries}) do
|
||||
BibTeX.resolve(key, entries)
|
||||
end
|
||||
|
||||
defp handle_unresolved(key, %{citation_mode: mode}) do
|
||||
case mode do
|
||||
:strict ->
|
||||
raise "Citations: could not resolve citation key '#{key}' and mode is :strict"
|
||||
|
||||
:warn ->
|
||||
Logger.warning("Citations: unresolved citation key '#{key}' — using bare-key fallback")
|
||||
{:ok, result} = DOI.resolve(key)
|
||||
result
|
||||
|
||||
:silent ->
|
||||
{:ok, result} = DOI.resolve(key)
|
||||
result
|
||||
end
|
||||
end
|
||||
|
||||
defp format_result(%{label: label, url: nil}), do: "[#{label}]"
|
||||
defp format_result(%{label: label, url: url}), do: "[#{label}](#{url})"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Init helpers
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
defp load_bibtex(opts) do
|
||||
path = Map.get(opts, :bibtex_file) || System.get_env("BIBTEX_FILE")
|
||||
|
||||
cond do
|
||||
is_nil(path) ->
|
||||
Logger.debug("Citations: BIBTEX_FILE not set — BibTeX resolver disabled")
|
||||
%{}
|
||||
|
||||
not File.exists?(path) ->
|
||||
Logger.warning("Citations: BIBTEX_FILE=#{path} does not exist — BibTeX resolver disabled")
|
||||
%{}
|
||||
|
||||
true ->
|
||||
case BibTeX.load(path) do
|
||||
{:ok, entries} -> entries
|
||||
{:error, reason} ->
|
||||
Logger.warning("Citations: failed to load BibTeX file #{path}: #{inspect(reason)}")
|
||||
%{}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
defp probe_zotero(opts) do
|
||||
url = Map.get(opts, :zotero_url, "http://localhost:23119")
|
||||
|
||||
# Use a no-op JSON-RPC call to probe availability.
|
||||
# /better-bibtex/cayw is intentionally avoided — it blocks waiting for
|
||||
# user interaction and never returns without a pick.
|
||||
payload =
|
||||
Jason.encode!(%{
|
||||
jsonrpc: "2.0",
|
||||
method: "item.search",
|
||||
params: [[[]]],
|
||||
id: 0
|
||||
})
|
||||
|
||||
result =
|
||||
try do
|
||||
Req.post(url <> "/better-bibtex/json-rpc",
|
||||
body: payload,
|
||||
headers: [{"content-type", "application/json"}],
|
||||
receive_timeout: 3_000,
|
||||
finch: Pipeline.Finch
|
||||
)
|
||||
rescue
|
||||
e -> {:error, e}
|
||||
end
|
||||
|
||||
case result do
|
||||
{:ok, %{status: 200}} ->
|
||||
Logger.info("Citations: Zotero Better BibTeX is available at #{url}")
|
||||
true
|
||||
|
||||
{:ok, %{status: status}} ->
|
||||
Logger.warning(
|
||||
"Citations: Zotero responded HTTP #{status} at #{url} — " <>
|
||||
"is Better BibTeX installed?"
|
||||
)
|
||||
false
|
||||
|
||||
_ ->
|
||||
Logger.warning(
|
||||
"Citations: Zotero not reachable at #{url} — " <>
|
||||
"start Zotero with Better BibTeX or set BIBTEX_FILE as fallback"
|
||||
)
|
||||
false
|
||||
end
|
||||
end
|
||||
end
|
||||
27
scripts/pipeline/mix.exs
Normal file
27
scripts/pipeline/mix.exs
Normal file
@@ -0,0 +1,27 @@
|
||||
defmodule Pipeline.MixProject do
|
||||
use Mix.Project
|
||||
|
||||
def project do
|
||||
[
|
||||
app: :pipeline,
|
||||
version: "0.1.0",
|
||||
elixir: "~> 1.15",
|
||||
start_permanent: Mix.env() == :prod,
|
||||
deps: deps()
|
||||
]
|
||||
end
|
||||
|
||||
def application do
|
||||
[
|
||||
extra_applications: [:logger, :inets, :ssl],
|
||||
mod: {Pipeline.Application, []}
|
||||
]
|
||||
end
|
||||
|
||||
defp deps do
|
||||
[
|
||||
{:req, "~> 0.5"},
|
||||
{:jason, "~> 1.4"}
|
||||
]
|
||||
end
|
||||
end
|
||||
11
scripts/pipeline/mix.lock
Normal file
11
scripts/pipeline/mix.lock
Normal file
@@ -0,0 +1,11 @@
|
||||
%{
|
||||
"finch": {:hex, :finch, "0.21.0", "b1c3b2d48af02d0c66d2a9ebfb5622be5c5ecd62937cf79a88a7f98d48a8290c", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "87dc6e169794cb2570f75841a19da99cfde834249568f2a5b121b809588a4377"},
|
||||
"hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"},
|
||||
"jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"},
|
||||
"mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], [], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"},
|
||||
"mint": {:hex, :mint, "1.7.1", "113fdb2b2f3b59e47c7955971854641c61f378549d73e829e1768de90fc1abf1", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "fceba0a4d0f24301ddee3024ae116df1c3f4bb7a563a731f45fdfeb9d39a231b"},
|
||||
"nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"},
|
||||
"nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"},
|
||||
"req": {:hex, :req, "0.5.17", "0096ddd5b0ed6f576a03dde4b158a0c727215b15d2795e59e0916c6971066ede", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "0b8bc6ffdfebbc07968e59d3ff96d52f2202d0536f10fef4dc11dc02a2a43e39"},
|
||||
"telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"},
|
||||
}
|
||||
Reference in New Issue
Block a user