forked from github/quartz
Introduces scripts/pipeline/, a Mix project that runs as a post-export transformation pass over content/*.md before Quartz builds the site.

Pipeline (scripts/export.exs phase 3):
- Compiles and loads the Mix project at export time (cached after first run)
- Applies a list of Transform modules sequentially over all .md files
- Only rewrites files that were actually changed

Citations transform (Pipeline.Transforms.Citations):
- Resolves [cite:@key] and bare cite:key syntax produced by ox-hugo/citar
- Resolution chain: Zotero BBT JSON-RPC → BibTeX file → DOI/bare-key fallback
- Zotero probe uses a no-op JSON-RPC call (cayw endpoint blocks indefinitely)
- Zotero resolver fetches PDF attachments via item.attachments, producing zotero://open-pdf/... links; falls back to zotero://select/library/items/...
- BibTeX resolver parses .bib files with a simple regex parser (no deps)
- DOI resolver is the always-succeeding last resort

Configuration via env vars:
- BIBTEX_FILE — path to .bib file for fallback resolution
- ZOTERO_URL — Zotero base URL (default: http://localhost:23119)
- CITATION_MODE — silent | warn (default) | strict

Adding future transforms requires only implementing the Pipeline.Transform behaviour and appending the module to the transforms list in export.exs.
179 lines
4.7 KiB
Elixir
179 lines
4.7 KiB
Elixir
defmodule Pipeline.Resolvers.BibTeX do
  @moduledoc """
  Resolves citation keys from a local BibTeX (.bib) file.

  Configured via the `BIBTEX_FILE` environment variable, or passed directly
  as `opts.bibtex_file`. The file is parsed once at init time and the
  resulting entry map is reused for all lookups.

  Supports extracting: author last names, year, title, DOI, URL.

  BibTeX entry format parsed:

      @type{citationkey,
        author = {Last, First and Last2, First2},
        year   = {2021},
        title  = {Some Title},
        doi    = {10.xxxx/yyyy},
        url    = {https://example.com},
      }

  Field values may be written as `{braced}` (one level of nested braces is
  supported), `"quoted"`, or as a bare number (`year = 2021`). Field names
  are lower-cased; citation keys are kept case-sensitive.

  `resolve/2` returns `{:ok, %{label: "Author, Year", url: "..."}}` or
  `:error`. `url` is `nil` when the entry has neither a DOI nor a URL.
  """

  require Logger

  # ------------------------------------------------------------------
  # Public API
  # ------------------------------------------------------------------

  @doc """
  Parse a .bib file and return a map of `%{citation_key => entry_map}`.

  Each `entry_map` maps lower-cased field names to trimmed string values.
  Returns `{:ok, entries}`, or `{:error, reason}` with the `File.read/1`
  error reason when the file cannot be read.
  """
  @spec load(String.t()) :: {:ok, map()} | {:error, term()}
  def load(path) do
    # A failed read falls straight through `with` as `{:error, reason}`.
    with {:ok, content} <- File.read(path) do
      entries = parse_entries(content)
      Logger.info("BibTeX: loaded #{map_size(entries)} entries from #{path}")
      {:ok, entries}
    end
  end

  @doc """
  Resolve a citation key from pre-loaded BibTeX entries.

  `entries` is the map produced by `load/1`. Returns
  `{:ok, %{label: label, url: url_or_nil}}` when `key` is present and
  `:error` otherwise.
  """
  @spec resolve(String.t(), map()) :: {:ok, map()} | :error
  def resolve(key, entries) do
    # `Map.fetch/2` already returns `:error` on a miss, so it passes through.
    with {:ok, entry} <- Map.fetch(entries, key) do
      {:ok, %{label: build_label(entry), url: build_url(entry)}}
    end
  end

  # ------------------------------------------------------------------
  # Parsing
  # ------------------------------------------------------------------

  # NOTE: the regexes below are written inline in function bodies rather than
  # stored in module attributes. Compiled regexes held in module attributes
  # are reported not to work when injected into functions on Erlang/OTP 28+.

  # Split the file on "@type{" boundaries (zero-width lookahead keeps the
  # "@" with its chunk), then read the citation key from each chunk header.
  # Chunks without a "key," header (leading prose, @comment, @string, ...)
  # are skipped. When a key occurs twice, the last entry wins.
  defp parse_entries(content) do
    for chunk <- String.split(content, ~r/(?=@\w+\s*\{)/, trim: true),
        [_header, key] <- [Regex.run(~r/@\w+\s*\{\s*([^,\s]+)\s*,/, chunk)],
        into: %{} do
      {String.trim(key), parse_fields(chunk)}
    end
  end

  # Extract `name = {value}`, `name = "value"` and `name = 123` pairs from an
  # entry block. Braced values may contain one level of nested braces; deeper
  # nesting is not matched. Bare numbers must be followed by `,`, `}` or the
  # end of the chunk so that digits inside free text are less likely to be
  # mistaken for a field. A repeated field name keeps its last value.
  defp parse_fields(chunk) do
    ~r/(\w+)\s*=\s*(?:\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}|"([^"]*)"|(\d+)(?=\s*(?:[,}]|\z)))/
    |> Regex.scan(chunk)
    |> Map.new(fn [_full, name | captures] ->
      {String.downcase(name), captures |> field_value() |> String.trim()}
    end)
  end

  # Exactly one of the value capture groups participates in a match; the
  # groups before it come back as "". An empty value (`{}` / `""`) yields "".
  defp field_value(captures), do: Enum.find(captures, "", &(&1 != ""))

  # ------------------------------------------------------------------
  # Label & URL building
  # ------------------------------------------------------------------

  # Build the "Author, Year" label. The year is taken from `year`, falling
  # back to `date` when `year` is missing or holds no four-digit number.
  # Without a year, the author part alone is returned.
  defp build_label(entry) do
    authors =
      entry
      |> Map.get("author", "")
      |> parse_authors()
      |> format_authors()

    year =
      extract_year(Map.get(entry, "year", "")) ||
        extract_year(Map.get(entry, "date", ""))

    if year, do: "#{authors}, #{year}", else: authors
  end

  defp parse_authors(""), do: []

  # Authors are separated by the word "and" surrounded by any whitespace,
  # which also covers author lists wrapped across several lines.
  defp parse_authors(author_str) do
    author_str
    |> String.split(~r/\s+and\s+/, trim: true)
    |> Enum.map(&extract_last_name/1)
    |> Enum.reject(&(&1 == ""))
  end

  # Handles "{Literal Name}", "Last, First" and "First Last" formats.
  # Protective braces are removed from the result.
  defp extract_last_name(name) do
    name = String.trim(name)

    cond do
      # A fully braced name is a literal (e.g. a corporate author): keep it
      # whole instead of treating its last word as a surname.
      String.starts_with?(name, "{") and String.ends_with?(name, "}") ->
        name |> strip_braces() |> String.trim()

      String.contains?(name, ",") ->
        name |> String.split(",") |> List.first() |> strip_braces() |> String.trim()

      true ->
        case name |> strip_braces() |> String.split() do
          [] -> ""
          parts -> List.last(parts)
        end
    end
  end

  defp strip_braces(str), do: String.replace(str, ["{", "}"], "")

  # No authors -> "Unknown"; one -> that name; two or more -> the first and
  # the last author joined with "&" (middle authors are omitted).
  defp format_authors([]), do: "Unknown"
  defp format_authors([single]), do: single
  defp format_authors([first | rest]), do: "#{first} & #{List.last(rest)}"

  defp extract_year(""), do: nil

  # First standalone four-digit number in the string, or nil.
  defp extract_year(str) do
    case Regex.run(~r/\b(\d{4})\b/, str) do
      [_, year] -> year
      _ -> nil
    end
  end

  # Prefer a DOI link; fall back to the entry's URL; nil when neither exists.
  defp build_url(entry) do
    doi = entry |> Map.get("doi", "") |> normalize_doi()
    url = entry |> Map.get("url", "") |> non_empty()

    if doi, do: "https://doi.org/#{doi}", else: url
  end

  # Some .bib exports store the DOI as a full resolver URL or with a "doi:"
  # prefix; strip those so the resolver prefix is not added twice.
  defp normalize_doi(raw) do
    raw
    |> String.trim()
    |> String.replace(~r{\A(?:https?://(?:dx\.)?doi\.org/|doi:\s*)}i, "")
    |> non_empty()
  end

  defp non_empty(""), do: nil
  defp non_empty(v), do: v
end
|