defmodule OrgGarden.Resolvers.BibTeX do
  @moduledoc """
  Resolves citation keys from a local BibTeX (.bib) file.

  Configured via the `BIBTEX_FILE` environment variable, or passed directly
  as `opts.bibtex_file`. The file is parsed once at init time and the
  resulting entry map is reused for all lookups.

  Supports extracting: author last names, year, title, DOI, URL.

  BibTeX entry format parsed:

      @type{citationkey,
        author = {Last, First and Last2, First2},
        year   = {2021},
        title  = {Some Title},
        doi    = {10.xxxx/yyyy},
        url    = {https://example.com},
      }

  Field values may be wrapped in braces (one level of nesting), wrapped in
  double quotes, or be a bare unsigned number (`year = 2021`).

  Returns `{:ok, %{label: "Author, Year", url: "..."}}` or `:error`.
  """

  require Logger

  # ------------------------------------------------------------------
  # Public API
  # ------------------------------------------------------------------

  @doc """
  Parse a .bib file and return a map of `%{citation_key => entry_map}`.

  Each `entry_map` maps lower-cased field names to trimmed string values.

  Returns `{:ok, entries}`, or `{:error, reason}` when the file cannot be
  read (`reason` is whatever `File.read/1` reported).
  """
  @spec load(String.t()) :: {:ok, map()} | {:error, term()}
  def load(path) do
    # A failed read falls straight through `with` as `{:error, reason}`.
    with {:ok, content} <- File.read(path) do
      entries = parse_entries(content)
      Logger.info("BibTeX: loaded #{map_size(entries)} entries from #{path}")
      {:ok, entries}
    end
  end

  @doc """
  Resolve a citation key from pre-loaded BibTeX entries.

  `entries` is the map returned by `load/1`.

  Returns `{:ok, %{label: label, url: url}}` where `url` is `nil` when the
  entry has neither a DOI nor a URL, or `:error` when `key` is unknown.
  """
  @spec resolve(String.t(), map()) :: {:ok, map()} | :error
  def resolve(key, entries) do
    case Map.fetch(entries, key) do
      {:ok, entry} -> {:ok, %{label: build_label(entry), url: build_url(entry)}}
      :error -> :error
    end
  end

  # ------------------------------------------------------------------
  # Parsing
  # ------------------------------------------------------------------

  # Matches the `@type{key,` header of an entry and captures the key.
  @entry_header ~r/@\w+\s*\{\s*([^,\s]+)\s*,/

  # Splits the file into one chunk per `@type{` opener and parses each chunk
  # independently. Chunks without a recognisable header are skipped; when a
  # key occurs more than once the last occurrence wins.
  defp parse_entries(content) do
    content
    |> String.split(~r/(?=@\w+\s*\{)/, trim: true)
    |> Enum.reduce(%{}, fn chunk, acc ->
      case Regex.run(@entry_header, chunk) do
        [_, key] -> Map.put(acc, String.trim(key), parse_fields(chunk))
        _ -> acc
      end
    end)
  end

  # Extracts `name = {value}`, `name = "value"` or `name = 2021` pairs from an
  # entry block. Braced values may contain one nested level of braces, which
  # is good enough for the common fields. Capture groups:
  #   1 - field name, 2 - braced value, 3 - quoted value, 4 - bare number
  @field_regex ~r/(\w+)\s*=\s*(?:\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}|"([^"]*)"|(\d+)\b)/

  defp parse_fields(chunk) do
    @field_regex
    |> Regex.scan(chunk)
    |> Map.new(fn [_full, name | value_groups] ->
      # Only one alternative can match, and trailing unmatched groups are
      # omitted from the result, so take the first non-empty capture and
      # fall back to "" for an empty value such as `note = {}`.
      value = Enum.find(value_groups, "", &(&1 != ""))
      {String.downcase(name), String.trim(value)}
    end)
  end

  # ------------------------------------------------------------------
  # Label & URL building
  # ------------------------------------------------------------------

  # Builds "Authors, Year"; the year is omitted when none can be extracted
  # from the `year` field (or from `date` when `year` is absent).
  defp build_label(entry) do
    author_part =
      entry
      |> Map.get("author", "")
      |> parse_authors()
      |> format_authors()

    year =
      entry
      |> Map.get("year", Map.get(entry, "date", ""))
      |> extract_year()

    if year && author_part != "", do: "#{author_part}, #{year}", else: author_part
  end

  defp parse_authors(""), do: []

  defp parse_authors(author_str) do
    author_str
    # Author lists are frequently wrapped across lines, so the separator is
    # "and" surrounded by any whitespace rather than exactly one space.
    |> String.split(~r/\s+and\s+/, trim: true)
    |> Enum.map(&extract_last_name/1)
    |> Enum.reject(&(&1 == ""))
  end

  # Handles "Last, First" and "First Last" formats.
  defp extract_last_name(name) do
    case name |> String.trim() |> String.split(",", parts: 2) do
      [last, _given] ->
        String.trim(last)

      [full] ->
        # Split on any whitespace run (spaces, tabs, newlines) so a wrapped
        # name still yields a clean final word; "" when the name is empty.
        full |> String.split() |> List.last() || ""
    end
  end

  defp format_authors([]), do: "Unknown"
  defp format_authors([single]), do: single

  # NOTE(review): with three or more authors this yields "First & Last" and
  # drops everyone in between. Kept as-is because callers may rely on the
  # rendered label; confirm whether "First et al." was intended.
  defp format_authors([first | rest]), do: "#{first} & #{List.last(rest)}"

  # Returns the first standalone four-digit number in `str`, or nil.
  defp extract_year(""), do: nil

  defp extract_year(str) do
    case Regex.run(~r/\b(\d{4})\b/, str) do
      [_, year] -> year
      _ -> nil
    end
  end

  # Prefers the DOI (as a doi.org link) over the plain `url` field; nil when
  # the entry has neither.
  defp build_url(entry) do
    doi = entry |> Map.get("doi", "") |> normalize_doi()
    url = Map.get(entry, "url", "")

    cond do
      doi != "" -> "https://doi.org/#{doi}"
      url != "" -> url
      true -> nil
    end
  end

  # Reduces a DOI field to the bare identifier. Bibliography exports often
  # store the full resolver URL or a "doi:" prefix in the field; prefixing
  # those again would produce "https://doi.org/https://doi.org/...".
  defp normalize_doi(doi) do
    doi
    |> String.trim()
    |> String.replace(~r{^(?:https?://(?:dx\.)?doi\.org/|doi:\s*)}i, "")
  end
end