feat: unified watch server under org-garden
This commit is contained in:
178
org-garden/lib/org_garden/resolvers/bibtex.ex
Normal file
178
org-garden/lib/org_garden/resolvers/bibtex.ex
Normal file
@@ -0,0 +1,178 @@
|
||||
defmodule OrgGarden.Resolvers.BibTeX do
  @moduledoc """
  Resolves citation keys from a local BibTeX (.bib) file.

  Configured via the `BIBTEX_FILE` environment variable, or passed directly
  as `opts.bibtex_file`. The file is parsed once at init time and the
  resulting entry map is reused for all lookups.

  Supports extracting: author last names, year, title, DOI, URL.

  BibTeX entry format parsed:

      @type{citationkey,
        author = {Last, First and Last2, First2},
        year   = {2021},
        title  = {Some Title},
        doi    = {10.xxxx/yyyy},
        url    = {https://example.com},
      }

  Field values may be written in braces (one level of nested braces is
  supported), in double quotes, or as a bare number (`year = 2021`).

  Returns `{:ok, %{label: "Author, Year", url: "..."}}` or `:error`.
  """

  require Logger

  # Block types that share the `@type{...}` syntax but are not bibliography
  # records; they are skipped while building the entry map.
  @non_entry_types ~w(comment string preamble)

  # ------------------------------------------------------------------
  # Public API
  # ------------------------------------------------------------------

  @doc """
  Parse a .bib file and return a map of `%{citation_key => entry_map}`.

  Each `entry_map` maps lower-cased field names to their trimmed string
  values. When a citation key occurs more than once, the last occurrence
  wins.

  Returns `{:ok, entries}` or `{:error, reason}`, where `reason` is whatever
  `File.read/1` reported (for example `:enoent`).
  """
  @spec load(String.t()) :: {:ok, map()} | {:error, term()}
  def load(path) do
    # A failed read falls straight through `with` as `{:error, reason}`.
    with {:ok, content} <- File.read(path) do
      entries = parse_entries(content)
      Logger.info("BibTeX: loaded #{map_size(entries)} entries from #{path}")
      {:ok, entries}
    end
  end

  @doc """
  Resolve a citation key from pre-loaded BibTeX entries.

  `entries` is the map returned by `load/1`.

  Returns `{:ok, %{label: label, url: url}}` when `key` is known, where
  `label` is built from the author last names and the year, and `url` is the
  DOI link, the entry's `url` field, or `nil` when neither is present.
  Returns `:error` when `key` is not in `entries`.
  """
  @spec resolve(String.t(), map()) :: {:ok, map()} | :error
  def resolve(key, entries) do
    # `Map.fetch/2` already returns `:error` on a miss, which `with` passes on.
    with {:ok, entry} <- Map.fetch(entries, key) do
      {:ok, %{label: build_label(entry), url: build_url(entry)}}
    end
  end

  # ------------------------------------------------------------------
  # Parsing
  # ------------------------------------------------------------------

  # Splits the file in front of every `@type{` opener and turns each chunk
  # that has a `@type{key,` header into a `key => fields` pair. Chunks without
  # such a header (leading free text, `@string{name = ...}`) and the block
  # types in @non_entry_types are ignored.
  defp parse_entries(content) do
    for chunk <- String.split(content, ~r/(?=@\w+\s*\{)/, trim: true),
        [_, type, key] <- [Regex.run(~r/@(\w+)\s*\{\s*([^,\s]+)\s*,/, chunk)],
        String.downcase(type) not in @non_entry_types,
        into: %{} do
      {key, parse_fields(chunk)}
    end
  end

  # Extracts `name = {value}`, `name = "value"` and `name = 123` pairs from an
  # entry block. Braced values may contain one level of nested braces, which
  # is good enough for common fields. Field names may contain hyphens so that
  # e.g. `x-year` is not mistaken for `year`.
  defp parse_fields(chunk) do
    ~r/([\w-]+)\s*=\s*(?:\{([^{}]*(?:\{[^{}]*\}[^{}]*)*)\}|"([^"]*)"|(\d+))/
    |> Regex.scan(chunk)
    |> Map.new(fn [_whole, name | captures] ->
      # Only one alternative takes part in a match; the groups of the other
      # alternatives come back as "" or are missing from the list entirely.
      value = Enum.find(captures, "", &(&1 != ""))
      {String.downcase(name), String.trim(value)}
    end)
  end

  # ------------------------------------------------------------------
  # Label & URL building
  # ------------------------------------------------------------------

  # Builds "Authors, Year", or just "Authors" when no four-digit year can be
  # found in the `year` field or, failing that, the `date` field.
  defp build_label(entry) do
    authors =
      entry
      |> Map.get("author", "")
      |> parse_authors()
      |> format_authors()

    year =
      extract_year(Map.get(entry, "year", "")) ||
        extract_year(Map.get(entry, "date", ""))

    if year, do: "#{authors}, #{year}", else: authors
  end

  defp parse_authors(""), do: []

  # Authors are separated by the word "and"; the separator may be surrounded
  # by any whitespace because long author lists are often wrapped over lines.
  defp parse_authors(author_str) do
    author_str
    |> String.split(~r/\s+and\s+/, trim: true)
    |> Enum.map(&extract_last_name/1)
    |> Enum.reject(&(&1 == ""))
  end

  # Handles "Last, First" and "First Last" formats, plus names that are
  # wrapped in braces as a whole (e.g. corporate authors), which are kept
  # intact. Any remaining braces are removed from the result.
  defp extract_last_name(name) do
    name = String.trim(name)

    last_name =
      cond do
        literal = braced_literal(name) ->
          literal

        String.contains?(name, ",") ->
          name |> String.split(",") |> hd()

        true ->
          case String.split(name) do
            [] -> ""
            words -> List.last(words)
          end
      end

    last_name
    |> String.replace(["{", "}"], "")
    |> String.trim()
  end

  # Returns the inner text when the whole name is one brace group, else nil.
  defp braced_literal(name) do
    case Regex.run(~r/^\{([^{}]*)\}$/, name) do
      [_, inner] -> inner
      _ -> nil
    end
  end

  # One author is shown alone, two are joined with "&", and anything longer
  # (or an explicit "and others") is abbreviated to "First et al.".
  defp format_authors([]), do: "Unknown"
  defp format_authors([single]), do: single
  defp format_authors([first, "others"]), do: "#{first} et al."
  defp format_authors([first, second]), do: "#{first} & #{second}"
  defp format_authors([first | _rest]), do: "#{first} et al."

  defp extract_year(""), do: nil

  # Picks the first standalone four-digit number, e.g. "1999" in "1999-05-01".
  defp extract_year(str) do
    case Regex.run(~r/\b(\d{4})\b/, str) do
      [_, year] -> year
      _ -> nil
    end
  end

  # Prefers the DOI link over the `url` field; nil when the entry has neither.
  defp build_url(entry) do
    doi = entry |> Map.get("doi", "") |> normalize_doi()
    url = Map.get(entry, "url", "")

    cond do
      doi != "" -> "https://doi.org/#{doi}"
      url != "" -> url
      true -> nil
    end
  end

  # Reduces a DOI to its bare form: the field may already hold a resolver URL
  # or a "doi:" prefix, which must not be prefixed a second time.
  defp normalize_doi(doi) do
    doi
    |> String.trim()
    |> String.replace(~r{^(?:https?://(?:dx\.)?doi\.org/|doi:\s*)}i, "")
  end
end
|
||||
Reference in New Issue
Block a user