- Add org-id resolution for [[id:...]] links by building ID locations database once before parallel export - Handle broken file links gracefully with org-export-with-broken-links - Fix race condition in parallel exports by pre-building ID cache - Fix Quartz config not being applied: cp was preserving nix store hash prefix in filename instead of using explicit destination filename - Continue pipeline even when some exports fail, reporting failures - Improve error handling and logging throughout export pipeline
222 lines
7.4 KiB
Elixir
222 lines
7.4 KiB
Elixir
defmodule OrgGarden.Export do
  @moduledoc """
  Org-to-Markdown export via Emacs batch + ox-hugo.

  Provides both single-file and batch export, plus a helper to compute
  the expected `.md` output path for a given `.org` source file.
  """

  require Logger

  @default_max_concurrency 8

  @doc """
  Export a single `.org` file to Markdown via `emacs --batch` + ox-hugo.

  Accepts an optional `id_locations_file` path for a pre-built org-id database.
  If not provided, builds the ID database inline (slower for batch exports).

  All paths are escaped before being embedded in the Emacs Lisp forms passed
  to `--eval`, so directories containing `"` or `\\` are handled correctly.

  Returns `{:ok, 0}` when emacs exits successfully,
  `{:error, {:emacs_exit, code}}` when emacs exits with a non-zero code, or
  `{:error, exception}` if the command could not be executed at all
  (e.g. `emacs` is not installed or the output directory cannot be created).
  """
  @spec export_file(String.t(), String.t(), String.t(), String.t() | nil) ::
          {:ok, non_neg_integer()} | {:error, term()}
  def export_file(orgfile, notes_dir, output_dir, id_locations_file \\ nil) do
    OrgGarden.Telemetry.span_export(orgfile, fn ->
      do_export_file(orgfile, notes_dir, output_dir, id_locations_file)
    end)
  end

  # Runs the actual emacs invocation for one file.
  #
  # The broad `rescue` is deliberate: this is the boundary to an external
  # program, and callers (notably `export_all/2`) rely on getting an
  # `{:error, _}` tuple rather than a crash when emacs is missing or the
  # output directory cannot be prepared.
  defp do_export_file(orgfile, notes_dir, output_dir, id_locations_file) do
    section = orgfile |> Path.dirname() |> Path.relative_to(notes_dir)

    # ox-hugo requires static/ to exist for image asset copying
    File.mkdir_p!(Path.join(output_dir, "static"))

    args =
      ["--batch", "--eval", "(require 'ox-hugo)"] ++
        id_setup_args(notes_dir, id_locations_file) ++
        export_args(orgfile, output_dir, section)

    {output, exit_code} = System.cmd("emacs", args, stderr_to_stdout: true)

    # Log raw emacs output at debug level for troubleshooting
    if output != "", do: Logger.debug("emacs output:\n#{output}")

    case exit_code do
      0 -> {:ok, exit_code}
      _ -> {:error, {:emacs_exit, exit_code}}
    end
  rescue
    e -> {:error, e}
  end

  # Emacs arguments that make org-id links resolvable.
  #
  # Without a pre-built database the ID locations are rebuilt inline by
  # scanning `notes_dir`; with one, the database file is simply loaded
  # (faster, and used by the parallel exports in `export_all/2`).
  defp id_setup_args(notes_dir, id_locations_file) when id_locations_file in [nil, false] do
    [
      "--eval", org_id_extra_files_form(notes_dir),
      "--eval", "(org-id-update-id-locations)"
    ]
  end

  defp id_setup_args(_notes_dir, id_locations_file) do
    [
      "--eval", ~s[(setq org-id-locations-file #{elisp_string(id_locations_file)})],
      "--eval", "(org-id-locations-load)"
    ]
  end

  # Emacs arguments that configure ox-hugo and trigger the export of `orgfile`.
  defp export_args(orgfile, output_dir, section) do
    [
      # Allow export to proceed even if some links cannot be resolved
      "--eval",
      "(setq org-export-with-broken-links 'mark)",
      # Prevent errors when file links point to non-existent files/headlines
      "--eval",
      "(advice-add 'org-link-search :around (lambda (orig-fn &rest args) (condition-case nil (apply orig-fn args) (error nil))))",
      # Emit citations verbatim as [cite:@key;...] instead of resolving them
      "--eval",
      """
      (org-cite-register-processor 'passthrough
        :export-citation
        (lambda (citation _style _backend _info)
          (let ((keys (mapcar (lambda (ref)
                                (concat "@" (org-element-property :key ref)))
                              (org-cite-get-references citation))))
            (format "[cite:%s]" (string-join keys ";")))))
      """,
      "--eval",
      "(setq org-cite-export-processors '((t passthrough)))",
      "--eval",
      ~s[(setq org-hugo-base-dir #{elisp_string(output_dir)})],
      "--eval",
      ~s[(setq org-hugo-default-section-directory #{elisp_string(section)})],
      "--visit",
      orgfile,
      "--funcall",
      "org-hugo-export-to-md"
    ]
  end

  # Lisp form that points org-id at every `.org` file below `notes_dir`.
  defp org_id_extra_files_form(notes_dir) do
    ~s[(setq org-id-extra-files (directory-files-recursively #{elisp_string(notes_dir)} "\\\\.org$"))]
  end

  # Renders `value` as a double-quoted Emacs Lisp string literal, escaping
  # backslashes and double quotes so the value cannot terminate the literal
  # early or smuggle extra Lisp into an `--eval` form.
  defp elisp_string(value) do
    escaped =
      value
      |> String.replace("\\", "\\\\")
      |> String.replace("\"", "\\\"")

    ~s("#{escaped}")
  end

  @doc """
  Export all `.org` files found under `notes_dir`.

  Exports files in parallel for improved performance. The concurrency level
  is read from the `:export_concurrency` key of the `:org_garden` application
  config and defaults to #{@default_max_concurrency}. An `EXPORT_CONCURRENCY`
  environment variable is not read by this module; presumably it is mapped
  onto that config key by the application's runtime configuration.

  The org-id locations database is built once up front and shared by all
  exports; the temporary database file is removed afterwards.

  Returns `{:ok, success_count, failures}` where `success_count` is the number
  of successfully exported files and `failures` is a list of
  `{file, {:error, reason}}` tuples for files that failed to export. The
  pipeline continues even if some files fail, including when emacs itself
  cannot be executed.
  """
  @spec export_all(String.t(), String.t()) ::
          {:ok, non_neg_integer(), [{String.t(), {:error, term()}}]}
  def export_all(notes_dir, output_dir) do
    case Path.wildcard(Path.join(notes_dir, "**/*.org")) do
      [] ->
        Logger.warning("No .org files found in #{notes_dir}")
        {:ok, 0, []}

      org_files ->
        max_concurrency = get_concurrency()

        Logger.info(
          "Exporting #{length(org_files)} org file(s) from #{notes_dir} " <>
            "(concurrency: #{max_concurrency})"
        )

        # Build org-id locations database once before parallel export
        id_locations_file = build_id_locations(notes_dir)

        results =
          try do
            org_files
            |> Task.async_stream(
              &export_and_report(&1, notes_dir, output_dir, id_locations_file),
              max_concurrency: max_concurrency,
              timeout: :infinity,
              ordered: false
            )
            |> Enum.map(fn {:ok, result} -> result end)
          after
            # Clean up temp file, also when collecting the results raises
            if id_locations_file, do: File.rm(id_locations_file)
          end

        {successes, failures} =
          Enum.split_with(results, fn
            {_, {:ok, _}} -> true
            {_, {:error, _}} -> false
          end)

        {:ok, length(successes), failures}
    end
  end

  # Exports one file inside a task and logs a failure inline, returning the
  # `{orgfile, result}` pair that `export_all/2` aggregates.
  defp export_and_report(orgfile, notes_dir, output_dir, id_locations_file) do
    Logger.info(" exporting: #{orgfile}")
    result = export_file(orgfile, notes_dir, output_dir, id_locations_file)
    log_failure(orgfile, result)
    {orgfile, result}
  end

  # Logs failed exports at warning level. The generic `{:error, reason}`
  # clause matters: `do_export_file/4` returns rescued exceptions in that
  # shape, and an unmatched result here would crash the task and abort the
  # whole batch.
  defp log_failure(_orgfile, {:ok, _}), do: :ok

  defp log_failure(orgfile, {:error, {:emacs_exit, code}}) do
    Logger.warning(" failed: #{Path.basename(orgfile)} (emacs exit code #{code})")
  end

  defp log_failure(orgfile, {:error, reason}) do
    Logger.warning(" failed: #{Path.basename(orgfile)} (#{inspect(reason)})")
  end

  # Build org-id locations database file by scanning all org files once.
  #
  # Returns the path of the temporary database file, or `nil` when it could
  # not be built (emacs exited non-zero or could not be executed). With `nil`
  # each export falls back to building the ID locations inline.
  defp build_id_locations(notes_dir) do
    id_file =
      Path.join(System.tmp_dir!(), "org-id-locations-#{:erlang.unique_integer([:positive])}")

    args = [
      "--batch",
      "--eval", ~s[(setq org-id-locations-file #{elisp_string(id_file)})],
      "--eval", org_id_extra_files_form(notes_dir),
      "--eval", "(org-id-update-id-locations)"
    ]

    case System.cmd("emacs", args, stderr_to_stdout: true) do
      {_output, 0} ->
        Logger.debug("Built org-id locations database: #{id_file}")
        id_file

      {output, exit_code} ->
        Logger.warning("Failed to build org-id locations database")
        Logger.debug("emacs exited with code #{exit_code}:\n#{output}")
        # Do not leave a partially written database behind
        File.rm(id_file)
        nil
    end
  rescue
    e ->
      # Raised e.g. when the emacs executable cannot be found
      Logger.warning("Failed to build org-id locations database")
      Logger.debug("could not run emacs: #{Exception.message(e)}")
      nil
  end

  # Reads the configured export concurrency, falling back to the default when
  # the configured value is not a positive integer (which
  # `Task.async_stream/3` would reject).
  defp get_concurrency do
    case Application.get_env(:org_garden, :export_concurrency, @default_max_concurrency) do
      value when is_integer(value) and value > 0 ->
        value

      invalid ->
        Logger.warning(
          "Invalid :export_concurrency #{inspect(invalid)}, " <>
            "falling back to #{@default_max_concurrency}"
        )

        @default_max_concurrency
    end
  end

  @doc """
  Compute the expected `.md` path for a given `.org` file.

  Uses the same section-mapping logic as the export: the relative directory
  of the `.org` file within `notes_dir` becomes the section directory
  under `content_dir`. Files directly inside `notes_dir` map to
  `content_dir` itself.

  ## Examples

      iex> OrgGarden.Export.expected_md_path("/notes/bus/emt.org", "/notes", "/out/content")
      "/out/content/bus/emt.md"

      iex> OrgGarden.Export.expected_md_path("/notes/top-level.org", "/notes", "/out/content")
      "/out/content/top-level.md"
  """
  @spec expected_md_path(String.t(), String.t(), String.t()) :: String.t()
  def expected_md_path(orgfile, notes_dir, content_dir) do
    md_name = Path.basename(orgfile, ".org") <> ".md"

    case orgfile |> Path.dirname() |> Path.relative_to(notes_dir) do
      "." -> Path.join(content_dir, md_name)
      section -> Path.join([content_dir, section, md_name])
    end
  end
end
|