Add service infrastructure for long-running deployment

- Add configuration system (config/*.exs, OrgGarden.Config)
- Refactor supervision tree with DynamicSupervisor and Registry
- Add OrgGarden.Server for serve mode lifecycle management
- Add health check HTTP endpoints (Bandit/Plug on :9090)
- Add telemetry events for export and watcher operations
- Implement graceful shutdown with SIGTERM handling
- Add Mix Release support with overlay scripts
- Add NixOS module for systemd service deployment
- Update documentation with service usage
This commit is contained in:
Ignacio Ballesteros
2026-02-21 20:38:47 +01:00
parent 6476b45f04
commit 01805dbf39
23 changed files with 1147 additions and 83 deletions

View File

@@ -16,7 +16,11 @@ defmodule OrgGarden.Quartz do
require Logger
defstruct [:port, :quartz_path, :content_dir, :http_port, :ws_port]
alias OrgGarden.Config
@shutdown_timeout 5_000
defstruct [:port, :os_pid, :quartz_path, :content_dir, :http_port, :ws_port]
# -------------------------------------------------------------------
# Client API
@@ -35,21 +39,27 @@ defmodule OrgGarden.Quartz do
GenServer.start_link(__MODULE__, opts, name: __MODULE__)
end
@doc """
Returns `true` when a process is currently registered under this module's
name, `false` otherwise.
"""
@spec running?() :: boolean()
def running? do
  not is_nil(Process.whereis(__MODULE__))
end
# -------------------------------------------------------------------
# GenServer callbacks
# -------------------------------------------------------------------
@impl true
def init(opts) do
quartz_path =
System.get_env("QUARTZ_PATH") ||
raise "QUARTZ_PATH environment variable not set"
Process.flag(:trap_exit, true)
node_path = System.get_env("NODE_PATH", "node")
quartz_path = Config.get!(:quartz_path)
node_path = Config.get(:node_path, "node")
content_dir = Keyword.fetch!(opts, :content_dir)
http_port = Keyword.get(opts, :port, 8080)
ws_port = Keyword.get(opts, :ws_port, 3001)
http_port = Keyword.get(opts, :port, Config.get(:http_port, 8080))
ws_port = Keyword.get(opts, :ws_port, Config.get(:ws_port, 3001))
cli_path = Path.join(quartz_path, "quartz/bootstrap-cli.mjs")
@@ -61,9 +71,12 @@ defmodule OrgGarden.Quartz do
cli_path,
"build",
"--serve",
"--directory", content_dir,
"--port", to_string(http_port),
"--wsPort", to_string(ws_port)
"--directory",
content_dir,
"--port",
to_string(http_port),
"--wsPort",
to_string(ws_port)
]
Logger.info("[quartz] Starting: #{node_path} #{Enum.join(args, " ")}")
@@ -79,8 +92,12 @@ defmodule OrgGarden.Quartz do
env: [{~c"NODE_NO_WARNINGS", ~c"1"}]
])
# Get the OS process ID for graceful shutdown
{:os_pid, os_pid} = Port.info(port, :os_pid)
state = %__MODULE__{
port: port,
os_pid: os_pid,
quartz_path: quartz_path,
content_dir: content_dir,
http_port: http_port,
@@ -102,17 +119,65 @@ defmodule OrgGarden.Quartz do
@impl true
def handle_info({port, {:exit_status, status}}, %{port: port} = state) do
Logger.error("[quartz] Process exited with status #{status}")
{:stop, {:quartz_exit, status}, state}
{:stop, {:quartz_exit, status}, %{state | port: nil, os_pid: nil}}
end
@impl true
def terminate(_reason, %{port: port}) when is_port(port) do
# Attempt graceful shutdown
Port.close(port)
:ok
rescue
_ -> :ok
def handle_info({:EXIT, port, reason}, %{port: port} = state) do
Logger.warning("[quartz] Port terminated: #{inspect(reason)}")
{:stop, {:port_exit, reason}, %{state | port: nil, os_pid: nil}}
end
def terminate(_reason, _state), do: :ok
# Shutdown hook. When no OS pid is recorded the external process has already
# exited, so there is nothing left to stop.
@impl true
def terminate(_reason, %{os_pid: nil}), do: :ok

# Otherwise ask the Node.js process to stop with SIGTERM and wait up to
# @shutdown_timeout for it to exit; if the signal cannot be delivered,
# fall back to a forced close right away.
def terminate(_reason, %{port: port, os_pid: os_pid}) do
  Logger.info("[quartz] Shutting down gracefully...")

  {kill_output, kill_status} =
    System.cmd("kill", ["-TERM", to_string(os_pid)], stderr_to_stdout: true)

  if kill_status == 0 do
    wait_for_exit(port, @shutdown_timeout)
  else
    Logger.warning("[quartz] Failed to send SIGTERM: #{kill_output}")
    force_close(port, os_pid)
  end

  :ok
end
# -------------------------------------------------------------------
# Private functions
# -------------------------------------------------------------------
# Waits for the port to report the exit status of the external process.
#
# Returns `:ok` once `{port, {:exit_status, status}}` arrives. If nothing
# arrives within `timeout` milliseconds, the process is force-killed via
# `force_close/2`.
defp wait_for_exit(port, timeout) do
  receive do
    {^port, {:exit_status, status}} ->
      Logger.info("[quartz] Exited with status #{status}")
      :ok
  after
    timeout ->
      Logger.warning("[quartz] Shutdown timeout, forcing kill")

      # Port.info/2 returns nil once the port is closed. Asserting on
      # {:os_pid, _} here would raise a MatchError in the middle of
      # shutdown, so the closed-port case is handled explicitly.
      case Port.info(port, :os_pid) do
        {:os_pid, os_pid} -> force_close(port, os_pid)
        nil -> :ok
      end
  end
end
# Last-resort shutdown: SIGKILL the OS process, then close the port.
defp force_close(port, os_pid) do
  target_pid = to_string(os_pid)
  System.cmd("kill", ["-KILL", target_pid], stderr_to_stdout: true)

  # Closing is best-effort: any exception raised by Port.close/1 is ignored.
  try do
    Port.close(port)
  rescue
    _error -> :ok
  end
end
end