Add service infrastructure for long-running deployment
- Add configuration system (config/*.exs, OrgGarden.Config)
- Refactor supervision tree with DynamicSupervisor and Registry
- Add OrgGarden.Server for serve mode lifecycle management
- Add health check HTTP endpoints (Bandit/Plug on :9090)
- Add telemetry events for export and watcher operations
- Implement graceful shutdown with SIGTERM handling
- Add Mix Release support with overlay scripts
- Add NixOS module for systemd service deployment
- Update documentation with service usage
This commit is contained in:
89
lib/org_garden/health.ex
Normal file
89
lib/org_garden/health.ex
Normal file
@@ -0,0 +1,89 @@
|
||||
defmodule OrgGarden.Health do
  @moduledoc """
  Health check HTTP endpoints.

  Provides liveness and readiness probes for systemd/kubernetes health checks.

  ## Endpoints

    * `GET /health/live` — Always returns 200 if the process is alive
    * `GET /health/ready` — Returns 200 if all components are ready, 503 otherwise
    * `GET /health` — Returns JSON status of all components (200 when every
      component is ready, 503 otherwise)

  Any other request is answered with `404 not found`.

  ## Readiness

  A component is reported as `ok` when a process is currently registered under
  its name (looked up with `Process.whereis/1`), and as `not_running` otherwise.
  The JSON body of `GET /health` has the shape:

      {"status": "healthy" | "degraded", "checks": {"server": "ok", ...}}
  """

  use Plug.Router

  # Components probed by the health checks, as `check_name: registered_name`.
  # The order here is the order in which the checks are run and reported.
  @components [
    server: OrgGarden.Server,
    watcher: OrgGarden.Watcher,
    quartz: OrgGarden.Quartz
  ]

  plug(:match)
  plug(:dispatch)

  # Liveness probe — answering at all proves the process is alive.
  get "/health/live" do
    send_resp(conn, 200, "ok")
  end

  # Readiness probe — are all components ready to serve?
  get "/health/ready" do
    if healthy?(run_checks()) do
      send_resp(conn, 200, "ready")
    else
      send_resp(conn, 503, "not ready")
    end
  end

  # Full health status as JSON.
  get "/health" do
    checks = run_checks()

    {code, summary} =
      if healthy?(checks) do
        {200, "healthy"}
      else
        {503, "degraded"}
      end

    body =
      Jason.encode!(%{
        status: summary,
        checks: Map.new(checks, fn {name, result} -> {name, Atom.to_string(result)} end)
      })

    conn
    |> put_resp_content_type("application/json")
    |> send_resp(code, body)
  end

  # Catch-all for every route not matched above.
  match _ do
    send_resp(conn, 404, "not found")
  end

  # -------------------------------------------------------------------
  # Health checks
  # -------------------------------------------------------------------

  # Runs every component check and returns a list of `{check_name, result}`
  # tuples, where `result` is `:ok` or `:not_running`.
  defp run_checks do
    Enum.map(@components, fn {name, registered_name} ->
      {name, check_process(registered_name)}
    end)
  end

  # True when every check result is `:ok`.
  defp healthy?(checks) do
    Enum.all?(checks, fn {_name, result} -> result == :ok end)
  end

  # `:ok` if a process is registered under `registered_name`, else `:not_running`.
  defp check_process(registered_name) do
    if Process.whereis(registered_name), do: :ok, else: :not_running
  end
end
|
||||
Reference in New Issue
Block a user