initial version

This commit is contained in:
Ignacio Ballesteros
2026-02-14 09:09:54 +01:00
commit 3738d1d7d8
16 changed files with 1037 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
result

61
flake.lock generated Normal file
View File

@@ -0,0 +1,61 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1770841267,
"narHash": "sha256-9xejG0KoqsoKEGp2kVbXRlEYtFFcDTHjidiuX8hGO44=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "ec7c70d12ce2fc37cb92aff673dcdca89d187bae",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

102
flake.nix Normal file
View File

@@ -0,0 +1,102 @@
# Nix flake for org-to-quartz.
# Exposes, per default system: the Python application as a package, a dev
# shell with the Python/pandoc/node toolchain, and two apps (`default` runs
# the converter, `serve` converts notes and serves them with Quartz).
{
description = "org-to-quartz: Convert org notes to Quartz-compatible markdown";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
flake-utils.url = "github:numtide/flake-utils";
};
outputs = { self, nixpkgs, flake-utils }:
flake-utils.lib.eachDefaultSystem (system:
let
pkgs = nixpkgs.legacyPackages.${system};
# Python interpreter and package set used for both the build and the dev shell.
python = pkgs.python311;
pythonPackages = python.pkgs;
# The converter itself, built from this repository's pyproject.toml.
org-to-quartz = pythonPackages.buildPythonApplication {
pname = "org-to-quartz";
version = "0.1.0";
format = "pyproject";
src = ./.;
nativeBuildInputs = [
pythonPackages.setuptools
pythonPackages.wheel
];
# Runtime Python dependencies; pandoc is listed here and also put on PATH below.
propagatedBuildInputs = [
pythonPackages.pybtex
pythonPackages.requests
pythonPackages.pyyaml
pkgs.pandoc
];
# Make pandoc available at runtime
makeWrapperArgs = [
"--prefix" "PATH" ":" "${pkgs.pandoc}/bin"
];
meta = {
description = "Convert org notes to Quartz-compatible markdown";
mainProgram = "org-to-quartz";
};
};
# Script to serve quartz with converted notes
# Usage: quartz-serve [NOTES_DIR] [PORT]; defaults are "." and 8080.
# Steps: shallow-clone Quartz into a fresh mktemp directory, npm install,
# run org-to-quartz into its content/ folder, patch quartz.config.ts to add
# the OxHugo plugin, then build and serve.
# NOTE(review): the mktemp directory is never removed by this script.
# NOTE(review): git's stderr is discarded, so with `set -e` a failed clone
# exits without showing the reason.
quartz-serve = pkgs.writeShellScriptBin "quartz-serve" ''
set -e
NOTES_DIR="''${1:-.}"
PORT="''${2:-8080}"
WORK_DIR=$(mktemp -d)
echo "Cloning Quartz..."
${pkgs.git}/bin/git clone --depth 1 https://github.com/jackyzha0/quartz.git "$WORK_DIR/quartz" 2>/dev/null
echo "Installing dependencies..."
cd "$WORK_DIR/quartz"
${pkgs.nodejs}/bin/npm install --silent
echo "Converting org notes from $NOTES_DIR..."
${org-to-quartz}/bin/org-to-quartz "$NOTES_DIR" "$WORK_DIR/quartz/content" -v
# Enable OxHugo plugin
${pkgs.gnused}/bin/sed -i 's/Plugin.GitHubFlavoredMarkdown()/Plugin.OxHugoFlavouredMarkdown(),\n Plugin.GitHubFlavoredMarkdown()/' quartz.config.ts
echo ""
echo "Starting Quartz on http://localhost:$PORT"
${pkgs.nodejs}/bin/npx quartz build --serve --port "$PORT"
'';
in {
# `nix build` / `nix build .#org-to-quartz`
packages = {
default = org-to-quartz;
org-to-quartz = org-to-quartz;
};
# `nix develop`: interpreter plus the runtime dependencies, pytest and node.
devShells.default = pkgs.mkShell {
buildInputs = [
python
pythonPackages.pybtex
pythonPackages.requests
pythonPackages.pyyaml
pythonPackages.pytest
pkgs.pandoc
pkgs.nodejs
];
};
# `nix run` runs the converter; `nix run .#serve` runs quartz-serve.
apps = {
default = {
type = "app";
program = "${org-to-quartz}/bin/org-to-quartz";
};
serve = {
type = "app";
program = "${quartz-serve}/bin/quartz-serve";
};
};
}
);
}

20
pyproject.toml Normal file
View File

@@ -0,0 +1,20 @@
[build-system]
requires = ["setuptools", "wheel"]
build-backend = "setuptools.build_meta"
[project]
name = "org-to-quartz"
version = "0.1.0"
description = "Convert org notes to Quartz-compatible markdown"
requires-python = ">=3.11"
dependencies = [
"pybtex",
"requests",
"pyyaml",
]
[project.scripts]
org-to-quartz = "org_to_quartz.main:main"
[tool.setuptools.packages.find]
where = ["src"]

View File

@@ -0,0 +1,3 @@
"""org-to-quartz: Convert org notes to Quartz-compatible markdown."""
__version__ = "0.1.0"

View File

@@ -0,0 +1,5 @@
"""Citation resolution module."""
from .resolver import CitationResolver
__all__ = ["CitationResolver"]

View File

@@ -0,0 +1,91 @@
"""Local BibTeX file resolver for citations."""
from dataclasses import dataclass
from pathlib import Path
from pybtex.database import parse_file, BibliographyData
@dataclass
class CitationInfo:
"""Resolved citation information."""
key: str
authors: str
year: str
title: str
doi: str | None = None
url: str | None = None
def format_link(self, link: str | None = None) -> str:
"""Format as markdown link [Author, Year](link)."""
display = f"{self.authors}, {self.year}" if self.authors else self.key
if link:
return f"[{display}]({link})"
if self.doi:
return f"[{display}](https://doi.org/{self.doi})"
if self.url:
return f"[{display}]({self.url})"
return f"[{display}]"
class BibtexResolver:
    """Resolve citations from a local BibTeX file.

    The file is parsed once, at construction time. If no path is given, the
    file is missing, or it cannot be parsed, the resolver stays empty and
    every lookup returns ``None``. Missing and unparsable files are reported
    on stderr so a bad path does not go unnoticed.
    """

    def __init__(self, bib_path: Path | str | None = None):
        """Initialize with optional path to .bib file."""
        self.bib_data: BibliographyData | None = None
        self.bib_path = Path(bib_path) if bib_path else None
        if self.bib_path is None:
            return
        if self.bib_path.exists():
            self._load_bib()
        else:
            self._warn(f"BibTeX file not found: {self.bib_path}")

    @staticmethod
    def _warn(message: str) -> None:
        """Print a non-fatal warning to stderr."""
        import sys

        print(f"Warning: {message}", file=sys.stderr)

    def _load_bib(self) -> None:
        """Parse ``self.bib_path`` into ``self.bib_data`` (best effort)."""
        if self.bib_path is None:
            return
        try:
            self.bib_data = parse_file(str(self.bib_path))
        except Exception as exc:
            # Resolution is best-effort: keep going without BibTeX data, but
            # tell the user why their .bib file is being ignored.
            self._warn(f"could not parse BibTeX file {self.bib_path}: {exc}")
            self.bib_data = None

    @staticmethod
    def _last_name(person) -> str:
        """Return all last-name parts of a person joined by spaces ('' if none)."""
        return " ".join(str(part) for part in person.last_names)

    def _extract_authors(self, entry) -> str:
        """Build a short author label from a BibTeX entry.

        Returns:
            ``""`` when there is no usable author name, ``"A"`` for one
            author, ``"A & B"`` for two, and ``"A et al."`` for three or
            more. Authors without a last name are skipped, and multi-part
            last names are kept whole.
        """
        if "author" not in entry.persons:
            return ""
        authors = entry.persons["author"]
        names = [name for name in (self._last_name(p) for p in authors) if name]
        if not names:
            return ""
        if len(authors) > 2:
            return f"{names[0]} et al."
        return " & ".join(names)

    def _extract_year(self, entry) -> str:
        """Return the entry's ``year`` field, or ``""`` when absent."""
        return entry.fields.get("year", "")

    def try_resolve(self, cite_key: str) -> CitationInfo | None:
        """Try to resolve a citation key from the BibTeX file.

        Returns:
            CitationInfo if found, None otherwise
        """
        if self.bib_data is None:
            return None
        if cite_key not in self.bib_data.entries:
            return None
        entry = self.bib_data.entries[cite_key]
        return CitationInfo(
            key=cite_key,
            authors=self._extract_authors(entry),
            year=self._extract_year(entry),
            title=entry.fields.get("title", ""),
            doi=entry.fields.get("doi"),
            url=entry.fields.get("url"),
        )

View File

@@ -0,0 +1,33 @@
"""DOI/URL fallback resolver for citations."""
from .bibtex import CitationInfo
class DOIResolver:
    """Last-resort resolver: wraps the raw citation key, never fails."""

    def try_resolve(self, cite_key: str) -> CitationInfo:
        """Build a bare ``CitationInfo`` carrying only the key.

        A key that starts with ``"10."`` is additionally stored as the DOI;
        authors, year and title are empty strings and ``url`` is ``None``.

        Returns:
            A CitationInfo; this resolver always returns one.
        """
        looks_like_doi = cite_key.startswith("10.")
        return CitationInfo(
            key=cite_key,
            authors="",
            year="",
            title="",
            doi=cite_key if looks_like_doi else None,
            url=None,
        )

    def format_raw(self, cite_key: str) -> str:
        """Return the key wrapped in square brackets, with no link."""
        return "[{}]".format(cite_key)

View File

@@ -0,0 +1,55 @@
"""Citation resolver orchestrator - chains multiple resolvers."""
from pathlib import Path
from .zotero import ZoteroResolver
from .bibtex import BibtexResolver
from .doi import DOIResolver
class CitationResolver:
"""Orchestrates citation resolution across multiple sources.
Resolution order:
1. Zotero Better BibTeX (if running) -> zotero://select/items/@key
2. Local BibTeX file (if provided) -> DOI link or raw
3. DOI fallback -> raw key
"""
def __init__(self, bib_path: Path | str | None = None):
"""Initialize citation resolver.
Args:
bib_path: Optional path to local .bib file for fallback
"""
self.zotero = ZoteroResolver()
self.bibtex = BibtexResolver(bib_path)
self.doi = DOIResolver()
def resolve(self, cite_key: str) -> str:
"""Resolve a citation key to a markdown link.
Tries resolvers in order:
1. Zotero (returns zotero://select link)
2. BibTeX (returns DOI link if available)
3. DOI fallback (returns raw key)
Args:
cite_key: Citation key (e.g., "smith2020")
Returns:
Markdown formatted citation link
"""
# Try Zotero first
info = self.zotero.try_resolve(cite_key)
if info is not None:
return info.format_link(info.url) # zotero:// URL
# Try BibTeX
info = self.bibtex.try_resolve(cite_key)
if info is not None:
return info.format_link() # DOI or URL from bib entry
# Final fallback
info = self.doi.try_resolve(cite_key)
return info.format_link()

View File

@@ -0,0 +1,126 @@
"""Zotero Better BibTeX JSON-RPC resolver."""
import requests
from typing import Any
from .bibtex import CitationInfo
ZOTERO_RPC_URL = "http://localhost:23119/better-bibtex/json-rpc"
class ZoteroResolver:
    """Resolve citations via Zotero Better BibTeX JSON-RPC API.

    Item dictionaries are read defensively in two shapes: Zotero-native
    (``creators[].lastName``, ``date``) and CSL-JSON (``author[].family``,
    ``issued``).
    NOTE(review): Better BibTeX's ``item.search`` appears to return CSL-JSON
    items plus a ``citekey`` field - confirm against the running plugin.
    """

    def __init__(self, timeout: float = 2.0):
        """Initialize Zotero resolver.

        Args:
            timeout: Request timeout in seconds (short since it's local)
        """
        self.timeout = timeout
        # None = not probed yet; the probe result is cached afterwards.
        self._available: bool | None = None

    def is_available(self) -> bool:
        """Check (once) whether the Better BibTeX endpoint answers with HTTP 200."""
        if self._available is None:
            try:
                # Simple ping to check if server is up
                response = requests.post(
                    ZOTERO_RPC_URL,
                    json={"jsonrpc": "2.0", "method": "item.citationkey", "params": ["__test__"], "id": 1},
                    timeout=self.timeout,
                )
                self._available = response.status_code == 200
            except (requests.RequestException, ConnectionError):
                self._available = False
        return self._available

    def _rpc_call(self, method: str, params: list[Any]) -> Any | None:
        """Make a JSON-RPC call; return its ``result`` or None on any failure."""
        try:
            response = requests.post(
                ZOTERO_RPC_URL,
                json={"jsonrpc": "2.0", "method": method, "params": params, "id": 1},
                timeout=self.timeout,
            )
            if response.status_code != 200:
                return None
            data = response.json()
        except (requests.RequestException, ValueError):
            return None
        # A JSON payload that is not an object cannot carry a "result".
        return data.get("result") if isinstance(data, dict) else None

    def _get_item_by_citekey(self, cite_key: str) -> dict | None:
        """Get item data by citation key.

        The search is not guaranteed to be an exact-key lookup, so an item
        whose own key field equals ``cite_key`` is preferred; otherwise the
        first hit is returned.
        """
        result = self._rpc_call("item.search", [f"citekey:{cite_key}"])
        if not isinstance(result, list):
            return None
        items = [entry for entry in result if isinstance(entry, dict)]
        if not items:
            return None
        for entry in items:
            if cite_key in (entry.get("citekey"), entry.get("citationKey"), entry.get("citation-key")):
                return entry
        return items[0]

    @staticmethod
    def _creator_name(creator: dict) -> str:
        """Return the best available surname-like field of a creator ('' if none)."""
        for field_name in ("lastName", "family", "name", "literal"):
            value = creator.get(field_name)
            if value:
                return str(value)
        return ""

    def _extract_authors(self, item: dict) -> str:
        """Extract a short author label from an item.

        Returns:
            ``""`` with no usable author, ``"A"`` for one, ``"A & B"`` for
            two, ``"A et al."`` for three or more.
        """
        creators = item.get("creators") or []
        people = [c for c in creators if isinstance(c, dict) and c.get("creatorType") == "author"]
        if not people:
            # CSL-JSON keeps authors in a dedicated "author" list.
            people = [c for c in (item.get("author") or []) if isinstance(c, dict)]
        names = [name for name in (self._creator_name(p) for p in people) if name]
        if not names:
            return ""
        if len(people) > 2:
            return f"{names[0]} et al."
        return " & ".join(names)

    @staticmethod
    def _first_year(text: Any) -> str:
        """Return the first 4-digit run in ``text``, or '' if there is none."""
        import re

        if not isinstance(text, str):
            return ""
        match = re.search(r"(\d{4})", text)
        return match.group(1) if match else ""

    def _extract_year(self, item: dict) -> str:
        """Extract the publication year from an item ('' if unknown).

        Reads the free-form ``date`` string first (e.g. "2024",
        "2024-01-15", "January 2024"), then the CSL ``issued`` object.
        """
        year = self._first_year(item.get("date"))
        if year:
            return year
        issued = item.get("issued")
        if isinstance(issued, dict):
            parts = issued.get("date-parts")
            try:
                # CSL: [[year, month, day]]; year may be int or str.
                return self._first_year(str(parts[0][0]))
            except (TypeError, IndexError, KeyError):
                pass
            return self._first_year(issued.get("raw")) or self._first_year(issued.get("literal"))
        return ""

    def try_resolve(self, cite_key: str) -> CitationInfo | None:
        """Try to resolve a citation key via Zotero.

        Returns:
            CitationInfo with zotero:// link if found, None otherwise
        """
        if not self.is_available():
            return None
        item = self._get_item_by_citekey(cite_key)
        if item is None:
            return None
        return CitationInfo(
            key=cite_key,
            authors=self._extract_authors(item),
            year=self._extract_year(item),
            title=item.get("title", ""),
            doi=item.get("DOI"),
            # Select-by-citekey URL handled by Zotero.
            url=f"zotero://select/items/@{cite_key}",
        )

    def format_zotero_link(self, info: CitationInfo) -> str:
        """Format citation with Zotero select URL."""
        return info.format_link(info.url)

View File

@@ -0,0 +1,120 @@
"""Detect and copy images referenced in org files."""
import re
import shutil
from pathlib import Path
from .org_parser import OrgDocument
# Patterns for image references in org-mode.
# Each pattern has exactly one capture group: the image path.
IMAGE_PATTERNS = [
    # [[file:path/to/image.png]]
    re.compile(r"\[\[file:([^\]]+\.(?:png|jpg|jpeg|gif|svg|webp|bmp))\]\]", re.IGNORECASE),
    # [[./path/to/image.png]]
    re.compile(r"\[\[(\./[^\]]+\.(?:png|jpg|jpeg|gif|svg|webp|bmp))\]\]", re.IGNORECASE),
    # [[path/to/image.png]] (without file: prefix)
    re.compile(r"\[\[([^\]:]+\.(?:png|jpg|jpeg|gif|svg|webp|bmp))\]\]", re.IGNORECASE),
    # Inline image references: ./image.png or path/image.png
    re.compile(r"(?<!\[)(\./[^\s\]]+\.(?:png|jpg|jpeg|gif|svg|webp|bmp))(?!\])", re.IGNORECASE),
]


def find_images(content: str) -> list[str]:
    """Find all image references in org content.

    Args:
        content: Raw org text to scan.

    Returns:
        The distinct image paths matched by any of ``IMAGE_PATTERNS``, in
        sorted order. Sorting makes the result reproducible between runs;
        plain set iteration order is not.
    """
    found: set[str] = set()
    for pattern in IMAGE_PATTERNS:
        found.update(pattern.findall(content))
    return sorted(found)
def resolve_image_path(image_ref: str, source_dir: Path) -> Path | None:
"""Resolve image reference to absolute path.
Args:
image_ref: Image reference from org file (e.g., './img.png', 'images/fig.png')
source_dir: Directory containing the source org file
Returns:
Absolute path to image if found, None otherwise
"""
# Remove file: prefix if present
clean_ref = image_ref.removeprefix("file:")
# Try relative to source directory
candidate = source_dir / clean_ref
if candidate.exists():
return candidate.resolve()
# Try without ./ prefix
if clean_ref.startswith("./"):
candidate = source_dir / clean_ref[2:]
if candidate.exists():
return candidate.resolve()
return None
def copy_images(doc: OrgDocument, output_dir: Path) -> dict[str, str]:
    """Copy every image the document references into the note's folder.

    Args:
        doc: Parsed org document
        output_dir: Parent directory; files go to ``output_dir/<doc.slug>/``

    Returns:
        Mapping from each original reference that was found on disk to the
        file name it now has inside the note folder. Empty when the
        document has no source path.
    """
    if doc.source_path is None:
        return {}

    origin_dir = doc.source_path.parent
    target_dir = output_dir / doc.slug
    target_dir.mkdir(parents=True, exist_ok=True)

    copied: dict[str, str] = {}
    for reference in find_images(doc.body):
        found = resolve_image_path(reference, origin_dir)
        if found is None:
            # Missing on disk: silently skipped (no warning is emitted).
            continue

        destination = target_dir / found.name
        # Copy when absent, or when the source is newer than the existing copy.
        needs_copy = (not destination.exists()) or found.stat().st_mtime > destination.stat().st_mtime
        if needs_copy:
            shutil.copy2(found, destination)

        # The image sits next to index.md, so the new reference is just the name.
        copied[reference] = found.name
    return copied
def update_image_paths(content: str, path_mapping: dict[str, str]) -> str:
    """Update image references in markdown content.

    For every original reference the following spellings are rewritten to
    the new path: ``file:<ref>``, ``<ref>`` itself and, when the reference
    starts with ``./``, the reference without that prefix.

    All spellings are replaced in a single pass, longest first. This keeps
    the result independent of the mapping's iteration order: a shorter
    reference can no longer rewrite part of a longer one, and replaced text
    is never scanned again.

    Args:
        content: Markdown content (after pandoc conversion)
        path_mapping: Mapping from original refs to new filenames

    Returns:
        Content with updated image paths
    """
    replacements: dict[str, str] = {}
    for old_ref, new_path in path_mapping.items():
        spellings = [f"file:{old_ref}", old_ref]
        if old_ref.startswith("./"):
            spellings.append(old_ref[2:])
        for spelling in spellings:
            if spelling:  # never register an empty search string
                replacements.setdefault(spelling, new_path)

    if not replacements:
        return content

    # Longest alternative first so the regex prefers the most specific spelling.
    ordered = sorted(replacements, key=len, reverse=True)
    matcher = re.compile("|".join(re.escape(spelling) for spelling in ordered))
    return matcher.sub(lambda m: replacements[m.group(0)], content)

141
src/org_to_quartz/main.py Normal file
View File

@@ -0,0 +1,141 @@
"""CLI entry point for org-to-quartz converter."""
import argparse
import sys
from pathlib import Path
from .org_parser import parse_org_file
from .markdown_writer import convert_document, write_markdown
from .image_handler import copy_images, update_image_paths
from .citations import CitationResolver
def find_org_files(input_dir: Path) -> list[Path]:
    """Return the ``*.org`` files directly inside ``input_dir`` (no recursion)."""
    return [entry for entry in input_dir.glob("*.org")]
def convert_file(
    org_path: Path,
    output_dir: Path,
    citation_resolver: CitationResolver | None,
    verbose: bool = False,
) -> Path | None:
    """Convert one org file into ``output_dir/<slug>/index.md``.

    Any exception raised along the way is reported on stderr and turned
    into a ``None`` result, so one bad file does not stop a batch.

    Returns:
        Path to created note directory, or None on error
    """
    try:
        doc = parse_org_file(org_path)
        if verbose:
            print(f" Parsed: {doc.title or org_path.stem}")

        # Folder that will hold index.md and the note's images.
        note_dir = output_dir / doc.slug
        note_dir.mkdir(parents=True, exist_ok=True)

        # Images are copied before conversion so their new names are known.
        image_mapping = copy_images(doc, output_dir)
        if verbose and image_mapping:
            print(f" Copied {len(image_mapping)} images")

        rendered = convert_document(doc, citation_resolver)
        rendered = update_image_paths(rendered, image_mapping)

        (note_dir / "index.md").write_text(rendered, encoding="utf-8")
    except Exception as e:
        print(f" Error converting {org_path.name}: {e}", file=sys.stderr)
        return None
    else:
        return note_dir
def main() -> int:
    """Command-line entry point.

    Parses the arguments, converts every ``.org`` file found directly in
    the input directory and prints a summary.

    Returns:
        0 when every file converted; 1 when the input directory is missing,
        contains no org files, or at least one conversion failed.
    """
    cli = argparse.ArgumentParser(
        prog="org-to-quartz",
        description="Convert org-mode notes to Quartz-compatible markdown",
    )
    cli.add_argument("input_dir", type=Path, help="Directory containing .org files")
    cli.add_argument("output_dir", type=Path, help="Output directory (e.g., quartz/content)")
    cli.add_argument("--bib", type=Path, metavar="FILE", help="Path to .bib file for citation resolution")
    cli.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    options = cli.parse_args()

    # The input must exist before anything is created on disk.
    if not options.input_dir.is_dir():
        print(f"Error: Input directory does not exist: {options.input_dir}", file=sys.stderr)
        return 1

    options.output_dir.mkdir(parents=True, exist_ok=True)

    pending = find_org_files(options.input_dir)
    if not pending:
        print(f"No .org files found in {options.input_dir}", file=sys.stderr)
        return 1
    print(f"Found {len(pending)} org files")

    resolver = CitationResolver(options.bib)
    if options.verbose:
        if resolver.zotero.is_available():
            print("Zotero Better BibTeX: available")
        else:
            print("Zotero Better BibTeX: not available")
        if options.bib:
            print(f"BibTeX file: {options.bib}")

    converted = 0
    failed = 0
    for org_path in pending:
        print(f"Converting: {org_path.name}")
        outcome = convert_file(org_path, options.output_dir, resolver, options.verbose)
        if not outcome:
            failed += 1
            continue
        converted += 1
        if options.verbose:
            print(f" -> {outcome}")

    print(f"\nDone: {converted} converted, {failed} errors")
    return 1 if failed else 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,148 @@
"""Convert org content to Quartz-compatible markdown with YAML front matter."""
import re
import subprocess
import yaml
from pathlib import Path
from .org_parser import OrgDocument
def build_front_matter(doc: OrgDocument) -> dict:
    """Collect the document's non-empty metadata for the YAML header.

    Keys appear in the order title, date, lastmod, tags, draft. Falsy
    values are left out, and ``draft`` is only present (as ``True``) for
    draft documents.
    """
    candidates = (
        ("title", doc.title),
        ("date", doc.date),
        ("lastmod", doc.lastmod),
        ("tags", doc.tags),
    )
    fm: dict = {name: value for name, value in candidates if value}
    if doc.draft:
        fm["draft"] = True
    return fm
def front_matter_to_yaml(fm: dict) -> str:
    """Serialize front matter as a ``---`` delimited YAML block.

    Returns an empty string for an empty dict so that no header is
    emitted at all. Key order is kept as given.
    """
    if fm:
        dumped = yaml.dump(fm, default_flow_style=False, allow_unicode=True, sort_keys=False)
        return "---\n" + dumped + "---\n"
    return ""
def convert_org_to_markdown(org_content: str) -> str:
    """Convert org-mode content to GitHub-flavored markdown using pandoc.

    The text is exchanged with pandoc as UTF-8 regardless of the process
    locale, because pandoc itself reads and writes UTF-8.

    Args:
        org_content: Org-mode source text.

    Returns:
        Pandoc's markdown output.

    Raises:
        RuntimeError: If pandoc is not installed or exits with an error.
    """
    try:
        result = subprocess.run(
            ["pandoc", "-f", "org", "-t", "gfm", "--wrap=none"],
            input=org_content,
            capture_output=True,
            text=True,
            # Without this, text mode falls back to the locale's encoding.
            encoding="utf-8",
            check=True,
        )
        return result.stdout
    except subprocess.CalledProcessError as e:
        raise RuntimeError(f"Pandoc conversion failed: {e.stderr}") from e
    except FileNotFoundError as e:
        raise RuntimeError("Pandoc not found. Please install pandoc.") from e
def process_wikilinks(content: str) -> str:
    """Convert org-roam style links (after pandoc) to Quartz wikilinks.

    Pandoc converts org links to markdown links:
    - [[roam:Title]] -> [roam:Title](roam:Title) -> [[Title]]
    - [[id:uuid][Desc]] -> [Desc](id:uuid) -> [[Desc]]
    - [[file:x.org][Desc]] -> [Desc](file:x.org) -> [[Desc]]

    The ``id:`` target may be any run of characters without whitespace or
    ``)``, so uppercase UUIDs and non-UUID ids are converted too.
    """
    # [roam:Title](roam:Title) -> [[Title]]
    content = re.sub(r"\[roam:([^\]]+)\]\(roam:[^)]+\)", r"[[\1]]", content)
    # [Description](id:<anything>) -> [[Description]]
    content = re.sub(r"\[([^\]]+)\]\(id:[^)\s]+\)", r"[[\1]]", content)
    # [Description](file:something.org) -> [[Description]]
    content = re.sub(r"\[([^\]]+)\]\(file:[^)]+\.org\)", r"[[\1]]", content)
    return content
# One alternation, most specific form first, so that a citation is consumed
# exactly once and already-substituted text is never scanned again.
_CITE_KEY = r"[a-zA-Z0-9_-]+"
_CITATION_PATTERN = re.compile(
    rf"\[\[cite:(?P<orglink>{_CITE_KEY})\]\[[^\]]*\]\]"  # [[cite:key][description]]
    rf"|\[[^\]]*\]\(cite:(?P<mdlink>{_CITE_KEY})\)"  # [description](cite:key)
    rf"|\[cite:@(?P<bracket>{_CITE_KEY})\]"  # [cite:@key]
    rf"|cite:(?P<bare>{_CITE_KEY})"  # cite:key
)


def process_citations(content: str, resolver) -> str:
    """Process org-mode citations and resolve them.

    Handles:
    - cite:key
    - [cite:@key]
    - [[cite:key][description]]
    - [description](cite:key), the markdown-link spelling of the previous form

    In the two described forms the description is dropped and the whole
    link is replaced by the resolved citation.

    Args:
        content: Text to process.
        resolver: Object with ``resolve(cite_key) -> str``; ``None``
            disables citation processing.

    Returns:
        The content with every recognised citation replaced by
        ``resolver.resolve(key)``.
    """
    if resolver is None:
        return content

    def replace_cite(match) -> str:
        # Exactly one named group takes part in any match.
        return resolver.resolve(match.group(match.lastgroup))

    return _CITATION_PATTERN.sub(replace_cite, content)
def generate_draft_todo(doc: OrgDocument) -> str:
    """Return the review-reminder HTML comment for drafts, '' otherwise."""
    if not doc.draft:
        return ""
    return "<!-- TODO: This note was marked as noexport. Review before publishing. -->\n\n"
def convert_document(doc: OrgDocument, citation_resolver=None) -> str:
    """Render an OrgDocument as one markdown string.

    Layout of the result: YAML header, then the draft reminder (drafts
    only), then the converted body. The body goes through pandoc, wikilink
    rewriting and citation resolution, in that order.
    """
    header = front_matter_to_yaml(build_front_matter(doc))

    body = convert_org_to_markdown(doc.body)
    for transform in (process_wikilinks, lambda text: process_citations(text, citation_resolver)):
        body = transform(body)

    reminder = generate_draft_todo(doc)
    return header + reminder + body
def write_markdown(doc: OrgDocument, output_dir: Path, citation_resolver=None) -> Path:
    """Convert the document and store it as ``output_dir/<slug>/index.md``.

    The note directory is created if needed (before conversion starts).

    Returns:
        Path of the note directory that contains index.md.
    """
    target_dir = output_dir / doc.slug
    target_dir.mkdir(parents=True, exist_ok=True)

    rendered = convert_document(doc, citation_resolver)
    (target_dir / "index.md").write_text(rendered, encoding="utf-8")
    return target_dir

View File

@@ -0,0 +1,99 @@
"""Parse org-mode files to extract front matter and body."""
import re
from dataclasses import dataclass, field
from pathlib import Path
@dataclass
class OrgDocument:
"""Parsed org document with front matter and body."""
title: str = ""
tags: list[str] = field(default_factory=list)
date: str = ""
lastmod: str = ""
draft: bool = False
hugo_section: str = ""
raw_front_matter: dict[str, str] = field(default_factory=dict)
body: str = ""
source_path: Path | None = None
@property
def slug(self) -> str:
"""Generate URL-friendly slug from title or filename."""
if self.source_path:
return self.source_path.stem
return re.sub(r"[^a-z0-9]+", "-", self.title.lower()).strip("-")
# Pattern for org front matter: #+KEY: value
FRONT_MATTER_PATTERN = re.compile(r"^#\+(\w+):\s*(.*)$", re.IGNORECASE)
# Pattern for filetags: :tag1:tag2:tag3:
# Captures each run of non-colon characters that sits between two colons;
# the colons are matched with lookarounds so adjacent tags can share one.
FILETAGS_PATTERN = re.compile(r"(?<=:)([^:]+)(?=:)")


def parse_filetags(value: str) -> list[str]:
    """Split an org filetags value such as ':tag1:tag2:' into its tags."""
    return [hit.group(1) for hit in FILETAGS_PATTERN.finditer(value)]
def parse_date(value: str) -> str:
    """Extract an ISO ``YYYY-MM-DD`` date from an org timestamp.

    Works for both ``[2024-02-21]`` and ``<2024-02-21>`` (extra text such
    as a day name is ignored). When no date is present, the input is
    returned with surrounding ``[]<>`` characters stripped.
    """
    found = re.search(r"(\d{4}-\d{2}-\d{2})", value)
    if found is None:
        return value.strip("[]<>")
    return found.group(1)
def parse_org_file(path: Path) -> OrgDocument:
    """Read ``path`` as UTF-8 and parse it, recording the path as the source."""
    return parse_org_content(path.read_text(encoding="utf-8"), source_path=path)
def _skip_property_drawer(lines: list[str]) -> int:
    """Return the index just past a leading ``:PROPERTIES:`` ... ``:END:`` drawer.

    Returns 0 when the first non-blank line does not open a drawer or the
    drawer is never closed (the text is then treated as ordinary content).
    """
    first = next((i for i, line in enumerate(lines) if line.strip()), None)
    if first is None or lines[first].strip().upper() != ":PROPERTIES:":
        return 0
    for i in range(first + 1, len(lines)):
        if lines[i].strip().upper() == ":END:":
            return i + 1
    return 0


def parse_org_content(content: str, source_path: Path | None = None) -> OrgDocument:
    """Parse org content string into OrgDocument.

    Front matter is the leading run of ``#+KEY: value`` lines (blank lines
    in between are allowed; keys are lower-cased, later duplicates win). A
    file-level property drawer at the very top is skipped so that the
    keywords following it are still recognised. The body starts at the
    first other non-blank line; if there is none, the body is empty.

    Args:
        content: Full text of the org file.
        source_path: Where the content came from, if anywhere.

    Returns:
        The populated OrgDocument.
    """
    lines = content.splitlines()
    front_matter: dict[str, str] = {}

    # Default to "no body": reached when every remaining line is front
    # matter or blank.
    body_start = len(lines)
    for i in range(_skip_property_drawer(lines), len(lines)):
        line = lines[i]
        match = FRONT_MATTER_PATTERN.match(line)
        if match:
            front_matter[match.group(1).lower()] = match.group(2).strip()
        elif line.strip():
            # First non-blank line that is not a #+KEY: line starts the body
            # (this includes comments and #+begin_... blocks).
            body_start = i
            break

    body = "\n".join(lines[body_start:])

    # Build OrgDocument
    doc = OrgDocument(
        title=front_matter.get("title", ""),
        tags=parse_filetags(front_matter.get("filetags", "")),
        date=parse_date(front_matter.get("date", "")),
        lastmod=parse_date(front_matter.get("hugo_lastmod", "")),
        draft="noexport" in front_matter.get("hugo_tags", "").lower(),
        hugo_section=front_matter.get("hugo_section", ""),
        raw_front_matter=front_matter,
        body=body,
        source_path=source_path,
    )
    return doc

View File

@@ -0,0 +1,8 @@
#+title: Draft Note
#+filetags: :draft:
#+date: [2024-02-20]
#+hugo_tags: noexport
* Work in Progress
This note is marked as noexport.

View File

@@ -0,0 +1,24 @@
#+title: Example Note
#+filetags: :test:example:
#+date: [2024-02-21]
#+hugo_lastmod: [2024-02-22]
#+hugo_section: notes
* Introduction
This is an example org note with some features:
- A link to [[roam:Another Note]]
- A citation cite:smith2020
- Some *bold* and /italic/ text
* Code Example
#+begin_src python
def hello():
print("Hello, world!")
#+end_src
* Conclusion
See also [[id:abc-123][Related Concept]] for more info.