commit 3738d1d7d841b2e068f0a6f9403c238749916a19 Author: Ignacio Ballesteros Date: Sat Feb 14 09:09:54 2026 +0100 initial version diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b2be92b --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +result diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..3009a12 --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1770841267, + "narHash": "sha256-9xejG0KoqsoKEGp2kVbXRlEYtFFcDTHjidiuX8hGO44=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "ec7c70d12ce2fc37cb92aff673dcdca89d187bae", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..e936df7 --- /dev/null +++ b/flake.nix @@ -0,0 +1,102 @@ +{ + description = "org-to-quartz: Convert org notes to Quartz-compatible markdown"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, 
flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = nixpkgs.legacyPackages.${system}; + + python = pkgs.python311; + pythonPackages = python.pkgs; + + org-to-quartz = pythonPackages.buildPythonApplication { + pname = "org-to-quartz"; + version = "0.1.0"; + format = "pyproject"; + + src = ./.; + + nativeBuildInputs = [ + pythonPackages.setuptools + pythonPackages.wheel + ]; + + propagatedBuildInputs = [ + pythonPackages.pybtex + pythonPackages.requests + pythonPackages.pyyaml + pkgs.pandoc + ]; + + # Make pandoc available at runtime + makeWrapperArgs = [ + "--prefix" "PATH" ":" "${pkgs.pandoc}/bin" + ]; + + meta = { + description = "Convert org notes to Quartz-compatible markdown"; + mainProgram = "org-to-quartz"; + }; + }; + + # Script to serve quartz with converted notes + quartz-serve = pkgs.writeShellScriptBin "quartz-serve" '' + set -e + NOTES_DIR="''${1:-.}" + PORT="''${2:-8080}" + WORK_DIR=$(mktemp -d) + + echo "Cloning Quartz..." + ${pkgs.git}/bin/git clone --depth 1 https://github.com/jackyzha0/quartz.git "$WORK_DIR/quartz" 2>/dev/null + + echo "Installing dependencies..." + cd "$WORK_DIR/quartz" + ${pkgs.nodejs}/bin/npm install --silent + + echo "Converting org notes from $NOTES_DIR..." 
+ ${org-to-quartz}/bin/org-to-quartz "$NOTES_DIR" "$WORK_DIR/quartz/content" -v + + # Enable OxHugo plugin + ${pkgs.gnused}/bin/sed -i 's/Plugin.GitHubFlavoredMarkdown()/Plugin.OxHugoFlavouredMarkdown(),\n Plugin.GitHubFlavoredMarkdown()/' quartz.config.ts + + echo "" + echo "Starting Quartz on http://localhost:$PORT" + ${pkgs.nodejs}/bin/npx quartz build --serve --port "$PORT" + ''; + + in { + packages = { + default = org-to-quartz; + org-to-quartz = org-to-quartz; + }; + + devShells.default = pkgs.mkShell { + buildInputs = [ + python + pythonPackages.pybtex + pythonPackages.requests + pythonPackages.pyyaml + pythonPackages.pytest + pkgs.pandoc + pkgs.nodejs + ]; + }; + + apps = { + default = { + type = "app"; + program = "${org-to-quartz}/bin/org-to-quartz"; + }; + serve = { + type = "app"; + program = "${quartz-serve}/bin/quartz-serve"; + }; + }; + } + ); +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4f15ed3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,20 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "org-to-quartz" +version = "0.1.0" +description = "Convert org notes to Quartz-compatible markdown" +requires-python = ">=3.11" +dependencies = [ + "pybtex", + "requests", + "pyyaml", +] + +[project.scripts] +org-to-quartz = "org_to_quartz.main:main" + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/src/org_to_quartz/__init__.py b/src/org_to_quartz/__init__.py new file mode 100644 index 0000000..524a450 --- /dev/null +++ b/src/org_to_quartz/__init__.py @@ -0,0 +1,3 @@ +"""org-to-quartz: Convert org notes to Quartz-compatible markdown.""" + +__version__ = "0.1.0" diff --git a/src/org_to_quartz/citations/__init__.py b/src/org_to_quartz/citations/__init__.py new file mode 100644 index 0000000..d7380d7 --- /dev/null +++ b/src/org_to_quartz/citations/__init__.py @@ -0,0 +1,5 @@ +"""Citation resolution module.""" + +from .resolver import 
CitationResolver + +__all__ = ["CitationResolver"] diff --git a/src/org_to_quartz/citations/bibtex.py b/src/org_to_quartz/citations/bibtex.py new file mode 100644 index 0000000..369c78e --- /dev/null +++ b/src/org_to_quartz/citations/bibtex.py @@ -0,0 +1,91 @@ +"""Local BibTeX file resolver for citations.""" + +from dataclasses import dataclass +from pathlib import Path + +from pybtex.database import parse_file, BibliographyData + + +@dataclass +class CitationInfo: + """Resolved citation information.""" + + key: str + authors: str + year: str + title: str + doi: str | None = None + url: str | None = None + + def format_link(self, link: str | None = None) -> str: + """Format as markdown link [Author, Year](link).""" + display = f"{self.authors}, {self.year}" if self.authors else self.key + if link: + return f"[{display}]({link})" + if self.doi: + return f"[{display}](https://doi.org/{self.doi})" + if self.url: + return f"[{display}]({self.url})" + return f"[{display}]" + + +class BibtexResolver: + """Resolve citations from a local BibTeX file.""" + + def __init__(self, bib_path: Path | str | None = None): + """Initialize with optional path to .bib file.""" + self.bib_data: BibliographyData | None = None + self.bib_path = Path(bib_path) if bib_path else None + + if self.bib_path and self.bib_path.exists(): + self._load_bib() + + def _load_bib(self) -> None: + """Load BibTeX file.""" + if self.bib_path is None: + return + try: + self.bib_data = parse_file(str(self.bib_path)) + except Exception: + self.bib_data = None + + def _extract_authors(self, entry) -> str: + """Extract author names from BibTeX entry.""" + if "author" not in entry.persons: + return "" + + authors = entry.persons["author"] + if len(authors) == 1: + return str(authors[0].last_names[0]) if authors[0].last_names else "" + elif len(authors) == 2: + names = [str(a.last_names[0]) for a in authors if a.last_names] + return " & ".join(names) + else: + first = authors[0].last_names[0] if 
class DOIResolver:
    """Last-resort resolver: wraps a citation key without any lookup."""

    def try_resolve(self, cite_key: str) -> CitationInfo:
        """Build a minimal CitationInfo carrying only the key.

        This resolver never fails, so it can terminate a resolver chain.
        A key beginning with ``"10."`` is treated as a DOI and stored in the
        ``doi`` field as well.

        Returns:
            CitationInfo with empty authors/year/title and no URL.
        """
        looks_like_doi = cite_key.startswith("10.")
        return CitationInfo(
            key=cite_key,
            authors="",
            year="",
            title="",
            doi=cite_key if looks_like_doi else None,
            url=None,
        )

    def format_raw(self, cite_key: str) -> str:
        """Return the key in square brackets, with no link target."""
        return "[{}]".format(cite_key)
class ZoteroResolver:
    """Resolve citations via Zotero Better BibTeX JSON-RPC API.

    The availability probe result is cached on the instance, so the probe
    request is sent at most once per resolver.
    """

    def __init__(self, timeout: float = 2.0):
        """Initialize Zotero resolver.

        Args:
            timeout: Request timeout in seconds (short since it's local)
        """
        self.timeout = timeout
        # Tri-state cache: None = not probed yet, True/False = probe result.
        self._available: bool | None = None

    def is_available(self) -> bool:
        """Check if Zotero with Better BibTeX is running (cached after first call)."""
        if self._available is None:
            try:
                # Any well-formed call works as a ping; only the HTTP status matters.
                response = requests.post(
                    ZOTERO_RPC_URL,
                    json={"jsonrpc": "2.0", "method": "item.citationkey", "params": ["__test__"], "id": 1},
                    timeout=self.timeout,
                )
            except (requests.RequestException, ConnectionError):
                self._available = False
            else:
                self._available = response.status_code == 200
        return self._available

    def _rpc_call(self, method: str, params: list[Any]) -> Any | None:
        """Make a JSON-RPC call to Better BibTeX.

        Returns:
            The ``result`` member of the reply, or None on a transport error,
            a non-200 status, an undecodable body, or a reply that is not a
            JSON object.
        """
        try:
            response = requests.post(
                ZOTERO_RPC_URL,
                json={"jsonrpc": "2.0", "method": method, "params": params, "id": 1},
                timeout=self.timeout,
            )
            if response.status_code != 200:
                return None
            data = response.json()
        except (requests.RequestException, ValueError):
            return None
        # Guard against replies such as a bare list or string, which have no
        # ``.get`` and would otherwise raise AttributeError.
        if not isinstance(data, dict):
            return None
        return data.get("result")

    def _get_item_by_citekey(self, cite_key: str) -> dict | None:
        """Get Zotero item data by citation key (first search hit), or None."""
        # Better BibTeX method to search by citekey
        result = self._rpc_call("item.search", [f"citekey:{cite_key}"])
        if isinstance(result, list) and result and isinstance(result[0], dict):
            return result[0]
        return None

    def _extract_authors(self, item: dict) -> str:
        """Extract a short author string from a Zotero item.

        Only creators whose ``creatorType`` is ``"author"`` are considered.

        Returns:
            ``""`` for no authors, ``"Last"`` for one, ``"A & B"`` for two
            (missing last names are dropped rather than leaving a dangling
            ``" & "``), and ``"First et al."`` for three or more.
        """
        creators = item.get("creators") or []
        last_names = [
            creator.get("lastName") or ""
            for creator in creators
            if creator.get("creatorType") == "author"
        ]

        if not last_names:
            return ""
        if len(last_names) == 1:
            return last_names[0]
        if len(last_names) == 2:
            return " & ".join(name for name in last_names if name)
        return f"{last_names[0]} et al."

    def _extract_year(self, item: dict) -> str:
        """Extract the first four-digit run from the item's ``date`` field.

        Handles formats such as "2024", "2024-01-15" and "January 2024";
        returns ``""`` when the field is missing, empty, or has no such run.
        """
        # Local import: this module does not import ``re`` at file level.
        import re

        match = re.search(r"(\d{4})", item.get("date") or "")
        return match.group(1) if match else ""

    def try_resolve(self, cite_key: str) -> CitationInfo | None:
        """Try to resolve a citation key via Zotero.

        Returns:
            CitationInfo with zotero:// link if found, None otherwise
        """
        if not self.is_available():
            return None

        item = self._get_item_by_citekey(cite_key)
        if item is None:
            return None

        # The select URL is addressed by citation key, so the item's own
        # key/library id are not needed.
        return CitationInfo(
            key=cite_key,
            authors=self._extract_authors(item),
            year=self._extract_year(item),
            title=item.get("title", ""),
            doi=item.get("DOI"),
            url=f"zotero://select/items/@{cite_key}",
        )

    def format_zotero_link(self, info: CitationInfo) -> str:
        """Format citation with Zotero select URL."""
        return info.format_link(info.url)
re.compile(r"\[\[([^\]:]+\.(?:png|jpg|jpeg|gif|svg|webp|bmp))\]\]", re.IGNORECASE), + # Inline image references: ./image.png or path/image.png + re.compile(r"(? list[str]: + """Find all image references in org content.""" + images: set[str] = set() + for pattern in IMAGE_PATTERNS: + matches = pattern.findall(content) + images.update(matches) + return list(images) + + +def resolve_image_path(image_ref: str, source_dir: Path) -> Path | None: + """Resolve image reference to absolute path. + + Args: + image_ref: Image reference from org file (e.g., './img.png', 'images/fig.png') + source_dir: Directory containing the source org file + + Returns: + Absolute path to image if found, None otherwise + """ + # Remove file: prefix if present + clean_ref = image_ref.removeprefix("file:") + + # Try relative to source directory + candidate = source_dir / clean_ref + if candidate.exists(): + return candidate.resolve() + + # Try without ./ prefix + if clean_ref.startswith("./"): + candidate = source_dir / clean_ref[2:] + if candidate.exists(): + return candidate.resolve() + + return None + + +def copy_images(doc: OrgDocument, output_dir: Path) -> dict[str, str]: + """Copy images referenced in document to output directory. 
def update_image_paths(content: str, path_mapping: dict[str, str]) -> str:
    """Rewrite image references in converted markdown to their new filenames.

    For every mapping entry the following spellings of the original
    reference are replaced, in this order: ``file:<ref>``, ``<ref>`` as
    written, and, when the reference starts with ``./``, the reference
    without that prefix.

    Args:
        content: Markdown content (after pandoc conversion)
        path_mapping: Mapping from original refs to new filenames

    Returns:
        Content with updated image paths
    """
    rewritten = content
    for original_ref, filename in path_mapping.items():
        needles = ["file:" + original_ref, original_ref]
        if original_ref.startswith("./"):
            # Also catch the spelling without the leading "./".
            needles.append(original_ref[2:])
        for needle in needles:
            rewritten = rewritten.replace(needle, filename)
    return rewritten
def main() -> int:
    """Run the org-to-quartz command line interface.

    Parses the arguments, converts every org file found in the input
    directory, and prints a summary.

    Returns:
        0 when every file converted; 1 when the input directory is missing,
        contains no .org files, or at least one conversion failed.
    """
    cli = argparse.ArgumentParser(
        prog="org-to-quartz",
        description="Convert org-mode notes to Quartz-compatible markdown",
    )
    cli.add_argument("input_dir", type=Path, help="Directory containing .org files")
    cli.add_argument("output_dir", type=Path, help="Output directory (e.g., quartz/content)")
    cli.add_argument("--bib", type=Path, metavar="FILE", help="Path to .bib file for citation resolution")
    cli.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    opts = cli.parse_args()

    # Guard: nothing to do without a real input directory.
    if not opts.input_dir.is_dir():
        print(f"Error: Input directory does not exist: {opts.input_dir}", file=sys.stderr)
        return 1

    opts.output_dir.mkdir(parents=True, exist_ok=True)

    sources = find_org_files(opts.input_dir)
    if not sources:
        print(f"No .org files found in {opts.input_dir}", file=sys.stderr)
        return 1
    print(f"Found {len(sources)} org files")

    resolver = CitationResolver(opts.bib)
    if opts.verbose:
        print(
            "Zotero Better BibTeX: available"
            if resolver.zotero.is_available()
            else "Zotero Better BibTeX: not available"
        )
        if opts.bib:
            print(f"BibTeX file: {opts.bib}")

    converted = 0
    failed = 0
    for source in sources:
        print(f"Converting: {source.name}")
        note_dir = convert_file(source, opts.output_dir, resolver, opts.verbose)
        if not note_dir:
            failed += 1
            continue
        converted += 1
        if opts.verbose:
            print(f" -> {note_dir}")

    print(f"\nDone: {converted} converted, {failed} errors")
    return 1 if failed else 0
# One alternation, most specific form first, so a key inside a bracketed
# citation is never consumed by the bare ``cite:key`` form.
_CITATION_PATTERN = re.compile(
    r"\[\[cite:(?P<described>[a-zA-Z0-9_-]+)\]\[[^\]]*\]\]"  # [[cite:key][description]]
    r"|\[cite:@(?P<bracketed>[a-zA-Z0-9_-]+)\]"  # [cite:@key]
    r"|(?<!\w)cite:(?P<bare>[a-zA-Z0-9_-]+)"  # cite:key, not inside a word
)


def process_citations(content: str, resolver) -> str:
    """Process org-mode citations and resolve them.

    Handles:
    - cite:key
    - [cite:@key]
    - [[cite:key][description]]

    Each citation is replaced, brackets and description included, by
    ``resolver.resolve(key)``. All forms are matched in a single pass so the
    bracketed forms are replaced as a whole instead of having only their
    inner ``cite:key`` rewritten.

    Args:
        content: Text that may contain citations.
        resolver: Object with a ``resolve(cite_key) -> str`` method, or None
            to leave the content untouched.

    Returns:
        Content with every recognized citation replaced.
    """
    if resolver is None:
        return content

    def replace_cite(match) -> str:
        # Exactly one named group takes part in any match; lastgroup names it.
        return resolver.resolve(match.group(match.lastgroup))

    return _CITATION_PATTERN.sub(replace_cite, content)
+ + Creates: output_dir//index.md + Returns: Path to created directory + """ + # Create note directory + note_dir = output_dir / doc.slug + note_dir.mkdir(parents=True, exist_ok=True) + + # Convert and write + md_content = convert_document(doc, citation_resolver) + output_path = note_dir / "index.md" + output_path.write_text(md_content, encoding="utf-8") + + return note_dir diff --git a/src/org_to_quartz/org_parser.py b/src/org_to_quartz/org_parser.py new file mode 100644 index 0000000..333ff6c --- /dev/null +++ b/src/org_to_quartz/org_parser.py @@ -0,0 +1,99 @@ +"""Parse org-mode files to extract front matter and body.""" + +import re +from dataclasses import dataclass, field +from pathlib import Path + + +@dataclass +class OrgDocument: + """Parsed org document with front matter and body.""" + + title: str = "" + tags: list[str] = field(default_factory=list) + date: str = "" + lastmod: str = "" + draft: bool = False + hugo_section: str = "" + raw_front_matter: dict[str, str] = field(default_factory=dict) + body: str = "" + source_path: Path | None = None + + @property + def slug(self) -> str: + """Generate URL-friendly slug from title or filename.""" + if self.source_path: + return self.source_path.stem + return re.sub(r"[^a-z0-9]+", "-", self.title.lower()).strip("-") + + +# Pattern for org front matter: #+KEY: value +FRONT_MATTER_PATTERN = re.compile(r"^#\+(\w+):\s*(.*)$", re.IGNORECASE) + +# Pattern for filetags: :tag1:tag2:tag3: +# Uses word boundary to capture tags between colons +FILETAGS_PATTERN = re.compile(r"(?<=:)([^:]+)(?=:)") + + +def parse_filetags(value: str) -> list[str]: + """Parse org filetags format ':tag1:tag2:' into list.""" + return FILETAGS_PATTERN.findall(value) + + +def parse_date(value: str) -> str: + """Parse org date format [2024-02-21] or <2024-02-21> to ISO date.""" + # Remove brackets/angles and any day names + match = re.search(r"(\d{4}-\d{2}-\d{2})", value) + return match.group(1) if match else value.strip("[]<>") + + +def 
parse_org_file(path: Path) -> OrgDocument: + """Parse an org file into OrgDocument.""" + content = path.read_text(encoding="utf-8") + return parse_org_content(content, source_path=path) + + +def parse_org_content(content: str, source_path: Path | None = None) -> OrgDocument: + """Parse org content string into OrgDocument.""" + lines = content.splitlines() + front_matter: dict[str, str] = {} + body_start = 0 + + # Parse front matter (lines starting with #+) + for i, line in enumerate(lines): + match = FRONT_MATTER_PATTERN.match(line) + if match: + key = match.group(1).lower() + value = match.group(2).strip() + front_matter[key] = value + elif line.strip() and not line.startswith("#"): + # First non-empty, non-comment line starts the body + body_start = i + break + elif not line.strip(): + # Empty line, continue looking for more front matter or body start + continue + else: + body_start = i + break + + # Find actual body start (skip leading empty lines after front matter) + while body_start < len(lines) and not lines[body_start].strip(): + body_start += 1 + + body = "\n".join(lines[body_start:]) + + # Build OrgDocument + doc = OrgDocument( + title=front_matter.get("title", ""), + tags=parse_filetags(front_matter.get("filetags", "")), + date=parse_date(front_matter.get("date", "")), + lastmod=parse_date(front_matter.get("hugo_lastmod", "")), + draft="noexport" in front_matter.get("hugo_tags", "").lower(), + hugo_section=front_matter.get("hugo_section", ""), + raw_front_matter=front_matter, + body=body, + source_path=source_path, + ) + + return doc diff --git a/test/notes/draft-note.org b/test/notes/draft-note.org new file mode 100644 index 0000000..57c8064 --- /dev/null +++ b/test/notes/draft-note.org @@ -0,0 +1,8 @@ +#+title: Draft Note +#+filetags: :draft: +#+date: [2024-02-20] +#+hugo_tags: noexport + +* Work in Progress + +This note is marked as noexport. 
diff --git a/test/notes/example-note.org b/test/notes/example-note.org new file mode 100644 index 0000000..5d09fd1 --- /dev/null +++ b/test/notes/example-note.org @@ -0,0 +1,24 @@ +#+title: Example Note +#+filetags: :test:example: +#+date: [2024-02-21] +#+hugo_lastmod: [2024-02-22] +#+hugo_section: notes + +* Introduction + +This is an example org note with some features: + +- A link to [[roam:Another Note]] +- A citation cite:smith2020 +- Some *bold* and /italic/ text + +* Code Example + +#+begin_src python +def hello(): + print("Hello, world!") +#+end_src + +* Conclusion + +See also [[id:abc-123][Related Concept]] for more info.