diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..ba8a334 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,6 @@ +# Environment + +- You are in a Nix flake project. Before building, add new files to git (flakes ignore untracked files). +- Run a non-installed tool with `nix run nixpkgs#<tool>`. +- For repeated use, add the tool to the development shell. +- After making changes, verify that the tool builds with `nix build`, and run it with `nix run`. diff --git a/PLAN.md b/PLAN.md new file mode 100644 index 0000000..b0c5520 --- /dev/null +++ b/PLAN.md @@ -0,0 +1,93 @@ +# org-to-quartz: Org Notes to Quartz Converter + +## Project Structure + +``` +org-notes-quartz/ +├── flake.nix +├── pyproject.toml +├── AGENTS.md +├── PLAN.md +└── src/ + └── org_to_quartz/ + ├── __init__.py + ├── main.py # CLI (argparse) + ├── org_parser.py # Parse #+key: values + body + ├── markdown_writer.py # YAML front matter + pandoc conversion + ├── image_handler.py # Detect & copy images per-note + └── citations/ + ├── __init__.py + ├── resolver.py # Orchestrator: tries resolvers in order + ├── zotero.py # Better BibTeX JSON-RPC (port 23119) + ├── bibtex.py # pybtex to parse local .bib + └── doi.py # Format as DOI link fallback +``` + +## Dependencies + +- Python 3.11+ +- pandoc (org → gfm conversion) +- pybtex (parse .bib files) +- requests (Zotero JSON-RPC) + +## Module Responsibilities + +| Module | Responsibility | +|--------|----------------| +| `main.py` | CLI: `org-to-quartz [--bib FILE]` | +| `org_parser.py` | Extract front matter dict + body text from `.org` | +| `markdown_writer.py` | Build YAML front matter, call pandoc, write `.md` | +| `image_handler.py` | Find `[[file:img.png]]` / `./img.png`, copy into the note's output directory | +| `citations/resolver.py` | Chain: Zotero → Bibtex → DOI, return `[Author, Year](zotero://...)` | +| `citations/zotero.py` | JSON-RPC to `http://localhost:23119/better-bibtex/json-rpc` | +| `citations/bibtex.py` | Parse `.bib`, format author/year, link to DOI if available | +| `citations/doi.py` | Format 
`[cite_key](https://doi.org/...)` or raw key | + +## Output Structure + +``` +content/ +├── my-note/ +│ ├── index.md # The converted note +│ └── diagram.png # Copied image +├── another-note/ +│ ├── index.md +│ └── photo.jpg +``` + +## Front Matter Mapping + +| Org | YAML | +|-----|------| +| `#+title:` | `title:` | +| `#+filetags:` | `tags:` (list) | +| `#+date:` | `date:` | +| `#+hugo_lastmod:` | `lastmod:` | +| `#+hugo_tags: noexport` | `draft: true` + TODO comment | + +## Citation Flow + +``` +cite:smith2020 → Zotero JSON-RPC lookup + ├─ Found → [Smith, 2020](zotero://select/items/@smith2020) + └─ Not found → Bibtex lookup + ├─ Found → [Smith, 2020](https://doi.org/10.xxx) + └─ Not found → DOI resolver + └─ [smith2020] (raw) +``` + +## Implementation Order + +1. `flake.nix` + `pyproject.toml` - project skeleton +2. `org_parser.py` - front matter + body extraction +3. `markdown_writer.py` - pandoc integration + YAML output +4. `image_handler.py` - image detection and copying +5. `citations/bibtex.py` - local bib parsing +6. `citations/doi.py` - simple fallback formatter +7. `citations/zotero.py` - JSON-RPC client +8. `citations/resolver.py` - orchestrator +9. `main.py` - CLI wiring + +## TODO + +- [ ] Handle org-roam `id:` links (requires ID→title mapping from all files) diff --git a/src/org_to_quartz/markdown_writer.py b/src/org_to_quartz/markdown_writer.py index 22af425..26ae6e4 100644 --- a/src/org_to_quartz/markdown_writer.py +++ b/src/org_to_quartz/markdown_writer.py @@ -74,9 +74,9 @@ def process_wikilinks(content: str) -> str: def process_citations(content: str, resolver) -> str: """Process org-mode citations and resolve them. 
- Handles: + Handles (after pandoc conversion, brackets may be escaped): - cite:key - - [cite:@key] + - [cite:@key] or \[cite:@key\] - [[cite:key][description]] """ if resolver is None: @@ -86,18 +86,19 @@ def process_citations(content: str, resolver) -> str: cite_key = match.group(1) return resolver.resolve(cite_key) - # cite:key pattern - content = re.sub(r"cite:([a-zA-Z0-9_-]+)", replace_cite, content) + # cite:key pattern (not preceded by @, to avoid double-matching) + content = re.sub(r"(? str: - cite_key = match.group(1) + # [[cite:key][description]] - after pandoc becomes [description](cite:key) + def replace_cite_link(match) -> str: + cite_key = match.group(2) return resolver.resolve(cite_key) - content = re.sub(r"\[\[cite:([a-zA-Z0-9_-]+)\]\[[^\]]*\]\]", replace_cite_with_desc, content) + content = re.sub(r"\[([^\]]+)\]\(cite:([a-zA-Z0-9_-]+)\)", replace_cite_link, content) return content