#!/usr/bin/env python3
"""Export RESEARCH.org to HTML with Zotero PDF links.

The script reads an Org file, resolves every ``[cite:@key]`` citation
through the Better BibTeX (BBT) JSON-RPC endpoint of a locally running
Zotero, rewrites the citations as Org links, converts the result to
standalone HTML with pandoc, and injects extra markup into the HTML head.
"""

import argparse
import json
import re
import subprocess
import sys
from socket import setdefaulttimeout  # noqa: F401  (kept; no longer used here)
from urllib.error import URLError
from urllib.request import Request, urlopen

BBT_API = "http://localhost:23119/better-bibtex/json-rpc"
TIMEOUT = 5  # seconds

# Matches a single-key Org citation such as "[cite:@smith2010]".
CITE_PATTERN = re.compile(r'\[cite:@([a-zA-Z0-9_-]+)\]')

# Markup injected into the HTML head. It is whitespace-only in this file.
CSS_TO_INJECT = """ """


def call_bbt(method, params=None):
    """Call the BBT JSON-RPC API and return the ``result`` member.

    Args:
        method: JSON-RPC method name, e.g. ``"item.search"``.
        params: Optional positional parameters; omitted from the payload
            when ``None``.

    Returns:
        The decoded ``result`` value, or ``None`` when the request fails,
        times out, or the response carries an ``error`` member. Failures
        are reported on stderr and never raised.
    """
    payload = {"jsonrpc": "2.0", "method": method, "id": 1}
    if params is not None:
        payload["params"] = params

    req = Request(
        BBT_API,
        data=json.dumps(payload).encode('utf-8'),
        headers={'Content-Type': 'application/json'},
    )

    try:
        # Per-request timeout: avoids changing the process-wide socket
        # default, which would affect every other socket in the process.
        with urlopen(req, timeout=TIMEOUT) as response:
            result = json.loads(response.read().decode('utf-8'))
    except URLError as e:
        print(f"Connection error to BBT API: {e}", file=sys.stderr)
        print("Is Zotero with Better BibTeX running?", file=sys.stderr)
        return None
    except TimeoutError:
        print(f"Timeout connecting to BBT API ({TIMEOUT}s)", file=sys.stderr)
        return None
    except Exception as e:  # last-resort boundary: report and carry on
        print(f"Unexpected error calling BBT API: {e}", file=sys.stderr)
        return None

    if not isinstance(result, dict):
        print(
            f"Unexpected error calling BBT API: "
            f"response is not a JSON object: {result!r}",
            file=sys.stderr,
        )
        return None
    if 'error' in result:
        print(f"BBT API error: {result['error']}", file=sys.stderr)
        return None
    return result.get('result')


def _author_name(author):
    """Return a display name for one author entry.

    Uses ``family`` when present, then ``literal``, then ``"Unknown"``, so
    an entry without a family name does not raise ``KeyError``.
    """
    if isinstance(author, dict):
        return author.get('family') or author.get('literal') or "Unknown"
    return "Unknown"


def format_author_label(authors, year):
    """Format an author/year label.

    Examples: 'Chow, 1978', 'Arts & Thompson, 2010', 'Kalaji et al., 2011'.

    Args:
        authors: List of author dicts (may be empty or ``None``).
        year: Value shown after the comma; used as given.

    Returns:
        The label string. With no authors the label is ``Unknown, <year>``.
    """
    if not authors:
        return f"Unknown, {year}"

    first = _author_name(authors[0])
    if len(authors) == 1:
        return f"{first}, {year}"
    if len(authors) == 2:
        return f"{first} & {_author_name(authors[1])}, {year}"
    return f"{first} et al., {year}"


def extract_year(item):
    """Return the first ``issued`` date part of *item*, or ``'?'``.

    ``'?'`` is returned when ``issued`` / ``date-parts`` is missing, empty,
    or not shaped as a list of lists.
    """
    try:
        return item.get('issued', {}).get('date-parts', [['?']])[0][0]
    except (AttributeError, IndexError, KeyError, TypeError):
        return '?'


def _select_item(items, citekey):
    """Pick the search hit to use for *citekey*, or ``None`` if there is none.

    An entry whose ``citekey`` / ``citation-key`` field equals *citekey* is
    preferred. NOTE(review): this assumes search results carry one of those
    fields -- confirm against the BBT version in use. When no entry matches,
    the first hit is used.
    """
    if not isinstance(items, list):
        return None
    candidates = [entry for entry in items if isinstance(entry, dict)]
    if not candidates:
        return None
    for entry in candidates:
        if citekey in (entry.get('citekey'), entry.get('citation-key')):
            return entry
    return candidates[0]


def _first_open_link(attachments):
    """Return the ``open`` link of the first attachment that has one."""
    if not isinstance(attachments, list):
        return None
    for attachment in attachments:
        if isinstance(attachment, dict) and attachment.get('open'):
            return attachment['open']
    return None


def get_zotero_link(citekey):
    """Get a ``(url, label)`` tuple for a cite key.

    The attachment ``open`` link is preferred. Without one, a
    ``zotero://select`` link is built from the item key at the end of the
    item's ``id`` URL.

    Returns:
        ``(url, label)``, or ``(None, None)`` when nothing could be
        resolved. With an attachment but no item info the label is "PDF".
    """
    attachments = call_bbt("item.attachments", [citekey])
    item = _select_item(call_bbt("item.search", [citekey]), citekey)

    label = None
    if item is not None:
        label = format_author_label(item.get('author') or [], extract_year(item))

    open_url = _first_open_link(attachments)
    if open_url:
        return open_url, label or "PDF"

    if item is not None:
        # Item key comes from a URL like
        # "http://zotero.org/users/.../items/ABC12345".
        match = re.search(r'/items/([A-Z0-9]+)$', str(item.get('id', '')))
        if match:
            return f"zotero://select/library/items/{match.group(1)}", label

    return None, None


def replace_citations(content, citation_map):
    """Rewrite every ``[cite:@key]`` in *content* as an Org link.

    Args:
        content: Org source text.
        citation_map: Maps cite key to ``(url, label)``; ``url`` may be
            ``None`` for keys that could not be resolved.

    Returns:
        The text with resolved citations as ``[[url][label]]`` and
        unresolved ones as ``[❓cite:@key]``.
    """
    def _substitute(match):
        citekey = match.group(1)
        url, label = citation_map.get(citekey, (None, citekey))
        if url:
            return f"[[{url}][{label}]]"
        return f"[❓cite:@{citekey}]"  # Fallback with emoji

    return CITE_PATTERN.sub(_substitute, content)


def inject_into_head(html, snippet):
    """Insert *snippet* immediately before the closing ``</head>`` tag.

    When *html* has no closing head tag, the snippet is prepended instead.
    """
    closing = re.search(r'</head\s*>', html, re.IGNORECASE)
    if closing is None:
        return snippet + '\n' + html
    # Slice instead of re.sub so backslashes in the snippet stay literal.
    cut = closing.start()
    return html[:cut] + snippet + '\n' + html[cut:]


def main():
    """Run the export: resolve citations, call pandoc, write the HTML.

    Exits with status 1 when the BBT API is unavailable, the input file is
    missing, pandoc is not on PATH, or pandoc reports an error.
    """
    parser = argparse.ArgumentParser(
        description='Export RESEARCH.org to HTML with Zotero links')
    parser.add_argument('--input', default='RESEARCH.org',
                        help='Input Org file')
    parser.add_argument('--output', default='research.html',
                        help='Output HTML file')
    args = parser.parse_args()

    # Check BBT API is ready
    api_status = call_bbt("api.ready")
    if not api_status:
        print("Error: BBT API not available. "
              "Is Zotero with Better BibTeX running?", file=sys.stderr)
        sys.exit(1)
    if isinstance(api_status, dict):
        bbt_version = api_status.get('betterbibtex', 'unknown')
    else:
        bbt_version = 'unknown'
    print(f"BBT API ready: {bbt_version}")

    # Read Org file
    try:
        with open(args.input, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print(f"Error: Input file '{args.input}' not found", file=sys.stderr)
        sys.exit(1)

    # Extract all cite keys
    unique_keys = sorted(set(CITE_PATTERN.findall(content)))
    print(f"Found {len(unique_keys)} unique citations")

    # Build citation mapping
    citation_map = {}
    warnings_count = 0
    for citekey in unique_keys:
        url, label = get_zotero_link(citekey)
        if url:
            citation_map[citekey] = (url, label)
            print(f" Mapped {citekey} → {label}")
        else:
            warnings_count += 1
            citation_map[citekey] = (None, citekey)
            print(f" Warning: Could not map {citekey}", file=sys.stderr)

    if warnings_count > 0:
        print(f"Completed with {warnings_count} warnings", file=sys.stderr)

    # Replace citations with Org links
    processed_content = replace_citations(content, citation_map)

    # Run pandoc to get HTML first, then post-process to inject the markup
    cmd = ['pandoc', '--from', 'org', '--to', 'html', '--standalone', '-']
    try:
        proc = subprocess.run(
            cmd,
            input=processed_content.encode('utf-8'),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except FileNotFoundError:
        print("Error: pandoc not found on PATH", file=sys.stderr)
        sys.exit(1)

    if proc.returncode != 0:
        pandoc_stderr = proc.stderr.decode('utf-8', errors='replace')
        print(f"Pandoc error: {pandoc_stderr}", file=sys.stderr)
        sys.exit(1)

    # Post-process HTML to inject the markup into the head
    html = inject_into_head(proc.stdout.decode('utf-8'), CSS_TO_INJECT)

    # Write final HTML
    with open(args.output, 'w', encoding='utf-8') as f:
        f.write(html)

    print(f"Successfully exported to {args.output}")


if __name__ == "__main__":
    main()