#!/usr/bin/env python3
"""Index vimwiki pages and their outgoing links into ``metadata.db``.

Usage: pass one or more ``.wiki`` files as arguments.  For every file the
script records the page name, title and date in the ``pages`` table and one
row per outgoing link in the ``links`` table of ``~/notes/metadata.db``.
"""

import os
import re
import sqlite3
import sys

from panflute import Link, convert_text

# Link targets that start with one of these are external and are stored
# verbatim instead of being resolved against the linking page.
_EXTERNAL_LINK = re.compile(r'https?://|file:|tel:|mailto:')


def resolve_target(target: str, page: str) -> str:
    """Return the wiki-relative page name that ``target`` refers to.

    Args:
        target: The URL of a link as found in the converted document.
        page: Name of the page containing the link, relative to the wiki
            root and without extension (e.g. ``"projects/foo"``).

    Returns:
        ``target`` unchanged when it is an external link (http(s), file,
        tel, mailto); otherwise the link resolved against the directory of
        ``page``, with any trailing ``.html`` removed and ``..`` segments
        collapsed.  A ``..`` that would climb above the wiki root is kept
        literally.
    """
    # Anchor at the start: a wiki page such as "profile:x" merely contains
    # "file:" and must not be mistaken for an external URL.
    if _EXTERNAL_LINK.match(target):
        return target

    # At this point, we should be fairly confident the link is a wiki page.
    # Normalize it by removing the .html extension that pandoc throws on there:
    if target.endswith('.html'):
        target = os.path.splitext(target)[0]

    # An absolute path (within the wiki) only needs its leading slash dropped:
    if target.startswith('/'):
        return target[1:]

    # Directory of the linking page (drop the page name itself), followed
    # by the segments of the relative target.
    segments = page.split('/')[:-1] + target.split('/')

    # Walk left to right so that each ".." removes the nearest preceding
    # real directory; runs like "../.." therefore climb two levels.
    resolved = []
    for segment in segments:
        if segment == '..' and resolved and resolved[-1] != '..':
            resolved.pop()
        else:
            resolved.append(segment)
    return '/'.join(resolved)


def extract_values(elem, doc):
    """panflute walk action: record every link of the current page.

    Reads the module-level ``pagename`` and cursor ``c``.  In-page anchors
    (``#...``) are skipped.  Nothing is returned, so the document is left
    unmodified.  The caller is responsible for committing.
    """
    if not isinstance(elem, Link):
        return

    link_target = elem.url
    # Skip in-page anchors, for now:
    if link_target.startswith('#'):
        return

    c.execute(
        "INSERT OR IGNORE INTO links VALUES (?, ?)",
        (pagename, resolve_target(link_target, pagename)),
    )


def _page_name(input_file: str, wiki_dir: str) -> str:
    """Strip the leading wiki directory and the extension from a file path."""
    # Only strip genuine prefixes; the same text appearing later in the
    # path is part of the page name.
    for prefix in ('./vimwiki/', wiki_dir + '/'):
        if input_file.startswith(prefix):
            input_file = input_file[len(prefix):]
    return os.path.splitext(input_file)[0]


# Ensure we're in the wiki directory.  expanduser() also works when $HOME
# is not set, where os.getenv('HOME') would return None.
notes_dir = os.path.join(os.path.expanduser('~'), 'notes')
vimwiki_dir = os.path.join(notes_dir, 'vimwiki')
os.chdir(notes_dir)

conn = sqlite3.connect('metadata.db')
try:
    c = conn.cursor()

    for input_file in sys.argv[1:]:
        # Trim leading dir and .wiki:
        pagename = _page_name(input_file, vimwiki_dir)

        with open(input_file) as page:
            doc = convert_text(
                page.read(),
                input_format='vimwiki',
                standalone=True,
            )

        title = doc.get_metadata('title')
        date = doc.get_metadata('date')

        # Log the name and metadata of the page:
        c.execute("DELETE FROM pages WHERE page = ?", (pagename,))
        c.execute(
            "INSERT INTO pages VALUES (?, ?, ?)",
            (pagename, title, date),
        )

        # Clear any links from this page in case something's been deleted:
        c.execute("DELETE FROM links WHERE page = ?", (pagename,))

        doc.walk(extract_values)

        # Commit once per page, whether or not it contained links;
        # otherwise the rows of link-less pages are lost on close().
        conn.commit()
finally:
    conn.close()