|
|
- #!/usr/bin/env python3
-
- import os
- import re
- import sys
- import sqlite3
- from panflute import *
-
def resolve_target(target, page):
    """Resolve a link target found on a wiki page to a wiki-relative path.

    Args:
        target: The link URL as it appears in the converted document.
        page: Wiki-relative name of the page containing the link, with
            '/' separating directories (e.g. 'projects/foo').

    Returns:
        ``target`` unchanged when it is an external link (http, https,
        file, tel, mailto); otherwise the page path relative to the wiki
        root, without a leading '/' or a trailing '.html'.
    """
    # External links pass through untouched.  The scheme has to be at the
    # very start of the target: a wiki page called e.g. 'profile:notes'
    # merely *contains* 'file:' and must not be mistaken for a URL.
    external_prefixes = ('http://', 'https://', 'file:', 'tel:', 'mailto:')
    if target.startswith(external_prefixes):
        return target

    # At this point, we should be fairly confident the link is a wiki page.
    # Normalize it by removing the .html extension that pandoc throws on there:
    if target.endswith('.html'):
        target = os.path.splitext(target)[0]

    # Check for an absolute path (within the wiki):
    if target.startswith('/'):
        return target[1:]

    # Relative link: start from the directory holding the linking page
    # (i.e. drop the page name itself) and append the target's segments.
    segments = page.split('/')[:-1] + target.split('/')

    # Walk front to back with a stack so that runs of '..' each remove one
    # real directory ('../../foo' from 'a/b/c' is 'foo', not 'a/b/foo').
    resolved = []
    for segment in segments:
        if segment == '.':
            # Current-directory marker contributes nothing.
            continue
        if segment == '..' and resolved and resolved[-1] != '..':
            # Discard a directory:
            resolved.pop()
        else:
            # Ordinary segment, or a '..' that would climb above the wiki
            # root; the latter is kept literally since it can't be resolved.
            resolved.append(segment)

    return '/'.join(resolved)
-
def extract_values(elem, doc):
    """panflute walk action: store each outgoing link of the current page.

    Relies on the module-level cursor ``c``, connection ``conn`` and
    ``pagename`` that the main loop sets before walking a document.
    Always returns None, so the walked element is left as it is.
    """
    # Only Link elements are of interest.
    if not isinstance(elem, Link):
        return None

    url = elem.url

    # Skip in-page anchors, for now:
    if url.startswith('#'):
        return None

    # One (page, target) row per link; rows already present are ignored.
    row = (pagename, resolve_target(url, pagename))
    c.execute("INSERT OR IGNORE INTO links VALUES (?, ?)", row)

    conn.commit()
    return None
-
# Ensure we're in the wiki directory:
notes_dir = os.path.join(os.getenv('HOME'), 'notes')
vimwiki_dir = os.path.join(notes_dir, 'vimwiki')
os.chdir(notes_dir)

# Opened relative to notes_dir because of the chdir above.
conn = sqlite3.connect('metadata.db')
c = conn.cursor()

try:
    for input_file in sys.argv[1:]:
        # Trim leading dir and .wiki:
        # XXX: This is such hacky garbage, jiminy:
        # NOTE: pagename is a module-level global on purpose;
        # extract_values() reads it while the document is walked.
        pagename = input_file.replace('./vimwiki/', '', 1)
        pagename = pagename.replace(vimwiki_dir + '/', '', 1)
        pagename = os.path.splitext(pagename)[0]

        with open(input_file) as page:
            doc = convert_text(
                page.read(),
                input_format='vimwiki',
                standalone=True
            )

        title = doc.get_metadata('title')
        date = doc.get_metadata('date')

        # Log the name and metadata of the page:
        c.execute("DELETE FROM pages WHERE page = ?", (pagename,))
        c.execute("INSERT INTO pages VALUES (?, ?, ?)", (pagename, title, date))

        # Clear any links from this page in case something's been deleted:
        c.execute("DELETE FROM links WHERE page = ?", (pagename,))
        doc.walk(extract_values)

        # Commit explicitly once per page.  Without this, the statements
        # above are only persisted if extract_values() happens to commit
        # afterwards, so a page with no links could lose its update when
        # the connection is closed.
        conn.commit()
finally:
    # Close the database even if reading or converting a page failed.
    conn.close()
|