#!/home/brennen/python_bullshit/bin/python3

"""
This script collects metadata from VimWiki pages in my notes.
"""

import os
import re
import sys
import sqlite3

from pprint import pprint
from panflute import *


def resolve_target(target, page):
    """Resolve the target of a link to a canonical name."""
    if re.search('https?://|file:|tel:|mailto:', target):
        return target

    # Special case diary entries because Pandoc returns them as relative links.
    # This will break if you're trying to link to foo/diary/2023-01-01 instead
    # of /diary/2023-01-01, but at least within my notes that's not something
    # I do, and it's probably a rare case in general.
    if re.search(r'diary/\d{4}-\d{2}-\d{2}', target):
        return target

    # At this point, we should be fairly confident the link is a wiki page.
    # Normalize it by removing the .html extension that pandoc throws on there:
    if target.endswith('.html'):
        target = os.path.splitext(target)[0]

    # Check for an absolute path (within the wiki):
    if target.startswith('/'):
        # Note this might behave weirdly with the leading // thing, but I don't
        # _think_ Pandoc supports that anyway:
        return target.replace('/', '', 1)

    page_elements = page.split('/')

    # Get rid of the page name:
    page_elements.pop()

    path_elements = page_elements + target.split('/')

    resolved_path = []
    while len(path_elements) > 0:
        element = path_elements.pop()
        if element == '..' and len(path_elements) > 0:
            # Discard a directory:
            path_elements.pop()
        else:
            resolved_path.append(element)

    resolved_path.reverse()
    return '/'.join(resolved_path)


def extract_values(elem, doc):
    """Extract links from the given document, write to sqlite."""
    # pprint(elem)
    if isinstance(elem, Link):
        link_target = elem.url

        # Skip in-page anchors, for now:
        if link_target.startswith('#'):
            return

        # Insert a row of data:
        c.execute(
            "INSERT OR IGNORE INTO links VALUES (?, ?)",
            (pagename, resolve_target(link_target, pagename))
        )
        conn.commit()


# Ensure we're in the wiki directory:
notes_dir = os.path.join(os.getenv('HOME'), 'notes')
vimwiki_dir = os.path.join(notes_dir, 'vimwiki')
os.chdir(notes_dir)

conn = sqlite3.connect('metadata.db')
c = conn.cursor()

date_re = re.compile('diary/([0-9]{4}-[0-9]{2}-[0-9]{2})')

for input_file in sys.argv[1:]:
    # Trim leading directory and .wiki extension:
    # XXX: This fails badly if in a symlinked path
    input_file_abspath = os.path.abspath(input_file)
    pagename = input_file_abspath.replace(vimwiki_dir + os.sep, '', 1)
    pagename = os.path.splitext(pagename)[0]

    # XXX: do real logging
    # print(pagename)

    # _logscratch is for ephemeral renderings of logs for other pages - skip
    # collecting any metadata for these pages:
    if pagename.startswith('_logscratch'):
        continue

    with open(input_file_abspath) as page:
        doc = convert_text(
            page.read(),
            input_format='vimwiki',
            standalone=True
        )

    # XXX: This should fall back to headers
    title = doc.get_metadata('title')
    if not title:
        title = pagename

    # This falls back to diary paths for date metadata:
    date = doc.get_metadata('date')
    if not date:
        date_match_result = date_re.match(pagename)
        if date_match_result:
            # XXX: This needs to include timezone somehow or another
            # THIS IS A DIRTY HACK, MAKE IT USE THE ACTUAL CURRENT TZ
            date = date_match_result.group(1) + " 00:00:00.000000000-07:00"

    # Log the name and metadata of the page:
    c.execute("DELETE FROM pages WHERE page = ?", (pagename,))
    c.execute("INSERT INTO pages VALUES (?, ?, ?)", (pagename, title, date))

    # Clear any links from this page in case something's been deleted:
    c.execute("DELETE FROM links WHERE page = ?", (pagename,))
    conn.commit()
    doc.walk(extract_values)

conn.close()
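
# A rough sketch of the sqlite schema this script assumes already exists in
# metadata.db -- the script itself never creates these tables. The "page"
# column name comes from the DELETE statements above; the remaining column
# names and constraints are assumptions inferred from the INSERT statements,
# not confirmed:
#
#     conn.executescript("""
#         CREATE TABLE IF NOT EXISTS pages (page TEXT PRIMARY KEY, title TEXT, date TEXT);
#         CREATE TABLE IF NOT EXISTS links (page TEXT, target TEXT, UNIQUE(page, target));
#     """)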