Text::Markdown::Discount
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1799 lines
36 KiB

/* markdown: a C implementation of John Gruber's Markdown markup language.
*
* Copyright (C) 2007 David L Parsons.
* The redistribution terms are provided in the COPYRIGHT file that must
* be distributed with this source code.
*/
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include <time.h>
#include <ctype.h>
#include "config.h"
#include "cstring.h"
#include "markdown.h"
#include "amalloc.h"
/* generic comparison-callback type; used to cast __mkd_footsort()
 * into the shape bsearch() wants.
 */
typedef int (*stfu)(const void*,const void*);
/* callback type for tickhandler(): called with the MMIOT and the size
 * of the span enclosed by the tick marks.
 */
typedef void (*spanhandler)(MMIOT*,int);
/* forward declarations */
static void text(MMIOT *f);
static Paragraph *display(Paragraph*, MMIOT*);
/* externals from markdown.c */
int __mkd_footsort(Footnote *, Footnote *);
/*
 * append <size> bytes, starting at <bfr>, to the generator input buffer.
 * A size of zero or less appends nothing.
 */
static void
push(char *bfr, int size, MMIOT *f)
{
    int i;

    for ( i = 0; i < size; i++ )
        EXPAND(f->in) = bfr[i];
}
/* look <i> characters ahead of the cursor without consuming anything.
 * peek(f,1) is the next unread byte and peek(f,0) is the byte that was
 * pulled most recently.  Positions outside the input buffer give EOF.
 */
static inline int
peek(MMIOT *f, int i)
{
    int where = i + (f->isp - 1);

    if ( where < 0 || where >= S(f->in) )
        return EOF;
    return T(f->in)[where];
}
/* consume and return the next byte of the input buffer, or return EOF
 * (leaving the cursor alone) when the buffer is exhausted.
 */
static inline int
pull(MMIOT *f)
{
    if ( f->isp >= S(f->in) )
        return EOF;
    return T(f->in)[f->isp++];
}
/* address of the next unread byte in the input buffer.
 */
static inline char*
cursor(MMIOT *f)
{
    return &T(f->in)[f->isp];
}
/* is the character <i> positions from the cursor whitespace?  Anything
 * that compares below ' ' counts as well, which includes the EOF that
 * peek() returns for positions outside the buffer.
 */
static inline int
isthisspace(MMIOT *f, int i)
{
    int c = peek(f, i);

    if ( isspace(c) )
        return 1;
    return c < ' ';
}
/* is the character <i> positions from the cursor alphanumeric?
 * EOF is never alphanumeric.
 */
static inline int
isthisalnum(MMIOT *f, int i)
{
    int c = peek(f, i);

    if ( c == EOF )
        return 0;
    return isalnum(c) ? 1 : 0;
}
/* is the character <i> positions from the cursor something that cannot
 * be part of a word (whitespace as per isthisspace(), or punctuation)?
 */
static inline int
isthisnonword(MMIOT *f, int i)
{
    if ( isthisspace(f, i) )
        return 1;
    return ispunct(peek(f,i)) ? 1 : 0;
}
/* return/set the current cursor position.
 *
 * Both macros parenthesize their arguments so that they expand
 * correctly when handed an expression, e.g. mmiottell(&sub) or
 * mmiotseek(f, start+1).
 */
#define mmiotseek(f,x) ((f)->isp = (x))
#define mmiottell(f) ((f)->isp)
/* move the cursor <i> characters forward (or -<i> characters backward)
 * in the input buffer.  A move that would put the cursor before the
 * start of the buffer is ignored.
 */
static void
shift(MMIOT *f, int i)
{
    int target = f->isp + i;

    if ( target < 0 )
        return;
    f->isp = target;
}
/* Qchar() -- append one character to the text of the last block in the
 *            output queue.  If the queue is empty a zeroed bTEXT block
 *            is created first to hold it.
 */
static void
Qchar(int c, MMIOT *f)
{
    block *tail;

    if ( S(f->Q) != 0 )
        tail = &T(f->Q)[S(f->Q)-1];
    else {
        tail = &EXPAND(f->Q);
        memset(tail, 0, sizeof *tail);
        tail->b_type = bTEXT;
    }

    EXPAND(tail->b_text) = c;
}
/* Qstring() -- append a NUL-terminated string to the output queue,
 *              one character at a time via Qchar().
 */
static void
Qstring(char *s, MMIOT *f)
{
    for ( ; *s != '\0'; ++s )
        Qchar(*s, f);
}
/* Qwrite() -- append exactly <size> bytes from <s> to the output queue
 *             (embedded NULs included.)
 */
static void
Qwrite(char *s, int size, MMIOT *f)
{
    int i;

    for ( i = 0; i < size; i++ )
        Qchar(s[i], f);
}
/* Qprintf() -- printf-style formatting into the output queue.
 *
 * Short results are formatted into a stack buffer.  When the formatted
 * text does not fit there, it is formatted again into a heap buffer of
 * exactly the needed size, so long interpolated strings (class names,
 * footnote prefixes, ...) are no longer silently cut off at 79 bytes.
 */
static void
Qprintf(MMIOT *f, char *fmt, ...)
{
    char bfr[80];
    char *big;
    va_list ptr;
    int need;

    va_start(ptr, fmt);
    need = vsnprintf(bfr, sizeof bfr, fmt, ptr);
    va_end(ptr);

    if ( need < 0 )
        return;                 /* formatting failed; bfr is unusable */

    if ( (size_t)need < sizeof bfr ) {
        Qstring(bfr, f);
        return;
    }

    /* the result was truncated; vsnprintf told us how much room the
     * whole thing needs (not counting the terminating NUL)
     */
    if ( (big = malloc((size_t)need + 1)) == 0 ) {
        /* out of memory: fall back to the truncated text */
        Qstring(bfr, f);
        return;
    }

    va_start(ptr, fmt);
    vsnprintf(big, (size_t)need + 1, fmt, ptr);
    va_end(ptr);

    Qstring(big, f);
    free(big);
}
/* Qem() -- append an emphasis marker to the output queue.
 *
 * Two blocks are added: one describing a run of <count> <c> characters
 * (bSTAR for '*', bUNDER for anything else), and after it an all-zero
 * block that subsequent Qchar() calls will append to.
 */
static void
Qem(MMIOT *f, char c, int count)
{
    block *marker;
    block *following;

    marker = &EXPAND(f->Q);
    memset(marker, 0, sizeof *marker);
    marker->b_char = c;
    marker->b_count = count;
    marker->b_type = (c == '*') ? bSTAR : bUNDER;

    /* marker must not be touched past this point; growing the queue
     * may move it.
     */
    following = &EXPAND(f->Q);
    memset(following, 0, sizeof *following);
}
/* generate html from a markup fragment.
 *
 * The <size>-byte fragment at <bfr> is pushed into a temporary MMIOT
 * that is initialized with f's footnotes, copies f's callbacks and
 * ref_prefix, and runs with f->flags | flags.  The fragment is run
 * through text() and ___mkd_emblock(), and whatever ends up in the
 * temporary's output is appended to f's output queue.
 *
 * If <esc> is non-null it is a string of extra characters that may be
 * backslash-escaped inside the fragment; it is linked in front of f's
 * escape list for the duration of the reparse.
 */
void
___mkd_reparse(char *bfr, int size, int flags, MMIOT *f, char *esc)
{
MMIOT sub;
struct escaped e;
___mkd_initmmiot(&sub, f->footnotes);
sub.flags = f->flags | flags;
sub.cb = f->cb;
sub.ref_prefix = f->ref_prefix;
if ( esc ) {
/* e lives on this stack frame; it is only reachable through sub */
sub.esc = &e;
e.up = f->esc;
e.text = esc;
}
else
sub.esc = f->esc;
push(bfr, size, &sub);
/* NUL-terminate the input, but don't count the terminator as input */
EXPAND(sub.in) = 0;
S(sub.in)--;
text(&sub);
___mkd_emblock(&sub);
Qwrite(T(sub.out), S(sub.out), f);
___mkd_freemmiot(&sub, f->footnotes);
}
/*
 * check the escape list for special cases: returns 1 if <c> appears in
 * the text of any entry on f's chain of escape lists, 0 otherwise.
 */
static int
escaped(MMIOT *f, char c)
{
    struct escaped *level;

    for ( level = f->esc; level; level = level->up )
        if ( strchr(level->text, c) != 0 )
            return 1;

    return 0;
}
/*
 * write out a url, escaping problematic characters.
 *
 * s, size: the url text (not necessarily NUL-terminated)
 * display: nonzero when the url is being written as visible text, in
 *          which case whitespace is passed through instead of being
 *          %-encoded.
 *
 * & and < become html entities, a double quote becomes %22, other
 * alphanumerics and punctuation are copied, ^C (003) becomes a space,
 * and everything else is written as %XX.
 */
static void
puturl(char *s, int size, MMIOT *f, int display)
{
unsigned char c;
while ( size-- > 0 ) {
c = *s++;
/* a backslash followed by another character: step onto that
 * character, keeping the backslash in the output only when the
 * character is not punctuation or whitespace.  (A trailing
 * backslash falls through and is written like any other
 * punctuation.)
 */
if ( c == '\\' && size-- > 0 ) {
c = *s++;
if ( !( ispunct(c) || isspace(c) ) )
Qchar('\\', f);
}
if ( c == '&' )
Qstring("&amp;", f);
else if ( c == '<' )
Qstring("&lt;", f);
else if ( c == '"' )
Qstring("%22", f);
else if ( isalnum(c) || ispunct(c) || (display && isspace(c)) )
Qchar(c, f);
else if ( c == 003 ) /* untokenize ^C */
Qstring(" ", f);
else
Qprintf(f, "%%%02X", c);
}
}
/* consume whitespace until the next unread character is not whitespace.
 * Returns that character (which is left unread), or EOF at end of input.
 */
static int
eatspace(MMIOT *f)
{
    int c = peek(f, 1);

    while ( (c != EOF) && isspace(c) ) {
        pull(f);
        c = peek(f, 1);
    }
    return c;
}
/* (match (a (nested (parenthetical (string.)))))
 *
 * Called with the cursor just past an opening <in> character.  Consumes
 * input up to and including the matching <out>, tracking nested
 * <in>/<out> pairs; a backslash directly followed by <in> or <out>
 * hides that character from the nesting count.
 *
 * Returns the number of characters consumed, not counting the closing
 * <out>, or EOF if the input ran out before the match was found.
 */
static int
parenthetical(int in, int out, MMIOT *f)
{
int size, indent, c;
for ( indent=1,size=0; indent; size++ ) {
if ( (c = pull(f)) == EOF )
return EOF;
else if ( (c == '\\') && (peek(f,1) == out || peek(f,1) == in) ) {
/* escaped delimiter: count and swallow it */
++size;
pull(f);
}
else if ( c == in )
++indent;
else if ( c == out )
--indent;
}
/* size counted the closing delimiter too; take it back off */
return size ? (size-1) : 0;
}
/* extract a []-delimited label from the input stream.  On success *res
 * is pointed at the label text inside the input buffer (nothing is
 * copied) and 1 is returned; 0 means there was no closing ']'.
 */
static int
linkylabel(MMIOT *f, Cstring *res)
{
    char *start = cursor(f);
    int len = parenthetical('[', ']', f);

    if ( len == EOF )
        return 0;

    T(*res) = start;
    S(*res) = len;
    return 1;
}
/* see if the quote-prefixed linky segment is actually a title.
 *
 * Scans forward for a <quote> character that is followed, after
 * optional whitespace, by ')'.  If one is found, ref->title is pointed
 * at the text inside the input buffer and 1 is returned, with the ')'
 * left unread.  Otherwise the cursor is put back where it started and
 * 0 is returned.
 *
 * NOTE(review): the title pointer/size arithmetic assumes the opening
 * quote is the next unread character on entry -- confirm for every
 * caller.
 */
static int
linkytitle(MMIOT *f, char quote, Footnote *ref)
{
int whence = mmiottell(f);
char *title = cursor(f);
char *e;
register int c;
while ( (c = pull(f)) != EOF ) {
/* e is one past the character that was just pulled */
e = cursor(f);
if ( c == quote ) {
if ( (c = eatspace(f)) == ')' ) {
/* skip the opening quote, and leave both quotes out of the size */
T(ref->title) = 1+title;
S(ref->title) = (e-title)-2;
return 1;
}
}
}
mmiotseek(f, whence);
return 0;
}
/* extract a =WWWxHHH size from the input stream.
 *
 * Only attempted when the most recently pulled character (peek(f,0))
 * is whitespace.  The digits before the 'x' are the width, the digits
 * after it the height.  The size must be followed (after optional
 * whitespace) by ')' or by a quoted title that linkytitle() accepts.
 *
 * On success ref->width and ref->height are set and 1 is returned; on
 * failure the cursor is restored and 0 is returned.
 */
static int
linkysize(MMIOT *f, Footnote *ref)
{
int height=0, width=0;
int whence = mmiottell(f);
int c;
if ( isspace(peek(f,0)) ) {
pull(f); /* eat '=' */
for ( c = pull(f); isdigit(c); c = pull(f))
width = (width * 10) + (c - '0');
if ( c == 'x' ) {
for ( c = pull(f); isdigit(c); c = pull(f))
height = (height*10) + (c - '0');
/* c is now the (already consumed) first non-digit after the height */
if ( isspace(c) )
c = eatspace(f);
if ( (c == ')') || ((c == '\'' || c == '"') && linkytitle(f, c, ref)) ) {
ref->height = height;
ref->width = width;
return 1;
}
}
}
mmiotseek(f, whence);
return 0;
}
/* extract a <...>-encased url from the input stream.
 * (markdown 1.0.2b8 compatibility; older versions
 * of markdown treated the < and > as syntactic
 * sugar that didn't have to be there. 1.0.2b8
 * requires a closing >, and then falls into the
 * title or closing )
 *
 * Called with the cursor just past the opening '<'.  p->link is pointed
 * at the url text inside the input buffer.  <image> enables the
 * =WWWxHHH size syntax.  Returns nonzero if the url was followed by a
 * title, a size, or ')'; returns 0 otherwise.
 */
static int
linkybroket(MMIOT *f, int image, Footnote *p)
{
int c;
int good = 0;
T(p->link) = cursor(f);
for ( S(p->link)=0; (c = pull(f)) != '>'; ++S(p->link) ) {
/* pull in all input until a '>' is found, or die trying.
*/
if ( c == EOF )
return 0;
else if ( (c == '\\') && ispunct(peek(f,2)) ) {
/* swallow one extra character along with the backslash */
++S(p->link);
pull(f);
}
}
c = eatspace(f);
/* next nonspace needs to be a title, a size, or )
*/
if ( ( c == '\'' || c == '"' ) && linkytitle(f,c,p) )
good=1;
else if ( image && (c == '=') && linkysize(f,p) )
good=1;
else
good=( c == ')' );
if ( good ) {
/* consume the closing ')' if it's the next character */
if ( peek(f, 1) == ')' )
pull(f);
___mkd_tidy(&p->link);
}
return good;
} /* linkybroket */
/* extract a (-prefixed url from the input stream.
 * the label is either of the format `<link>`, where I
 * extract until I find a >, or it is of the format
 * `text`, where I extract until I reach a ')', a quote,
 * or (if image) a '='
 *
 * Called with the cursor just past the '('.  p->link is pointed at the
 * url text inside the input buffer; a title and/or size found along
 * the way is stored in p by linkytitle()/linkysize().  Returns 1 on
 * success and 0 if the input ran out first.
 */
static int
linkyurl(MMIOT *f, int image, Footnote *p)
{
int c;
int mayneedtotrim=0;
if ( (c = eatspace(f)) == EOF )
return 0;
if ( c == '<' ) {
pull(f);
/* without MKD_1_COMPAT the closing '>' is mandatory */
if ( !(f->flags & MKD_1_COMPAT) )
return linkybroket(f,image,p);
mayneedtotrim=1;
}
T(p->link) = cursor(f);
for ( S(p->link)=0; (c = peek(f,1)) != ')'; ++S(p->link) ) {
if ( c == EOF )
return 0;
else if ( (c == '"' || c == '\'') && linkytitle(f, c, p) )
break;
else if ( image && (c == '=') && linkysize(f, p) )
break;
else if ( (c == '\\') && ispunct(peek(f,2)) ) {
/* backslash + punctuation: take both characters */
++S(p->link);
pull(f);
}
pull(f);
}
/* consume the closing ')' if it's the next character */
if ( peek(f, 1) == ')' )
pull(f);
___mkd_tidy(&p->link);
/* compat mode: a url that started with '<' may end with a '>' to drop */
if ( mayneedtotrim && (T(p->link)[S(p->link)-1] == '>') )
--S(p->link);
return 1;
}
/* prefixes for <automatic links>
 *
 * Each entry pairs a protocol prefix with its length; _aprotocol()
 * computes the length from the string literal at compile time.
 * isautoprefix() matches these without regard to case.
 */
static struct _protocol {
char *name;
int nlen;
} protocol[] = {
#define _aprotocol(x) { x, (sizeof x)-1 }
_aprotocol( "https:" ),
_aprotocol( "http:" ),
_aprotocol( "news:" ),
_aprotocol( "ftp:" ),
#undef _aprotocol
};
/* number of entries in protocol[] */
#define NRPROTOCOLS (sizeof protocol / sizeof protocol[0])
/* does the <size>-byte string at <text> start with one of the
 * protocol[] prefixes?  The comparison ignores case.
 */
static int
isautoprefix(char *text, int size)
{
    struct _protocol *p;
    struct _protocol *end = protocol + NRPROTOCOLS;

    for ( p = protocol; p != end; ++p ) {
        if ( size < p->nlen )
            continue;
        if ( strncasecmp(text, p->name, p->nlen) == 0 )
            return 1;
    }
    return 0;
}
/*
 * all the tag types that linkylinky can produce are
 * defined by this structure.
 */
typedef struct linkytype {
char *pat; /* pseudo-protocol prefix that selects this type (0 for img/a) */
int szpat; /* length of pat; skipped at the start of the link text */
char *link_pfx; /* tag prefix and link pointer (eg: `<a href="`) */
char *link_sfx; /* link suffix (eg: `"`) */
int WxH; /* this tag allows width x height arguments */
char *text_pfx; /* text prefix (eg: `>`) */
char *text_sfx; /* text suffix (eg: `</a>`) */
int flags; /* reparse flags */
int kind; /* tag is url or something else? */
#define IS_URL 0x01
} linkytype;
/* ![text](url): the link text becomes the alt attribute */
static linkytype imaget = { 0, 0, "<img src=\"", "\"",
1, " alt=\"", "\" />", MKD_NOIMAGE|MKD_TAGTEXT, IS_URL };
/* [text](url) */
static linkytype linkt = { 0, 0, "<a href=\"", "\"",
0, ">", "</a>", MKD_NOLINKS, IS_URL };
/*
 * pseudo-protocols for [][];
 *
 * id: generates <span id="link">tag</span>
 * raw: just dump the link without any processing
 * lang: generates <span lang="link">tag</span>
 * abbr: generates <abbr title="link">tag</abbr>
 * class: generates <span class="link">tag</span>
 */
static linkytype specials[] = {
{ "id:", 3, "<span id=\"", "\"", 0, ">", "</span>", 0, 0 },
{ "raw:", 4, 0, 0, 0, 0, 0, MKD_NOHTML, 0 },
{ "lang:", 5, "<span lang=\"", "\"", 0, ">", "</span>", 0, 0 },
{ "abbr:", 5, "<abbr title=\"", "\"", 0, ">", "</abbr>", 0, 0 },
{ "class:", 6, "<span class=\"", "\"", 0, ">", "</span>", 0, 0 },
} ;
/* number of elements in array x */
#define NR(x) (sizeof x / sizeof x[0])
/* see if t starts with one of our pseudo-protocols (compared without
 * regard to case, and only if something follows the prefix.)  Returns
 * the matching specials[] entry, or 0 if there isn't one.
 */
static linkytype *
pseudo(Cstring t)
{
    linkytype *r = specials;
    linkytype *end = specials + NR(specials);

    for ( ; r != end; ++r )
        if ( (S(t) > r->szpat) && (strncasecmp(T(t), r->pat, r->szpat) == 0) )
            return r;

    return 0;
}
/* print out the start of an `img' or `a' tag, applying callbacks as needed.
 *
 * Writes tag->link_pfx, the link itself, and tag->link_sfx.  Nothing is
 * written when IS_LABEL is set in f->flags.
 *
 * For url-type tags the e_url callback, if there is one and it returns
 * a replacement, supplies the text to write; otherwise the link (minus
 * the tag's pseudo-protocol prefix) is written with puturl().  Other
 * tag types reparse the link as tag text.  If the e_flags callback
 * returns a string, it is appended after a space.  Strings returned by
 * either callback are handed to e_free when that is set.
 */
static void
printlinkyref(MMIOT *f, linkytype *tag, char *link, int size)
{
char *edit;
if ( f->flags & IS_LABEL )
return;
Qstring(tag->link_pfx, f);
if ( tag->kind & IS_URL ) {
if ( f->cb && f->cb->e_url && (edit = (*f->cb->e_url)(link, size, f->cb->e_data)) ) {
puturl(edit, strlen(edit), f, 0);
if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data);
}
else
puturl(link + tag->szpat, size - tag->szpat, f, 0);
}
else
___mkd_reparse(link + tag->szpat, size - tag->szpat, MKD_TAGTEXT, f, 0);
Qstring(tag->link_sfx, f);
if ( f->cb && f->cb->e_flags && (edit = (*f->cb->e_flags)(link, size, f->cb->e_data)) ) {
Qchar(' ', f);
Qstring(edit, f);
if ( f->cb->e_free ) (*f->cb->e_free)(edit, f->cb->e_data);
}
} /* printlinkyref */
/* helper function for php markdown extra footnotes; allow the user to
 * define a prefix tag instead of just `fn`.
 *
 * Returns p->ref_prefix if one has been set, otherwise the constant
 * string "fn".  (Written as a prototype-style definition; old-style
 * identifier-list definitions are no longer valid in current C.)
 */
static char *
p_or_nothing(MMIOT *p)
{
    return p->ref_prefix ? p->ref_prefix : "fn";
}
/* php markdown extra/daring fireball style print footnotes.
 *
 * A footnote can only be referenced once; a second reference returns 0.
 * When generating a label only the link text is written.  Otherwise the
 * footnote is marked REFERENCED, given the next reference number, and a
 * superscripted link to it is written.
 */
static int
extra_linky(MMIOT *f, Cstring text, Footnote *ref)
{
    if ( ref->flags & REFERENCED )
        return 0;

    if ( f->flags & IS_LABEL ) {
        ___mkd_reparse(T(text), S(text), linkt.flags, f, 0);
        return 1;
    }

    ref->flags |= REFERENCED;
    ref->refnumber = ++ f->reference;
    Qprintf(f, "<sup id=\"%sref:%d\"><a href=\"#%s:%d\" rel=\"footnote\">%d</a></sup>",
            p_or_nothing(f), ref->refnumber,
            p_or_nothing(f), ref->refnumber, ref->refnumber);
    return 1;
} /* extra_linky */
/* print out a linky (or fail if it's Not Allowed)
 *
 * text:  the link text (the part inside the first [])
 * image: nonzero for ![...] image syntax
 * ref:   the link, title and size to use
 *
 * Returns 1 if something was written, 0 if the current flags don't
 * allow this kind of link.
 */
static int
linkyformat(MMIOT *f, Cstring text, int image, Footnote *ref)
{
linkytype *tag;
if ( image )
tag = &imaget;
else if ( tag = pseudo(ref->link) ) {
/* pseudo-protocols are an extension, and are never "safe" */
if ( f->flags & (MKD_NO_EXT|MKD_SAFELINK) )
return 0;
}
else if ( (f->flags & MKD_SAFELINK) && T(ref->link)
&& (T(ref->link)[0] != '/')
&& !isautoprefix(T(ref->link), S(ref->link)) )
/* if MKD_SAFELINK, only accept links that are local or
* a well-known protocol
*/
return 0;
else
tag = &linkt;
/* each tag type lists the flags under which it must not be generated */
if ( f->flags & tag->flags )
return 0;
if ( f->flags & IS_LABEL )
___mkd_reparse(T(text), S(text), tag->flags, f, 0);
else if ( tag->link_pfx ) {
printlinkyref(f, tag, T(ref->link), S(ref->link));
if ( tag->WxH ) {
if ( ref->height ) Qprintf(f," height=\"%d\"", ref->height);
if ( ref->width ) Qprintf(f, " width=\"%d\"", ref->width);
}
if ( S(ref->title) ) {
Qstring(" title=\"", f);
___mkd_reparse(T(ref->title), S(ref->title), MKD_TAGTEXT, f, 0);
Qchar('"', f);
}
Qstring(tag->text_pfx, f);
___mkd_reparse(T(text), S(text), tag->flags, f, 0);
Qstring(tag->text_sfx, f);
}
else
/* no tag prefix (raw:): write the link, minus the prefix, untouched */
Qwrite(T(ref->link) + tag->szpat, S(ref->link) - tag->szpat, f);
return 1;
} /* linkyformat */
/*
 * process embedded links and images.
 *
 * Called with the cursor just past the opening '[' (or "![" when
 * <image> is nonzero.)  Handles [text](url), [text][tag], and -- unless
 * MKD_1_COMPAT is set -- the implicit [text] form, looking tags up in
 * f's footnote list.  Returns nonzero if a link was written; otherwise
 * the cursor is put back where it started and 0 is returned.
 */
static int
linkylinky(int image, MMIOT *f)
{
int start = mmiottell(f);
Cstring name;
Footnote key, *ref;
int status = 0;
int extra_footnote = 0;
CREATE(name);
memset(&key, 0, sizeof key);
if ( linkylabel(f, &name) ) {
if ( peek(f,1) == '(' ) {
/* inline link: [text](url ...) */
pull(f);
if ( linkyurl(f, image, &key) )
status = linkyformat(f, name, image, &key);
}
else {
int goodlink, implicit_mark = mmiottell(f);
/* one whitespace character is allowed between the two []s */
if ( isspace(peek(f,1)) )
pull(f);
if ( peek(f,1) == '[' ) {
pull(f); /* consume leading '[' */
goodlink = linkylabel(f, &key.tag);
}
else {
/* new markdown implicit name syntax doesn't
* require a second []
*/
mmiotseek(f, implicit_mark);
goodlink = !(f->flags & MKD_1_COMPAT);
/* [^name] on its own is a footnote reference when footnotes are enabled */
if ( (f->flags & MKD_EXTRA_FOOTNOTE) && (!image) && S(name) && T(name)[0] == '^' )
extra_footnote = 1;
}
if ( goodlink ) {
/* an empty (or missing) second [] means "use the link text as the tag" */
if ( !S(key.tag) ) {
DELETE(key.tag);
T(key.tag) = T(name);
S(key.tag) = S(name);
}
if ( ref = bsearch(&key, T(*f->footnotes), S(*f->footnotes),
sizeof key, (stfu)__mkd_footsort) ) {
if ( extra_footnote )
status = extra_linky(f,name,ref);
else
status = linkyformat(f, name, image, ref);
}
}
}
}
DELETE(name);
___mkd_freefootnote(&key);
if ( status == 0 )
mmiotseek(f, start);
return status;
}
/* write a character to output, doing text escapes ( & -> &amp;,
 * > -> &gt; < -> &lt; ); anything else is written as is.
 */
static void
cputc(int c, MMIOT *f)
{
    if ( c == '&' )
        Qstring("&amp;", f);
    else if ( c == '>' )
        Qstring("&gt;", f);
    else if ( c == '<' )
        Qstring("&lt;", f);
    else
        Qchar(c, f);
}
/*
 * convert an email address to a string of nonsense: every byte is
 * written as a numeric character reference, with COINTOSS() picking
 * hexadecimal or decimal form for each one.
 */
static void
mangle(char *s, int len, MMIOT *f)
{
    unsigned char *byte = (unsigned char*)s;
    int i;

    for ( i = 0; i < len; i++ ) {
        Qstring("&#", f);
        Qprintf(f, COINTOSS() ? "x%02x;" : "%02d;", byte[i] );
    }
}
/* nrticks() -- count the consecutive <tickchar> characters that start
 *              <offset> positions from the cursor (as per peek().)
 */
static int
nrticks(int offset, int tickchar, MMIOT *f)
{
    int tick;

    for ( tick = 0; peek(f, offset+tick) == tickchar; ++tick )
        ;
    return tick;
} /* nrticks */
/* matchticks() -- match a certain # of ticks, and if that fails
 * match the largest subset of those ticks.
 *
 * if a subset was matched, return the # of ticks
 * that were matched.
 *
 * Scanning starts <ticks> positions past peek(f,0), i.e. just after
 * the opening run.  The return value is the number of characters
 * between the opening run and the closing run, or 0 if nothing
 * matched.  *endticks is set to the length of the closing run: <ticks>
 * for an exact match, or the length of the longest shorter run when
 * only a subset could be matched.
 */
static int
matchticks(MMIOT *f, int tickchar, int ticks, int *endticks)
{
int size, count, c;
int subsize=0, subtick=0;
*endticks = ticks;
for (size = 0; (c=peek(f,size+ticks)) != EOF; size ++) {
if ( (c == tickchar) && ( count = nrticks(size+ticks,tickchar,f)) ) {
if ( count == ticks )
return size;
else if ( count ) {
/* remember the first run of each new longest (but too short) length */
if ( (count > subtick) && (count < ticks) ) {
subsize = size;
subtick = count;
}
/* step over the run so it isn't counted again piecemeal */
size += count;
}
}
}
if ( subsize ) {
*endticks = subtick;
return subsize;
}
return 0;
} /* matchticks */
/* code() -- write a string out as code. The only characters that have
 * special meaning in a code block are `<', `>' and `&', which
 * are /always/ expanded to &lt;, &gt; and &amp;
 *
 * Two more translations are done on the way out:
 *   - ^C (003), the token that stands in for two trailing spaces, is
 *     expanded back to those two spaces;
 *   - a backslash in front of a character on the escape list is
 *     dropped, and the character written in its place.
 */
static void
code(MMIOT *f, char *s, int length)
{
    int i, c;

    for ( i = 0; i < length; i++ ) {
        c = s[i];

        if ( c == 003 )         /* ^C: expand back to 2 spaces */
            Qstring("  ", f);
        else if ( c == '\\' && (i < length-1) && escaped(f, s[i+1]) )
            cputc(s[++i], f);
        else
            cputc(c, f);
    }
} /* code */
/* delspan() -- write out a chunk of text, blocking with <del>...</del>
 *              The chunk is the <size> bytes that start at the most
 *              recently pulled character; it is reparsed as markdown.
 */
static void
delspan(MMIOT *f, int size)
{
    char *span = cursor(f) - 1;

    Qstring("<del>", f);
    ___mkd_reparse(span, size, 0, f, 0);
    Qstring("</del>", f);
}
/* codespan() -- write out a chunk of text as code, trimming one
 * space off the front and/or back as appropriate.
 *
 * The span is the <size> bytes that start at peek(f,0).
 */
static void
codespan(MMIOT *f, int size)
{
int i=0;
/* drop one trailing space (but never shrink a 1-byte span to nothing) */
if ( size > 1 && peek(f, size-1) == ' ' ) --size;
/* drop one leading space */
if ( peek(f,i) == ' ' ) ++i, --size;
Qstring("<code>", f);
/* cursor(f)-1 is the address of peek(f,0) */
code(f, cursor(f)+(i-1), size);
Qstring("</code>", f);
} /* codespan */
/* before letting a tag through, validate against MKD_NOHTML,
 * MKD_NOLINKS and MKD_NOIMAGE.  Returns 1 if the tag that starts at
 * the next unread character must not be passed through: any tag when
 * html is disabled, <a> when links are disabled, <img> when images are
 * disabled.
 */
static int
forbidden_tag(MMIOT *f)
{
    int c;

    if ( f->flags & MKD_NOHTML )
        return 1;

    c = toupper(peek(f, 1));

    if ( c == 'A' )
        return (f->flags & MKD_NOLINKS) && !isthisalnum(f,2);

    if ( c == 'I' )
        return (f->flags & MKD_NOIMAGE)
            && strncasecmp(cursor(f)+1, "MG", 2) == 0
            && !isthisalnum(f,4);

    return 0;
}
/* Check a string to see if it looks like a mail address
 * "looks like a mail address" means alphanumeric + some
 * specials, then a `@`, then alphanumeric + some specials,
 * but with a `.`
 *
 * p, size: the candidate text (not necessarily NUL-terminated)
 *
 * Returns 1 if the whole string has that shape, 0 otherwise.  The
 * domain part may not start with a '.', and must contain a '.' that is
 * not its last character.
 *
 * Bytes are looked at as unsigned char so the <ctype.h> calls are
 * well-defined for 8-bit input, and a NUL byte is never accepted
 * (strchr() would otherwise match it against the terminator of the
 * list of specials.)
 */
static int
maybe_address(char *p, int size)
{
    int ok = 0;
    unsigned char c;

    /* local part */
    for ( ; size > 0; ++p, --size ) {
        c = (unsigned char)*p;
        if ( !(c && (isalnum(c) || strchr("._-+*", c))) )
            break;
    }

    if ( ! (size > 0 && *p == '@') )
        return 0;

    --size, ++p;

    if ( size > 0 && *p == '.' )
        return 0;

    /* domain part */
    for ( ; size > 0; ++p, --size ) {
        c = (unsigned char)*p;
        if ( !(c && (isalnum(c) || strchr("._-+", c))) )
            break;
        if ( c == '.' && size > 1 )
            ok = 1;
    }

    /* anything left over means we hit a character that doesn't belong */
    return size ? 0 : ok;
}
/* The size-length token at cursor(f) is either a mailto:, an
 * implicit mailto:, one of the approved url protocols, or just
 * plain old text. If it's a mailto: or an approved protocol,
 * linkify it, otherwise say "no"
 *
 * Returns 1 if a complete <a ...>...</a> was written, 0 if nothing was
 * written (always the case when MKD_NOLINKS is set.)  The cursor is
 * not moved; skipping the token is up to the caller.
 */
static int
process_possible_link(MMIOT *f, int size)
{
int address= 0;
int mailto = 0;
char *text = cursor(f);
if ( f->flags & MKD_NOLINKS ) return 0;
if ( (size > 7) && strncasecmp(text, "mailto:", 7) == 0 ) {
/* if it says it's a mailto, it's a mailto -- who am
* I to second-guess the user?
*/
address = 1;
mailto = 7; /* 7 is the length of "mailto:"; we need this */
}
else
address = maybe_address(text, size);
if ( address ) {
Qstring("<a href=\"", f);
if ( !mailto ) {
/* supply a mailto: protocol if one wasn't attached */
mangle("mailto:", 7, f);
}
mangle(text, size, f);
Qstring("\">", f);
/* the visible text leaves the mailto: prefix off */
mangle(text+mailto, size-mailto, f);
Qstring("</a>", f);
return 1;
}
else if ( isautoprefix(text, size) ) {
printlinkyref(f, &linkt, text, size);
Qchar('>', f);
puturl(text,size,f, 1);
Qstring("</a>", f);
return 1;
}
return 0;
} /* process_possible_link */
/* a < may be just a regular character, the start of an embedded html
 * tag, or the start of an <automatic link>. If it's an automatic
 * link, we also need to know if it's an email address because if it
 * is we need to mangle it in our futile attempt to cut down on the
 * spaminess of the rendered page.
 *
 * Called after the '<' has been pulled.  Returns 1 if a tag or a link
 * was written, and 0 if the '<' is just a character (which is always
 * the answer when generating tag text.)
 */
static int
maybe_tag_or_link(MMIOT *f)
{
int c, size;
int maybetag = 1;
if ( f->flags & MKD_TAGTEXT )
return 0;
/* measure the first word after the '<', noting along the way
 * whether it could be a tag name
 */
for ( size=0; (c = peek(f, size+1)) != '>'; size++) {
if ( c == EOF )
return 0;
else if ( c == '\\' ) {
maybetag=0;
if ( peek(f, size+2) != EOF )
size++;
}
else if ( isspace(c) )
break;
#if WITH_GITHUB_TAGS
else if ( ! (c == '/' || c == '-' || c == '_' || isalnum(c) ) )
#else
else if ( ! (c == '/' || isalnum(c) ) )
#endif
maybetag=0;
}
if ( size ) {
if ( maybetag || (size >= 3 && strncmp(cursor(f), "!--", 3) == 0) ) {
/* It is not a html tag unless we find the closing '>' in
* the same block.
*/
while ( (c = peek(f, size+1)) != '>' )
if ( c == EOF )
return 0;
else
size++;
if ( forbidden_tag(f) )
return 0;
/* copy the tag through as is; the closing '>' is left unread */
Qchar('<', f);
while ( ((c = peek(f, 1)) != EOF) && (c != '>') )
Qchar(pull(f), f);
return 1;
}
else if ( !isspace(c) && process_possible_link(f, size) ) {
/* skip the link text and the closing '>' */
shift(f, size+1);
return 1;
}
}
return 0;
}
/* autolinking means that all inline html is <a href'ified>.
 *
 * The candidate token starts at the next unread character and runs up
 * to (not including) the first whitespace or one of '"()[]{}<>` ; a
 * backslash takes the character after it along with it.  If the token
 * is longer than one character and process_possible_link() turns it
 * into a link, the token is skipped and 1 is returned.  Otherwise
 * nothing is consumed and 0 is returned.
 */
static int
maybe_autolink(MMIOT *f)
{
register int c;
int size;
/* greedily scan forward for the end of a legitimate link.
*/
for ( size=0; (c=peek(f, size+1)) != EOF; size++ )
if ( c == '\\' ) {
if ( peek(f, size+2) != EOF )
++size;
}
else if ( isspace(c) || strchr("'\"()[]{}<>`", c) )
break;
if ( (size > 1) && process_possible_link(f, size) ) {
shift(f, size);
return 1;
}
return 0;
}
/* smartyquote code that's common for single and double quotes.
 *
 * <typeofquote> is 's' or 'd'; each has a bit in *flags that is set
 * while a quote of that type is open.  An open quote is closed when
 * the quote mark is followed by a nonword character; a new quote is
 * opened when the mark is preceded by a nonword character and is not
 * the last character of the input.  Returns 1 if an entity was
 * written, 0 if the quote mark should be handled as ordinary text.
 */
static int
smartyquote(int *flags, char typeofquote, MMIOT *f)
{
    int bit = (typeofquote == 's') ? 0x01 : 0x02;
    int isopen = ((*flags) & bit) != 0;

    if ( isopen ) {
        if ( !isthisnonword(f,1) )
            return 0;
        Qprintf(f, "&r%cquo;", typeofquote);
        *flags &= ~bit;
        return 1;
    }

    if ( isthisnonword(f,-1) && peek(f,1) != EOF ) {
        Qprintf(f, "&l%cquo;", typeofquote);
        *flags |= bit;
        return 1;
    }
    return 0;
}
/* does the input at the cursor look like the smartypants pattern <s>?
 *
 * A '|' at the start of the pattern means the character before the
 * one just pulled must be a nonword character; a '|' at the end means
 * the character following the pattern must be one.  The first pattern
 * character is not compared here (smartypants() has already matched it
 * against the character that was pulled); the rest are compared
 * against the lowercased input.
 */
static int
islike(MMIOT *f, char *s)
{
int len;
int i;
if ( s[0] == '|' ) {
if ( !isthisnonword(f, -1) )
return 0;
++s;
}
if ( !(len = strlen(s)) )
return 0;
if ( s[len-1] == '|' ) {
if ( !isthisnonword(f,len-1) )
return 0;
len--;
}
/* peek(f,1) is the first unread character, which lines up with s[1] */
for (i=1; i < len; i++)
if (tolower(peek(f,i)) != s[i])
return 0;
return 1;
}
/* smartypants substitution table.  Entries are tried in order, so
 * longer patterns must come before their prefixes (--- before --).
 */
static struct smarties {
char c0; /* the character that triggers this entry */
char *pat; /* pattern for islike(); '|' marks a required nonword boundary */
char *entity; /* name of the entity to write, or 0 to write nothing */
int shift; /* how many more input characters to skip after a match */
} smarties[] = {
{ '\'', "'s|", "rsquo", 0 },
{ '\'', "'t|", "rsquo", 0 },
{ '\'', "'re|", "rsquo", 0 },
{ '\'', "'ll|", "rsquo", 0 },
{ '\'', "'ve|", "rsquo", 0 },
{ '\'', "'m|", "rsquo", 0 },
{ '\'', "'d|", "rsquo", 0 },
{ '-', "---", "mdash", 2 },
{ '-', "--", "ndash", 1 },
{ '.', "...", "hellip", 2 },
{ '.', ". . .", "hellip", 4 },
{ '(', "(c)", "copy", 2 },
{ '(', "(r)", "reg", 2 },
{ '(', "(tm)", "trade", 3 },
{ '3', "|3/4|", "frac34", 2 },
{ '3', "|3/4ths|", "frac34", 2 },
{ '1', "|1/2|", "frac12", 2 },
{ '1', "|1/4|", "frac14", 2 },
{ '1', "|1/4th|", "frac14", 2 },
{ '&', "&#0;", 0, 3 },
} ;
/* number of entries in smarties[] */
#define NRSMART ( sizeof smarties / sizeof smarties[0] )
/* Smarty-pants-style chrome for quotes, -, ellipses, and (r)(c)(tm)
 *
 * c:     the character that was just pulled
 * flags: open-quote state shared between calls (see smartyquote())
 *
 * Returns 1 if <c> (and possibly some of the input after it) was
 * replaced, 0 if the caller should process <c> itself.  Does nothing
 * when MKD_NOPANTS, MKD_TAGTEXT or IS_LABEL is set.
 */
static int
smartypants(int c, int *flags, MMIOT *f)
{
int i;
if ( f->flags & (MKD_NOPANTS|MKD_TAGTEXT|IS_LABEL) )
return 0;
for ( i=0; i < NRSMART; i++)
if ( (c == smarties[i].c0) && islike(f, smarties[i].pat) ) {
if ( smarties[i].entity )
Qprintf(f, "&%s;", smarties[i].entity);
shift(f, smarties[i].shift);
return 1;
}
switch (c) {
case '<' : return 0;
case '\'': if ( smartyquote(flags, 's', f) ) return 1;
break;
case '"': if ( smartyquote(flags, 'd', f) ) return 1;
break;
/* ``text'' becomes &ldquo;text&rdquo;, provided no ` shows up
 * before the closing ''
 */
case '`': if ( peek(f, 1) == '`' ) {
int j = 2;
while ( (c=peek(f,j)) != EOF ) {
if ( c == '\\' )
j += 2;
else if ( c == '`' )
break;
else if ( c == '\'' && peek(f, j+1) == '\'' ) {
Qstring("&ldquo;", f);
___mkd_reparse(cursor(f)+1, j-2, 0, f, 0);
Qstring("&rdquo;", f);
shift(f,j+1);
return 1;
}
else ++j;
}
}
break;
}
return 0;
} /* smartypants */
/* process a body of text encased in some sort of tick marks. If it
 * works, generate the output and return 1, otherwise just return 0 and
 * let the caller figure it out.
 *
 * tickchar:    the tick character; the first one has already been pulled
 * minticks:    the shortest opening run that counts
 * allow_space: if zero, the opening run may not be followed by whitespace
 * spanner:     called with the size of the enclosed span to write it out
 */
static int
tickhandler(MMIOT *f, int tickchar, int minticks, int allow_space, spanhandler spanner)
{
int endticks, size;
int tick = nrticks(0, tickchar, f);
if ( !allow_space && isspace(peek(f,tick)) )
return 0;
if ( (tick >= minticks) && (size = matchticks(f,tickchar,tick,&endticks)) ) {
/* the closing run is shorter than the opening run; the extra
 * opening ticks become part of the span
 */
if ( endticks < tick ) {
size += (tick - endticks);
tick = endticks;
}
shift(f, tick);
(*spanner)(f,size);
/* skip the rest of the span and the closing ticks */
shift(f, size+tick-1);
return 1;
}
return 0;
}
/* nonzero when the text being generated is going inside a tag attribute */
#define tag_text(f) (f->flags & MKD_TAGTEXT)
/* text() -- translate the inline markup in f's input buffer into blocks
 *           on f's output queue.
 *
 * Characters are pulled one at a time until the input runs out; each
 * is first offered to smartypants() and then dispatched on below.
 * When everything has been processed the input buffer is emptied.
 */
static void
text(MMIOT *f)
{
int c, j;
int rep;
int smartyflags = 0;
while (1) {
/* with MKD_AUTOLINK, a token that starts with a letter may be a bare
 * link; maybe_autolink() skips it if it writes one.
 */
if ( (f->flags & MKD_AUTOLINK) && isalpha(peek(f,1)) && !tag_text(f) )
maybe_autolink(f);
c = pull(f);
if (c == EOF)
break;
if ( smartypants(c, &smartyflags, f) )
continue;
switch (c) {
case 0: break;
/* ^C is the token printblock() pushes for a hard line break */
case 3: Qstring(tag_text(f) ? " " : "<br/>", f);
break;
case '>': if ( tag_text(f) )
Qstring("&gt;", f);
else
Qchar(c, f);
break;
case '"': if ( tag_text(f) )
Qstring("&quot;", f);
else
Qchar(c, f);
break;
case '!': if ( peek(f,1) == '[' ) {
pull(f);
if ( tag_text(f) || !linkylinky(1, f) )
Qstring("![", f);
}
else
Qchar(c, f);
break;
case '[': if ( tag_text(f) || !linkylinky(0, f) )
Qchar(c, f);
break;
/* A^B -> A<sup>B</sup> */
case '^': if ( (f->flags & (MKD_NOSUPERSCRIPT|MKD_STRICT|MKD_TAGTEXT))
|| (isthisnonword(f,-1) && peek(f,-1) != ')')
|| isthisspace(f,1) )
Qchar(c,f);
else {
char *sup = cursor(f);
int len = 0;
if ( peek(f,1) == '(' ) {
/* ^(...): the superscript is everything inside the parentheses */
int here = mmiottell(f);
pull(f);
if ( (len = parenthetical('(',')',f)) <= 0 ) {
mmiotseek(f,here);
Qchar(c, f);
break;
}
sup++;
}
else {
/* otherwise it's the run of alphanumerics after the ^ */
while ( isthisalnum(f,1+len) )
++len;
if ( !len ) {
Qchar(c,f);
break;
}
shift(f,len);
}
Qstring("<sup>",f);
___mkd_reparse(sup, len, 0, f, "()");
Qstring("</sup>", f);
}
break;
case '_':
/* Underscores don't count if they're in the middle of a word */
if ( !(f->flags & (MKD_NORELAXED|MKD_STRICT))
&& isthisalnum(f,-1)
&& isthisalnum(f,1) ) {
Qchar(c, f);
break;
}
/* otherwise an underscore is treated just like a star: fall through */
case '*':
/* Underscores & stars don't count if they're out in the middle
* of whitespace */
if ( isthisspace(f,-1) && isthisspace(f,1) ) {
Qchar(c, f);
break;
}
/* else fall into the regular old emphasis case */
if ( tag_text(f) )
Qchar(c, f);
else {
/* collect the whole run of identical characters into one marker */
for (rep = 1; peek(f,1) == c; pull(f) )
++rep;
Qem(f,c,rep);
}
break;
case '~': if ( (f->flags & (MKD_NOSTRIKETHROUGH|MKD_TAGTEXT|MKD_STRICT)) || ! tickhandler(f,c,2,0, delspan) )
Qchar(c, f);
break;
case '`': if ( tag_text(f) || !tickhandler(f,c,1,1,codespan) )
Qchar(c, f);
break;
/* backslash escapes.  When the character after the backslash is not
 * escapable, the backslash is written and the character is pushed
 * back (shift -1) so it gets processed normally.
 */
case '\\': switch ( c = pull(f) ) {
case '&': Qstring("&amp;", f);
break;
case '<': c = peek(f,1);
if ( (c == EOF) || isspace(c) )
Qstring("&lt;", f);
else {
/* Markdown.pl does not escape <[nonwhite]
* sequences */
Qchar('\\', f);
shift(f, -1);
}
break;
case '^': if ( f->flags & (MKD_STRICT|MKD_NOSUPERSCRIPT) ) {
Qchar('\\', f);
shift(f,-1);
break;
}
Qchar(c, f);
break;
case ':': case '|':
if ( f->flags & MKD_NOTABLES ) {
Qchar('\\', f);
shift(f,-1);
break;
}
Qchar(c, f);
break;
case EOF: Qchar('\\', f);
break;
default: if ( escaped(f,c) ||
strchr(">#.-+{}]![*_\\()`", c) )
Qchar(c, f);
else {
Qchar('\\', f);
shift(f, -1);
}
break;
}
break;
case '<': if ( !maybe_tag_or_link(f) )
Qstring("&lt;", f);
break;
/* an & that starts something shaped like an entity (&name; or
 * &#digits;) is left alone; any other & is escaped.
 */
case '&': j = (peek(f,1) == '#' ) ? 2 : 1;
while ( isthisalnum(f,j) )
++j;
if ( peek(f,j) != ';' )
Qstring("&amp;", f);
else
Qchar(c, f);
break;
default: Qchar(c, f);
break;
}
}
/* truncate the input string after we've finished processing it */
S(f->in) = f->isp = 0;
} /* text */
/* print a header block as <hN>...</hN>, where N is pp->hnumber and the
 * contents are the first line of the paragraph run through text().
 *
 * With MKD_TOC set, an anchor derived from the header text is written
 * as well: as an id attribute on the <hN> tag when built
 * WITH_ID_ANCHOR, otherwise as an empty <a name="..."></a> in front of
 * the tag.
 */
static void
printheader(Paragraph *pp, MMIOT *f)
{
#if WITH_ID_ANCHOR
Qprintf(f, "<h%d", pp->hnumber);
if ( f->flags & MKD_TOC ) {
Qstring(" id=\"", f);
mkd_string_to_anchor(T(pp->text->text),
S(pp->text->text),
(mkd_sta_function_t)Qchar, f, 1);
Qchar('"', f);
}
Qchar('>', f);
#else
if ( f->flags & MKD_TOC ) {
Qstring("<a name=\"", f);
mkd_string_to_anchor(T(pp->text->text),
S(pp->text->text),
(mkd_sta_function_t)Qchar, f, 1);
Qstring("\"></a>\n", f);
}
Qprintf(f, "<h%d>", pp->hnumber);
#endif
push(T(pp->text->text), S(pp->text->text), f);
text(f);
Qprintf(f, "</h%d>", pp->hnumber);
}
/* table cell alignments; the values index alignments[] below */
enum e_alignments { a_NONE, a_CENTER, a_LEFT, a_RIGHT };
/* attribute text to put inside a <th>/<td> tag for each alignment */
static char* alignments[] = { "", " style=\"text-align:center;\"",
" style=\"text-align:left;\"",
" style=\"text-align:right;\"" };
/* growable array of ints; holds one e_alignments value per table column */
typedef STRING(int) Istring;
/* write one table row.
 *
 * p:     the line holding the row; cells are separated by '|' (a
 *        backslash hides the character after it from the splitting.)
 *        The line is tidied and a trailing '|' is dropped, so p->text
 *        is modified.
 * block: the cell tag, "th" or "td"
 * align: per-column alignments
 * force: if nonzero, the row is made to have exactly S(align) columns:
 *        the last column takes whatever is left on the line, and
 *        missing columns are written as empty cells.
 *
 * Returns the number of cells written.
 */
static int
splat(Line *p, char *block, Istring align, int force, MMIOT *f)
{
int first,
idx = p->dle,
colno = 0;
___mkd_tidy(&p->text);
/* NOTE(review): reads T(p->text)[-1] if the line is empty after
 * tidying -- presumably table lines never are; confirm.
 */
if ( T(p->text)[S(p->text)-1] == '|' )
--S(p->text);
Qstring("<tr>\n", f);
while ( idx < S(p->text) ) {
first = idx;
if ( force && (colno >= S(align)-1) )
idx = S(p->text);
else
while ( (idx < S(p->text)) && (T(p->text)[idx] != '|') ) {
if ( T(p->text)[idx] == '\\' )
++idx;
++idx;
}
Qprintf(f, "<%s%s>",
block,
alignments[ (colno < S(align)) ? T(align)[colno] : a_NONE ]);
/* '|' is escapable inside a cell */
___mkd_reparse(T(p->text)+first, idx-first, 0, f, "|");
Qprintf(f, "</%s>\n", block);
idx++;
colno++;
}
if ( force )
while (colno < S(align) ) {
Qprintf(f, "<%s></%s>\n", block, block);
++colno;
}
Qstring("</tr>\n", f);
return colno;
}
/* write a TABLE paragraph as <table>...</table>.
 *
 * The first line of the paragraph is the header row, the second is the
 * row of dashes that gives the column alignments, and the remaining
 * lines are the body rows.  Body rows are forced to have the same
 * number of columns as the header row.  Always returns 1.
 */
static int
printtable(Paragraph *pp, MMIOT *f)
{
/* header, dashes, then lines of content */
Line *hdr, *dash, *body;
Istring align;
int hcols,start;
char *p;
enum e_alignments it;
hdr = pp->text;
dash= hdr->next;
body= dash->next;
if ( T(hdr->text)[hdr->dle] == '|' ) {
/* trim leading pipe off all lines
*/
Line *r;
for ( r = pp->text; r; r = r->next )
r->dle ++;
}
/* figure out cell alignments */
CREATE(align);
for (p=T(dash->text), start=dash->dle; start < S(dash->text); ) {
char first, last;
int end;
/* first and last nonblank characters of this cell of the dash line */
last=first=0;
for (end=start ; (end < S(dash->text)) && p[end] != '|'; ++ end ) {
if ( p[end] == '\\' )
++ end;
else if ( !isspace(p[end]) ) {
if ( !first) first = p[end];
last = p[end];
}
}
/* :--: is centered, :-- is left, --: is right, anything else is default */
it = ( first == ':' ) ? (( last == ':') ? a_CENTER : a_LEFT)
: (( last == ':') ? a_RIGHT : a_NONE );
EXPAND(align) = it;
start = 1+end;
}
Qstring("<table>\n", f);
Qstring("<thead>\n", f);
hcols = splat(hdr, "th", align, 0, f);
Qstring("</thead>\n", f);
/* make the alignment list exactly as long as the header row is wide */
if ( hcols < S(align) )
S(align) = hcols;
else
while ( hcols > S(align) )
EXPAND(align) = a_NONE;
Qstring("<tbody>\n", f);
for ( ; body; body = body->next)
splat(body, "td", align, 1, f);
Qstring("</tbody>\n", f);
Qstring("</table>\n", f);
DELETE(align);
return 1;
}
/* write an ordinary paragraph.
 *
 * The nonempty lines of the paragraph are pushed into the input
 * buffer, joined by newlines, and run through text().  The output is
 * wrapped in the Begin[]/End[] strings selected by pp->align.  Always
 * returns 1.
 */
static int
printblock(Paragraph *pp, MMIOT *f)
{
Line *t = pp->text;
static char *Begin[] = { "", "<p>", "<p style=\"text-align:center;\">" };
static char *End[] = { "", "</p>","</p>" };
while (t) {
if ( S(t->text) ) {
/* two trailing spaces on a line that isn't the last one are a hard
 * line break; they are replaced with the ^C token that text()
 * turns into <br/>
 */
if ( t->next && S(t->text) > 2
&& T(t->text)[S(t->text)-2] == ' '
&& T(t->text)[S(t->text)-1] == ' ' ) {
push(T(t->text), S(t->text)-2, f);
push("\003\n", 2, f);
}
else {
___mkd_tidy(&t->text);
push(T(t->text), S(t->text), f);
if ( t->next )
push("\n", 1, f);
}
}
t = t->next;
}
Qstring(Begin[pp->align], f);
text(f);
Qstring(End[pp->align], f);
return 1;
}
/* write a code block as <pre><code>...</code></pre>.
 *
 * Every line with content past its leading whitespace is written via
 * code() and followed by a newline.  Lines without any are only
 * counted, and come out as bare newlines when a later line with
 * content shows up -- so blank lines at the end of the block vanish.
 */
static void
printcode(Line *t, MMIOT *f)
{
    int pending = 0;

    Qstring("<pre><code>", f);
    while ( t ) {
        if ( S(t->text) <= t->dle )
            ++pending;
        else {
            for ( ; pending; --pending )
                Qchar('\n', f);
            code(f, T(t->text), S(t->text));
            Qchar('\n', f);
        }
        t = t->next;
    }
    Qstring("</code></pre>", f);
}
/* write a block of html through untouched, one line at a time.
 *
 * Empty lines are only counted, and come out as bare newlines when a
 * later nonempty line shows up -- so empty lines at the end of the
 * block vanish.
 */
static void
printhtml(Line *t, MMIOT *f)
{
    int pending = 0;

    while ( t ) {
        if ( S(t->text) == 0 )
            ++pending;
        else {
            while ( pending ) {
                Qchar('\n', f);
                --pending;
            }
            Qwrite(T(t->text), S(t->text), f);
            Qchar('\n', f);
        }
        t = t->next;
    }
}
/* write a list of paragraphs, optionally wrapped in a tag.
 *
 * p:         the first paragraph to display
 * block:     name of the wrapping tag, or 0 for no wrapper
 * arguments: attribute text to put inside the opening tag, or 0
 *
 * Each paragraph is followed by a blank line.  ___mkd_emblock() is
 * called before and after each piece that is written.
 */
static void
htmlify(Paragraph *p, char *block, char *arguments, MMIOT *f)
{
___mkd_emblock(f);
if ( block )
Qprintf(f, arguments ? "<%s %s>" : "<%s>", block, arguments);
___mkd_emblock(f);
/* display() returns the paragraph after the one it wrote */
while (( p = display(p, f) )) {
___mkd_emblock(f);
Qstring("\n\n", f);
}
if ( block )
Qprintf(f, "</%s>", block);
___mkd_emblock(f);
}
/* write a definition list.  Each item's lines become <dt> entries (with
 * their text reparsed as markdown), and the paragraphs under the item
 * become the <dd>.  An empty list writes nothing at all.
 */
static void
definitionlist(Paragraph *p, MMIOT *f)
{
    Line *term;

    if ( !p )
        return;

    Qstring("<dl>\n", f);

    while ( p ) {
        for ( term = p->text; term; term = term->next ) {
            Qstring("<dt>", f);
            ___mkd_reparse(T(term->text), S(term->text), 0, f, 0);
            Qstring("</dt>\n", f);
        }

        htmlify(p->down, "dd", p->ident, f);
        Qchar('\n', f);

        p = p->next;
    }

    Qstring("</dl>", f);
}
/* write a list: <ul> when <typ> is UL, <ol> for anything else, with
 * type="a" added for AL.  Every item is written as an <li> holding
 * the paragraphs under it.  An empty list writes nothing at all.
 */
static void
listdisplay(int typ, Paragraph *p, MMIOT* f)
{
    int kind = (typ == UL) ? 'u' : 'o';

    if ( !p )
        return;

    Qprintf(f, "<%cl", kind);
    if ( typ == AL )
        Qprintf(f, " type=\"a\"");
    Qprintf(f, ">\n");

    while ( p ) {
        htmlify(p->down, "li", p->ident, f);
        Qchar('\n', f);
        p = p->next;
    }

    Qprintf(f, "</%cl>\n", kind);
}
/* dump out a Paragraph in the desired manner: dispatch on the type of
 * the paragraph to the function that knows how to write it.  STYLE and
 * WHITESPACE paragraphs write nothing, and any type not listed is
 * written as an ordinary text paragraph.
 *
 * Returns the next paragraph in the list (0 if <p> is 0 or is the last
 * one.)
 */
static Paragraph*
display(Paragraph *p, MMIOT *f)
{
if ( !p ) return 0;
switch ( p->typ ) {
case STYLE:
case WHITESPACE:
break;
case HTML:
printhtml(p->text, f);
break;
case CODE:
printcode(p->text, f);
break;
case QUOTE:
/* a quote with an ident is written as a <div> carrying that ident */
htmlify(p->down, p->ident ? "div" : "blockquote", p->ident, f);
break;
case UL:
case OL:
case AL:
listdisplay(p->typ, p->down, f);
break;
case DL:
definitionlist(p->down, f);
break;
case HR:
Qstring("<hr />", f);
break;
case HDR:
printheader(p, f);
break;
case TABLE:
printtable(p, f);
break;
case SOURCE:
htmlify(p->down, 0, 0, f);
break;
default:
printblock(p, f);
break;
}
return p->next;
}
/* dump out a list of footnotes.
 *
 * Appends a <div class="footnotes"> holding an ordered list of every
 * footnote that was referenced, in reference-number order, each with a
 * link back to the place it was referenced from.  Writes nothing if no
 * footnote was referenced.  The output goes straight to m->out.
 */
static void
mkd_extra_footnotes(MMIOT *m)
{
int j, i;
Footnote *t;
if ( m->reference == 0 )
return;
Csprintf(&m->out, "\n<div class=\"footnotes\">\n<hr/>\n<ol>\n");
/* the footnote list is not in reference order, so for each reference
 * number go find the footnote that carries it
 */
for ( i=1; i <= m->reference; i++ ) {
for ( j=0; j < S(*m->footnotes); j++ ) {
t = &T(*m->footnotes)[j];
if ( (t->refnumber == i) && (t->flags & REFERENCED) ) {
Csprintf(&m->out, "<li id=\"%s:%d\">\n<p>",
p_or_nothing(m), t->refnumber);
Csreparse(&m->out, T(t->title), S(t->title), 0);
Csprintf(&m->out, "<a href=\"#%sref:%d\" rev=\"footnote\">&#8617;</a>",
p_or_nothing(m), t->refnumber);
Csprintf(&m->out, "</p></li>\n");
}
}
}
Csprintf(&m->out, "</ol>\n</div>\n");
}
/* return a pointer to the compiled markdown
 * document.
 *
 * p:   a compiled document
 * res: set to point at the generated html, which is owned by the
 *      document and is NUL-terminated
 *
 * Returns the size of the html, not counting the terminating NUL, or
 * EOF if <p> is null or has not been compiled.  The html is generated
 * on the first call and reused after that; every call returns the same
 * size.
 */
int
mkd_document(Document *p, char **res)
{
    int size;

    if ( !(p && p->compiled) )
        return EOF;

    if ( ! p->html ) {
        htmlify(p->code, 0, 0, p->ctx);
        if ( p->ctx->flags & MKD_EXTRA_FOOTNOTE )
            mkd_extra_footnotes(p->ctx);
        p->html = 1;
    }

    size = S(p->ctx->out);

    if ( (size == 0) || T(p->ctx->out)[size-1] ) {
        /* Add a null byte at the end of the generated html, but
         * pretend it doesn't exist -- otherwise it would be counted
         * as part of the document the next time we're called.
         */
        EXPAND(p->ctx->out) = 0;
        --S(p->ctx->out);
    }

    *res = T(p->ctx->out);
    return size;
}