Text::Markdown::Discount
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1302 lines
25 KiB

  1. /* markdown: a C implementation of John Gruber's Markdown markup language.
  2. *
  3. * Copyright (C) 2007 David L Parsons.
  4. * The redistribution terms are provided in the COPYRIGHT file that must
  5. * be distributed with this source code.
  6. */
  7. #include "config.h"
  8. #include <stdio.h>
  9. #include <string.h>
  10. #include <stdarg.h>
  11. #include <stdlib.h>
  12. #include <time.h>
  13. #include <ctype.h>
  14. #include "cstring.h"
  15. #include "markdown.h"
  16. #include "amalloc.h"
  17. #include "tags.h"
  /* comparison-callback type in the shape qsort() expects; __mkd_footsort
   * is cast to this when the footnote table is sorted.
   */
  typedef int (*stfu)(const void*,const void*);
  /* root of a chain of Paragraphs (filled with ATTACH, read back with T) */
  typedef ANCHOR(Paragraph) ParagraphRoot;
  /* forward declarations for helpers defined further down in this file */
  static Paragraph *Pp(ParagraphRoot *, Line *, int);
  static Paragraph *compile(Line *, int, MMIOT *);
  22. /* case insensitive string sort for Footnote tags.
  23. */
  24. int
  25. __mkd_footsort(Footnote *a, Footnote *b)
  26. {
  27. int i;
  28. char ac, bc;
  29. if ( S(a->tag) != S(b->tag) )
  30. return S(a->tag) - S(b->tag);
  31. for ( i=0; i < S(a->tag); i++) {
  32. ac = tolower(T(a->tag)[i]);
  33. bc = tolower(T(b->tag)[i]);
  34. if ( isspace(ac) && isspace(bc) )
  35. continue;
  36. if ( ac != bc )
  37. return ac - bc;
  38. }
  39. return 0;
  40. }
  41. /* find the first blank character after position <i>
  42. */
  43. static int
  44. nextblank(Line *t, int i)
  45. {
  46. while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
  47. ++i;
  48. return i;
  49. }
  50. /* find the next nonblank character after position <i>
  51. */
  52. static int
  53. nextnonblank(Line *t, int i)
  54. {
  55. while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
  56. ++i;
  57. return i;
  58. }
  59. /* find the first nonblank character on the Line.
  60. */
  61. int
  62. mkd_firstnonblank(Line *p)
  63. {
  64. return nextnonblank(p,0);
  65. }
  66. static inline int
  67. blankline(Line *p)
  68. {
  69. return ! (p && (S(p->text) > p->dle) );
  70. }
  71. static Line *
  72. skipempty(Line *p)
  73. {
  74. while ( p && (p->dle == S(p->text)) )
  75. p = p->next;
  76. return p;
  77. }
  78. void
  79. ___mkd_tidy(Cstring *t)
  80. {
  81. while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
  82. --S(*t);
  83. }
  84. static struct kw comment = { "!--", 3, 0 };
  85. static struct kw *
  86. isopentag(Line *p)
  87. {
  88. int i=0, len;
  89. char *line;
  90. if ( !p ) return 0;
  91. line = T(p->text);
  92. len = S(p->text);
  93. if ( len < 3 || line[0] != '<' )
  94. return 0;
  95. if ( line[1] == '!' && line[2] == '-' && line[3] == '-' )
  96. /* comments need special case handling, because
  97. * the !-- doesn't need to end in a whitespace
  98. */
  99. return &comment;
  100. /* find how long the tag is so we can check to see if
  101. * it's a block-level tag
  102. */
  103. for ( i=1; i < len && T(p->text)[i] != '>'
  104. && T(p->text)[i] != '/'
  105. && !isspace(T(p->text)[i]); ++i )
  106. ;
  107. return mkd_search_tags(T(p->text)+1, i-1);
  108. }
  109. typedef struct _flo {
  110. Line *t;
  111. int i;
  112. } FLO;
  113. #define floindex(x) (x.i)
  114. static int
  115. flogetc(FLO *f)
  116. {
  117. if ( f && f->t ) {
  118. if ( f->i < S(f->t->text) )
  119. return T(f->t->text)[f->i++];
  120. f->t = f->t->next;
  121. f->i = 0;
  122. return flogetc(f);
  123. }
  124. return EOF;
  125. }
  126. static void
  127. splitline(Line *t, int cutpoint)
  128. {
  129. if ( t && (cutpoint < S(t->text)) ) {
  130. Line *tmp = calloc(1, sizeof *tmp);
  131. tmp->next = t->next;
  132. t->next = tmp;
  133. tmp->dle = t->dle;
  134. SUFFIX(tmp->text, T(t->text)+cutpoint, S(t->text)-cutpoint);
  135. S(t->text) = cutpoint;
  136. }
  137. }
  138. #define UNCHECK(l) ((l)->flags &= ~CHECKED)
  139. /*
  140. * walk a line, seeing if it's any of half a dozen interesting regular
  141. * types.
  142. */
  143. static void
  144. checkline(Line *l)
  145. {
  146. int eol, i;
  147. int dashes = 0, spaces = 0,
  148. equals = 0, underscores = 0,
  149. stars = 0, tildes = 0;
  150. l->flags |= CHECKED;
  151. l->kind = chk_text;
  152. l->count = 0;
  153. if (l->dle >= 4) { l->kind=chk_code; return; }
  154. for ( eol = S(l->text); eol > l->dle && isspace(T(l->text)[eol-1]); --eol )
  155. ;
  156. for (i=l->dle; i<eol; i++) {
  157. register int c = T(l->text)[i];
  158. if ( c != ' ' ) l->count++;
  159. switch (c) {
  160. case '-': dashes = 1; break;
  161. case ' ': spaces = 1; break;
  162. case '=': equals = 1; break;
  163. case '_': underscores = 1; break;
  164. case '*': stars = 1; break;
  165. case '~': tildes = 1; break;
  166. default: return;
  167. }
  168. }
  169. if ( dashes + equals + underscores + stars + tildes > 1 )
  170. return;
  171. if ( spaces ) {
  172. if ( (underscores || stars || dashes) )
  173. l->kind = chk_hr;
  174. return;
  175. }
  176. if ( stars || underscores ) { l->kind = chk_hr; }
  177. else if ( dashes ) { l->kind = chk_dash; }
  178. else if ( tildes ) { l->kind = chk_tilde; }
  179. else if ( equals ) { l->kind = chk_equal; }
  180. }
  181. static Line *
  182. commentblock(Paragraph *p, int *unclosed)
  183. {
  184. Line *t, *ret;
  185. char *end;
  186. for ( t = p->text; t ; t = t->next) {
  187. if ( end = strstr(T(t->text), "-->") ) {
  188. splitline(t, 3 + (end - T(t->text)) );
  189. ret = t->next;
  190. t->next = 0;
  191. return ret;
  192. }
  193. }
  194. *unclosed = 1;
  195. return t;
  196. }
  197. static Line *
  198. htmlblock(Paragraph *p, struct kw *tag, int *unclosed)
  199. {
  200. Line *ret;
  201. FLO f = { p->text, 0 };
  202. int c;
  203. int i, closing, depth=0;
  204. *unclosed = 0;
  205. if ( tag == &comment )
  206. return commentblock(p, unclosed);
  207. if ( tag->selfclose ) {
  208. ret = f.t->next;
  209. f.t->next = 0;
  210. return ret;
  211. }
  212. while ( (c = flogetc(&f)) != EOF ) {
  213. if ( c == '<' ) {
  214. /* tag? */
  215. c = flogetc(&f);
  216. if ( c == '!' ) { /* comment? */
  217. if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
  218. /* yes */
  219. while ( (c = flogetc(&f)) != EOF ) {
  220. if ( c == '-' && flogetc(&f) == '-'
  221. && flogetc(&f) == '>')
  222. /* consumed whole comment */
  223. break;
  224. }
  225. }
  226. }
  227. else {
  228. if ( closing = (c == '/') ) c = flogetc(&f);
  229. for ( i=0; i < tag->size; c=flogetc(&f) ) {
  230. if ( tag->id[i++] != toupper(c) )
  231. break;
  232. }
  233. if ( (i == tag->size) && !isalnum(c) ) {
  234. depth = depth + (closing ? -1 : 1);
  235. if ( depth == 0 ) {
  236. while ( c != EOF && c != '>' ) {
  237. /* consume trailing gunk in close tag */
  238. c = flogetc(&f);
  239. }
  240. if ( c == EOF )
  241. break;
  242. if ( !f.t )
  243. return 0;
  244. splitline(f.t, floindex(f));
  245. ret = f.t->next;
  246. f.t->next = 0;
  247. return ret;
  248. }
  249. }
  250. }
  251. }
  252. }
  253. *unclosed = 1;
  254. return 0;
  255. }
  256. /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
  257. */
  258. static int
  259. isfootnote(Line *t)
  260. {
  261. int i;
  262. if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
  263. return 0;
  264. for ( ++i; i < S(t->text) ; ++i ) {
  265. if ( T(t->text)[i] == '[' )
  266. return 0;
  267. else if ( T(t->text)[i] == ']' )
  268. return ( T(t->text)[i+1] == ':' ) ;
  269. }
  270. return 0;
  271. }
  272. static inline int
  273. isquote(Line *t)
  274. {
  275. return (t->dle < 4 && T(t->text)[t->dle] == '>');
  276. }
  277. static inline int
  278. iscode(Line *t)
  279. {
  280. return (t->dle >= 4);
  281. }
  282. static inline int
  283. ishr(Line *t)
  284. {
  285. if ( ! (t->flags & CHECKED) )
  286. checkline(t);
  287. if ( t->count > 2 )
  288. return t->kind == chk_hr || t->kind == chk_dash || t->kind == chk_equal;
  289. return 0;
  290. }
  291. static int
  292. issetext(Line *t, int *htyp)
  293. {
  294. Line *n;
  295. /* check for setext-style HEADER
  296. * ======
  297. */
  298. if ( (n = t->next) ) {
  299. if ( !(n->flags & CHECKED) )
  300. checkline(n);
  301. if ( n->kind == chk_dash || n->kind == chk_equal ) {
  302. *htyp = SETEXT;
  303. return 1;
  304. }
  305. }
  306. return 0;
  307. }
  308. static int
  309. ishdr(Line *t, int *htyp)
  310. {
  311. /* ANY leading `#`'s make this into an ETX header
  312. */
  313. if ( (t->dle == 0) && (S(t->text) > 1) && (T(t->text)[0] == '#') ) {
  314. *htyp = ETX;
  315. return 1;
  316. }
  317. /* And if not, maybe it's a SETEXT header instead
  318. */
  319. return issetext(t, htyp);
  320. }
  321. static inline int
  322. end_of_block(Line *t)
  323. {
  324. int dummy;
  325. if ( !t )
  326. return 0;
  327. return ( (S(t->text) <= t->dle) || ishr(t) || ishdr(t, &dummy) );
  328. }
  329. static Line*
  330. is_discount_dt(Line *t, int *clip)
  331. {
  332. #if USE_DISCOUNT_DL
  333. if ( t && t->next
  334. && (S(t->text) > 2)
  335. && (t->dle == 0)
  336. && (T(t->text)[0] == '=')
  337. && (T(t->text)[S(t->text)-1] == '=') ) {
  338. if ( t->next->dle >= 4 ) {
  339. *clip = 4;
  340. return t;
  341. }
  342. else
  343. return is_discount_dt(t->next, clip);
  344. }
  345. #endif
  346. return 0;
  347. }
  348. static int
  349. is_extra_dd(Line *t)
  350. {
  351. return (t->dle < 4) && (T(t->text)[t->dle] == ':')
  352. && isspace(T(t->text)[t->dle+1]);
  353. }
  354. static Line*
  355. is_extra_dt(Line *t, int *clip)
  356. {
  357. #if USE_EXTRA_DL
  358. if ( t && t->next && T(t->text)[0] != '='
  359. && T(t->text)[S(t->text)-1] != '=') {
  360. Line *x;
  361. if ( iscode(t) || end_of_block(t) )
  362. return 0;
  363. if ( (x = skipempty(t->next)) && is_extra_dd(x) ) {
  364. *clip = x->dle+2;
  365. return t;
  366. }
  367. if ( x=is_extra_dt(t->next, clip) )
  368. return x;
  369. }
  370. #endif
  371. return 0;
  372. }
  373. static Line*
  374. isdefinition(Line *t, int *clip, int *kind)
  375. {
  376. Line *ret;
  377. *kind = 1;
  378. if ( ret = is_discount_dt(t,clip) )
  379. return ret;
  380. *kind=2;
  381. return is_extra_dt(t,clip);
  382. }
  383. static int
  384. islist(Line *t, int *clip, DWORD flags, int *list_type)
  385. {
  386. int i, j;
  387. char *q;
  388. if ( end_of_block(t) )
  389. return 0;
  390. if ( !(flags & (MKD_NODLIST|MKD_STRICT)) && isdefinition(t,clip,list_type) )
  391. return DL;
  392. if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
  393. i = nextnonblank(t, t->dle+1);
  394. *clip = (i > 4) ? 4 : i;
  395. *list_type = UL;
  396. return AL;
  397. }
  398. if ( (j = nextblank(t,t->dle)) > t->dle ) {
  399. if ( T(t->text)[j-1] == '.' ) {
  400. if ( !(flags & (MKD_NOALPHALIST|MKD_STRICT))
  401. && (j == t->dle + 2)
  402. && isalpha(T(t->text)[t->dle]) ) {
  403. j = nextnonblank(t,j);
  404. *clip = (j > 4) ? 4 : j;
  405. *list_type = AL;
  406. return AL;
  407. }
  408. strtoul(T(t->text)+t->dle, &q, 10);
  409. if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
  410. j = nextnonblank(t,j);
  411. *clip = (j > 4) ? 4 : j;
  412. *list_type = OL;
  413. return AL;
  414. }
  415. }
  416. }
  417. return 0;
  418. }
  419. static Line *
  420. headerblock(Paragraph *pp, int htyp)
  421. {
  422. Line *ret = 0;
  423. Line *p = pp->text;
  424. int i, j;
  425. switch (htyp) {
  426. case SETEXT:
  427. /* p->text is header, p->next->text is -'s or ='s
  428. */
  429. pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
  430. ret = p->next->next;
  431. ___mkd_freeLine(p->next);
  432. p->next = 0;
  433. break;
  434. case ETX:
  435. /* p->text is ###header###, so we need to trim off
  436. * the leading and trailing `#`'s
  437. */
  438. for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1)
  439. && (i < 6); i++)
  440. ;
  441. pp->hnumber = i;
  442. while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
  443. ++i;
  444. CLIP(p->text, 0, i);
  445. UNCHECK(p);
  446. for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
  447. ;
  448. while ( j && isspace(T(p->text)[j-1]) )
  449. --j;
  450. S(p->text) = j;
  451. ret = p->next;
  452. p->next = 0;
  453. break;
  454. }
  455. return ret;
  456. }
  457. static Line *
  458. codeblock(Paragraph *p)
  459. {
  460. Line *t = p->text, *r;
  461. for ( ; t; t = r ) {
  462. CLIP(t->text,0,4);
  463. t->dle = mkd_firstnonblank(t);
  464. if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
  465. ___mkd_freeLineRange(t,r);
  466. t->next = 0;
  467. return r;
  468. }
  469. }
  470. return t;
  471. }
  472. #ifdef WITH_FENCED_CODE
  473. static int
  474. iscodefence(Line *r, int size)
  475. {
  476. if ( !(r->flags & CHECKED) )
  477. checkline(r);
  478. return (r->kind == chk_tilde) && (r->count >= size);
  479. }
  480. static Paragraph *
  481. fencedcodeblock(ParagraphRoot *d, Line **ptr)
  482. {
  483. Line *first, *r;
  484. Paragraph *ret;
  485. first = (*ptr);
  486. /* don't allow zero-length code fences
  487. */
  488. if ( (first->next == 0) || iscodefence(first->next, first->count) )
  489. return 0;
  490. /* find the closing fence, discard the fences,
  491. * return a Paragraph with the contents
  492. */
  493. for ( r = first; r && r->next; r = r->next )
  494. if ( iscodefence(r->next, first->count) ) {
  495. (*ptr) = r->next->next;
  496. ret = Pp(d, first->next, CODE);
  497. ___mkd_freeLine(first);
  498. ___mkd_freeLine(r->next);
  499. r->next = 0;
  500. return ret;
  501. }
  502. return 0;
  503. }
  504. #endif
  505. static int
  506. centered(Line *first, Line *last)
  507. {
  508. if ( first&&last ) {
  509. int len = S(last->text);
  510. if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
  511. && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
  512. CLIP(first->text, 0, 2);
  513. S(last->text) -= 2;
  514. return CENTER;
  515. }
  516. }
  517. return 0;
  518. }
  519. static int
  520. endoftextblock(Line *t, int toplevelblock, DWORD flags)
  521. {
  522. int z;
  523. if ( end_of_block(t) || isquote(t) )
  524. return 1;
  525. /* HORRIBLE STANDARDS KLUDGES:
  526. * 1. non-toplevel paragraphs absorb adjacent code blocks
  527. * 2. Toplevel paragraphs eat absorb adjacent list items,
  528. * but sublevel blocks behave properly.
  529. * (What this means is that we only need to check for code
  530. * blocks at toplevel, and only check for list items at
  531. * nested levels.)
  532. */
  533. return toplevelblock ? 0 : islist(t,&z,flags,&z);
  534. }
  535. static Line *
  536. textblock(Paragraph *p, int toplevel, DWORD flags)
  537. {
  538. Line *t, *next;
  539. for ( t = p->text; t ; t = next ) {
  540. if ( ((next = t->next) == 0) || endoftextblock(next, toplevel, flags) ) {
  541. p->align = centered(p->text, t);
  542. t->next = 0;
  543. return next;
  544. }
  545. }
  546. return t;
  547. }
  /* length of the leading "id:" or "class:" (matched without regard to
   * case) in a special div-not-quote marker; 0 if neither is present.
   */
  static int
  szmarkerclass(char *p)
  {
      int len = 0;

      if ( strncasecmp(p, "id:", 3) == 0 )
          len = 3;
      else if ( strncasecmp(p, "class:", 6) == 0 )
          len = 6;

      return len;
  }
  559. /*
  560. * check if the first line of a quoted block is the special div-not-quote
  561. * marker %[kind:]name%
  562. */
  563. #define iscsschar(c) (isalpha(c) || (c == '-') || (c == '_') )
  564. static int
  565. isdivmarker(Line *p, int start, DWORD flags)
  566. {
  567. char *s;
  568. int last, i;
  569. if ( flags & (MKD_NODIVQUOTE|MKD_STRICT) )
  570. return 0;
  571. last= S(p->text) - (1 + start);
  572. s = T(p->text) + start;
  573. if ( (last <= 0) || (*s != '%') || (s[last] != '%') )
  574. return 0;
  575. i = szmarkerclass(s+1);
  576. if ( !iscsschar(s[i+1]) )
  577. return 0;
  578. while ( ++i < last )
  579. if ( !(isdigit(s[i]) || iscsschar(s[i])) )
  580. return 0;
  581. return 1;
  582. }
  583. /*
  584. * accumulate a blockquote.
  585. *
  586. * one sick horrible thing about blockquotes is that even though
  587. * it just takes ^> to start a quote, following lines, if quoted,
  588. * assume that the prefix is ``> ''. This means that code needs
  589. * to be indented *5* spaces from the leading '>', but *4* spaces
  590. * from the start of the line. This does not appear to be
  591. * documented in the reference implementation, but it's the
  592. * way the markdown sample web form at Daring Fireball works.
  593. */
  594. static Line *
  595. quoteblock(Paragraph *p, DWORD flags)
  596. {
  597. Line *t, *q;
  598. int qp;
  599. for ( t = p->text; t ; t = q ) {
  600. if ( isquote(t) ) {
  601. /* clip leading spaces */
  602. for (qp = 0; T(t->text)[qp] != '>'; qp ++)
  603. /* assert: the first nonblank character on this line
  604. * will be a >
  605. */;
  606. /* clip '>' */
  607. qp++;
  608. /* clip next space, if any */
  609. if ( T(t->text)[qp] == ' ' )
  610. qp++;
  611. CLIP(t->text, 0, qp);
  612. UNCHECK(t);
  613. t->dle = mkd_firstnonblank(t);
  614. }
  615. q = skipempty(t->next);
  616. if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1,flags))) ) {
  617. ___mkd_freeLineRange(t, q);
  618. t = q;
  619. break;
  620. }
  621. }
  622. if ( isdivmarker(p->text,0,flags) ) {
  623. char *prefix = "class";
  624. int i;
  625. q = p->text;
  626. p->text = p->text->next;
  627. if ( (i = szmarkerclass(1+T(q->text))) == 3 )
  628. /* and this would be an "%id:" prefix */
  629. prefix="id";
  630. if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
  631. sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
  632. T(q->text)+(i+1) );
  633. ___mkd_freeLine(q);
  634. }
  635. return t;
  636. }
  637. typedef int (*linefn)(Line *);
  638. /*
  639. * pull in a list block. A list block starts with a list marker and
  640. * runs until the next list marker, the next non-indented paragraph,
  641. * or EOF. You do not have to indent nonblank lines after the list
  642. * marker, but multiple paragraphs need to start with a 4-space indent.
  643. */
  644. static Line *
  645. listitem(Paragraph *p, int indent, DWORD flags, linefn check)
  646. {
  647. Line *t, *q;
  648. int clip = indent;
  649. int z;
  650. for ( t = p->text; t ; t = q) {
  651. CLIP(t->text, 0, clip);
  652. UNCHECK(t);
  653. t->dle = mkd_firstnonblank(t);
  654. if ( (q = skipempty(t->next)) == 0 ) {
  655. ___mkd_freeLineRange(t,q);
  656. return 0;
  657. }
  658. /* after a blank line, the next block needs to start with a line
  659. * that's indented 4(? -- reference implementation allows a 1
  660. * character indent, but that has unfortunate side effects here)
  661. * spaces, but after that the line doesn't need any indentation
  662. */
  663. if ( q != t->next ) {
  664. if (q->dle < indent) {
  665. q = t->next;
  666. t->next = 0;
  667. return q;
  668. }
  669. /* indent at least 2, and at most as
  670. * as far as the initial line was indented. */
  671. indent = clip ? clip : 2;
  672. }
  673. if ( (q->dle < indent) && (ishr(q) || islist(q,&z,flags,&z)
  674. || (check && (*check)(q)))
  675. && !issetext(q,&z) ) {
  676. q = t->next;
  677. t->next = 0;
  678. return q;
  679. }
  680. clip = (q->dle > indent) ? indent : q->dle;
  681. }
  682. return t;
  683. }
  684. static Line *
  685. definition_block(Paragraph *top, int clip, MMIOT *f, int kind)
  686. {
  687. ParagraphRoot d = { 0, 0 };
  688. Paragraph *p;
  689. Line *q = top->text, *text = 0, *labels;
  690. int z, para;
  691. while (( labels = q )) {
  692. if ( (q = isdefinition(labels, &z, &kind)) == 0 )
  693. break;
  694. if ( (text = skipempty(q->next)) == 0 )
  695. break;
  696. if (( para = (text != q->next) ))
  697. ___mkd_freeLineRange(q, text);
  698. q->next = 0;
  699. if ( kind == 1 /* discount dl */ )
  700. for ( q = labels; q; q = q->next ) {
  701. CLIP(q->text, 0, 1);
  702. UNCHECK(q);
  703. S(q->text)--;
  704. }
  705. dd_block:
  706. p = Pp(&d, text, LISTITEM);
  707. text = listitem(p, clip, f->flags, (kind==2) ? is_extra_dd : 0);
  708. p->down = compile(p->text, 0, f);
  709. p->text = labels; labels = 0;
  710. if ( para && p->down ) p->down->align = PARA;
  711. if ( (q = skipempty(text)) == 0 )
  712. break;
  713. if (( para = (q != text) )) {
  714. Line anchor;
  715. anchor.next = text;
  716. ___mkd_freeLineRange(&anchor,q);
  717. text = q;
  718. }
  719. if ( kind == 2 && is_extra_dd(q) )
  720. goto dd_block;
  721. }
  722. top->text = 0;
  723. top->down = T(d);
  724. return text;
  725. }
  726. static Line *
  727. enumerated_block(Paragraph *top, int clip, MMIOT *f, int list_class)
  728. {
  729. ParagraphRoot d = { 0, 0 };
  730. Paragraph *p;
  731. Line *q = top->text, *text;
  732. int para = 0, z;
  733. while (( text = q )) {
  734. p = Pp(&d, text, LISTITEM);
  735. text = listitem(p, clip, f->flags, 0);
  736. p->down = compile(p->text, 0, f);
  737. p->text = 0;
  738. if ( para && p->down ) p->down->align = PARA;
  739. if ( (q = skipempty(text)) == 0
  740. || islist(q, &clip, f->flags, &z) != list_class )
  741. break;
  742. if ( para = (q != text) ) {
  743. Line anchor;
  744. anchor.next = text;
  745. ___mkd_freeLineRange(&anchor, q);
  746. if ( p->down ) p->down->align = PARA;
  747. }
  748. }
  749. top->text = 0;
  750. top->down = T(d);
  751. return text;
  752. }
  /* if <c> can open a title (' or " or an opening parenthesis), return
   * the character that closes it; otherwise return 0.
   */
  static int
  tgood(char c)
  {
      if ( c == '\'' || c == '"' )
          return c;
      if ( c == '(' )
          return ')';
      return 0;
  }
  763. /*
  764. * add a new (image or link) footnote to the footnote table
  765. */
  766. static Line*
  767. addfootnote(Line *p, MMIOT* f)
  768. {
  769. int j, i;
  770. int c;
  771. Line *np = p->next;
  772. Footnote *foot = &EXPAND(*f->footnotes);
  773. CREATE(foot->tag);
  774. CREATE(foot->link);
  775. CREATE(foot->title);
  776. foot->flags = foot->height = foot->width = 0;
  777. for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
  778. EXPAND(foot->tag) = T(p->text)[j];
  779. EXPAND(foot->tag) = 0;
  780. S(foot->tag)--;
  781. j = nextnonblank(p, j+2);
  782. if ( (f->flags & MKD_EXTRA_FOOTNOTE) && (T(foot->tag)[0] == '^') ) {
  783. while ( j < S(p->text) )
  784. EXPAND(foot->title) = T(p->text)[j++];
  785. goto skip_to_end;
  786. }
  787. while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
  788. EXPAND(foot->link) = T(p->text)[j++];
  789. EXPAND(foot->link) = 0;
  790. S(foot->link)--;
  791. j = nextnonblank(p,j);
  792. if ( T(p->text)[j] == '=' ) {
  793. sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
  794. while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
  795. ++j;
  796. j = nextnonblank(p,j);
  797. }
  798. if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
  799. ___mkd_freeLine(p);
  800. p = np;
  801. np = p->next;
  802. j = p->dle;
  803. }
  804. if ( (c = tgood(T(p->text)[j])) ) {
  805. /* Try to take the rest of the line as a comment; read to
  806. * EOL, then shrink the string back to before the final
  807. * quote.
  808. */
  809. ++j; /* skip leading quote */
  810. while ( j < S(p->text) )
  811. EXPAND(foot->title) = T(p->text)[j++];
  812. while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
  813. --S(foot->title);
  814. if ( S(foot->title) ) /* skip trailing quote */
  815. --S(foot->title);
  816. EXPAND(foot->title) = 0;
  817. --S(foot->title);
  818. }
  819. skip_to_end:
  820. ___mkd_freeLine(p);
  821. return np;
  822. }
  823. /*
  824. * allocate a paragraph header, link it to the
  825. * tail of the current document
  826. */
  827. static Paragraph *
  828. Pp(ParagraphRoot *d, Line *ptr, int typ)
  829. {
  830. Paragraph *ret = calloc(sizeof *ret, 1);
  831. ret->text = ptr;
  832. ret->typ = typ;
  833. return ATTACH(*d, ret);
  834. }
  835. static Line*
  836. consume(Line *ptr, int *eaten)
  837. {
  838. Line *next;
  839. int blanks=0;
  840. for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
  841. next = ptr->next;
  842. ___mkd_freeLine(ptr);
  843. }
  844. if ( ptr ) *eaten = blanks;
  845. return ptr;
  846. }
  847. /*
  848. * top-level compilation; break the document into
  849. * style, html, and source blocks with footnote links
  850. * weeded out.
  851. */
  852. static Paragraph *
  853. compile_document(Line *ptr, MMIOT *f)
  854. {
  855. ParagraphRoot d = { 0, 0 };
  856. ANCHOR(Line) source = { 0, 0 };
  857. Paragraph *p = 0;
  858. struct kw *tag;
  859. int eaten, unclosed;
  860. while ( ptr ) {
  861. if ( !(f->flags & MKD_NOHTML) && (tag = isopentag(ptr)) ) {
  862. /* If we encounter a html/style block, compile and save all
  863. * of the cached source BEFORE processing the html/style.
  864. */
  865. if ( T(source) ) {
  866. E(source)->next = 0;
  867. p = Pp(&d, 0, SOURCE);
  868. p->down = compile(T(source), 1, f);
  869. T(source) = E(source) = 0;
  870. }
  871. p = Pp(&d, ptr, strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML);
  872. ptr = htmlblock(p, tag, &unclosed);
  873. if ( unclosed ) {
  874. p->typ = SOURCE;
  875. p->down = compile(p->text, 1, f);
  876. p->text = 0;
  877. }
  878. }
  879. else if ( isfootnote(ptr) ) {
  880. /* footnotes, like cats, sleep anywhere; pull them
  881. * out of the input stream and file them away for
  882. * later processing
  883. */
  884. ptr = consume(addfootnote(ptr, f), &eaten);
  885. }
  886. else {
  887. /* source; cache it up to wait for eof or the
  888. * next html/style block
  889. */
  890. ATTACH(source,ptr);
  891. ptr = ptr->next;
  892. }
  893. }
  894. if ( T(source) ) {
  895. /* if there's any cached source at EOF, compile
  896. * it now.
  897. */
  898. E(source)->next = 0;
  899. p = Pp(&d, 0, SOURCE);
  900. p->down = compile(T(source), 1, f);
  901. }
  902. return T(d);
  903. }
  904. static int
  905. first_nonblank_before(Line *j, int dle)
  906. {
  907. return (j->dle < dle) ? j->dle : dle;
  908. }
  909. static int
  910. actually_a_table(MMIOT *f, Line *pp)
  911. {
  912. Line *r;
  913. int j;
  914. int c;
  915. /* tables need to be turned on */
  916. if ( f->flags & (MKD_STRICT|MKD_NOTABLES) )
  917. return 0;
  918. /* tables need three lines */
  919. if ( !(pp && pp->next && pp->next->next) ) {
  920. return 0;
  921. }
  922. /* all lines must contain |'s */
  923. for (r = pp; r; r = r->next )
  924. if ( !(r->flags & PIPECHAR) ) {
  925. return 0;
  926. }
  927. /* if the header has a leading |, all lines must have leading |'s */
  928. if ( T(pp->text)[pp->dle] == '|' ) {
  929. for ( r = pp; r; r = r->next )
  930. if ( T(r->text)[first_nonblank_before(r,pp->dle)] != '|' ) {
  931. return 0;
  932. }
  933. }
  934. /* second line must be only whitespace, -, |, or : */
  935. r = pp->next;
  936. for ( j=r->dle; j < S(r->text); ++j ) {
  937. c = T(r->text)[j];
  938. if ( !(isspace(c)||(c=='-')||(c==':')||(c=='|')) ) {
  939. return 0;
  940. }
  941. }
  942. return 1;
  943. }
  944. /*
  945. * break a collection of markdown input into
  946. * blocks of lists, code, html, and text to
  947. * be marked up.
  948. */
  949. static Paragraph *
  950. compile(Line *ptr, int toplevel, MMIOT *f)
  951. {
  952. ParagraphRoot d = { 0, 0 };
  953. Paragraph *p = 0;
  954. Line *r;
  955. int para = toplevel;
  956. int blocks = 0;
  957. int hdr_type, list_type, list_class, indent;
  958. ptr = consume(ptr, &para);
  959. while ( ptr ) {
  960. if ( iscode(ptr) ) {
  961. p = Pp(&d, ptr, CODE);
  962. if ( f->flags & MKD_1_COMPAT) {
  963. /* HORRIBLE STANDARDS KLUDGE: the first line of every block
  964. * has trailing whitespace trimmed off.
  965. */
  966. ___mkd_tidy(&p->text->text);
  967. }
  968. ptr = codeblock(p);
  969. }
  970. #if WITH_FENCED_CODE
  971. else if ( iscodefence(ptr,3) && (p=fencedcodeblock(&d, &ptr)) )
  972. /* yay, it's already done */ ;
  973. #endif
  974. else if ( ishr(ptr) ) {
  975. p = Pp(&d, 0, HR);
  976. r = ptr;
  977. ptr = ptr->next;
  978. ___mkd_freeLine(r);
  979. }
  980. else if (( list_class = islist(ptr, &indent, f->flags, &list_type) )) {
  981. if ( list_class == DL ) {
  982. p = Pp(&d, ptr, DL);
  983. ptr = definition_block(p, indent, f, list_type);
  984. }
  985. else {
  986. p = Pp(&d, ptr, list_type);
  987. ptr = enumerated_block(p, indent, f, list_class);
  988. }
  989. }
  990. else if ( isquote(ptr) ) {
  991. p = Pp(&d, ptr, QUOTE);
  992. ptr = quoteblock(p, f->flags);
  993. p->down = compile(p->text, 1, f);
  994. p->text = 0;
  995. }
  996. else if ( ishdr(ptr, &hdr_type) ) {
  997. p = Pp(&d, ptr, HDR);
  998. ptr = headerblock(p, hdr_type);
  999. }
  1000. else {
  1001. p = Pp(&d, ptr, MARKUP);
  1002. ptr = textblock(p, toplevel, f->flags);
  1003. /* tables are a special kind of paragraph */
  1004. if ( actually_a_table(f, p->text) )
  1005. p->typ = TABLE;
  1006. }
  1007. if ( (para||toplevel) && !p->align )
  1008. p->align = PARA;
  1009. blocks++;
  1010. para = toplevel || (blocks > 1);
  1011. ptr = consume(ptr, &para);
  1012. if ( para && !p->align )
  1013. p->align = PARA;
  1014. }
  1015. return T(d);
  1016. }
  1017. /*
  1018. * the guts of the markdown() function, ripped out so I can do
  1019. * debugging.
  1020. */
  1021. /*
  1022. * prepare and compile `text`, returning a Paragraph tree.
  1023. */
  1024. int
  1025. mkd_compile(Document *doc, DWORD flags)
  1026. {
  1027. if ( !doc )
  1028. return 0;
  1029. if ( doc->compiled )
  1030. return 1;
  1031. doc->compiled = 1;
  1032. memset(doc->ctx, 0, sizeof(MMIOT) );
  1033. doc->ctx->ref_prefix= doc->ref_prefix;
  1034. doc->ctx->cb = &(doc->cb);
  1035. doc->ctx->flags = flags & USER_FLAGS;
  1036. CREATE(doc->ctx->in);
  1037. doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
  1038. CREATE(*doc->ctx->footnotes);
  1039. mkd_initialize();
  1040. doc->code = compile_document(T(doc->content), doc->ctx);
  1041. qsort(T(*doc->ctx->footnotes), S(*doc->ctx->footnotes),
  1042. sizeof T(*doc->ctx->footnotes)[0],
  1043. (stfu)__mkd_footsort);
  1044. memset(&doc->content, 0, sizeof doc->content);
  1045. return 1;
  1046. }