Text::Markdown::Discount
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1320 lines
26 KiB

  1. /* markdown: a C implementation of John Gruber's Markdown markup language.
  2. *
  3. * Copyright (C) 2007 David L Parsons.
  4. * The redistribution terms are provided in the COPYRIGHT file that must
  5. * be distributed with this source code.
  6. */
  7. #include "config.h"
  8. #include <stdio.h>
  9. #include <string.h>
  10. #include <stdarg.h>
  11. #include <stdlib.h>
  12. #include <time.h>
  13. #include <ctype.h>
  14. #include "cstring.h"
  15. #include "markdown.h"
  16. #include "amalloc.h"
  17. #include "tags.h"
  18. typedef int (*stfu)(const void*,const void*);
  19. typedef ANCHOR(Paragraph) ParagraphRoot;
  20. static Paragraph *Pp(ParagraphRoot *, Line *, int);
  21. static Paragraph *compile(Line *, int, MMIOT *);
  22. /* case insensitive string sort for Footnote tags.
  23. */
  24. int
  25. __mkd_footsort(Footnote *a, Footnote *b)
  26. {
  27. int i;
  28. char ac, bc;
  29. if ( S(a->tag) != S(b->tag) )
  30. return S(a->tag) - S(b->tag);
  31. for ( i=0; i < S(a->tag); i++) {
  32. ac = tolower(T(a->tag)[i]);
  33. bc = tolower(T(b->tag)[i]);
  34. if ( isspace(ac) && isspace(bc) )
  35. continue;
  36. if ( ac != bc )
  37. return ac - bc;
  38. }
  39. return 0;
  40. }
  41. /* find the first blank character after position <i>
  42. */
  43. static int
  44. nextblank(Line *t, int i)
  45. {
  46. while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
  47. ++i;
  48. return i;
  49. }
  50. /* find the next nonblank character after position <i>
  51. */
  52. static int
  53. nextnonblank(Line *t, int i)
  54. {
  55. while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
  56. ++i;
  57. return i;
  58. }
  59. /* find the first nonblank character on the Line.
  60. */
  61. int
  62. mkd_firstnonblank(Line *p)
  63. {
  64. return nextnonblank(p,0);
  65. }
  66. static inline int
  67. blankline(Line *p)
  68. {
  69. return ! (p && (S(p->text) > p->dle) );
  70. }
  71. static Line *
  72. skipempty(Line *p)
  73. {
  74. while ( p && (p->dle == S(p->text)) )
  75. p = p->next;
  76. return p;
  77. }
  78. void
  79. ___mkd_tidy(Cstring *t)
  80. {
  81. while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
  82. --S(*t);
  83. }
  84. static struct kw comment = { "!--", 3, 0 };
  85. static struct kw *
  86. isopentag(Line *p)
  87. {
  88. int i=0, len;
  89. char *line;
  90. if ( !p ) return 0;
  91. line = T(p->text);
  92. len = S(p->text);
  93. if ( len < 3 || line[0] != '<' )
  94. return 0;
  95. if ( line[1] == '!' && line[2] == '-' && line[3] == '-' )
  96. /* comments need special case handling, because
  97. * the !-- doesn't need to end in a whitespace
  98. */
  99. return &comment;
  100. /* find how long the tag is so we can check to see if
  101. * it's a block-level tag
  102. */
  103. for ( i=1; i < len && T(p->text)[i] != '>'
  104. && T(p->text)[i] != '/'
  105. && !isspace(T(p->text)[i]); ++i )
  106. ;
  107. return mkd_search_tags(T(p->text)+1, i-1);
  108. }
  109. typedef struct _flo {
  110. Line *t;
  111. int i;
  112. } FLO;
  113. #define floindex(x) (x.i)
  114. static int
  115. flogetc(FLO *f)
  116. {
  117. if ( f && f->t ) {
  118. if ( f->i < S(f->t->text) )
  119. return T(f->t->text)[f->i++];
  120. f->t = f->t->next;
  121. f->i = 0;
  122. return flogetc(f);
  123. }
  124. return EOF;
  125. }
  126. static void
  127. splitline(Line *t, int cutpoint)
  128. {
  129. if ( t && (cutpoint < S(t->text)) ) {
  130. Line *tmp = calloc(1, sizeof *tmp);
  131. tmp->next = t->next;
  132. t->next = tmp;
  133. tmp->dle = t->dle;
  134. SUFFIX(tmp->text, T(t->text)+cutpoint, S(t->text)-cutpoint);
  135. S(t->text) = cutpoint;
  136. }
  137. }
  138. #define UNCHECK(l) ((l)->flags &= ~CHECKED)
  139. /*
  140. * walk a line, seeing if it's any of half a dozen interesting regular
  141. * types.
  142. */
  143. static void
  144. checkline(Line *l)
  145. {
  146. int eol, i;
  147. int dashes = 0, spaces = 0,
  148. equals = 0, underscores = 0,
  149. stars = 0, tildes = 0,
  150. backticks = 0;
  151. l->flags |= CHECKED;
  152. l->kind = chk_text;
  153. l->count = 0;
  154. if (l->dle >= 4) { l->kind=chk_code; return; }
  155. for ( eol = S(l->text); eol > l->dle && isspace(T(l->text)[eol-1]); --eol )
  156. ;
  157. for (i=l->dle; i<eol; i++) {
  158. register int c = T(l->text)[i];
  159. if ( c != ' ' ) l->count++;
  160. switch (c) {
  161. case '-': dashes = 1; break;
  162. case ' ': spaces = 1; break;
  163. case '=': equals = 1; break;
  164. case '_': underscores = 1; break;
  165. case '*': stars = 1; break;
  166. #if WITH_FENCED_CODE
  167. case '~': tildes = 1; break;
  168. case '`': backticks = 1; break;
  169. #endif
  170. default: return;
  171. }
  172. }
  173. if ( dashes + equals + underscores + stars + tildes + backticks > 1 )
  174. return;
  175. if ( spaces ) {
  176. if ( (underscores || stars || dashes) )
  177. l->kind = chk_hr;
  178. return;
  179. }
  180. if ( stars || underscores ) { l->kind = chk_hr; }
  181. else if ( dashes ) { l->kind = chk_dash; }
  182. else if ( equals ) { l->kind = chk_equal; }
  183. #if WITH_FENCED_CODE
  184. else if ( tildes ) { l->kind = chk_tilde; }
  185. else if ( backticks ) { l->kind = chk_backtick; }
  186. #endif
  187. }
  188. static Line *
  189. commentblock(Paragraph *p, int *unclosed)
  190. {
  191. Line *t, *ret;
  192. char *end;
  193. for ( t = p->text; t ; t = t->next) {
  194. if ( end = strstr(T(t->text), "-->") ) {
  195. splitline(t, 3 + (end - T(t->text)) );
  196. ret = t->next;
  197. t->next = 0;
  198. return ret;
  199. }
  200. }
  201. *unclosed = 1;
  202. return t;
  203. }
  204. static Line *
  205. htmlblock(Paragraph *p, struct kw *tag, int *unclosed)
  206. {
  207. Line *ret;
  208. FLO f = { p->text, 0 };
  209. int c;
  210. int i, closing, depth=0;
  211. *unclosed = 0;
  212. if ( tag == &comment )
  213. return commentblock(p, unclosed);
  214. if ( tag->selfclose ) {
  215. ret = f.t->next;
  216. f.t->next = 0;
  217. return ret;
  218. }
  219. while ( (c = flogetc(&f)) != EOF ) {
  220. if ( c == '<' ) {
  221. /* tag? */
  222. c = flogetc(&f);
  223. if ( c == '!' ) { /* comment? */
  224. if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
  225. /* yes */
  226. while ( (c = flogetc(&f)) != EOF ) {
  227. if ( c == '-' && flogetc(&f) == '-'
  228. && flogetc(&f) == '>')
  229. /* consumed whole comment */
  230. break;
  231. }
  232. }
  233. }
  234. else {
  235. if ( closing = (c == '/') ) c = flogetc(&f);
  236. for ( i=0; i < tag->size; c=flogetc(&f) ) {
  237. if ( tag->id[i++] != toupper(c) )
  238. break;
  239. }
  240. if ( (i == tag->size) && !isalnum(c) ) {
  241. depth = depth + (closing ? -1 : 1);
  242. if ( depth == 0 ) {
  243. while ( c != EOF && c != '>' ) {
  244. /* consume trailing gunk in close tag */
  245. c = flogetc(&f);
  246. }
  247. if ( c == EOF )
  248. break;
  249. if ( !f.t )
  250. return 0;
  251. splitline(f.t, floindex(f));
  252. ret = f.t->next;
  253. f.t->next = 0;
  254. return ret;
  255. }
  256. }
  257. }
  258. }
  259. }
  260. *unclosed = 1;
  261. return 0;
  262. }
  263. /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
  264. */
  265. static int
  266. isfootnote(Line *t)
  267. {
  268. int i;
  269. if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
  270. return 0;
  271. for ( ++i; i < S(t->text) ; ++i ) {
  272. if ( T(t->text)[i] == '[' )
  273. return 0;
  274. else if ( T(t->text)[i] == ']' )
  275. return ( T(t->text)[i+1] == ':' ) ;
  276. }
  277. return 0;
  278. }
  279. static inline int
  280. isquote(Line *t)
  281. {
  282. return (t->dle < 4 && T(t->text)[t->dle] == '>');
  283. }
  284. static inline int
  285. iscode(Line *t)
  286. {
  287. return (t->dle >= 4);
  288. }
  289. static inline int
  290. ishr(Line *t)
  291. {
  292. if ( ! (t->flags & CHECKED) )
  293. checkline(t);
  294. if ( t->count > 2 )
  295. return t->kind == chk_hr || t->kind == chk_dash || t->kind == chk_equal;
  296. return 0;
  297. }
  298. static int
  299. issetext(Line *t, int *htyp)
  300. {
  301. Line *n;
  302. /* check for setext-style HEADER
  303. * ======
  304. */
  305. if ( (n = t->next) ) {
  306. if ( !(n->flags & CHECKED) )
  307. checkline(n);
  308. if ( n->kind == chk_dash || n->kind == chk_equal ) {
  309. *htyp = SETEXT;
  310. return 1;
  311. }
  312. }
  313. return 0;
  314. }
  315. static int
  316. ishdr(Line *t, int *htyp)
  317. {
  318. /* ANY leading `#`'s make this into an ETX header
  319. */
  320. if ( (t->dle == 0) && (S(t->text) > 1) && (T(t->text)[0] == '#') ) {
  321. *htyp = ETX;
  322. return 1;
  323. }
  324. /* And if not, maybe it's a SETEXT header instead
  325. */
  326. return issetext(t, htyp);
  327. }
  328. static inline int
  329. end_of_block(Line *t)
  330. {
  331. int dummy;
  332. if ( !t )
  333. return 0;
  334. return ( (S(t->text) <= t->dle) || ishr(t) || ishdr(t, &dummy) );
  335. }
  336. static Line*
  337. is_discount_dt(Line *t, int *clip)
  338. {
  339. #if USE_DISCOUNT_DL
  340. if ( t && t->next
  341. && (S(t->text) > 2)
  342. && (t->dle == 0)
  343. && (T(t->text)[0] == '=')
  344. && (T(t->text)[S(t->text)-1] == '=') ) {
  345. if ( t->next->dle >= 4 ) {
  346. *clip = 4;
  347. return t;
  348. }
  349. else
  350. return is_discount_dt(t->next, clip);
  351. }
  352. #endif
  353. return 0;
  354. }
  355. static int
  356. is_extra_dd(Line *t)
  357. {
  358. return (t->dle < 4) && (T(t->text)[t->dle] == ':')
  359. && isspace(T(t->text)[t->dle+1]);
  360. }
  361. static Line*
  362. is_extra_dt(Line *t, int *clip)
  363. {
  364. #if USE_EXTRA_DL
  365. if ( t && t->next && S(t->text) && T(t->text)[0] != '='
  366. && T(t->text)[S(t->text)-1] != '=') {
  367. Line *x;
  368. if ( iscode(t) || end_of_block(t) )
  369. return 0;
  370. if ( (x = skipempty(t->next)) && is_extra_dd(x) ) {
  371. *clip = x->dle+2;
  372. return t;
  373. }
  374. if ( x=is_extra_dt(t->next, clip) )
  375. return x;
  376. }
  377. #endif
  378. return 0;
  379. }
  380. static Line*
  381. isdefinition(Line *t, int *clip, int *kind)
  382. {
  383. Line *ret;
  384. *kind = 1;
  385. if ( ret = is_discount_dt(t,clip) )
  386. return ret;
  387. *kind=2;
  388. return is_extra_dt(t,clip);
  389. }
  390. static int
  391. islist(Line *t, int *clip, DWORD flags, int *list_type)
  392. {
  393. int i, j;
  394. char *q;
  395. if ( end_of_block(t) )
  396. return 0;
  397. if ( !(flags & (MKD_NODLIST|MKD_STRICT)) && isdefinition(t,clip,list_type) )
  398. return DL;
  399. if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
  400. i = nextnonblank(t, t->dle+1);
  401. *clip = (i > 4) ? 4 : i;
  402. *list_type = UL;
  403. return AL;
  404. }
  405. if ( (j = nextblank(t,t->dle)) > t->dle ) {
  406. if ( T(t->text)[j-1] == '.' ) {
  407. if ( !(flags & (MKD_NOALPHALIST|MKD_STRICT))
  408. && (j == t->dle + 2)
  409. && isalpha(T(t->text)[t->dle]) ) {
  410. j = nextnonblank(t,j);
  411. *clip = (j > 4) ? 4 : j;
  412. *list_type = AL;
  413. return AL;
  414. }
  415. strtoul(T(t->text)+t->dle, &q, 10);
  416. if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
  417. j = nextnonblank(t,j);
  418. /* *clip = j; */
  419. *clip = (j > 4) ? 4 : j;
  420. *list_type = OL;
  421. return AL;
  422. }
  423. }
  424. }
  425. return 0;
  426. }
  427. static Line *
  428. headerblock(Paragraph *pp, int htyp)
  429. {
  430. Line *ret = 0;
  431. Line *p = pp->text;
  432. int i, j;
  433. switch (htyp) {
  434. case SETEXT:
  435. /* p->text is header, p->next->text is -'s or ='s
  436. */
  437. pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
  438. ret = p->next->next;
  439. ___mkd_freeLine(p->next);
  440. p->next = 0;
  441. break;
  442. case ETX:
  443. /* p->text is ###header###, so we need to trim off
  444. * the leading and trailing `#`'s
  445. */
  446. for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1)
  447. && (i < 6); i++)
  448. ;
  449. pp->hnumber = i;
  450. while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
  451. ++i;
  452. CLIP(p->text, 0, i);
  453. UNCHECK(p);
  454. for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
  455. ;
  456. while ( j && isspace(T(p->text)[j-1]) )
  457. --j;
  458. S(p->text) = j;
  459. ret = p->next;
  460. p->next = 0;
  461. break;
  462. }
  463. return ret;
  464. }
  465. static Line *
  466. codeblock(Paragraph *p)
  467. {
  468. Line *t = p->text, *r;
  469. for ( ; t; t = r ) {
  470. CLIP(t->text,0,4);
  471. t->dle = mkd_firstnonblank(t);
  472. if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
  473. ___mkd_freeLineRange(t,r);
  474. t->next = 0;
  475. return r;
  476. }
  477. }
  478. return t;
  479. }
  480. #ifdef WITH_FENCED_CODE
  481. static int
  482. iscodefence(Line *r, int size, line_type kind)
  483. {
  484. if ( !(r->flags & CHECKED) )
  485. checkline(r);
  486. if ( kind )
  487. return (r->kind == kind) && (r->count >= size);
  488. else
  489. return (r->kind == chk_tilde || r->kind == chk_backtick) && (r->count >= size);
  490. }
  491. static Paragraph *
  492. fencedcodeblock(ParagraphRoot *d, Line **ptr)
  493. {
  494. Line *first, *r;
  495. Paragraph *ret;
  496. first = (*ptr);
  497. /* don't allow zero-length code fences
  498. */
  499. if ( (first->next == 0) || iscodefence(first->next, first->count, 0) )
  500. return 0;
  501. /* find the closing fence, discard the fences,
  502. * return a Paragraph with the contents
  503. */
  504. for ( r = first; r && r->next; r = r->next )
  505. if ( iscodefence(r->next, first->count, first->kind) ) {
  506. (*ptr) = r->next->next;
  507. ret = Pp(d, first->next, CODE);
  508. ___mkd_freeLine(first);
  509. ___mkd_freeLine(r->next);
  510. r->next = 0;
  511. return ret;
  512. }
  513. return 0;
  514. }
  515. #endif
  516. static int
  517. centered(Line *first, Line *last)
  518. {
  519. if ( first&&last ) {
  520. int len = S(last->text);
  521. if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
  522. && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
  523. CLIP(first->text, 0, 2);
  524. S(last->text) -= 2;
  525. return CENTER;
  526. }
  527. }
  528. return 0;
  529. }
  530. static int
  531. endoftextblock(Line *t, int toplevelblock, DWORD flags)
  532. {
  533. int z;
  534. if ( end_of_block(t) || isquote(t) )
  535. return 1;
  536. /* HORRIBLE STANDARDS KLUDGES:
  537. * 1. non-toplevel paragraphs absorb adjacent code blocks
  538. * 2. Toplevel paragraphs eat absorb adjacent list items,
  539. * but sublevel blocks behave properly.
  540. * (What this means is that we only need to check for code
  541. * blocks at toplevel, and only check for list items at
  542. * nested levels.)
  543. */
  544. return toplevelblock ? 0 : islist(t,&z,flags,&z);
  545. }
  546. static Line *
  547. textblock(Paragraph *p, int toplevel, DWORD flags)
  548. {
  549. Line *t, *next;
  550. for ( t = p->text; t ; t = next ) {
  551. if ( ((next = t->next) == 0) || endoftextblock(next, toplevel, flags) ) {
  552. p->align = centered(p->text, t);
  553. t->next = 0;
  554. return next;
  555. }
  556. }
  557. return t;
  558. }
  559. /* length of the id: or class: kind in a special div-not-quote block
  560. */
  561. static int
  562. szmarkerclass(char *p)
  563. {
  564. if ( strncasecmp(p, "id:", 3) == 0 )
  565. return 3;
  566. if ( strncasecmp(p, "class:", 6) == 0 )
  567. return 6;
  568. return 0;
  569. }
  570. /*
  571. * check if the first line of a quoted block is the special div-not-quote
  572. * marker %[kind:]name%
  573. */
  574. #define iscsschar(c) (isalpha(c) || (c == '-') || (c == '_') )
  575. static int
  576. isdivmarker(Line *p, int start, DWORD flags)
  577. {
  578. char *s;
  579. int last, i;
  580. if ( flags & (MKD_NODIVQUOTE|MKD_STRICT) )
  581. return 0;
  582. start = nextnonblank(p, start);
  583. last= S(p->text) - (1 + start);
  584. s = T(p->text) + start;
  585. if ( (last <= 0) || (*s != '%') || (s[last] != '%') )
  586. return 0;
  587. i = szmarkerclass(s+1);
  588. if ( !iscsschar(s[i+1]) )
  589. return 0;
  590. while ( ++i < last )
  591. if ( !(isdigit(s[i]) || iscsschar(s[i])) )
  592. return 0;
  593. return 1;
  594. }
  595. /*
  596. * accumulate a blockquote.
  597. *
  598. * one sick horrible thing about blockquotes is that even though
  599. * it just takes ^> to start a quote, following lines, if quoted,
  600. * assume that the prefix is ``> ''. This means that code needs
  601. * to be indented *5* spaces from the leading '>', but *4* spaces
  602. * from the start of the line. This does not appear to be
  603. * documented in the reference implementation, but it's the
  604. * way the markdown sample web form at Daring Fireball works.
  605. */
  606. static Line *
  607. quoteblock(Paragraph *p, DWORD flags)
  608. {
  609. Line *t, *q;
  610. int qp;
  611. for ( t = p->text; t ; t = q ) {
  612. if ( isquote(t) ) {
  613. /* clip leading spaces */
  614. for (qp = 0; T(t->text)[qp] != '>'; qp ++)
  615. /* assert: the first nonblank character on this line
  616. * will be a >
  617. */;
  618. /* clip '>' */
  619. qp++;
  620. /* clip next space, if any */
  621. if ( T(t->text)[qp] == ' ' )
  622. qp++;
  623. CLIP(t->text, 0, qp);
  624. UNCHECK(t);
  625. t->dle = mkd_firstnonblank(t);
  626. }
  627. q = skipempty(t->next);
  628. if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1,flags))) ) {
  629. ___mkd_freeLineRange(t, q);
  630. t = q;
  631. break;
  632. }
  633. }
  634. if ( isdivmarker(p->text,0,flags) ) {
  635. char *prefix = "class";
  636. int i;
  637. q = p->text;
  638. p->text = p->text->next;
  639. if ( (i = szmarkerclass(1+T(q->text))) == 3 )
  640. /* and this would be an "%id:" prefix */
  641. prefix="id";
  642. if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
  643. sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
  644. T(q->text)+(i+1) );
  645. ___mkd_freeLine(q);
  646. }
  647. return t;
  648. }
  649. typedef int (*linefn)(Line *);
  650. /*
  651. * pull in a list block. A list block starts with a list marker and
  652. * runs until the next list marker, the next non-indented paragraph,
  653. * or EOF. You do not have to indent nonblank lines after the list
  654. * marker, but multiple paragraphs need to start with a 4-space indent.
  655. */
  656. static Line *
  657. listitem(Paragraph *p, int indent, DWORD flags, linefn check)
  658. {
  659. Line *t, *q;
  660. int clip = indent;
  661. int z;
  662. for ( t = p->text; t ; t = q) {
  663. CLIP(t->text, 0, clip);
  664. UNCHECK(t);
  665. t->dle = mkd_firstnonblank(t);
  666. if ( (q = skipempty(t->next)) == 0 ) {
  667. ___mkd_freeLineRange(t,q);
  668. return 0;
  669. }
  670. /* after a blank line, the next block needs to start with a line
  671. * that's indented 4(? -- reference implementation allows a 1
  672. * character indent, but that has unfortunate side effects here)
  673. * spaces, but after that the line doesn't need any indentation
  674. */
  675. if ( q != t->next ) {
  676. if (q->dle < indent) {
  677. q = t->next;
  678. t->next = 0;
  679. return q;
  680. }
  681. /* indent at least 2, and at most as
  682. * as far as the initial line was indented. */
  683. indent = clip ? clip : 2;
  684. }
  685. if ( (q->dle < indent) && (ishr(q) || islist(q,&z,flags,&z)
  686. || (check && (*check)(q)))
  687. && !issetext(q,&z) ) {
  688. q = t->next;
  689. t->next = 0;
  690. return q;
  691. }
  692. clip = (q->dle > indent) ? indent : q->dle;
  693. }
  694. return t;
  695. }
  696. static Line *
  697. definition_block(Paragraph *top, int clip, MMIOT *f, int kind)
  698. {
  699. ParagraphRoot d = { 0, 0 };
  700. Paragraph *p;
  701. Line *q = top->text, *text = 0, *labels;
  702. int z, para;
  703. while (( labels = q )) {
  704. if ( (q = isdefinition(labels, &z, &kind)) == 0 )
  705. break;
  706. if ( (text = skipempty(q->next)) == 0 )
  707. break;
  708. if ( para = (text != q->next) )
  709. ___mkd_freeLineRange(q, text);
  710. q->next = 0;
  711. if ( kind == 1 /* discount dl */ )
  712. for ( q = labels; q; q = q->next ) {
  713. CLIP(q->text, 0, 1);
  714. UNCHECK(q);
  715. S(q->text)--;
  716. }
  717. dd_block:
  718. p = Pp(&d, text, LISTITEM);
  719. text = listitem(p, clip, f->flags, (kind==2) ? is_extra_dd : 0);
  720. p->down = compile(p->text, 0, f);
  721. p->text = labels; labels = 0;
  722. if ( para && p->down ) p->down->align = PARA;
  723. if ( (q = skipempty(text)) == 0 )
  724. break;
  725. if ( para = (q != text) ) {
  726. Line anchor;
  727. anchor.next = text;
  728. ___mkd_freeLineRange(&anchor,q);
  729. text = q;
  730. }
  731. if ( kind == 2 && is_extra_dd(q) )
  732. goto dd_block;
  733. }
  734. top->text = 0;
  735. top->down = T(d);
  736. return text;
  737. }
  738. static Line *
  739. enumerated_block(Paragraph *top, int clip, MMIOT *f, int list_class)
  740. {
  741. ParagraphRoot d = { 0, 0 };
  742. Paragraph *p;
  743. Line *q = top->text, *text;
  744. int para = 0, z;
  745. while (( text = q )) {
  746. p = Pp(&d, text, LISTITEM);
  747. text = listitem(p, clip, f->flags, 0);
  748. p->down = compile(p->text, 0, f);
  749. p->text = 0;
  750. if ( para && p->down ) p->down->align = PARA;
  751. if ( (q = skipempty(text)) == 0
  752. || islist(q, &clip, f->flags, &z) != list_class )
  753. break;
  754. if ( para = (q != text) ) {
  755. Line anchor;
  756. anchor.next = text;
  757. ___mkd_freeLineRange(&anchor, q);
  758. if ( p->down ) p->down->align = PARA;
  759. }
  760. }
  761. top->text = 0;
  762. top->down = T(d);
  763. return text;
  764. }
  765. static int
  766. tgood(char c)
  767. {
  768. switch (c) {
  769. case '\'':
  770. case '"': return c;
  771. case '(': return ')';
  772. }
  773. return 0;
  774. }
  775. /*
  776. * add a new (image or link) footnote to the footnote table
  777. */
  778. static Line*
  779. addfootnote(Line *p, MMIOT* f)
  780. {
  781. int j, i;
  782. int c;
  783. Line *np = p->next;
  784. Footnote *foot = &EXPAND(*f->footnotes);
  785. CREATE(foot->tag);
  786. CREATE(foot->link);
  787. CREATE(foot->title);
  788. foot->flags = foot->height = foot->width = 0;
  789. for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
  790. EXPAND(foot->tag) = T(p->text)[j];
  791. EXPAND(foot->tag) = 0;
  792. S(foot->tag)--;
  793. j = nextnonblank(p, j+2);
  794. if ( (f->flags & MKD_EXTRA_FOOTNOTE) && (T(foot->tag)[0] == '^') ) {
  795. while ( j < S(p->text) )
  796. EXPAND(foot->title) = T(p->text)[j++];
  797. goto skip_to_end;
  798. }
  799. while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
  800. EXPAND(foot->link) = T(p->text)[j++];
  801. EXPAND(foot->link) = 0;
  802. S(foot->link)--;
  803. j = nextnonblank(p,j);
  804. if ( T(p->text)[j] == '=' ) {
  805. sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
  806. while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
  807. ++j;
  808. j = nextnonblank(p,j);
  809. }
  810. if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
  811. ___mkd_freeLine(p);
  812. p = np;
  813. np = p->next;
  814. j = p->dle;
  815. }
  816. if ( (c = tgood(T(p->text)[j])) ) {
  817. /* Try to take the rest of the line as a comment; read to
  818. * EOL, then shrink the string back to before the final
  819. * quote.
  820. */
  821. ++j; /* skip leading quote */
  822. while ( j < S(p->text) )
  823. EXPAND(foot->title) = T(p->text)[j++];
  824. while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
  825. --S(foot->title);
  826. if ( S(foot->title) ) /* skip trailing quote */
  827. --S(foot->title);
  828. EXPAND(foot->title) = 0;
  829. --S(foot->title);
  830. }
  831. skip_to_end:
  832. ___mkd_freeLine(p);
  833. return np;
  834. }
  835. /*
  836. * allocate a paragraph header, link it to the
  837. * tail of the current document
  838. */
  839. static Paragraph *
  840. Pp(ParagraphRoot *d, Line *ptr, int typ)
  841. {
  842. Paragraph *ret = calloc(sizeof *ret, 1);
  843. ret->text = ptr;
  844. ret->typ = typ;
  845. return ATTACH(*d, ret);
  846. }
  847. static Line*
  848. consume(Line *ptr, int *eaten)
  849. {
  850. Line *next;
  851. int blanks=0;
  852. for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
  853. next = ptr->next;
  854. ___mkd_freeLine(ptr);
  855. }
  856. if ( ptr ) *eaten = blanks;
  857. return ptr;
  858. }
  859. /*
  860. * top-level compilation; break the document into
  861. * style, html, and source blocks with footnote links
  862. * weeded out.
  863. */
  864. static Paragraph *
  865. compile_document(Line *ptr, MMIOT *f)
  866. {
  867. ParagraphRoot d = { 0, 0 };
  868. ANCHOR(Line) source = { 0, 0 };
  869. Paragraph *p = 0;
  870. struct kw *tag;
  871. int eaten, unclosed;
  872. while ( ptr ) {
  873. if ( !(f->flags & MKD_NOHTML) && (tag = isopentag(ptr)) ) {
  874. int blocktype;
  875. /* If we encounter a html/style block, compile and save all
  876. * of the cached source BEFORE processing the html/style.
  877. */
  878. if ( T(source) ) {
  879. E(source)->next = 0;
  880. p = Pp(&d, 0, SOURCE);
  881. p->down = compile(T(source), 1, f);
  882. T(source) = E(source) = 0;
  883. }
  884. if ( f->flags & MKD_NOSTYLE )
  885. blocktype = HTML;
  886. else
  887. blocktype = strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML;
  888. p = Pp(&d, ptr, blocktype);
  889. ptr = htmlblock(p, tag, &unclosed);
  890. if ( unclosed ) {
  891. p->typ = SOURCE;
  892. p->down = compile(p->text, 1, f);
  893. p->text = 0;
  894. }
  895. }
  896. else if ( isfootnote(ptr) ) {
  897. /* footnotes, like cats, sleep anywhere; pull them
  898. * out of the input stream and file them away for
  899. * later processing
  900. */
  901. ptr = consume(addfootnote(ptr, f), &eaten);
  902. }
  903. else {
  904. /* source; cache it up to wait for eof or the
  905. * next html/style block
  906. */
  907. ATTACH(source,ptr);
  908. ptr = ptr->next;
  909. }
  910. }
  911. if ( T(source) ) {
  912. /* if there's any cached source at EOF, compile
  913. * it now.
  914. */
  915. E(source)->next = 0;
  916. p = Pp(&d, 0, SOURCE);
  917. p->down = compile(T(source), 1, f);
  918. }
  919. return T(d);
  920. }
  921. static int
  922. first_nonblank_before(Line *j, int dle)
  923. {
  924. return (j->dle < dle) ? j->dle : dle;
  925. }
  926. static int
  927. actually_a_table(MMIOT *f, Line *pp)
  928. {
  929. Line *r;
  930. int j;
  931. int c;
  932. /* tables need to be turned on */
  933. if ( f->flags & (MKD_STRICT|MKD_NOTABLES) )
  934. return 0;
  935. /* tables need three lines */
  936. if ( !(pp && pp->next && pp->next->next) ) {
  937. return 0;
  938. }
  939. /* all lines must contain |'s */
  940. for (r = pp; r; r = r->next )
  941. if ( !(r->flags & PIPECHAR) ) {
  942. return 0;
  943. }
  944. /* if the header has a leading |, all lines must have leading |'s */
  945. if ( T(pp->text)[pp->dle] == '|' ) {
  946. for ( r = pp; r; r = r->next )
  947. if ( T(r->text)[first_nonblank_before(r,pp->dle)] != '|' ) {
  948. return 0;
  949. }
  950. }
  951. /* second line must be only whitespace, -, |, or : */
  952. r = pp->next;
  953. for ( j=r->dle; j < S(r->text); ++j ) {
  954. c = T(r->text)[j];
  955. if ( !(isspace(c)||(c=='-')||(c==':')||(c=='|')) ) {
  956. return 0;
  957. }
  958. }
  959. return 1;
  960. }
  961. /*
  962. * break a collection of markdown input into
  963. * blocks of lists, code, html, and text to
  964. * be marked up.
  965. */
  966. static Paragraph *
  967. compile(Line *ptr, int toplevel, MMIOT *f)
  968. {
  969. ParagraphRoot d = { 0, 0 };
  970. Paragraph *p = 0;
  971. Line *r;
  972. int para = toplevel;
  973. int blocks = 0;
  974. int hdr_type, list_type, list_class, indent;
  975. ptr = consume(ptr, &para);
  976. while ( ptr ) {
  977. if ( iscode(ptr) ) {
  978. p = Pp(&d, ptr, CODE);
  979. if ( f->flags & MKD_1_COMPAT) {
  980. /* HORRIBLE STANDARDS KLUDGE: the first line of every block
  981. * has trailing whitespace trimmed off.
  982. */
  983. ___mkd_tidy(&p->text->text);
  984. }
  985. ptr = codeblock(p);
  986. }
  987. #if WITH_FENCED_CODE
  988. else if ( iscodefence(ptr,3,0) && (p=fencedcodeblock(&d, &ptr)) )
  989. /* yay, it's already done */ ;
  990. #endif
  991. else if ( ishr(ptr) ) {
  992. p = Pp(&d, 0, HR);
  993. r = ptr;
  994. ptr = ptr->next;
  995. ___mkd_freeLine(r);
  996. }
  997. else if ( list_class = islist(ptr, &indent, f->flags, &list_type) ) {
  998. if ( list_class == DL ) {
  999. p = Pp(&d, ptr, DL);
  1000. ptr = definition_block(p, indent, f, list_type);
  1001. }
  1002. else {
  1003. p = Pp(&d, ptr, list_type);
  1004. ptr = enumerated_block(p, indent, f, list_class);
  1005. }
  1006. }
  1007. else if ( isquote(ptr) ) {
  1008. p = Pp(&d, ptr, QUOTE);
  1009. ptr = quoteblock(p, f->flags);
  1010. p->down = compile(p->text, 1, f);
  1011. p->text = 0;
  1012. }
  1013. else if ( ishdr(ptr, &hdr_type) ) {
  1014. p = Pp(&d, ptr, HDR);
  1015. ptr = headerblock(p, hdr_type);
  1016. }
  1017. else {
  1018. p = Pp(&d, ptr, MARKUP);
  1019. ptr = textblock(p, toplevel, f->flags);
  1020. /* tables are a special kind of paragraph */
  1021. if ( actually_a_table(f, p->text) )
  1022. p->typ = TABLE;
  1023. }
  1024. if ( (para||toplevel) && !p->align )
  1025. p->align = PARA;
  1026. blocks++;
  1027. para = toplevel || (blocks > 1);
  1028. ptr = consume(ptr, &para);
  1029. if ( para && !p->align )
  1030. p->align = PARA;
  1031. }
  1032. return T(d);
  1033. }
  1034. /*
  1035. * the guts of the markdown() function, ripped out so I can do
  1036. * debugging.
  1037. */
  1038. /*
  1039. * prepare and compile `text`, returning a Paragraph tree.
  1040. */
  1041. int
  1042. mkd_compile(Document *doc, DWORD flags)
  1043. {
  1044. if ( !doc )
  1045. return 0;
  1046. if ( doc->compiled )
  1047. return 1;
  1048. doc->compiled = 1;
  1049. memset(doc->ctx, 0, sizeof(MMIOT) );
  1050. doc->ctx->ref_prefix= doc->ref_prefix;
  1051. doc->ctx->cb = &(doc->cb);
  1052. doc->ctx->flags = flags & USER_FLAGS;
  1053. CREATE(doc->ctx->in);
  1054. doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
  1055. CREATE(*doc->ctx->footnotes);
  1056. mkd_initialize();
  1057. doc->code = compile_document(T(doc->content), doc->ctx);
  1058. qsort(T(*doc->ctx->footnotes), S(*doc->ctx->footnotes),
  1059. sizeof T(*doc->ctx->footnotes)[0],
  1060. (stfu)__mkd_footsort);
  1061. memset(&doc->content, 0, sizeof doc->content);
  1062. return 1;
  1063. }