Text::Markdown::Discount
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1318 lines
26 KiB

  1. /* markdown: a C implementation of John Gruber's Markdown markup language.
  2. *
  3. * Copyright (C) 2007 David L Parsons.
  4. * The redistribution terms are provided in the COPYRIGHT file that must
  5. * be distributed with this source code.
  6. */
  7. #include "config.h"
  8. #include <stdio.h>
  9. #include <string.h>
  10. #include <stdarg.h>
  11. #include <stdlib.h>
  12. #include <time.h>
  13. #include <ctype.h>
  14. #include "cstring.h"
  15. #include "markdown.h"
  16. #include "amalloc.h"
  17. #include "tags.h"
/* comparator signature; mkd_compile() casts __mkd_footsort to this
 * type when handing it to qsort().
 */
typedef int (*stfu)(const void*,const void*);
/* the anchor that Pp() ATTACHes newly allocated Paragraphs to */
typedef ANCHOR(Paragraph) ParagraphRoot;
/* forward declarations: both functions are defined later in this file
 * but are called before their definitions.
 */
static Paragraph *Pp(ParagraphRoot *, Line *, int);
static Paragraph *compile(Line *, int, MMIOT *);
  22. /* case insensitive string sort for Footnote tags.
  23. */
  24. int
  25. __mkd_footsort(Footnote *a, Footnote *b)
  26. {
  27. int i;
  28. char ac, bc;
  29. if ( S(a->tag) != S(b->tag) )
  30. return S(a->tag) - S(b->tag);
  31. for ( i=0; i < S(a->tag); i++) {
  32. ac = tolower(T(a->tag)[i]);
  33. bc = tolower(T(b->tag)[i]);
  34. if ( isspace(ac) && isspace(bc) )
  35. continue;
  36. if ( ac != bc )
  37. return ac - bc;
  38. }
  39. return 0;
  40. }
  41. /* find the first blank character after position <i>
  42. */
  43. static int
  44. nextblank(Line *t, int i)
  45. {
  46. while ( (i < S(t->text)) && !isspace(T(t->text)[i]) )
  47. ++i;
  48. return i;
  49. }
  50. /* find the next nonblank character after position <i>
  51. */
  52. static int
  53. nextnonblank(Line *t, int i)
  54. {
  55. while ( (i < S(t->text)) && isspace(T(t->text)[i]) )
  56. ++i;
  57. return i;
  58. }
  59. /* find the first nonblank character on the Line.
  60. */
  61. int
  62. mkd_firstnonblank(Line *p)
  63. {
  64. return nextnonblank(p,0);
  65. }
  66. static inline int
  67. blankline(Line *p)
  68. {
  69. return ! (p && (S(p->text) > p->dle) );
  70. }
  71. static Line *
  72. skipempty(Line *p)
  73. {
  74. while ( p && (p->dle == S(p->text)) )
  75. p = p->next;
  76. return p;
  77. }
  78. void
  79. ___mkd_tidy(Cstring *t)
  80. {
  81. while ( S(*t) && isspace(T(*t)[S(*t)-1]) )
  82. --S(*t);
  83. }
  84. static struct kw comment = { "!--", 3, 0 };
  85. static struct kw *
  86. isopentag(Line *p)
  87. {
  88. int i=0, len;
  89. char *line;
  90. if ( !p ) return 0;
  91. line = T(p->text);
  92. len = S(p->text);
  93. if ( len < 3 || line[0] != '<' )
  94. return 0;
  95. if ( line[1] == '!' && line[2] == '-' && line[3] == '-' )
  96. /* comments need special case handling, because
  97. * the !-- doesn't need to end in a whitespace
  98. */
  99. return &comment;
  100. /* find how long the tag is so we can check to see if
  101. * it's a block-level tag
  102. */
  103. for ( i=1; i < len && T(p->text)[i] != '>'
  104. && T(p->text)[i] != '/'
  105. && !isspace(T(p->text)[i]); ++i )
  106. ;
  107. return mkd_search_tags(T(p->text)+1, i-1);
  108. }
  109. typedef struct _flo {
  110. Line *t;
  111. int i;
  112. } FLO;
  113. #define floindex(x) (x.i)
  114. static int
  115. flogetc(FLO *f)
  116. {
  117. if ( f && f->t ) {
  118. if ( f->i < S(f->t->text) )
  119. return T(f->t->text)[f->i++];
  120. f->t = f->t->next;
  121. f->i = 0;
  122. return flogetc(f);
  123. }
  124. return EOF;
  125. }
  126. static void
  127. splitline(Line *t, int cutpoint)
  128. {
  129. if ( t && (cutpoint < S(t->text)) ) {
  130. Line *tmp = calloc(1, sizeof *tmp);
  131. tmp->next = t->next;
  132. t->next = tmp;
  133. tmp->dle = t->dle;
  134. SUFFIX(tmp->text, T(t->text)+cutpoint, S(t->text)-cutpoint);
  135. S(t->text) = cutpoint;
  136. }
  137. }
  138. #define UNCHECK(l) ((l)->flags &= ~CHECKED)
  139. /*
  140. * walk a line, seeing if it's any of half a dozen interesting regular
  141. * types.
  142. */
  143. static void
  144. checkline(Line *l)
  145. {
  146. int eol, i;
  147. int dashes = 0, spaces = 0,
  148. equals = 0, underscores = 0,
  149. stars = 0, tildes = 0,
  150. backticks = 0;
  151. l->flags |= CHECKED;
  152. l->kind = chk_text;
  153. l->count = 0;
  154. if (l->dle >= 4) { l->kind=chk_code; return; }
  155. for ( eol = S(l->text); eol > l->dle && isspace(T(l->text)[eol-1]); --eol )
  156. ;
  157. for (i=l->dle; i<eol; i++) {
  158. register int c = T(l->text)[i];
  159. if ( c != ' ' ) l->count++;
  160. switch (c) {
  161. case '-': dashes = 1; break;
  162. case ' ': spaces = 1; break;
  163. case '=': equals = 1; break;
  164. case '_': underscores = 1; break;
  165. case '*': stars = 1; break;
  166. #if WITH_FENCED_CODE
  167. case '~': tildes = 1; break;
  168. case '`': backticks = 1; break;
  169. #endif
  170. default: return;
  171. }
  172. }
  173. if ( dashes + equals + underscores + stars + tildes + backticks > 1 )
  174. return;
  175. if ( spaces ) {
  176. if ( (underscores || stars || dashes) )
  177. l->kind = chk_hr;
  178. return;
  179. }
  180. if ( stars || underscores ) { l->kind = chk_hr; }
  181. else if ( dashes ) { l->kind = chk_dash; }
  182. else if ( equals ) { l->kind = chk_equal; }
  183. #if WITH_FENCED_CODE
  184. else if ( tildes ) { l->kind = chk_tilde; }
  185. else if ( backticks ) { l->kind = chk_backtick; }
  186. #endif
  187. }
  188. static Line *
  189. commentblock(Paragraph *p, int *unclosed)
  190. {
  191. Line *t, *ret;
  192. char *end;
  193. for ( t = p->text; t ; t = t->next) {
  194. if ( end = strstr(T(t->text), "-->") ) {
  195. splitline(t, 3 + (end - T(t->text)) );
  196. ret = t->next;
  197. t->next = 0;
  198. return ret;
  199. }
  200. }
  201. *unclosed = 1;
  202. return t;
  203. }
  204. static Line *
  205. htmlblock(Paragraph *p, struct kw *tag, int *unclosed)
  206. {
  207. Line *ret;
  208. FLO f = { p->text, 0 };
  209. int c;
  210. int i, closing, depth=0;
  211. *unclosed = 0;
  212. if ( tag == &comment )
  213. return commentblock(p, unclosed);
  214. if ( tag->selfclose ) {
  215. ret = f.t->next;
  216. f.t->next = 0;
  217. return ret;
  218. }
  219. while ( (c = flogetc(&f)) != EOF ) {
  220. if ( c == '<' ) {
  221. /* tag? */
  222. c = flogetc(&f);
  223. if ( c == '!' ) { /* comment? */
  224. if ( flogetc(&f) == '-' && flogetc(&f) == '-' ) {
  225. /* yes */
  226. while ( (c = flogetc(&f)) != EOF ) {
  227. if ( c == '-' && flogetc(&f) == '-'
  228. && flogetc(&f) == '>')
  229. /* consumed whole comment */
  230. break;
  231. }
  232. }
  233. }
  234. else {
  235. if ( closing = (c == '/') ) c = flogetc(&f);
  236. for ( i=0; i < tag->size; c=flogetc(&f) ) {
  237. if ( tag->id[i++] != toupper(c) )
  238. break;
  239. }
  240. if ( (i == tag->size) && !isalnum(c) ) {
  241. depth = depth + (closing ? -1 : 1);
  242. if ( depth == 0 ) {
  243. while ( c != EOF && c != '>' ) {
  244. /* consume trailing gunk in close tag */
  245. c = flogetc(&f);
  246. }
  247. if ( c == EOF )
  248. break;
  249. if ( !f.t )
  250. return 0;
  251. splitline(f.t, floindex(f));
  252. ret = f.t->next;
  253. f.t->next = 0;
  254. return ret;
  255. }
  256. }
  257. }
  258. }
  259. }
  260. *unclosed = 1;
  261. return 0;
  262. }
  263. /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
  264. */
  265. static int
  266. isfootnote(Line *t)
  267. {
  268. int i;
  269. if ( ( (i = t->dle) > 3) || (T(t->text)[i] != '[') )
  270. return 0;
  271. for ( ++i; i < S(t->text) ; ++i ) {
  272. if ( T(t->text)[i] == '[' )
  273. return 0;
  274. else if ( T(t->text)[i] == ']' )
  275. return ( T(t->text)[i+1] == ':' ) ;
  276. }
  277. return 0;
  278. }
  279. static inline int
  280. isquote(Line *t)
  281. {
  282. return (t->dle < 4 && T(t->text)[t->dle] == '>');
  283. }
  284. static inline int
  285. iscode(Line *t)
  286. {
  287. return (t->dle >= 4);
  288. }
  289. static inline int
  290. ishr(Line *t)
  291. {
  292. if ( ! (t->flags & CHECKED) )
  293. checkline(t);
  294. if ( t->count > 2 )
  295. return t->kind == chk_hr || t->kind == chk_dash || t->kind == chk_equal;
  296. return 0;
  297. }
  298. static int
  299. issetext(Line *t, int *htyp)
  300. {
  301. Line *n;
  302. /* check for setext-style HEADER
  303. * ======
  304. */
  305. if ( (n = t->next) ) {
  306. if ( !(n->flags & CHECKED) )
  307. checkline(n);
  308. if ( n->kind == chk_dash || n->kind == chk_equal ) {
  309. *htyp = SETEXT;
  310. return 1;
  311. }
  312. }
  313. return 0;
  314. }
  315. static int
  316. ishdr(Line *t, int *htyp)
  317. {
  318. /* ANY leading `#`'s make this into an ETX header
  319. */
  320. if ( (t->dle == 0) && (S(t->text) > 1) && (T(t->text)[0] == '#') ) {
  321. *htyp = ETX;
  322. return 1;
  323. }
  324. /* And if not, maybe it's a SETEXT header instead
  325. */
  326. return issetext(t, htyp);
  327. }
  328. static inline int
  329. end_of_block(Line *t)
  330. {
  331. int dummy;
  332. if ( !t )
  333. return 0;
  334. return ( (S(t->text) <= t->dle) || ishr(t) || ishdr(t, &dummy) );
  335. }
  336. static Line*
  337. is_discount_dt(Line *t, int *clip)
  338. {
  339. #if USE_DISCOUNT_DL
  340. if ( t && t->next
  341. && (S(t->text) > 2)
  342. && (t->dle == 0)
  343. && (T(t->text)[0] == '=')
  344. && (T(t->text)[S(t->text)-1] == '=') ) {
  345. if ( t->next->dle >= 4 ) {
  346. *clip = 4;
  347. return t;
  348. }
  349. else
  350. return is_discount_dt(t->next, clip);
  351. }
  352. #endif
  353. return 0;
  354. }
  355. static int
  356. is_extra_dd(Line *t)
  357. {
  358. return (t->dle < 4) && (T(t->text)[t->dle] == ':')
  359. && isspace(T(t->text)[t->dle+1]);
  360. }
  361. static Line*
  362. is_extra_dt(Line *t, int *clip)
  363. {
  364. #if USE_EXTRA_DL
  365. if ( t && t->next && S(t->text) && T(t->text)[0] != '='
  366. && T(t->text)[S(t->text)-1] != '=') {
  367. Line *x;
  368. if ( iscode(t) || end_of_block(t) )
  369. return 0;
  370. if ( (x = skipempty(t->next)) && is_extra_dd(x) ) {
  371. *clip = x->dle+2;
  372. return t;
  373. }
  374. if ( x=is_extra_dt(t->next, clip) )
  375. return x;
  376. }
  377. #endif
  378. return 0;
  379. }
  380. static Line*
  381. isdefinition(Line *t, int *clip, int *kind)
  382. {
  383. Line *ret;
  384. *kind = 1;
  385. if ( ret = is_discount_dt(t,clip) )
  386. return ret;
  387. *kind=2;
  388. return is_extra_dt(t,clip);
  389. }
  390. static int
  391. islist(Line *t, int *clip, DWORD flags, int *list_type)
  392. {
  393. int i, j;
  394. char *q;
  395. if ( end_of_block(t) )
  396. return 0;
  397. if ( !(flags & (MKD_NODLIST|MKD_STRICT)) && isdefinition(t,clip,list_type) )
  398. return DL;
  399. if ( strchr("*-+", T(t->text)[t->dle]) && isspace(T(t->text)[t->dle+1]) ) {
  400. i = nextnonblank(t, t->dle+1);
  401. *clip = (i > 4) ? 4 : i;
  402. *list_type = UL;
  403. return AL;
  404. }
  405. if ( (j = nextblank(t,t->dle)) > t->dle ) {
  406. if ( T(t->text)[j-1] == '.' ) {
  407. if ( !(flags & (MKD_NOALPHALIST|MKD_STRICT))
  408. && (j == t->dle + 2)
  409. && isalpha(T(t->text)[t->dle]) ) {
  410. j = nextnonblank(t,j);
  411. *clip = (j > 4) ? 4 : j;
  412. *list_type = AL;
  413. return AL;
  414. }
  415. strtoul(T(t->text)+t->dle, &q, 10);
  416. if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
  417. j = nextnonblank(t,j);
  418. *clip = (j > 4) ? 4 : j;
  419. *list_type = OL;
  420. return AL;
  421. }
  422. }
  423. }
  424. return 0;
  425. }
  426. static Line *
  427. headerblock(Paragraph *pp, int htyp)
  428. {
  429. Line *ret = 0;
  430. Line *p = pp->text;
  431. int i, j;
  432. switch (htyp) {
  433. case SETEXT:
  434. /* p->text is header, p->next->text is -'s or ='s
  435. */
  436. pp->hnumber = (T(p->next->text)[0] == '=') ? 1 : 2;
  437. ret = p->next->next;
  438. ___mkd_freeLine(p->next);
  439. p->next = 0;
  440. break;
  441. case ETX:
  442. /* p->text is ###header###, so we need to trim off
  443. * the leading and trailing `#`'s
  444. */
  445. for (i=0; (T(p->text)[i] == T(p->text)[0]) && (i < S(p->text)-1)
  446. && (i < 6); i++)
  447. ;
  448. pp->hnumber = i;
  449. while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
  450. ++i;
  451. CLIP(p->text, 0, i);
  452. UNCHECK(p);
  453. for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
  454. ;
  455. while ( j && isspace(T(p->text)[j-1]) )
  456. --j;
  457. S(p->text) = j;
  458. ret = p->next;
  459. p->next = 0;
  460. break;
  461. }
  462. return ret;
  463. }
  464. static Line *
  465. codeblock(Paragraph *p)
  466. {
  467. Line *t = p->text, *r;
  468. for ( ; t; t = r ) {
  469. CLIP(t->text,0,4);
  470. t->dle = mkd_firstnonblank(t);
  471. if ( !( (r = skipempty(t->next)) && iscode(r)) ) {
  472. ___mkd_freeLineRange(t,r);
  473. t->next = 0;
  474. return r;
  475. }
  476. }
  477. return t;
  478. }
  479. #ifdef WITH_FENCED_CODE
  480. static int
  481. iscodefence(Line *r, int size, line_type kind)
  482. {
  483. if ( !(r->flags & CHECKED) )
  484. checkline(r);
  485. if ( kind )
  486. return (r->kind == kind) && (r->count >= size);
  487. else
  488. return (r->kind == chk_tilde || r->kind == chk_backtick) && (r->count >= size);
  489. }
  490. static Paragraph *
  491. fencedcodeblock(ParagraphRoot *d, Line **ptr)
  492. {
  493. Line *first, *r;
  494. Paragraph *ret;
  495. first = (*ptr);
  496. /* don't allow zero-length code fences
  497. */
  498. if ( (first->next == 0) || iscodefence(first->next, first->count, 0) )
  499. return 0;
  500. /* find the closing fence, discard the fences,
  501. * return a Paragraph with the contents
  502. */
  503. for ( r = first; r && r->next; r = r->next )
  504. if ( iscodefence(r->next, first->count, first->kind) ) {
  505. (*ptr) = r->next->next;
  506. ret = Pp(d, first->next, CODE);
  507. ___mkd_freeLine(first);
  508. ___mkd_freeLine(r->next);
  509. r->next = 0;
  510. return ret;
  511. }
  512. return 0;
  513. }
  514. #endif
  515. static int
  516. centered(Line *first, Line *last)
  517. {
  518. if ( first&&last ) {
  519. int len = S(last->text);
  520. if ( (len > 2) && (strncmp(T(first->text), "->", 2) == 0)
  521. && (strncmp(T(last->text)+len-2, "<-", 2) == 0) ) {
  522. CLIP(first->text, 0, 2);
  523. S(last->text) -= 2;
  524. return CENTER;
  525. }
  526. }
  527. return 0;
  528. }
  529. static int
  530. endoftextblock(Line *t, int toplevelblock, DWORD flags)
  531. {
  532. int z;
  533. if ( end_of_block(t) || isquote(t) )
  534. return 1;
  535. /* HORRIBLE STANDARDS KLUDGES:
  536. * 1. non-toplevel paragraphs absorb adjacent code blocks
  537. * 2. Toplevel paragraphs eat absorb adjacent list items,
  538. * but sublevel blocks behave properly.
  539. * (What this means is that we only need to check for code
  540. * blocks at toplevel, and only check for list items at
  541. * nested levels.)
  542. */
  543. return toplevelblock ? 0 : islist(t,&z,flags,&z);
  544. }
  545. static Line *
  546. textblock(Paragraph *p, int toplevel, DWORD flags)
  547. {
  548. Line *t, *next;
  549. for ( t = p->text; t ; t = next ) {
  550. if ( ((next = t->next) == 0) || endoftextblock(next, toplevel, flags) ) {
  551. p->align = centered(p->text, t);
  552. t->next = 0;
  553. return next;
  554. }
  555. }
  556. return t;
  557. }
  558. /* length of the id: or class: kind in a special div-not-quote block
  559. */
  560. static int
  561. szmarkerclass(char *p)
  562. {
  563. if ( strncasecmp(p, "id:", 3) == 0 )
  564. return 3;
  565. if ( strncasecmp(p, "class:", 6) == 0 )
  566. return 6;
  567. return 0;
  568. }
  569. /*
  570. * check if the first line of a quoted block is the special div-not-quote
  571. * marker %[kind:]name%
  572. */
  573. #define iscsschar(c) (isalpha(c) || (c == '-') || (c == '_') )
  574. static int
  575. isdivmarker(Line *p, int start, DWORD flags)
  576. {
  577. char *s;
  578. int last, i;
  579. if ( flags & (MKD_NODIVQUOTE|MKD_STRICT) )
  580. return 0;
  581. last= S(p->text) - (1 + start);
  582. s = T(p->text) + start;
  583. if ( (last <= 0) || (*s != '%') || (s[last] != '%') )
  584. return 0;
  585. i = szmarkerclass(s+1);
  586. if ( !iscsschar(s[i+1]) )
  587. return 0;
  588. while ( ++i < last )
  589. if ( !(isdigit(s[i]) || iscsschar(s[i])) )
  590. return 0;
  591. return 1;
  592. }
  593. /*
  594. * accumulate a blockquote.
  595. *
  596. * one sick horrible thing about blockquotes is that even though
  597. * it just takes ^> to start a quote, following lines, if quoted,
  598. * assume that the prefix is ``> ''. This means that code needs
  599. * to be indented *5* spaces from the leading '>', but *4* spaces
  600. * from the start of the line. This does not appear to be
  601. * documented in the reference implementation, but it's the
  602. * way the markdown sample web form at Daring Fireball works.
  603. */
  604. static Line *
  605. quoteblock(Paragraph *p, DWORD flags)
  606. {
  607. Line *t, *q;
  608. int qp;
  609. for ( t = p->text; t ; t = q ) {
  610. if ( isquote(t) ) {
  611. /* clip leading spaces */
  612. for (qp = 0; T(t->text)[qp] != '>'; qp ++)
  613. /* assert: the first nonblank character on this line
  614. * will be a >
  615. */;
  616. /* clip '>' */
  617. qp++;
  618. /* clip next space, if any */
  619. if ( T(t->text)[qp] == ' ' )
  620. qp++;
  621. CLIP(t->text, 0, qp);
  622. UNCHECK(t);
  623. t->dle = mkd_firstnonblank(t);
  624. }
  625. q = skipempty(t->next);
  626. if ( (q == 0) || ((q != t->next) && (!isquote(q) || isdivmarker(q,1,flags))) ) {
  627. ___mkd_freeLineRange(t, q);
  628. t = q;
  629. break;
  630. }
  631. }
  632. if ( isdivmarker(p->text,0,flags) ) {
  633. char *prefix = "class";
  634. int i;
  635. q = p->text;
  636. p->text = p->text->next;
  637. if ( (i = szmarkerclass(1+T(q->text))) == 3 )
  638. /* and this would be an "%id:" prefix */
  639. prefix="id";
  640. if ( p->ident = malloc(4+strlen(prefix)+S(q->text)) )
  641. sprintf(p->ident, "%s=\"%.*s\"", prefix, S(q->text)-(i+2),
  642. T(q->text)+(i+1) );
  643. ___mkd_freeLine(q);
  644. }
  645. return t;
  646. }
  647. typedef int (*linefn)(Line *);
  648. /*
  649. * pull in a list block. A list block starts with a list marker and
  650. * runs until the next list marker, the next non-indented paragraph,
  651. * or EOF. You do not have to indent nonblank lines after the list
  652. * marker, but multiple paragraphs need to start with a 4-space indent.
  653. */
  654. static Line *
  655. listitem(Paragraph *p, int indent, DWORD flags, linefn check)
  656. {
  657. Line *t, *q;
  658. int clip = indent;
  659. int z;
  660. for ( t = p->text; t ; t = q) {
  661. CLIP(t->text, 0, clip);
  662. UNCHECK(t);
  663. t->dle = mkd_firstnonblank(t);
  664. if ( (q = skipempty(t->next)) == 0 ) {
  665. ___mkd_freeLineRange(t,q);
  666. return 0;
  667. }
  668. /* after a blank line, the next block needs to start with a line
  669. * that's indented 4(? -- reference implementation allows a 1
  670. * character indent, but that has unfortunate side effects here)
  671. * spaces, but after that the line doesn't need any indentation
  672. */
  673. if ( q != t->next ) {
  674. if (q->dle < indent) {
  675. q = t->next;
  676. t->next = 0;
  677. return q;
  678. }
  679. /* indent at least 2, and at most as
  680. * as far as the initial line was indented. */
  681. indent = clip ? clip : 2;
  682. }
  683. if ( (q->dle < indent) && (ishr(q) || islist(q,&z,flags,&z)
  684. || (check && (*check)(q)))
  685. && !issetext(q,&z) ) {
  686. q = t->next;
  687. t->next = 0;
  688. return q;
  689. }
  690. clip = (q->dle > indent) ? indent : q->dle;
  691. }
  692. return t;
  693. }
  694. static Line *
  695. definition_block(Paragraph *top, int clip, MMIOT *f, int kind)
  696. {
  697. ParagraphRoot d = { 0, 0 };
  698. Paragraph *p;
  699. Line *q = top->text, *text = 0, *labels;
  700. int z, para;
  701. while (( labels = q )) {
  702. if ( (q = isdefinition(labels, &z, &kind)) == 0 )
  703. break;
  704. if ( (text = skipempty(q->next)) == 0 )
  705. break;
  706. if ( para = (text != q->next) )
  707. ___mkd_freeLineRange(q, text);
  708. q->next = 0;
  709. if ( kind == 1 /* discount dl */ )
  710. for ( q = labels; q; q = q->next ) {
  711. CLIP(q->text, 0, 1);
  712. UNCHECK(q);
  713. S(q->text)--;
  714. }
  715. dd_block:
  716. p = Pp(&d, text, LISTITEM);
  717. text = listitem(p, clip, f->flags, (kind==2) ? is_extra_dd : 0);
  718. p->down = compile(p->text, 0, f);
  719. p->text = labels; labels = 0;
  720. if ( para && p->down ) p->down->align = PARA;
  721. if ( (q = skipempty(text)) == 0 )
  722. break;
  723. if ( para = (q != text) ) {
  724. Line anchor;
  725. anchor.next = text;
  726. ___mkd_freeLineRange(&anchor,q);
  727. text = q;
  728. }
  729. if ( kind == 2 && is_extra_dd(q) )
  730. goto dd_block;
  731. }
  732. top->text = 0;
  733. top->down = T(d);
  734. return text;
  735. }
  736. static Line *
  737. enumerated_block(Paragraph *top, int clip, MMIOT *f, int list_class)
  738. {
  739. ParagraphRoot d = { 0, 0 };
  740. Paragraph *p;
  741. Line *q = top->text, *text;
  742. int para = 0, z;
  743. while (( text = q )) {
  744. p = Pp(&d, text, LISTITEM);
  745. text = listitem(p, clip, f->flags, 0);
  746. p->down = compile(p->text, 0, f);
  747. p->text = 0;
  748. if ( para && p->down ) p->down->align = PARA;
  749. if ( (q = skipempty(text)) == 0
  750. || islist(q, &clip, f->flags, &z) != list_class )
  751. break;
  752. if ( para = (q != text) ) {
  753. Line anchor;
  754. anchor.next = text;
  755. ___mkd_freeLineRange(&anchor, q);
  756. if ( p->down ) p->down->align = PARA;
  757. }
  758. }
  759. top->text = 0;
  760. top->down = T(d);
  761. return text;
  762. }
  763. static int
  764. tgood(char c)
  765. {
  766. switch (c) {
  767. case '\'':
  768. case '"': return c;
  769. case '(': return ')';
  770. }
  771. return 0;
  772. }
  773. /*
  774. * add a new (image or link) footnote to the footnote table
  775. */
  776. static Line*
  777. addfootnote(Line *p, MMIOT* f)
  778. {
  779. int j, i;
  780. int c;
  781. Line *np = p->next;
  782. Footnote *foot = &EXPAND(*f->footnotes);
  783. CREATE(foot->tag);
  784. CREATE(foot->link);
  785. CREATE(foot->title);
  786. foot->flags = foot->height = foot->width = 0;
  787. for (j=i=p->dle+1; T(p->text)[j] != ']'; j++)
  788. EXPAND(foot->tag) = T(p->text)[j];
  789. EXPAND(foot->tag) = 0;
  790. S(foot->tag)--;
  791. j = nextnonblank(p, j+2);
  792. if ( (f->flags & MKD_EXTRA_FOOTNOTE) && (T(foot->tag)[0] == '^') ) {
  793. while ( j < S(p->text) )
  794. EXPAND(foot->title) = T(p->text)[j++];
  795. goto skip_to_end;
  796. }
  797. while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
  798. EXPAND(foot->link) = T(p->text)[j++];
  799. EXPAND(foot->link) = 0;
  800. S(foot->link)--;
  801. j = nextnonblank(p,j);
  802. if ( T(p->text)[j] == '=' ) {
  803. sscanf(T(p->text)+j, "=%dx%d", &foot->width, &foot->height);
  804. while ( (j < S(p->text)) && !isspace(T(p->text)[j]) )
  805. ++j;
  806. j = nextnonblank(p,j);
  807. }
  808. if ( (j >= S(p->text)) && np && np->dle && tgood(T(np->text)[np->dle]) ) {
  809. ___mkd_freeLine(p);
  810. p = np;
  811. np = p->next;
  812. j = p->dle;
  813. }
  814. if ( (c = tgood(T(p->text)[j])) ) {
  815. /* Try to take the rest of the line as a comment; read to
  816. * EOL, then shrink the string back to before the final
  817. * quote.
  818. */
  819. ++j; /* skip leading quote */
  820. while ( j < S(p->text) )
  821. EXPAND(foot->title) = T(p->text)[j++];
  822. while ( S(foot->title) && T(foot->title)[S(foot->title)-1] != c )
  823. --S(foot->title);
  824. if ( S(foot->title) ) /* skip trailing quote */
  825. --S(foot->title);
  826. EXPAND(foot->title) = 0;
  827. --S(foot->title);
  828. }
  829. skip_to_end:
  830. ___mkd_freeLine(p);
  831. return np;
  832. }
  833. /*
  834. * allocate a paragraph header, link it to the
  835. * tail of the current document
  836. */
  837. static Paragraph *
  838. Pp(ParagraphRoot *d, Line *ptr, int typ)
  839. {
  840. Paragraph *ret = calloc(sizeof *ret, 1);
  841. ret->text = ptr;
  842. ret->typ = typ;
  843. return ATTACH(*d, ret);
  844. }
  845. static Line*
  846. consume(Line *ptr, int *eaten)
  847. {
  848. Line *next;
  849. int blanks=0;
  850. for (; ptr && blankline(ptr); ptr = next, blanks++ ) {
  851. next = ptr->next;
  852. ___mkd_freeLine(ptr);
  853. }
  854. if ( ptr ) *eaten = blanks;
  855. return ptr;
  856. }
  857. /*
  858. * top-level compilation; break the document into
  859. * style, html, and source blocks with footnote links
  860. * weeded out.
  861. */
  862. static Paragraph *
  863. compile_document(Line *ptr, MMIOT *f)
  864. {
  865. ParagraphRoot d = { 0, 0 };
  866. ANCHOR(Line) source = { 0, 0 };
  867. Paragraph *p = 0;
  868. struct kw *tag;
  869. int eaten, unclosed;
  870. while ( ptr ) {
  871. if ( !(f->flags & MKD_NOHTML) && (tag = isopentag(ptr)) ) {
  872. int blocktype;
  873. /* If we encounter a html/style block, compile and save all
  874. * of the cached source BEFORE processing the html/style.
  875. */
  876. if ( T(source) ) {
  877. E(source)->next = 0;
  878. p = Pp(&d, 0, SOURCE);
  879. p->down = compile(T(source), 1, f);
  880. T(source) = E(source) = 0;
  881. }
  882. if ( f->flags & MKD_NOSTYLE )
  883. blocktype = HTML;
  884. else
  885. blocktype = strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML;
  886. p = Pp(&d, ptr, blocktype);
  887. ptr = htmlblock(p, tag, &unclosed);
  888. if ( unclosed ) {
  889. p->typ = SOURCE;
  890. p->down = compile(p->text, 1, f);
  891. p->text = 0;
  892. }
  893. }
  894. else if ( isfootnote(ptr) ) {
  895. /* footnotes, like cats, sleep anywhere; pull them
  896. * out of the input stream and file them away for
  897. * later processing
  898. */
  899. ptr = consume(addfootnote(ptr, f), &eaten);
  900. }
  901. else {
  902. /* source; cache it up to wait for eof or the
  903. * next html/style block
  904. */
  905. ATTACH(source,ptr);
  906. ptr = ptr->next;
  907. }
  908. }
  909. if ( T(source) ) {
  910. /* if there's any cached source at EOF, compile
  911. * it now.
  912. */
  913. E(source)->next = 0;
  914. p = Pp(&d, 0, SOURCE);
  915. p->down = compile(T(source), 1, f);
  916. }
  917. return T(d);
  918. }
  919. static int
  920. first_nonblank_before(Line *j, int dle)
  921. {
  922. return (j->dle < dle) ? j->dle : dle;
  923. }
  924. static int
  925. actually_a_table(MMIOT *f, Line *pp)
  926. {
  927. Line *r;
  928. int j;
  929. int c;
  930. /* tables need to be turned on */
  931. if ( f->flags & (MKD_STRICT|MKD_NOTABLES) )
  932. return 0;
  933. /* tables need three lines */
  934. if ( !(pp && pp->next && pp->next->next) ) {
  935. return 0;
  936. }
  937. /* all lines must contain |'s */
  938. for (r = pp; r; r = r->next )
  939. if ( !(r->flags & PIPECHAR) ) {
  940. return 0;
  941. }
  942. /* if the header has a leading |, all lines must have leading |'s */
  943. if ( T(pp->text)[pp->dle] == '|' ) {
  944. for ( r = pp; r; r = r->next )
  945. if ( T(r->text)[first_nonblank_before(r,pp->dle)] != '|' ) {
  946. return 0;
  947. }
  948. }
  949. /* second line must be only whitespace, -, |, or : */
  950. r = pp->next;
  951. for ( j=r->dle; j < S(r->text); ++j ) {
  952. c = T(r->text)[j];
  953. if ( !(isspace(c)||(c=='-')||(c==':')||(c=='|')) ) {
  954. return 0;
  955. }
  956. }
  957. return 1;
  958. }
  959. /*
  960. * break a collection of markdown input into
  961. * blocks of lists, code, html, and text to
  962. * be marked up.
  963. */
  964. static Paragraph *
  965. compile(Line *ptr, int toplevel, MMIOT *f)
  966. {
  967. ParagraphRoot d = { 0, 0 };
  968. Paragraph *p = 0;
  969. Line *r;
  970. int para = toplevel;
  971. int blocks = 0;
  972. int hdr_type, list_type, list_class, indent;
  973. ptr = consume(ptr, &para);
  974. while ( ptr ) {
  975. if ( iscode(ptr) ) {
  976. p = Pp(&d, ptr, CODE);
  977. if ( f->flags & MKD_1_COMPAT) {
  978. /* HORRIBLE STANDARDS KLUDGE: the first line of every block
  979. * has trailing whitespace trimmed off.
  980. */
  981. ___mkd_tidy(&p->text->text);
  982. }
  983. ptr = codeblock(p);
  984. }
  985. #if WITH_FENCED_CODE
  986. else if ( iscodefence(ptr,3,0) && (p=fencedcodeblock(&d, &ptr)) )
  987. /* yay, it's already done */ ;
  988. #endif
  989. else if ( ishr(ptr) ) {
  990. p = Pp(&d, 0, HR);
  991. r = ptr;
  992. ptr = ptr->next;
  993. ___mkd_freeLine(r);
  994. }
  995. else if ( list_class = islist(ptr, &indent, f->flags, &list_type) ) {
  996. if ( list_class == DL ) {
  997. p = Pp(&d, ptr, DL);
  998. ptr = definition_block(p, indent, f, list_type);
  999. }
  1000. else {
  1001. p = Pp(&d, ptr, list_type);
  1002. ptr = enumerated_block(p, indent, f, list_class);
  1003. }
  1004. }
  1005. else if ( isquote(ptr) ) {
  1006. p = Pp(&d, ptr, QUOTE);
  1007. ptr = quoteblock(p, f->flags);
  1008. p->down = compile(p->text, 1, f);
  1009. p->text = 0;
  1010. }
  1011. else if ( ishdr(ptr, &hdr_type) ) {
  1012. p = Pp(&d, ptr, HDR);
  1013. ptr = headerblock(p, hdr_type);
  1014. }
  1015. else {
  1016. p = Pp(&d, ptr, MARKUP);
  1017. ptr = textblock(p, toplevel, f->flags);
  1018. /* tables are a special kind of paragraph */
  1019. if ( actually_a_table(f, p->text) )
  1020. p->typ = TABLE;
  1021. }
  1022. if ( (para||toplevel) && !p->align )
  1023. p->align = PARA;
  1024. blocks++;
  1025. para = toplevel || (blocks > 1);
  1026. ptr = consume(ptr, &para);
  1027. if ( para && !p->align )
  1028. p->align = PARA;
  1029. }
  1030. return T(d);
  1031. }
  1032. /*
  1033. * the guts of the markdown() function, ripped out so I can do
  1034. * debugging.
  1035. */
  1036. /*
  1037. * prepare and compile `text`, returning a Paragraph tree.
  1038. */
  1039. int
  1040. mkd_compile(Document *doc, DWORD flags)
  1041. {
  1042. if ( !doc )
  1043. return 0;
  1044. if ( doc->compiled )
  1045. return 1;
  1046. doc->compiled = 1;
  1047. memset(doc->ctx, 0, sizeof(MMIOT) );
  1048. doc->ctx->ref_prefix= doc->ref_prefix;
  1049. doc->ctx->cb = &(doc->cb);
  1050. doc->ctx->flags = flags & USER_FLAGS;
  1051. CREATE(doc->ctx->in);
  1052. doc->ctx->footnotes = malloc(sizeof doc->ctx->footnotes[0]);
  1053. CREATE(*doc->ctx->footnotes);
  1054. mkd_initialize();
  1055. doc->code = compile_document(T(doc->content), doc->ctx);
  1056. qsort(T(*doc->ctx->footnotes), S(*doc->ctx->footnotes),
  1057. sizeof T(*doc->ctx->footnotes)[0],
  1058. (stfu)__mkd_footsort);
  1059. memset(&doc->content, 0, sizeof doc->content);
  1060. return 1;
  1061. }