Markdown.php 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. <?php
  2. /**
  3. * @copyright Copyright (c) 2014 Carsten Brandt
  4. * @license https://github.com/cebe/markdown/blob/master/LICENSE
  5. * @link https://github.com/cebe/markdown#readme
  6. */
  7. namespace cebe\markdown\latex;
  8. use cebe\markdown\block\CodeTrait;
  9. use cebe\markdown\block\HeadlineTrait;
  10. use cebe\markdown\block\ListTrait;
  11. use cebe\markdown\block\QuoteTrait;
  12. use cebe\markdown\block\RuleTrait;
  13. use cebe\markdown\inline\CodeTrait as InlineCodeTrait;
  14. use cebe\markdown\inline\EmphStrongTrait;
  15. use cebe\markdown\inline\LinkTrait;
  16. use MikeVanRiel\TextToLatex;
/**
 * Markdown parser for the [initial markdown spec](http://daringfireball.net/projects/markdown/syntax)
 * that renders the parsed document as LaTeX.
 *
 * @author Carsten Brandt <mail@cebe.cc>
 */
  22. class Markdown extends \cebe\markdown\Parser
  23. {
  24. // include block element parsing using traits
  25. use CodeTrait;
  26. use HeadlineTrait;
  27. use ListTrait {
  28. // Check Ul List before headline
  29. identifyUl as protected identifyBUl;
  30. consumeUl as protected consumeBUl;
  31. }
  32. use QuoteTrait;
  33. use RuleTrait {
  34. // Check Hr before checking lists
  35. identifyHr as protected identifyAHr;
  36. consumeHr as protected consumeAHr;
  37. }
  38. // include inline element parsing using traits
  39. use InlineCodeTrait;
  40. use EmphStrongTrait;
  41. use LinkTrait;
  const LINK_STYLE_FOOTNOTE = 'footnote';
  const LINK_STYLE_HREF = 'href';

  /**
   * @var string prefix that is put in front of every automatically generated label.
   * Use it to keep labels unambiguous when several markdown files are combined into one document.
   */
  public $labelPrefix = '';

  /**
   * @var string controls how links are rendered in LaTeX. Two styles are available:
   *
   * - `footnote` (default) - every link gets a footnote that shows the full URL, which suits printed PDFs.
   * - `href` - every link becomes a hyperref link, similar to HTML; the link target is not visible.
   */
  public $linkStyle = self::LINK_STYLE_FOOTNOTE;
  /**
   * @var array the "escapable" characters. When one of them is preceded by a backslash,
   * the character itself is output without the backslash and is not interpreted as markdown.
   */
  protected $escapeCharacters = [
      '\\', // backslash
      '`',  // backtick
      '*',  // asterisk
      '_',  // underscore
      '{',  // curly braces
      '}',
      '[',  // square brackets
      ']',
      '(',  // parentheses
      ')',
      '#',  // hash mark
      '+',  // plus sign
      '-',  // minus sign (hyphen)
      '.',  // dot
      '!',  // exclamation mark
      '<',  // angle brackets
      '>',
  ];
  /**
   * @inheritDoc
   *
   * Empties the list of references held in `$this->references`.
   */
  protected function prepare()
  {
      // start with an empty reference table
      $this->references = [];
  }
  /**
   * Consume lines for a paragraph
   *
   * Allow headlines and code to break paragraphs: consumption stops at the first
   * blank line, at a line indented with a tab or four spaces (indented code), or
   * at a line that identifyHeadline() recognizes.
   *
   * @param string[] $lines all lines of the document being parsed
   * @param int $current index of the first line of the paragraph
   * @return array the paragraph block and the index of the last consumed line
   */
  protected function consumeParagraph($lines, $current)
  {
      $content = [];
      for ($i = $current, $count = count($lines); $i < $count; $i++) {
          $line = $lines[$i];

          // A blank line ends the paragraph. Compare against '' explicitly:
          // empty() would also treat a line consisting only of "0" as blank.
          if ($line === '' || ltrim($line) === '') {
              break;
          }

          // An indented code line (leading tab or four leading spaces) ends the paragraph.
          if ($line[0] === "\t" || strncmp($line, '    ', 4) === 0) {
              break;
          }

          if ($this->identifyHeadline($line, $lines, $i)) {
              break;
          }

          $content[] = $line;
      }

      $block = [
          'paragraph',
          'content' => $this->parseInline(implode("\n", $content)),
      ];

      // $i points at the first line that was NOT consumed; step back by one.
      return [$block, --$i];
  }
  110. // rendering adjusted for LaTeX output
  /**
   * @inheritdoc
   *
   * A paragraph is its rendered content followed by an empty line.
   */
  protected function renderParagraph($block)
  {
      $latex = $this->renderAbsy($block['content']);

      return $latex . "\n\n";
  }
  /**
   * @inheritdoc
   *
   * Wraps the rendered quote content in a LaTeX `quote` environment.
   */
  protected function renderQuote($block)
  {
      $body = $this->renderAbsy($block['content']);

      return "\\begin{quote}{$body}\\end{quote}\n";
  }
  /**
   * @inheritdoc
   *
   * Renders a code block as a `lstlisting` environment, preceded by an `\lstset`
   * that selects the block's language (or an empty language when none is set).
   *
   * @param array $block code block with a `content` element and an optional `language` element
   * @return string the LaTeX source of the listing
   */
  protected function renderCode($block)
  {
      $language = isset($block['language']) ? "\\lstset{language={$block['language']}}" : '\lstset{language={}}';
      $content = $block['content'];

      // replace No-Break Space characters in code block, which do not render in LaTeX
      $cleaned = preg_replace("/[\x{00a0}\x{202f}]/u", ' ', $content);
      if ($cleaned !== null) {
          // preg_replace() yields null on failure (e.g. malformed UTF-8 with the /u flag);
          // in that case keep the code as it is instead of emitting an empty listing.
          $content = $cleaned;
      }

      return "$language\\begin{lstlisting}\n{$content}\n\\end{lstlisting}\n";
  }
  /**
   * @inheritdoc
   *
   * Ordered lists (`ol`) become an `enumerate` environment, every other list
   * type becomes `itemize`. Each entry of `$block['items']` is rendered as one `\item`.
   */
  protected function renderList($block)
  {
      $environment = $block['list'] === 'ol' ? 'enumerate' : 'itemize';

      $items = [];
      foreach ($block['items'] as $itemContent) {
          $items[] = '\item ' . $this->renderAbsy($itemContent) . "\n";
      }

      return '\begin{' . $environment . "}\n" . implode('', $items) . '\end{' . $environment . "}\n";
  }
  /**
   * @inheritdoc
   *
   * Maps headline levels 1 to 3 to `\section`, `\subsection` and `\subsubsection`;
   * every other level is rendered as `\paragraph`.
   */
  protected function renderHeadline($block)
  {
      $commands = [
          1 => 'section',
          2 => 'subsection',
          3 => 'subsubsection',
      ];

      $title = $this->renderAbsy($block['content']);
      $level = $block['level'];
      $command = isset($commands[$level]) ? $commands[$level] : 'paragraph';

      return '\\' . $command . '{' . $title . "}\n";
  }
  /**
   * @inheritdoc
   *
   * Renders a horizontal rule as a 0.4pt line spanning `\textwidth`.
   */
  protected function renderHr($block)
  {
      // the block itself does not influence the output
      return "\n" . '\noindent\rule{\textwidth}{0.4pt}' . "\n";
  }
  /**
   * @inheritdoc
   *
   * - Reference links are resolved through lookupReference(); when the reference
   *   is unknown the original markdown (`$block['orig']`) is returned.
   * - URLs without `://` are treated as links inside the document and rendered
   *   with `\hyperref`; the link title is ignored for those.
   * - All other URLs are rendered according to [[linkStyle]].
   */
  protected function renderLink($block)
  {
      if (isset($block['refkey'])) {
          $reference = $this->lookupReference($block['refkey']);
          if ($reference === false) {
              return $block['orig'];
          }
          $block = array_merge($block, $reference);
      }

      $target = $block['url'];
      $label = $this->renderAbsy($block['text']);

      if (strpos($target, '://') === false) {
          // pure anchors get the label prefix, `#` is turned into `::` for the label name
          if (isset($target[0]) && $target[0] === '#') {
              $target = $this->labelPrefix . $target;
          }

          return '\hyperref[' . str_replace('#', '::', $target) . ']{' . $label . '}';
      }

      if ($this->linkStyle === self::LINK_STYLE_HREF) {
          return '\href{' . $this->escapeUrl($target) . '}{' . $label . '}';
      }

      // footnote style: optional "title: " followed by the URL
      $note = empty($block['title']) ? '' : $this->escapeLatex($block['title']) . ': ';

      return $label . '\\footnote{' . $note . '\url{' . $this->escapeUrl($target) . '}}';
  }
  /**
   * @inheritdoc
   *
   * Renders an image with `\includegraphics` scaled to `\textwidth`.
   * Reference images are resolved through lookupReference(); when the reference
   * is unknown the original markdown (`$block['orig']`) is returned.
   *
   * @param array $block image block with a `url` element, or a `refkey` and `orig` element
   * @return string the LaTeX source for the image
   */
  protected function renderImage($block)
  {
      if (isset($block['refkey'])) {
          if (($ref = $this->lookupReference($block['refkey'])) !== false) {
              $block = array_merge($block, $ref);
          } else {
              return $block['orig'];
          }
      }

      // TODO create figure with caption with title
      $replaces = [
          '%' => '\\%',
          '{' => '\\%7B',
          '}' => '\\%7D',
          '\\' => '\\\\',
          '#' => '\\#',
          '$' => '\\%24',
      ];

      // strtr() substitutes in a single pass. str_replace() with arrays applies the
      // pairs one after another, so the backslash rule would also double the
      // backslashes that the earlier rules have just inserted (`%` => `\\%`).
      $url = strtr($block['url'], $replaces);

      return "\\noindent\\includegraphics[width=\\textwidth]{{$url}}";
  }
  /**
   * Parses `<a name="..."></a>` tags as reference labels.
   *
   * `a` and `span` tags with a `name` or `id` attribute at the start of the text
   * become a `label` element whose name is prefixed with [[labelPrefix]] and has
   * `#` replaced by `::`. Anything else is returned as a literal `<`.
   *
   * @param string $text the text to parse
   * @return array the parsed element and the number of characters consumed
   */
  private function parseInlineHtml($text)
  {
      $literal = [['text', '<'], 1];

      // without a closing bracket there can be no tag
      if (strpos($text, '>') === false) {
          return $literal;
      }

      // convert a name markers to \labels
      $pattern = '~^<((a|span)) (name|id)="(.*?)">.*?</\1>~i';
      if (!preg_match($pattern, $text, $matches)) {
          return $literal;
      }

      $name = str_replace('#', '::', $this->labelPrefix . $matches[4]);

      return [['label', 'name' => $name], strlen($matches[0])];
  }
  /**
   * Renders a reference label as a LaTeX `\label` with the name stored in `$block['name']`.
   */
  protected function renderLabel($block)
  {
      return '\label{' . $block['name'] . '}';
  }
  /**
   * @inheritdoc
   *
   * Renders an email address as a `mailto:` hyperlink that shows the address itself.
   */
  protected function renderEmail($block)
  {
      $address = $this->escapeUrl($block[1]);

      return '\href{mailto:' . $address . '}{' . $address . '}';
  }
  /**
   * @inheritdoc
   *
   * Renders a plain URL with the `\url` command.
   */
  protected function renderUrl($block)
  {
      $escaped = $this->escapeUrl($block[1]);

      return sprintf('\url{%s}', $escaped);
  }
  /**
   * @inheritdoc
   *
   * Renders inline code with `\lstinline`. The code is enclosed in the first
   * delimiter character that does not occur in the code itself, so that code
   * containing both `|` and a backtick no longer terminates the listing early.
   *
   * @param array $block inline code element, the code is expected in `$block[1]`
   * @return string the LaTeX source for the inline code
   */
  protected function renderInlineCode($block)
  {
      $content = $block[1];

      // replace No-Break Space characters in code block, which do not render in LaTeX
      $cleaned = preg_replace("/[\x{00a0}\x{202f}]/u", ' ', $content);
      if ($cleaned !== null) {
          // null signals a PCRE failure (e.g. malformed UTF-8); keep the code untouched then
          $content = $cleaned;
      }

      // \lstinline can not span multiple lines
      $content = str_replace("\n", ' ', $content);

      // `|` is preferred, the backtick is second choice (as before). If the code
      // uses every candidate, fall back to the backtick like the previous implementation.
      $delimiter = '`';
      foreach (['|', '`', '+', '/', '=', '@'] as $candidate) {
          if (strpos($content, $candidate) === false) {
              $delimiter = $candidate;
              break;
          }
      }

      return '\\lstinline' . $delimiter . $content . $delimiter;
  }
  /**
   * @inheritdoc
   *
   * Strong text is set in bold face (`\textbf`).
   */
  protected function renderStrong($block)
  {
      $inner = $this->renderAbsy($block[1]);

      return sprintf('\textbf{%s}', $inner);
  }
  /**
   * @inheritdoc
   *
   * Emphasized text is set in italics (`\textit`).
   */
  protected function renderEmph($block)
  {
      $inner = $this->renderAbsy($block[1]);

      return "\\textit{{$inner}}";
  }
  /**
   * Parses escaped special characters.
   *
   * A backslash followed by one of the [[escapeCharacters]] consumes both
   * characters: an escaped backslash yields a `backslash` element, which allows
   * a backslash to be interpreted as LaTeX, any other escaped character yields
   * that character as text. A backslash followed by anything else is kept as text.
   * @marker \
   */
  protected function parseEscape($text)
  {
      $escapesCharacter = isset($text[1]) && in_array($text[1], $this->escapeCharacters);
      if (!$escapesCharacter) {
          // lone backslash: emit it unchanged and consume a single character
          return [['text', $text[0]], 1];
      }

      $element = $text[1] === '\\' ? ['backslash'] : ['text', $text[1]];

      return [$element, 2];
  }
  /**
   * Renders the `backslash` element that parseEscape() emits for an escaped backslash.
   *
   * @return string a single backslash character
   */
  protected function renderBackslash()
  {
      return "\\";
  }
  304. private $_escaper;
  /**
   * Escape special characters in URLs
   *
   * Runs the string through escapeLatex() and then prefixes every `%` with a backslash.
   */
  protected function escapeUrl($string)
  {
      $escaped = $this->escapeLatex($string);

      return str_replace('%', '\\%', $escaped);
  }
  /**
   * Escape special LaTeX characters
   *
   * Delegates to a TextToLatex converter that is created on first use and
   * then kept in `$_escaper` for later calls.
   */
  protected function escapeLatex($string)
  {
      $escaper = $this->_escaper;
      if ($escaper === null) {
          $escaper = $this->_escaper = new TextToLatex();
      }

      return $escaper->convert($string);
  }
  /**
   * @inheritdoc
   *
   * Escapes the text for LaTeX and additionally
   *
   * - renders a newline indicated by two spaces on the end of a markdown line as a forced line break,
   * - renders No-Break Space (U+00A0) as `~`,
   * - renders Narrow No-Break Space (U+202F) as a non-breaking thin space.
   *
   * @param array $text text element, the text is expected in `$text[1]`
   * @return string the LaTeX source for the text
   */
  protected function renderText($text)
  {
      // two trailing spaces mark a hard line break; a single trailing space must not
      $output = str_replace("  \n", "\\\\\n", $this->escapeLatex($text[1]));

      $replacements = [
          // support No-Break Space in LaTeX
          "/\x{00a0}/u" => '~',
          // support Narrow No-Break Space spaces in LaTeX
          // http://unicode-table.com/en/202F/
          // http://tex.stackexchange.com/questions/76132/how-to-typeset-a-small-non-breaking-space
          "/\x{202f}/u" => '\nobreak\hspace{.16667em plus .08333em}',
      ];

      foreach ($replacements as $pattern => $replacement) {
          $result = preg_replace($pattern, $replacement, $output);
          if ($result !== null) {
              // null signals a PCRE failure (e.g. malformed UTF-8); keep the text in that case
              $output = $result;
          }
      }

      return $output;
  }
  338. }