BioTorrents.de’s version of Gazelle
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ParsedownExtra.php 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687
  1. <?php
  2. #declare(strict_types=1);
  3. #
  4. #
  5. # Parsedown Extra
  6. # https://github.com/erusev/parsedown-extra
  7. #
  8. # (c) Emanuil Rusev
  9. # http://erusev.com
  10. #
  11. # For the full license information, view the LICENSE file that was distributed
  12. # with this source code.
  13. #
  14. #
  15. class ParsedownExtra extends Parsedown
  16. {
  17. # ~
  18. const version = '0.8.0';
  19. # ~
  #
  # Registers the extra block and inline types on the inherited type tables.
  # Throws an Exception when the parent class reports a version below 1.7.1.
  function __construct()
  {
      $parentIsTooOld = version_compare(parent::version, '1.7.1', '<');

      if ($parentIsTooOld)
      {
          throw new Exception('ParsedownExtra requires a later version of Parsedown');
      }

      # footnote definitions have to be tried before reference definitions
      array_unshift($this->BlockTypes['['], 'Footnote');

      # footnote markers have to be tried before links
      array_unshift($this->InlineTypes['['], 'FootnoteMarker');

      $this->BlockTypes[':'] []= 'DefinitionList';
      $this->BlockTypes['*'] []= 'Abbreviation';
  }
  33. #
  34. # ~
  #
  # Converts a Markdown Extra document into markup.
  #
  # $text   - the Markdown source
  # returns - the markup with surrounding line breaks trimmed, adjacent
  #           definition lists merged and, when the document defined any
  #           footnotes, the footnote section appended
  function text($text)
  {
      # footnote numbers are per document: the counter lives on the instance
      # and nothing else in this class resets it, so without this a reused
      # parser would keep numbering from where the previous call stopped
      $this->footnoteCount = 0;

      $Elements = $this->textElements($text);

      # convert to markup and trim line breaks
      $markup = trim($this->elements($Elements), "\n");

      # merge consecutive dl elements
      $markup = preg_replace('/<\/dl>\s+<dl>\s+/', '', $markup);

      # add footnotes
      if (isset($this->DefinitionData['Footnote']))
      {
          $markup .= "\n" . $this->element($this->buildFootnoteElement());
      }

      return $markup;
  }
  52. #
  53. # Blocks
  54. #
  55. #
  56. # Abbreviation
  # Recognises a line of the form `*[ABBR]: meaning`, stores the meaning under
  # DefinitionData['Abbreviation'] and returns a hidden block. Returns nothing
  # when the line is not an abbreviation definition.
  protected function blockAbbreviation($Line)
  {
      $isDefinition = preg_match('/^\*\[(.+?)\]:[ ]*(.+?)[ ]*$/', $Line['text'], $parts);

      if ( ! $isDefinition)
      {
          return;
      }

      list(, $abbreviation, $meaning) = $parts;

      $this->DefinitionData['Abbreviation'][$abbreviation] = $meaning;

      return array(
          'hidden' => true,
      );
  }
  68. #
  69. # Footnote
  # Starts a footnote definition block from a line of the form `[^label]: text`.
  # The returned block is hidden and carries the label and the first line of
  # text. Returns nothing when the line does not match.
  protected function blockFootnote($Line)
  {
      if ( ! preg_match('/^\[\^(.+?)\]:[ ]?(.*)$/', $Line['text'], $parts))
      {
          return;
      }

      return array(
          'label' => $parts[1],
          'text' => $parts[2],
          'hidden' => true,
      );
  }
  # Decides whether $Line still belongs to the footnote definition in $Block.
  # Returns the block with the line appended, or nothing to end the block.
  protected function blockFootnoteContinue($Line, $Block)
  {
      # a new footnote definition always ends the current one
      $startsNewFootnote = $Line['text'][0] === '[' && preg_match('/^\[\^(.+?)\]:/', $Line['text']);

      if ($startsNewFootnote)
      {
          return;
      }

      if ( ! isset($Block['interrupted']))
      {
          $Block['text'] .= "\n" . $Line['text'];

          return $Block;
      }

      # after an interruption only lines indented by 4 or more continue the footnote
      if ($Line['indent'] < 4)
      {
          return;
      }

      $Block['text'] .= "\n\n" . $Line['text'];

      return $Block;
  }
  # Stores the finished footnote under its label in DefinitionData['Footnote'].
  # 'count' and 'number' start out as null and are filled in by
  # inlineFootnoteMarker when a marker for the label is encountered.
  protected function blockFootnoteComplete($Block)
  {
      $label = $Block['label'];

      $Definition = array(
          'text' => $Block['text'],
          'count' => null,
          'number' => null,
      );

      $this->DefinitionData['Footnote'][$label] = $Definition;

      return $Block;
  }
  111. #
  112. # Definition List
  # Turns the preceding paragraph block into a definition list: each line of
  # the paragraph becomes a dt and the current line becomes the first dd.
  # Returns nothing unless the preceding block is a paragraph.
  protected function blockDefinitionList($Line, $Block)
  {
      $followsParagraph = isset($Block) && $Block['type'] === 'Paragraph';

      if ( ! $followsParagraph)
      {
          return;
      }

      $TermElements = array();

      foreach (explode("\n", $Block['element']['handler']['argument']) as $term)
      {
          $TermElements []= array(
              'name' => 'dt',
              'handler' => array(
                  'function' => 'lineElements',
                  'argument' => $term,
                  'destination' => 'elements'
              ),
          );
      }

      $Block['element'] = array(
          'name' => 'dl',
          'elements' => $TermElements,
      );

      return $this->addDdElement($Line, $Block);
  }
  # Continues a definition list: a line starting with ':' opens a new dd, any
  # other line is appended to the current dd. An unindented line after an
  # interruption ends the list (nothing is returned).
  protected function blockDefinitionListContinue($Line, array $Block)
  {
      if ($Line['text'][0] === ':')
      {
          return $this->addDdElement($Line, $Block);
      }

      $wasInterrupted = isset($Block['interrupted']);

      if ($wasInterrupted and $Line['indent'] === 0)
      {
          return;
      }

      if ($wasInterrupted)
      {
          # the dd now spans a gap, so its content is handed to textElements
          $Block['dd']['handler']['function'] = 'textElements';
          $Block['dd']['handler']['argument'] .= "\n\n";
          $Block['dd']['handler']['destination'] = 'elements';

          unset($Block['interrupted']);
      }

      # drop at most four characters of indentation from the raw line
      $indentToStrip = min($Line['indent'], 4);

      $Block['dd']['handler']['argument'] .= "\n" . substr($Line['body'], $indentToStrip);

      return $Block;
  }
  164. #
  165. # Header
  # Extends the parent's header block: a trailing `{#id .class}` group is cut
  # off the header text and turned into element attributes.
  protected function blockHeader($Line)
  {
      $Block = parent::blockHeader($Line);

      if ($Block === null)
      {
          return $Block;
      }

      $headerText = $Block['element']['handler']['argument'];

      $pattern = '/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/';

      if (preg_match($pattern, $headerText, $matches, PREG_OFFSET_CAPTURE))
      {
          # $matches[1][0] is the attribute string, $matches[0][1] the offset where the group starts
          $Block['element']['attributes'] = $this->parseAttributeData($matches[1][0]);

          $Block['element']['handler']['argument'] = substr($headerText, 0, $matches[0][1]);
      }

      return $Block;
  }
  177. #
  178. # Markup
  # Starts a raw HTML block when the line opens with a tag that is not a
  # text-level element. Returns nothing when markup is escaped, safe mode is
  # on, the line does not start with a tag, or a void/self-closed tag is
  # followed by more content on the same line.
  #
  # The returned block carries the tag name as written, a nesting depth of 0,
  # the raw line as 'rawHtml', and 'closed' / 'void' flags where they apply.
  protected function blockMarkup($Line)
  {
      if ($this->markupEscaped or $this->safeMode)
      {
          return;
      }

      if ( ! preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
      {
          return;
      }

      $element = strtolower($matches[1]);

      if (in_array($element, $this->textLevelElements))
      {
          return;
      }

      $Block = array(
          'name' => $matches[1],
          'depth' => 0,
          'element' => array(
              'rawHtml' => $Line['text'],
              'autobreak' => true,
          ),
      );

      # compare the lowercased name, the same form used for the text-level
      # check above: with the raw name an upper-case void tag such as <HR>
      # was not recognised and the block was left open
      $isVoid = isset($matches[2]) || in_array($element, $this->voidElements);

      $remainder = substr($Line['text'], strlen($matches[0]));

      if (trim($remainder) === '')
      {
          if ($isVoid)
          {
              $Block['closed'] = true;
              $Block['void'] = true;
          }
      }
      else
      {
          if ($isVoid)
          {
              return;
          }

          # the element is opened and closed on the same line
          if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
          {
              $Block['closed'] = true;
          }
      }

      return $Block;
  }
  # Appends $Line to an open raw HTML block while tracking nested tags of the
  # same name. Returns nothing once the block has been closed.
  protected function blockMarkupContinue($Line, array $Block)
  {
      if (isset($Block['closed']))
      {
          return;
      }

      $tagName = $Block['name'];

      $opensNested = preg_match('/^<'.$tagName.'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text']);

      if ($opensNested)
      {
          $Block['depth'] ++;
      }

      $closesTag = preg_match('/(.*?)<\/'.$tagName.'>[ ]*$/i', $Line['text']);

      if ($closesTag)
      {
          # a closing tag either ends a nested element or the block itself
          if ($Block['depth'] > 0)
          {
              $Block['depth'] --;
          }
          else
          {
              $Block['closed'] = true;
          }
      }

      $separator = "\n";

      if (isset($Block['interrupted']))
      {
          # an interruption adds one extra line break before the line
          $separator = "\n\n";

          unset($Block['interrupted']);
      }

      $Block['element']['rawHtml'] .= $separator.$Line['body'];

      return $Block;
  }
  # Runs the collected raw HTML through processTag, except for blocks that
  # were flagged as void.
  protected function blockMarkupComplete($Block)
  {
      if (isset($Block['void']))
      {
          return $Block;
      }

      $rawHtml = $Block['element']['rawHtml'];

      $Block['element']['rawHtml'] = $this->processTag($rawHtml);

      return $Block;
  }
  261. #
  262. # Setext
  # Extends the parent's setext header block: a trailing `{#id .class}` group
  # is cut off the header text and turned into element attributes.
  protected function blockSetextHeader($Line, array $Block = null)
  {
      $Block = parent::blockSetextHeader($Line, $Block);

      if ($Block === null)
      {
          return $Block;
      }

      $headerText = $Block['element']['handler']['argument'];

      $pattern = '/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/';

      if (preg_match($pattern, $headerText, $matches, PREG_OFFSET_CAPTURE))
      {
          # $matches[1][0] is the attribute string, $matches[0][1] the offset where the group starts
          $Block['element']['attributes'] = $this->parseAttributeData($matches[1][0]);

          $Block['element']['handler']['argument'] = substr($headerText, 0, $matches[0][1]);
      }

      return $Block;
  }
  274. #
  275. # Inline Elements
  276. #
  277. #
  278. # Footnote Marker
  # Turns a `[^name]` marker into a superscript link to the footnote, provided
  # a footnote with that name has been defined; otherwise returns nothing.
  # Each use increments the footnote's reference count, and the first use
  # assigns the footnote its number.
  protected function inlineFootnoteMarker($Excerpt)
  {
      if ( ! preg_match('/^\[\^(.+?)\]/', $Excerpt['text'], $matches))
      {
          return;
      }

      $name = $matches[1];

      if ( ! isset($this->DefinitionData['Footnote'][$name]))
      {
          return;
      }

      $this->DefinitionData['Footnote'][$name]['count'] ++;

      if ( ! isset($this->DefinitionData['Footnote'][$name]['number']))
      {
          $this->DefinitionData['Footnote'][$name]['number'] = ++ $this->footnoteCount;
      }

      # snapshot taken after the updates above
      $Footnote = $this->DefinitionData['Footnote'][$name];

      $Anchor = array(
          'name' => 'a',
          'attributes' => array('href' => '#fn:'.$name, 'class' => 'footnote-ref'),
          'text' => $Footnote['number'],
      );

      return array(
          'extent' => strlen($matches[0]),
          'element' => array(
              'name' => 'sup',
              'attributes' => array('id' => 'fnref'.$Footnote['count'].':'.$name),
              'element' => $Anchor,
          ),
      );
  }

  # number handed out to the most recently numbered footnote
  private $footnoteCount = 0;
  309. #
  310. # Link
  # Extends the parent's link: a `{#id .class}` group directly after the link
  # is consumed and added to the link's attributes.
  protected function inlineLink($Excerpt)
  {
      $Link = parent::inlineLink($Excerpt);

      if ($Link === null)
      {
          return $Link;
      }

      $afterLink = substr($Excerpt['text'], $Link['extent']);

      $pattern = '/^[ ]*{('.$this->regexAttribute.'+)}/';

      if (preg_match($pattern, $afterLink, $matches))
      {
          $Link['element']['attributes'] += $this->parseAttributeData($matches[1]);

          # the attribute group is part of the consumed text
          $Link['extent'] += strlen($matches[0]);
      }

      return $Link;
  }
  322. #
  323. # ~
  324. #
  # abbreviation and meaning currently being applied; set by inlineText
  # before each pass and read by insertAbreviation
  private $currentAbreviation;
  private $currentMeaning;

  # Replaces whole-word occurrences of the current abbreviation in the
  # element's text with abbr elements titled with the current meaning.
  # Elements without a 'text' key are returned untouched.
  protected function insertAbreviation(array $Element)
  {
      if ( ! isset($Element['text']))
      {
          return $Element;
      }

      $Abbr = array(
          'name' => 'abbr',
          'attributes' => array(
              'title' => $this->currentMeaning,
          ),
          'text' => $this->currentAbreviation,
      );

      $pattern = '/\b'.preg_quote($this->currentAbreviation, '/').'\b/';

      $Element['elements'] = self::pregReplaceElements($pattern, array($Abbr), $Element['text']);

      # the text now lives in 'elements'
      unset($Element['text']);

      return $Element;
  }
  # Extends the parent's plain-text inline: every defined abbreviation is
  # applied, one after another, to the resulting element tree.
  protected function inlineText($text)
  {
      $Inline = parent::inlineText($text);

      if ( ! isset($this->DefinitionData['Abbreviation']))
      {
          return $Inline;
      }

      $callback = array($this, 'insertAbreviation');

      foreach ($this->DefinitionData['Abbreviation'] as $abbreviation => $meaning)
      {
          # insertAbreviation reads the pair from these two properties
          $this->currentAbreviation = $abbreviation;
          $this->currentMeaning = $meaning;

          $Inline['element'] = $this->elementApplyRecursiveDepthFirst($callback, $Inline['element']);
      }

      return $Inline;
  }
  365. #
  366. # Util Methods
  367. #
  # Appends a new dd to the definition list in $Block, built from $Line with
  # its leading character removed and the rest trimmed. $Block['dd'] and the
  # entry appended to the list share one reference, so later writes to
  # $Block['dd'] show up in the list.
  protected function addDdElement(array $Line, array $Block)
  {
      $text = trim(substr($Line['text'], 1));

      # a dd that follows an interruption is handed to textElements
      $handlerFunction = isset($Block['interrupted']) ? 'textElements' : 'lineElements';

      unset($Block['interrupted']);

      # detach from the previous dd before creating the new one
      unset($Block['dd']);

      $Block['dd'] = array(
          'name' => 'dd',
          'handler' => array(
              'function' => $handlerFunction,
              'argument' => $text,
              'destination' => 'elements'
          ),
      );

      $Block['element']['elements'] []= & $Block['dd'];

      return $Block;
  }
  # Builds the footnote section: a div.footnotes holding an hr and an ol with
  # one li per footnote that was actually referenced, ordered by footnote
  # number. Every li ends with one back-link per reference to the footnote.
  #
  # returns - the element array for the whole section
  protected function buildFootnoteElement()
  {
      $ListItems = array();

      # array callable: the string form 'self::sortFootnotes' is deprecated as of PHP 8.2
      uasort($this->DefinitionData['Footnote'], array($this, 'sortFootnotes'));

      foreach ($this->DefinitionData['Footnote'] as $definitionId => $DefinitionData)
      {
          # footnotes without a number were never referenced
          if ( ! isset($DefinitionData['number']))
          {
              continue;
          }

          $textElements = parent::textElements($DefinitionData['text']);

          # one back-link per reference, separated by single spaces
          $backLinkElements = array();

          for ($number = 1; $number <= $DefinitionData['count']; $number ++)
          {
              if ($number > 1)
              {
                  $backLinkElements[] = array('text' => ' ');
              }

              $backLinkElements[] = array(
                  'name' => 'a',
                  'attributes' => array(
                      'href' => "#fnref$number:$definitionId",
                      'rev' => 'footnote',
                      'class' => 'footnote-backref',
                  ),
                  'rawHtml' => '&#8617;',
                  'allowRawHtmlInSafeMode' => true,
                  'autobreak' => false,
              );
          }

          $n = count($textElements) - 1;

          # isset covers an empty footnote and a last element without a
          # 'name' key, both of which used to raise warnings here
          $endsWithParagraph = $n >= 0
              && isset($textElements[$n]['name'])
              && $textElements[$n]['name'] === 'p';

          if ($endsWithParagraph)
          {
              # keep the paragraph's content as a nameless child of a new p
              # so that the back-links end up inside the same paragraph
              $LastParagraph = $textElements[$n];

              unset($LastParagraph['name']);

              $NonBreakingSpace = array(
                  'rawHtml' => '&#160;',
                  'allowRawHtmlInSafeMode' => true,
              );

              $textElements[$n] = array(
                  'name' => 'p',
                  'elements' => array_merge(
                      array($LastParagraph, $NonBreakingSpace),
                      $backLinkElements
                  ),
              );
          }
          else
          {
              $textElements[] = array(
                  'name' => 'p',
                  'elements' => $backLinkElements
              );
          }

          $ListItems []= array(
              'name' => 'li',
              'attributes' => array('id' => 'fn:'.$definitionId),
              'elements' => array_values($textElements),
          );
      }

      return array(
          'name' => 'div',
          'attributes' => array('class' => 'footnotes'),
          'elements' => array(
              array('name' => 'hr'),
              array(
                  'name' => 'ol',
                  'elements' => $ListItems,
              ),
          ),
      );
  }
  467. # ~
  # Parses the inside of a `{...}` attribute group.
  #
  # $attributeString - space-separated tokens; a token starting with '#' sets
  #                    the id, any other token is taken as a class after its
  #                    first character is dropped
  # returns          - array with an 'id' key (the last id token wins) and/or
  #                    a 'class' key (classes joined by single spaces)
  protected function parseAttributeData($attributeString)
  {
      $Data = array();

      $classes = array();

      foreach (preg_split('/[ ]+/', $attributeString, - 1, PREG_SPLIT_NO_EMPTY) as $attribute)
      {
          $value = substr($attribute, 1);

          if ($attribute[0] === '#')
          {
              $Data['id'] = $value;
          }
          else # "."
          {
              $classes []= $value;
          }
      }

      if ($classes !== array())
      {
          $Data['class'] = implode(' ', $classes);
      }

      return $Data;
  }
  489. # ~
  # Re-serialises one HTML element. When the element carries markdown="1",
  # its content is rendered through text() and the attribute is removed;
  # otherwise child elements that are not text-level are processed
  # recursively and everything else is kept as serialised by the DOM.
  #
  # $elementMarkup - markup that starts with the element's opening tag
  # returns        - the processed markup, or $elementMarkup unchanged when
  #                  the parser does not yield an element for it
  protected function processTag($elementMarkup) # recursive
  {
      # http://stackoverflow.com/q/1148928/200145
      # keep parser warnings internal, and remember the caller's setting so
      # that it can be restored instead of staying switched on globally
      $previousErrorSetting = libxml_use_internal_errors(true);

      $DOMDocument = new DOMDocument;

      # http://stackoverflow.com/q/11309194/200145
      # non-ASCII characters go in as numeric entities; this replaces
      # mb_convert_encoding(..., 'HTML-ENTITIES', ...), deprecated in PHP 8.2
      $encodedMarkup = mb_encode_numericentity($elementMarkup, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

      # http://stackoverflow.com/q/4879946/200145
      $DOMDocument->loadHTML($encodedMarkup);

      # drop the collected warnings so that they do not pile up across calls
      libxml_clear_errors();
      libxml_use_internal_errors($previousErrorSetting);

      if ($DOMDocument->doctype !== null)
      {
          $DOMDocument->removeChild($DOMDocument->doctype);
      }

      # the element of interest sits two levels below the root the parser created
      $Wrapper = $DOMDocument->firstChild;

      $Root = null;

      if ($Wrapper !== null and $Wrapper->firstChild !== null)
      {
          $Root = $Wrapper->firstChild->firstChild;
      }

      if ( ! $Root instanceof DOMElement)
      {
          # nothing to promote to document element; leave the markup alone
          return $elementMarkup;
      }

      $DOMDocument->replaceChild($Root, $Wrapper);

      $elementText = '';

      if ($DOMDocument->documentElement->getAttribute('markdown') === '1')
      {
          foreach ($DOMDocument->documentElement->childNodes as $Node)
          {
              $elementText .= $DOMDocument->saveHTML($Node);
          }

          $DOMDocument->documentElement->removeAttribute('markdown');

          $elementText = "\n".$this->text($elementText)."\n";
      }
      else
      {
          foreach ($DOMDocument->documentElement->childNodes as $Node)
          {
              $nodeMarkup = $DOMDocument->saveHTML($Node);

              if ($Node instanceof DOMElement and ! in_array($Node->nodeName, $this->textLevelElements))
              {
                  $elementText .= $this->processTag($nodeMarkup);
              }
              else
              {
                  $elementText .= $nodeMarkup;
              }
          }
      }

      # because we don't want for markup to get encoded: serialise the element
      # around a marker and swap the marker for the prepared content
      $DOMDocument->documentElement->nodeValue = 'placeholder\x1A';

      $markup = $DOMDocument->saveHTML($DOMDocument->documentElement);

      return str_replace('placeholder\x1A', $elementText, $markup);
  }
  532. # ~
  # Comparison callback that orders footnote definitions by ascending 'number'.
  protected function sortFootnotes($A, $B) # callback
  {
      $first = $A['number'];
      $second = $B['number'];

      return $first - $second;
  }

  #
  # Fields
  #

  # pattern fragment for a single "#id" or ".class" token plus any spaces after it
  protected $regexAttribute = '(?:[#.][-\w]+[ ]*)';
  541. }