BioTorrents.de’s version of Gazelle
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

ParsedownExtra.php 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686
  1. <?php
  2. #
  3. #
  4. # Parsedown Extra
  5. # https://github.com/erusev/parsedown-extra
  6. #
  7. # (c) Emanuil Rusev
  8. # http://erusev.com
  9. #
  10. # For the full license information, view the LICENSE file that was distributed
  11. # with this source code.
  12. #
  13. #
  14. class ParsedownExtra extends Parsedown
  15. {
  16. # ~
    # version string of this ParsedownExtra release
    const version = '0.8.0';
  18. # ~
  19. function __construct()
  20. {
  21. if (version_compare(parent::version, '1.7.1') < 0)
  22. {
  23. throw new Exception('ParsedownExtra requires a later version of Parsedown');
  24. }
  25. $this->BlockTypes[':'] []= 'DefinitionList';
  26. $this->BlockTypes['*'] []= 'Abbreviation';
  27. # identify footnote definitions before reference definitions
  28. array_unshift($this->BlockTypes['['], 'Footnote');
  29. # identify footnote markers before before links
  30. array_unshift($this->InlineTypes['['], 'FootnoteMarker');
  31. }
  32. #
  33. # ~
  34. function text($text)
  35. {
  36. $Elements = $this->textElements($text);
  37. # convert to markup
  38. $markup = $this->elements($Elements);
  39. # trim line breaks
  40. $markup = trim($markup, "\n");
  41. # merge consecutive dl elements
  42. $markup = preg_replace('/<\/dl>\s+<dl>\s+/', '', $markup);
  43. # add footnotes
  44. if (isset($this->DefinitionData['Footnote']))
  45. {
  46. $Element = $this->buildFootnoteElement();
  47. $markup .= "\n" . $this->element($Element);
  48. }
  49. return $markup;
  50. }
  51. #
  52. # Blocks
  53. #
  54. #
  55. # Abbreviation
  56. protected function blockAbbreviation($Line)
  57. {
  58. if (preg_match('/^\*\[(.+?)\]:[ ]*(.+?)[ ]*$/', $Line['text'], $matches))
  59. {
  60. $this->DefinitionData['Abbreviation'][$matches[1]] = $matches[2];
  61. $Block = array(
  62. 'hidden' => true,
  63. );
  64. return $Block;
  65. }
  66. }
  67. #
  68. # Footnote
  69. protected function blockFootnote($Line)
  70. {
  71. if (preg_match('/^\[\^(.+?)\]:[ ]?(.*)$/', $Line['text'], $matches))
  72. {
  73. $Block = array(
  74. 'label' => $matches[1],
  75. 'text' => $matches[2],
  76. 'hidden' => true,
  77. );
  78. return $Block;
  79. }
  80. }
  81. protected function blockFootnoteContinue($Line, $Block)
  82. {
  83. if ($Line['text'][0] === '[' and preg_match('/^\[\^(.+?)\]:/', $Line['text']))
  84. {
  85. return;
  86. }
  87. if (isset($Block['interrupted']))
  88. {
  89. if ($Line['indent'] >= 4)
  90. {
  91. $Block['text'] .= "\n\n" . $Line['text'];
  92. return $Block;
  93. }
  94. }
  95. else
  96. {
  97. $Block['text'] .= "\n" . $Line['text'];
  98. return $Block;
  99. }
  100. }
  101. protected function blockFootnoteComplete($Block)
  102. {
  103. $this->DefinitionData['Footnote'][$Block['label']] = array(
  104. 'text' => $Block['text'],
  105. 'count' => null,
  106. 'number' => null,
  107. );
  108. return $Block;
  109. }
  110. #
  111. # Definition List
  112. protected function blockDefinitionList($Line, $Block)
  113. {
  114. if ( ! isset($Block) or $Block['type'] !== 'Paragraph')
  115. {
  116. return;
  117. }
  118. $Element = array(
  119. 'name' => 'dl',
  120. 'elements' => array(),
  121. );
  122. $terms = explode("\n", $Block['element']['handler']['argument']);
  123. foreach ($terms as $term)
  124. {
  125. $Element['elements'] []= array(
  126. 'name' => 'dt',
  127. 'handler' => array(
  128. 'function' => 'lineElements',
  129. 'argument' => $term,
  130. 'destination' => 'elements'
  131. ),
  132. );
  133. }
  134. $Block['element'] = $Element;
  135. $Block = $this->addDdElement($Line, $Block);
  136. return $Block;
  137. }
  138. protected function blockDefinitionListContinue($Line, array $Block)
  139. {
  140. if ($Line['text'][0] === ':')
  141. {
  142. $Block = $this->addDdElement($Line, $Block);
  143. return $Block;
  144. }
  145. else
  146. {
  147. if (isset($Block['interrupted']) and $Line['indent'] === 0)
  148. {
  149. return;
  150. }
  151. if (isset($Block['interrupted']))
  152. {
  153. $Block['dd']['handler']['function'] = 'textElements';
  154. $Block['dd']['handler']['argument'] .= "\n\n";
  155. $Block['dd']['handler']['destination'] = 'elements';
  156. unset($Block['interrupted']);
  157. }
  158. $text = substr($Line['body'], min($Line['indent'], 4));
  159. $Block['dd']['handler']['argument'] .= "\n" . $text;
  160. return $Block;
  161. }
  162. }
  163. #
  164. # Header
  165. protected function blockHeader($Line)
  166. {
  167. $Block = parent::blockHeader($Line);
  168. if ($Block !== null && preg_match('/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['handler']['argument'], $matches, PREG_OFFSET_CAPTURE))
  169. {
  170. $attributeString = $matches[1][0];
  171. $Block['element']['attributes'] = $this->parseAttributeData($attributeString);
  172. $Block['element']['handler']['argument'] = substr($Block['element']['handler']['argument'], 0, $matches[0][1]);
  173. }
  174. return $Block;
  175. }
  176. #
  177. # Markup
  178. protected function blockMarkup($Line)
  179. {
  180. if ($this->markupEscaped or $this->safeMode)
  181. {
  182. return;
  183. }
  184. if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
  185. {
  186. $element = strtolower($matches[1]);
  187. if (in_array($element, $this->textLevelElements))
  188. {
  189. return;
  190. }
  191. $Block = array(
  192. 'name' => $matches[1],
  193. 'depth' => 0,
  194. 'element' => array(
  195. 'rawHtml' => $Line['text'],
  196. 'autobreak' => true,
  197. ),
  198. );
  199. $length = strlen($matches[0]);
  200. $remainder = substr($Line['text'], $length);
  201. if (trim($remainder) === '')
  202. {
  203. if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
  204. {
  205. $Block['closed'] = true;
  206. $Block['void'] = true;
  207. }
  208. }
  209. else
  210. {
  211. if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
  212. {
  213. return;
  214. }
  215. if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
  216. {
  217. $Block['closed'] = true;
  218. }
  219. }
  220. return $Block;
  221. }
  222. }
  223. protected function blockMarkupContinue($Line, array $Block)
  224. {
  225. if (isset($Block['closed']))
  226. {
  227. return;
  228. }
  229. if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open
  230. {
  231. $Block['depth'] ++;
  232. }
  233. if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close
  234. {
  235. if ($Block['depth'] > 0)
  236. {
  237. $Block['depth'] --;
  238. }
  239. else
  240. {
  241. $Block['closed'] = true;
  242. }
  243. }
  244. if (isset($Block['interrupted']))
  245. {
  246. $Block['element']['rawHtml'] .= "\n";
  247. unset($Block['interrupted']);
  248. }
  249. $Block['element']['rawHtml'] .= "\n".$Line['body'];
  250. return $Block;
  251. }
  252. protected function blockMarkupComplete($Block)
  253. {
  254. if ( ! isset($Block['void']))
  255. {
  256. $Block['element']['rawHtml'] = $this->processTag($Block['element']['rawHtml']);
  257. }
  258. return $Block;
  259. }
  260. #
  261. # Setext
  262. protected function blockSetextHeader($Line, array $Block = null)
  263. {
  264. $Block = parent::blockSetextHeader($Line, $Block);
  265. if ($Block !== null && preg_match('/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['handler']['argument'], $matches, PREG_OFFSET_CAPTURE))
  266. {
  267. $attributeString = $matches[1][0];
  268. $Block['element']['attributes'] = $this->parseAttributeData($attributeString);
  269. $Block['element']['handler']['argument'] = substr($Block['element']['handler']['argument'], 0, $matches[0][1]);
  270. }
  271. return $Block;
  272. }
  273. #
  274. # Inline Elements
  275. #
  276. #
  277. # Footnote Marker
  278. protected function inlineFootnoteMarker($Excerpt)
  279. {
  280. if (preg_match('/^\[\^(.+?)\]/', $Excerpt['text'], $matches))
  281. {
  282. $name = $matches[1];
  283. if ( ! isset($this->DefinitionData['Footnote'][$name]))
  284. {
  285. return;
  286. }
  287. $this->DefinitionData['Footnote'][$name]['count'] ++;
  288. if ( ! isset($this->DefinitionData['Footnote'][$name]['number']))
  289. {
  290. $this->DefinitionData['Footnote'][$name]['number'] = ++ $this->footnoteCount; # &raquo; &
  291. }
  292. $Element = array(
  293. 'name' => 'sup',
  294. 'attributes' => array('id' => 'fnref'.$this->DefinitionData['Footnote'][$name]['count'].':'.$name),
  295. 'element' => array(
  296. 'name' => 'a',
  297. 'attributes' => array('href' => '#fn:'.$name, 'class' => 'footnote-ref'),
  298. 'text' => $this->DefinitionData['Footnote'][$name]['number'],
  299. ),
  300. );
  301. return array(
  302. 'extent' => strlen($matches[0]),
  303. 'element' => $Element,
  304. );
  305. }
  306. }
    # counter that inlineFootnoteMarker() pre-increments to number each footnote on its first reference
    private $footnoteCount = 0;
  308. #
  309. # Link
  310. protected function inlineLink($Excerpt)
  311. {
  312. $Link = parent::inlineLink($Excerpt);
  313. $remainder = $Link !== null ? substr($Excerpt['text'], $Link['extent']) : '';
  314. if (preg_match('/^[ ]*{('.$this->regexAttribute.'+)}/', $remainder, $matches))
  315. {
  316. $Link['element']['attributes'] += $this->parseAttributeData($matches[1]);
  317. $Link['extent'] += strlen($matches[0]);
  318. }
  319. return $Link;
  320. }
  321. #
  322. # ~
  323. #
    # abbreviation and meaning being applied; set by inlineText() and read by insertAbreviation()
    private $currentAbreviation;
    private $currentMeaning;
  326. protected function insertAbreviation(array $Element)
  327. {
  328. if (isset($Element['text']))
  329. {
  330. $Element['elements'] = self::pregReplaceElements(
  331. '/\b'.preg_quote($this->currentAbreviation, '/').'\b/',
  332. array(
  333. array(
  334. 'name' => 'abbr',
  335. 'attributes' => array(
  336. 'title' => $this->currentMeaning,
  337. ),
  338. 'text' => $this->currentAbreviation,
  339. )
  340. ),
  341. $Element['text']
  342. );
  343. unset($Element['text']);
  344. }
  345. return $Element;
  346. }
  347. protected function inlineText($text)
  348. {
  349. $Inline = parent::inlineText($text);
  350. if (isset($this->DefinitionData['Abbreviation']))
  351. {
  352. foreach ($this->DefinitionData['Abbreviation'] as $abbreviation => $meaning)
  353. {
  354. $this->currentAbreviation = $abbreviation;
  355. $this->currentMeaning = $meaning;
  356. $Inline['element'] = $this->elementApplyRecursiveDepthFirst(
  357. array($this, 'insertAbreviation'),
  358. $Inline['element']
  359. );
  360. }
  361. }
  362. return $Inline;
  363. }
  364. #
  365. # Util Methods
  366. #
    #
    # Starts a new <dd> element for the definition list held in $Block.
    #
    # $Line   - line data; the text after its first character, trimmed, becomes
    #           the definition text
    # $Block  - definition list block
    # returns - the block with the new <dd> stored under 'dd' and appended to
    #           the list's elements
    #
    protected function addDdElement(array $Line, array $Block)
    {
        # drop the first character (presumably the ':' marker - both callers in
        # this class are reached for lines handled as definition lines)
        $text = substr($Line['text'], 1);
        $text = trim($text);

        # detach 'dd' from the element it was bound to by an earlier call, so
        # that the assignment below does not overwrite that element
        unset($Block['dd']);

        $Block['dd'] = array(
            'name' => 'dd',
            'handler' => array(
                'function' => 'lineElements',
                'argument' => $text,
                'destination' => 'elements'
            ),
        );

        # after an interruption the definition is handled by textElements
        # instead of lineElements
        if (isset($Block['interrupted']))
        {
            $Block['dd']['handler']['function'] = 'textElements';

            unset($Block['interrupted']);
        }

        # appended by reference: later writes to $Block['dd'] (see
        # blockDefinitionListContinue) are visible in the element list
        $Block['element']['elements'] []= & $Block['dd'];

        return $Block;
    }
  388. protected function buildFootnoteElement()
  389. {
  390. $Element = array(
  391. 'name' => 'div',
  392. 'attributes' => array('class' => 'footnotes'),
  393. 'elements' => array(
  394. array('name' => 'hr'),
  395. array(
  396. 'name' => 'ol',
  397. 'elements' => array(),
  398. ),
  399. ),
  400. );
  401. uasort($this->DefinitionData['Footnote'], 'self::sortFootnotes');
  402. foreach ($this->DefinitionData['Footnote'] as $definitionId => $DefinitionData)
  403. {
  404. if ( ! isset($DefinitionData['number']))
  405. {
  406. continue;
  407. }
  408. $text = $DefinitionData['text'];
  409. $textElements = parent::textElements($text);
  410. $numbers = range(1, $DefinitionData['count']);
  411. $backLinkElements = array();
  412. foreach ($numbers as $number)
  413. {
  414. $backLinkElements[] = array('text' => ' ');
  415. $backLinkElements[] = array(
  416. 'name' => 'a',
  417. 'attributes' => array(
  418. 'href' => "#fnref$number:$definitionId",
  419. 'rev' => 'footnote',
  420. 'class' => 'footnote-backref',
  421. ),
  422. 'rawHtml' => '&#8617;',
  423. 'allowRawHtmlInSafeMode' => true,
  424. 'autobreak' => false,
  425. );
  426. }
  427. unset($backLinkElements[0]);
  428. $n = count($textElements) -1;
  429. if ($textElements[$n]['name'] === 'p')
  430. {
  431. $backLinkElements = array_merge(
  432. array(
  433. array(
  434. 'rawHtml' => '&#160;',
  435. 'allowRawHtmlInSafeMode' => true,
  436. ),
  437. ),
  438. $backLinkElements
  439. );
  440. unset($textElements[$n]['name']);
  441. $textElements[$n] = array(
  442. 'name' => 'p',
  443. 'elements' => array_merge(
  444. array($textElements[$n]),
  445. $backLinkElements
  446. ),
  447. );
  448. }
  449. else
  450. {
  451. $textElements[] = array(
  452. 'name' => 'p',
  453. 'elements' => $backLinkElements
  454. );
  455. }
  456. $Element['elements'][1]['elements'] []= array(
  457. 'name' => 'li',
  458. 'attributes' => array('id' => 'fn:'.$definitionId),
  459. 'elements' => array_merge(
  460. $textElements
  461. ),
  462. );
  463. }
  464. return $Element;
  465. }
  466. # ~
  467. protected function parseAttributeData($attributeString)
  468. {
  469. $Data = array();
  470. $attributes = preg_split('/[ ]+/', $attributeString, - 1, PREG_SPLIT_NO_EMPTY);
  471. foreach ($attributes as $attribute)
  472. {
  473. if ($attribute[0] === '#')
  474. {
  475. $Data['id'] = substr($attribute, 1);
  476. }
  477. else # "."
  478. {
  479. $classes []= substr($attribute, 1);
  480. }
  481. }
  482. if (isset($classes))
  483. {
  484. $Data['class'] = implode(' ', $classes);
  485. }
  486. return $Data;
  487. }
  488. # ~
    #
    # Re-serialises a block of raw HTML. The content of an element that carries
    # markdown="1" is run through text() and the attribute is removed; the
    # content of any other element is kept, except that child elements not
    # listed in textLevelElements are processed recursively.
    #
    # $elementMarkup - markup of a single element
    # returns        - the element's markup with its content replaced as above
    #
    protected function processTag($elementMarkup) # recursive
    {
        # http://stackoverflow.com/q/1148928/200145
        # (the setting is not restored afterwards)
        libxml_use_internal_errors(true);

        $DOMDocument = new DOMDocument;

        # http://stackoverflow.com/q/11309194/200145
        # NOTE(review): the 'HTML-ENTITIES' target of mb_convert_encoding is
        # deprecated as of PHP 8.2 - confirm the PHP versions to be supported
        $elementMarkup = mb_convert_encoding($elementMarkup, 'HTML-ENTITIES', 'UTF-8');

        # http://stackoverflow.com/q/4879946/200145
        $DOMDocument->loadHTML($elementMarkup);
        $DOMDocument->removeChild($DOMDocument->doctype);
        # replace the document's first node with its first grandchild
        # (presumably the parsed element inside the <html><body> wrapper)
        $DOMDocument->replaceChild($DOMDocument->firstChild->firstChild->firstChild, $DOMDocument->firstChild);

        $elementText = '';

        if ($DOMDocument->documentElement->getAttribute('markdown') === '1')
        {
            # serialise the children and parse the result as Markdown
            foreach ($DOMDocument->documentElement->childNodes as $Node)
            {
                $elementText .= $DOMDocument->saveHTML($Node);
            }

            $DOMDocument->documentElement->removeAttribute('markdown');

            $elementText = "\n".$this->text($elementText)."\n";
        }
        else
        {
            foreach ($DOMDocument->documentElement->childNodes as $Node)
            {
                $nodeMarkup = $DOMDocument->saveHTML($Node);

                # descend into child elements that are not text-level
                if ($Node instanceof DOMElement and ! in_array($Node->nodeName, $this->textLevelElements))
                {
                    $elementText .= $this->processTag($nodeMarkup);
                }
                else
                {
                    $elementText .= $nodeMarkup;
                }
            }
        }

        # because we don't want for markup to get encoded
        # (single-quoted, so \x1A stays a literal backslash sequence; the same
        # literal is searched for below)
        $DOMDocument->documentElement->nodeValue = 'placeholder\x1A';

        $markup = $DOMDocument->saveHTML($DOMDocument->documentElement);
        $markup = str_replace('placeholder\x1A', $elementText, $markup);

        return $markup;
    }
  531. # ~
  532. protected function sortFootnotes($A, $B) # callback
  533. {
  534. return $A['number'] - $B['number'];
  535. }
  536. #
  537. # Fields
  538. #
    # regex fragment for one attribute token: '#' or '.', then word characters or dashes, then optional spaces
    protected $regexAttribute = '(?:[#.][-\w]+[ ]*)';
  540. }