a8f7f2c5de28c3c1165e3410cc7a01e0eb1208b3
[yaffs-website] / Michelf / MarkdownExtra.php
1 <?php
2 /**
3  * Markdown Extra - A text-to-HTML conversion tool for web writers
4  *
5  * @package   php-markdown
6  * @author    Michel Fortin <michel.fortin@michelf.com>
7  * @copyright 2004-2018 Michel Fortin <https://michelf.com/projects/php-markdown/>
8  * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
9  */
10
11 namespace Michelf;
12
13 /**
14  * Markdown Extra Parser Class
15  */
16 class MarkdownExtra extends \Michelf\Markdown {
17         /**
18          * Configuration variables
19          */
20
21         /**
22          * Prefix for footnote ids.
23          * @var string
24          */
25         public $fn_id_prefix = "";
26
27         /**
28          * Optional title attribute for footnote links and backlinks.
29          * @var string
30          */
31         public $fn_link_title     = "";
32         public $fn_backlink_title = "";
33
34         /**
35          * Optional class attribute for footnote links and backlinks.
36          * @var string
37          */
38         public $fn_link_class     = "footnote-ref";
39         public $fn_backlink_class = "footnote-backref";
40
41         /**
42          * Content to be displayed within footnote backlinks. The default is '↩';
43          * the U+FE0E on the end is a Unicode variant selector used to prevent iOS
44          * from displaying the arrow character as an emoji.
45          * @var string
46          */
47         public $fn_backlink_html = '&#8617;&#xFE0E;';
48
49         /**
50          * Class name for table cell alignment (%% replaced left/center/right)
51          * For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
52          * If empty, the align attribute is used instead of a class name.
53          * @var string
54          */
55         public $table_align_class_tmpl = '';
56
57         /**
58          * Optional class prefix for fenced code block.
59          * @var string
60          */
61         public $code_class_prefix = "";
62
63         /**
64          * Class attribute for code blocks goes on the `code` tag;
65          * setting this to true will put attributes on the `pre` tag instead.
66          * @var boolean
67          */
68         public $code_attr_on_pre = false;
69
70         /**
71          * Predefined abbreviations.
72          * @var array
73          */
74         public $predef_abbr = array();
75
76         /**
77          * Only convert atx-style headers if there's a space between the header and #
78          * @var boolean
79          */
80         public $hashtag_protection = false;
81
82         /**
83          * Parser implementation
84          */
85
86         /**
87          * Constructor function. Initialize the parser object.
88          * @return void
89          */
90         public function __construct() {
91                 // Add extra escapable characters before parent constructor
92                 // initialize the table.
93                 $this->escape_chars .= ':|';
94
95                 // Insert extra document, block, and span transformations.
96                 // Parent constructor will do the sorting.
97                 $this->document_gamut += array(
98                         "doFencedCodeBlocks" => 5,
99                         "stripFootnotes"     => 15,
100                         "stripAbbreviations" => 25,
101                         "appendFootnotes"    => 50,
102                 );
103                 $this->block_gamut += array(
104                         "doFencedCodeBlocks" => 5,
105                         "doTables"           => 15,
106                         "doDefLists"         => 45,
107                 );
108                 $this->span_gamut += array(
109                         "doFootnotes"        => 5,
110                         "doAbbreviations"    => 70,
111                 );
112
113                 $this->enhanced_ordered_list = true;
114                 parent::__construct();
115         }
116
117
118         /**
119          * Extra variables used during extra transformations.
120          * @var array
121          */
122         protected $footnotes = array();
123         protected $footnotes_ordered = array();
124         protected $footnotes_ref_count = array();
125         protected $footnotes_numbers = array();
126         protected $abbr_desciptions = array();
127         /** @var string */
128         protected $abbr_word_re = '';
129
130         /**
131          * Give the current footnote number.
132          * @var integer
133          */
134         protected $footnote_counter = 1;
135
136         /**
137          * Setting up Extra-specific variables.
138          */
139         protected function setup() {
140                 parent::setup();
141
142                 $this->footnotes = array();
143                 $this->footnotes_ordered = array();
144                 $this->footnotes_ref_count = array();
145                 $this->footnotes_numbers = array();
146                 $this->abbr_desciptions = array();
147                 $this->abbr_word_re = '';
148                 $this->footnote_counter = 1;
149
150                 foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
151                         if ($this->abbr_word_re)
152                                 $this->abbr_word_re .= '|';
153                         $this->abbr_word_re .= preg_quote($abbr_word);
154                         $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
155                 }
156         }
157
158         /**
159          * Clearing Extra-specific variables.
160          */
161         protected function teardown() {
162                 $this->footnotes = array();
163                 $this->footnotes_ordered = array();
164                 $this->footnotes_ref_count = array();
165                 $this->footnotes_numbers = array();
166                 $this->abbr_desciptions = array();
167                 $this->abbr_word_re = '';
168
169                 parent::teardown();
170         }
171
172
173         /**
174          * Extra attribute parser
175          */
176
177         /**
178          * Expression to use to catch attributes (includes the braces)
179          * @var string
180          */
181         protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
182
183         /**
184          * Expression to use when parsing in a context when no capture is desired
185          * @var string
186          */
187         protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
188
189         /**
190          * Parse attributes caught by the $this->id_class_attr_catch_re expression
191          * and return the HTML-formatted list of attributes.
192          *
193          * Currently supported attributes are .class and #id.
194          *
195          * In addition, this method also supports supplying a default Id value,
196          * which will be used to populate the id attribute in case it was not
197          * overridden.
198          * @param  string $tag_name
199          * @param  string $attr
200          * @param  mixed  $defaultIdValue
201          * @param  array  $classes
202          * @return string
203          */
204         protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) {
205                 if (empty($attr) && !$defaultIdValue && empty($classes)) return "";
206
207                 // Split on components
208                 preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', $attr, $matches);
209                 $elements = $matches[0];
210
211                 // Handle classes and IDs (only first ID taken into account)
212                 $attributes = array();
213                 $id = false;
214                 foreach ($elements as $element) {
215                         if ($element{0} == '.') {
216                                 $classes[] = substr($element, 1);
217                         } else if ($element{0} == '#') {
218                                 if ($id === false) $id = substr($element, 1);
219                         } else if (strpos($element, '=') > 0) {
220                                 $parts = explode('=', $element, 2);
221                                 $attributes[] = $parts[0] . '="' . $parts[1] . '"';
222                         }
223                 }
224
225                 if (!$id) $id = $defaultIdValue;
226
227                 // Compose attributes as string
228                 $attr_str = "";
229                 if (!empty($id)) {
230                         $attr_str .= ' id="'.$this->encodeAttribute($id) .'"';
231                 }
232                 if (!empty($classes)) {
233                         $attr_str .= ' class="'. implode(" ", $classes) . '"';
234                 }
235                 if (!$this->no_markup && !empty($attributes)) {
236                         $attr_str .= ' '.implode(" ", $attributes);
237                 }
238                 return $attr_str;
239         }
240
241         /**
242          * Strips link definitions from text, stores the URLs and titles in
243          * hash references.
244          * @param  string $text
245          * @return string
246          */
247         protected function stripLinkDefinitions($text) {
248                 $less_than_tab = $this->tab_width - 1;
249
250                 // Link defs are in the form: ^[id]: url "optional title"
251                 $text = preg_replace_callback('{
252                                                         ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
253                                                           [ ]*
254                                                           \n?                           # maybe *one* newline
255                                                           [ ]*
256                                                         (?:
257                                                           <(.+?)>                       # url = $2
258                                                         |
259                                                           (\S+?)                        # url = $3
260                                                         )
261                                                           [ ]*
262                                                           \n?                           # maybe one newline
263                                                           [ ]*
264                                                         (?:
265                                                                 (?<=\s)                 # lookbehind for whitespace
266                                                                 ["(]
267                                                                 (.*?)                   # title = $4
268                                                                 [")]
269                                                                 [ ]*
270                                                         )?      # title is optional
271                                         (?:[ ]* '.$this->id_class_attr_catch_re.' )?  # $5 = extra id & class attr
272                                                         (?:\n+|\Z)
273                         }xm',
274                         array($this, '_stripLinkDefinitions_callback'),
275                         $text);
276                 return $text;
277         }
278
279         /**
280          * Strip link definition callback
281          * @param  array $matches
282          * @return string
283          */
284         protected function _stripLinkDefinitions_callback($matches) {
285                 $link_id = strtolower($matches[1]);
286                 $url = $matches[2] == '' ? $matches[3] : $matches[2];
287                 $this->urls[$link_id] = $url;
288                 $this->titles[$link_id] =& $matches[4];
289                 $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);
290                 return ''; // String that will replace the block
291         }
292
293
294         /**
295          * HTML block parser
296          */
297
298         /**
299          * Tags that are always treated as block tags
300          * @var string
301          */
302         protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
303
304         /**
305          * Tags treated as block tags only if the opening tag is alone on its line
306          * @var string
307          */
308         protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
309
310         /**
311          * Tags where markdown="1" default to span mode:
312          * @var string
313          */
314         protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
315
316         /**
317          * Tags which must not have their contents modified, no matter where
318          * they appear
319          * @var string
320          */
321         protected $clean_tags_re = 'script|style|math|svg';
322
323         /**
324          * Tags that do not need to be closed.
325          * @var string
326          */
327         protected $auto_close_tags_re = 'hr|img|param|source|track';
328
329         /**
330          * Hashify HTML Blocks and "clean tags".
331          *
332          * We only want to do this for block-level HTML tags, such as headers,
333          * lists, and tables. That's because we still want to wrap <p>s around
334          * "paragraphs" that are wrapped in non-block-level tags, such as anchors,
335          * phrase emphasis, and spans. The list of tags we're looking for is
336          * hard-coded.
337          *
338          * This works by calling _HashHTMLBlocks_InMarkdown, which then calls
339          * _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1"
340          * attribute is found within a tag, _HashHTMLBlocks_InHTML calls back
341          *  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
342          * These two functions are calling each other. It's recursive!
343          * @param  string $text
344          * @return string
345          */
346         protected function hashHTMLBlocks($text) {
347                 if ($this->no_markup) {
348                         return $text;
349                 }
350
351                 // Call the HTML-in-Markdown hasher.
352                 list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
353
354                 return $text;
355         }
356
357         /**
358          * Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
359          *
360          * *   $indent is the number of space to be ignored when checking for code
361          *     blocks. This is important because if we don't take the indent into
362          *     account, something like this (which looks right) won't work as expected:
363          *
364          *     <div>
365          *         <div markdown="1">
366          *         Hello World.  <-- Is this a Markdown code block or text?
367          *         </div>  <-- Is this a Markdown code block or a real tag?
368          *     <div>
369          *
370          *     If you don't like this, just don't indent the tag on which
371          *     you apply the markdown="1" attribute.
372          *
373          * *   If $enclosing_tag_re is not empty, stops at the first unmatched closing
374          *     tag with that name. Nested tags supported.
375          *
376          * *   If $span is true, text inside must treated as span. So any double
377          *     newline will be replaced by a single newline so that it does not create
378          *     paragraphs.
379          *
380          * Returns an array of that form: ( processed text , remaining text )
381          *
382          * @param  string  $text
383          * @param  integer $indent
384          * @param  string  $enclosing_tag_re
385          * @param  boolean $span
386          * @return array
387          */
388         protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
389                                                                                 $enclosing_tag_re = '', $span = false)
390         {
391
392                 if ($text === '') return array('', '');
393
394                 // Regex to check for the presense of newlines around a block tag.
395                 $newline_before_re = '/(?:^\n?|\n\n)*$/';
396                 $newline_after_re =
397                         '{
398                                 ^                                               # Start of text following the tag.
399                                 (?>[ ]*<!--.*?-->)?             # Optional comment.
400                                 [ ]*\n                                  # Must be followed by newline.
401                         }xs';
402
403                 // Regex to match any tag.
404                 $block_tag_re =
405                         '{
406                                 (                                       # $2: Capture whole tag.
407                                         </?                                     # Any opening or closing tag.
408                                                 (?>                             # Tag name.
409                                                         ' . $this->block_tags_re . '                    |
410                                                         ' . $this->context_block_tags_re . '    |
411                                                         ' . $this->clean_tags_re . '            |
412                                                         (?!\s)'.$enclosing_tag_re . '
413                                                 )
414                                                 (?:
415                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
416                                                         (?>
417                                                                 ".*?"           |       # Double quotes (can contain `>`)
418                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
419                                                                 .+?                             # Anything but quotes and `>`.
420                                                         )*?
421                                                 )?
422                                         >                                       # End of tag.
423                                 |
424                                         <!--    .*?     -->     # HTML Comment
425                                 |
426                                         <\?.*?\?> | <%.*?%>     # Processing instruction
427                                 |
428                                         <!\[CDATA\[.*?\]\]>     # CData Block
429                                 ' . ( !$span ? ' # If not in span.
430                                 |
431                                         # Indented code block
432                                         (?: ^[ ]*\n | ^ | \n[ ]*\n )
433                                         [ ]{' . ($indent + 4) . '}[^\n]* \n
434                                         (?>
435                                                 (?: [ ]{' . ($indent + 4) . '}[^\n]* | [ ]* ) \n
436                                         )*
437                                 |
438                                         # Fenced code block marker
439                                         (?<= ^ | \n )
440                                         [ ]{0,' . ($indent + 3) . '}(?:~{3,}|`{3,})
441                                         [ ]*
442                                         (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name
443                                         [ ]*
444                                         (?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes
445                                         [ ]*
446                                         (?= \n )
447                                 ' : '' ) . ' # End (if not is span).
448                                 |
449                                         # Code span marker
450                                         # Note, this regex needs to go after backtick fenced
451                                         # code blocks but it should also be kept outside of the
452                                         # "if not in span" condition adding backticks to the parser
453                                         `+
454                                 )
455                         }xs';
456
457
458                 $depth = 0;             // Current depth inside the tag tree.
459                 $parsed = "";   // Parsed text that will be returned.
460
461                 // Loop through every tag until we find the closing tag of the parent
462                 // or loop until reaching the end of text if no parent tag specified.
463                 do {
464                         // Split the text using the first $tag_match pattern found.
465                         // Text before  pattern will be first in the array, text after
466                         // pattern will be at the end, and between will be any catches made
467                         // by the pattern.
468                         $parts = preg_split($block_tag_re, $text, 2,
469                                                                 PREG_SPLIT_DELIM_CAPTURE);
470
471                         // If in Markdown span mode, add a empty-string span-level hash
472                         // after each newline to prevent triggering any block element.
473                         if ($span) {
474                                 $void = $this->hashPart("", ':');
475                                 $newline = "\n$void";
476                                 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
477                         }
478
479                         $parsed .= $parts[0]; // Text before current tag.
480
481                         // If end of $text has been reached. Stop loop.
482                         if (count($parts) < 3) {
483                                 $text = "";
484                                 break;
485                         }
486
487                         $tag  = $parts[1]; // Tag to handle.
488                         $text = $parts[2]; // Remaining text after current tag.
489                         $tag_re = preg_quote($tag); // For use in a regular expression.
490
491                         // Check for: Fenced code block marker.
492                         // Note: need to recheck the whole tag to disambiguate backtick
493                         // fences from code spans
494                         if (preg_match('{^\n?([ ]{0,' . ($indent + 3) . '})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\n?$}', $tag, $capture)) {
495                                 // Fenced code block marker: find matching end marker.
496                                 $fence_indent = strlen($capture[1]); // use captured indent in re
497                                 $fence_re = $capture[2]; // use captured fence in re
498                                 if (preg_match('{^(?>.*\n)*?[ ]{' . ($fence_indent) . '}' . $fence_re . '[ ]*(?:\n|$)}', $text,
499                                         $matches))
500                                 {
501                                         // End marker found: pass text unchanged until marker.
502                                         $parsed .= $tag . $matches[0];
503                                         $text = substr($text, strlen($matches[0]));
504                                 }
505                                 else {
506                                         // No end marker: just skip it.
507                                         $parsed .= $tag;
508                                 }
509                         }
510                         // Check for: Indented code block.
511                         else if ($tag{0} == "\n" || $tag{0} == " ") {
512                                 // Indented code block: pass it unchanged, will be handled
513                                 // later.
514                                 $parsed .= $tag;
515                         }
516                         // Check for: Code span marker
517                         // Note: need to check this after backtick fenced code blocks
518                         else if ($tag{0} == "`") {
519                                 // Find corresponding end marker.
520                                 $tag_re = preg_quote($tag);
521                                 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)' . $tag_re . '(?!`)}',
522                                         $text, $matches))
523                                 {
524                                         // End marker found: pass text unchanged until marker.
525                                         $parsed .= $tag . $matches[0];
526                                         $text = substr($text, strlen($matches[0]));
527                                 }
528                                 else {
529                                         // Unmatched marker: just skip it.
530                                         $parsed .= $tag;
531                                 }
532                         }
533                         // Check for: Opening Block level tag or
534                         //            Opening Context Block tag (like ins and del)
535                         //               used as a block tag (tag is alone on it's line).
536                         else if (preg_match('{^<(?:' . $this->block_tags_re . ')\b}', $tag) ||
537                                 (       preg_match('{^<(?:' . $this->context_block_tags_re . ')\b}', $tag) &&
538                                         preg_match($newline_before_re, $parsed) &&
539                                         preg_match($newline_after_re, $text)    )
540                                 )
541                         {
542                                 // Need to parse tag and following text using the HTML parser.
543                                 list($block_text, $text) =
544                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
545
546                                 // Make sure it stays outside of any paragraph by adding newlines.
547                                 $parsed .= "\n\n$block_text\n\n";
548                         }
549                         // Check for: Clean tag (like script, math)
550                         //            HTML Comments, processing instructions.
551                         else if (preg_match('{^<(?:' . $this->clean_tags_re . ')\b}', $tag) ||
552                                 $tag{1} == '!' || $tag{1} == '?')
553                         {
554                                 // Need to parse tag and following text using the HTML parser.
555                                 // (don't check for markdown attribute)
556                                 list($block_text, $text) =
557                                         $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
558
559                                 $parsed .= $block_text;
560                         }
561                         // Check for: Tag with same name as enclosing tag.
562                         else if ($enclosing_tag_re !== '' &&
563                                 // Same name as enclosing tag.
564                                 preg_match('{^</?(?:' . $enclosing_tag_re . ')\b}', $tag))
565                         {
566                                 // Increase/decrease nested tag count.
567                                 if ($tag{1} == '/')                                             $depth--;
568                                 else if ($tag{strlen($tag)-2} != '/')   $depth++;
569
570                                 if ($depth < 0) {
571                                         // Going out of parent element. Clean up and break so we
572                                         // return to the calling function.
573                                         $text = $tag . $text;
574                                         break;
575                                 }
576
577                                 $parsed .= $tag;
578                         }
579                         else {
580                                 $parsed .= $tag;
581                         }
582                 } while ($depth >= 0);
583
584                 return array($parsed, $text);
585         }
586
587         /**
588          * Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
589          *
590          * *   Calls $hash_method to convert any blocks.
591          * *   Stops when the first opening tag closes.
592          * *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
593          *     (it is not inside clean tags)
594          *
595          * Returns an array of that form: ( processed text , remaining text )
596          * @param  string $text
597          * @param  string $hash_method
598          * @param  string $md_attr
599          * @return array
600          */
601         protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
602                 if ($text === '') return array('', '');
603
604                 // Regex to match `markdown` attribute inside of a tag.
605                 $markdown_attr_re = '
606                         {
607                                 \s*                     # Eat whitespace before the `markdown` attribute
608                                 markdown
609                                 \s*=\s*
610                                 (?>
611                                         (["\'])         # $1: quote delimiter
612                                         (.*?)           # $2: attribute value
613                                         \1                      # matching delimiter
614                                 |
615                                         ([^\s>]*)       # $3: unquoted attribute value
616                                 )
617                                 ()                              # $4: make $3 always defined (avoid warnings)
618                         }xs';
619
620                 // Regex to match any tag.
621                 $tag_re = '{
622                                 (                                       # $2: Capture whole tag.
623                                         </?                                     # Any opening or closing tag.
624                                                 [\w:$]+                 # Tag name.
625                                                 (?:
626                                                         (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
627                                                         (?>
628                                                                 ".*?"           |       # Double quotes (can contain `>`)
629                                                                 \'.*?\'         |       # Single quotes (can contain `>`)
630                                                                 .+?                             # Anything but quotes and `>`.
631                                                         )*?
632                                                 )?
633                                         >                                       # End of tag.
634                                 |
635                                         <!--    .*?     -->     # HTML Comment
636                                 |
637                                         <\?.*?\?> | <%.*?%>     # Processing instruction
638                                 |
639                                         <!\[CDATA\[.*?\]\]>     # CData Block
640                                 )
641                         }xs';
642
643                 $original_text = $text;         // Save original text in case of faliure.
644
645                 $depth          = 0;    // Current depth inside the tag tree.
646                 $block_text     = "";   // Temporary text holder for current text.
647                 $parsed         = "";   // Parsed text that will be returned.
648
649                 // Get the name of the starting tag.
650                 // (This pattern makes $base_tag_name_re safe without quoting.)
651                 if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
652                         $base_tag_name_re = $matches[1];
653
654                 // Loop through every tag until we find the corresponding closing tag.
655                 do {
656                         // Split the text using the first $tag_match pattern found.
657                         // Text before  pattern will be first in the array, text after
658                         // pattern will be at the end, and between will be any catches made
659                         // by the pattern.
660                         $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
661
662                         if (count($parts) < 3) {
663                                 // End of $text reached with unbalenced tag(s).
664                                 // In that case, we return original text unchanged and pass the
665                                 // first character as filtered to prevent an infinite loop in the
666                                 // parent function.
667                                 return array($original_text{0}, substr($original_text, 1));
668                         }
669
670                         $block_text .= $parts[0]; // Text before current tag.
671                         $tag         = $parts[1]; // Tag to handle.
672                         $text        = $parts[2]; // Remaining text after current tag.
673
674                         // Check for: Auto-close tag (like <hr/>)
675                         //                       Comments and Processing Instructions.
676                         if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\b}', $tag) ||
677                                 $tag{1} == '!' || $tag{1} == '?')
678                         {
679                                 // Just add the tag to the block as if it was text.
680                                 $block_text .= $tag;
681                         }
682                         else {
683                                 // Increase/decrease nested tag count. Only do so if
684                                 // the tag's name match base tag's.
685                                 if (preg_match('{^</?' . $base_tag_name_re . '\b}', $tag)) {
686                                         if ($tag{1} == '/')                                             $depth--;
687                                         else if ($tag{strlen($tag)-2} != '/')   $depth++;
688                                 }
689
690                                 // Check for `markdown="1"` attribute and handle it.
691                                 if ($md_attr &&
692                                         preg_match($markdown_attr_re, $tag, $attr_m) &&
693                                         preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
694                                 {
695                                         // Remove `markdown` attribute from opening tag.
696                                         $tag = preg_replace($markdown_attr_re, '', $tag);
697
698                                         // Check if text inside this tag must be parsed in span mode.
699                                         $this->mode = $attr_m[2] . $attr_m[3];
700                                         $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
701                                                 preg_match('{^<(?:' . $this->contain_span_tags_re . ')\b}', $tag);
702
703                                         // Calculate indent before tag.
704                                         if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
705                                                 $strlen = $this->utf8_strlen;
706                                                 $indent = $strlen($matches[1], 'UTF-8');
707                                         } else {
708                                                 $indent = 0;
709                                         }
710
711                                         // End preceding block with this tag.
712                                         $block_text .= $tag;
713                                         $parsed .= $this->$hash_method($block_text);
714
715                                         // Get enclosing tag name for the ParseMarkdown function.
716                                         // (This pattern makes $tag_name_re safe without quoting.)
717                                         preg_match('/^<([\w:$]*)\b/', $tag, $matches);
718                                         $tag_name_re = $matches[1];
719
720                                         // Parse the content using the HTML-in-Markdown parser.
721                                         list ($block_text, $text)
722                                                 = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
723                                                         $tag_name_re, $span_mode);
724
725                                         // Outdent markdown text.
726                                         if ($indent > 0) {
727                                                 $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
728                                                                                                         $block_text);
729                                         }
730
731                                         // Append tag content to parsed text.
732                                         if (!$span_mode)        $parsed .= "\n\n$block_text\n\n";
733                                         else                            $parsed .= "$block_text";
734
735                                         // Start over with a new block.
736                                         $block_text = "";
737                                 }
738                                 else $block_text .= $tag;
739                         }
740
741                 } while ($depth > 0);
742
743                 // Hash last block text that wasn't processed inside the loop.
744                 $parsed .= $this->$hash_method($block_text);
745
746                 return array($parsed, $text);
747         }
748
749         /**
         * Called whenever a tag must be hashed. When a function inserts a "clean"
         * tag in $text, it passes through this function and is automatically
         * escaped, blocking invalid nested overlap.
753          * @param  string $text
754          * @return string
755          */
756         protected function hashClean($text) {
757                 return $this->hashPart($text, 'C');
758         }
759
760         /**
761          * Turn Markdown link shortcuts into XHTML <a> tags.
762          * @param  string $text
763          * @return string
764          */
765         protected function doAnchors($text) {
766                 if ($this->in_anchor) {
767                         return $text;
768                 }
769                 $this->in_anchor = true;
770
771                 // First, handle reference-style links: [link text] [id]
772                 $text = preg_replace_callback('{
773                         (                                       # wrap whole match in $1
774                           \[
775                                 (' . $this->nested_brackets_re . ')     # link text = $2
776                           \]
777
778                           [ ]?                          # one optional space
779                           (?:\n[ ]*)?           # one optional newline followed by spaces
780
781                           \[
782                                 (.*?)           # id = $3
783                           \]
784                         )
785                         }xs',
786                         array($this, '_doAnchors_reference_callback'), $text);
787
788                 // Next, inline-style links: [link text](url "optional title")
789                 $text = preg_replace_callback('{
790                         (                               # wrap whole match in $1
791                           \[
792                                 (' . $this->nested_brackets_re . ')     # link text = $2
793                           \]
794                           \(                    # literal paren
795                                 [ \n]*
796                                 (?:
797                                         <(.+?)> # href = $3
798                                 |
799                                         (' . $this->nested_url_parenthesis_re . ')      # href = $4
800                                 )
801                                 [ \n]*
802                                 (                       # $5
803                                   ([\'"])       # quote char = $6
804                                   (.*?)         # Title = $7
805                                   \6            # matching quote
806                                   [ \n]*        # ignore any spaces/tabs between closing quote and )
807                                 )?                      # title is optional
808                           \)
809                           (?:[ ]? ' . $this->id_class_attr_catch_re . ' )?       # $8 = id/class attributes
810                         )
811                         }xs',
812                         array($this, '_doAnchors_inline_callback'), $text);
813
814                 // Last, handle reference-style shortcuts: [link text]
815                 // These must come last in case you've also got [link text][1]
816                 // or [link text](/foo)
817                 $text = preg_replace_callback('{
818                         (                                       # wrap whole match in $1
819                           \[
820                                 ([^\[\]]+)              # link text = $2; can\'t contain [ or ]
821                           \]
822                         )
823                         }xs',
824                         array($this, '_doAnchors_reference_callback'), $text);
825
826                 $this->in_anchor = false;
827                 return $text;
828         }
829
830         /**
831          * Callback for reference anchors
832          * @param  array $matches
833          * @return string
834          */
835         protected function _doAnchors_reference_callback($matches) {
836                 $whole_match =  $matches[1];
837                 $link_text   =  $matches[2];
838                 $link_id     =& $matches[3];
839
840                 if ($link_id == "") {
841                         // for shortcut links like [this][] or [this].
842                         $link_id = $link_text;
843                 }
844
845                 // lower-case and turn embedded newlines into spaces
846                 $link_id = strtolower($link_id);
847                 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
848
849                 if (isset($this->urls[$link_id])) {
850                         $url = $this->urls[$link_id];
851                         $url = $this->encodeURLAttribute($url);
852
853                         $result = "<a href=\"$url\"";
854                         if ( isset( $this->titles[$link_id] ) ) {
855                                 $title = $this->titles[$link_id];
856                                 $title = $this->encodeAttribute($title);
857                                 $result .=  " title=\"$title\"";
858                         }
859                         if (isset($this->ref_attr[$link_id]))
860                                 $result .= $this->ref_attr[$link_id];
861
862                         $link_text = $this->runSpanGamut($link_text);
863                         $result .= ">$link_text</a>";
864                         $result = $this->hashPart($result);
865                 }
866                 else {
867                         $result = $whole_match;
868                 }
869                 return $result;
870         }
871
872         /**
873          * Callback for inline anchors
874          * @param  array $matches
875          * @return string
876          */
877         protected function _doAnchors_inline_callback($matches) {
878                 $whole_match    =  $matches[1];
879                 $link_text              =  $this->runSpanGamut($matches[2]);
880                 $url                    =  $matches[3] == '' ? $matches[4] : $matches[3];
881                 $title                  =& $matches[7];
882                 $attr  = $this->doExtraAttributes("a", $dummy =& $matches[8]);
883
884                 // if the URL was of the form <s p a c e s> it got caught by the HTML
885                 // tag parser and hashed. Need to reverse the process before using the URL.
886                 $unhashed = $this->unhash($url);
887                 if ($unhashed != $url)
888                         $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);
889
890                 $url = $this->encodeURLAttribute($url);
891
892                 $result = "<a href=\"$url\"";
893                 if (isset($title)) {
894                         $title = $this->encodeAttribute($title);
895                         $result .=  " title=\"$title\"";
896                 }
897                 $result .= $attr;
898
899                 $link_text = $this->runSpanGamut($link_text);
900                 $result .= ">$link_text</a>";
901
902                 return $this->hashPart($result);
903         }
904
905         /**
906          * Turn Markdown image shortcuts into <img> tags.
907          * @param  string $text
908          * @return string
909          */
910         protected function doImages($text) {
911                 // First, handle reference-style labeled images: ![alt text][id]
912                 $text = preg_replace_callback('{
913                         (                               # wrap whole match in $1
914                           !\[
915                                 (' . $this->nested_brackets_re . ')             # alt text = $2
916                           \]
917
918                           [ ]?                          # one optional space
919                           (?:\n[ ]*)?           # one optional newline followed by spaces
920
921                           \[
922                                 (.*?)           # id = $3
923                           \]
924
925                         )
926                         }xs',
927                         array($this, '_doImages_reference_callback'), $text);
928
929                 // Next, handle inline images:  ![alt text](url "optional title")
930                 // Don't forget: encode * and _
931                 $text = preg_replace_callback('{
932                         (                               # wrap whole match in $1
933                           !\[
934                                 (' . $this->nested_brackets_re . ')             # alt text = $2
935                           \]
936                           \s?                   # One optional whitespace character
937                           \(                    # literal paren
938                                 [ \n]*
939                                 (?:
940                                         <(\S*)> # src url = $3
941                                 |
942                                         (' . $this->nested_url_parenthesis_re . ')      # src url = $4
943                                 )
944                                 [ \n]*
945                                 (                       # $5
946                                   ([\'"])       # quote char = $6
947                                   (.*?)         # title = $7
948                                   \6            # matching quote
949                                   [ \n]*
950                                 )?                      # title is optional
951                           \)
952                           (?:[ ]? ' . $this->id_class_attr_catch_re . ' )?       # $8 = id/class attributes
953                         )
954                         }xs',
955                         array($this, '_doImages_inline_callback'), $text);
956
957                 return $text;
958         }
959
960         /**
961          * Callback for referenced images
962          * @param  array $matches
963          * @return string
964          */
965         protected function _doImages_reference_callback($matches) {
966                 $whole_match = $matches[1];
967                 $alt_text    = $matches[2];
968                 $link_id     = strtolower($matches[3]);
969
970                 if ($link_id == "") {
971                         $link_id = strtolower($alt_text); // for shortcut links like ![this][].
972                 }
973
974                 $alt_text = $this->encodeAttribute($alt_text);
975                 if (isset($this->urls[$link_id])) {
976                         $url = $this->encodeURLAttribute($this->urls[$link_id]);
977                         $result = "<img src=\"$url\" alt=\"$alt_text\"";
978                         if (isset($this->titles[$link_id])) {
979                                 $title = $this->titles[$link_id];
980                                 $title = $this->encodeAttribute($title);
981                                 $result .=  " title=\"$title\"";
982                         }
983                         if (isset($this->ref_attr[$link_id]))
984                                 $result .= $this->ref_attr[$link_id];
985                         $result .= $this->empty_element_suffix;
986                         $result = $this->hashPart($result);
987                 }
988                 else {
989                         // If there's no such link ID, leave intact:
990                         $result = $whole_match;
991                 }
992
993                 return $result;
994         }
995
996         /**
997          * Callback for inline images
998          * @param  array $matches
999          * @return string
1000          */
1001         protected function _doImages_inline_callback($matches) {
1002                 $whole_match    = $matches[1];
1003                 $alt_text               = $matches[2];
1004                 $url                    = $matches[3] == '' ? $matches[4] : $matches[3];
1005                 $title                  =& $matches[7];
1006                 $attr  = $this->doExtraAttributes("img", $dummy =& $matches[8]);
1007
1008                 $alt_text = $this->encodeAttribute($alt_text);
1009                 $url = $this->encodeURLAttribute($url);
1010                 $result = "<img src=\"$url\" alt=\"$alt_text\"";
1011                 if (isset($title)) {
1012                         $title = $this->encodeAttribute($title);
1013                         $result .=  " title=\"$title\""; // $title already quoted
1014                 }
1015                 $result .= $attr;
1016                 $result .= $this->empty_element_suffix;
1017
1018                 return $this->hashPart($result);
1019         }
1020
1021         /**
1022          * Process markdown headers. Redefined to add ID and class attribute support.
1023          * @param  string $text
1024          * @return string
1025          */
1026         protected function doHeaders($text) {
1027                 // Setext-style headers:
1028                 //  Header 1  {#header1}
1029                 //        ========
1030                 //
1031                 //        Header 2  {#header2 .class1 .class2}
1032                 //        --------
1033                 //
1034                 $text = preg_replace_callback(
1035                         '{
1036                                 (^.+?)                                                          # $1: Header text
1037                                 (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )?         # $3 = id/class attributes
1038                                 [ ]*\n(=+|-+)[ ]*\n+                            # $3: Header footer
1039                         }mx',
1040                         array($this, '_doHeaders_callback_setext'), $text);
1041
1042                 // atx-style headers:
1043                 //      # Header 1        {#header1}
1044                 //      ## Header 2       {#header2}
1045                 //      ## Header 2 with closing hashes ##  {#header3.class1.class2}
1046                 //      ...
1047                 //      ###### Header 6   {.class2}
1048                 //
1049                 $text = preg_replace_callback('{
1050                                 ^(\#{1,6})      # $1 = string of #\'s
1051                                 [ ]'.($this->hashtag_protection ? '+' : '*').'
1052                                 (.+?)           # $2 = Header text
1053                                 [ ]*
1054                                 \#*                     # optional closing #\'s (not counted)
1055                                 (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )?         # $3 = id/class attributes
1056                                 [ ]*
1057                                 \n+
1058                         }xm',
1059                         array($this, '_doHeaders_callback_atx'), $text);
1060
1061                 return $text;
1062         }
1063
1064         /**
1065          * Callback for setext headers
1066          * @param  array $matches
1067          * @return string
1068          */
1069         protected function _doHeaders_callback_setext($matches) {
1070                 if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) {
1071                         return $matches[0];
1072                 }
1073
1074                 $level = $matches[3]{0} == '=' ? 1 : 2;
1075
1076                 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[1]) : null;
1077
1078                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[2], $defaultId);
1079                 $block = "<h$level$attr>" . $this->runSpanGamut($matches[1]) . "</h$level>";
1080                 return "\n" . $this->hashBlock($block) . "\n\n";
1081         }
1082
1083         /**
1084          * Callback for atx headers
1085          * @param  array $matches
1086          * @return string
1087          */
1088         protected function _doHeaders_callback_atx($matches) {
1089                 $level = strlen($matches[1]);
1090
1091                 $defaultId = is_callable($this->header_id_func) ? call_user_func($this->header_id_func, $matches[2]) : null;
1092                 $attr  = $this->doExtraAttributes("h$level", $dummy =& $matches[3], $defaultId);
1093                 $block = "<h$level$attr>" . $this->runSpanGamut($matches[2]) . "</h$level>";
1094                 return "\n" . $this->hashBlock($block) . "\n\n";
1095         }
1096
1097         /**
1098          * Form HTML tables.
1099          * @param  string $text
1100          * @return string
1101          */
1102         protected function doTables($text) {
1103                 $less_than_tab = $this->tab_width - 1;
1104                 // Find tables with leading pipe.
1105                 //
1106                 //      | Header 1 | Header 2
1107                 //      | -------- | --------
1108                 //      | Cell 1   | Cell 2
1109                 //      | Cell 3   | Cell 4
1110                 $text = preg_replace_callback('
1111                         {
1112                                 ^                                                       # Start of a line
1113                                 [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
1114                                 [|]                                                     # Optional leading pipe (present)
1115                                 (.+) \n                                         # $1: Header row (at least one pipe)
1116
1117                                 [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
1118                                 [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline
1119
1120                                 (                                                       # $3: Cells
1121                                         (?>
1122                                                 [ ]*                            # Allowed whitespace.
1123                                                 [|] .* \n                       # Row content.
1124                                         )*
1125                                 )
1126                                 (?=\n|\Z)                                       # Stop at final double newline.
1127                         }xm',
1128                         array($this, '_doTable_leadingPipe_callback'), $text);
1129
1130                 // Find tables without leading pipe.
1131                 //
1132                 //      Header 1 | Header 2
1133                 //      -------- | --------
1134                 //      Cell 1   | Cell 2
1135                 //      Cell 3   | Cell 4
1136                 $text = preg_replace_callback('
1137                         {
1138                                 ^                                                       # Start of a line
1139                                 [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
1140                                 (\S.*[|].*) \n                          # $1: Header row (at least one pipe)
1141
1142                                 [ ]{0,' . $less_than_tab . '}   # Allowed whitespace.
1143                                 ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline
1144
1145                                 (                                                       # $3: Cells
1146                                         (?>
1147                                                 .* [|] .* \n            # Row content
1148                                         )*
1149                                 )
1150                                 (?=\n|\Z)                                       # Stop at final double newline.
1151                         }xm',
1152                         array($this, '_DoTable_callback'), $text);
1153
1154                 return $text;
1155         }
1156
1157         /**
1158          * Callback for removing the leading pipe for each row
1159          * @param  array $matches
1160          * @return string
1161          */
1162         protected function _doTable_leadingPipe_callback($matches) {
1163                 $head           = $matches[1];
1164                 $underline      = $matches[2];
1165                 $content        = $matches[3];
1166
1167                 $content        = preg_replace('/^ *[|]/m', '', $content);
1168
1169                 return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
1170         }
1171
1172         /**
1173          * Make the align attribute in a table
1174          * @param  string $alignname
1175          * @return string
1176          */
1177         protected function _doTable_makeAlignAttr($alignname)
1178         {
1179                 if (empty($this->table_align_class_tmpl)) {
1180                         return " align=\"$alignname\"";
1181                 }
1182
1183                 $classname = str_replace('%%', $alignname, $this->table_align_class_tmpl);
1184                 return " class=\"$classname\"";
1185         }
1186
1187         /**
         * Callback for processing tables
1189          * @param  array $matches
1190          * @return string
1191          */
1192         protected function _doTable_callback($matches) {
1193                 $head           = $matches[1];
1194                 $underline      = $matches[2];
1195                 $content        = $matches[3];
1196
1197                 // Remove any tailing pipes for each line.
1198                 $head           = preg_replace('/[|] *$/m', '', $head);
1199                 $underline      = preg_replace('/[|] *$/m', '', $underline);
1200                 $content        = preg_replace('/[|] *$/m', '', $content);
1201
1202                 // Reading alignement from header underline.
1203                 $separators     = preg_split('/ *[|] */', $underline);
1204                 foreach ($separators as $n => $s) {
1205                         if (preg_match('/^ *-+: *$/', $s))
1206                                 $attr[$n] = $this->_doTable_makeAlignAttr('right');
1207                         else if (preg_match('/^ *:-+: *$/', $s))
1208                                 $attr[$n] = $this->_doTable_makeAlignAttr('center');
1209                         else if (preg_match('/^ *:-+ *$/', $s))
1210                                 $attr[$n] = $this->_doTable_makeAlignAttr('left');
1211                         else
1212                                 $attr[$n] = '';
1213                 }
1214
1215                 // Parsing span elements, including code spans, character escapes,
1216                 // and inline HTML tags, so that pipes inside those gets ignored.
1217                 $head           = $this->parseSpan($head);
1218                 $headers        = preg_split('/ *[|] */', $head);
1219                 $col_count      = count($headers);
1220                 $attr       = array_pad($attr, $col_count, '');
1221
1222                 // Write column headers.
1223                 $text = "<table>\n";
1224                 $text .= "<thead>\n";
1225                 $text .= "<tr>\n";
1226                 foreach ($headers as $n => $header)
1227                         $text .= "  <th$attr[$n]>" . $this->runSpanGamut(trim($header)) . "</th>\n";
1228                 $text .= "</tr>\n";
1229                 $text .= "</thead>\n";
1230
1231                 // Split content by row.
1232                 $rows = explode("\n", trim($content, "\n"));
1233
1234                 $text .= "<tbody>\n";
1235                 foreach ($rows as $row) {
1236                         // Parsing span elements, including code spans, character escapes,
1237                         // and inline HTML tags, so that pipes inside those gets ignored.
1238                         $row = $this->parseSpan($row);
1239
1240                         // Split row by cell.
1241                         $row_cells = preg_split('/ *[|] */', $row, $col_count);
1242                         $row_cells = array_pad($row_cells, $col_count, '');
1243
1244                         $text .= "<tr>\n";
1245                         foreach ($row_cells as $n => $cell)
1246                                 $text .= "  <td$attr[$n]>" . $this->runSpanGamut(trim($cell)) . "</td>\n";
1247                         $text .= "</tr>\n";
1248                 }
1249                 $text .= "</tbody>\n";
1250                 $text .= "</table>";
1251
1252                 return $this->hashBlock($text) . "\n";
1253         }
1254
/**
 * Find every definition list in the text and hand it to
 * _doDefLists_callback() for conversion.
 *
 * A list starts at the beginning of the text or right after a blank line,
 * with one or more term lines followed (optionally after one blank line) by
 * a line whose first non-space character is a colon. Term and colon may each
 * be indented by at most tab_width - 1 spaces.
 *
 * @param  string $text
 * @return string
 */
protected function doDefLists($text) {
    // Largest indentation (in spaces) that is still less than one tab stop.
    $max_indent = $this->tab_width - 1;

    // Matches one complete definition list. The list ends at the end of the
    // text, or at a blank-line gap that is followed neither by another
    // term/definition pair nor by another definition marker.
    $list_re = '(?>
        (                                   # $1 = whole list
          (                                 # $2
            [ ]{0,' . $max_indent . '}
            ((?>.*\S.*\n)+)                 # $3 = defined term
            \n?
            [ ]{0,' . $max_indent . '}:[ ]+ # colon starting definition
          )
          (?s:.+?)
          (                                 # $4
              \z
            |
              \n{2,}
              (?=\S)
              (?!                           # Negative lookahead for another term
                [ ]{0,' . $max_indent . '}
                (?: \S.*\n )+?              # defined term
                \n?
                [ ]{0,' . $max_indent . '}:[ ]+ # colon starting definition
              )
              (?!                           # Negative lookahead for another definition
                [ ]{0,' . $max_indent . '}:[ ]+ # colon starting definition
              )
          )
        )
    )'; // mx

    // Anchor the list at the start of the text or just after a blank line.
    $pattern = '{
            (?>\A\n?|(?<=\n\n))
            ' . $list_re . '
        }mx';

    return preg_replace_callback(
        $pattern,
        array($this, '_doDefLists_callback'),
        $text
    );
}
1299
/**
 * Callback for doDefLists(): turn one matched definition list into a
 * hashed <dl> block.
 *
 * @param  array $matches Regex match; index 1 holds the whole list text.
 * @return string Hash token for the <dl> block followed by a blank line.
 */
protected function _doDefLists_callback($matches) {
    // Convert the terms and definitions, then drop surrounding whitespace
    // so the wrapper tags sit directly around the items.
    $items = trim($this->processDefListItems($matches[1]));

    $html = "<dl>\n" . $items . "\n</dl>";

    return $this->hashBlock($html) . "\n\n";
}
1315
/**
 * Process the contents of a single definition list, splitting it
 * into individual term and definition list items.
 *
 * Works in two passes over the list text: the first pass rewrites term
 * lines through _processDefListItems_callback_dt(), the second rewrites
 * each colon-marked definition through _processDefListItems_callback_dd().
 * The second pass relies on the "<dt>" text produced by the first one to
 * know where a definition ends.
 *
 * @param  string $list_str Raw text of one definition list.
 * @return string The list text with terms and definitions converted.
 */
protected function processDefListItems($list_str) {

    // Largest indentation (in spaces) that is still less than one tab stop.
    $less_than_tab = $this->tab_width - 1;

    // Trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    // Process definition terms.
    // The trailing lookahead uses the same indentation limit as every other
    // definition-marker check (it used to be a fixed 3), so terms are still
    // recognised when tab_width is not 4.
    $list_str = preg_replace_callback('{
        (?>\A\n?|\n\n+)                     # leading line
        (                                   # definition terms = $1
            [ ]{0,' . $less_than_tab . '}   # leading whitespace
            (?!\:[ ]|[ ])                   # negative lookahead for a definition
                                            #   mark (colon) or more whitespace.
            (?> \S.* \n)+?                  # actual term (not whitespace).
        )
        (?=\n?[ ]{0,' . $less_than_tab . '}:[ ])    # lookahead for following line feed
                                            #   with a definition mark.
        }xm',
        array($this, '_processDefListItems_callback_dt'), $list_str);

    // Process actual definitions.
    $list_str = preg_replace_callback('{
        \n(\n+)?                            # leading line = $1
        (                                   # marker space = $2
            [ ]{0,' . $less_than_tab . '}   # whitespace before colon
            \:[ ]+                          # definition mark (colon)
        )
        ((?s:.+?))                          # definition text = $3
        (?= \n+                             # stop at next definition mark,
            (?:                             # next term or end of text
                [ ]{0,' . $less_than_tab . '} \:[ ] |
                <dt> | \z
            )
        )
        }xm',
        array($this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
}
1362
/**
 * Callback for <dt> elements in definition lists.
 *
 * Each line of the matched term block becomes its own <dt> element, with
 * its content run through the span gamut.
 *
 * @param  array $matches Regex match; index 1 holds the term line(s).
 * @return string The <dt> elements, one per line, wrapped in newlines.
 */
protected function _processDefListItems_callback_dt($matches) {
    $elements = array();

    foreach (explode("\n", trim($matches[1])) as $line) {
        $elements[] = '<dt>' . $this->runSpanGamut(trim($line)) . '</dt>';
    }

    // explode() always yields at least one piece, so the result always
    // starts with a newline followed by a <dt> element.
    return "\n" . implode("\n", $elements) . "\n";
}
1377
/**
 * Callback for <dd> elements in definition lists.
 *
 * A definition is rendered as block content when it was preceded by a
 * blank line or contains a blank line itself; otherwise it is rendered as
 * inline (span) content.
 *
 * @param  array $matches Regex match: 1 = blank line(s) before the marker,
 *                        2 = marker with its surrounding spaces,
 *                        3 = definition text.
 * @return string The <dd> element wrapped in newlines.
 */
protected function _processDefListItems_callback_dd($matches) {
    $blank_before = $matches[1];
    $marker       = $matches[2];
    $body         = $matches[3];

    $is_block = $blank_before || preg_match('/\n{2,}/', $body);

    if (!$is_block) {
        // Inline definition: strip trailing whitespace, outdent, parse spans.
        $inline = $this->runSpanGamut($this->outdent(rtrim($body)));
        return "\n<dd>" . $inline . "</dd>\n";
    }

    // Block definition: replace the marker by spaces of the same width so
    // the first line lines up with the rest before outdenting.
    $padded = str_repeat(' ', strlen($marker)) . $body;
    $block  = $this->runBlockGamut($this->outdent($padded . "\n\n"));

    return "\n<dd>\n" . $block . "\n</dd>\n";
}
1401
/**
 * Adding the fenced code block syntax to regular Markdown:
 *
 * ~~~
 * Code block
 * ~~~
 *
 * A fence is three or more tildes or backticks at the start of a line. It
 * may be followed by a class name (with an optional leading dot) and/or an
 * extra-attributes block, and the block ends at a line repeating exactly
 * the same fence. Each match is passed to _doFencedCodeBlocks_callback().
 *
 * @param  string $text
 * @return string
 */
protected function doFencedCodeBlocks($text) {
    // Note: the unused local that previously read tab_width here was
    // removed; this pattern has no indentation limit.
    $text = preg_replace_callback('{
            (?:\n|\A)
            # 1: Opening marker
            (
                (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
            )
            [ ]*
            (?:
                \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
            )?
            [ ]*
            (?:
                ' . $this->id_class_attr_catch_re . ' # 3: Extra attributes
            )?
            [ ]* \n # Whitespace and newline following marker.

            # 4: Content
            (
                (?>
                    (?!\1 [ ]* \n)  # Not a closing marker.
                    .*\n+
                )+
            )

            # Closing marker.
            \1 [ ]* (?= \n )
        }xm',
        array($this, '_doFencedCodeBlocks_callback'), $text);

    return $text;
}
1447
/**
 * Callback to process fenced code blocks.
 *
 * Escapes (or custom-renders) the block content, converts leading blank
 * lines to <br> elements, and wraps the result in <pre><code>. The class
 * name and extra attributes go on <pre> or on <code> depending on
 * code_attr_on_pre.
 *
 * @param  array $matches Regex match: 2 = class name, 3 = extra attributes,
 *                        4 = code content.
 * @return string Hash token for the code block, surrounded by blank lines.
 */
protected function _doFencedCodeBlocks_callback($matches) {
    // Bound by reference so an absent capture group reads as null instead
    // of raising an undefined-index notice.
    $classname =& $matches[2];
    $attrs     =& $matches[3];
    $codeblock = $matches[4];

    if ($this->code_block_content_func) {
        // A user-supplied renderer replaces the default HTML escaping.
        $codeblock = call_user_func($this->code_block_content_func, $codeblock, $classname);
    } else {
        $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
    }

    // Leading blank lines would otherwise not be visible in the output.
    $codeblock = preg_replace_callback('/^\n+/',
        array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

    $classes = array();
    if ($classname != "") {
        // Square-bracket offset: the curly-brace form ($classname{0}) is
        // deprecated since PHP 7.4 and a parse error in PHP 8.
        if ($classname[0] == '.')
            $classname = substr($classname, 1);
        $classes[] = $this->code_class_prefix . $classname;
    }
    $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs, null, $classes);
    $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
    $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
    $codeblock  = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

    return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
1480
/**
 * Replace a run of newlines in a fenced code block by the same number of
 * <br> elements.
 *
 * @param  array $matches Regex match; index 0 is the run of newlines.
 * @return string One <br> element per matched character.
 */
protected function _doFencedCodeBlocks_newlines($matches) {
    $line_break = '<br' . $this->empty_element_suffix;
    $count      = strlen($matches[0]);

    return str_repeat($line_break, $count);
}
1490
/**
 * Emphasis markers, redefined so that emphasis by underscore does not
 * work in the middle of a word.
 *
 * The '' entry matches an opening marker: a lone "*", or a lone "_" that is
 * not preceded by a letter, digit or underscore; neither may be followed by
 * whitespace (optionally after one of ". , : ;"). The '*' and '_' entries
 * match the corresponding closing marker, which must not follow whitespace;
 * a closing "_" must also not be followed by a letter, digit or underscore.
 * @var array
 */
protected $em_relist = array(
    ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
    '*' => '(?<![\s*])\*(?!\*)',
    '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
);

/**
 * Strong-emphasis markers ("**" and "__"), following the same rules as
 * $em_relist.
 * @var array
 */
protected $strong_relist = array(
    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
    '**' => '(?<![\s*])\*\*(?!\*)',
    '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
);

/**
 * Combined emphasis + strong markers ("***" and "___"), following the same
 * rules as $em_relist.
 * @var array
 */
protected $em_strong_relist = array(
    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
    '***' => '(?<![\s*])\*\*\*(?!\*)',
    '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
);
1511
/**
 * Parse text into paragraphs.
 *
 * The text is split on blank lines; every chunk is run through the span
 * gamut and, unless it is a hash token, optionally wrapped in <p> tags.
 * Remaining hash tokens are expanded before returning.
 *
 * @param  string $text String to process in paragraphs
 * @param  boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
 * @return string       HTML output
 */
protected function formParagraphs($text, $wrap_in_p = true) {
    // Drop blank lines at both ends, then cut on runs of blank lines.
    $body   = preg_replace('/\A\n+|\n+\z/', '', $text);
    $chunks = preg_split('/\n{2,}/', $body, -1, PREG_SPLIT_NO_EMPTY);

    $rendered = array();
    foreach ($chunks as $chunk) {
        $html = trim($this->runSpanGamut($chunk));

        // Clean tag hashes & block tag hashes are left alone.
        // NOTE(review): as written, the pattern accepts any value that
        // merely *starts* with a block hash, but only a value that is
        // *exactly* a clean-tag hash.
        if ($wrap_in_p && !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $html)) {
            $html = "<p>$html</p>";
        }

        $rendered[] = $html;
    }

    // Join the chunks back together and expand any hash still present.
    return $this->unhash(implode("\n\n", $rendered));
}
1546
1547
/**
 * Footnotes - Strips footnote definitions from text and hands each one to
 * _stripFootnotes_callback(), which stores its content.
 *
 * A definition has the form "[^id]: text", indented by at most
 * tab_width - 1 spaces. The text may start on the next line and may span
 * several lines; it stops before another "[...]:" definition marker, or
 * before a blank-line gap followed by content indented by 0 to 3 spaces.
 *
 * @param  string $text
 * @return string The text with the footnote definitions removed.
 */
protected function stripFootnotes($text) {
    // Largest indentation (in spaces) that is still less than one tab stop.
    $max_indent = $this->tab_width - 1;

    // Footnote defs are in the form: [^id]: footnote text
    $pattern = '{
        ^[ ]{0,' . $max_indent . '}\[\^(.+?)\][ ]?:  # note_id = $1
          [ ]*
          \n?                       # maybe *one* newline
        (                           # text = $2 (no blank lines allowed)
            (?:
                .+                  # actual text
            |
                \n                  # newlines but
                (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
                (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
                                    # by non-indented content
            )*
        )
        }xm';

    return preg_replace_callback(
        $pattern,
        array($this, '_stripFootnotes_callback'),
        $text
    );
}
1577
/**
 * Callback for stripping footnotes: record one definition and remove it
 * from the text.
 *
 * @param  array $matches Regex match: 1 = footnote id, 2 = footnote text.
 * @return string Always the empty string, so the definition disappears.
 */
protected function _stripFootnotes_callback($matches) {
    // Footnotes are keyed by their id with the configured prefix applied.
    $key = $this->fn_id_prefix . $matches[1];

    $this->footnotes[$key] = $this->outdent($matches[2]);

    return '';
}
1588
/**
 * Replace footnote references in $text [^id] with a special text-token
 * which will be replaced by the actual footnote marker in appendFootnotes.
 * Nothing is replaced while the in_anchor flag is set.
 *
 * @param  string $text
 * @return string
 */
protected function doFootnotes($text) {
    if ($this->in_anchor) {
        return $text;
    }

    return preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
}
1601
/**
 * Append footnote list to text.
 *
 * Footnote tokens in $text are first replaced by their markers. If that
 * queued any footnote, a <div class="footnotes"> block with one <li> per
 * footnote is appended. Each footnote body is run through the block gamut
 * and gets one backlink per reference made to it.
 *
 * Any "%%" in the backlink class or title is replaced by the position of
 * the footnote in the list (1 for the first one).
 *
 * @param  string $text
 * @return string
 */
protected function appendFootnotes($text) {
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
        array($this, '_appendFootnotes_callback'), $text);

    if (!empty($this->footnotes_ordered)) {
        $text .= "\n\n";
        $text .= "<div class=\"footnotes\" role=\"doc-endnotes\">\n";
        $text .= "<hr" . $this->empty_element_suffix . "\n";
        $text .= "<ol>\n\n";

        // Backlink attributes, built once as a template. The "%%"
        // placeholder is expanded per footnote inside the loop.
        $attr_tmpl = "";
        if ($this->fn_backlink_class != "") {
            $class = $this->fn_backlink_class;
            $class = $this->encodeAttribute($class);
            $attr_tmpl .= " class=\"$class\"";
        }
        if ($this->fn_backlink_title != "") {
            $title = $this->fn_backlink_title;
            $title = $this->encodeAttribute($title);
            $attr_tmpl .= " title=\"$title\"";
            $attr_tmpl .= " aria-label=\"$title\"";
        }
        $attr_tmpl .= " role=\"doc-backlink\"";
        $backlink_text = $this->fn_backlink_html;
        $num = 0;

        // Consume the queue from the front: parsing a footnote may queue
        // further footnotes referenced from inside it.
        while (!empty($this->footnotes_ordered)) {
            $footnote = reset($this->footnotes_ordered);
            $note_id = key($this->footnotes_ordered);
            unset($this->footnotes_ordered[$note_id]);
            $ref_count = $this->footnotes_ref_count[$note_id];
            unset($this->footnotes_ref_count[$note_id]);
            unset($this->footnotes[$note_id]);

            $footnote .= "\n"; // Need to append newline before parsing.
            $footnote = $this->runBlockGamut("$footnote\n");
            $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                array($this, '_appendFootnotes_callback'), $footnote);

            // Always expand from the untouched template. Overwriting the
            // template itself would consume the placeholder on the first
            // footnote and stamp every later backlink with number 1.
            $attr = str_replace("%%", (string) ++$num, $attr_tmpl);
            $note_id = $this->encodeAttribute($note_id);

            // Prepare backlink, multiple backlinks if multiple references
            $backlink = "<a href=\"#fnref:$note_id\"$attr>$backlink_text</a>";
            for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
                $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$attr>$backlink_text</a>";
            }
            // Add backlink to last paragraph; create new paragraph if needed.
            if (preg_match('{</p>$}', $footnote)) {
                $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
            } else {
                $footnote .= "\n\n<p>$backlink</p>";
            }

            $text .= "<li id=\"fn:$note_id\" role=\"doc-endnote\">\n";
            $text .= $footnote . "\n";
            $text .= "</li>\n\n";
        }

        $text .= "</ol>\n";
        $text .= "</div>";
    }
    return $text;
}
1671
/**
 * Callback for appending footnotes: turn one footnote token into its
 * superscript reference link.
 *
 * The first reference to a footnote queues its content for output and
 * assigns it the next footnote number; later references reuse that number
 * and get a counter in their "fnref" id. A token without a matching
 * footnote definition is turned back into its literal "[^id]" form.
 *
 * @param  array $matches Regex match; index 1 is the footnote id.
 * @return string
 */
protected function _appendFootnotes_callback($matches) {
    $key = $this->fn_id_prefix . $matches[1];

    // No such footnote: restore the original reference text.
    if (!isset($this->footnotes[$key])) {
        return "[^" . $matches[1] . "]";
    }

    if (isset($this->footnotes_numbers[$key])) {
        // Repeated reference: bump the count, which also becomes the
        // suffix that keeps the reference id unique.
        $ref_suffix = $this->footnotes_ref_count[$key] += 1;
        $number     = $this->footnotes_numbers[$key];
    } else {
        // First reference: transfer the footnote content to the ordered
        // list and give it its number.
        $this->footnotes_ordered[$key]   = $this->footnotes[$key];
        $this->footnotes_ref_count[$key] = 1;
        $number = $this->footnote_counter++;
        $this->footnotes_numbers[$key]   = $number;
        $ref_suffix = '';
    }

    $link_attr = "";
    if ($this->fn_link_class != "") {
        $link_attr .= ' class="' . $this->encodeAttribute($this->fn_link_class) . '"';
    }
    if ($this->fn_link_title != "") {
        $link_attr .= ' title="' . $this->encodeAttribute($this->fn_link_title) . '"';
    }
    $link_attr .= " role=\"doc-noteref\"";

    // "%%" in the class or title stands for the footnote number.
    $link_attr = str_replace("%%", $number, $link_attr);
    $safe_id   = $this->encodeAttribute($key);

    $anchor = '<a href="#fn:' . $safe_id . '"' . $link_attr . '>' . $number . '</a>';

    return '<sup id="fnref' . $ref_suffix . ':' . $safe_id . '">' . $anchor . '</sup>';
}
1719
1720
/**
 * Abbreviations - strips abbreviation definitions from text and hands each
 * one to _stripAbbreviations_callback(), which stores it.
 *
 * @param  string $text
 * @return string The text with the abbreviation definitions removed.
 */
protected function stripAbbreviations($text) {
    // Largest indentation (in spaces) that is still less than one tab stop.
    $max_indent = $this->tab_width - 1;

    // Abbreviation defs are in the form: *[abbr]: description
    $pattern = '{
        ^[ ]{0,' . $max_indent . '}\*\[(.+?)\][ ]?:    # abbr_id = $1
        (.*)                    # text = $2 (no blank lines allowed)
        }xm';

    return preg_replace_callback(
        $pattern,
        array($this, '_stripAbbreviations_callback'),
        $text
    );
}
1739
/**
 * Callback for stripping abbreviations: register one definition and
 * remove it from the text.
 *
 * The abbreviation is added as a new alternative to the regular expression
 * in abbr_word_re, and its trimmed description is stored under the
 * abbreviation itself.
 *
 * @param  array $matches Regex match: 1 = abbreviation, 2 = description.
 * @return string Always the empty string, so the definition disappears.
 */
protected function _stripAbbreviations_callback($matches) {
    $word = $matches[1];

    // Alternatives are separated by "|"; the first one needs no separator.
    $separator = $this->abbr_word_re ? '|' : '';
    $this->abbr_word_re .= $separator . preg_quote($word);

    $this->abbr_desciptions[$word] = trim($matches[2]);

    return '';
}
1755
/**
 * Find defined abbreviations in text and wrap them in <abbr> elements.
 *
 * An abbreviation only matches when it is neither preceded nor followed by
 * a word character or the \x1A character. Does nothing when no
 * abbreviation has been registered.
 *
 * @param  string $text
 * @return string
 */
protected function doAbbreviations($text) {
    if (!$this->abbr_word_re) {
        return $text;
    }

    // cannot use the /x modifier because abbr_word_re may
    // contain significant spaces:
    $pattern = '{(?<![\w\x1A])(?:' . $this->abbr_word_re . ')(?![\w\x1A])}';

    return preg_replace_callback(
        $pattern,
        array($this, '_doAbbreviations_callback'),
        $text
    );
}
1774
/**
 * Callback for processing abbreviations: wrap one matched abbreviation in
 * a hashed <abbr> element.
 *
 * The description, when there is one, becomes the title attribute. A match
 * with no stored description entry is returned unchanged.
 *
 * @param  array $matches Regex match; index 0 is the abbreviation.
 * @return string
 */
protected function _doAbbreviations_callback($matches) {
    $abbr = $matches[0];

    if (!isset($this->abbr_desciptions[$abbr])) {
        return $matches[0];
    }

    $desc = $this->abbr_desciptions[$abbr];

    // Compare against the empty string rather than using empty(): empty()
    // also treats the description "0" as missing and would drop its title.
    if ((string) $desc === '') {
        return $this->hashPart("<abbr>$abbr</abbr>");
    }

    $desc = $this->encodeAttribute($desc);
    return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
}
1794 }