diff --git a/src/infrastructure/markup/blockrule/PhutilRemarkupCodeBlockRule.php b/src/infrastructure/markup/blockrule/PhutilRemarkupCodeBlockRule.php index b8fef85b98..8b23fe6541 100644 --- a/src/infrastructure/markup/blockrule/PhutilRemarkupCodeBlockRule.php +++ b/src/infrastructure/markup/blockrule/PhutilRemarkupCodeBlockRule.php @@ -1,252 +1,346 @@ <?php final class PhutilRemarkupCodeBlockRule extends PhutilRemarkupBlockRule { public function getMatchingLineCount(array $lines, $cursor) { $num_lines = 0; $match_ticks = null; if (preg_match('/^(\s{2,}).+/', $lines[$cursor])) { $match_ticks = false; } else if (preg_match('/^\s*(```)/', $lines[$cursor])) { $match_ticks = true; } else { return $num_lines; } $num_lines++; if ($match_ticks && preg_match('/^\s*(```)(.*)(```)\s*$/', $lines[$cursor])) { return $num_lines; } $cursor++; while (isset($lines[$cursor])) { if ($match_ticks) { if (preg_match('/```\s*$/', $lines[$cursor])) { $num_lines++; break; } $num_lines++; } else { if (strlen(trim($lines[$cursor]))) { if (!preg_match('/^\s{2,}/', $lines[$cursor])) { break; } } $num_lines++; } $cursor++; } return $num_lines; } public function markupText($text, $children) { - if (preg_match('/^\s*```/', $text)) { + // Header/footer: possibly useful to play nicely with "flavored markdown". + // When it starts with ```stuff the header is 'stuff' (->language) + // When it ends with stuff``` the footer is 'stuff' (->garbage) + $header_line = null; + $footer_line = null; + + $matches = null; + if (preg_match('/^\s*```(.*)/', $text, $matches)) { + if (isset($matches[1])) { + $header_line = $matches[1]; + } + // If this is a ```-style block, trim off the backticks and any leading // blank line. $text = preg_replace('/^\s*```(\s*\n)?/', '', $text); $text = preg_replace('/```\s*$/', '', $text); } $lines = explode("\n", $text); + + // If we have a flavored header, it makes sense to look for the footer. 
+ if ($header_line !== null && $lines) { + $footer_line = $lines[last_key($lines)]; + } + + // Strip final empty lines while ($lines && !strlen(last($lines))) { unset($lines[last_key($lines)]); } $options = array( 'counterexample' => false, 'lang' => null, 'name' => null, 'lines' => null, ); $parser = new PhutilSimpleOptions(); $custom = $parser->parse(head($lines)); + $valid_options = null; if ($custom) { - $valid = true; + $valid_options = true; foreach ($custom as $key => $value) { if (!array_key_exists($key, $options)) { - $valid = false; + $valid_options = false; break; } } - if ($valid) { + if ($valid_options) { array_shift($lines); $options = $custom + $options; } } + // Parse flavored markdown strictly, so that we don't eat legitimate Remarkup. + // Proceed only if we tried to parse options and we failed + // (no options also means no language). + // For example, this is not a valid option: ```php + // Proceed only if the footer exists and it is not: blabla``` + // Accept only 2 lines or more. First line: header; then content. + if ( + $valid_options === false && + $header_line !== null && + $footer_line === '' && + count($lines) > 1 + ) { + if (self::isKnownLanguageCode($header_line)) { + array_shift($lines); + $options['lang'] = $header_line; + } + } + // Normalize the text back to a 0-level indent. 
$min_indent = 80; foreach ($lines as $line) { for ($ii = 0; $ii < strlen($line); $ii++) { if ($line[$ii] != ' ') { $min_indent = min($ii, $min_indent); break; } } } $text = implode("\n", $lines); if ($min_indent) { $indent_string = str_repeat(' ', $min_indent); $text = preg_replace('/^'.$indent_string.'/m', '', $text); } if ($this->getEngine()->isTextMode()) { $out = array(); $header = array(); if ($options['counterexample']) { $header[] = 'counterexample'; } if ($options['name'] != '') { $header[] = 'name='.$options['name']; } if ($header) { $out[] = implode(', ', $header); } $text = preg_replace('/^/m', ' ', $text); $out[] = $text; return implode("\n", $out); } if (empty($options['lang'])) { // If the user hasn't specified "lang=..." explicitly, try to guess the // language. If we fail, fall back to configured defaults. $lang = PhutilLanguageGuesser::guessLanguage($text); if (!$lang) { $lang = nonempty( $this->getEngine()->getConfig('phutil.codeblock.language-default'), 'text'); } $options['lang'] = $lang; } $code_body = $this->highlightSource($text, $options); $name_header = null; $block_style = null; if ($this->getEngine()->isHTMLMailMode()) { $map = $this->getEngine()->getConfig('phutil.codeblock.style-map'); if ($map) { $raw_body = id(new PhutilPygmentizeParser()) ->setMap($map) ->parse((string)$code_body); $code_body = phutil_safe_html($raw_body); } $style_rules = array( 'padding: 6px 12px;', 'font-size: 13px;', 'font-weight: bold;', 'display: inline-block;', 'border-top-left-radius: 3px;', 'border-top-right-radius: 3px;', 'color: rgba(0,0,0,.75);', ); if ($options['counterexample']) { $style_rules[] = 'background: #f7e6e6'; } else { $style_rules[] = 'background: rgba(71, 87, 120, 0.08);'; } $header_attributes = array( 'style' => implode(' ', $style_rules), ); $block_style = 'margin: 12px 0;'; } else { $header_attributes = array( 'class' => 'remarkup-code-header', ); } if ($options['name']) { $name_header = phutil_tag( 'div', $header_attributes, 
$options['name']); } $class = 'remarkup-code-block'; if ($options['counterexample']) { $class = 'remarkup-code-block code-block-counterexample'; } $attributes = array( 'class' => $class, 'style' => $block_style, 'data-code-lang' => $options['lang'], 'data-sigil' => 'remarkup-code-block', ); return phutil_tag( 'div', $attributes, array($name_header, $code_body)); } private function highlightSource($text, array $options) { if ($options['counterexample']) { $aux_class = ' remarkup-counterexample'; } else { $aux_class = null; } $aux_style = null; if ($this->getEngine()->isHTMLMailMode()) { $aux_style = array( 'font: 11px/15px "Menlo", "Consolas", "Monaco", monospace;', 'padding: 12px;', 'margin: 0;', ); if ($options['counterexample']) { $aux_style[] = 'background: #f7e6e6;'; } else { $aux_style[] = 'background: rgba(71, 87, 120, 0.08);'; } $aux_style = implode(' ', $aux_style); } if ($options['lines']) { // Put a minimum size on this because the scrollbar is otherwise // unusable. $height = max(6, (int)$options['lines']); $aux_style = $aux_style .' ' .'max-height: ' .(2 * $height) .'em; overflow: auto;'; } $engine = $this->getEngine()->getConfig('syntax-highlighter.engine'); if (!$engine) { $engine = 'PhutilDefaultSyntaxHighlighterEngine'; } $engine = newv($engine, array()); $engine->setConfig( 'pygments.enabled', $this->getEngine()->getConfig('pygments.enabled')); return phutil_tag( 'pre', array( 'class' => 'remarkup-code'.$aux_class, 'style' => $aux_style, ), PhutilSafeHTML::applyFunction( 'rtrim', $engine->highlightSource($options['lang'], $text))); } + /** + * Check if a language code can be used in a generic flavored markdown. + * @param string $lang Language code + * @return bool + */ + private static function isKnownLanguageCode($lang) { + $languages = self::knownLanguageCodes(); + return isset($languages[$lang]); + } + + /** + * Get the available languages for a generic flavored markdown. + * @return array Languages as array keys. Ignore the value. 
+ */ + private static function knownLanguageCodes() { + // This is a friendly subset from https://pygments.org/languages/ + static $map = array( + 'arduino' => 1, + 'assembly' => 1, + 'awk' => 1, + 'bash' => 1, + 'bat' => 1, + 'c' => 1, + 'cmake' => 1, + 'cobol' => 1, + 'cpp' => 1, + 'css' => 1, + 'csharp' => 1, + 'dart' => 1, + 'delphi' => 1, + 'fortran' => 1, + 'go' => 1, + 'groovy' => 1, + 'haskell' => 1, + 'java' => 1, + 'javascript' => 1, + 'kotlin' => 1, + 'lisp' => 1, + 'lua' => 1, + 'matlab' => 1, + 'make' => 1, + 'perl' => 1, + 'php' => 1, + 'powershell' => 1, + 'python' => 1, + 'r' => 1, + 'ruby' => 1, + 'rust' => 1, + 'scala' => 1, + 'sh' => 1, + 'sql' => 1, + 'typescript' => 1, + 'vba' => 1, + ); + return $map; + } + } diff --git a/src/infrastructure/markup/remarkup/__tests__/PhutilRemarkupEngineTestCase.php b/src/infrastructure/markup/remarkup/__tests__/PhutilRemarkupEngineTestCase.php index c3b4960d0c..38831d034a 100644 --- a/src/infrastructure/markup/remarkup/__tests__/PhutilRemarkupEngineTestCase.php +++ b/src/infrastructure/markup/remarkup/__tests__/PhutilRemarkupEngineTestCase.php @@ -1,132 +1,134 @@ <?php /** * Test cases for @{class:PhutilRemarkupEngine}. + * @TODO: This unit is not always triggered when you need it. 
+ * https://we.phorge.it/T15500 */ final class PhutilRemarkupEngineTestCase extends PhutilTestCase { public function testEngine() { $root = dirname(__FILE__).'/remarkup/'; foreach (Filesystem::listDirectory($root, $hidden = false) as $file) { $this->markupText($root.$file); } } private function markupText($markup_file) { $contents = Filesystem::readFile($markup_file); $file = basename($markup_file); $parts = explode("\n~~~~~~~~~~\n", $contents); $this->assertEqual(3, count($parts), $markup_file); list($input_remarkup, $expected_output, $expected_text) = $parts; $input_remarkup = $this->unescapeTrailingWhitespace($input_remarkup); $expected_output = $this->unescapeTrailingWhitespace($expected_output); $expected_text = $this->unescapeTrailingWhitespace($expected_text); $engine = $this->buildNewTestEngine(); switch ($file) { case 'raw-escape.txt': // NOTE: Here, we want to test PhutilRemarkupEscapeRemarkupRule and // PhutilRemarkupBlockStorage, which are triggered by "\1". In the // test, "~" is used as a placeholder for "\1" since it's hard to type // "\1". 
$input_remarkup = str_replace('~', "\1", $input_remarkup); $expected_output = str_replace('~', "\1", $expected_output); $expected_text = str_replace('~', "\1", $expected_text); break; case 'toc.txt': $engine->setConfig('header.generate-toc', true); break; case 'link-same-window.txt': $engine->setConfig('uri.same-window', true); break; case 'link-square.txt': $engine->setConfig('uri.base', 'http://www.example.com/'); $engine->setConfig('uri.here', 'http://www.example.com/page/'); break; } $actual_output = (string)$engine->markupText($input_remarkup); switch ($file) { case 'toc.txt': $table_of_contents = PhutilRemarkupHeaderBlockRule::renderTableOfContents($engine); $actual_output = $table_of_contents."\n\n".$actual_output; break; } $this->assertEqual( $expected_output, $actual_output, pht("Failed to markup HTML in file '%s'.", $file)); $engine->setMode(PhutilRemarkupEngine::MODE_TEXT); $actual_output = (string)$engine->markupText($input_remarkup); $this->assertEqual( $expected_text, $actual_output, pht("Failed to markup text in file '%s'.", $file)); } private function buildNewTestEngine() { $engine = new PhutilRemarkupEngine(); $engine->setConfig( 'uri.allowed-protocols', array( 'http' => true, 'mailto' => true, 'tel' => true, )); $rules = array(); $rules[] = new PhutilRemarkupEscapeRemarkupRule(); $rules[] = new PhutilRemarkupMonospaceRule(); $rules[] = new PhutilRemarkupDocumentLinkRule(); $rules[] = new PhutilRemarkupHyperlinkRule(); $rules[] = new PhutilRemarkupBoldRule(); $rules[] = new PhutilRemarkupItalicRule(); $rules[] = new PhutilRemarkupDelRule(); $rules[] = new PhutilRemarkupUnderlineRule(); $rules[] = new PhutilRemarkupHighlightRule(); $blocks = array(); $blocks[] = new PhutilRemarkupQuotesBlockRule(); $blocks[] = new PhutilRemarkupReplyBlockRule(); $blocks[] = new PhutilRemarkupHeaderBlockRule(); $blocks[] = new PhutilRemarkupHorizontalRuleBlockRule(); $blocks[] = new PhutilRemarkupCodeBlockRule(); $blocks[] = new PhutilRemarkupLiteralBlockRule(); 
$blocks[] = new PhutilRemarkupNoteBlockRule(); $blocks[] = new PhutilRemarkupTableBlockRule(); $blocks[] = new PhutilRemarkupSimpleTableBlockRule(); $blocks[] = new PhutilRemarkupDefaultBlockRule(); $blocks[] = new PhutilRemarkupListBlockRule(); $blocks[] = new PhutilRemarkupInterpreterBlockRule(); foreach ($blocks as $block) { if (!($block instanceof PhutilRemarkupCodeBlockRule)) { $block->setMarkupRules($rules); } } $engine->setBlockRules($blocks); return $engine; } private function unescapeTrailingWhitespace($input) { // Remove up to one "~" at the end of each line so trailing whitespace may // be written in tests as " ~". return preg_replace('/~$/m', '', $input); } } diff --git a/src/infrastructure/markup/remarkup/__tests__/remarkup/tick-block-flavored.txt b/src/infrastructure/markup/remarkup/__tests__/remarkup/tick-block-flavored.txt new file mode 100644 index 0000000000..f224942b1d --- /dev/null +++ b/src/infrastructure/markup/remarkup/__tests__/remarkup/tick-block-flavored.txt @@ -0,0 +1,7 @@ +```cpp +code +``` +~~~~~~~~~~ +<div class="remarkup-code-block" data-code-lang="cpp" data-sigil="remarkup-code-block"><pre class="remarkup-code">code</pre></div> +~~~~~~~~~~ + code diff --git a/src/infrastructure/markup/remarkup/__tests__/remarkup/tick-block-multi-flavored-comment.txt b/src/infrastructure/markup/remarkup/__tests__/remarkup/tick-block-multi-flavored-comment.txt new file mode 100644 index 0000000000..bcdaca8063 --- /dev/null +++ b/src/infrastructure/markup/remarkup/__tests__/remarkup/tick-block-multi-flavored-comment.txt @@ -0,0 +1,18 @@ +```#comment +code + +#more comment +more code``` + +~~~~~~~~~~ +<div class="remarkup-code-block" data-code-lang="text" data-sigil="remarkup-code-block"><pre class="remarkup-code">#comment +code + +#more comment +more code</pre></div> +~~~~~~~~~~ + #comment + code + + #more comment + more code diff --git a/src/infrastructure/markup/remarkup/__tests__/remarkup/tick-block-multi-flavored-empty.txt 
b/src/infrastructure/markup/remarkup/__tests__/remarkup/tick-block-multi-flavored-empty.txt new file mode 100644 index 0000000000..c05d44ec0f --- /dev/null +++ b/src/infrastructure/markup/remarkup/__tests__/remarkup/tick-block-multi-flavored-empty.txt @@ -0,0 +1,9 @@ +``` +cpp +second line``` +~~~~~~~~~~ +<div class="remarkup-code-block" data-code-lang="text" data-sigil="remarkup-code-block"><pre class="remarkup-code">cpp +second line</pre></div> +~~~~~~~~~~ + cpp + second line diff --git a/src/infrastructure/markup/remarkup/__tests__/remarkup/tick-block-multi-flavored.txt b/src/infrastructure/markup/remarkup/__tests__/remarkup/tick-block-multi-flavored.txt new file mode 100644 index 0000000000..5dc2cef421 --- /dev/null +++ b/src/infrastructure/markup/remarkup/__tests__/remarkup/tick-block-multi-flavored.txt @@ -0,0 +1,20 @@ +```cpp +code + +more code + +more code +``` + +~~~~~~~~~~ +<div class="remarkup-code-block" data-code-lang="cpp" data-sigil="remarkup-code-block"><pre class="remarkup-code">code + +more code + +more code</pre></div> +~~~~~~~~~~ + code + + more code + + more code