From 5106f96244aa36bdcf2c926451041a470434f5eb Mon Sep 17 00:00:00 2001 From: Brett Walker <bwalker@gitlab.com> Date: Fri, 6 Jan 2023 15:05:48 -0600 Subject: [PATCH] Move dollar math handling to markdown pipeline to better handle escaping characters Changelog: changed --- .../output_example_snapshots/html.yml | 8 +- lib/banzai/filter/dollar_math_post_filter.rb | 76 ++++++++ lib/banzai/filter/dollar_math_pre_filter.rb | 49 +++++ .../filter/markdown_post_escape_filter.rb | 16 +- .../filter/markdown_pre_escape_filter.rb | 3 + lib/banzai/filter/math_filter.rb | 96 ++-------- .../pipeline/plain_markdown_pipeline.rb | 7 + spec/lib/banzai/filter/math_filter_spec.rb | 170 +++++++++++------- .../lib/banzai/pipeline/full_pipeline_spec.rb | 2 +- .../pipeline/plain_markdown_pipeline_spec.rb | 1 + 10 files changed, 277 insertions(+), 151 deletions(-) create mode 100644 lib/banzai/filter/dollar_math_post_filter.rb create mode 100644 lib/banzai/filter/dollar_math_pre_filter.rb diff --git a/glfm_specification/output_example_snapshots/html.yml b/glfm_specification/output_example_snapshots/html.yml index 66ff8822c0224..f82e9d18150fa 100644 --- a/glfm_specification/output_example_snapshots/html.yml +++ b/glfm_specification/output_example_snapshots/html.yml @@ -4785,7 +4785,7 @@ canonical: | <p>!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~</p> static: |- - <p data-sourcepos="1:1-1:232" dir="auto"><span>!</span>"<span>#</span><span>$</span><span>%</span><span>&</span>'()*+,-./:;<=>?<span>@</span>[\]<span>^</span>_`{|}<span>~</span></p> + <p data-sourcepos="1:1-1:295" dir="auto"><span>!</span>"<span>#</span><span>$</span><span>%</span><span>&</span>'()*+,-./:;<=>?<span>@</span>[\]<span>^</span><span>_</span>`<span>{</span>|<span>}</span><span>~</span></p> wysiwyg: |- <p>!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~</p> 06_02_00__inlines__backslash_escapes__002: @@ -5929,7 +5929,7 @@ canonical: | <p>foo <em>_</em></p> static: |- - <p data-sourcepos="1:1-1:8" dir="auto">foo <em>_</em></p> + <p 
data-sourcepos="1:1-1:29" dir="auto">foo <em><span>_</span></em></p> wysiwyg: |- <p>foo <em>_</em></p> 06_05_00__inlines__emphasis_and_strong_emphasis__100: @@ -5950,7 +5950,7 @@ canonical: | <p>foo <strong>_</strong></p> static: |- - <p data-sourcepos="1:1-1:10" dir="auto">foo <strong>_</strong></p> + <p data-sourcepos="1:1-1:31" dir="auto">foo <strong><span>_</span></strong></p> wysiwyg: |- <p>foo <strong>_</strong></p> 06_05_00__inlines__emphasis_and_strong_emphasis__103: @@ -8441,7 +8441,7 @@ canonical: | TODO: Write canonical HTML for this example static: |- - <p data-sourcepos="1:1-1:36" dir="auto">This math is inline <code class="code math js-render-math" data-math-style="inline">a^2+b^2=c^2</code>.</p> + <p data-sourcepos="1:1-1:36" dir="auto">This math is inline <code data-math-style="inline" class="code math js-render-math">a^2+b^2=c^2</code>.</p> <p data-sourcepos="3:1-3:27" dir="auto">This is on a separate line:</p> <div class="gl-relative markdown-code-block js-markdown-code"> <pre data-sourcepos="5:1-7:3" lang="math" data-math-style="display" class="js-render-math code highlight js-syntax-highlight language-math" v-pre="true"><code><span id="LC1" class="line" lang="math">a^2+b^2=c^2</span></code></pre> diff --git a/lib/banzai/filter/dollar_math_post_filter.rb b/lib/banzai/filter/dollar_math_post_filter.rb new file mode 100644 index 0000000000000..94d1b4bcb48c7 --- /dev/null +++ b/lib/banzai/filter/dollar_math_post_filter.rb @@ -0,0 +1,76 @@ +# frozen_string_literal: true + +# Generated HTML is transformed back to GFM by: +# - app/assets/javascripts/behaviors/markdown/marks/math.js +# - app/assets/javascripts/behaviors/markdown/nodes/code_block.js +module Banzai + module Filter + # HTML filter that implements our dollar math syntax, one of three filters: + # DollarMathPreFilter, DollarMathPostFilter, and MathFilter + # + class DollarMathPostFilter < HTML::Pipeline::Filter + # Based on the Pandoc heuristics, + # 
https://pandoc.org/MANUAL.html#extension-tex_math_dollars + # + # Handle the $...$ and $$...$$ inline syntax in this filter, after markdown processing + # but before post-handling of escaped characters. Any escaped $ will have been specially + # encoded and will therefore not interfere with the detection of the dollar syntax. + + # Corresponds to the "$...$" syntax + DOLLAR_INLINE_PATTERN = %r{ + (?<matched>\$(?<math>(?:\S[^$\n]*?\S|[^$\s]))\$)(?:[^\d]|$) + }x.freeze + + # Corresponds to the "$$...$$" syntax + DOLLAR_DISPLAY_INLINE_PATTERN = %r{ + (?<matched>\$\$\ *(?<math>[^$\n]+?)\ *\$\$) + }x.freeze + + # Order dependent. Handle the `$$` syntax before the `$` syntax + DOLLAR_MATH_PIPELINE = [ + { pattern: DOLLAR_DISPLAY_INLINE_PATTERN, style: :display }, + { pattern: DOLLAR_INLINE_PATTERN, style: :inline } + ].freeze + + # Do not recognize math inside these tags + IGNORED_ANCESTOR_TAGS = %w[pre code tt].to_set + + def call + process_dollar_pipeline + + doc + end + + def process_dollar_pipeline + doc.xpath('descendant-or-self::text()').each do |node| + next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS) + + node_html = node.to_html + next unless node_html.match?(DOLLAR_INLINE_PATTERN) || + node_html.match?(DOLLAR_DISPLAY_INLINE_PATTERN) + + temp_doc = Nokogiri::HTML.fragment(node_html) + + DOLLAR_MATH_PIPELINE.each do |pipeline| + temp_doc.xpath('child::text()').each do |temp_node| + html = temp_node.to_html + temp_node.content.scan(pipeline[:pattern]).each do |matched, math| + html.sub!(matched, math_html(math: math, style: pipeline[:style])) + end + + temp_node.replace(html) + end + end + + node.replace(temp_doc) + end + end + + private + + def math_html(math:, style:) + "<code data-math-style=\"#{style}\">#{math}</code>" + end + end + end +end diff --git a/lib/banzai/filter/dollar_math_pre_filter.rb b/lib/banzai/filter/dollar_math_pre_filter.rb new file mode 100644 index 0000000000000..aaa186f87a63d --- /dev/null +++ 
b/lib/banzai/filter/dollar_math_pre_filter.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +# Generated HTML is transformed back to GFM by: +# - app/assets/javascripts/behaviors/markdown/marks/math.js +# - app/assets/javascripts/behaviors/markdown/nodes/code_block.js +module Banzai + module Filter + # Text filter that implements our dollar math syntax, one of three filters: + # DollarMathPreFilter, DollarMathPostFilter, and MathFilter + # + class DollarMathPreFilter < HTML::Pipeline::TextFilter + # Based on the Pandoc heuristics, + # https://pandoc.org/MANUAL.html#extension-tex_math_dollars + # + # Handle the $$\n...\n$$ syntax in this filter, before markdown processing, + # by converting it into the ```math syntax. In this way, we can ensure + # that it's considered a code block and will not have any markdown processed inside it. + + # Corresponds to the "$$\n...\n$$" syntax + REGEX = %r{ + #{::Gitlab::Regex.markdown_code_or_html_blocks} + | + (?=(?<=^\n|\A)\$\$\ *\n.*\n\$\$\ *(?=\n$|\z))(?: + # Display math block: + # $$ + # latex math + # $$ + + (?<=^\n|\A)\$\$\ *\n + (?<display_math> + (?:.)+? 
+ ) + \n\$\$\ *(?=\n$|\z) + ) + }mx.freeze + + def call + @text.gsub(REGEX) do + if $~[:display_math] + # change from $$ to ```math + "```math\n#{$~[:display_math]}\n```" + else + $~[0] + end + end + end + end + end +end diff --git a/lib/banzai/filter/markdown_post_escape_filter.rb b/lib/banzai/filter/markdown_post_escape_filter.rb index 9f092f58e0c1e..8c0bd62f80af9 100644 --- a/lib/banzai/filter/markdown_post_escape_filter.rb +++ b/lib/banzai/filter/markdown_post_escape_filter.rb @@ -9,8 +9,9 @@ class MarkdownPostEscapeFilter < HTML::Pipeline::Filter NOT_LITERAL_REGEX = %r{#{LITERAL_KEYWORD}-((%5C|\\).+?)-#{LITERAL_KEYWORD}}.freeze SPAN_REGEX = %r{<span>(.*?)</span>}.freeze - XPATH_A = Gitlab::Utils::Nokogiri.css_to_xpath('a').freeze - XPATH_LANG_TAG = Gitlab::Utils::Nokogiri.css_to_xpath('pre').freeze + XPATH_A = Gitlab::Utils::Nokogiri.css_to_xpath('a').freeze + XPATH_LANG_TAG = Gitlab::Utils::Nokogiri.css_to_xpath('pre').freeze + XPATH_CODE_SPAN = Gitlab::Utils::Nokogiri.css_to_xpath('code > span').freeze def call return doc unless result[:escaped_literals] @@ -21,6 +22,7 @@ def call @doc = parse_html(new_html) remove_spans_in_certain_attributes + remove_spans_in_code doc end @@ -72,6 +74,16 @@ def remove_spans_in_certain_attributes node.attributes['lang'].value = node.attributes['lang'].value.gsub(SPAN_REGEX, '\1') if node.attributes['lang'] end end + + # Any `<span>` that makes it into a `<code>` element is from the math processing, + # convert back to the escaped character, such as `\$` + def remove_spans_in_code + doc.xpath(XPATH_CODE_SPAN).each do |node| + escaped_item = Banzai::Filter::MarkdownPreEscapeFilter::ESCAPABLE_CHARS.find { |item| item[:char] == node.content && item[:latex] } + + node.replace(escaped_item[:escaped]) if escaped_item + end + end end end end diff --git a/lib/banzai/filter/markdown_pre_escape_filter.rb b/lib/banzai/filter/markdown_pre_escape_filter.rb index 4161ac51a11d2..8cc7b0defd668 100644 --- 
a/lib/banzai/filter/markdown_pre_escape_filter.rb +++ b/lib/banzai/filter/markdown_pre_escape_filter.rb @@ -47,6 +47,9 @@ class MarkdownPreEscapeFilter < HTML::Pipeline::TextFilter { char: '%', escaped: '\%', token: '\+b', reference: true, latex: true }, { char: '#', escaped: '\#', token: '\+c', reference: true, latex: true }, { char: '&', escaped: '\&', token: '\+d', reference: true, latex: true }, + { char: '{', escaped: '\{', token: '\+e', reference: false, latex: true }, + { char: '}', escaped: '\}', token: '\+f', reference: false, latex: true }, + { char: '_', escaped: '\_', token: '\+g', reference: false, latex: true }, { char: '@', escaped: '\@', token: '\+h', reference: true, latex: false }, { char: '!', escaped: '\!', token: '\+i', reference: true, latex: false }, { char: '~', escaped: '\~', token: '\+j', reference: true, latex: false }, diff --git a/lib/banzai/filter/math_filter.rb b/lib/banzai/filter/math_filter.rb index e096f1753ed99..9b6fc71077a2c 100644 --- a/lib/banzai/filter/math_filter.rb +++ b/lib/banzai/filter/math_filter.rb @@ -1,55 +1,29 @@ # frozen_string_literal: true -require 'uri' - # Generated HTML is transformed back to GFM by: # - app/assets/javascripts/behaviors/markdown/marks/math.js # - app/assets/javascripts/behaviors/markdown/nodes/code_block.js module Banzai module Filter - # HTML filter that implements our math syntax, adding class="code math" + # HTML filter that implements the original GitLab math syntax, one of three filters: + # DollarMathPreFilter, DollarMathPostFilter, and MathFilter # class MathFilter < HTML::Pipeline::Filter + # Handle the $`...`$ and ```math syntax in this filter. + # Also add necessary classes to any existing math blocks. 
+ CSS_MATH = 'pre[lang="math"] > code' XPATH_MATH = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_MATH).freeze CSS_CODE = 'code' XPATH_CODE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_CODE).freeze - - # These are based on the Pandoc heuristics, - # https://pandoc.org/MANUAL.html#extension-tex_math_dollars - # Note: at this time, using a dollar sign literal, `\$` inside - # a math statement does not work correctly. - # Corresponds to the "$...$" syntax - DOLLAR_INLINE_PATTERN = %r{ - (?<matched>\$(?<math>(?:\S[^$\n]*?\S|[^$\s]))\$)(?:[^\d]|$) - }x.freeze - - # Corresponds to the "$$...$$" syntax - DOLLAR_DISPLAY_INLINE_PATTERN = %r{ - (?<matched>\$\$\ *(?<math>[^$\n]+?)\ *\$\$) - }x.freeze - - # Corresponds to the $$\n...\n$$ syntax - DOLLAR_DISPLAY_BLOCK_PATTERN = %r{ - ^(?<matched>\$\$\ *\n(?<math>.*)\n\$\$\ *)$ - }mx.freeze - - # Order dependent. Handle the `$$` syntax before the `$` syntax - DOLLAR_MATH_PIPELINE = [ - { pattern: DOLLAR_DISPLAY_INLINE_PATTERN, tag: :code, style: :display }, - { pattern: DOLLAR_DISPLAY_BLOCK_PATTERN, tag: :pre, style: :display }, - { pattern: DOLLAR_INLINE_PATTERN, tag: :code, style: :inline } - ].freeze - - # Do not recognize math inside these tags - IGNORED_ANCESTOR_TAGS = %w[pre code tt].to_set + CSS_INLINE_CODE = 'code[data-math-style]' + XPATH_INLINE_CODE = Gitlab::Utils::Nokogiri.css_to_xpath(CSS_INLINE_CODE).freeze # Attribute indicating inline or display math. 
STYLE_ATTRIBUTE = 'data-math-style' # Class used for tagging elements that should be rendered TAG_CLASS = 'js-render-math' - MATH_CLASSES = "code math #{TAG_CLASS}" DOLLAR_SIGN = '$' @@ -61,47 +35,31 @@ class MathFilter < HTML::Pipeline::Filter def call @nodes_count = 0 - process_dollar_pipeline - + process_existing process_dollar_backtick_inline process_math_codeblock doc end - def process_dollar_pipeline - doc.xpath('descendant-or-self::text()').each do |node| - next if has_ancestor?(node, IGNORED_ANCESTOR_TAGS) - - node_html = node.to_html - next unless node_html.match?(DOLLAR_INLINE_PATTERN) || - node_html.match?(DOLLAR_DISPLAY_INLINE_PATTERN) || - node_html.match?(DOLLAR_DISPLAY_BLOCK_PATTERN) - - temp_doc = Nokogiri::HTML.fragment(node_html) - DOLLAR_MATH_PIPELINE.each do |pipeline| - temp_doc.xpath('child::text()').each do |temp_node| - html = temp_node.to_html - temp_node.content.scan(pipeline[:pattern]).each do |matched, math| - html.sub!(matched, math_html(tag: pipeline[:tag], style: pipeline[:style], math: math)) - - @nodes_count += 1 - break if @nodes_count >= RENDER_NODES_LIMIT - end + private - temp_node.replace(html) + # Add necessary classes to any existing math blocks + def process_existing + doc.xpath(XPATH_INLINE_CODE).each do |code| + break if @nodes_count >= RENDER_NODES_LIMIT - break if @nodes_count >= RENDER_NODES_LIMIT - end - end + code[:class] = MATH_CLASSES - node.replace(temp_doc) + @nodes_count += 1 end end # Corresponds to the "$`...`$" syntax def process_dollar_backtick_inline doc.xpath(XPATH_CODE).each do |code| + break if @nodes_count >= RENDER_NODES_LIMIT + closing = code.next opening = code.previous @@ -112,17 +70,16 @@ def process_dollar_backtick_inline closing.content.first == DOLLAR_SIGN && opening.content.last == DOLLAR_SIGN - code[:class] = MATH_CLASSES code[STYLE_ATTRIBUTE] = 'inline' + code[:class] = MATH_CLASSES closing.content = closing.content[1..] 
opening.content = opening.content[0..-2] @nodes_count += 1 - break if @nodes_count >= RENDER_NODES_LIMIT end end - # corresponds to the "```math...```" syntax + # Corresponds to the "```math...```" syntax def process_math_codeblock doc.xpath(XPATH_MATH).each do |node| pre_node = node.parent @@ -130,21 +87,6 @@ def process_math_codeblock pre_node[:class] = TAG_CLASS end end - - private - - def math_html(tag:, math:, style:) - case tag - when :code - "<code class=\"#{MATH_CLASSES}\" data-math-style=\"#{style}\">#{math}</code>" - when :pre - "<pre class=\"#{MATH_CLASSES}\" data-math-style=\"#{style}\"><code>#{math}</code></pre>" - end - end - - def group - context[:group] || context[:project]&.group - end end end end diff --git a/lib/banzai/pipeline/plain_markdown_pipeline.rb b/lib/banzai/pipeline/plain_markdown_pipeline.rb index 1da0f72996bed..205bbc2140d56 100644 --- a/lib/banzai/pipeline/plain_markdown_pipeline.rb +++ b/lib/banzai/pipeline/plain_markdown_pipeline.rb @@ -3,10 +3,17 @@ module Banzai module Pipeline class PlainMarkdownPipeline < BasePipeline + # DollarMathPreFilter and DollarMathPostFilter need to be included here, + # rather than in another pipeline. However, since dollar math would most + # likely be supported as an extension in any other markdown parser we used, + # it is not out of place. 
We are considering this a part of the actual + # markdown processing def self.filters FilterArray[ Filter::MarkdownPreEscapeFilter, + Filter::DollarMathPreFilter, Filter::MarkdownFilter, + Filter::DollarMathPostFilter, Filter::MarkdownPostEscapeFilter ] end diff --git a/spec/lib/banzai/filter/math_filter_spec.rb b/spec/lib/banzai/filter/math_filter_spec.rb index 1dcfae0c38cc2..374983e40a157 100644 --- a/spec/lib/banzai/filter/math_filter_spec.rb +++ b/spec/lib/banzai/filter/math_filter_spec.rb @@ -2,14 +2,15 @@ require 'spec_helper' -RSpec.describe Banzai::Filter::MathFilter do +RSpec.describe Banzai::Filter::MathFilter, feature_category: :team_planning do using RSpec::Parameterized::TableSyntax include FilterSpecHelper shared_examples 'inline math' do it 'removes surrounding dollar signs and adds class code, math and js-render-math' do - doc = filter(text) - expected = result_template.gsub('<math>', '<code class="code math js-render-math" data-math-style="inline">') + doc = pipeline_filter(text) + + expected = result_template.gsub('<math>', '<code data-math-style="inline" class="code math js-render-math">') expected.gsub!('</math>', '</code>') expect(doc.to_s).to eq expected @@ -17,12 +18,12 @@ end shared_examples 'display math' do - let_it_be(:template_prefix_with_pre) { '<pre class="code math js-render-math" data-math-style="display"><code>' } - let_it_be(:template_prefix_with_code) { '<code class="code math js-render-math" data-math-style="display">' } + let_it_be(:template_prefix_with_pre) { '<pre lang="math" data-math-style="display" class="js-render-math"><code>' } + let_it_be(:template_prefix_with_code) { '<code data-math-style="display" class="code math js-render-math">' } let(:use_pre_tags) { false } it 'removes surrounding dollar signs and adds class code, math and js-render-math' do - doc = filter(text) + doc = pipeline_filter(text) template_prefix = use_pre_tags ? 
template_prefix_with_pre : template_prefix_with_code template_suffix = "</code>#{'</pre>' if use_pre_tags}" @@ -36,36 +37,38 @@ describe 'inline math using $...$ syntax' do context 'with valid syntax' do where(:text, :result_template) do - '$2+2$' | '<math>2+2</math>' - '$22+1$ and $22 + a^2$' | '<math>22+1</math> and <math>22 + a^2</math>' - '$22 and $2+2$' | '$22 and <math>2+2</math>' - '$2+2$ $22 and flightjs/Flight$22 $2+2$' | '<math>2+2</math> $22 and flightjs/Flight$22 <math>2+2</math>' - '$1/2$ <b>test</b>' | '<math>1/2</math> <b>test</b>' - '$a!$' | '<math>a!</math>' - '$x$' | '<math>x</math>' + '$2+2$' | '<p><math>2+2</math></p>' + '$22+1$ and $22 + a^2$' | '<p><math>22+1</math> and <math>22 + a^2</math></p>' + '$22 and $2+2$' | '<p>$22 and <math>2+2</math></p>' + '$2+2$ $22 and flightjs/Flight$22 $2+2$' | '<p><math>2+2</math> $22 and flightjs/Flight$22 <math>2+2</math></p>' + '$1/2$ <b>test</b>' | '<p><math>1/2</math> <b>test</b></p>' + '$a!$' | '<p><math>a!</math></p>' + '$x$' | '<p><math>x</math></p>' + '$1+2\$$' | '<p><math>1+2\$</math></p>' + '$1+\$2$' | '<p><math>1+\$2</math></p>' + '$1+\%2$' | '<p><math>1+\%2</math></p>' + '$1+\#2$' | '<p><math>1+\#2</math></p>' + '$1+\&2$' | '<p><math>1+\&2</math></p>' + '$1+\{2$' | '<p><math>1+\{2</math></p>' + '$1+\}2$' | '<p><math>1+\}2</math></p>' + '$1+\_2$' | '<p><math>1+\_2</math></p>' end with_them do it_behaves_like 'inline math' end end - - it 'does not handle dollar literals properly' do - doc = filter('$20+30\$$') - expected = '<code class="code math js-render-math" data-math-style="inline">20+30\\</code>$' - - expect(doc.to_s).to eq expected - end end describe 'inline math using $`...`$ syntax' do context 'with valid syntax' do where(:text, :result_template) do - '$<code>2+2</code>$' | '<math>2+2</math>' - '$<code>22+1</code>$ and $<code>22 + a^2</code>$' | '<math>22+1</math> and <math>22 + a^2</math>' - '$22 and $<code>2+2</code>$' | '$22 and <math>2+2</math>' - '$<code>2+2</code>$ $22 and 
flightjs/Flight$22 $<code>2+2</code>$' | '<math>2+2</math> $22 and flightjs/Flight$22 <math>2+2</math>' - 'test $$<code>2+2</code>$$ test' | 'test $<math>2+2</math>$ test' + '$`2+2`$' | '<p><math>2+2</math></p>' + '$`22+1`$ and $`22 + a^2`$' | '<p><math>22+1</math> and <math>22 + a^2</math></p>' + '$22 and $`2+2`$' | '<p>$22 and <math>2+2</math></p>' + '$`2+2`$ $22 and flightjs/Flight$22 $`2+2`$' | '<p><math>2+2</math> $22 and flightjs/Flight$22 <math>2+2</math></p>' + 'test $$`2+2`$$ test' | '<p>test $<math>2+2</math>$ test</p>' + '$`1+\$2`$' | '<p><math>1+\$2</math></p>' end with_them do @@ -77,15 +80,15 @@ describe 'inline display math using $$...$$ syntax' do context 'with valid syntax' do where(:text, :result_template) do - '$$2+2$$' | '<math>2+2</math>' - '$$ 2+2 $$' | '<math>2+2</math>' - '$$22+1$$ and $$22 + a^2$$' | '<math>22+1</math> and <math>22 + a^2</math>' - '$22 and $$2+2$$' | '$22 and <math>2+2</math>' - '$$2+2$$ $22 and flightjs/Flight$22 $$2+2$$' | '<math>2+2</math> $22 and flightjs/Flight$22 <math>2+2</math>' - 'flightjs/Flight$22 and $$a^2 + b^2 = c^2$$' | 'flightjs/Flight$22 and <math>a^2 + b^2 = c^2</math>' - '$$a!$$' | '<math>a!</math>' - '$$x$$' | '<math>x</math>' - '$$20,000 and $$30,000' | '<math>20,000 and</math>30,000' + '$$2+2$$' | '<p><math>2+2</math></p>' + '$$ 2+2 $$' | '<p><math>2+2</math></p>' + '$$22+1$$ and $$22 + a^2$$' | '<p><math>22+1</math> and <math>22 + a^2</math></p>' + '$22 and $$2+2$$' | '<p>$22 and <math>2+2</math></p>' + '$$2+2$$ $22 and flightjs/Flight$22 $$2+2$$' | '<p><math>2+2</math> $22 and flightjs/Flight$22 <math>2+2</math></p>' + 'flightjs/Flight$22 and $$a^2 + b^2 = c^2$$' | '<p>flightjs/Flight$22 and <math>a^2 + b^2 = c^2</math></p>' + '$$a!$$' | '<p><math>a!</math></p>' + '$$x$$' | '<p><math>x</math></p>' + '$$20,000 and $$30,000' | '<p><math>20,000 and</math>30,000</p>' end with_them do @@ -97,8 +100,8 @@ describe 'block display math using $$\n...\n$$ syntax' do context 'with valid syntax' do where(:text, 
:result_template) do - "$$\n2+2\n$$" | "<math>2+2</math>" - "$$\n2+2\n3+4\n$$" | "<math>2+2\n3+4</math>" + "$$\n2+2\n$$" | "<math>2+2\n</math>" + "$$\n2+2\n3+4\n$$" | "<math>2+2\n3+4\n</math>" end with_them do @@ -107,72 +110,96 @@ end end end + + context 'when it spans multiple lines' do + let(:math) do + <<~MATH + \\begin{align*} + \\Delta t \\frac{d(b_i, a_i)}{c} + \\Delta t_{b_i} + \\end{align*} + MATH + end + + let(:text) { "$$\n#{math}$$" } + let(:result_template) { "<math>#{math}</math>" } + + it_behaves_like 'display math' do + let(:use_pre_tags) { true } + end + end + + context 'when it contains \\' do + let(:math) do + <<~MATH + E = mc^2 \\\\ + E = \\$mc^2 + MATH + end + + let(:text) { "$$\n#{math}$$" } + let(:result_template) { "<math>#{math}</math>" } + + it_behaves_like 'display math' do + let(:use_pre_tags) { true } + end + end end describe 'display math using ```math...``` syntax' do it 'adds data-math-style display attribute to display math' do - doc = filter('<pre lang="math"><code>2+2</code></pre>') + doc = pipeline_filter("```math\n2+2\n```") pre = doc.xpath('descendant-or-self::pre').first expect(pre['data-math-style']).to eq 'display' end it 'adds js-render-math class to display math' do - doc = filter('<pre lang="math"><code>2+2</code></pre>') + doc = pipeline_filter("```math\n2+2\n```") pre = doc.xpath('descendant-or-self::pre').first expect(pre[:class]).to include("js-render-math") end it 'ignores code blocks that are not math' do - input = '<pre lang="plaintext"><code>2+2</code></pre>' - doc = filter(input) + input = "```plaintext\n2+2\n```" + doc = pipeline_filter(input) - expect(doc.to_s).to eq input + expect(doc.to_s).to eq "<pre lang=\"plaintext\"><code>2+2\n</code></pre>" end it 'requires the pre to contain both code and math' do input = '<pre lang="math">something</pre>' - doc = filter(input) + doc = pipeline_filter(input) expect(doc.to_s).to eq input end - - it 'dollar signs around to display math' do - doc = filter('$<pre 
lang="math"><code>2+2</code></pre>$') - before = doc.xpath('descendant-or-self::text()[1]').first - after = doc.xpath('descendant-or-self::text()[3]').first - - expect(before.to_s).to eq '$' - expect(after.to_s).to eq '$' - end end describe 'unrecognized syntax' do - where(:text) do - [ - '<code>2+2</code>', - 'test $<code>2+2</code> test', - 'test <code>2+2</code>$ test', - '<em>$</em><code>2+2</code><em>$</em>', - '$20,000 and $30,000', - '$20,000 in $USD', - '$ a^2 $', - "test $$\n2+2\n$$", - "$\n$", - '$$$' - ] + where(:text, :result) do + '`2+2`' | '<p><code>2+2</code></p>' + 'test $`2+2` test' | '<p>test $<code>2+2</code> test</p>' + 'test `2+2`$ test' | '<p>test <code>2+2</code>$ test</p>' + '$20,000 and $30,000' | '<p>$20,000 and $30,000</p>' + '$20,000 in $USD' | '<p>$20,000 in $USD</p>' + '$ a^2 $' | '<p>$ a^2 $</p>' + "test $$\n2+2\n$$" | "<p>test $$\n2+2\n$$</p>" + "$\n$" | "<p>$\n$</p>" + '$$$' | '<p>$$$</p>' + '`$1+2$`' | '<p><code>$1+2$</code></p>' + '`$$1+2$$`' | '<p><code>$$1+2$$</code></p>' + '`$\$1+2$$`' | '<p><code>$\$1+2$$</code></p>' end with_them do it 'is ignored' do - expect(filter(text).to_s).to eq text + expect(pipeline_filter(text).to_s).to eq result end end end it 'handles multiple styles in one text block' do - doc = filter('$<code>2+2</code>$ + $3+3$ + $$4+4$$') + doc = pipeline_filter('$`2+2`$ + $3+3$ + $$4+4$$') expect(doc.search('.js-render-math').count).to eq(3) expect(doc.search('[data-math-style="inline"]').count).to eq(2) @@ -182,8 +209,17 @@ it 'limits how many elements can be marked as math' do stub_const('Banzai::Filter::MathFilter::RENDER_NODES_LIMIT', 2) - doc = filter('$<code>2+2</code>$ + $<code>3+3</code>$ + $<code>4+4</code>$') + doc = pipeline_filter('$`2+2`$ + $3+3$ + $$4+4$$') expect(doc.search('.js-render-math').count).to eq(2) end + + def pipeline_filter(text) + context = { project: nil, no_sourcepos: true } + doc = Banzai::Pipeline::PreProcessPipeline.call(text, {}) + doc = 
Banzai::Pipeline::PlainMarkdownPipeline.call(doc[:output], context) + doc = Banzai::Filter::SanitizationFilter.call(doc[:output], context, nil) + + filter(doc) + end end diff --git a/spec/lib/banzai/pipeline/full_pipeline_spec.rb b/spec/lib/banzai/pipeline/full_pipeline_spec.rb index 0a86d795056a9..c1d5f16b5621b 100644 --- a/spec/lib/banzai/pipeline/full_pipeline_spec.rb +++ b/spec/lib/banzai/pipeline/full_pipeline_spec.rb @@ -164,7 +164,7 @@ markdown = '_@test\__' output = described_class.to_html(markdown, project: project) - expect(output).to include('<em>@test_</em>') + expect(output).to include('<em>@test<span>_</span></em>') end end diff --git a/spec/lib/banzai/pipeline/plain_markdown_pipeline_spec.rb b/spec/lib/banzai/pipeline/plain_markdown_pipeline_spec.rb index e2a36e58cf7e5..0e4a4e4492e85 100644 --- a/spec/lib/banzai/pipeline/plain_markdown_pipeline_spec.rb +++ b/spec/lib/banzai/pipeline/plain_markdown_pipeline_spec.rb @@ -61,6 +61,7 @@ %q(`` \@\! ``) | %q(<code>\@\!</code>) %q( \@\!) | %Q(<code>\\@\\!\n</code>) %Q(~~~\n\\@\\!\n~~~) | %Q(<code>\\@\\!\n</code>) + %q($1+\$2$) | %q(<code data-math-style="inline">1+\\$2</code>) %q(<http://example.com?find=\@>) | %q(<a href="http://example.com?find=%5C@">http://example.com?find=\@</a>) %q[<a href="/bar\@)">] | %q[<a href="/bar%5C@)">] end -- GitLab