From b747668d9bb4e314252f82be62cc66c65898168c Mon Sep 17 00:00:00 2001 From: Chad Woolley <cwoolley@gitlab.com> Date: Sat, 4 Jun 2022 19:51:41 -0700 Subject: [PATCH] Normalize GLFM example static HTML - Add support for normalizing entries in GLFM static HTML snapshot examples, to replace variable values in actual HTML responses --- .../specification_guide/index.md | 105 +++++++++++++++++- .../glfm_example_normalizations.yml | 27 +++++ spec/requests/api/markdown_snapshot_spec.rb | 3 +- .../markdown_snapshot_shared_examples.rb | 33 +++++- 4 files changed, 163 insertions(+), 5 deletions(-) create mode 100644 glfm_specification/input/gitlab_flavored_markdown/glfm_example_normalizations.yml diff --git a/doc/development/gitlab_flavored_markdown/specification_guide/index.md b/doc/development/gitlab_flavored_markdown/specification_guide/index.md index acd346a3a85e5..802b01fe7f9a5 100644 --- a/doc/development/gitlab_flavored_markdown/specification_guide/index.md +++ b/doc/development/gitlab_flavored_markdown/specification_guide/index.md @@ -167,6 +167,9 @@ Regarding the terminology used here: they are colocated under the `spec/fixtures` directory with the rest of the fixture data for the GitLab Rails application. +See also the section on [normalization](#normalization) below, which is an important concept used +in the Markdown snapshot testing. + ## Parsing and Rendering The Markdown dialect used in the GitLab application has a dual requirement for rendering: @@ -268,6 +271,48 @@ HTML. (For example, when they are represented as an image.) In these cases, the conformance test for the example can be skipped by setting `skip_update_example_snapshots: true` for the example in `glfm_specification/input/gitlab_flavored_markdown/glfm_example_status.yml`. +### Normalization + +Different versions of the rendered HTML and ProseMirror JSON can vary for a number of reasons. 
There +are not only differences in styling or HTML structure, but the values of attributes or nodes may +vary across different test runs or environments as well. Here are some examples: + +1. Database record identifiers +1. Namespace or project identifiers +1. Portions of URIs +1. File paths or names +1. Random values + +This means that in order for the [Markdown snapshot testing](#markdown-snapshot-testing) to work +properly, these differences must be accounted for in a way that ensures the tests are reliable, +and always behave the same across different test runs or environments. + +To account for these differences, there is a process called "**_normalization_**". Normalization +allows custom regular expressions with +[_capturing groups_](https://ruby-doc.org/core-3.1.2/Regexp.html#class-Regexp-label-Capturing) +to be applied to two different versions of HTML or JSON for a given Markdown example, +and the contents of the captured groups can be replaced with the same fixed values. + +Then, the two normalized versions can be compared to each other to ensure all other non-variable +content is identical. + +It is important to note that we don't care about verifying specific attribute values here, so +it's OK if the normalizations discard and replace these variable values with fixed values. This is +because different testing levels have different purposes: + +1. [Markdown snapshot testing](#markdown-snapshot-testing) is intended to enforce the structure of + the rendered HTML/JSON, and to ensure that it conforms to the canonical specification. +1. Individual unit tests of the implementation for a specific Markdown example are responsible for + specific and targeted testing of these variable values. + +We also use this same regex capture-and-replace normalization approach for +[Canonicalization of HTML](#canonicalization-of-html), because it is essentially the same process. 
+With canonicalization, instead of just replacing variable values, we are removing non-canonical +portions of the HTML. + +See [`glfm_example_normalizations.yml`](#glfm_example_normalizationsyml) for a detailed explanation +of how the normalizations are specified. + ## Goals Given the constraints above, we have a few goals related to the GLFM @@ -641,6 +686,63 @@ The following optional entries are supported for each example. They all default skip_running_snapshot_prosemirror_json_tests: 'An explanation of the reason for skipping.' ``` +##### `glfm_example_normalizations.yml` + +[`glfm_specification/input/gitlab_flavored_markdown/glfm_example_normalizations.yml`](https://gitlab.com/gitlab-org/gitlab/-/blob/master/glfm_specification/input/gitlab_flavored_markdown/glfm_example_normalizations.yml) +controls the [normalization](#normalization) process. It allows one or more `regex`/`replacement` pairs +to be specified for a Markdown example. + +- It is manually updated. +- It has a nested structure corresponding to the example and type of entry it refers to. +- It extensively uses [YAML anchors and aliases](https://yaml.org/spec/1.2.2/#692-node-anchors) + to avoid duplication of `regex`/`replacement` pairs and allow them to be shared across multiple examples. +- The YAML anchors use a naming convention based on the index number of the example, in order to + ensure unique anchor names and avoid naming conflicts. + +`glfm_specification/input/gitlab_flavored_markdown/glfm_example_normalizations.yml` sample entries: + +```yaml +# NOTE: All YAML anchors which are shared across one or more examples are defined in the `00_shared` section. 
+00_shared: + 00_uri: &00_uri + - regex: '(href|data-src)(=")(.*?)(test-file\.(png|zip)")' + replacement: '\1\2URI_PREFIX\4' +01_01__section_one__example_containing_a_uri__001: + html: + static: + canonical: + 01_01_uri: *00_uri + snapshot: + 01_01_uri: *00_uri + wysiwyg: + 01_01_uri: *00_uri + prosemirror_json: + 01_01_uri: *00_uri +07_01__gitlab_specific_markdown__footnotes__001: + # YAML anchors which are only shared within a single example should be defined within the example + shared: + 07_01_href: &07_01_href + - regex: '(href)(=")(.+?)(")' + replacement: '\1\2REF\4' + 07_01_id: &07_01_id + - regex: '(id)(=")(.+?)(")' + replacement: '\1\2ID\4' + html: + static: + canonical: + 07_01_href: *07_01_href + 07_01_id: *07_01_id + snapshot: + 07_01_href: *07_01_href + 07_01_id: *07_01_id + wysiwyg: + 07_01_href: *07_01_href + 07_01_id: *07_01_id + prosemirror_json: + 07_01_href: *07_01_href + 07_01_id: *07_01_id +``` + #### Output specification files The `glfm_specification/output` directory contains the CommonMark standard format @@ -654,7 +756,8 @@ are colocated under the same parent folder `glfm_specification` with the other a mix of manually edited and generated files. In GFM, `spec.txt` is [located in the test dir](https://github.com/github/cmark-gfm/blob/master/test/spec.txt), -and in CommonMark it's located [in the project root](https://github.com/github/cmark-gfm/blob/master/test/spec.txt). No precedent exists for a standard location. In the future, we may decide to +and in CommonMark it's located [in the project root](https://github.com/commonmark/commonmark-spec/blob/master/spec.txt). +No precedent exists for a standard location. In the future, we may decide to move or copy a hosted version of the rendered HTML `spec.html` version to another location or site. 
##### spec.txt diff --git a/glfm_specification/input/gitlab_flavored_markdown/glfm_example_normalizations.yml b/glfm_specification/input/gitlab_flavored_markdown/glfm_example_normalizations.yml new file mode 100644 index 0000000000000..15df659f0f49e --- /dev/null +++ b/glfm_specification/input/gitlab_flavored_markdown/glfm_example_normalizations.yml @@ -0,0 +1,27 @@ +--- +# See the following documentation for more info on normalization: +# +# - https://docs.gitlab.com/ee/development/gitlab_flavored_markdown/specification_guide/#normalization +# - https://docs.gitlab.com/ee/development/gitlab_flavored_markdown/specification_guide/#glfm_example_normalizationsyml +# +# NOTE: All YAML anchors which are shared across one or more entries are defined in the `00_shared` section. +00_shared: + 00_uri: &00_uri + - regex: '(href|data-src)(=")(.*?)(test-file\.(png|zip)")' + replacement: '\1\2URI_PREFIX\4' +07_01__gitlab_specific_markdown__footnotes__001: + html: + static: + shared: + 07_01_href: &07_01_href + - regex: '(href)(=")(.+?)(")' + replacement: '\1\2REF\4' + 07_01_id: &07_01_id + - regex: '(id)(=")(.+?)(")' + replacement: '\1\2ID\4' + canonical: + 07_01_href: *07_01_href + 07_01_id: *07_01_id + snapshot: + 07_01_href: *07_01_href + 07_01_id: *07_01_id diff --git a/spec/requests/api/markdown_snapshot_spec.rb b/spec/requests/api/markdown_snapshot_spec.rb index fdb55a6280287..37607a4e86670 100644 --- a/spec/requests/api/markdown_snapshot_spec.rb +++ b/spec/requests/api/markdown_snapshot_spec.rb @@ -5,6 +5,7 @@ # See https://docs.gitlab.com/ee/development/gitlab_flavored_markdown/specification_guide/#markdown-snapshot-testing # for documentation on this spec. 
RSpec.describe API::Markdown, 'Snapshot' do + glfm_specification_dir = File.expand_path('../../../glfm_specification', __dir__) glfm_example_snapshots_dir = File.expand_path('../../fixtures/glfm/example_snapshots', __dir__) - include_context 'API::Markdown Snapshot shared context', glfm_example_snapshots_dir + include_context 'with API::Markdown Snapshot shared context', glfm_specification_dir, glfm_example_snapshots_dir end diff --git a/spec/support/shared_contexts/markdown_snapshot_shared_examples.rb b/spec/support/shared_contexts/markdown_snapshot_shared_examples.rb index 531a176d76b81..de52b58982e15 100644 --- a/spec/support/shared_contexts/markdown_snapshot_shared_examples.rb +++ b/spec/support/shared_contexts/markdown_snapshot_shared_examples.rb @@ -4,7 +4,9 @@ # See https://docs.gitlab.com/ee/development/gitlab_flavored_markdown/specification_guide/#markdown-snapshot-testing # for documentation on this spec. -RSpec.shared_context 'API::Markdown Snapshot shared context' do |glfm_example_snapshots_dir| +# rubocop:disable Layout/LineLength +RSpec.shared_context 'with API::Markdown Snapshot shared context' do |glfm_specification_dir, glfm_example_snapshots_dir| + # rubocop:enable Layout/LineLength include ApiHelpers markdown_examples, html_examples = %w[markdown.yml html.yml].map do |file_name| @@ -12,7 +14,11 @@ YAML.safe_load(yaml, symbolize_names: true, aliases: true) end - if focused_markdown_examples_string = ENV['FOCUSED_MARKDOWN_EXAMPLES'] + normalizations_yaml = File.read( + "#{glfm_specification_dir}/input/gitlab_flavored_markdown/glfm_example_normalizations.yml") + normalizations_by_example_name = YAML.safe_load(normalizations_yaml, symbolize_names: true, aliases: true) + + if (focused_markdown_examples_string = ENV['FOCUSED_MARKDOWN_EXAMPLES']) focused_markdown_examples = focused_markdown_examples_string.split(',').map(&:strip).map(&:to_sym) markdown_examples.select! 
{ |example_name| focused_markdown_examples.include?(example_name) } end @@ -20,17 +26,38 @@ markdown_examples.each do |name, markdown| context "for #{name}" do let(:html) { html_examples.fetch(name).fetch(:static) } + let(:normalizations) { normalizations_by_example_name.dig(name, :html, :static, :snapshot) } it "verifies conversion of GLFM to HTML", :unlimited_max_formatted_output_length do api_url = api "/markdown" + # noinspection RubyResolve + normalized_html = normalize_html(html, normalizations) + post api_url, params: { text: markdown, gfm: true } expect(response).to be_successful response_body = Gitlab::Json.parse(response.body) # Some requests have the HTML in the `html` key, others in the `body` key. response_html = response_body['body'] ? response_body.fetch('body') : response_body.fetch('html') + # noinspection RubyResolve + normalized_response_html = normalize_html(response_html, normalizations) + + expect(normalized_response_html).to eq(normalized_html) + end + + def normalize_html(html, normalizations) + return html unless normalizations + + normalized_html = html.dup + normalizations.each_value do |normalization_entry| + normalization_entry.each do |normalization| + regex = normalization.fetch(:regex) + replacement = normalization.fetch(:replacement) + normalized_html.gsub!(%r{#{regex}}, replacement) + end + end - expect(response_html).to eq(html) + normalized_html end end end -- GitLab