From 0a445981459f9d5a0526741a784d7fdc6d2ccf19 Mon Sep 17 00:00:00 2001 From: Tan Le <tle@gitlab.com> Date: Wed, 2 Dec 2020 15:57:28 +1100 Subject: [PATCH] Replace whitelist wording in Banzai module We prefer a neutral tone when specifying a list of allowed elements (aka. allowlist). We need to bump our version of `html-pipeline` gem to `2.13.2` to bring in some changes in the method that we override. Related upstream change https://github.com/jch/html-pipeline/pull/339 --- Gemfile | 2 +- Gemfile.lock | 4 +- .../filter/ascii_doc_sanitization_filter.rb | 38 +++++++++---------- lib/banzai/filter/asset_proxy_filter.rb | 10 ++--- lib/banzai/filter/base_sanitization_filter.rb | 34 ++++++++--------- .../broadcast_message_sanitization_filter.rb | 10 ++--- lib/banzai/filter/sanitization_filter.rb | 22 +++++------ lib/banzai/pipeline/description_pipeline.rb | 4 +- .../banzai/filter/asset_proxy_filter_spec.rb | 10 ++--- ...adcast_message_sanitization_filter_spec.rb | 12 +++--- .../banzai/filter/sanitization_filter_spec.rb | 24 ++++++------ .../pipeline/description_pipeline_spec.rb | 2 +- spec/lib/banzai/pipeline/gfm_pipeline_spec.rb | 4 +- spec/lib/gitlab/asset_proxy_spec.rb | 4 +- .../sanitization_filter_shared_examples.rb | 4 +- 15 files changed, 92 insertions(+), 92 deletions(-) diff --git a/Gemfile b/Gemfile index b5ecfe56f2838..29c3f0c6eeafb 100644 --- a/Gemfile +++ b/Gemfile @@ -145,7 +145,7 @@ gem 'aws-sdk-s3', '~> 1' gem 'faraday_middleware-aws-sigv4', '~>0.3.0' # Markdown and HTML processing -gem 'html-pipeline', '~> 2.12' +gem 'html-pipeline', '~> 2.13.2' gem 'deckar01-task_list', '2.3.1' gem 'gitlab-markup', '~> 1.7.1' gem 'github-markup', '~> 1.7.0', require: 'github/markup' diff --git a/Gemfile.lock b/Gemfile.lock index bd3e26516a33e..778276b485b52 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -577,7 +577,7 @@ GEM hipchat (1.5.2) httparty mimemagic - html-pipeline (2.12.2) + html-pipeline (2.13.2) activesupport (>= 2) nokogiri (>= 1.4) html2text (0.2.0) @@ -1391,7 +1391,7 @@ DEPENDENCIES hashie-forbidden_attributes health_check (~> 3.0) hipchat (~> 1.5.0) - html-pipeline (~> 2.12) + html-pipeline (~> 2.13.2) html2text httparty (~> 0.16.4) icalendar diff --git a/lib/banzai/filter/ascii_doc_sanitization_filter.rb b/lib/banzai/filter/ascii_doc_sanitization_filter.rb index 11762c3bfb429..67f5baf4635aa 100644 --- a/lib/banzai/filter/ascii_doc_sanitization_filter.rb +++ b/lib/banzai/filter/ascii_doc_sanitization_filter.rb @@ -27,7 +27,7 @@ class AsciiDocSanitizationFilter < Banzai::Filter::BaseSanitizationFilter TABLE_GRID_CLASSES = %w(grid-all grid-rows grid-cols grid-none).freeze TABLE_STRIPES_CLASSES = %w(stripes-all stripes-odd stripes-even stripes-hover stripes-none).freeze - ELEMENT_CLASSES_WHITELIST = { + ELEMENT_CLASSES_ALLOWLIST = { span: %w(big small underline overline line-through).freeze, div: ALIGNMENT_BUILTINS_CLASSES + ['admonitionblock'].freeze, td: ['icon'].freeze, @@ -38,35 +38,35 @@ class AsciiDocSanitizationFilter < Banzai::Filter::BaseSanitizationFilter table: TABLE_FRAME_CLASSES + TABLE_GRID_CLASSES + TABLE_STRIPES_CLASSES }.freeze - def customize_whitelist(whitelist) + def customize_allowlist(allowlist) # Allow marks - whitelist[:elements].push('mark') + allowlist[:elements].push('mark') # Allow any classes in `span`, `i`, `div`, `td`, `ul`, `ol` and `a` elements # but then remove any unknown classes - whitelist[:attributes]['span'] = %w(class) - whitelist[:attributes]['div'].push('class') - whitelist[:attributes]['td'] = %w(class) - whitelist[:attributes]['i'] = %w(class) - whitelist[:attributes]['ul'] = %w(class) - whitelist[:attributes]['ol'] = %w(class) - whitelist[:attributes]['a'].push('class') - whitelist[:attributes]['table'] = %w(class) - whitelist[:transformers].push(self.class.remove_element_classes) + allowlist[:attributes]['span'] = %w(class) + allowlist[:attributes]['div'].push('class') + allowlist[:attributes]['td'] = %w(class) + allowlist[:attributes]['i'] = %w(class) + allowlist[:attributes]['ul'] = %w(class) + allowlist[:attributes]['ol'] = %w(class) + allowlist[:attributes]['a'].push('class') + allowlist[:attributes]['table'] = %w(class) + allowlist[:transformers].push(self.class.remove_element_classes) # Allow `id` in anchor and footnote elements - whitelist[:attributes]['a'].push('id') - whitelist[:attributes]['div'].push('id') + allowlist[:attributes]['a'].push('id') + allowlist[:attributes]['div'].push('id') # Allow `id` in heading elements for section anchors SECTION_HEADINGS.each do |header| - whitelist[:attributes][header] = %w(id) + allowlist[:attributes][header] = %w(id) end # Remove ids that are not explicitly allowed - whitelist[:transformers].push(self.class.remove_disallowed_ids) + allowlist[:transformers].push(self.class.remove_disallowed_ids) - whitelist + allowlist end class << self @@ -91,11 +91,11 @@ def remove_element_classes lambda do |env| node = env[:node] - return unless (classes_whitelist = ELEMENT_CLASSES_WHITELIST[node.name.to_sym]) + return unless (classes_allowlist = ELEMENT_CLASSES_ALLOWLIST[node.name.to_sym]) return unless node.has_attribute?('class') classes = node['class'].strip.split(' ') - allowed_classes = (classes & classes_whitelist) + allowed_classes = (classes & classes_allowlist) if allowed_classes.empty? node.remove_attribute('class') else diff --git a/lib/banzai/filter/asset_proxy_filter.rb b/lib/banzai/filter/asset_proxy_filter.rb index 8acd3917d81ee..55dc426edaf75 100644 --- a/lib/banzai/filter/asset_proxy_filter.rb +++ b/lib/banzai/filter/asset_proxy_filter.rb @@ -15,7 +15,7 @@ def validate needs(:asset_proxy, :asset_proxy_secret_key) if asset_proxy_enabled? end - def asset_host_whitelisted?(host) + def asset_host_allowed?(host) context[:asset_proxy_domain_regexp] ? context[:asset_proxy_domain_regexp].match?(host) : false end @@ -44,21 +44,21 @@ def self.initialize_settings Gitlab.config.asset_proxy['enabled'] = application_settings.asset_proxy_enabled Gitlab.config.asset_proxy['url'] = application_settings.asset_proxy_url Gitlab.config.asset_proxy['secret_key'] = application_settings.asset_proxy_secret_key - Gitlab.config.asset_proxy['whitelist'] = determine_whitelist(application_settings) - Gitlab.config.asset_proxy['domain_regexp'] = compile_whitelist(Gitlab.config.asset_proxy.whitelist) + Gitlab.config.asset_proxy['allowlist'] = determine_allowlist(application_settings) + Gitlab.config.asset_proxy['domain_regexp'] = compile_allowlist(Gitlab.config.asset_proxy.allowlist) else Gitlab.config.asset_proxy['enabled'] = ::ApplicationSetting.defaults[:asset_proxy_enabled] end end - def self.compile_whitelist(domain_list) + def self.compile_allowlist(domain_list) return if domain_list.empty? escaped = domain_list.map { |domain| Regexp.escape(domain).gsub('\*', '.*?') } Regexp.new("^(#{escaped.join('|')})$", Regexp::IGNORECASE) end - def self.determine_whitelist(application_settings) + def self.determine_allowlist(application_settings) application_settings.asset_proxy_whitelist.presence || [Gitlab.config.gitlab.host] end end diff --git a/lib/banzai/filter/base_sanitization_filter.rb b/lib/banzai/filter/base_sanitization_filter.rb index 4f9e8cffd116f..c63453f94ca16 100644 --- a/lib/banzai/filter/base_sanitization_filter.rb +++ b/lib/banzai/filter/base_sanitization_filter.rb @@ -16,42 +16,42 @@ class BaseSanitizationFilter < HTML::Pipeline::SanitizationFilter UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze - def whitelist - strong_memoize(:whitelist) do - whitelist = super.deep_dup + def allowlist + strong_memoize(:allowlist) do + allowlist = super.deep_dup # Allow span elements - whitelist[:elements].push('span') + allowlist[:elements].push('span') # Allow data-math-style attribute in order to support LaTeX formatting - whitelist[:attributes]['code'] = %w(data-math-style) - whitelist[:attributes]['pre'] = %w(data-math-style data-mermaid-style data-kroki-style) + allowlist[:attributes]['code'] = %w(data-math-style) + allowlist[:attributes]['pre'] = %w(data-math-style data-mermaid-style data-kroki-style) # Allow html5 details/summary elements - whitelist[:elements].push('details') - whitelist[:elements].push('summary') + allowlist[:elements].push('details') + allowlist[:elements].push('summary') # Allow abbr elements with title attribute - whitelist[:elements].push('abbr') - whitelist[:attributes]['abbr'] = %w(title) + allowlist[:elements].push('abbr') + allowlist[:attributes]['abbr'] = %w(title) # Disallow `name` attribute globally, allow on `a` - whitelist[:attributes][:all].delete('name') - whitelist[:attributes]['a'].push('name') + allowlist[:attributes][:all].delete('name') + allowlist[:attributes]['a'].push('name') # Allow any protocol in `a` elements # and then remove links with unsafe protocols - whitelist[:protocols].delete('a') - whitelist[:transformers].push(self.class.method(:remove_unsafe_links)) + allowlist[:protocols].delete('a') + allowlist[:transformers].push(self.class.method(:remove_unsafe_links)) # Remove `rel` attribute from `a` elements - whitelist[:transformers].push(self.class.remove_rel) + allowlist[:transformers].push(self.class.remove_rel) - customize_whitelist(whitelist) + customize_allowlist(allowlist) end end - def customize_whitelist(whitelist) + def customize_allowlist(allowlist) raise NotImplementedError end diff --git a/lib/banzai/filter/broadcast_message_sanitization_filter.rb b/lib/banzai/filter/broadcast_message_sanitization_filter.rb index 042293170c86c..183908d02a9a3 100644 --- a/lib/banzai/filter/broadcast_message_sanitization_filter.rb +++ b/lib/banzai/filter/broadcast_message_sanitization_filter.rb @@ -6,14 +6,14 @@ module Filter # # Extends Banzai::Filter::BaseSanitizationFilter with specific rules. class BroadcastMessageSanitizationFilter < Banzai::Filter::BaseSanitizationFilter - def customize_whitelist(whitelist) - whitelist[:elements].push('br') + def customize_allowlist(allowlist) + allowlist[:elements].push('br') - whitelist[:attributes]['a'].push('class', 'style') + allowlist[:attributes]['a'].push('class', 'style') - whitelist[:css] = { properties: %w(color border background padding margin text-decoration) } + allowlist[:css] = { properties: %w(color border background padding margin text-decoration) } - whitelist + allowlist end end end diff --git a/lib/banzai/filter/sanitization_filter.rb b/lib/banzai/filter/sanitization_filter.rb index f57e57890f8ed..f6314040f286a 100644 --- a/lib/banzai/filter/sanitization_filter.rb +++ b/lib/banzai/filter/sanitization_filter.rb @@ -9,26 +9,26 @@ class SanitizationFilter < Banzai::Filter::BaseSanitizationFilter # Styles used by Markdown for table alignment TABLE_ALIGNMENT_PATTERN = /text-align: (?<alignment>center|left|right)/.freeze - def customize_whitelist(whitelist) - # Allow table alignment; we whitelist specific text-align values in a + def customize_allowlist(allowlist) + # Allow table alignment; we allow specific text-align values in a # transformer below - whitelist[:attributes]['th'] = %w(style) - whitelist[:attributes]['td'] = %w(style) - whitelist[:css] = { properties: ['text-align'] } + allowlist[:attributes]['th'] = %w(style) + allowlist[:attributes]['td'] = %w(style) + allowlist[:css] = { properties: ['text-align'] } # Allow the 'data-sourcepos' from CommonMark on all elements - whitelist[:attributes][:all].push('data-sourcepos') + allowlist[:attributes][:all].push('data-sourcepos') # Remove any `style` properties not required for table alignment - whitelist[:transformers].push(self.class.remove_unsafe_table_style) + allowlist[:transformers].push(self.class.remove_unsafe_table_style) # Allow `id` in a and li elements for footnotes # and remove any `id` properties not matching for footnotes - whitelist[:attributes]['a'].push('id') - whitelist[:attributes]['li'] = %w(id) - whitelist[:transformers].push(self.class.remove_non_footnote_ids) + allowlist[:attributes]['a'].push('id') + allowlist[:attributes]['li'] = %w(id) + allowlist[:transformers].push(self.class.remove_non_footnote_ids) - whitelist + allowlist end class << self diff --git a/lib/banzai/pipeline/description_pipeline.rb b/lib/banzai/pipeline/description_pipeline.rb index d5ff9b025cc9d..8f8ce1cbd4181 100644 --- a/lib/banzai/pipeline/description_pipeline.rb +++ b/lib/banzai/pipeline/description_pipeline.rb @@ -3,14 +3,14 @@ module Banzai module Pipeline class DescriptionPipeline < FullPipeline - WHITELIST = Banzai::Filter::SanitizationFilter::LIMITED.deep_dup.merge( + ALLOWLIST = Banzai::Filter::SanitizationFilter::LIMITED.deep_dup.merge( elements: Banzai::Filter::SanitizationFilter::LIMITED[:elements] - %w(pre code img ol ul li) ) def self.transform_context(context) super(context).merge( # SanitizationFilter - whitelist: WHITELIST + allowlist: ALLOWLIST ) end end diff --git a/spec/lib/banzai/filter/asset_proxy_filter_spec.rb b/spec/lib/banzai/filter/asset_proxy_filter_spec.rb index 2a4ee28130b9a..1f886059bf653 100644 --- a/spec/lib/banzai/filter/asset_proxy_filter_spec.rb +++ b/spec/lib/banzai/filter/asset_proxy_filter_spec.rb @@ -35,8 +35,8 @@ def image(path) expect(Gitlab.config.asset_proxy.enabled).to be_truthy expect(Gitlab.config.asset_proxy.secret_key).to eq 'shared-secret' expect(Gitlab.config.asset_proxy.url).to eq 'https://assets.example.com' - expect(Gitlab.config.asset_proxy.whitelist).to eq %w(gitlab.com *.mydomain.com) - expect(Gitlab.config.asset_proxy.domain_regexp).to eq /^(gitlab\.com|.*?\.mydomain\.com)$/i + expect(Gitlab.config.asset_proxy.allowlist).to eq %w(gitlab.com *.mydomain.com) + expect(Gitlab.config.asset_proxy.domain_regexp).to eq(/^(gitlab\.com|.*?\.mydomain\.com)$/i) end context 'when whitelist is empty' do @@ -46,7 +46,7 @@ def image(path) described_class.initialize_settings - expect(Gitlab.config.asset_proxy.whitelist).to eq [Gitlab.config.gitlab.host] + expect(Gitlab.config.asset_proxy.allowlist).to eq [Gitlab.config.gitlab.host] end end end @@ -56,8 +56,8 @@ def image(path) stub_asset_proxy_setting(enabled: true) stub_asset_proxy_setting(secret_key: 'shared-secret') stub_asset_proxy_setting(url: 'https://assets.example.com') - stub_asset_proxy_setting(whitelist: %W(gitlab.com *.mydomain.com #{Gitlab.config.gitlab.host})) - stub_asset_proxy_setting(domain_regexp: described_class.compile_whitelist(Gitlab.config.asset_proxy.whitelist)) + stub_asset_proxy_setting(allowlist: %W(gitlab.com *.mydomain.com #{Gitlab.config.gitlab.host})) + stub_asset_proxy_setting(domain_regexp: described_class.compile_allowlist(Gitlab.config.asset_proxy.allowlist)) @context = described_class.transform_context({}) end diff --git a/spec/lib/banzai/filter/broadcast_message_sanitization_filter_spec.rb b/spec/lib/banzai/filter/broadcast_message_sanitization_filter_spec.rb index 1f65268bd3ca5..67b480f897386 100644 --- a/spec/lib/banzai/filter/broadcast_message_sanitization_filter_spec.rb +++ b/spec/lib/banzai/filter/broadcast_message_sanitization_filter_spec.rb @@ -5,9 +5,9 @@ RSpec.describe Banzai::Filter::BroadcastMessageSanitizationFilter do include FilterSpecHelper - it_behaves_like 'default whitelist' + it_behaves_like 'default allowlist' - describe 'custom whitelist' do + describe 'custom allowlist' do it_behaves_like 'XSS prevention' it_behaves_like 'sanitize link' @@ -26,19 +26,19 @@ end context 'when `a` elements have `style` attribute' do - let(:whitelisted_style) { 'color: red; border: blue; background: green; padding: 10px; margin: 10px; text-decoration: underline;' } + let(:allowed_style) { 'color: red; border: blue; background: green; padding: 10px; margin: 10px; text-decoration: underline;' } context 'allows specific properties' do - let(:exp) { %{<a href="#" style="#{whitelisted_style}">Stylish Link</a>} } + let(:exp) { %{<a href="#" style="#{allowed_style}">Stylish Link</a>} } it { is_expected.to eq(exp) } end it 'disallows other properties in `style` attribute on `a` elements' do - style = [whitelisted_style, 'position: fixed'].join(';') + style = [allowed_style, 'position: fixed'].join(';') doc = filter(%{<a href="#" style="#{style}">Stylish Link</a>}) - expect(doc.at_css('a')['style']).to eq(whitelisted_style) + expect(doc.at_css('a')['style']).to eq(allowed_style) end end diff --git a/spec/lib/banzai/filter/sanitization_filter_spec.rb b/spec/lib/banzai/filter/sanitization_filter_spec.rb index 09dcd5518ff77..bc4b60dfe6040 100644 --- a/spec/lib/banzai/filter/sanitization_filter_spec.rb +++ b/spec/lib/banzai/filter/sanitization_filter_spec.rb @@ -5,31 +5,31 @@ RSpec.describe Banzai::Filter::SanitizationFilter do include FilterSpecHelper - it_behaves_like 'default whitelist' + it_behaves_like 'default allowlist' - describe 'custom whitelist' do + describe 'custom allowlist' do it_behaves_like 'XSS prevention' it_behaves_like 'sanitize link' - it 'customizes the whitelist only once' do + it 'customizes the allowlist only once' do instance = described_class.new('Foo') - control_count = instance.whitelist[:transformers].size + control_count = instance.allowlist[:transformers].size - 3.times { instance.whitelist } + 3.times { instance.allowlist } - expect(instance.whitelist[:transformers].size).to eq control_count + expect(instance.allowlist[:transformers].size).to eq control_count end - it 'customizes the whitelist only once for different instances' do + it 'customizes the allowlist only once for different instances' do instance1 = described_class.new('Foo1') instance2 = described_class.new('Foo2') - control_count = instance1.whitelist[:transformers].size + control_count = instance1.allowlist[:transformers].size - instance1.whitelist - instance2.whitelist + instance1.allowlist + instance2.allowlist - expect(instance1.whitelist[:transformers].size).to eq control_count - expect(instance2.whitelist[:transformers].size).to eq control_count + expect(instance1.allowlist[:transformers].size).to eq control_count + expect(instance2.allowlist[:transformers].size).to eq control_count end it 'sanitizes `class` attribute from all elements' do diff --git a/spec/lib/banzai/pipeline/description_pipeline_spec.rb b/spec/lib/banzai/pipeline/description_pipeline_spec.rb index 82d4f883e0dac..be553433e9eff 100644 --- a/spec/lib/banzai/pipeline/description_pipeline_spec.rb +++ b/spec/lib/banzai/pipeline/description_pipeline_spec.rb @@ -21,7 +21,7 @@ def parse(html) stub_commonmark_sourcepos_disabled end - it 'uses a limited whitelist' do + it 'uses a limited allowlist' do doc = parse('# Description') expect(doc.strip).to eq 'Description' diff --git a/spec/lib/banzai/pipeline/gfm_pipeline_spec.rb b/spec/lib/banzai/pipeline/gfm_pipeline_spec.rb index 247f459163271..31047b9494ac7 100644 --- a/spec/lib/banzai/pipeline/gfm_pipeline_spec.rb +++ b/spec/lib/banzai/pipeline/gfm_pipeline_spec.rb @@ -176,8 +176,8 @@ stub_asset_proxy_setting(enabled: true) stub_asset_proxy_setting(secret_key: 'shared-secret') stub_asset_proxy_setting(url: 'https://assets.example.com') - stub_asset_proxy_setting(whitelist: %W(gitlab.com *.mydomain.com #{Gitlab.config.gitlab.host})) - stub_asset_proxy_setting(domain_regexp: Banzai::Filter::AssetProxyFilter.compile_whitelist(Gitlab.config.asset_proxy.whitelist)) + stub_asset_proxy_setting(allowlist: %W(gitlab.com *.mydomain.com #{Gitlab.config.gitlab.host})) + stub_asset_proxy_setting(domain_regexp: Banzai::Filter::AssetProxyFilter.compile_allowlist(Gitlab.config.asset_proxy.allowlist)) end it 'replaces a lazy loaded img src' do diff --git a/spec/lib/gitlab/asset_proxy_spec.rb b/spec/lib/gitlab/asset_proxy_spec.rb index 73b101c0dd8bf..7d7952d5741f1 100644 --- a/spec/lib/gitlab/asset_proxy_spec.rb +++ b/spec/lib/gitlab/asset_proxy_spec.rb @@ -17,12 +17,12 @@ context 'when asset proxy is enabled' do before do - stub_asset_proxy_setting(whitelist: %w(gitlab.com *.mydomain.com)) + stub_asset_proxy_setting(allowlist: %w(gitlab.com *.mydomain.com)) stub_asset_proxy_setting( enabled: true, url: 'https://assets.example.com', secret_key: 'shared-secret', - domain_regexp: Banzai::Filter::AssetProxyFilter.compile_whitelist(Gitlab.config.asset_proxy.whitelist) + domain_regexp: Banzai::Filter::AssetProxyFilter.compile_allowlist(Gitlab.config.asset_proxy.allowlist) ) end diff --git a/spec/support/shared_examples/lib/banzai/filters/sanitization_filter_shared_examples.rb b/spec/support/shared_examples/lib/banzai/filters/sanitization_filter_shared_examples.rb index 134e38833cf79..b5c07f45d5911 100644 --- a/spec/support/shared_examples/lib/banzai/filters/sanitization_filter_shared_examples.rb +++ b/spec/support/shared_examples/lib/banzai/filters/sanitization_filter_shared_examples.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true -RSpec.shared_examples 'default whitelist' do - it 'sanitizes tags that are not whitelisted' do +RSpec.shared_examples 'default allowlist' do + it 'sanitizes tags that are not allowed' do act = %q{<textarea>no inputs</textarea> and <blink>no blinks</blink>} exp = 'no inputs and no blinks' expect(filter(act).to_html).to eq exp -- GitLab