From 91b9690e165cec24d2f1167c285b976739c860e9 Mon Sep 17 00:00:00 2001
From: Ryan Cobb <rcobb@gitlab.com>
Date: Wed, 5 Aug 2020 22:01:31 +0000
Subject: [PATCH] Add metrics dashboard json schema validator

Adds a metrics dashboard json schema validator to be used in a metrics
ingestion pipeline.
---
 Gemfile                                       |  1 +
 Gemfile.lock                                  | 10 +++
 lib/gitlab/metrics/dashboard/validator.rb     | 30 +++++++++
 .../metrics/dashboard/validator/client.rb     | 49 ++++++++++++++
 .../dashboard/validator/custom_formats.rb     | 23 +++++++
 .../metrics/dashboard/validator/errors.rb     | 37 ++++++++++
 .../validator/post_schema_validator.rb        | 31 +++++++++
 locale/gitlab.pot                             |  6 ++
 .../dashboard/duplicate_id_dashboard.yml      | 67 +++++++++++++++++++
 .../metrics/dashboard/invalid_dashboard.yml   | 67 +++++++++++++++++++
 .../dashboard/validator/client_spec.rb        | 29 ++++++++
 .../validator/custom_formats_spec.rb          | 15 +++++
 .../dashboard/validator/errors_spec.rb        | 38 +++++++++++
 .../validator/post_schema_validator_spec.rb   | 20 ++++++
 .../metrics/dashboard/validator_spec.rb       | 64 ++++++++++++++++++
 15 files changed, 487 insertions(+)
 create mode 100644 lib/gitlab/metrics/dashboard/validator.rb
 create mode 100644 lib/gitlab/metrics/dashboard/validator/client.rb
 create mode 100644 lib/gitlab/metrics/dashboard/validator/custom_formats.rb
 create mode 100644 lib/gitlab/metrics/dashboard/validator/errors.rb
 create mode 100644 lib/gitlab/metrics/dashboard/validator/post_schema_validator.rb
 create mode 100644 spec/fixtures/lib/gitlab/metrics/dashboard/duplicate_id_dashboard.yml
 create mode 100644 spec/fixtures/lib/gitlab/metrics/dashboard/invalid_dashboard.yml
 create mode 100644 spec/lib/gitlab/metrics/dashboard/validator/client_spec.rb
 create mode 100644 spec/lib/gitlab/metrics/dashboard/validator/custom_formats_spec.rb
 create mode 100644 spec/lib/gitlab/metrics/dashboard/validator/errors_spec.rb
 create mode 100644 spec/lib/gitlab/metrics/dashboard/validator/post_schema_validator_spec.rb
 create mode 100644 spec/lib/gitlab/metrics/dashboard/validator_spec.rb

diff --git a/Gemfile b/Gemfile
index f346bdfac7082..3466a8fa208bd 100644
--- a/Gemfile
+++ b/Gemfile
@@ -507,5 +507,6 @@ gem 'valid_email', '~> 0.1'
 # JSON
 gem 'json', '~> 2.3.0'
 gem 'json-schema', '~> 2.8.0'
+gem 'json_schemer', '~> 0.2.12'
 gem 'oj', '~> 3.10.6'
 gem 'multi_json', '~> 1.14.1'
diff --git a/Gemfile.lock b/Gemfile.lock
index 5f4f5252f5a0e..07997637f3716 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -272,6 +272,8 @@ GEM
       dry-equalizer (~> 0.3)
       dry-inflector (~> 0.1, >= 0.1.2)
       dry-logic (~> 1.0, >= 1.0.2)
+    ecma-re-validator (0.2.1)
+      regexp_parser (~> 1.2)
     ed25519 (1.2.4)
     elasticsearch (6.8.0)
       elasticsearch-api (= 6.8.0)
@@ -522,6 +524,7 @@ GEM
       temple (>= 0.8.2)
       thor
       tilt
+    hana (1.3.6)
     hangouts-chat (0.0.5)
     hashdiff (0.3.8)
     hashie (3.6.0)
@@ -582,6 +585,11 @@ GEM
       bindata
     json-schema (2.8.0)
       addressable (>= 2.4)
+    json_schemer (0.2.12)
+      ecma-re-validator (~> 0.2)
+      hana (~> 1.3)
+      regexp_parser (~> 1.5)
+      uri_template (~> 0.7)
     jwt (2.1.0)
     kaminari (1.2.1)
       activesupport (>= 4.1.0)
@@ -1135,6 +1143,7 @@ GEM
       equalizer (~> 0.0.9)
       parser (>= 2.6.5)
       procto (~> 0.0.2)
+    uri_template (0.7.0)
     valid_email (0.1.3)
       activemodel
       mail (>= 2.6.1)
@@ -1305,6 +1314,7 @@ DEPENDENCIES
   js_regex (~> 3.4)
   json (~> 2.3.0)
   json-schema (~> 2.8.0)
+  json_schemer (~> 0.2.12)
   jwt (~> 2.1.0)
   kaminari (~> 1.0)
   knapsack (~> 1.17)
diff --git a/lib/gitlab/metrics/dashboard/validator.rb b/lib/gitlab/metrics/dashboard/validator.rb
new file mode 100644
index 0000000000000..c198ce4864945
--- /dev/null
+++ b/lib/gitlab/metrics/dashboard/validator.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Metrics
+    module Dashboard
+      module Validator
+        DASHBOARD_SCHEMA_PATH = 'lib/gitlab/metrics/dashboard/validator/schemas/dashboard.json'.freeze
+
+        class << self
+          def validate(content, schema_path = DASHBOARD_SCHEMA_PATH, project: nil)
+            errors = _validate(content, schema_path, project: project)
+            errors.empty?
+          end
+
+          def validate!(content, schema_path = DASHBOARD_SCHEMA_PATH, project: nil)
+            errors = _validate(content, schema_path, project: project)
+            errors.empty? || raise(errors.first)
+          end
+
+          private
+
+          def _validate(content, schema_path, project)
+            client = Client.new(content, schema_path, project: project)
+            client.execute
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/metrics/dashboard/validator/client.rb b/lib/gitlab/metrics/dashboard/validator/client.rb
new file mode 100644
index 0000000000000..6143edbd248ce
--- /dev/null
+++ b/lib/gitlab/metrics/dashboard/validator/client.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Metrics
+    module Dashboard
+      module Validator
+        class Client
+          # @param content [Hash] Representing a raw, unprocessed
+          # dashboard object
+          # @param schema_path [String] Representing path to dashboard schema file
+          def initialize(content, schema_path, project: nil)
+            @content = content
+            @schema_path = schema_path
+            @project = project
+          end
+
+          def execute
+            errors = validate_against_schema
+            errors += post_schema_validator.validate
+
+            errors.compact
+          end
+
+          private
+
+          attr_reader :content, :schema_path, :project
+
+          def custom_formats
+            @custom_formats ||= CustomFormats.new
+          end
+
+          def post_schema_validator
+            @post_schema_validator ||= PostSchemaValidator.new(project: project, metric_ids: custom_formats.metric_ids_cache)
+          end
+
+          def schemer
+            @schemer ||= JSONSchemer.schema(Pathname.new(schema_path), formats: custom_formats.format_handlers)
+          end
+
+          def validate_against_schema
+            schemer.validate(content).map do |error|
+              Errors::SchemaValidationError.new(error)
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/metrics/dashboard/validator/custom_formats.rb b/lib/gitlab/metrics/dashboard/validator/custom_formats.rb
new file mode 100644
index 0000000000000..485e80ad1b74d
--- /dev/null
+++ b/lib/gitlab/metrics/dashboard/validator/custom_formats.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Metrics
+    module Dashboard
+      module Validator
+        class CustomFormats
+          def format_handlers
+            # Key is custom JSON Schema format name. Value is a proc that takes data and schema and handles
+            # validations.
+            @format_handlers ||= {
+              "add_to_metric_id_cache" => ->(data, schema) { metric_ids_cache << data }
+            }
+          end
+
+          def metric_ids_cache
+            @metric_ids_cache ||= []
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/metrics/dashboard/validator/errors.rb b/lib/gitlab/metrics/dashboard/validator/errors.rb
new file mode 100644
index 0000000000000..104eb162209c8
--- /dev/null
+++ b/lib/gitlab/metrics/dashboard/validator/errors.rb
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Metrics
+    module Dashboard
+      module Validator
+        module Errors
+          InvalidDashboardError = Class.new(StandardError)
+
+          class SchemaValidationError < InvalidDashboardError
+            def initialize(error = {})
+              if error.is_a?(Hash) && error.present?
+                data            = error["data"]
+                data_pointer    = error["data_pointer"]
+                schema          = error["schema"]
+                schema_pointer  = error["schema_pointer"]
+
+                msg = _("'%{data}' is invalid at '%{data_pointer}'. Should be '%{schema}' due to schema definition at '%{schema_pointer}'") %
+                  { data: data, data_pointer: data_pointer, schema: schema, schema_pointer: schema_pointer }
+              else
+                msg = "Dashboard failed schema validation"
+              end
+
+              super(msg)
+            end
+          end
+
+          class DuplicateMetricIds < InvalidDashboardError
+            def initialize
+              super(_("metric_id must be unique across a project"))
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/metrics/dashboard/validator/post_schema_validator.rb b/lib/gitlab/metrics/dashboard/validator/post_schema_validator.rb
new file mode 100644
index 0000000000000..f235a4089ae1e
--- /dev/null
+++ b/lib/gitlab/metrics/dashboard/validator/post_schema_validator.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Metrics
+    module Dashboard
+      module Validator
+        class PostSchemaValidator
+          def initialize(project: nil, metric_ids: [])
+            @project = project
+            @metric_ids = metric_ids
+          end
+
+          def validate
+            [uniq_metric_ids_across_project].compact
+          end
+
+          private
+
+          attr_reader :project, :metric_ids
+
+          def uniq_metric_ids_across_project
+            # TODO: modify this method to check metric identifier uniqueness across project once we start
+            # recording dashboard_path https://gitlab.com/gitlab-org/gitlab/-/merge_requests/38237
+
+            Validator::Errors::DuplicateMetricIds.new if metric_ids.uniq!
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/locale/gitlab.pot b/locale/gitlab.pot
index 5ee29dd0904cc..037718f3b72a9 100644
--- a/locale/gitlab.pot
+++ b/locale/gitlab.pot
@@ -800,6 +800,9 @@ msgstr ""
 msgid "&lt;project name&gt;"
 msgstr ""
 
+msgid "'%{data}' is invalid at '%{data_pointer}'. Should be '%{schema}' due to schema definition at '%{schema_pointer}'"
+msgstr ""
+
 msgid "'%{level}' is not a valid visibility level"
 msgstr ""
 
@@ -28837,6 +28840,9 @@ msgstr[1] ""
 msgid "merged %{timeAgo}"
 msgstr ""
 
+msgid "metric_id must be unique across a project"
+msgstr ""
+
 msgid "missing"
 msgstr ""
 
diff --git a/spec/fixtures/lib/gitlab/metrics/dashboard/duplicate_id_dashboard.yml b/spec/fixtures/lib/gitlab/metrics/dashboard/duplicate_id_dashboard.yml
new file mode 100644
index 0000000000000..09a87703bfa0f
--- /dev/null
+++ b/spec/fixtures/lib/gitlab/metrics/dashboard/duplicate_id_dashboard.yml
@@ -0,0 +1,67 @@
+dashboard: 'Test Dashboard'
+priority: 1
+links:
+- title: Link 1
+  url: https://gitlab.com
+- title: Link 2
+  url: https://docs.gitlab.com
+templating:
+  variables:
+    text_variable_full_syntax:
+      label: 'Variable 1'
+      type: text
+      options:
+        default_value: 'default'
+    text_variable_simple_syntax: 'default value'
+    custom_variable_simple_syntax: ['value1', 'value2', 'value3']
+    custom_variable_full_syntax:
+      label: 'Variable 2'
+      type: custom
+      options:
+        values:
+        - value: 'value option 1'
+          text: 'Option 1'
+        - value: 'value_option_2'
+          text: 'Option 2'
+          default: true
+    metric_label_values_variable:
+      label: 'Variable 3'
+      type: metric_label_values
+      options:
+        series_selector: 'backend:haproxy_backend_availability:ratio{env="{{env}}"}'
+        label: 'backend'
+panel_groups:
+- group: Group A
+  priority: 1
+  panels:
+  - title: "Super Chart A1"
+    type: "area-chart"
+    y_label: "y_label"
+    weight: 1
+    max_value: 1
+    metrics:
+    - id: metric_a1
+      query_range: 'query'
+      unit: unit
+      label: Legend Label
+  - title: "Super Chart A2"
+    type: "area-chart"
+    y_label: "y_label"
+    weight: 2
+    metrics:
+    - id: metric_a1
+      query_range: 'query'
+      label: Legend Label
+      unit: unit
+- group: Group B
+  priority: 10
+  panels:
+  - title: "Super Chart B"
+    type: "area-chart"
+    y_label: "y_label"
+    weight: 1
+    metrics:
+    - id: metric_a1
+      query_range: 'query'
+      unit: unit
+      label: Legend Label
diff --git a/spec/fixtures/lib/gitlab/metrics/dashboard/invalid_dashboard.yml b/spec/fixtures/lib/gitlab/metrics/dashboard/invalid_dashboard.yml
new file mode 100644
index 0000000000000..312053d277091
--- /dev/null
+++ b/spec/fixtures/lib/gitlab/metrics/dashboard/invalid_dashboard.yml
@@ -0,0 +1,67 @@
+dashboard: 'Test Dashboard'
+priority: 1
+links:
+- title: Link 1
+  url: https://gitlab.com
+- title: Link 2
+  url: https://docs.gitlab.com
+templating:
+  variables:
+    text_variable_full_syntax:
+      label: 'Variable 1'
+      type: text
+      options:
+        default_value: 'default'
+    text_variable_simple_syntax: 'default value'
+    custom_variable_simple_syntax: ['value1', 'value2', 'value3']
+    custom_variable_full_syntax:
+      label: 'Variable 2'
+      type: custom
+      options:
+        values:
+        - value: 'value option 1'
+          text: 'Option 1'
+        - value: 'value_option_2'
+          text: 'Option 2'
+          default: true
+    metric_label_values_variable:
+      label: 'Variable 3'
+      type: metric_label_values
+      options:
+        series_selector: 'backend:haproxy_backend_availability:ratio{env="{{env}}"}'
+        label: 'backend'
+panel_groups:
+- group: Group A
+  priority: 1
+  panels:
+  - title: "Super Chart A1"
+    type: "area-chart"
+    y_label: "y_label"
+    weight: this_should_be_a_int
+    max_value: 1
+    metrics:
+    - id: metric_a1
+      query_range: 'query'
+      unit: unit
+      label: Legend Label
+  - title: "Super Chart A2"
+    type: "area-chart"
+    y_label: "y_label"
+    weight: 2
+    metrics:
+    - id: metric_a2
+      query_range: 'query'
+      label: Legend Label
+      unit: unit
+- group: Group B
+  priority: 10
+  panels:
+  - title: "Super Chart B"
+    type: "area-chart"
+    y_label: "y_label"
+    weight: 1
+    metrics:
+    - id: metric_b
+      query_range: 'query'
+      unit: unit
+      label: Legend Label
diff --git a/spec/lib/gitlab/metrics/dashboard/validator/client_spec.rb b/spec/lib/gitlab/metrics/dashboard/validator/client_spec.rb
new file mode 100644
index 0000000000000..9b13fb55764c3
--- /dev/null
+++ b/spec/lib/gitlab/metrics/dashboard/validator/client_spec.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Metrics::Dashboard::Validator::Client do
+  include MetricsDashboardHelpers
+
+  let_it_be(:schema_path) { 'lib/gitlab/metrics/dashboard/validator/schemas/dashboard.json' }
+
+  subject { described_class.new(dashboard, schema_path) }
+
+  describe '#execute' do
+    context 'with no validation errors' do
+      let(:dashboard) { load_sample_dashboard }
+
+      it 'returns empty array' do
+        expect(subject.execute).to eq([])
+      end
+    end
+
+    context 'with validation errors' do
+      let(:dashboard) { load_dashboard_yaml(fixture_file('lib/gitlab/metrics/dashboard/invalid_dashboard.yml')) }
+
+      it 'returns array of error objects' do
+        expect(subject.execute).to all(be_a(Gitlab::Metrics::Dashboard::Validator::Errors::SchemaValidationError))
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/metrics/dashboard/validator/custom_formats_spec.rb b/spec/lib/gitlab/metrics/dashboard/validator/custom_formats_spec.rb
new file mode 100644
index 0000000000000..129fb631f3e2b
--- /dev/null
+++ b/spec/lib/gitlab/metrics/dashboard/validator/custom_formats_spec.rb
@@ -0,0 +1,15 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Metrics::Dashboard::Validator::CustomFormats do
+  describe '#format_handlers' do
+    describe 'add_to_metric_id_cache' do
+      it 'adds data to metric id cache' do
+        subject.format_handlers['add_to_metric_id_cache'].call('metric_id', '_schema')
+
+        expect(subject.metric_ids_cache).to eq(["metric_id"])
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/metrics/dashboard/validator/errors_spec.rb b/spec/lib/gitlab/metrics/dashboard/validator/errors_spec.rb
new file mode 100644
index 0000000000000..bf1629e33dd3c
--- /dev/null
+++ b/spec/lib/gitlab/metrics/dashboard/validator/errors_spec.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Metrics::Dashboard::Validator::Errors do
+  describe Gitlab::Metrics::Dashboard::Validator::Errors::SchemaValidationError do
+    context 'valid error hash from jsonschemer' do
+      let(:error_hash) do
+        {
+          'data'            => 'data',
+          'data_pointer'    => 'data_pointer',
+          'schema'          => 'schema',
+          'schema_pointer'  => 'schema_pointer'
+        }
+      end
+
+      it 'formats message' do
+        expect(described_class.new(error_hash).message).to eq(
+          "'data' is invalid at 'data_pointer'. Should be 'schema' due to schema definition at 'schema_pointer'"
+        )
+      end
+    end
+
+    context 'empty error hash' do
+      let(:error_hash) { {} }
+
+      it 'uses default error message' do
+        expect(described_class.new(error_hash).message).to eq('Dashboard failed schema validation')
+      end
+    end
+  end
+
+  describe Gitlab::Metrics::Dashboard::Validator::Errors::DuplicateMetricIds do
+    it 'has custom error message' do
+      expect(described_class.new.message).to eq('metric_id must be unique across a project')
+    end
+  end
+end
diff --git a/spec/lib/gitlab/metrics/dashboard/validator/post_schema_validator_spec.rb b/spec/lib/gitlab/metrics/dashboard/validator/post_schema_validator_spec.rb
new file mode 100644
index 0000000000000..8cc2f8e652e1b
--- /dev/null
+++ b/spec/lib/gitlab/metrics/dashboard/validator/post_schema_validator_spec.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Metrics::Dashboard::Validator::PostSchemaValidator do
+  describe '#validate' do
+    context 'unique metric ids' do
+      it 'returns blank array' do
+        expect(described_class.new(metric_ids: [1, 2, 3]).validate).to eq([])
+      end
+    end
+
+    context 'duplicate metric ids' do
+      it 'raises error' do
+        expect(described_class.new(metric_ids: [1, 1]).validate)
+          .to eq([Gitlab::Metrics::Dashboard::Validator::Errors::DuplicateMetricIds])
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/metrics/dashboard/validator_spec.rb b/spec/lib/gitlab/metrics/dashboard/validator_spec.rb
new file mode 100644
index 0000000000000..22c6ec310e2fd
--- /dev/null
+++ b/spec/lib/gitlab/metrics/dashboard/validator_spec.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+RSpec.describe Gitlab::Metrics::Dashboard::Validator do
+  include MetricsDashboardHelpers
+
+  let_it_be(:valid_dashboard) { load_sample_dashboard }
+  let_it_be(:invalid_dashboard) { load_dashboard_yaml(fixture_file('lib/gitlab/metrics/dashboard/invalid_dashboard.yml')) }
+  let_it_be(:duplicate_id_dashboard) { load_dashboard_yaml(fixture_file('lib/gitlab/metrics/dashboard/duplicate_id_dashboard.yml')) }
+
+  describe '#validate' do
+    context 'valid dashboard' do
+      it 'returns true' do
+        expect(described_class.validate(valid_dashboard)).to be true
+      end
+    end
+
+    context 'invalid dashboard' do
+      context 'invalid schema' do
+        it 'returns false' do
+          expect(described_class.validate(invalid_dashboard)).to be false
+        end
+      end
+
+      context 'duplicate metric ids' do
+        context 'with no project given' do
+          it 'checks against given dashboard and returns false' do
+            expect(described_class.validate(duplicate_id_dashboard)).to be false
+          end
+        end
+      end
+    end
+  end
+
+  describe '#validate!' do
+    context 'valid dashboard' do
+      it 'returns true' do
+        expect(described_class.validate!(valid_dashboard)).to be true
+      end
+    end
+
+    context 'invalid dashboard' do
+      context 'invalid schema' do
+        it 'raises error' do
+          expect { described_class.validate!(invalid_dashboard) }
+            .to raise_error(Gitlab::Metrics::Dashboard::Validator::Errors::InvalidDashboardError,
+              "'this_should_be_a_int' is invalid at '/panel_groups/0/panels/0/weight'."\
+              " Should be '{\"type\"=>\"number\"}' due to schema definition at '/properties/weight'")
+        end
+      end
+
+      context 'duplicate metric ids' do
+        context 'with no project given' do
+          it 'checks against given dashboard and returns false' do
+            expect { described_class.validate!(duplicate_id_dashboard) }
+              .to raise_error(Gitlab::Metrics::Dashboard::Validator::Errors::InvalidDashboardError,
+                "metric_id must be unique across a project")
+          end
+        end
+      end
+    end
+  end
+end
-- 
GitLab