From f45d6af6f6cfb4fa117fb9be6bc5a097a083091b Mon Sep 17 00:00:00 2001
From: Paul Slaughter <pslaughter@gitlab.com>
Date: Mon, 8 Nov 2021 14:14:02 +0000
Subject: [PATCH] Step 1.1 - Create graphql_known_operations from webpack
 plugin

---
 .../graphql_known_operations_plugin.js        | 112 ++++++++++++++++++
 config/webpack.config.js                      |   3 +
 lib/gitlab/graphql/known_operations.rb        |  40 +++++++
 lib/gitlab/webpack/file_loader.rb             |  65 ++++++++++
 .../webpack/graphql_known_operations.rb       |  25 ++++
 lib/gitlab/webpack/manifest.rb                |  50 ++------
 .../features/graphql_known_operations_spec.rb |  29 +++++
 .../gitlab/graphql/known_operations_spec.rb   |  72 +++++++++++
 spec/lib/gitlab/webpack/file_loader_spec.rb   |  79 ++++++++++++
 .../webpack/graphql_known_operations_spec.rb  |  47 ++++++++
 10 files changed, 479 insertions(+), 43 deletions(-)
 create mode 100644 config/plugins/graphql_known_operations_plugin.js
 create mode 100644 lib/gitlab/graphql/known_operations.rb
 create mode 100644 lib/gitlab/webpack/file_loader.rb
 create mode 100644 lib/gitlab/webpack/graphql_known_operations.rb
 create mode 100644 spec/features/graphql_known_operations_spec.rb
 create mode 100644 spec/lib/gitlab/graphql/known_operations_spec.rb
 create mode 100644 spec/lib/gitlab/webpack/file_loader_spec.rb
 create mode 100644 spec/lib/gitlab/webpack/graphql_known_operations_spec.rb

diff --git a/config/plugins/graphql_known_operations_plugin.js b/config/plugins/graphql_known_operations_plugin.js
new file mode 100644
index 0000000000000..164b34c1dd13e
--- /dev/null
+++ b/config/plugins/graphql_known_operations_plugin.js
@@ -0,0 +1,112 @@
+/* eslint-disable no-underscore-dangle */
+const yaml = require('js-yaml');
+
+const PLUGIN_NAME = 'GraphqlKnownOperationsPlugin';
+const GRAPHQL_PATH_REGEX = /(query|mutation)\.graphql$/;
+const OPERATION_NAME_SOURCE_REGEX = /^\s*module\.exports.*oneQuery.*"(\w+)"/gm;
+
+/**
+ * Returns whether the module's `resource` ends in `query.graphql` or `mutation.graphql`
+ */
+const isGraphqlModule = (module) => {
+  return GRAPHQL_PATH_REGEX.test(module.resource);
+};
+
+/**
+ * Returns graphql operation names we can parse from the given module
+ *
+ * Since webpack gives us the source **after** the graphql-tag/loader runs,
+ * we match the `module.exports ... oneQuery ... "<name>"` lines that the
+ * graphql-tag/loader emits and capture the quoted operation name.
+ */
+const getOperationNames = (module) => {
+  const originalSource = module.originalSource();
+
+  if (!originalSource) {
+    return [];
+  }
+
+  const matches = originalSource.source().toString().matchAll(OPERATION_NAME_SOURCE_REGEX);
+
+  return Array.from(matches).map((match) => match[1]);
+};
+
+const createFileContents = (knownOperations) => {
+  const sourceData = Array.from(knownOperations.values()).sort((a, b) => a.localeCompare(b));
+
+  return yaml.dump(sourceData);
+};
+
+/**
+ * Creates a webpack4 compatible "RawSource"
+ *
+ * Inspired by https://sourcegraph.com/github.com/FormidableLabs/webpack-stats-plugin@e050ff8c362d5ddd45c66ade724d4a397ace3e5c/-/blob/lib/stats-writer-plugin.js?L144
+ */
+const createWebpackRawSource = (source) => {
+  const buff = Buffer.from(source, 'utf-8');
+
+  return {
+    source() {
+      return buff;
+    },
+    size() {
+      return buff.length;
+    },
+  };
+};
+
+const onSucceedModule = ({ module, knownOperations }) => {
+  if (!isGraphqlModule(module)) {
+    return;
+  }
+
+  getOperationNames(module).forEach((x) => knownOperations.add(x));
+};
+
+const onCompilerEmit = ({ compilation, knownOperations, filename }) => {
+  const contents = createFileContents(knownOperations);
+  const source = createWebpackRawSource(contents);
+
+  const asset = compilation.getAsset(filename);
+  if (asset) {
+    compilation.updateAsset(filename, source);
+  } else {
+    compilation.emitAsset(filename, source);
+  }
+};
+
+/**
+ * Webpack plugin that outputs a file containing known graphql operations.
+ *
+ * A lot of the mechanics were inspired by [this example][1].
+ *
+ * [1]: https://sourcegraph.com/github.com/FormidableLabs/webpack-stats-plugin@e050ff8c362d5ddd45c66ade724d4a397ace3e5c/-/blob/lib/stats-writer-plugin.js?L136
+ */
+class GraphqlKnownOperationsPlugin {
+  constructor({ filename }) {
+    this._filename = filename;
+  }
+
+  apply(compiler) {
+    const knownOperations = new Set();
+
+    compiler.hooks.emit.tap(PLUGIN_NAME, (compilation) => {
+      onCompilerEmit({
+        compilation,
+        knownOperations,
+        filename: this._filename,
+      });
+    });
+
+    compiler.hooks.compilation.tap(PLUGIN_NAME, (compilation) => {
+      compilation.hooks.succeedModule.tap(PLUGIN_NAME, (module) => {
+        onSucceedModule({
+          module,
+          knownOperations,
+        });
+      });
+    });
+  }
+}
+
+module.exports = GraphqlKnownOperationsPlugin;
diff --git a/config/webpack.config.js b/config/webpack.config.js
index 7f220b584a39c..f334e17bbafab 100644
--- a/config/webpack.config.js
+++ b/config/webpack.config.js
@@ -24,6 +24,7 @@ const IS_JH = require('./helpers/is_jh_env');
 const vendorDllHash = require('./helpers/vendor_dll_hash');
 
 const MonacoWebpackPlugin = require('./plugins/monaco_webpack');
+const GraphqlKnownOperationsPlugin = require('./plugins/graphql_known_operations_plugin');
 
 const ROOT_PATH = path.resolve(__dirname, '..');
 const SUPPORTED_BROWSERS = fs.readFileSync(path.join(ROOT_PATH, '.browserslistrc'), 'utf-8');
@@ -456,6 +457,8 @@ module.exports = {
       globalAPI: true,
     }),
 
+    new GraphqlKnownOperationsPlugin({ filename: 'graphql_known_operations.yml' }),
+
     // fix legacy jQuery plugins which depend on globals
     new webpack.ProvidePlugin({
       $: 'jquery',
diff --git a/lib/gitlab/graphql/known_operations.rb b/lib/gitlab/graphql/known_operations.rb
new file mode 100644
index 0000000000000..954efa5a93320
--- /dev/null
+++ b/lib/gitlab/graphql/known_operations.rb
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Graphql
+    class KnownOperations
+      Operation = Struct.new(:name) do
+        def to_caller_id
+          "graphql:#{name}"
+        end
+      end
+
+      ANONYMOUS = Operation.new("anonymous").freeze
+      UNKNOWN = Operation.new("unknown").freeze
+
+      def self.default
+        @default ||= self.new(Gitlab::Webpack::GraphqlKnownOperations.load)
+      end
+
+      def initialize(operation_names)
+        @operation_hash = operation_names
+          .map { |name| Operation.new(name).freeze }
+          .concat([ANONYMOUS, UNKNOWN])
+          .index_by(&:name)
+      end
+
+      # Returns the known operation from the given ::GraphQL::Query object
+      def from_query(query)
+        operation_name = query.selected_operation_name
+
+        return ANONYMOUS unless operation_name
+
+        @operation_hash[operation_name] || UNKNOWN
+      end
+
+      def operations
+        @operation_hash.values
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/webpack/file_loader.rb b/lib/gitlab/webpack/file_loader.rb
new file mode 100644
index 0000000000000..35ecb1eb4edca
--- /dev/null
+++ b/lib/gitlab/webpack/file_loader.rb
@@ -0,0 +1,65 @@
+# frozen_string_literal: true
+
+require 'net/http'
+require 'uri'
+
+module Gitlab
+  module Webpack
+    class FileLoader
+      class BaseError < StandardError
+        attr_reader :original_error, :uri
+
+        def initialize(uri, orig)
+          super orig.message
+          @uri = uri.to_s
+          @original_error = orig
+        end
+      end
+
+      StaticLoadError = Class.new(BaseError)
+      DevServerLoadError = Class.new(BaseError)
+      DevServerSSLError = Class.new(BaseError)
+
+      def self.load(path)
+        if Gitlab.config.webpack.dev_server.enabled
+          self.load_from_dev_server(path)
+        else
+          self.load_from_static(path)
+        end
+      end
+
+      def self.load_from_dev_server(path)
+        host = Gitlab.config.webpack.dev_server.host
+        port = Gitlab.config.webpack.dev_server.port
+        scheme = Gitlab.config.webpack.dev_server.https ? 'https' : 'http'
+        uri = Addressable::URI.new(scheme: scheme, host: host, port: port, path: self.dev_server_path(path))
+
+        # localhost could be blocked via Gitlab::HTTP
+        response = HTTParty.get(uri.to_s, verify: false) # rubocop:disable Gitlab/HTTParty
+
+        return response.body if response.code == 200
+
+        raise "HTTP error #{response.code}"
+      rescue OpenSSL::SSL::SSLError, EOFError => e
+        raise DevServerSSLError.new(uri, e)
+      rescue StandardError => e
+        raise DevServerLoadError.new(uri, e)
+      end
+
+      def self.load_from_static(path)
+        file_uri = ::Rails.root.join(
+          Gitlab.config.webpack.output_dir,
+          path
+        )
+
+        File.read(file_uri)
+      rescue StandardError => e
+        raise StaticLoadError.new(file_uri, e)
+      end
+
+      def self.dev_server_path(path)
+        "/#{Gitlab.config.webpack.public_path}/#{path}"
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/webpack/graphql_known_operations.rb b/lib/gitlab/webpack/graphql_known_operations.rb
new file mode 100644
index 0000000000000..7945513667c43
--- /dev/null
+++ b/lib/gitlab/webpack/graphql_known_operations.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Webpack
+    class GraphqlKnownOperations
+      class << self
+        include Gitlab::Utils::StrongMemoize
+
+        def clear_memoization!
+          clear_memoization(:graphql_known_operations)
+        end
+
+        def load
+          strong_memoize(:graphql_known_operations) do
+            data = ::Gitlab::Webpack::FileLoader.load("graphql_known_operations.yml")
+
+            YAML.safe_load(data)
+          rescue StandardError
+            []
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/webpack/manifest.rb b/lib/gitlab/webpack/manifest.rb
index b73c2ebb578af..06cddc23134a0 100644
--- a/lib/gitlab/webpack/manifest.rb
+++ b/lib/gitlab/webpack/manifest.rb
@@ -1,8 +1,5 @@
 # frozen_string_literal: true
 
-require 'net/http'
-require 'uri'
-
 module Gitlab
   module Webpack
     class Manifest
@@ -78,49 +75,16 @@ def manifest
         end
 
         def load_manifest
-          data = if Gitlab.config.webpack.dev_server.enabled
-                   load_dev_server_manifest
-                 else
-                   load_static_manifest
-                 end
+          data = Gitlab::Webpack::FileLoader.load(Gitlab.config.webpack.manifest_filename)
 
           Gitlab::Json.parse(data)
-        end
-
-        def load_dev_server_manifest
-          host = Gitlab.config.webpack.dev_server.host
-          port = Gitlab.config.webpack.dev_server.port
-          scheme = Gitlab.config.webpack.dev_server.https ? 'https' : 'http'
-          uri = Addressable::URI.new(scheme: scheme, host: host, port: port, path: dev_server_path)
-
-          # localhost could be blocked via Gitlab::HTTP
-          response = HTTParty.get(uri.to_s, verify: false) # rubocop:disable Gitlab/HTTParty
-
-          return response.body if response.code == 200
-
-          raise "HTTP error #{response.code}"
-        rescue OpenSSL::SSL::SSLError, EOFError => e
+        rescue Gitlab::Webpack::FileLoader::StaticLoadError => e
+          raise ManifestLoadError.new("Could not load compiled manifest from #{e.uri}.\n\nHave you run `rake gitlab:assets:compile`?", e.original_error)
+        rescue Gitlab::Webpack::FileLoader::DevServerSSLError => e
           ssl_status = Gitlab.config.webpack.dev_server.https ? ' over SSL' : ''
-          raise ManifestLoadError.new("Could not connect to webpack-dev-server at #{uri}#{ssl_status}.\n\nIs SSL enabled? Check that settings in `gitlab.yml` and webpack-dev-server match.", e)
-        rescue StandardError => e
-          raise ManifestLoadError.new("Could not load manifest from webpack-dev-server at #{uri}.\n\nIs webpack-dev-server running? Try running `gdk status webpack` or `gdk tail webpack`.", e)
-        end
-
-        def load_static_manifest
-          File.read(static_manifest_path)
-        rescue StandardError => e
-          raise ManifestLoadError.new("Could not load compiled manifest from #{static_manifest_path}.\n\nHave you run `rake gitlab:assets:compile`?", e)
-        end
-
-        def static_manifest_path
-          ::Rails.root.join(
-            Gitlab.config.webpack.output_dir,
-            Gitlab.config.webpack.manifest_filename
-          )
-        end
-
-        def dev_server_path
-          "/#{Gitlab.config.webpack.public_path}/#{Gitlab.config.webpack.manifest_filename}"
+          raise ManifestLoadError.new("Could not connect to webpack-dev-server at #{e.uri}#{ssl_status}.\n\nIs SSL enabled? Check that settings in `gitlab.yml` and webpack-dev-server match.", e.original_error)
+        rescue Gitlab::Webpack::FileLoader::DevServerLoadError => e
+          raise ManifestLoadError.new("Could not load manifest from webpack-dev-server at #{e.uri}.\n\nIs webpack-dev-server running? Try running `gdk status webpack` or `gdk tail webpack`.", e.original_error)
         end
       end
     end
diff --git a/spec/features/graphql_known_operations_spec.rb b/spec/features/graphql_known_operations_spec.rb
new file mode 100644
index 0000000000000..ef406f1290292
--- /dev/null
+++ b/spec/features/graphql_known_operations_spec.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+# We need to distinguish between known and unknown GraphQL operations. This spec
+# tests that we set up Gitlab::Graphql::KnownOperations.default which requires
+# integration of FE queries, webpack plugin, and BE.
+RSpec.describe 'Graphql known operations', :js do
+  around do |example|
+    # Let's make sure we aren't receiving or leaving behind any side-effects
+    # https://gitlab.com/gitlab-org/gitlab/-/jobs/1743294100
+    ::Gitlab::Graphql::KnownOperations.instance_variable_set(:@default, nil)
+    ::Gitlab::Webpack::GraphqlKnownOperations.clear_memoization!
+
+    example.run
+
+    ::Gitlab::Graphql::KnownOperations.instance_variable_set(:@default, nil)
+    ::Gitlab::Webpack::GraphqlKnownOperations.clear_memoization!
+  end
+
+  it 'collects known Graphql operations from the code', :aggregate_failures do
+    # Check that we include some arbitrary operation name we expect
+    known_operations = Gitlab::Graphql::KnownOperations.default.operations.map(&:name)
+
+    expect(known_operations).to include("searchProjects")
+    expect(known_operations.length).to be > 20
+    expect(known_operations).to all( match(%r{^[a-z]+}i) )
+  end
+end
diff --git a/spec/lib/gitlab/graphql/known_operations_spec.rb b/spec/lib/gitlab/graphql/known_operations_spec.rb
new file mode 100644
index 0000000000000..58fa2c18639d2
--- /dev/null
+++ b/spec/lib/gitlab/graphql/known_operations_spec.rb
@@ -0,0 +1,72 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+require 'rspec-parameterized'
+require "support/graphql/fake_query_type"
+
+RSpec.describe Gitlab::Graphql::KnownOperations do
+  using RSpec::Parameterized::TableSyntax
+
+  # Include duplicated operation names to test that we are unique-ifying them
+  let(:fake_operations) { %w(foo foo bar bar) }
+  let(:fake_schema) do
+    Class.new(GraphQL::Schema) do
+      query Graphql::FakeQueryType
+    end
+  end
+
+  subject { described_class.new(fake_operations) }
+
+  describe "#from_query" do
+    where(:query_string, :expected) do
+      "query { helloWorld }"         | described_class::ANONYMOUS
+      "query fuzzyyy { helloWorld }" | described_class::UNKNOWN
+      "query foo { helloWorld }"     | described_class::Operation.new("foo")
+    end
+
+    with_them do
+      it "returns known operation name from GraphQL Query" do
+        query = ::GraphQL::Query.new(fake_schema, query_string)
+
+        expect(subject.from_query(query)).to eq(expected)
+      end
+    end
+  end
+
+  describe "#operations" do
+    it "returns array of known operations" do
+      expect(subject.operations.map(&:name)).to match_array(%w(anonymous unknown foo bar))
+    end
+  end
+
+  describe "Operation#to_caller_id" do
+    where(:query_string, :expected) do
+      "query { helloWorld }"         | "graphql:#{described_class::ANONYMOUS.name}"
+      "query foo { helloWorld }"     | "graphql:foo"
+    end
+
+    with_them do
+      it "formats operation name for caller_id metric property" do
+        query = ::GraphQL::Query.new(fake_schema, query_string)
+
+        expect(subject.from_query(query).to_caller_id).to eq(expected)
+      end
+    end
+  end
+
+  describe ".default" do
+    it "returns a memoization of values from webpack", :aggregate_failures do
+      # .default could have been referenced in another spec, so we need to clean it up here
+      described_class.instance_variable_set(:@default, nil)
+
+      expect(Gitlab::Webpack::GraphqlKnownOperations).to receive(:load).once.and_return(fake_operations)
+
+      2.times { described_class.default }
+
+      # Uses reference equality to verify memoization
+      expect(described_class.default).to equal(described_class.default)
+      expect(described_class.default).to be_a(described_class)
+      expect(described_class.default.operations.map(&:name)).to include(*fake_operations)
+    end
+  end
+end
diff --git a/spec/lib/gitlab/webpack/file_loader_spec.rb b/spec/lib/gitlab/webpack/file_loader_spec.rb
new file mode 100644
index 0000000000000..34d00b9f1060f
--- /dev/null
+++ b/spec/lib/gitlab/webpack/file_loader_spec.rb
@@ -0,0 +1,79 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+require 'support/helpers/file_read_helpers'
+require 'support/webmock'
+
+RSpec.describe Gitlab::Webpack::FileLoader do
+  include FileReadHelpers
+  include WebMock::API
+
+  let(:error_file_path) { "error.yml" }
+  let(:file_path) { "my_test_file.yml" }
+  let(:file_contents) do
+    <<-EOF
+    - hello
+    - world
+    - test
+    EOF
+  end
+
+  before do
+    allow(Gitlab.config.webpack.dev_server).to receive_messages(host: 'hostname', port: 2000, https: false)
+    allow(Gitlab.config.webpack).to receive(:public_path).and_return('public_path')
+    allow(Gitlab.config.webpack).to receive(:output_dir).and_return('webpack_output')
+  end
+
+  context "with dev server enabled" do
+    before do
+      allow(Gitlab.config.webpack.dev_server).to receive(:enabled).and_return(true)
+
+      stub_request(:get, "http://hostname:2000/public_path/not_found").to_return(status: 404)
+      stub_request(:get, "http://hostname:2000/public_path/#{file_path}").to_return(body: file_contents, status: 200)
+      stub_request(:get, "http://hostname:2000/public_path/#{error_file_path}").to_raise(StandardError)
+    end
+
+    it "returns content when respondes succesfully" do
+      expect(Gitlab::Webpack::FileLoader.load(file_path)).to be(file_contents)
+    end
+
+    it "raises error when 404" do
+      expect { Gitlab::Webpack::FileLoader.load("not_found") }.to raise_error("HTTP error 404")
+    end
+
+    it "raises error when errors out" do
+      expect { Gitlab::Webpack::FileLoader.load(error_file_path) }.to raise_error(Gitlab::Webpack::FileLoader::DevServerLoadError)
+    end
+  end
+
+  context "with dev server enabled and https" do
+    before do
+      allow(Gitlab.config.webpack.dev_server).to receive(:enabled).and_return(true)
+      allow(Gitlab.config.webpack.dev_server).to receive(:https).and_return(true)
+
+      stub_request(:get, "https://hostname:2000/public_path/#{error_file_path}").to_raise(EOFError)
+    end
+
+    it "raises error if catches SSLError" do
+      expect { Gitlab::Webpack::FileLoader.load(error_file_path) }.to raise_error(Gitlab::Webpack::FileLoader::DevServerSSLError)
+    end
+  end
+
+  context "with dev server disabled" do
+    before do
+      allow(Gitlab.config.webpack.dev_server).to receive(:enabled).and_return(false)
+      stub_file_read(::Rails.root.join("webpack_output/#{file_path}"), content: file_contents)
+      stub_file_read(::Rails.root.join("webpack_output/#{error_file_path}"), error: Errno::ENOENT)
+    end
+
+    describe ".load" do
+      it "returns file content from file path" do
+        expect(Gitlab::Webpack::FileLoader.load(file_path)).to be(file_contents)
+      end
+
+      it "throws error if file cannot be read" do
+        expect { Gitlab::Webpack::FileLoader.load(error_file_path) }.to raise_error(Gitlab::Webpack::FileLoader::StaticLoadError)
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/webpack/graphql_known_operations_spec.rb b/spec/lib/gitlab/webpack/graphql_known_operations_spec.rb
new file mode 100644
index 0000000000000..89cade82fe698
--- /dev/null
+++ b/spec/lib/gitlab/webpack/graphql_known_operations_spec.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+
+RSpec.describe Gitlab::Webpack::GraphqlKnownOperations do
+  let(:content) do
+    <<-EOF
+    - hello
+    - world
+    - test
+    EOF
+  end
+
+  around do |example|
+    described_class.clear_memoization!
+
+    example.run
+
+    described_class.clear_memoization!
+  end
+
+  describe ".load" do
+    context "when file loader returns" do
+      before do
+        allow(::Gitlab::Webpack::FileLoader).to receive(:load).with("graphql_known_operations.yml").and_return(content)
+      end
+
+      it "returns memoized value" do
+        expect(::Gitlab::Webpack::FileLoader).to receive(:load).once
+
+        2.times { ::Gitlab::Webpack::GraphqlKnownOperations.load }
+
+        expect(::Gitlab::Webpack::GraphqlKnownOperations.load).to eq(%w(hello world test))
+      end
+    end
+
+    context "when file loader errors" do
+      before do
+        allow(::Gitlab::Webpack::FileLoader).to receive(:load).and_raise(StandardError.new("test"))
+      end
+
+      it "returns empty array" do
+        expect(::Gitlab::Webpack::GraphqlKnownOperations.load).to eq([])
+      end
+    end
+  end
+end
-- 
GitLab