From 0f05c6af710a02eb306314c65da2bb6eddce9c1d Mon Sep 17 00:00:00 2001 From: alinamihaila <amihaila@gitlab.com> Date: Wed, 16 Jun 2021 11:59:31 +0300 Subject: [PATCH] Use instrumentation classes in usage_data - Add add_metric method in usage_data.rb - Update docs - Use existing instrumentation classes in usage_data.rb --- doc/development/service_ping/index.md | 17 ++++++++++++++++- .../service_ping/metrics_instrumentation.md | 15 +++++++++++++++ .../service_ping/review_guidelines.md | 3 +++ .../usage_ping/metrics_instrumentation.md | 1 + .../metrics/every_metric_definition_spec.rb | 1 + .../metrics/instrumentations/base_metric.rb | 6 +++++- .../metrics/instrumentations/generic_metric.rb | 8 ++------ .../instrumentations/redis_hll_metric.rb | 9 +-------- .../metrics/names_suggestions/generator.rb | 4 ++++ lib/gitlab/usage_data.rb | 11 ++++++----- lib/gitlab/usage_data_non_sql_metrics.rb | 4 ++++ lib/gitlab/usage_data_queries.rb | 4 ++++ lib/gitlab/utils/usage_data.rb | 4 ++++ .../metrics/names_suggestions/generator_spec.rb | 8 ++++++++ spec/lib/gitlab/usage_data_queries_spec.rb | 8 ++++++++ spec/lib/gitlab/utils/usage_data_spec.rb | 8 ++++++++ 16 files changed, 90 insertions(+), 21 deletions(-) diff --git a/doc/development/service_ping/index.md b/doc/development/service_ping/index.md index 41a2022473af..f99b797ab240 100644 --- a/doc/development/service_ping/index.md +++ b/doc/development/service_ping/index.md @@ -377,7 +377,9 @@ happened over time, such as how many CI pipelines have run. They are monotonic a Observations are facts collected from one or more GitLab instances and can carry arbitrary data. There are no general guidelines around how to collect those, due to the individual nature of that data. -There are several types of counters which are all found in `usage_data.rb`: +### Types of counters + +There are several types of counters in `usage_data.rb`: - **Ordinary Batch Counters:** Simple count of a given ActiveRecord_Relation - **Distinct Batch Counters:** Distinct count of a given ActiveRecord_Relation in a given column @@ -388,6 +390,19 @@ There are several types of counters which are all found in `usage_data.rb`: NOTE: Only use the provided counter methods. Each counter method contains a built in fail safe to isolate each counter to avoid breaking the entire Service Ping. +### Using instrumentation classes + +We recommend you use [instrumentation classes](metrics_instrumentation.md) in `usage_data.rb` where possible. + +For example, we have the following instrumentation class: +`lib/gitlab/usage/metrics/instrumentations/count_boards_metric.rb`. + +You should add it to `usage_data.rb` as follows: + +```ruby +boards: add_metric(Gitlab::Usage::Metrics::Instrumentations::CountBoardsMetric.new(time_frame: 'all')), +``` + ### Why batch counting For large tables, PostgreSQL can take a long time to count rows due to MVCC [(Multi-version Concurrency Control)](https://en.wikipedia.org/wiki/Multiversion_concurrency_control). Batch counting is a counting method where a single large query is broken into multiple smaller queries. For example, instead of a single query querying 1,000,000 records, with batch counting, you can execute 100 queries of 10,000 records each. Batch counting is useful for avoiding database timeouts as each batch query is significantly shorter than one single long running query. diff --git a/doc/development/service_ping/metrics_instrumentation.md b/doc/development/service_ping/metrics_instrumentation.md index 61a1ff828be9..8a091616dc3a 100644 --- a/doc/development/service_ping/metrics_instrumentation.md +++ b/doc/development/service_ping/metrics_instrumentation.md @@ -86,6 +86,21 @@ module Gitlab end ``` +## Support for instrumentation classes + +There is support for: + +- `count`, `distinct_count` for [database metrics](#database-metrics). +- [Redis HLL metrics](#redis-hyperloglog-metrics). +- [Generic metrics](#generic-metrics), which are metrics based on settings or configurations. + +Currently, there is no support for: + +- `add`, `sum`, `histogram`, `estimate_batch_distinct_count` for database metrics. +- Regular Redis counters. + +You can [track the progress to support these](https://gitlab.com/groups/gitlab-org/-/epics/6118). + ## Creating a new metric instrumentation class To create a stub instrumentation for a Service Ping metric, you can use a dedicated [generator](https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/generators/gitlab/usage_metric_generator.rb): diff --git a/doc/development/service_ping/review_guidelines.md b/doc/development/service_ping/review_guidelines.md index a4fb929a8a0c..0b8f10b0aded 100644 --- a/doc/development/service_ping/review_guidelines.md +++ b/doc/development/service_ping/review_guidelines.md @@ -63,6 +63,9 @@ any of the following Service Ping files: Read the [stages file](https://gitlab.com/gitlab-com/www-gitlab-com/blob/master/data/stages.yml). - Check the file location. Consider the time frame, and if the file should be under `ee`. - Check the tiers. +- Metrics instrumentations + - Recommend to use metrics instrumentation for new metrics addded to service with + [limitations](metrics_instrumentation.md#support-for-instrumentation-classes) - Approve the MR, and relabel the MR with `~"product intelligence::approved"`. ## Review workload distribution diff --git a/doc/development/usage_ping/metrics_instrumentation.md b/doc/development/usage_ping/metrics_instrumentation.md index f2d731803b88..3bc48932eae0 100644 --- a/doc/development/usage_ping/metrics_instrumentation.md +++ b/doc/development/usage_ping/metrics_instrumentation.md @@ -5,5 +5,6 @@ remove_date: '2021-10-09' This file was moved to [another location](../service_ping/metrics_instrumentation.md). +<<<<<<< HEAD <!-- This redirect file can be deleted after <2021-10-09>. --> <!-- Before deletion, see: https://docs.gitlab.com/ee/development/documentation/#move-or-rename-a-page --> diff --git a/ee/spec/config/metrics/every_metric_definition_spec.rb b/ee/spec/config/metrics/every_metric_definition_spec.rb index 1e3c19bd83ac..161597c5c166 100644 --- a/ee/spec/config/metrics/every_metric_definition_spec.rb +++ b/ee/spec/config/metrics/every_metric_definition_spec.rb @@ -67,6 +67,7 @@ def parse_usage_ping_keys(object, key_path = []) allow(Gitlab::Geo).to receive(:enabled?).and_return(true) stub_licensed_features(requirements: true) stub_prometheus_queries + stub_usage_data_connections end it 'is included in the Usage Ping hash structure' do diff --git a/lib/gitlab/usage/metrics/instrumentations/base_metric.rb b/lib/gitlab/usage/metrics/instrumentations/base_metric.rb index 7b5bee3f8bd6..f0ba0322ab62 100644 --- a/lib/gitlab/usage/metrics/instrumentations/base_metric.rb +++ b/lib/gitlab/usage/metrics/instrumentations/base_metric.rb @@ -11,10 +11,14 @@ class BaseMetric attr_reader :time_frame attr_reader :options - def initialize(time_frame:, options: {}) + def initialize(time_frame: 'none', options: {}) @time_frame = time_frame @options = options end + + def to_sql + value + end end end end diff --git a/lib/gitlab/usage/metrics/instrumentations/generic_metric.rb b/lib/gitlab/usage/metrics/instrumentations/generic_metric.rb index 8ce9e9d8c3a3..f52d08cb3150 100644 --- a/lib/gitlab/usage/metrics/instrumentations/generic_metric.rb +++ b/lib/gitlab/usage/metrics/instrumentations/generic_metric.rb @@ -15,9 +15,7 @@ class GenericMetric < BaseMetric FALLBACK = -1 class << self - attr_reader :metric_operation, :metric_value - - @metric_operation = :alt + attr_reader :metric_value def fallback(custom_fallback = FALLBACK) return @metric_fallback if defined?(@metric_fallback) @@ -37,9 +35,7 @@ def value end def suggested_name - Gitlab::Usage::Metrics::NameSuggestion.for( - self.class.metric_operation - ) + Gitlab::Usage::Metrics::NameSuggestion.for(:alt) end end end diff --git a/lib/gitlab/usage/metrics/instrumentations/redis_hll_metric.rb b/lib/gitlab/usage/metrics/instrumentations/redis_hll_metric.rb index a36e612a1cb4..bb27cca1bb9b 100644 --- a/lib/gitlab/usage/metrics/instrumentations/redis_hll_metric.rb +++ b/lib/gitlab/usage/metrics/instrumentations/redis_hll_metric.rb @@ -12,11 +12,6 @@ class RedisHLLMetric < BaseMetric # events: # - g_analytics_valuestream # end - class << self - attr_reader :metric_operation - @metric_operation = :redis - end - def initialize(time_frame:, options: {}) super @@ -36,9 +31,7 @@ def value end def suggested_name - Gitlab::Usage::Metrics::NameSuggestion.for( - self.class.metric_operation - ) + Gitlab::Usage::Metrics::NameSuggestion.for(:redis) end private diff --git a/lib/gitlab/usage/metrics/names_suggestions/generator.rb b/lib/gitlab/usage/metrics/names_suggestions/generator.rb index a669b43f395f..c8b37c38a349 100644 --- a/lib/gitlab/usage/metrics/names_suggestions/generator.rb +++ b/lib/gitlab/usage/metrics/names_suggestions/generator.rb @@ -10,6 +10,10 @@ def generate(key_path) uncached_data.deep_stringify_keys.dig(*key_path.split('.')) end + def add_metric(metric) + metric.suggested_name + end + private def count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil) diff --git a/lib/gitlab/usage_data.rb b/lib/gitlab/usage_data.rb index aabc706901e0..b71c22289c1c 100644 --- a/lib/gitlab/usage_data.rb +++ b/lib/gitlab/usage_data.rb @@ -72,8 +72,8 @@ def to_json(force_refresh: false) def license_usage_data { recorded_at: recorded_at, - uuid: alt_usage_data { Gitlab::CurrentSettings.uuid }, - hostname: alt_usage_data { Gitlab.config.gitlab.host }, + uuid: add_metric(Gitlab::Usage::Metrics::Instrumentations::UuidMetric.new), + hostname: add_metric(Gitlab::Usage::Metrics::Instrumentations::HostnameMetric.new), version: alt_usage_data { Gitlab::VERSION }, installation_type: alt_usage_data { installation_type }, active_user_count: count(User.active), @@ -93,7 +93,7 @@ def system_usage_data { counts: { assignee_lists: count(List.assignee), - boards: count(Board), + boards: add_metric(Gitlab::Usage::Metrics::Instrumentations::CountBoardsMetric.new(time_frame: 'all')), ci_builds: count(::Ci::Build), ci_internal_pipelines: count(::Ci::Pipeline.internal), ci_external_pipelines: count(::Ci::Pipeline.external), @@ -138,7 +138,7 @@ def system_usage_data in_review_folder: count(::Environment.in_review_folder), grafana_integrated_projects: count(GrafanaIntegration.enabled), groups: count(Group), - issues: count(Issue, start: minimum_id(Issue), finish: maximum_id(Issue)), + issues: add_metric(Gitlab::Usage::Metrics::Instrumentations::CountIssuesMetric.new(time_frame: 'all')), issues_created_from_gitlab_error_tracking_ui: count(SentryIssue), issues_with_associated_zoom_link: count(ZoomMeeting.added_to_issue), issues_using_zoom_quick_actions: distinct_count(ZoomMeeting, :issue_id), @@ -644,8 +644,9 @@ def usage_activity_by_stage_package(time_period) # Omitted because of encrypted properties: `projects_jira_cloud_active`, `projects_jira_server_active` # rubocop: disable CodeReuse/ActiveRecord def usage_activity_by_stage_plan(time_period) + time_frame = time_period.present? ? '28d' : 'none' { - issues: distinct_count(::Issue.where(time_period), :author_id), + issues: add_metric(Gitlab::Usage::Metrics::Instrumentations::CountUsersCreatingIssuesMetric.new(time_frame: time_frame)), notes: distinct_count(::Note.where(time_period), :author_id), projects: distinct_count(::Project.where(time_period), :creator_id), todos: distinct_count(::Todo.where(time_period), :author_id), diff --git a/lib/gitlab/usage_data_non_sql_metrics.rb b/lib/gitlab/usage_data_non_sql_metrics.rb index 44d5baa42f60..38d3f69eea6b 100644 --- a/lib/gitlab/usage_data_non_sql_metrics.rb +++ b/lib/gitlab/usage_data_non_sql_metrics.rb @@ -5,6 +5,10 @@ class UsageDataNonSqlMetrics < UsageData SQL_METRIC_DEFAULT = -3 class << self + def add_metric(metric) + metric.value unless metric.is_a?(Gitlab::Usage::Metrics::Instrumentations::DatabaseMetric) + end + def count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil) SQL_METRIC_DEFAULT end diff --git a/lib/gitlab/usage_data_queries.rb b/lib/gitlab/usage_data_queries.rb index 63e6cf03d1fd..6f589f206ce6 100644 --- a/lib/gitlab/usage_data_queries.rb +++ b/lib/gitlab/usage_data_queries.rb @@ -5,6 +5,10 @@ module Gitlab # See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/41091 class UsageDataQueries < UsageData class << self + def add_metric(metric) + metric.to_sql + end + def count(relation, column = nil, *args, **kwargs) Gitlab::Usage::Metrics::Query.for(:count, relation, column) end diff --git a/lib/gitlab/utils/usage_data.rb b/lib/gitlab/utils/usage_data.rb index faa524d171c3..fc00abcab6e4 100644 --- a/lib/gitlab/utils/usage_data.rb +++ b/lib/gitlab/utils/usage_data.rb @@ -44,6 +44,10 @@ module UsageData DISTRIBUTED_HLL_FALLBACK = -2 MAX_BUCKET_SIZE = 100 + def add_metric(metric) + metric.value + end + def count(relation, column = nil, batch: true, batch_size: nil, start: nil, finish: nil) if batch Gitlab::Database::BatchCount.batch_count(relation, column, batch_size: batch_size, start: start, finish: finish) diff --git a/spec/lib/gitlab/usage/metrics/names_suggestions/generator_spec.rb b/spec/lib/gitlab/usage/metrics/names_suggestions/generator_spec.rb index b4ab9d4861be..58839f720bc2 100644 --- a/spec/lib/gitlab/usage/metrics/names_suggestions/generator_spec.rb +++ b/spec/lib/gitlab/usage/metrics/names_suggestions/generator_spec.rb @@ -16,6 +16,14 @@ end end + describe '#add_metric' do + let(:metric) { double(:metric, suggested_name: 'counts_issues') } + + it 'computes the metric value for given metric' do + expect(described_class.add_metric(metric)).to eq('counts_issues') + end + end + context 'for count with default column metrics' do it_behaves_like 'name suggestion' do # corresponding metric is collected with count(Board) diff --git a/spec/lib/gitlab/usage_data_queries_spec.rb b/spec/lib/gitlab/usage_data_queries_spec.rb index 438ae3efd11d..c04545adac99 100644 --- a/spec/lib/gitlab/usage_data_queries_spec.rb +++ b/spec/lib/gitlab/usage_data_queries_spec.rb @@ -7,6 +7,14 @@ allow(ActiveRecord::Base.connection).to receive(:transaction_open?).and_return(false) end + describe '#add_metric' do + let(:metric) { double(:metric, to_sql: 'SELECT COUNT("users"."id") FROM "users"') } + + it 'computes the metric value for given metric' do + expect(described_class.add_metric(metric)).to eq('SELECT COUNT("users"."id") FROM "users"') + end + end + describe '.count' do it 'returns the raw SQL' do expect(described_class.count(User)).to start_with('SELECT COUNT("users"."id") FROM "users"') diff --git a/spec/lib/gitlab/utils/usage_data_spec.rb b/spec/lib/gitlab/utils/usage_data_spec.rb index 8f705d6a487b..f4f5be7e41e3 100644 --- a/spec/lib/gitlab/utils/usage_data_spec.rb +++ b/spec/lib/gitlab/utils/usage_data_spec.rb @@ -5,6 +5,14 @@ RSpec.describe Gitlab::Utils::UsageData do include Database::DatabaseHelpers + describe '#add_metric' do + let(:metric) { double(:metric, value: 1) } + + it 'computes the metric value for given metric' do + expect(described_class.add_metric(metric)).to eq(1) + end + end + describe '#count' do let(:relation) { double(:relation) } -- GitLab