#!/usr/bin/env ruby
# frozen_string_literal: true

# CONTEXT FOR LLMs:
#
# This script implements CI cache analytics as part of a BigQuery POC to replace Snowflake's
# 24-hour delay with real-time cache performance monitoring.
#
# DESIGN DECISIONS:
# - Single BigQuery table approach: cache_events table contains both cache operations
#   (hit/miss/create) AND related command durations (bundle install, yarn install, etc.)
#   because of 1:1 relationship between cache types and commands they optimize
# - Dual tracking: Both BigQuery (real-time POC) and GitLab internal events (standard pipeline)
# - Self-contained script: Uses inline bundler to avoid adding google-cloud-bigquery to main Gemfile
# - GitLab-specific patterns: Cache type inference based on actual GitLab CI cache configurations
# - Class-based structure: All logic in Tooling::CiAnalytics module to avoid global namespace pollution
#
# BIGQUERY SCHEMA:
# Table: gitlab-qa-resources.ci_analytics.cache_events
# Key fields: job_id, cache_key, cache_type, cache_operation (pull/push),
#   cache_result (hit/miss/created), cache_operation_duration_seconds,
#   operation_command, operation_duration_seconds, operation_success
# Partitioned by DATE(created_at), clustered by job_id, cache_type, cache_operation
#
# CACHE TYPES SUPPORTED:
# ruby-gems, node-modules, go (gitaly-binaries), assets, rubocop, qa-ruby-gems, cng-helm, npm, unknown
#
# USAGE: Add to .gitlab-ci.yml after_script: ruby scripts/cache_analytics.rb || true
# REQUIRES: BIGQUERY_SERVICE_ACCOUNT_KEY (File type CI variable), CI_INTERNAL_EVENTS_TOKEN (automatic)
# NOTE: the code below reads the BigQuery credentials path from GLCI_BIGQUERY_CREDENTIALS_PATH and
#   the API base URL from CI_API_V4_URL; both must be set in the job environment as well.
#
# RELATED FILES:
# - tooling/lib/tooling/ci_analytics/* (parsing and BigQuery logic)
# - config/events/glci_cache_operation.yml (internal event definition)
# - GitLab issue: https://gitlab.com/gitlab-org/quality/analytics/team/-/issues/195

require_relative '../tooling/lib/tooling/ci_analytics/big_query_client'
require_relative '../tooling/lib/tooling/ci_analytics/job_trace_downloader'
require_relative '../tooling/lib/tooling/ci_analytics/cache_log_parser'
require_relative '../tooling/lib/tooling/ci_analytics/cache_event_builder'
require_relative '../tooling/lib/tooling/events/track_pipeline_events'

module Tooling
  module CiAnalytics
    # Command-line entry point that reports cache activity of the current CI job.
    #
    # It downloads the job trace, extracts cache events from it and, for every
    # event, inserts a row through BigQueryClient and sends a
    # 'glci_cache_operation' event through Events::TrackPipelineEvents.
    class CacheMetrics
      # Runs the whole analysis for the job described by the CI environment.
      #
      # Reads CI_API_V4_URL, CI_INTERNAL_EVENTS_TOKEN and
      # GLCI_BIGQUERY_CREDENTIALS_PATH with ENV.fetch, so a missing variable
      # raises KeyError, which is handled like any other failure (see below).
      #
      # Returns early, without touching BigQuery or the event tracker, when the
      # trace download yields nil/false or when the parser finds no cache events.
      #
      # Any StandardError is reported on stdout and the process is then
      # terminated with exit status 0. Because the rescue covers the whole
      # method, an error while handling one event also skips the remaining ones.
      #
      # @return [nil]
      def self.run
        puts "🔍 Analyzing cache performance for job ##{ENV['CI_JOB_ID']}..."

        trace_downloader = JobTraceDownloader.new(
          api_url: ENV.fetch('CI_API_V4_URL'),
          token: ENV.fetch('CI_INTERNAL_EVENTS_TOKEN'),
          project_id: cache_metrics_ci_environment[:project_id]
        )

        bigquery_client = BigQueryClient.new(
          credentials_path: ENV.fetch('GLCI_BIGQUERY_CREDENTIALS_PATH')
        )

        cache_event_builder = CacheEventBuilder.new(cache_metrics_ci_environment)
        events_tracker = Events::TrackPipelineEvents.new

        job_trace = trace_downloader.download_job_trace(cache_metrics_ci_environment[:job_id])
        return unless job_trace

        cache_events = CacheLogParser.extract_cache_events(job_trace)

        if cache_events.empty?
          puts "ℹ️ No cache events found"
          return
        end

        cache_events.each do |cache_data|
          # Dual tracking: the same parsed event goes to BigQuery first ...
          bigquery_event = cache_event_builder.build_bigquery_event(cache_data)
          bigquery_client.insert_cache_event(bigquery_event)

          # ... and then to the internal events pipeline.
          properties = cache_event_builder.build_internal_event_properties(cache_data)
          events_tracker.send_event(
            'glci_cache_operation',
            label: properties[:label],
            property: properties[:property],
            value: properties[:value],
            extra_properties: properties[:extra_properties]
          )
        end

        puts "✅ Processed #{cache_events.length} cache events"
      rescue StandardError => e
        puts "❌ Cache analysis failed: #{e.message}"
        # Deliberate best-effort behaviour: failures still end with a success status.
        exit 0
      end

      # Snapshot of the CI variables that describe the current job.
      #
      # The hash is built on first use and memoized on the class, so later
      # changes to ENV are not reflected. Values are nil for unset variables.
      #
      # @return [Hash{Symbol => String, nil}]
      def self.cache_metrics_ci_environment
        @cache_metrics_ci_environment ||= {
          job_id: ENV['CI_JOB_ID'],
          job_name: ENV['CI_JOB_NAME'],
          pipeline_id: ENV['CI_PIPELINE_ID'],
          project_id: ENV['CI_PROJECT_ID'],
          merge_request_iid: ENV['CI_MERGE_REQUEST_IID'],
          merge_request_target_branch: ENV['CI_MERGE_REQUEST_TARGET_BRANCH_NAME'],
          pipeline_source: ENV['CI_PIPELINE_SOURCE'],
          ref: ENV['CI_COMMIT_REF_NAME'],
          job_url: ENV['CI_JOB_URL']
        }
      end
    end
  end
end

# Run the analysis only when this file is executed directly, not when it is loaded by other code.
Tooling::CiAnalytics::CacheMetrics.run if __FILE__ == $PROGRAM_NAME