gitlab-ce/scripts/merge_request_query_differ.rb

#!/usr/bin/env ruby
# frozen_string_literal: true

require 'json'
require 'uri'
require 'net/http'
require 'zlib'
require 'rubygems/package'
require 'stringio'
require 'logger'
require 'set'

require_relative 'sql_fingerprint_extractor'

# MergeRequestQueryDiffer compares auto_explain logs from an MR against master
# to identify new query patterns introduced by the MR.
class MergeRequestQueryDiffer
  PROJECT_ID = ENV['CI_PROJECT_ID'] || '278964'
  PACKAGE_NAME = 'auto-explain-logs'
  PACKAGE_FILE = 'query-fingerprints.tar.gz'
  NEW_QUERIES_PATH = 'new_sql_queries.md'
  CONSOLIDATED_FINGERPRINTS_URL = ENV['CONSOLIDATED_FINGERPRINTS_URL'] ||
    "https://gitlab.com/api/v4/projects/#{PROJECT_ID}/packages/generic/#{PACKAGE_NAME}/master/#{PACKAGE_FILE}"

  attr_reader :mr_auto_explain_path, :output_file, :logger, :sql_fingerprint_extractor, :report_generator

  def initialize(mr_auto_explain_path, logger = nil)
    @mr_auto_explain_path = mr_auto_explain_path
    output_dir = File.dirname(mr_auto_explain_path)
    @output_file = File.join(output_dir, NEW_QUERIES_PATH)
    @logger = logger || Logger.new($stdout)
    @sql_fingerprint_extractor = SQLFingerprintExtractor.new(@logger)
    @report_generator = ReportGenerator.new(@logger)
  end

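  # Entry point: extracts query fingerprints from the MR's auto_explain log,
  # diffs them against the consolidated master fingerprints, writes a markdown
  # report next to the input file, and returns the number of new queries found.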
  def run
    logger.info "MR Query Diff: Analyzing new queries in MR compared to master"

    # Step 1: Extract query fingerprints from MR
    mr_queries = sql_fingerprint_extractor.extract_queries_from_file(mr_auto_explain_path)
    if mr_queries.empty?
      logger.info "No queries found in MR file"
      write_report(output_file, "# SQL Query Analysis\n\nNo queries found in this MR.")
      return 0
    end

    mr_fingerprints = mr_queries.filter_map { |q| q['fingerprint'] }
    if mr_fingerprints.empty?
      logger.info "No fingerprints found in MR queries... exiting"
      return 0
    end
    logger.info "Found #{mr_fingerprints.size} total queries in MR"

    # Step 2: Get master fingerprints
    master_fingerprints = get_master_fingerprints
    if master_fingerprints.empty?
      logger.info "No master fingerprints found for comparison... exiting"
      return 0
    end

    # Step 3: Compare and filter
    mr_queries = filter_new_queries(mr_queries, master_fingerprints)

    # Step 4: Report generation
    logger.info "Final result: #{mr_queries.size} new queries compared to all master packages"
    report = report_generator.generate(mr_queries)
    write_report(output_file, report)

    mr_queries.size
  rescue StandardError => e
    logger.error "Error in main execution: #{e.message}"
    write_report(output_file, "# SQL Query Analysis\n\nAnalysis failed: #{e.message}")
    0
  end

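  # Keeps only MR queries whose fingerprint is absent from the master set; if
  # nothing new remains, writes an early "no new queries" report.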
  def filter_new_queries(mr_queries, master_fingerprints)
    original_count = mr_queries.size
    logger.info "Filtering #{original_count} queries against master fingerprints..."

    # Only keep queries with fingerprints not in the master set
    new_queries = mr_queries.select { |q| q['fingerprint'] && !master_fingerprints.include?(q['fingerprint']) }
    filtered_count = original_count - new_queries.size
    logger.info "Filtered out #{filtered_count} existing queries, #{new_queries.size} new queries found"

    if new_queries.empty?
      logger.info "All queries in MR are already present in master packages"
      write_report(output_file, <<~MARKDOWN)
        # SQL Query Analysis

        No new SQL queries detected in this MR.
        All queries in this MR are already present in master.
      MARKDOWN
    end

    new_queries
  end

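  # Downloads the consolidated fingerprints package published from master and
  # returns the set of known fingerprints (an empty set on any failure).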
  def get_master_fingerprints
    logger.info "Fetching master fingerprints from consolidated package..."
    fingerprints = Set.new

    begin
      content = download_consolidated_package
      if content.nil?
        logger.error "Failed to download consolidated package"
        return fingerprints
      end

      # Extract fingerprints from the package
      fingerprints = sql_fingerprint_extractor.extract_from_tar_gz(content)
      logger.info "Loaded #{fingerprints.size} master fingerprints from consolidated package"
    rescue StandardError => e
      logger.error "Error loading master fingerprints: #{e.message}"
    end

    fingerprints
  end

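  # Fetches the consolidated package over HTTP. A HEAD request is issued first
  # so oversized packages (> max_size_mb) are rejected before download; returns
  # the raw package body, or nil on failure.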
  def download_consolidated_package(max_size_mb = 100)
    logger.info "Downloading from: #{CONSOLIDATED_FINGERPRINTS_URL}"
    url = URI(CONSOLIDATED_FINGERPRINTS_URL)

    # Check file size first
    begin
      response = make_request(url, method: :head, parse_json: false)
      if response.is_a?(Net::HTTPResponse)
        content_length_mb = response['content-length'].to_i / (1024**2)
        if content_length_mb > max_size_mb
          logger.error "Package size (#{content_length_mb}MB) exceeds maximum allowed size (#{max_size_mb}MB)"
          return
        end
      end
    rescue StandardError => e
      logger.warn "Warning: Could not validate file size: #{e}"
    end

    make_request(url, method: :get, parse_json: false)
  end

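  # Writes the report to disk; I/O errors are logged rather than raised.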
  def write_report(output_file, content)
    File.write(output_file, content)
    logger.info "Report saved to #{output_file}"
  rescue StandardError => e
    logger.error "Could not write report to file: #{e.message}"
  end

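  # Performs an HTTP request with GitLab token auth (PRIVATE-TOKEN or JOB-TOKEN)
  # and retries rate-limited (429) and 5xx responses with exponential backoff,
  # up to max_attempts.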
  def make_request(url, method: :get, parse_json: true, attempt: 1, max_attempts: 10)
    if attempt >= max_attempts
      logger.info "Maximum retry attempts (#{max_attempts}) reached, giving up"
      return parse_json ? [] : nil
    end

    begin
      http = Net::HTTP.new(url.host, url.port)
      http.use_ssl = (url.scheme == 'https')
      http.read_timeout = 120

      request = build_request(method, url)
      if ENV['GITLAB_TOKEN']
        request['PRIVATE-TOKEN'] = ENV['GITLAB_TOKEN']
      elsif ENV['CI_JOB_TOKEN']
        request['JOB-TOKEN'] = ENV['CI_JOB_TOKEN']
      end

      response = http.request(request)
      case response
      when Net::HTTPSuccess
        return response if method == :head

        if parse_json
          begin
            JSON.parse(response.body)
          rescue JSON::ParserError => e
            logger.error "Failed to parse JSON: #{e.message}"
            []
          end
        else
          response.body
        end
      when Net::HTTPTooManyRequests, Net::HTTPServerError
        backoff = [2**attempt, 60].min
        logger.info "HTTP #{response.code} - Waiting and retrying after #{backoff} secs"
        sleep(backoff)
        make_request(
          url, method: method, parse_json: parse_json, attempt: attempt + 1, max_attempts: max_attempts
        )
      else
        logger.error "HTTP request failed: #{response.code} - #{response.message}"
        parse_json ? [] : nil
      end
    rescue StandardError => e
      logger.error "Error making request: #{e}"
      parse_json ? [] : nil
    end
  end

  private

  def build_request(method, url)
    case method
    when :get
      Net::HTTP::Get.new(url)
    when :head
      Net::HTTP::Head.new(url)
    else
      raise ArgumentError, "Unsupported HTTP method: #{method}"
    end
  end

  # ReportGenerator handles creation of readable reports from query data
  class ReportGenerator
    attr_reader :logger

    def initialize(logger)
      @logger = logger || Logger.new($stdout)
    end

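    # Builds the markdown report: a heading plus one collapsible <details>
    # block per new query, each with the normalized SQL, its fingerprint, and
    # the execution plan when one is available.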
    def generate(mr_queries)
      report = "# SQL Query Analysis\n\n"
      if mr_queries.empty?
        report += "No new SQL queries detected in this MR."
        return report
      end

      report += "## Identified #{mr_queries.size} potential new SQL queries:\n\n"
      mr_queries.each_with_index do |query, idx|
        next unless query['normalized']

        report += <<~DETAILS
          <details>
          <summary><b>Query #{idx + 1}</b>: #{format_query_summary(query)}</summary>
          ```sql
          #{query['normalized']}
          ```
          **Fingerprint:** `#{query['fingerprint']}`
          #{query['plan'] ? format_plan(query['plan']) : ''}
          </details>
        DETAILS
      end

      report
    end

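    # One-line summary of the normalized query, collapsed to single spaces and
    # truncated to 80 characters.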
    def format_query_summary(query)
      text = query['normalized'] || ""
      cleaned = text.gsub(/\s+/, ' ').strip
      cleaned.size > 80 ? "#{cleaned[0..77]}..." : cleaned
    end

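    # Renders the execution plan as a fenced JSON block; hashes are
    # pretty-printed, anything else is stringified.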
    def format_plan(plan)
      return "" unless plan

      formatted_plan =
        if plan.is_a?(Hash)
          JSON.pretty_generate(plan)
        else
          plan.respond_to?(:to_s) ? plan.to_s : plan.inspect
        end

      <<~PLAN
        **Execution Plan:**
        ```json
        #{formatted_plan}
        ```
      PLAN
    end
  end
  private_constant :ReportGenerator
end
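
# Example invocation (the log file name below is illustrative; pass whatever
# auto_explain NDJSON artifact your pipeline produced):
#
#   scripts/merge_request_query_differ.rb tmp/auto_explain.ndjson.gz
#
# The report is written to new_sql_queries.md alongside the input file.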
if $PROGRAM_NAME == __FILE__
  if ARGV.empty?
    puts "Usage: #{$PROGRAM_NAME} <path/to/mr_auto_explain.ndjson[.gz]>"
    exit 1
  end

  mr_auto_explain_path = ARGV[0]
  unless File.exist?(mr_auto_explain_path)
    puts "Error: File not found - #{mr_auto_explain_path}"
    exit 1
  end

  diff = MergeRequestQueryDiffer.new(mr_auto_explain_path)
  diff.run
end