#!/usr/bin/env ruby
# frozen_string_literal: true
require 'json'
require 'uri'
require 'net/http'
require 'zlib'
require 'rubygems/package'
require 'stringio'
require 'logger'
require_relative 'sql_fingerprint_extractor'
# MergeRequestQueryDiffer compares auto_explain logs from an MR against master
# to identify new query patterns introduced by the MR
class MergeRequestQueryDiffer
  # GitLab project hosting the consolidated fingerprint package.
  # NOTE(review): '278964' is presumably gitlab-org/gitlab — confirm.
  PROJECT_ID = ENV['CI_PROJECT_ID'] || '278964'
  # Generic-package coordinates of the consolidated master fingerprints.
  PACKAGE_NAME = 'auto-explain-logs'
  PACKAGE_FILE = 'query-fingerprints.tar.gz'
  # The report is written next to the input log file under this name.
  NEW_QUERIES_PATH = 'new_sql_queries.md'
  # Download URL for the master fingerprint package; overridable via env
  # (useful for testing against a different package location).
  CONSOLIDATED_FINGERPRINTS_URL = ENV['CONSOLIDATED_FINGERPRINTS_URL'] ||
    "https://gitlab.com/api/v4/projects/#{PROJECT_ID}/packages/generic/#{PACKAGE_NAME}/master/#{PACKAGE_FILE}"
  attr_reader :mr_auto_explain_path, :output_file, :logger, :sql_fingerprint_extractor, :report_generator
# @param mr_auto_explain_path [String] path to the MR's auto_explain log file
# @param logger [Logger, nil] optional logger; defaults to one writing to stdout
def initialize(mr_auto_explain_path, logger = nil)
  @mr_auto_explain_path = mr_auto_explain_path
  # The report lands in the same directory as the input log.
  @output_file = File.join(File.dirname(mr_auto_explain_path), NEW_QUERIES_PATH)
  @logger = logger || Logger.new($stdout)
  @sql_fingerprint_extractor = SQLFingerprintExtractor.new(@logger)
  @report_generator = ReportGenerator.new(@logger)
end
# Entry point: extracts query fingerprints from the MR's auto_explain log,
# diffs them against the consolidated master fingerprint set, and writes a
# markdown report to +output_file+.
#
# @return [Integer] number of new queries found; 0 on empty input, missing
#   master data, or any error (so CI never hard-fails on analysis problems)
def run
  logger.info "MR Query Diff: Analyzing new queries in MR compared to master"
  # Step 1: Extract query fingerprints from MR
  mr_queries = sql_fingerprint_extractor.extract_queries_from_file(mr_auto_explain_path)
  if mr_queries.empty?
    logger.info "No queries found in MR file"
    write_report(output_file, "# SQL Query Analysis\n\nNo queries found in this MR.")
    return 0
  end
  mr_fingerprints = mr_queries.filter_map { |q| q['fingerprint'] }
  if mr_fingerprints.empty?
    logger.info "No fingerprints found in MR queries... exiting"
    return 0
  end
  logger.info "Found #{mr_fingerprints.size} total queries in MR"
  # Step 2: Get master fingerprints
  master_fingerprints = get_master_fingerprints
  if master_fingerprints.empty?
    logger.info "No master fingerprints found for comparison... exiting"
    return 0
  end
  # Step 3: Compare and filter
  mr_queries = filter_new_queries(mr_queries, master_fingerprints)
  # Step 4: Report generation
  logger.info "Final result: #{mr_queries.size} new queries compared to all master packages"
  report = report_generator.generate(mr_queries)
  write_report(output_file, report)
  mr_queries.size
rescue StandardError => e
  # Log at error level (was info) so failures stand out in CI job logs,
  # matching the error-handling style of the other rescue blocks here.
  logger.error "Error in main execution: #{e.message}"
  # Restore the warning emoji (the report string contained a stray lone
  # U+FE0F variation selector — the ⚠ base character had been lost).
  write_report(output_file, "# SQL Query Analysis\n\n⚠️ Analysis failed: #{e.message}")
  0
end
def filter_new_queries(mr_queries, master_fingerprints)
original_count = mr_queries.size
logger.info "Filtering #{original_count} queries against master fingerprints..."
# Only keep queries with fingerprints not in master set
new_queries = mr_queries.select { |q| q['fingerprint'] && Set[q['fingerprint']].disjoint?(master_fingerprints) }
filtered_count = original_count - new_queries.size
logger.info "Filtered out #{filtered_count} existing queries, #{new_queries.size} new queries found"
if new_queries.empty?
logger.info "All queries in MR are already present in master packages"
write_report(output_file, %(# SQL Query Analysis
No new SQL queries detected in this MR.
All queries in this MR are already present in master
))
end
new_queries
end
# Downloads the consolidated package and extracts the set of query
# fingerprints currently known on master.
#
# @return [Set<String>] master fingerprints; empty on download/extract failure
def get_master_fingerprints
  logger.info "Fetching master fingerprints from consolidated package..."
  known = Set.new
  begin
    package = download_consolidated_package
    unless package
      logger.error "Failed to download consolidated package"
      return known
    end
    # Extract fingerprints from the package
    known = sql_fingerprint_extractor.extract_from_tar_gz(package)
    logger.info "Loaded #{known.size} master fingerprints from consolidated package"
  rescue StandardError => e
    # Best-effort: an empty set simply means "no comparison possible".
    logger.error "Error loading master fingerprints: #{e.message}"
  end
  known
end
# Downloads the consolidated fingerprint package, first validating its size
# via a HEAD request so we never pull an unexpectedly huge artifact.
#
# @param max_size_mb [Integer] refuse downloads larger than this
# @return [String, nil] raw package bytes, or nil on failure/oversize
def download_consolidated_package(max_size_mb = 100)
  logger.info "Downloading from: #{CONSOLIDATED_FINGERPRINTS_URL}"
  url = URI(CONSOLIDATED_FINGERPRINTS_URL)
  return unless package_size_acceptable?(url, max_size_mb)
  make_request(url, method: :get, parse_json: false)
end

# True when the package's Content-Length is within +max_size_mb+. A failed
# or unparseable HEAD check is treated as acceptable (best-effort guard),
# matching the original behavior of proceeding with the download.
def package_size_acceptable?(url, max_size_mb)
  head = make_request(url, method: :head, parse_json: false)
  # make_request returns nil on failure for parse_json: false requests.
  return true unless head.is_a?(Net::HTTPResponse)
  content_length_mb = head['content-length'].to_i / (1024**2)
  if content_length_mb > max_size_mb
    logger.error "Package size (#{content_length_mb}MB) exceeds maximum allowed size (#{max_size_mb}MB)"
    return false
  end
  true
rescue StandardError => e
  logger.warn "Warning: Could not validate file size: #{e}"
  true
end
# Persists +content+ to +output_file+. Failures are logged rather than
# raised so a report-write problem never aborts the analysis.
def write_report(output_file, content)
  begin
    File.open(output_file, 'w') { |f| f.write(content) }
    logger.info "Report saved to #{output_file}"
  rescue StandardError => e
    logger.error "Could not write report to file: #{e.message}"
  end
end
# Performs an HTTP request against +url+, retrying with exponential backoff
# on transient failures (429 / 5xx). Authenticates with PRIVATE-TOKEN
# (GITLAB_TOKEN) or, failing that, JOB-TOKEN (CI_JOB_TOKEN).
#
# Returns:
#   - the Net::HTTPResponse itself for a successful :head request
#   - the parsed JSON body when parse_json is true ([] on parse failure)
#   - the raw body string when parse_json is false
#   - [] / nil (depending on parse_json) on non-retryable errors, network
#     errors, or when the retry budget is exhausted
def make_request(url, method: :get, parse_json: true, attempt: 1, max_attempts: 10)
  # NOTE(review): `attempt >= max_attempts` means at most (max_attempts - 1)
  # requests are actually issued — confirm this off-by-one is intended.
  if attempt >= max_attempts
    logger.info "Maximum retry attempts (#{max_attempts}) reached for rate limiting"
    return parse_json ? [] : nil
  end
  begin
    http = Net::HTTP.new(url.host, url.port)
    http.use_ssl = (url.scheme == 'https')
    http.read_timeout = 120
    request = build_request(method, url)
    # A personal/project token takes priority over the CI job token.
    if ENV['GITLAB_TOKEN']
      request['PRIVATE-TOKEN'] = ENV['GITLAB_TOKEN']
    elsif ENV['CI_JOB_TOKEN']
      request['JOB-TOKEN'] = ENV['CI_JOB_TOKEN']
    end
    response = http.request(request)
    case response
    when Net::HTTPSuccess
      # HEAD has no useful body; the caller inspects the headers instead.
      return response if method == :head
      if parse_json
        begin
          JSON.parse(response.body)
        rescue JSON::ParserError => e
          logger.error "Failed to parse JSON: #{e.message}"
          []
        end
      else
        response.body
      end
    when Net::HTTPTooManyRequests,
      Net::HTTPServerError,
      Net::HTTPInternalServerError,
      Net::HTTPServiceUnavailable,
      Net::HTTPGatewayTimeout,
      Net::HTTPBadGateway
      # Transient error: back off exponentially (capped at 60s) and recurse
      # with the attempt counter bumped. (Net::HTTPServerError already
      # covers the 5xx subclasses listed after it — harmless redundancy.)
      backoff = [1 * (2**attempt), 60].min
      logger.info "HTTP #{response.code} - Waiting and retrying after #{backoff} secs"
      sleep(backoff)
      make_request(
        url, method: method, parse_json: parse_json, attempt: attempt + 1, max_attempts: max_attempts
      )
    else
      # Non-retryable response (4xx other than 429, redirects, etc.).
      logger.error "HTTP request failed: #{response.code} - #{response.message}"
      parse_json ? [] : nil
    end
  rescue StandardError => e
    # Network-level failure (DNS, timeout, connection refused, TLS, ...).
    logger.error "Error making request: #{e}"
    parse_json ? [] : nil
  end
end
private
# Builds the Net::HTTP request object for the given HTTP verb.
#
# @param method [Symbol] :get or :head
# @param url [URI] target URL
# @raise [ArgumentError] for any other verb
def build_request(method, url)
  request_class = { get: Net::HTTP::Get, head: Net::HTTP::Head }[method]
  raise ArgumentError, "Unsupported HTTP method: #{method}" unless request_class

  request_class.new(url)
end
# ReportGenerator handles creation of readable reports from query data
# ReportGenerator renders human-readable markdown reports from query data.
class ReportGenerator
  attr_reader :logger

  # @param logger [Logger, nil] falls back to a stdout logger
  def initialize(logger)
    @logger = logger || Logger.new($stdout)
  end

  # Builds the full markdown report for the given new queries.
  # Queries without a 'normalized' SQL text are skipped (but still counted
  # in the numbering, since the index runs over the whole array).
  def generate(mr_queries)
    header = "# SQL Query Analysis\n\n"
    return header + "No new SQL queries detected in this MR." if mr_queries.empty?

    # Accumulate with << to avoid reallocating the string on each append.
    report = +header
    report << "## Identified potential #{mr_queries.size} new SQL queries:\n\n"
    mr_queries.each_with_index do |query, idx|
      next unless query['normalized']
      plan_section = query['plan'] ? format_plan(query['plan']) : ''
      report << <<~DETAILS
        Query #{idx + 1}: #{format_query_summary(query)}
        ```sql
        #{query['normalized']}
        ```
        **Fingerprint:** `#{query['fingerprint']}`
        #{plan_section}
      DETAILS
    end
    report
  end

  # One-line summary of a query: whitespace collapsed, truncated to 80 chars.
  def format_query_summary(query)
    summary = (query['normalized'] || "").gsub(/\s+/, ' ').strip
    return summary if summary.size <= 80

    "#{summary[0..77]}..."
  end

  # Renders the execution plan as a fenced JSON block ("" when plan is nil).
  def format_plan(plan)
    return "" unless plan

    rendered =
      if plan.is_a?(Hash)
        JSON.pretty_generate(plan)
      else
        plan.respond_to?(:to_s) ? plan.to_s : plan.inspect
      end
    <<~PLAN
      **Execution Plan:**
      ```json
      #{rendered}
      ```
    PLAN
  end
end
private_constant :ReportGenerator
end
# CLI entry point: expects the path to an MR auto_explain log as ARGV[0].
if $PROGRAM_NAME == __FILE__
  if ARGV.empty?
    # Fix: the usage string previously ended in a bare trailing space —
    # the argument placeholder was missing from the message.
    puts "Usage: #{$PROGRAM_NAME} <mr_auto_explain_log_path>"
    exit 1
  end
  mr_auto_explain_path = ARGV[0]
  unless File.exist?(mr_auto_explain_path)
    puts "Error: File not found - #{mr_auto_explain_path}"
    exit 1
  end
  diff = MergeRequestQueryDiffer.new(mr_auto_explain_path)
  diff.run
end