# frozen_string_literal: true require 'open3' require 'yaml' class MigrationSchemaValidator FILENAME = 'db/structure.sql' MIGRATION_DIRS = %w[db/migrate db/post_migrate].freeze SCHEMA_VERSION_DIR = 'db/schema_migrations' MODELS_DIR = 'app/models' EE_MODELS_DIR = 'ee/app/models' DB_DOCS_DIR = 'db/docs' DOC_URL = "https://docs.gitlab.com/ee/development/database/avoiding_downtime_in_migrations.html" VERSION_DIGITS = 14 SKIP_VALIDATION_LABEL = 'pipeline:skip-check-migrations' MIGRATION_METHODS = %w[ cleanup_concurrent_column_rename cleanup_conversion_of_integer_to_bigint rename_column rename_column_concurrently remove_column ].freeze MIGRATION_METHODS_REGEX = /(#{MIGRATION_METHODS.join('|')})[(\s]/ TABLE_AND_COLUMN_NAME_REGEX = /(?:#{MIGRATION_METHODS.join('|')})\s+:(\w+),\s+:(\w+)/ UP_OR_CHANGE_METHOD_REGEX = /def (?:up|change)(.*?)end/m PERMITTED_YAML_CLASSES = [String, Array, Hash].freeze def initialize @models_missing_ignore = Hash.new { |h, k| h[k] = [] } end def validate! if committed_migrations.empty? puts "\e[32m No migrations found, skipping schema validation\e[0m" return end # only check allowed to be skipped is validate_schema_on_rollback! validate_ignore_columns! validate_schema_on_migrate! validate_schema_version_files! if skip_validation? puts "\e[32m Label #{SKIP_VALIDATION_LABEL} is present, skipping schema validation\e[0m" return end validate_schema_on_rollback! end private def skip_validation? ENV.fetch('CI_MERGE_REQUEST_LABELS', '').split(',').include?(SKIP_VALIDATION_LABEL) end def validate_ignore_columns! base_message = <<~MSG.freeze Column operations, like dropping, renaming or primary key conversion, require columns to be ignored in the model. This step is necessary because Rails caches the columns and re-uses it in various places across the application. Refer to these pages for more information: #{DOC_URL}#dropping-columns #{DOC_URL}#renaming-columns #{DOC_URL}#migrating-integer-primary-keys-to-bigint Please ensure that columns are properly ignored in the models MSG committed_migrations.each do |file_name| check_file(file_name) end return if @models_missing_ignore.empty? models_missing_text = @models_missing_ignore.map { |key, values| "#{key}: #{values.join(', ')}" }.join("\n") die "#{base_message}\n#{models_missing_text}" end def extract_up_or_change_method(file_content) method_content = file_content.match(UP_OR_CHANGE_METHOD_REGEX) method_content ? method_content[1].strip : nil end def check_file(file_path) return unless File.exist?(file_path) file_content = File.read(file_path) method_content = extract_up_or_change_method(file_content) return unless method_content&.match?(MIGRATION_METHODS_REGEX) table_column_pairs = method_content.scan(TABLE_AND_COLUMN_NAME_REGEX) table_column_pairs.each do |table, column| model_name = model(table) next unless model_name model_file_path = model_path(model_name) next unless File.exist?(model_file_path) model_content = File.read(model_file_path) next if model_content.match?(/\s(ignore_column|ignore_columns)\s(:|%i\[).*?#{column}\b/m) @models_missing_ignore[model_name.to_s] << column end end def model(table_name) db_docs_file = File.join(DB_DOCS_DIR, "#{table_name}.yml") return unless File.exist?(db_docs_file) data = YAML.safe_load(File.read(db_docs_file), permitted_classes: PERMITTED_YAML_CLASSES) data['classes'].first rescue Psych::DisallowedClass => e puts "Error: Unexpected object type in YAML file for table '#{table_name}': #{e.message}" nil end def model_path(model) nested_model = underscore(model) file_path = File.join(MODELS_DIR, "#{nested_model}.rb") if File.exist?(file_path) file_path else File.join(EE_MODELS_DIR, "#{nested_model}.rb") end end def underscore(str) str.gsub(/::/, '/') .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2') .gsub(/([a-z\d])([A-Z])/, '\1_\2') .tr("-", "_") .downcase end def validate_schema_on_rollback! committed_migrations.reverse_each do |filename| version = find_migration_version(filename) run("scripts/db_tasks db:migrate:down VERSION=#{version}") run("scripts/db_tasks db:schema:dump") end git_command = "git diff #{diff_target} -- #{FILENAME}" base_message = "rollback of added migrations does not revert #{FILENAME} to previous state, " \ "please investigate. Apply the '#{SKIP_VALIDATION_LABEL}' label to skip this check if needed." \ "If you are unsure why this job is failing for your MR, then please refer to this page: " \ "https://docs.gitlab.com/ee/development/database/dbcheck-migrations-job.html#false-positives" validate_clean_output!(git_command, base_message) end def validate_schema_on_migrate! run("scripts/db_tasks db:migrate") run("scripts/db_tasks db:schema:dump") git_command = "git diff -- #{FILENAME}" base_message = "the committed #{FILENAME} does not match the one generated by running added migrations" validate_clean_output!(git_command, base_message) end def validate_schema_version_files! git_command = "git add -A -n #{SCHEMA_VERSION_DIR}" base_message = "the committed files in #{SCHEMA_VERSION_DIR} do not match those expected by the added migrations" validate_clean_output!(git_command, base_message) end def committed_migrations @committed_migrations ||= begin git_command = "git diff --name-only --diff-filter=A #{diff_target} -- #{MIGRATION_DIRS.join(' ')}" run(git_command).split("\n") end end def diff_target @diff_target ||= pipeline_for_merged_results? ? target_branch : merge_base end def merge_base run("git merge-base #{target_branch} #{source_ref}") end def target_branch ENV['CI_MERGE_REQUEST_TARGET_BRANCH_NAME'] || ENV['TARGET'] || ENV['CI_DEFAULT_BRANCH'] || 'master' end def source_ref ENV['CI_COMMIT_SHA'] || 'HEAD' end def pipeline_for_merged_results? ENV.key?('CI_MERGE_REQUEST_SOURCE_BRANCH_SHA') end def find_migration_version(filename) file_basename = File.basename(filename) version_match = /\A(?\d{#{VERSION_DIGITS}})_/o.match(file_basename) die "#{filename} has an invalid migration version" if version_match.nil? version_match[:version] end def validate_clean_output!(command, base_message) command_output = run(command) return if command_output.empty? die "#{base_message}:\n#{command_output}" end def die(message, error_code: 1) puts "\e[31mError: #{message}\e[0m" exit error_code end def run(cmd) puts "\e[32m$ #{cmd}\e[37m" stdout_str, stderr_str, status = Open3.capture3(cmd) puts "#{stdout_str}#{stderr_str}\e[0m" die "command failed: #{stderr_str}" unless status.success? stdout_str.chomp end end