# frozen_string_literal: true require_relative '../config/environment' require_relative '../lib/generators/post_deployment_migration/post_deployment_migration_generator' require_relative './helpers/postgres_ai' require_relative 'helpers/groups' require 'rubocop' module Keeps # This is an implementation of a ::Gitlab::Housekeeper::Keep. This keep will locate any old batched background # migrations that were added before CUTOFF_MILESTONE and then check if they are finished by querying Postgres.ai # database archive. Once it has determined it is safe to finalize the batched background migration it will generate a # new migration which calls `ensure_batched_background_migration_is_finished` for this migration. It also updates the # `db/docs/batched_background_migrations` file with `finalized_by` and generates the `schema_migrations` file. # # This keep requires the following additional environment variables to be set: # - POSTGRES_AI_CONNECTION_STRING: A valid postgres connection string # - POSTGRES_AI_PASSWORD: The password for the postgres database in connection string # # You can run it individually with: # # ``` # bundle exec gitlab-housekeeper -d \ # -k Keeps::OverdueFinalizeBackgroundMigration # ``` class OverdueFinalizeBackgroundMigration < ::Gitlab::Housekeeper::Keep def each_change batched_background_migrations.each do |migration_yaml_file, migration| next unless before_cuttoff_milestone?(migration['milestone']) job_name = migration['migration_job_name'] next if migration_finalized?(migration, job_name) migration_record = fetch_migration_status(job_name) next unless migration_record last_migration_file = last_migration_for_job(job_name) next unless last_migration_file change = initialize_change(migration, migration_record, job_name, last_migration_file) queue_method_node = find_queue_method_node(last_migration_file) migration_name = truncate_migration_name("FinalizeHK#{job_name}") PostDeploymentMigration::PostDeploymentMigrationGenerator .source_root('generator_templates/post_deployment_migration/post_deployment_migration/') generator = ::PostDeploymentMigration::PostDeploymentMigrationGenerator.new([migration_name]) migration_file = generator.invoke_all.first change.changed_files = [migration_file] add_ensure_call_to_migration(migration_file, queue_method_node, job_name, migration_record) ::Gitlab::Housekeeper::Shell.rubocop_autocorrect(migration_file) digest = Digest::SHA256.hexdigest(generator.migration_number) digest_file = Pathname.new('db').join('schema_migrations', generator.migration_number.to_s).to_s File.open(digest_file, 'w') { |f| f.write(digest) } add_finalized_by_to_yaml(migration_yaml_file, generator.migration_number) change.changed_files << digest_file change.changed_files << migration_yaml_file yield(change) end end def initialize_change(migration, migration_record, job_name, last_migration_file) # Finalize the migration change = ::Gitlab::Housekeeper::Change.new change.title = "Finalize BBM #{job_name}" change.identifiers = [self.class.name.demodulize, job_name] change.description = change_description(migration_record, job_name, last_migration_file) feature_category = migration['feature_category'] change.labels = groups_helper.labels_for_feature_category(feature_category) + [ 'maintenance::removal' ] change.reviewers = groups_helper.pick_reviewer_for_feature_category( feature_category, change.identifiers, fallback_feature_category: 'database' ) change end def change_description(migration_record, job_name, last_migration_file) # rubocop:disable Gitlab/DocumentationLinks/HardcodedUrl -- Not running inside rails application <<~MARKDOWN #{migration_code_not_present_message unless migration_code_present?(job_name)} This migration was finished at `#{migration_record.finished_at || migration_record.updated_at}`, you can confirm the status using our [batched background migration chatops commands](https://docs.gitlab.com/ee/development/database/batched_background_migrations.html#monitor-the-progress-and-status-of-a-batched-background-migration). To confirm it is finished you can run: ``` /chatops run batched_background_migrations status #{migration_record.id} --database #{database_name(migration_record)} ``` The last time this background migration was triggered was in [#{last_migration_file}](https://gitlab.com/gitlab-org/gitlab/-/blob/master/#{last_migration_file}) You can read more about the process for finalizing batched background migrations in https://docs.gitlab.com/ee/development/database/batched_background_migrations.html . As part of our process we want to ensure all batched background migrations have had at least one [required stop](https://docs.gitlab.com/ee/development/database/required_stops.html) to process the migration. Therefore we can finalize any batched background migration that was added before the last required stop. MARKDOWN # rubocop:enable Gitlab/DocumentationLinks/HardcodedUrl end def truncate_migration_name(migration_name) # File names not allowed to exceed 100 chars due to Cop/FilenameLength so we truncate to 70 because there will be # underscores added. if migration_name.length > 70 # Consisten 5 digit integer hash so that we always get the same name every time we run this keep hash = Digest::SHA256.hexdigest(migration_name).to_i(16) % 100000 end migration_name[0...65] + hash.to_s end def add_finalized_by_to_yaml(yaml_file, migration_number) content = YAML.load_file(yaml_file) content['finalized_by'] = migration_number File.open(yaml_file, 'w') { |f| f.write(YAML.dump(content)) } end def last_migration_for_job(job_name) files = ::Gitlab::Housekeeper::Shell.execute('git', 'grep', '--name-only', "MIGRATION = .#{job_name}.") .each_line.map(&:chomp) result = files.select do |file| File.read(file).include?('queue_batched_background_migration') end.max raise "Could not find migration for #{job_name}" unless result.present? result rescue ::Gitlab::Housekeeper::Shell::Error # `git grep` returns an error status code if it finds no results nil end def add_ensure_call_to_migration(file, queue_method_node, job_name, migration_record) source = RuboCop::ProcessedSource.new(File.read(file), 3.1) ast = source.ast source_buffer = source.buffer rewriter = Parser::Source::TreeRewriter.new(source_buffer) up_method = ast.children[2].each_child_node(:def).find do |child| child.method_name == :up end table_name = queue_method_node.children[3] column_name = queue_method_node.children[4] job_arguments = queue_method_node.children[5..].select { |s| s.type != :hash } # All remaining non-keyword args gitlab_schema = migration_record.gitlab_schema added_content = <<~RUBY.strip disable_ddl_transaction! restrict_gitlab_migration gitlab_schema: :#{gitlab_schema} def up ensure_batched_background_migration_is_finished( job_class_name: '#{job_name}', table_name: #{table_name.source}, column_name: #{column_name.source}, job_arguments: [#{job_arguments.map(&:source).join(', ')}], finalize: true ) end RUBY rewriter.replace(up_method.loc.expression, added_content) content = strip_comments(rewriter.process) File.write(file, content) end def strip_comments(code) result = [] code.each_line.with_index do |line, index| result << line unless index > 0 && line.lstrip.start_with?('#') end result.join end def fetch_migration_status(job_name) result = postgres_ai.fetch_background_migration_status(job_name) return unless result.count == 1 migration_model = ::Gitlab::Database::BackgroundMigration::BatchedMigration.new(result.first) migration_model if migration_model.finished? end def postgres_ai @postgres_ai ||= Keeps::Helpers::PostgresAi.new end def migration_finalized?(migration, job_name) return true if migration['finalized_by'].present? result = `git grep --name-only "#{job_name}"`.chomp result.each_line.select do |file| File.read(file.chomp).include?('ensure_batched_background_migration_is_finished') end.any? end def find_queue_method_node(file) source = RuboCop::ProcessedSource.new(File.read(file), 3.1) ast = source.ast up_method = ast.children[2].children.find do |child| child.def_type? && child.method_name == :up end up_method.each_descendant.find do |child| child && child.send_type? && child.method_name == :queue_batched_background_migration end end def before_cuttoff_milestone?(milestone) Gem::Version.new(milestone) <= Gem::Version.new(::Gitlab::Database.min_schema_gitlab_version) end def batched_background_migrations migrations = all_batched_background_migration_files.index_with do |f| YAML.load_file(f) end migrations.sort_by { |_f, migration| Gitlab::VersionInfo.parse_from_milestone(migration['milestone']) } end def all_batched_background_migration_files Dir.glob("db/docs/batched_background_migrations/*.yml") end def groups_helper @groups_helper ||= ::Keeps::Helpers::Groups.new end def migration_code_not_present_message <<~MARKDOWN ### Warning The migration code was **not found** in the codebase, the finalization cannot complete without it. Please re-add the background migration code to this merge request and start database testing pipeline MARKDOWN end def migration_code_present?(job_name) file_name = "#{job_name.underscore}.rb" migration_code_in_ce?(file_name) || migration_code_in_ee?(file_name) end def migration_code_in_ce?(file_name) File.exist?( Rails.root.join(*%w[lib gitlab background_migration]).join(file_name) ) end def migration_code_in_ee?(file_name) File.exist?( Rails.root.join(*%w[ee lib ee gitlab background_migration]).join(file_name) ) end def database_name(migration_record) gitlab_schema = migration_record.gitlab_schema connection = Gitlab::Database.schemas_to_base_models[gitlab_schema].first.connection Gitlab::Database.db_config_name(connection) end end end