mirror of
https://github.com/gitlabhq/gitlabhq.git
synced 2025-08-01 15:53:04 +00:00
306 lines
10 KiB
Ruby
306 lines
10 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module Gitlab
|
|
module Database
|
|
class CollationChecker
|
|
include Gitlab::Database::Migrations::TimeoutHelpers
|
|
|
|
COLLATION_VERSION_MISMATCH_QUERY = <<~SQL
|
|
SELECT
|
|
collname AS collation_name,
|
|
collprovider AS provider,
|
|
collversion AS stored_version,
|
|
pg_collation_actual_version(oid) AS actual_version,
|
|
collversion <> pg_collation_actual_version(oid) AS has_mismatch
|
|
FROM
|
|
pg_collation
|
|
WHERE
|
|
collprovider IN ('c', 'd')
|
|
AND (collversion IS DISTINCT FROM pg_collation_actual_version(oid));
|
|
SQL
|
|
|
|
# Few Tables/indexes prone to corruption issues for spot check
|
|
# based on the issue: https://gitlab.com/gitlab-org/gitlab/-/issues/505982
|
|
INDEXES_TO_SPOT_CHECK = {
|
|
'main' => {
|
|
'merge_request_diff_commit_users' => %w[
|
|
index_merge_request_diff_commit_users_on_name_and_email
|
|
index_merge_request_diff_commit_users_on_org_id_name_email
|
|
]
|
|
}
|
|
}.freeze
|
|
|
|
INDEXES_TO_SPOT_CHECK_QUERY = <<~SQL
|
|
SELECT DISTINCT
|
|
indrelid::regclass::text AS table_name,
|
|
indexrelid::regclass::text AS index_name,
|
|
string_agg(a.attname, ', ' ORDER BY a.attnum) AS affected_columns,
|
|
i.indisunique AS is_unique,
|
|
pg_relation_size(indexrelid) AS size_bytes
|
|
FROM
|
|
pg_index i
|
|
JOIN
|
|
pg_class idx ON idx.oid = i.indexrelid
|
|
JOIN
|
|
pg_class tbl ON tbl.oid = i.indrelid
|
|
JOIN
|
|
pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey)
|
|
WHERE
|
|
idx.relname IN (%{indexes_to_spot_check})
|
|
GROUP BY
|
|
i.indexrelid, i.indrelid, i.indisunique, idx.relname, indrelid::regclass::text, indexrelid::regclass::text
|
|
ORDER BY
|
|
table_name, index_name;
|
|
SQL
|
|
|
|
def self.run(database_name: nil, logger: Gitlab::AppLogger)
|
|
results = {}
|
|
|
|
Gitlab::Database::EachDatabase.each_connection(only: database_name) do |connection, database|
|
|
results[database] = new(connection, database, logger).run
|
|
end
|
|
|
|
results
|
|
end
|
|
|
|
attr_reader :connection, :database_name, :logger
|
|
|
|
def initialize(connection, database_name, logger)
|
|
@connection = connection
|
|
@database_name = database_name
|
|
@logger = logger
|
|
end
|
|
|
|
def run
|
|
result = {
|
|
'collation_mismatches' => [],
|
|
'corrupted_indexes' => []
|
|
}
|
|
|
|
logger.info("Checking for PostgreSQL collation mismatches on #{database_name} database...")
|
|
|
|
result['collation_mismatches'] = check_collation_mismatches
|
|
|
|
indexes_to_spot_check = transform_indexes_to_spot_check
|
|
logger.info("Found #{indexes_to_spot_check.count} indexes to corruption spot check.")
|
|
|
|
if indexes_to_spot_check.any?
|
|
result['corrupted_indexes'] = identify_corrupted_indexes(indexes_to_spot_check)
|
|
log_results(result['corrupted_indexes'])
|
|
else
|
|
logger.info("No indexes found for corruption spot check.")
|
|
end
|
|
|
|
result
|
|
end
|
|
|
|
private
|
|
|
|
def execute(sql)
|
|
connection.execute(sql) # rubocop:disable Database/AvoidUsingConnectionExecute -- Required for TimeoutHelpers
|
|
end
|
|
|
|
def check_collation_mismatches
|
|
mismatched = connection.select_all(COLLATION_VERSION_MISMATCH_QUERY).to_a
|
|
|
|
if mismatched.any?
|
|
logger.warn("Collation mismatches detected on #{database_name} database!")
|
|
logger.warn("#{mismatched.count} collation(s) have version mismatches:")
|
|
mismatched.each do |row|
|
|
logger.warn(
|
|
" - #{row['collation_name']}: stored=#{row['stored_version']}, actual=#{row['actual_version']}"
|
|
)
|
|
end
|
|
else
|
|
logger.info("No collation version mismatches detected on #{database_name}.")
|
|
end
|
|
|
|
mismatched
|
|
end
|
|
|
|
def transform_indexes_to_spot_check
|
|
indexes_to_spot_check = []
|
|
|
|
return indexes_to_spot_check unless INDEXES_TO_SPOT_CHECK.key?(database_name)
|
|
|
|
INDEXES_TO_SPOT_CHECK[database_name].each do |table_name, index_names|
|
|
temp = index_names.map { |idx_name| { 'table_name' => table_name, 'index_name' => idx_name } }
|
|
indexes_to_spot_check.concat(temp)
|
|
end
|
|
|
|
indexes_to_spot_check
|
|
end
|
|
|
|
def identify_corrupted_indexes(indexes)
|
|
return [] if indexes.empty?
|
|
|
|
corrupted_indexes = []
|
|
|
|
duplicates = check_unique_index_duplicates(fetch_index_info(indexes).select { |idx| unique?(idx) })
|
|
duplicates.each do |idx|
|
|
corruption_info = {
|
|
'index_name' => idx['index_name'],
|
|
'table_name' => idx['table_name'],
|
|
'affected_columns' => idx['affected_columns'],
|
|
'is_unique' => true,
|
|
'size_bytes' => idx['size_bytes'].to_i,
|
|
'corruption_types' => ['duplicates'],
|
|
'needs_deduplication' => true
|
|
}
|
|
|
|
corrupted_indexes << corruption_info
|
|
end
|
|
|
|
corrupted_indexes
|
|
end
|
|
|
|
def log_results(corrupted_indexes)
|
|
if corrupted_indexes.any?
|
|
logger.warn("#{corrupted_indexes.count} corrupted indexes detected!")
|
|
logger.warn("Affected indexes that need to be rebuilt:")
|
|
corrupted_indexes.each do |idx|
|
|
log_index_details(idx)
|
|
end
|
|
|
|
provide_remediation_guidance(corrupted_indexes)
|
|
else
|
|
logger.info("No corrupted indexes detected.")
|
|
end
|
|
end
|
|
|
|
def log_index_details(idx)
|
|
logger.warn(" - #{idx['index_name']} on table #{idx['table_name']}")
|
|
logger.warn(" • Issues detected: #{idx['corruption_types'].join(', ')}")
|
|
|
|
return unless idx['is_unique']
|
|
|
|
logger.warn(" • Affected columns: #{idx['affected_columns']}")
|
|
logger.warn(" • Needs deduplication: Yes")
|
|
end
|
|
|
|
def unique?(index)
|
|
Gitlab::Utils.to_boolean(index['is_unique'])
|
|
end
|
|
|
|
def fetch_index_info(indexes)
|
|
result = []
|
|
|
|
return result if indexes.empty?
|
|
|
|
quoted_index_names = indexes.map { |idx| connection.quote(idx['index_name']) }.join(',')
|
|
sql = format(INDEXES_TO_SPOT_CHECK_QUERY, indexes_to_spot_check: quoted_index_names)
|
|
|
|
disable_statement_timeout do
|
|
result = connection.select_all(sql).to_a
|
|
end
|
|
|
|
result
|
|
end
|
|
|
|
def check_unique_index_duplicates(indexes)
|
|
duplicate_indexes = []
|
|
|
|
return duplicate_indexes if indexes.empty?
|
|
|
|
indexes.each do |idx|
|
|
all_key_columns = idx['affected_columns'].split(', ')
|
|
next if all_key_columns.empty?
|
|
|
|
all_key_columns.map! { |col| connection.quote_column_name(col) }
|
|
|
|
cols_str = all_key_columns.join(', ')
|
|
not_null_conditions = all_key_columns.map { |col| "#{col} IS NOT NULL" }.join(' AND ')
|
|
|
|
sql = <<~SQL
|
|
SELECT 1
|
|
FROM (
|
|
SELECT #{cols_str}
|
|
FROM #{connection.quote_table_name(idx['table_name'])}
|
|
WHERE #{not_null_conditions}
|
|
GROUP BY #{cols_str}
|
|
HAVING COUNT(*) > 1
|
|
LIMIT 1
|
|
) AS dups
|
|
SQL
|
|
|
|
duplicates_exist = false
|
|
disable_statement_timeout do
|
|
duplicates_exist = connection.select_value(sql).present?
|
|
end
|
|
|
|
if duplicates_exist
|
|
logger.warn("Found duplicates in unique index #{idx['index_name']}")
|
|
duplicate_indexes << idx
|
|
end
|
|
end
|
|
|
|
duplicate_indexes
|
|
end
|
|
|
|
def provide_remediation_guidance(corrupted_indexes)
|
|
log_remediation_header
|
|
log_duplicate_entry_fixes(corrupted_indexes)
|
|
log_index_rebuild_commands(corrupted_indexes)
|
|
log_collation_refresh_commands
|
|
log_conclusion
|
|
end
|
|
|
|
def log_remediation_header
|
|
logger.warn("\nREMEDIATION STEPS:")
|
|
logger.warn("1. Put GitLab into maintenance mode")
|
|
logger.warn("2. Run the following SQL commands:")
|
|
end
|
|
|
|
def log_duplicate_entry_fixes(corrupted_indexes)
|
|
indexes_needing_deduplication = corrupted_indexes.select { |idx| idx['needs_deduplication'] }
|
|
return unless indexes_needing_deduplication.any?
|
|
|
|
logger.warn("\n# Step 1: Fix duplicate entries in unique indexes")
|
|
indexes_needing_deduplication.each do |idx|
|
|
logger.warn("-- Fix duplicates in #{idx['table_name']} (unique index: #{idx['index_name']})")
|
|
|
|
columns = idx['affected_columns'].split(', ')
|
|
cols_str = columns.join(', ')
|
|
not_null_conditions = columns.map do |col|
|
|
"#{col} IS NOT NULL"
|
|
end.join(' AND ')
|
|
|
|
logger.warn(
|
|
"SELECT #{cols_str}, COUNT(*), ARRAY_AGG(id) " \
|
|
"FROM #{idx['table_name']} " \
|
|
"WHERE #{not_null_conditions} " \
|
|
"GROUP BY #{cols_str} HAVING COUNT(*) > 1;"
|
|
)
|
|
end
|
|
|
|
logger.warn("\n# Use gitlab:db:deduplicate_tags or similar tasks " \
|
|
"to fix these duplicate entries before rebuilding indexes.")
|
|
end
|
|
|
|
def log_index_rebuild_commands(corrupted_indexes)
|
|
return unless corrupted_indexes.any?
|
|
|
|
logger.warn("\n# Step 2: Rebuild affected indexes")
|
|
logger.warn("# Option A: Rebuild individual indexes with minimal downtime:")
|
|
corrupted_indexes.each do |idx|
|
|
logger.warn("REINDEX INDEX CONCURRENTLY #{idx['index_name']};")
|
|
end
|
|
|
|
logger.warn("\n# Option B: Alternatively, rebuild all indexes at once (requires downtime):")
|
|
logger.warn("REINDEX DATABASE #{database_name};")
|
|
end
|
|
|
|
def log_collation_refresh_commands
|
|
logger.warn("\n# Step 3: Refresh collation versions")
|
|
logger.warn("ALTER DATABASE #{database_name} REFRESH COLLATION VERSION;")
|
|
logger.warn("-- This updates all collation versions in the database to match the current OS")
|
|
end
|
|
|
|
def log_conclusion
|
|
logger.warn("\n3. Take GitLab out of maintenance mode")
|
|
logger.warn("\nFor more information, see: https://docs.gitlab.com/administration/postgresql/upgrading_os/")
|
|
end
|
|
end
|
|
end
|
|
end
|