From e1d81b823fd4aa6c2be0e7113e3f72bf2a3eda5a Mon Sep 17 00:00:00 2001 From: GitLab Bot Date: Sat, 19 Jul 2025 03:10:37 +0000 Subject: [PATCH] Add latest changes from gitlab-org/gitlab@master --- lib/gitlab/database/repair_index.rb | 8 ++++++- spec/lib/gitlab/database/repair_index_spec.rb | 21 +++++++++++++------ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/lib/gitlab/database/repair_index.rb b/lib/gitlab/database/repair_index.rb index ef7067bab7b..2a1e865eefa 100644 --- a/lib/gitlab/database/repair_index.rb +++ b/lib/gitlab/database/repair_index.rb @@ -15,6 +15,7 @@ module Gitlab FIND_DUPLICATE_SETS_SQL = <<~SQL SELECT ARRAY_AGG(id ORDER BY id ASC) as ids FROM %{table_name} + WHERE %{not_null_conditions} GROUP BY %{column_list} HAVING COUNT(*) > 1 SQL @@ -323,10 +324,15 @@ module Gitlab def find_duplicate_sets(table_name, columns) logger.info("Checking for duplicates in '#{table_name}' for columns: #{columns.join(',')}...") + not_null_conditions = columns.map do |col| + "#{connection.quote_column_name(col)} IS NOT NULL" + end.join(' AND ') + sql = format( FIND_DUPLICATE_SETS_SQL, table_name: connection.quote_table_name(table_name), - column_list: columns.map { |col| connection.quote_column_name(col) }.join(', ') + column_list: columns.map { |col| connection.quote_column_name(col) }.join(', '), + not_null_conditions: not_null_conditions ) execute_local(sql, read_only: true) do diff --git a/spec/lib/gitlab/database/repair_index_spec.rb b/spec/lib/gitlab/database/repair_index_spec.rb index 105ae0bd8c5..d3d462522fc 100644 --- a/spec/lib/gitlab/database/repair_index_spec.rb +++ b/spec/lib/gitlab/database/repair_index_spec.rb @@ -74,8 +74,8 @@ RSpec.describe Gitlab::Database::RepairIndex, feature_category: :database do connection.execute(<<~SQL) CREATE TABLE #{test_table} ( id serial PRIMARY KEY, - name varchar(255) NOT NULL, - email varchar(255) NOT NULL + name varchar(255) NULL, + email varchar(255) NULL ); SQL @@ -184,7 +184,9 @@ RSpec.describe Gitlab::Database::RepairIndex, feature_category: :database do INSERT INTO #{test_table} (name, email) VALUES ('test_user', 'test@example.com'), -- ID 1 ('test_user', 'test@example.com'), -- ID 2 (duplicate) - ('other_user', 'other@example.com'); -- ID 3 + ('test_user', NULL), -- ID 3, email NULL, should be preserved + (NULL, 'other@example.com'), -- ID 4, name NULL, should be preserved + ('other_user', 'other@example.com'); -- ID 5 SQL # Create standard references (no entity column) @@ -217,7 +219,7 @@ RSpec.describe Gitlab::Database::RepairIndex, feature_category: :database do it 'handles all reference types correctly' do # before: 3 users, various references user_count_before = connection.select_value("SELECT COUNT(*) FROM #{test_table}") - expect(user_count_before).to eq(3) + expect(user_count_before).to eq(5) # unique index doesn't exist yet index_exists_before = connection.select_value(<<~SQL).present? @@ -230,9 +232,16 @@ RSpec.describe Gitlab::Database::RepairIndex, feature_category: :database do repairer.run - # after: 2 users (duplicate removed) + # after: 4 users (only true duplicate ID 2 removed) + # ID 3 with NULL value preserved user_count_after = connection.select_value("SELECT COUNT(*) FROM #{test_table}") - expect(user_count_after).to eq(2) + expect(user_count_after).to eq(4) + + # Verify NULL values are preserved + null_records = connection.select_value( + "SELECT COUNT(*) FROM #{test_table} WHERE email IS NULL or name is NULL" + ) + expect(null_records).to eq(2) # standard reference updated to good ID standard_ref = connection.select_value(