# Mirror of https://github.com/gitlabhq/gitlabhq.git (synced 2025-07-25 17:08:32 +00:00).
# Upstream file: 163 lines, 6.0 KiB, Ruby.
# frozen_string_literal: true

module Gitlab
  module Cleanup
    # Base class for cleaning up orphaned files in a remote object storage bucket, i.e. files that are present
    # in the bucket but for which no tracking row can be found in the DB.
    #
    # Orphans are either deleted or moved under the `lost_and_found` directory of the same bucket.
    #
    # Subclasses must implement {#query_for_row_tracking_the_file} and {#expected_file_path_format_regexp},
    # and may override {#each_orphan_file}.
    #
    # NOTE(review): `object_store` and `connection` are not defined in this class; presumably they are provided
    # by ::ObjectStorage::FogHelpers - confirm against that module.
    class RemoteObjectStorage
      include ::ObjectStorage::FogHelpers

      attr_reader :logger, :model_class, :storage_location_identifier

      BATCH_SIZE = 100

      # @param storage_location_identifier [Object] identifies the storage location being cleaned up; it is
      #   interpolated into the log messages.
      # @param model_class [Class] the model whose files are being cleaned up. It is only stored here;
      #   presumably subclasses use it to build their queries.
      # @param logger [#debug, #info, #warn, #error, nil] defaults to Gitlab::AppLogger when nil.
      def initialize(storage_location_identifier, model_class, logger: nil)
        @storage_location_identifier = storage_location_identifier
        @model_class = model_class
        @logger = logger || Gitlab::AppLogger
      end

      # Looks for orphaned remote files and deletes them or moves them to the lost and found directory.
      #
      # Does nothing (besides logging) when object storage is disabled or when a bucket prefix is configured.
      #
      # @param dry_run [Boolean] if true (the default), only log what would be done.
      # @param delete [Boolean] if true, delete orphans; otherwise move them to the lost and found directory.
      # @param batch_size [Integer] number of remote files processed per batch.
      # @return [void]
      def run!(dry_run: true, delete: false, batch_size: BATCH_SIZE)
        unless object_store.enabled
          logger.warn Rainbow("Object storage not enabled for #{storage_location_identifier}. Skipping.").yellow
          return
        end

        if bucket_prefix.present?
          # At the moment, Fog does not provide a cloud-agnostic way of iterating through a bucket with a prefix.
          error_message = "#{storage_location_identifier} is configured with a bucket prefix '#{bucket_prefix}'.\n" \
                          "Unfortunately, prefixes are not supported for this Rake task.\n"
          logger.error Rainbow(error_message).red
          return
        end

        action = delete ? 'delete' : 'move to lost and found'
        dry_run_suffix = dry_run ? '. Dry run' : ''
        logger.info "Looking for orphaned remote #{storage_location_identifier} files to #{action}#{dry_run_suffix}..."

        each_orphan_file(batch_size) do |file|
          handle_orphan_file(file, dry_run: dry_run, delete: delete)
        end
      end

      private

      # Default implementation, override in specific cleaner classes if needed.
      #
      # Iterates over the remote directory in batches and yields every file that is neither already in the
      # lost and found directory nor tracked in the DB.
      #
      # @param batch_size [Integer] number of remote files inspected per batch.
      # @yieldparam file [Fog::Storage::File] an orphaned remote file.
      # @return [void]
      def each_orphan_file(batch_size = BATCH_SIZE)
        # Files already moved to the lost and found directory are skipped. A plain prefix comparison is used
        # (rather than a `^`-anchored regexp) so that only the very beginning of the key is considered.
        lost_and_found_prefix = "#{lost_and_found_dir}/"

        remote_directory.files.each_slice(batch_size) do |remote_files|
          candidates = remote_files.reject { |file| file.key.start_with?(lost_and_found_prefix) }
          tracked_paths = find_tracked_paths(candidates.map(&:key))

          candidates.each do |file|
            yield file unless tracked_paths.include?(file.key)
          end
        end
      end

      # @param file_paths [Array<String>] an array of remote file paths
      # @return [Array<String>] a subset of the input paths that are tracked in the DB
      def find_tracked_paths(file_paths)
        file_paths.select { |file_path| file_tracked_in_the_db?(file_path) }
      end

      # @param file_path [String] a remote file path
      # @return [Boolean] whether or not the file is tracked in the DB. Defaults to "file is tracked" if there is any
      #   doubt, to AVOID DATA LOSS.
      def file_tracked_in_the_db?(file_path)
        # Default to "file is tracked"
        return true unless valid_file_path_format?(file_path)

        query = query_for_row_tracking_the_file(file_path)
        # The query builder is documented as possibly returning nil; without a query we cannot prove the file
        # is orphaned, so treat it as tracked.
        return true if query.nil?

        is_tracked = query.exists?

        log_file_tracked(file_path: file_path, is_tracked: is_tracked, query: query.select('1 as one').limit(1).to_sql)

        is_tracked
      end

      # Logs the outcome of a DB lookup: at debug level when a record was found, at info level otherwise.
      #
      # @param args [Hash] log arguments, including at least :file_path and :is_tracked
      # @return [void]
      def log_file_tracked(**args)
        if args[:is_tracked]
          logger.debug(args.merge(message: "Found DB record for remote stored file"))
        else
          logger.info(args.merge(message: "Did not find DB record for remote stored file"))
        end
      end

      # @param file_path [String] a remote file path
      # @return [Boolean] true if file_path matches the expected format, false otherwise.
      def valid_file_path_format?(file_path)
        return true if file_path.match?(expected_file_path_format_regexp)

        # This can happen if we need to implement support of path formats that we were not aware of. We should increase
        # the severity of this log line after we are confident that we have accounted for all expected formats.
        logger.info(
          message: "Skipping because the file path doesn't match the expected format",
          file_path: file_path,
          expected_file_path_format_regexp: expected_file_path_format_regexp
        )

        false
      end

      # @abstract
      # @param file_path [String] a remote file path (the format is specific to each bucket, see the Uploader class
      #   for the model being cleaned up for the expected format).
      # @return [ActiveRecord::Relation, nil] a relation that would match the corresponding row in the DB,
      #   if it exists, or nil if the file path doesn't match the expected format.
      # @raise [NotImplementedError] unless overridden by the cleaner class.
      def query_for_row_tracking_the_file(file_path)
        raise NotImplementedError
      end

      # @abstract
      # @return [Regexp] the expected file path format regexp specific to each cleaner class
      # @raise [NotImplementedError] unless overridden by the cleaner class.
      def expected_file_path_format_regexp
        raise NotImplementedError
      end

      # Deletes or moves the given orphan (or only reports what would happen) and logs the outcome as a warning.
      #
      # @param file [Fog::Storage::File] the orphan file to handle.
      # @param dry_run [Boolean] if true, only log what would be done.
      # @param delete [Boolean] if true, delete the orphan file, otherwise move it to the lost and found directory.
      # @return [void]
      def handle_orphan_file(file, dry_run:, delete:)
        msg =
          if dry_run
            "Would #{delete ? 'delete' : 'move to lost and found'}: #{file.key}"
          elsif delete
            file.destroy
            "Deleted: #{file.key}"
          else
            new_path = move_to_lost_and_found(file)
            "Moved to lost and found: #{file.key} -> #{new_path}"
          end

        logger.warn(msg)
      end

      # Copies the file under the lost and found directory of the same bucket, then destroys the original.
      #
      # @param file [Fog::Storage::File] the file to move.
      # @return [String] the new path of the file.
      def move_to_lost_and_found(file)
        new_path = "#{lost_and_found_dir}/#{file.key}"

        # Copy first, destroy second: if the copy raises, the original is left untouched.
        file.copy(object_store['remote_directory'], new_path)
        file.destroy

        new_path
      end

      # @return [String] name of the directory (key prefix) orphaned files are moved to.
      def lost_and_found_dir
        'lost_and_found'
      end

      # @return [Fog::Storage::Directory] a handle on the configured remote directory (bucket).
      def remote_directory
        connection.directories.new(key: object_store['remote_directory'])
      end

      # @return [String, nil] the bucket prefix configured for this object store, if any.
      def bucket_prefix
        object_store.bucket_prefix
      end
    end
  end
end