Files
gitlab-foss/scripts/review_apps/automated_cleanup.rb
2024-08-20 18:08:20 +00:00

235 lines
7.9 KiB
Ruby
Executable File

#!/usr/bin/env ruby
# frozen_string_literal: true
# We need to take some precautions when using the `gitlab` gem in this project.
#
# See https://docs.gitlab.com/ee/development/pipelines/internals.html#using-the-gitlab-ruby-gem-in-the-canonical-project.
require 'gitlab'
require 'optparse'
require 'time'
require_relative File.expand_path('../../tooling/lib/tooling/helm3_client.rb', __dir__)
require_relative File.expand_path('../../tooling/lib/tooling/kubernetes_client.rb', __dir__)
module ReviewApps
class AutomatedCleanup
ENVIRONMENTS_PER_PAGE = 100
IGNORED_HELM_ERRORS = [
'transport is closing',
'error upgrading connection',
'not found'
].freeze
IGNORED_KUBERNETES_ERRORS = [
'NotFound'
].freeze
ENVIRONMENTS_NOT_FOUND_THRESHOLD = 3
def self.parse_args(argv)
options = {
dry_run: false
}
OptionParser.new do |opts|
opts.on("-d BOOLEAN", "--dry-run BOOLEAN", String, "Whether to perform a dry-run or not.") do |value|
options[:dry_run] = true if value == 'true'
end
opts.on("-h", "--help", "Prints this help") do
puts opts
exit
end
end.parse!(argv)
options
end
# $GITLAB_PROJECT_REVIEW_APP_CLEANUP_API_TOKEN => `Automated Review App Cleanup` project token
def initialize(
project_path: ENV['CI_PROJECT_PATH'],
gitlab_token: ENV['GITLAB_PROJECT_REVIEW_APP_CLEANUP_API_TOKEN'],
api_endpoint: ENV['CI_API_V4_URL'],
options: {}
)
@project_path = project_path
@gitlab_token = gitlab_token
@api_endpoint = api_endpoint
@dry_run = options[:dry_run]
end
def gitlab
@gitlab ||= begin
Gitlab.configure do |config|
config.endpoint = api_endpoint
# gitlab-bot's token "GitLab review apps cleanup"
config.private_token = gitlab_token
end
Gitlab
end
end
def helm
@helm ||= Tooling::Helm3Client.new
end
def kubernetes
@kubernetes ||= Tooling::KubernetesClient.new
end
def perform_gitlab_environment_cleanup!(env_prefix:, days_for_delete:)
puts "Dry-run mode." if dry_run
puts "Checking for GitLab #{env_prefix} environments deployed more than #{days_for_delete} days ago..."
delete_threshold = threshold_time(days: days_for_delete)
gitlab.environments(project_path, per_page: ENVIRONMENTS_PER_PAGE, sort: 'desc', search: env_prefix).auto_paginate do |environment|
next unless environment.name.start_with?(env_prefix)
# TODO: Find a way to reset those, so that we can properly delete them.
next if environment.state == 'stopping' # We cannot delete environments in stopping state
next if Time.parse(environment.created_at) > delete_threshold
stop_environment(environment)
delete_environment(environment)
end
end
def perform_helm_releases_cleanup!(days:)
puts "Dry-run mode." if dry_run
puts "Checking for Helm releases that are failed or not updated in the last #{days} days..."
threshold = threshold_time(days: days)
releases_to_delete = []
helm_releases.each do |release|
# Prevents deleting `dns-gitlab-review-app` releases or other unrelated releases
next unless Tooling::KubernetesClient::K8S_ALLOWED_NAMESPACES_REGEX.match?(release.namespace)
next unless release.name.start_with?('review-')
if release.status == 'failed' || release.last_update < threshold
releases_to_delete << release
else
print_release_state(subject: 'Release', release_name: release.name, release_date: release.last_update, action: 'leaving')
end
end
delete_helm_releases(releases_to_delete)
end
def perform_stale_namespace_cleanup!(days:)
puts "Dry-run mode." if dry_run
kubernetes.cleanup_namespaces_by_created_at(created_before: threshold_time(days: days)) unless dry_run
end
private
attr_reader :api_endpoint, :dry_run, :gitlab_token, :project_path
def stop_environment(environment)
return if environment.state == 'stopped' || environment.state == 'stopping'
print_release_state(subject: 'GitLab Environment', release_name: environment.slug, release_date: environment.created_at, action: 'stopping')
gitlab.stop_environment(project_path, environment.id) unless dry_run
rescue Gitlab::Error::Forbidden
puts "GitLab environment '#{environment.name}' / '#{environment.slug}' (##{environment.id}) is forbidden: skipping it"
end
def delete_environment(environment)
return if environment.state == 'stopping'
print_release_state(subject: 'GitLab environment', release_name: environment.slug, release_date: environment.created_at, action: 'deleting')
gitlab.delete_environment(project_path, environment.id) unless dry_run
rescue Gitlab::Error::NotFound
puts "GitLab environment '#{environment.name}' / '#{environment.slug}' (##{environment.id}) was not found: ignoring it"
rescue Gitlab::Error::Forbidden
puts "GitLab environment '#{environment.name}' / '#{environment.slug}' (##{environment.id}) is forbidden: skipping it"
rescue Gitlab::Error::InternalServerError
puts "GitLab environment '#{environment.name}' / '#{environment.slug}' (##{environment.id}) 500 error: ignoring it"
end
def helm_releases
args = ['--all', '--all-namespaces', '--date']
helm.releases(args: args)
end
def delete_helm_releases(releases)
return if releases.empty?
releases.each do |release|
print_release_state(subject: 'Release', release_name: release.name, release_status: release.status, release_date: release.last_update, action: 'cleaning')
end
releases_names = releases.map(&:name)
unless dry_run
helm.delete(release_name: releases_names)
kubernetes.delete_namespaces(releases_names)
end
rescue Tooling::Helm3Client::CommandFailedError => ex
raise ex unless ignore_exception?(ex.message, IGNORED_HELM_ERRORS)
puts "Ignoring the following Helm error:\n#{ex}\n"
rescue Tooling::KubernetesClient::CommandFailedError => ex
raise ex unless ignore_exception?(ex.message, IGNORED_KUBERNETES_ERRORS)
puts "Ignoring the following Kubernetes error:\n#{ex}\n"
end
def threshold_time(days:)
days_integer = days.to_i
raise "days should be an integer between 1 and 365 inclusive! Got #{days_integer}" unless days_integer.between?(1, 365)
Time.now - (days_integer * 24 * 3600)
end
def ignore_exception?(exception_message, exceptions_ignored)
exception_message.match?(/(#{exceptions_ignored})/)
end
def print_release_state(subject:, release_name:, release_date:, action:, release_status: nil)
puts "\n#{subject} '#{release_name}' #{"(#{release_status}) " if release_status}was last deployed on #{release_date}: #{action} it.\n"
end
end
end
def timed(task)
start = Time.now
yield(self)
puts "#{task} finished in #{Time.now - start} seconds.\n"
end
if $PROGRAM_NAME == __FILE__
options = ReviewApps::AutomatedCleanup.parse_args(ARGV)
automated_cleanup = ReviewApps::AutomatedCleanup.new(options: options)
puts
timed('Helm releases cleanup') do
automated_cleanup.perform_helm_releases_cleanup!(days: 2)
end
puts
timed('Review Apps Environments cleanup') do
automated_cleanup.perform_gitlab_environment_cleanup!(env_prefix: 'review/', days_for_delete: 14)
end
timed('Docs Review Apps environments cleanup') do
automated_cleanup.perform_gitlab_environment_cleanup!(env_prefix: 'review-docs/', days_for_delete: 30)
end
timed('as-if-foss Environments cleanup') do
automated_cleanup.perform_gitlab_environment_cleanup!(env_prefix: 'as-if-foss/', days_for_delete: 30)
end
puts
timed('Stale Namespace cleanup') do
automated_cleanup.perform_stale_namespace_cleanup!(days: 3)
end
end