From d720dd102dbf29520cf18a073a87501a13c9e9c0 Mon Sep 17 00:00:00 2001 From: GitLab Bot Date: Tue, 15 Jul 2025 09:07:59 +0000 Subject: [PATCH] Add latest changes from gitlab-org/gitlab@master --- .eslint_todo/vue-no-unused-properties.mjs | 1 - .gitlab/CODEOWNERS | 2 + GITLAB_KAS_VERSION | 2 +- .../security_configuration/components/app.vue | 16 + .../clusters/knative_services_finder.rb | 2 +- app/models/ci/workloads/workload.rb | 4 + app/services/concerns/measurable.rb | 2 +- .../development/preserve_markdown.yml | 2 +- .../ops/allow_anonymous_searches.yml | 8 - config/routes/repository.rb | 7 +- ...earches_flag_to_application_settings_v2.rb | 26 +- doc/administration/gitaly/praefect/_index.md | 1812 +---------------- .../gitaly/praefect/configure.md | 1799 ++++++++++++++++ .../gitaly/praefect/monitoring.md | 2 +- .../gitaly/praefect/recovery.md | 22 +- .../gitaly/praefect/troubleshooting.md | 8 +- .../gitlab_duo_self_hosted/_index.md | 2 +- .../monitoring/prometheus/gitlab_metrics.md | 2 +- doc/administration/postgresql/moving.md | 2 +- .../reference_architectures/10k_users.md | 2 +- .../reference_architectures/25k_users.md | 2 +- .../reference_architectures/3k_users.md | 2 +- .../reference_architectures/50k_users.md | 2 +- .../reference_architectures/5k_users.md | 2 +- .../reference_architectures/_index.md | 2 +- doc/administration/settings/scim_setup.md | 1 + doc/api/graphql/reference/_index.md | 7 + doc/ci/runners/runners_scope.md | 2 +- doc/development/documentation/workflow.md | 4 +- .../best_practices/rspec_metadata_tests.md | 2 +- doc/topics/offline/quick_start_guide.md | 2 +- doc/update/versions/gitlab_15_changes.md | 6 +- doc/update/versions/gitlab_16_changes.md | 6 +- doc/update/versions/gitlab_17_changes.md | 2 +- doc/update/zero_downtime.md | 2 +- lib/gitlab/utils/class_name_converter.rb | 21 + locale/gitlab.pot | 3 + qa/qa/tools/delete_projects.rb | 27 +- qa/qa/tools/delete_resource_base.rb | 7 +- qa/qa/tools/delete_subgroups.rb | 31 +- 
qa/qa/tools/delete_test_snippets.rb | 6 +- qa/qa/tools/delete_test_ssh_keys.rb | 4 +- qa/qa/tools/delete_test_users.rb | 4 +- qa/qa/tools/delete_user_groups.rb | 9 +- qa/qa/tools/delete_user_projects.rb | 6 +- qa/qa/tools/lib/group.rb | 4 +- ...es_flag_to_application_settings_v2_spec.rb | 53 - spec/models/ci/workloads/workload_spec.rb | 9 + .../mutations/work_items/bulk_update_spec.rb | 2 +- spec/routing/project_routing_spec.rb | 19 + .../ci/deployable_shared_examples.rb | 2 +- .../ci/deployable_shared_examples_ee.rb | 2 +- .../escalatable_shared_examples.rb | 2 +- .../measurable_service_shared_examples.rb | 2 +- 54 files changed, 1992 insertions(+), 1988 deletions(-) delete mode 100644 config/feature_flags/ops/allow_anonymous_searches.yml create mode 100644 doc/administration/gitaly/praefect/configure.md create mode 100644 lib/gitlab/utils/class_name_converter.rb delete mode 100644 spec/migrations/20250514055316_migrate_anonymous_searches_flag_to_application_settings_v2_spec.rb diff --git a/.eslint_todo/vue-no-unused-properties.mjs b/.eslint_todo/vue-no-unused-properties.mjs index fa49db737e8..17375e5c983 100644 --- a/.eslint_todo/vue-no-unused-properties.mjs +++ b/.eslint_todo/vue-no-unused-properties.mjs @@ -222,7 +222,6 @@ export default { 'app/assets/javascripts/wikis/wiki_notes/components/placeholder_note.vue', 'app/assets/javascripts/wikis/wiki_notes/components/wiki_discussion.vue', 'app/assets/javascripts/wikis/wiki_notes/components/wiki_notes_app.vue', - 'ee/app/assets/javascripts/admin/subscriptions/show/components/subscription_breakdown.vue', 'ee/app/assets/javascripts/ai/components/duo_chat_feedback_modal.vue', 'ee/app/assets/javascripts/ai/components/user_feedback.vue', 'ee/app/assets/javascripts/analytics/analytics_dashboards/components/visualizations/dora_chart.vue', diff --git a/.gitlab/CODEOWNERS b/.gitlab/CODEOWNERS index b1aa36af599..131f5d15fd6 100644 --- a/.gitlab/CODEOWNERS +++ b/.gitlab/CODEOWNERS @@ -130,6 +130,8 @@ 
config/bounded_contexts.yml @fabiopitino @grzesiek @stanhu @cwoolley-gitlab @tku /.gitlab/ci/reports.gitlab-ci.yml @gl-dx/pipeline-maintainers @gitlab-com/gl-security/appsec /.gitlab/ci/review-apps/qa.gitlab-ci.yml @gl-dx/pipeline-maintainers @gl-dx/maintainers /.gitlab/ci/test-on-gdk/ @gl-dx/pipeline-maintainers @gl-dx/maintainers +/.gitlab/ci/release-environments.gitlab-ci.yml @gl-dx/pipeline-maintainers @gitlab-org/delivery +/.gitlab/ci/release-environments/ @gl-dx/pipeline-maintainers @gitlab-org/delivery /gems/gem.gitlab-ci.yml [Tooling] @gl-dx/tooling-maintainers diff --git a/GITLAB_KAS_VERSION b/GITLAB_KAS_VERSION index f6c06af1b9b..8db7ccc20fe 100644 --- a/GITLAB_KAS_VERSION +++ b/GITLAB_KAS_VERSION @@ -1 +1 @@ -886a296a3cc0e2c8e1d401c1ef33dfc214148542 +56c389ee230df185af98ba3eb40c1eaab4f8cae0 diff --git a/app/assets/javascripts/security_configuration/components/app.vue b/app/assets/javascripts/security_configuration/components/app.vue index ec8046c3ab3..f5b1d041e87 100644 --- a/app/assets/javascripts/security_configuration/components/app.vue +++ b/app/assets/javascripts/security_configuration/components/app.vue @@ -26,6 +26,10 @@ import TrainingProviderList from './training_provider_list.vue'; export default { i18n, components: { + ApplySecurityLabels: () => + import( + 'ee_component/security_configuration/security_labels/components/apply_security_labels.vue' + ), AutoDevOpsAlert, AutoDevOpsEnabledAlert, FeatureCard, @@ -113,6 +117,11 @@ export default { shouldShowVulnerabilityArchives() { return this.glFeatures?.vulnerabilityArchival; }, + shouldShowSecurityLabels() { + return ( + window.gon?.licensed_features?.securityLabels && this.glFeatures?.securityContextLabels + ); + }, }, methods: { getComponentName(feature) { @@ -260,6 +269,13 @@ export default { + + + diff --git a/app/finders/clusters/knative_services_finder.rb b/app/finders/clusters/knative_services_finder.rb index 64787f9bf70..36e100f793d 100644 --- 
a/app/finders/clusters/knative_services_finder.rb +++ b/app/finders/clusters/knative_services_finder.rb @@ -80,7 +80,7 @@ module Clusters end def model_name - self.class.name.underscore.tr('/', '_') + ::Gitlab::Utils::ClassNameConverter.new(self.class).string_representation end private diff --git a/app/models/ci/workloads/workload.rb b/app/models/ci/workloads/workload.rb index cc392c2b91d..276631be1f8 100644 --- a/app/models/ci/workloads/workload.rb +++ b/app/models/ci/workloads/workload.rb @@ -14,6 +14,10 @@ module Ci validates :project, presence: true validates :pipeline, presence: true + + def logs_url + Gitlab::Routing.url_helpers.project_pipeline_url(project, pipeline) + end end end end diff --git a/app/services/concerns/measurable.rb b/app/services/concerns/measurable.rb index ebce8a0667a..2552b12e63b 100644 --- a/app/services/concerns/measurable.rb +++ b/app/services/concerns/measurable.rb @@ -54,6 +54,6 @@ module Measurable end def service_class - self.class.name.underscore.tr('/', '_') + ::Gitlab::Utils::ClassNameConverter.new(self.class).string_representation end end diff --git a/config/feature_flags/development/preserve_markdown.yml b/config/feature_flags/development/preserve_markdown.yml index 9483654f8d5..b9c82f1d330 100644 --- a/config/feature_flags/development/preserve_markdown.yml +++ b/config/feature_flags/development/preserve_markdown.yml @@ -5,4 +5,4 @@ rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/474407 milestone: '17.3' type: development group: group::knowledge -default_enabled: false +default_enabled: true diff --git a/config/feature_flags/ops/allow_anonymous_searches.yml b/config/feature_flags/ops/allow_anonymous_searches.yml deleted file mode 100644 index 248beb7c39d..00000000000 --- a/config/feature_flags/ops/allow_anonymous_searches.yml +++ /dev/null @@ -1,8 +0,0 @@ ---- -name: allow_anonymous_searches -introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/138975 -rollout_issue_url: 
https://gitlab.com/gitlab-org/gitlab/-/issues/434218 -milestone: '16.7' -type: ops -group: group::global search -default_enabled: true diff --git a/config/routes/repository.rb b/config/routes/repository.rb index d5fc229cef5..163044a9457 100644 --- a/config/routes/repository.rb +++ b/config/routes/repository.rb @@ -86,7 +86,12 @@ scope format: false do get '/blame/*id', to: 'blame#show', as: :blame get '/commits', to: 'commits#commits_root', as: :commits_root - get '/commits/*id/signatures', to: 'commits#signatures', as: :signatures + # this route conflicts with branch names that end with /signatures + # to avoid this issue we ensure that #signatures only responds to json requests + # and also is not a commit pagination request, based on the 'offset' param presence + get '/commits/*id/signatures', to: 'commits#signatures', as: :signatures, constraints: ->(request) do + request.format == :json && !request.params[:offset] + end get '/commits/*id', to: 'commits#show', as: :commits post '/create_dir/*id', to: 'tree#create_dir', as: :create_dir diff --git a/db/post_migrate/20250514055316_migrate_anonymous_searches_flag_to_application_settings_v2.rb b/db/post_migrate/20250514055316_migrate_anonymous_searches_flag_to_application_settings_v2.rb index d4689f81d55..aee30ebd0ed 100644 --- a/db/post_migrate/20250514055316_migrate_anonymous_searches_flag_to_application_settings_v2.rb +++ b/db/post_migrate/20250514055316_migrate_anonymous_searches_flag_to_application_settings_v2.rb @@ -5,32 +5,10 @@ class MigrateAnonymousSearchesFlagToApplicationSettingsV2 < Gitlab::Database::Mi milestone '18.1' def up - # rubocop:disable Gitlab/FeatureFlagWithoutActor -- Does not execute in user context - anonymous_searches_allowed = Feature.enabled?(:allow_anonymous_searches) # rubocop:disable Migration/PreventFeatureFlagsUsage -- helper is buggy right now will be fixed in https://gitlab.com/gitlab-org/gitlab/-/merge_requests/190841 - # rubocop:enable Gitlab/FeatureFlagWithoutActor - - sql = 
<<~SQL - UPDATE application_settings - SET search = jsonb_set( - COALESCE(search, '{}'::jsonb), - '{anonymous_searches_allowed}', - to_jsonb(#{anonymous_searches_allowed}) - ), - updated_at = NOW() - WHERE id = (SELECT MAX(id) FROM application_settings) - SQL - - execute(sql) + # Marking migration as no-op, after required stop. end def down - sql = <<~SQL - UPDATE application_settings - SET search = search - 'anonymous_searches_allowed', - updated_at = NOW() - WHERE id = (SELECT MAX(id) FROM application_settings) - SQL - - execute(sql) + # Marking migration as no-op, after required stop. end end diff --git a/doc/administration/gitaly/praefect/_index.md b/doc/administration/gitaly/praefect/_index.md index f85e8d4d86c..4ce24b113c6 100644 --- a/doc/administration/gitaly/praefect/_index.md +++ b/doc/administration/gitaly/praefect/_index.md @@ -95,7 +95,7 @@ The following table outlines the major differences between Gitaly Cluster and Ge | Tool | Nodes | Locations | Latency tolerance | Failover | Consistency | Provides redundancy for | |:---------------|:---------|:----------|:------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------|:--------------------------------------|:------------------------| -| Gitaly Cluster | Multiple | Single | [Less than 1 second, ideally single-digit milliseconds](#network-latency-and-connectivity) | [Automatic](#automatic-failover-and-primary-election-strategies) | [Strong](#strong-consistency) | Data storage in Git | +| Gitaly Cluster | Multiple | Single | [Less than 1 second, ideally single-digit milliseconds](configure.md#network-latency-and-connectivity) | [Automatic](configure.md#automatic-failover-and-primary-election-strategies) | [Strong](#strong-consistency) | Data storage in Git | | Geo | Multiple | Multiple | Up to one minute | [Manual](../../geo/disaster_recovery/_index.md) | Eventual | Entire GitLab 
instance | For more information, see: @@ -271,10 +271,10 @@ relative path of the repository in the metadata store. Gitaly Cluster consists of multiple components: -- [Load balancer](#load-balancer) for distributing requests and providing fault-tolerant access to +- [Load balancer](configure.md#load-balancer) for distributing requests and providing fault-tolerant access to Praefect nodes. -- [Praefect](#praefect) nodes for managing the cluster and routing requests to Gitaly nodes. -- [PostgreSQL database](#postgresql) for persisting cluster metadata and [PgBouncer](#use-pgbouncer), +- [Praefect](configure.md#praefect) nodes for managing the cluster and routing requests to Gitaly nodes. +- [PostgreSQL database](configure.md#postgresql) for persisting cluster metadata and [PgBouncer](configure.md#use-pgbouncer), recommended for pooling Praefect's database connections. - Gitaly nodes to provide repository storage and Git access. @@ -294,7 +294,7 @@ Gitaly Cluster provides the following features: - [Distributed reads](#distributed-reads) among Gitaly nodes. - [Strong consistency](#strong-consistency) of the secondary replicas. - [Replication factor](#replication-factor) of repositories for increased redundancy. -- [Automatic failover](#automatic-failover-and-primary-election-strategies) from the +- [Automatic failover](configure.md#automatic-failover-and-primary-election-strategies) from the primary Gitaly node to secondary Gitaly nodes. - Reporting of possible [data loss](recovery.md#check-for-data-loss) if replication queue isn't empty. @@ -350,7 +350,7 @@ replication factor: By default, Gitaly Cluster replicates repositories to every storage in a [virtual storage](#virtual-storage). -For configuration information, see [Configure replication factor](#configure-replication-factor). +For configuration information, see [Configure replication factor](configure.md#configure-replication-factor). 
## Upgrade Gitaly Cluster @@ -412,9 +412,9 @@ Before migrating to Gitaly Cluster: To migrate to Gitaly Cluster: 1. Create the required storage. Refer to - [repository storage recommendations](#repository-storage-recommendations). -1. Create and configure [Gitaly Cluster](#configure-gitaly-cluster). -1. Configure the existing Gitaly instance [to use TCP](#use-tcp-for-existing-gitlab-instances), if not already configured that way. + [repository storage recommendations](configure.md#repository-storage-recommendations). +1. Create and configure [Gitaly Cluster](configure.md). +1. Configure the existing Gitaly instance [to use TCP](configure.md#use-tcp-for-existing-gitlab-instances), if not already configured that way. 1. [Move the repositories](../../operations/moving_repositories.md#moving-repositories). To migrate to Gitaly Cluster, existing repositories stored outside Gitaly Cluster must be moved. There is no automatic migration, but the moves can be scheduled with the GitLab API. @@ -474,1797 +474,3 @@ If you are unable to use either method, contact customer support for restoration ### What to do if you are on Gitaly Cluster experiencing an issue or limitation Contact customer support for immediate help in restoration or recovery. - -## Configure Gitaly Cluster - -Configure Gitaly Cluster using either: - -- Gitaly Cluster configuration instructions available as part of - [reference architectures](../../reference_architectures/_index.md) for installations of up to: - - [60 RPS or 3,000 users](../../reference_architectures/3k_users.md#configure-gitaly-cluster). - - [100 RPS or 5,000 users](../../reference_architectures/5k_users.md#configure-gitaly-cluster). - - [200 RPS or 10,000 users](../../reference_architectures/10k_users.md#configure-gitaly-cluster). - - [500 RPS or 25,000 users](../../reference_architectures/25k_users.md#configure-gitaly-cluster). - - [1000 RPS or 50,000 users](../../reference_architectures/50k_users.md#configure-gitaly-cluster). 
-- The custom configuration instructions that follow on this page. - -Smaller GitLab installations may need only [Gitaly itself](../_index.md). - -{{< alert type="note" >}} - -Gitaly Cluster is not yet supported in Kubernetes, Amazon ECS, or similar container environments. For more information, see -[epic 6127](https://gitlab.com/groups/gitlab-org/-/epics/6127). - -{{< /alert >}} - -## Requirements - -The minimum recommended configuration for a Gitaly Cluster requires: - -- 1 load balancer -- 1 PostgreSQL server (a [supported version](../../../install/requirements.md#postgresql)) -- 3 Praefect nodes -- 3 Gitaly nodes (1 primary, 2 secondary) - -{{< alert type="note" >}} - -[Disk requirements](../_index.md#disk-requirements) apply to Gitaly nodes. - -{{< /alert >}} - -You should configure an odd number of Gitaly nodes so that transactions have a tie-breaker in case one of the -Gitaly nodes fails in a mutating RPC call. - -See the [design document](https://gitlab.com/gitlab-org/gitaly/-/blob/master/doc/design_ha.md) -for implementation details. - -{{< alert type="note" >}} - -If not set in GitLab, feature flags are read as false from the console and Praefect uses their -default value. The default value depends on the GitLab version. - -{{< /alert >}} - -### Network latency and connectivity - -Network latency for Gitaly Cluster should ideally be measurable in single-digit milliseconds. Latency is particularly -important for: - -- Gitaly node health checks. Nodes must be able to respond within 1 second. -- Reference transactions that enforce [strong consistency](#strong-consistency). Lower latencies mean Gitaly - nodes can agree on changes faster. - -Achieving acceptable latency between Gitaly nodes: - -- On physical networks generally means high bandwidth, single location connections. -- On the cloud generally means in the same region, including allowing cross availability zone replication. These links - are designed for this type of synchronization. 
Latency of less than 2 ms should be sufficient for Gitaly Cluster. - -If you can't provide low network latencies for replication (for example, between distant locations), consider Geo. For -more information, see [Comparison to Geo](#comparison-to-geo). - -Gitaly Cluster [components](#components) communicate with each other over many routes. Your firewall rules must -allow the following for Gitaly Cluster to function properly: - -| From | To | Default port | TLS port | -|:-----------------------|:-----------------------|:-------------|:---------| -| GitLab | Praefect load balancer | `2305` | `3305` | -| Praefect load balancer | Praefect | `2305` | `3305` | -| Praefect | Gitaly | `8075` | `9999` | -| Praefect | GitLab (internal API) | `80` | `443` | -| Gitaly | GitLab (internal API) | `80` | `443` | -| Gitaly | Praefect load balancer | `2305` | `3305` | -| Gitaly | Praefect | `2305` | `3305` | -| Gitaly | Gitaly | `8075` | `9999` | - -{{< alert type="note" >}} - -Gitaly does not directly connect to Praefect. However, requests from Gitaly to the Praefect -load balancer may still be blocked unless firewalls on the Praefect nodes allow traffic from -the Gitaly nodes. - -{{< /alert >}} - -### Praefect database storage - -The requirements are relatively low because the database contains only metadata of: - -- Where repositories are located. -- Some queued work. - -It depends on the number of repositories, but a good minimum is 5-10 GB, similar to the main -GitLab application database. - -## Setup Instructions - -If you [installed](https://about.gitlab.com/install/) GitLab using the Linux package -(highly recommended), follow the steps below: - -1. [Preparation](#preparation) -1. [Configuring the Praefect database](#postgresql) -1. [Configuring the Praefect proxy/router](#praefect) -1. [Configuring each Gitaly node](#gitaly) (once for each Gitaly node) -1. [Configure the load balancer](#load-balancer) -1. [Updating the GitLab server configuration](#gitlab) -1. 
[Configure Grafana](#grafana) - -### Preparation - -Before beginning, you should already have a working GitLab instance. -[Learn how to install GitLab](https://about.gitlab.com/install/). - -Provision a PostgreSQL server. You should use the PostgreSQL that is shipped -with the Linux package and use it to configure the PostgreSQL database. You can use an -external PostgreSQL server but you must set it up [manually](#manual-database-setup). - -Prepare all your new nodes by [installing GitLab](https://about.gitlab.com/install/). You need: - -- 1 PostgreSQL node -- 1 PgBouncer node (optional) -- At least 1 Praefect node (minimal storage required) -- 3 Gitaly nodes (high CPU, high memory, fast storage) -- 1 GitLab server - -You also need the IP/host address for each node: - -1. `PRAEFECT_LOADBALANCER_HOST`: the IP/host address of Praefect load balancer -1. `POSTGRESQL_HOST`: the IP/host address of the PostgreSQL server -1. `PGBOUNCER_HOST`: the IP/host address of the PostgreSQL server -1. `PRAEFECT_HOST`: the IP/host address of the Praefect server -1. `GITALY_HOST_*`: the IP or host address of each Gitaly server -1. `GITLAB_HOST`: the IP/host address of the GitLab server - -If you are using Google Cloud Platform, SoftLayer, or any other vendor that provides a virtual private cloud (VPC) you can use the private addresses for each cloud instance (corresponds to "internal address" for Google Cloud Platform) for `PRAEFECT_HOST`, `GITALY_HOST_*`, and `GITLAB_HOST`. - -#### Secrets - -The communication between components is secured with different secrets, which -are described below. Before you begin, generate a unique secret for each, and -make note of it. This enables you to replace these placeholder tokens -with secure tokens as you complete the setup process. - -1. `GITLAB_SHELL_SECRET_TOKEN`: this is used by Git hooks to make callback HTTP - API requests to GitLab when accepting a Git push. This secret is shared with - GitLab Shell for legacy reasons. -1. 
`PRAEFECT_EXTERNAL_TOKEN`: repositories hosted on your Praefect cluster can - only be accessed by Gitaly clients that carry this token. -1. `PRAEFECT_INTERNAL_TOKEN`: this token is used for replication traffic inside - your Praefect cluster. This token is distinct from `PRAEFECT_EXTERNAL_TOKEN` - because Gitaly clients must not be able to access internal nodes of the - Praefect cluster directly; that could lead to data loss. -1. `PRAEFECT_SQL_PASSWORD`: this password is used by Praefect to connect to - PostgreSQL. -1. `PRAEFECT_SQL_PASSWORD_HASH`: the hash of password of the Praefect user. - Use `gitlab-ctl pg-password-md5 praefect` to generate the hash. The command - asks for the password for `praefect` user. Enter `PRAEFECT_SQL_PASSWORD` - plaintext password. By default, Praefect uses `praefect` user, but you can - change it. -1. `PGBOUNCER_SQL_PASSWORD_HASH`: the hash of password of the PgBouncer user. - PgBouncer uses this password to connect to PostgreSQL. For more details - see [bundled PgBouncer](../../postgresql/pgbouncer.md) documentation. - -We note in the instructions below where these secrets are required. - -{{< alert type="note" >}} - -Linux package installations can use `gitlab-secrets.json` for `GITLAB_SHELL_SECRET_TOKEN`. - -{{< /alert >}} - -### Customize time server setting - -By default, Gitaly and Praefect nodes use the time server at `pool.ntp.org` for time synchronization checks. You can customize this setting by adding the -following to `gitlab.rb` on each node: - -- `gitaly['env'] = { "NTP_HOST" => "ntp.example.com" }`, for Gitaly nodes. -- `praefect['env'] = { "NTP_HOST" => "ntp.example.com" }`, for Praefect nodes. - -### PostgreSQL - -{{< alert type="note" >}} - -Do not store the GitLab application database and the Praefect -database on the same PostgreSQL server if using [Geo](../../geo/_index.md). -The replication state is internal to each instance of GitLab and should -not be replicated. 
- -{{< /alert >}} - -These instructions help set up a single PostgreSQL database, which creates a single point of failure. To avoid this, you can configure your own clustered -PostgreSQL. -Clustered database support for other databases (for example, Praefect and Geo databases) is proposed in -[issue 7292](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/7292). - -The following options are available: - -- For non-Geo installations, either: - - Use one of the documented [PostgreSQL setups](../../postgresql/_index.md). - - Use your own third-party database setup. This requires [manual setup](#manual-database-setup). -- For Geo instances, either: - - Set up a separate [PostgreSQL instance](https://www.postgresql.org/docs/16/high-availability.html). - - Use a cloud-managed PostgreSQL service. AWS - [Relational Database Service](https://aws.amazon.com/rds/) is recommended. - -Setting up PostgreSQL creates empty Praefect tables. For more information, see the -[relevant troubleshooting section](troubleshooting.md#relation-does-not-exist-errors). - -#### Running GitLab and Praefect databases on the same server - -The GitLab application database and the Praefect database can be run on the same server. However, Praefect should have -its own database server when using PostgreSQL from the Linux package. If there is a failover, Praefect isn't aware and starts to -fail as the database it's trying to use would either: - -- Be unavailable. -- In read-only mode. - -#### Manual database setup - -To complete this section you need: - -- One Praefect node -- One PostgreSQL node - - A PostgreSQL user with permissions to manage the database server - -In this section, we configure the PostgreSQL database. This can be used for both external -and Linux package-provided PostgreSQL server. - -To run the following instructions, you can use the Praefect node, where `psql` is installed -by the Linux package (`/opt/gitlab/embedded/bin/psql`). 
If you are using the Linux package-provided -PostgreSQL you can use `gitlab-psql` on the PostgreSQL node instead: - -1. Create a new user `praefect` to be used by Praefect: - - ```sql - CREATE ROLE praefect WITH LOGIN PASSWORD 'PRAEFECT_SQL_PASSWORD'; - ``` - - Replace `PRAEFECT_SQL_PASSWORD` with the strong password you generated in the preparation step. - -1. Create a new database `praefect_production` that is owned by `praefect` user. - - ```sql - CREATE DATABASE praefect_production WITH OWNER praefect ENCODING UTF8; - ``` - -When using the Linux package-provided PgBouncer, you need to take the following additional steps. We strongly -recommend using the PostgreSQL that is shipped with the Linux package as the backend. The following -instructions only work on the Linux package-provided PostgreSQL: - -1. For the Linux package-provided PgBouncer, you need to use the hash of `praefect` password instead the of the - actual password: - - ```sql - ALTER ROLE praefect WITH PASSWORD 'md5'; - ``` - - Replace `` with the hash of the password you generated in the - preparation step. It is prefixed with `md5` literal. - -1. Create a new user `pgbouncer` to be used by PgBouncer: - - ```sql - CREATE ROLE pgbouncer WITH LOGIN; - ALTER USER pgbouncer WITH password 'md5'; - ``` - - Replace `PGBOUNCER_SQL_PASSWORD_HASH` with the strong password hash you generated in the preparation step. - -1. The PgBouncer that is shipped with the Linux package is configured to use [`auth_query`](https://www.pgbouncer.org/config.html#generic-settings) - and uses `pg_shadow_lookup` function. 
You need to create this function in `praefect_production` - database: - - ```sql - CREATE OR REPLACE FUNCTION public.pg_shadow_lookup(in i_username text, out username text, out password text) RETURNS record AS $$ - BEGIN - SELECT usename, passwd FROM pg_catalog.pg_shadow - WHERE usename = i_username INTO username, password; - RETURN; - END; - $$ LANGUAGE plpgsql SECURITY DEFINER; - - REVOKE ALL ON FUNCTION public.pg_shadow_lookup(text) FROM public, pgbouncer; - GRANT EXECUTE ON FUNCTION public.pg_shadow_lookup(text) TO pgbouncer; - ``` - -The database used by Praefect is now configured. - -You can now configure Praefect to use the database: - -```ruby -praefect['configuration'] = { - # ... - database: { - # ... - host: POSTGRESQL_HOST, - user: 'praefect', - port: 5432, - password: PRAEFECT_SQL_PASSWORD, - dbname: 'praefect_production', - } -} -``` - -If you see Praefect database errors after configuring PostgreSQL, see -[troubleshooting steps](troubleshooting.md#relation-does-not-exist-errors). - -#### Reads distribution caching - -Praefect performance can be improved by additionally configuring the `session_pooled` -settings: - -```ruby -praefect['configuration'] = { - # ... - database: { - # ... - session_pooled: { - # ... - host: POSTGRESQL_HOST, - port: 5432 - - # Use the following to override parameters of direct database connection. - # Comment out where the parameters are the same for both connections. - user: 'praefect', - password: PRAEFECT_SQL_PASSWORD, - dbname: 'praefect_production', - # sslmode: '...', - # sslcert: '...', - # sslkey: '...', - # sslrootcert: '...', - } - } -} -``` - -When configured, this connection is automatically used for the -[SQL LISTEN](https://www.postgresql.org/docs/16/sql-listen.html) feature and -allows Praefect to receive notifications from PostgreSQL for cache invalidation. 
- -Verify this feature is working by looking for the following log entry in the Praefect -log: - -```plaintext -reads distribution caching is enabled by configuration -``` - -#### Use PgBouncer - -To reduce PostgreSQL resource consumption, you should set up and configure [PgBouncer](https://www.pgbouncer.org/) in -front of the PostgreSQL instance. However, PgBouncer isn't required because -Praefect makes a low number of connections. If you choose to use PgBouncer, you can use the same PgBouncer instance for -both the GitLab application database and the Praefect database. - -To configure PgBouncer in front of the PostgreSQL instance, you must point Praefect to PgBouncer by setting database -parameters on the Praefect configuration: - -```ruby -praefect['configuration'] = { - # ... - database: { - # ... - host: PGBOUNCER_HOST, - port: 6432, - user: 'praefect', - password: PRAEFECT_SQL_PASSWORD, - dbname: 'praefect_production', - # sslmode: '...', - # sslcert: '...', - # sslkey: '...', - # sslrootcert: '...', - } -} -``` - -Praefect requires an additional connection to the PostgreSQL that supports the -[LISTEN](https://www.postgresql.org/docs/16/sql-listen.html) feature. With PgBouncer -this feature is only available with `session` pool mode (`pool_mode = session`). -It is not supported in `transaction` pool mode (`pool_mode = transaction`). - -To configure the additional connection, you must either: - -- Configure a new PgBouncer database that uses to the same PostgreSQL database endpoint, - but with different pool mode (`pool_mode = session`). -- Connect Praefect directly to PostgreSQL and bypass PgBouncer. - -##### Configure a new PgBouncer database with `pool_mode = session` - -You should use PgBouncer with `session` pool mode. You can use the -[bundled PgBouncer](../../postgresql/pgbouncer.md) or use an external PgBouncer and -[configure it manually](https://www.pgbouncer.org/config.html). 
- -The following example uses the bundled PgBouncer and sets up two separate connection pools on the PostgreSQL host, -one in `session` pool mode and the other in `transaction` pool mode. For this example to work, -you need to prepare PostgreSQL server as documented in [the setup instructions](#manual-database-setup). - -Then, configure the separate connection pools on the PgBouncer host: - -```ruby -pgbouncer['databases'] = { - # Other database configuration including gitlabhq_production - ... - - praefect_production: { - host: POSTGRESQL_HOST, - # Use `pgbouncer` user to connect to database backend. - user: 'pgbouncer', - password: PGBOUNCER_SQL_PASSWORD_HASH, - pool_mode: 'transaction' - }, - praefect_production_direct: { - host: POSTGRESQL_HOST, - # Use `pgbouncer` user to connect to database backend. - user: 'pgbouncer', - password: PGBOUNCER_SQL_PASSWORD_HASH, - dbname: 'praefect_production', - pool_mode: 'session' - }, - - ... -} - -# Allow the praefect user to connect to PgBouncer -pgbouncer['users'] = { - 'praefect': { - 'password': PRAEFECT_SQL_PASSWORD_HASH, - } -} -``` - -Both `praefect_production` and `praefect_production_direct` use the same database endpoint -(`praefect_production`), but with different pool modes. This translates to the following -`databases` section of PgBouncer: - -```ini -[databases] -praefect_production = host=POSTGRESQL_HOST auth_user=pgbouncer pool_mode=transaction -praefect_production_direct = host=POSTGRESQL_HOST auth_user=pgbouncer dbname=praefect_production pool_mode=session -``` - -Now you can configure Praefect to use PgBouncer for both connections: - -```ruby -praefect['configuration'] = { - # ... - database: { - # ... - host: PGBOUNCER_HOST, - port: 6432, - user: 'praefect', - # `PRAEFECT_SQL_PASSWORD` is the plain-text password of - # Praefect user. Not to be confused with `PRAEFECT_SQL_PASSWORD_HASH`. - password: PRAEFECT_SQL_PASSWORD, - dbname: 'praefect_production', - session_pooled: { - # ... 
- dbname: 'praefect_production_direct', - # There is no need to repeat the following. Parameters of direct - # database connection will fall back to the values specified in the - # database block. - # - # host: PGBOUNCER_HOST, - # port: 6432, - # user: 'praefect', - # password: PRAEFECT_SQL_PASSWORD, - }, - }, -} -``` - -With this configuration, Praefect uses PgBouncer for both connection types. - -{{< alert type="note" >}} - -Linux package installations handle the authentication requirements (using `auth_query`), but if you are preparing -your databases manually and configuring an external PgBouncer, you must include `praefect` user and -its password in the file used by PgBouncer. For example, `userlist.txt` if the [`auth_file`](https://www.pgbouncer.org/config.html#auth_file) -configuration option is set. For more details, consult the PgBouncer documentation. - -{{< /alert >}} - -##### Configure Praefect to connect directly to PostgreSQL - -As an alternative to configuring PgBouncer with `session` pool mode, Praefect can be configured to use different -connection parameters for direct access to PostgreSQL. This connection supports the `LISTEN` feature. - -An example of Praefect configuration that bypasses PgBouncer and directly connects to PostgreSQL: - -```ruby -praefect['configuration'] = { - # ... - database: { - # ... - session_pooled: { - # ... - host: POSTGRESQL_HOST, - port: 5432, - - # Use the following to override parameters of direct database connection. - # Comment out where the parameters are the same for both connections. - # - user: 'praefect', - password: PRAEFECT_SQL_PASSWORD, - dbname: 'praefect_production', - # sslmode: '...', - # sslcert: '...', - # sslkey: '...', - # sslrootcert: '...', - }, - }, -} -``` - -### Praefect - -If there are multiple Praefect nodes: - -1. Designate one node as the deploy node, and configure it using the following steps. -1. Complete the following steps for each additional node. 
- -To complete this section you need a [configured PostgreSQL server](#postgresql), including: - -{{< alert type="warning" >}} - -Praefect should be run on a dedicated node. Do not run Praefect on the -application server, or a Gitaly node. - -{{< /alert >}} - -On the Praefect node: - -1. Disable all other services by editing `/etc/gitlab/gitlab.rb`: - - - - ```ruby - # Avoid running unnecessary services on the Praefect server - gitaly['enable'] = false - postgresql['enable'] = false - redis['enable'] = false - nginx['enable'] = false - puma['enable'] = false - sidekiq['enable'] = false - gitlab_workhorse['enable'] = false - prometheus['enable'] = false - alertmanager['enable'] = false - gitlab_exporter['enable'] = false - gitlab_kas['enable'] = false - - # Enable only the Praefect service - praefect['enable'] = true - - # Prevent database migrations from running on upgrade automatically - praefect['auto_migrate'] = false - gitlab_rails['auto_migrate'] = false - ``` - -1. Configure Praefect to listen on network interfaces by editing - `/etc/gitlab/gitlab.rb`: - - ```ruby - praefect['configuration'] = { - # ... - listen_addr: '0.0.0.0:2305', - } - ``` - -1. Configure Prometheus metrics by editing - `/etc/gitlab/gitlab.rb`: - - ```ruby - praefect['configuration'] = { - # ... - # - # Enable Prometheus metrics access to Praefect. You must use firewalls - # to restrict access to this address/port. - # The default metrics endpoint is /metrics - prometheus_listen_addr: '0.0.0.0:9652', - # Some metrics run queries against the database. Enabling separate database metrics allows - # these metrics to be collected when the metrics are - # scraped on a separate /db_metrics endpoint. - prometheus_exclude_database_from_default_metrics: true, - } - ``` - -1. 
Configure a strong authentication token for Praefect by editing - `/etc/gitlab/gitlab.rb`, which is needed by clients outside the cluster - (like GitLab Shell) to communicate with the Praefect cluster: - - ```ruby - praefect['configuration'] = { - # ... - auth: { - # ... - token: 'PRAEFECT_EXTERNAL_TOKEN', - }, - } - ``` - -1. Configure Praefect to [connect to the PostgreSQL database](#postgresql). We - highly recommend using [PgBouncer](#use-pgbouncer) as well. - - If you want to use a TLS client certificate, the options below can be used: - - ```ruby - praefect['configuration'] = { - # ... - database: { - # ... - # - # Connect to PostgreSQL using a TLS client certificate - # sslcert: '/path/to/client-cert', - # sslkey: '/path/to/client-key', - # - # Trust a custom certificate authority - # sslrootcert: '/path/to/rootcert', - }, - } - ``` - - By default, Praefect uses opportunistic TLS to connect to PostgreSQL. This means that Praefect attempts to connect to PostgreSQL using `sslmode` set to - `prefer`. You can override this by uncommenting the following line: - - ```ruby - praefect['configuration'] = { - # ... - database: { - # ... - # sslmode: 'disable', - }, - } - ``` - -1. Configure the Praefect cluster to connect to each Gitaly node in the - cluster by editing `/etc/gitlab/gitlab.rb`. - - The virtual storage's name must match the configured storage name in GitLab - configuration. In a later step, we configure the storage name as `default` - so we use `default` here as well. This cluster has three Gitaly nodes `gitaly-1`, - `gitaly-2`, and `gitaly-3`, which are intended to be replicas of each other. - - {{< alert type="warning" >}} - - If you have data on an already existing storage called - `default`, you should configure the virtual storage with another name and - [migrate the data to the Gitaly Cluster storage](#migrate-to-gitaly-cluster) - afterwards. 
- - {{< /alert >}} - - Replace `PRAEFECT_INTERNAL_TOKEN` with a strong secret, which is used by - Praefect when communicating with Gitaly nodes in the cluster. This token is - distinct from the `PRAEFECT_EXTERNAL_TOKEN`. - - Replace `GITALY_HOST_*` with the IP or host address of each Gitaly node. - - More Gitaly nodes can be added to the cluster to increase the number of - replicas. More clusters can also be added for very large GitLab instances. - - {{< alert type="note" >}} - - When adding additional Gitaly nodes to a virtual storage, all storage names - in that virtual storage must be unique. Additionally, all Gitaly node - addresses referenced in the Praefect configuration must be unique. - - {{< /alert >}} - - ```ruby - # Name of storage hash must match storage name in gitlab_rails['repositories_storages'] on GitLab - # server ('default') and in gitaly['configuration'][:storage][INDEX][:name] on Gitaly nodes ('gitaly-1') - praefect['configuration'] = { - # ... - virtual_storage: [ - { - # ... - name: 'default', - node: [ - { - storage: 'gitaly-1', - address: 'tcp://GITALY_HOST_1:8075', - token: 'PRAEFECT_INTERNAL_TOKEN' - }, - { - storage: 'gitaly-2', - address: 'tcp://GITALY_HOST_2:8075', - token: 'PRAEFECT_INTERNAL_TOKEN' - }, - { - storage: 'gitaly-3', - address: 'tcp://GITALY_HOST_3:8075', - token: 'PRAEFECT_INTERNAL_TOKEN' - }, - ], - }, - ], - } - ``` - -1. Save the changes to `/etc/gitlab/gitlab.rb` and - [reconfigure Praefect](../../restart_gitlab.md#reconfigure-a-linux-package-installation): - - ```shell - gitlab-ctl reconfigure - ``` - -1. For: - - - The "deploy node": - 1. Enable Praefect database auto-migration again by setting `praefect['auto_migrate'] = true` in - `/etc/gitlab/gitlab.rb`. - 1. To ensure database migrations are only run during reconfigure and not automatically on - upgrade, run: - - ```shell - sudo touch /etc/gitlab/skip-auto-reconfigure - ``` - - - The other nodes, you can leave the settings as they are.
Though - `/etc/gitlab/skip-auto-reconfigure` isn't required, you may want to set it to prevent GitLab - running reconfigure automatically when running commands such as `apt-get update`. This way any - additional configuration changes can be done and then reconfigure can be run manually. - -1. Save the changes to `/etc/gitlab/gitlab.rb` and - [reconfigure Praefect](../../restart_gitlab.md#reconfigure-a-linux-package-installation): - - ```shell - gitlab-ctl reconfigure - ``` - -1. To ensure that Praefect - [has updated its Prometheus listen address](https://gitlab.com/gitlab-org/gitaly/-/issues/2734), - [restart Praefect](../../restart_gitlab.md#reconfigure-a-linux-package-installation): - - ```shell - gitlab-ctl restart praefect - ``` - -1. Verify that Praefect can reach PostgreSQL: - - ```shell - sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml sql-ping - ``` - - If the check fails, make sure you have followed the steps correctly. If you - edit `/etc/gitlab/gitlab.rb`, remember to run `sudo gitlab-ctl reconfigure` - again before trying the `sql-ping` command. - -#### Enable TLS support - -Praefect supports TLS encryption. To communicate with a Praefect instance that listens -for secure connections, you must: - -- Ensure Gitaly is [configured for TLS](../tls_support.md) and use a `tls://` URL scheme in the `gitaly_address` - of the corresponding storage entry in the GitLab configuration. -- Bring your own certificates because this isn't provided automatically. The certificate - corresponding to each Praefect server must be installed on that Praefect server. - -Additionally the certificate, or its certificate authority, must be installed on all Gitaly servers -and on all Praefect clients that communicate with it following the procedure described in -[GitLab custom certificate configuration](https://docs.gitlab.com/omnibus/settings/ssl/#install-custom-public-certificates) (and repeated below). 
- -Note the following: - -- The certificate must specify the address you use to access the Praefect server. You must add the hostname or IP - address as a Subject Alternative Name to the certificate. -- When running Praefect sub-commands such as `dial-nodes` and `list-untracked-repositories` from the command line with - [Gitaly TLS enabled](../tls_support.md), you must set the `SSL_CERT_DIR` or `SSL_CERT_FILE` - environment variable so that the Gitaly certificate is trusted. For example: - - ```shell - SSL_CERT_DIR=/etc/gitlab/trusted-certs sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml dial-nodes - ``` - -- You can configure Praefect servers with both an unencrypted listening address - `listen_addr` and an encrypted listening address `tls_listen_addr` at the same time. - This allows you to do a gradual transition from unencrypted to encrypted traffic, if - necessary. - - To disable the unencrypted listener, set: - - ```ruby - praefect['configuration'] = { - # ... - listen_addr: nil, - } - ``` - -Configure Praefect with TLS. - -For Linux package installations: - -1. Create certificates for Praefect servers. - -1. On the Praefect servers, create the `/etc/gitlab/ssl` directory and copy your key - and certificate there: - - ```shell - sudo mkdir -p /etc/gitlab/ssl - sudo chmod 755 /etc/gitlab/ssl - sudo cp key.pem cert.pem /etc/gitlab/ssl/ - sudo chmod 644 key.pem cert.pem - ``` - -1. Edit `/etc/gitlab/gitlab.rb` and add: - - ```ruby - praefect['configuration'] = { - # ... - tls_listen_addr: '0.0.0.0:3305', - tls: { - # ... - certificate_path: '/etc/gitlab/ssl/cert.pem', - key_path: '/etc/gitlab/ssl/key.pem', - }, - } - ``` - -1. Save the file and [reconfigure](../../restart_gitlab.md#reconfigure-a-linux-package-installation). - -1. 
On the Praefect clients (including each Gitaly server), copy the certificates, - or their certificate authority, into `/etc/gitlab/trusted-certs`: - - ```shell - sudo cp cert.pem /etc/gitlab/trusted-certs/ - ``` - -1. On the Praefect clients (except Gitaly servers), edit `gitlab_rails['repositories_storages']` in - `/etc/gitlab/gitlab.rb` as follows: - - ```ruby - gitlab_rails['repositories_storages'] = { - "default" => { - "gitaly_address" => 'tls://PRAEFECT_LOADBALANCER_HOST:3305', - "gitaly_token" => 'PRAEFECT_EXTERNAL_TOKEN' - } - } - ``` - -1. Save the file and [reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation). - -For self-compiled installations: - -1. Create certificates for Praefect servers. -1. On the Praefect servers, create the `/etc/gitlab/ssl` directory and copy your key and certificate - there: - - ```shell - sudo mkdir -p /etc/gitlab/ssl - sudo chmod 755 /etc/gitlab/ssl - sudo cp key.pem cert.pem /etc/gitlab/ssl/ - sudo chmod 644 key.pem cert.pem - ``` - -1. On the Praefect clients (including each Gitaly server), copy the certificates, - or their certificate authority, into the system trusted certificates: - - ```shell - sudo cp cert.pem /usr/local/share/ca-certificates/praefect.crt - sudo update-ca-certificates - ``` - -1. On the Praefect clients (except Gitaly servers), edit `storages` in - `/home/git/gitlab/config/gitlab.yml` as follows: - - ```yaml - gitlab: - repositories: - storages: - default: - gitaly_address: tls://PRAEFECT_LOADBALANCER_HOST:3305 - ``` - -1. Save the file and [restart GitLab](../../restart_gitlab.md#self-compiled-installations). -1. Copy all Praefect server certificates, or their certificate authority, to the system - trusted certificates on each Gitaly server so the Praefect server trusts the - certificate when called by Gitaly servers: - - ```shell - sudo cp cert.pem /usr/local/share/ca-certificates/praefect.crt - sudo update-ca-certificates - ``` - -1. 
Edit `/home/git/praefect/config.toml` and add: - - ```toml - tls_listen_addr = '0.0.0.0:3305' - - [tls] - certificate_path = '/etc/gitlab/ssl/cert.pem' - key_path = '/etc/gitlab/ssl/key.pem' - ``` - -1. Save the file and [restart GitLab](../../restart_gitlab.md#self-compiled-installations). - -#### Service discovery - -{{< history >}} - -- [Introduced](https://gitlab.com/groups/gitlab-org/-/epics/8971) in GitLab 15.10. - -{{< /history >}} - -Prerequisites: - -- A DNS server. - -GitLab uses service discovery to retrieve a list of Praefect hosts. Service -discovery involves periodic checks of a DNS A or AAAA record, with the IPs -retrieved from the record serving as the addresses of the target nodes. -Praefect does not support service discovery by SRV record. - -By default, the minimum time between checks is 5 minutes, regardless of the -records' TTLs. Praefect does not support customizing this interval. When clients -receive an update, they: - -- Establish new connections to the new IP addresses. -- Keep existing connections to intact IP addresses. -- Drop connections to removed IP addresses. - -In-flight requests on to-be-removed connections are still handled until they -finish. Workhorse has a 10-minute timeout, while other clients do not specify a -graceful timeout. - -The DNS server should return all IP addresses instead of load-balancing itself. -Clients can distribute requests to IP addresses in a round-robin fashion. - -Before updating client configuration, ensure that DNS service discovery works -correctly. It should return the list of IP addresses correctly. `dig` is a good -tool to use to verify. 
- -```console -❯ dig A praefect.service.consul @127.0.0.1 - -; <<>> DiG 9.10.6 <<>> A praefect.service.consul @127.0.0.1 -;; global options: +cmd -;; Got answer: -;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 29210 -;; flags: qr aa rd ra; QUERY: 1, ANSWER: 3, AUTHORITY: 0, ADDITIONAL: 1 - -;; OPT PSEUDOSECTION: -; EDNS: version: 0, flags:; udp: 4096 -;; QUESTION SECTION: -;praefect.service.consul. IN A - -;; ANSWER SECTION: -praefect.service.consul. 0 IN A 10.0.0.3 -praefect.service.consul. 0 IN A 10.0.0.2 -praefect.service.consul. 0 IN A 10.0.0.1 - -;; Query time: 0 msec -;; SERVER: ::1#53(::1) -;; WHEN: Wed Dec 14 12:53:58 +07 2022 -;; MSG SIZE rcvd: 86 -``` - -##### Configure service discovery - -By default, Praefect delegates DNS resolution to the operating system. In such -cases, the Gitaly address can be set in either of these formats: - -- `dns:[host]:[port]` -- `dns:///[host]:[port]` (note the three slashes) - -You can also appoint an authoritative name server by setting it in this format: - -- `dns://[authority_host]:[authority_port]/[host]:[port]` - -{{< tabs >}} - -{{< tab title="Linux package (Omnibus)" >}} - -1. Add the IP address for each Praefect node to the DNS service discovery address. -1. On the Praefect clients (except Gitaly servers), edit `gitlab_rails['repositories_storages']` in - `/etc/gitlab/gitlab.rb` as follows. Replace `PRAEFECT_SERVICE_DISCOVERY_ADDRESS` - with Praefect service discovery address, such as `praefect.service.consul`. - - ```ruby - gitlab_rails['repositories_storages'] = { - "default" => { - "gitaly_address" => 'dns:PRAEFECT_SERVICE_DISCOVERY_ADDRESS:2305', - "gitaly_token" => 'PRAEFECT_EXTERNAL_TOKEN' - } - } - ``` - -1. Save the file and [reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation). - -{{< /tab >}} - -{{< tab title="Self-compiled (source)" >}} - -1. Install a DNS service discovery service. Register all Praefect nodes with the service. -1. 
On the Praefect clients (except Gitaly servers), edit `storages` in - `/home/git/gitlab/config/gitlab.yml` as follows: - - ```yaml - gitlab: - repositories: - storages: - default: - gitaly_address: dns:PRAEFECT_SERVICE_DISCOVERY_ADDRESS:2305 - ``` - -1. Save the file and [restart GitLab](../../restart_gitlab.md#self-compiled-installations). - -{{< /tab >}} - -{{< /tabs >}} - -##### Configure service discovery with Consul - -If you already have a Consul server in your architecture then you can add -a Consul agent on each Praefect node and register the `praefect` service to it. -This registers each node's IP address to `praefect.service.consul` so it can be found -by service discovery. - -Prerequisites: - -- One or more [Consul](../../consul.md) servers to keep track of the Consul agents. - -1. On each Praefect server, add the following to your `/etc/gitlab/gitlab.rb`: - - ```ruby - consul['enable'] = true - praefect['consul_service_name'] = 'praefect' - - # The following must also be added until this issue is addressed: - # https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/8321 - consul['monitoring_service_discovery'] = true - praefect['configuration'] = { - # ... - # - prometheus_listen_addr: '0.0.0.0:9652', - } - ``` - -1. Save the file and [reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation). -1. Repeat the previous steps on each Praefect server to use with - service discovery. -1. On the Praefect clients (except Gitaly servers), edit `gitlab_rails['repositories_storages']` in - `/etc/gitlab/gitlab.rb` as follows. Replace `CONSUL_SERVER` with the IP or - address of a Consul server. The default Consul DNS port is `8600`. - - ```ruby - gitlab_rails['repositories_storages'] = { - "default" => { - "gitaly_address" => 'dns://CONSUL_SERVER:8600/praefect.service.consul:2305', - "gitaly_token" => 'PRAEFECT_EXTERNAL_TOKEN' - } - } - ``` - -1. 
Use `dig` from the Praefect clients to confirm that each IP address has been registered to - `praefect.service.consul` with `dig A praefect.service.consul @CONSUL_SERVER -p 8600`. - Replace `CONSUL_SERVER` with the value configured previously and all Praefect node IP addresses - should be present in the output. -1. Save the file and [reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation). - -### Gitaly - -{{< alert type="note" >}} - -Complete these steps for each Gitaly node. - -{{< /alert >}} - -To complete this section you need: - -- [Configured Praefect node](#praefect) -- 3 (or more) servers, with GitLab installed, to be configured as Gitaly nodes. - These should be dedicated nodes, do not run other services on these nodes. - -Every Gitaly server assigned to the Praefect cluster needs to be configured. The -configuration is the same as a standard [standalone Gitaly server](_index.md), -except: - -- The storage names are exposed to Praefect, not GitLab -- The secret token is shared with Praefect, not GitLab - -The configuration of all Gitaly nodes in the Praefect cluster can be identical, -because we rely on Praefect to route operations correctly. - -Particular attention should be shown to: - -- The `gitaly['configuration'][:auth][:token]` configured in this section must match the `token` - value under `praefect['configuration'][:virtual_storage][][:node][][:token]` on the Praefect node. This value was - set in the [previous section](#praefect). This document uses the placeholder `PRAEFECT_INTERNAL_TOKEN` throughout. -- The physical storage names in `gitaly['configuration'][:storage]` configured in this section must match the - physical storage names under `praefect['configuration'][:virtual_storage]` on the Praefect node. This - was set in the [previous section](#praefect). This document uses `gitaly-1`, - `gitaly-2`, and `gitaly-3` as physical storage names. 
- -For more information on Gitaly server configuration, see our -[Gitaly documentation](../configure_gitaly.md#configure-gitaly-servers). - -1. SSH into the Gitaly node and login as root: - - ```shell - sudo -i - ``` - -1. Disable all other services by editing `/etc/gitlab/gitlab.rb`: - - ```ruby - # Disable all other services on the Gitaly node - postgresql['enable'] = false - redis['enable'] = false - nginx['enable'] = false - puma['enable'] = false - sidekiq['enable'] = false - gitlab_workhorse['enable'] = false - prometheus_monitoring['enable'] = false - gitlab_kas['enable'] = false - - # Enable only the Gitaly service - gitaly['enable'] = true - - # Enable Prometheus if needed - prometheus['enable'] = true - - # Disable database migrations to prevent database connections during 'gitlab-ctl reconfigure' - gitlab_rails['auto_migrate'] = false - ``` - -1. Configure Gitaly to listen on network interfaces by editing - `/etc/gitlab/gitlab.rb`: - - ```ruby - gitaly['configuration'] = { - # ... - # - # Make Gitaly accept connections on all network interfaces. - # Use firewalls to restrict access to this address/port. - listen_addr: '0.0.0.0:8075', - # Enable Prometheus metrics access to Gitaly. You must use firewalls - # to restrict access to this address/port. - prometheus_listen_addr: '0.0.0.0:9236', - } - ``` - -1. Configure a strong `auth_token` for Gitaly by editing - `/etc/gitlab/gitlab.rb`, which is needed by clients to communicate with - this Gitaly nodes. Typically, this token is the same for all Gitaly - nodes. - - ```ruby - gitaly['configuration'] = { - # ... - auth: { - # ... - token: 'PRAEFECT_INTERNAL_TOKEN', - }, - } - ``` - -1. Configure the GitLab Shell secret token, which is needed for `git push` operations. Either: - - - Method 1: - - 1. Copy `/etc/gitlab/gitlab-secrets.json` from the Gitaly client to same path on the Gitaly - servers and any other Gitaly clients. - 1. 
[Reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation) on Gitaly servers. - - - Method 2: - - 1. Edit `/etc/gitlab/gitlab.rb`. - 1. Replace `GITLAB_SHELL_SECRET_TOKEN` with the real secret. - - ```ruby - gitlab_shell['secret_token'] = 'GITLAB_SHELL_SECRET_TOKEN' - ``` - -1. Configure an `internal_api_url`, which is also needed for `git push` operations: - - ```ruby - # Configure the gitlab-shell API callback URL. Without this, `git push` will - # fail. This can be your front door GitLab URL or an internal load balancer. - # Examples: 'https://gitlab.example.com', 'http://10.0.2.2' - gitlab_rails['internal_api_url'] = 'https://gitlab.example.com' - ``` - -1. Configure the storage location for Git data by setting `gitaly['configuration'][:storage]` in - `/etc/gitlab/gitlab.rb`. Each Gitaly node should have a unique storage name - (such as `gitaly-1`) and should not be duplicated on other Gitaly nodes. - - ```ruby - gitaly['configuration'] = { - # ... - storage: [ - # Replace with appropriate name for each Gitaly nodes. - { - name: 'gitaly-1', - path: '/var/opt/gitlab/git-data/repositories', - }, - ], - } - ``` - -1. Save the changes to `/etc/gitlab/gitlab.rb` and - [reconfigure Gitaly](../../restart_gitlab.md#reconfigure-a-linux-package-installation): - - ```shell - gitlab-ctl reconfigure - ``` - -1. To ensure that Gitaly - [has updated its Prometheus listen address](https://gitlab.com/gitlab-org/gitaly/-/issues/2734), - [restart Gitaly](../../restart_gitlab.md#reconfigure-a-linux-package-installation): - - ```shell - gitlab-ctl restart gitaly - ``` - -{{< alert type="note" >}} - -The previous steps must be completed for each Gitaly node! - -{{< /alert >}} - -After all Gitaly nodes are configured, run the Praefect connection -checker to verify Praefect can connect to all Gitaly servers in the Praefect -configuration. - -1. 
SSH into each Praefect node and run the Praefect connection checker: - - ```shell - sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml dial-nodes - ``` - -### Load Balancer - -In a fault-tolerant Gitaly configuration, a load balancer is needed to route -internal traffic from the GitLab application to the Praefect nodes. The -specifics on which load balancer to use or the exact configuration is beyond the -scope of the GitLab documentation. - -{{< alert type="note" >}} - -The load balancer must be configured to accept traffic from the Gitaly nodes in -addition to the GitLab nodes. - -{{< /alert >}} - -We hope that if you're managing fault-tolerant systems like GitLab, you have a load balancer -of choice already. Some examples include [HAProxy](https://www.haproxy.org/) -(open-source), [Google Internal Load Balancer](https://cloud.google.com/load-balancing/docs/internal/), -[AWS Elastic Load Balancer](https://aws.amazon.com/elasticloadbalancing/), F5 -Big-IP LTM, and Citrix Net Scaler. This documentation outlines what ports -and protocols you need to configure. - -You should use the equivalent of HAProxy `leastconn` load-balancing strategy because long-running operations (for -example, clones) keep some connections open for extended periods. - -| LB Port | Backend Port | Protocol | -|:--------|:-------------|:---------| -| 2305 | 2305 | TCP | - -You must use a TCP load balancer. Using an HTTP/2 or gRPC load balancer -with Praefect does not work because of [Gitaly sidechannels](https://gitlab.com/gitlab-org/gitaly/-/blob/master/doc/sidechannel.md). -This optimization intercepts the gRPC handshaking process. It redirects all heavy Git operations to a more efficient "channel" than gRPC, -but HTTP/2 or gRPC load balancers do not handle such requests properly.
- -If TLS is enabled, [some versions of Praefect](#alpn-enforcement) require that the Application-Layer Protocol Negotiation (ALPN) extension is used per [RFC 7540](https://datatracker.ietf.org/doc/html/rfc7540#section-3.3). -TCP load balancers pass ALPN directly without additional configuration: - -```mermaid -sequenceDiagram - autonumber - participant Client as Client - participant LB as TCP Load Balancer - participant Praefect as Praefect - - Client->>LB: Establish TLS Session (w/ ALPN Extension) - LB->>Praefect: Establish TLS Session (w/ ALPN Extension) - Client->>LB: Encrypted TCP packets - LB->>Praefect: Encrypted TCP packets - Praefect->>LB: Encrypted Response - LB->>Client: Encrypted Response -``` - -Some TCP load balancers can be configured to accept a TLS client connection and -proxy the connection to Praefect with a new TLS connection. However, this only works -if ALPN is supported on both connections. - -For this reason, NGINX's [`ngx_stream_proxy_module`](https://nginx.org/en/docs/stream/ngx_stream_proxy_module.html) -does not work when the `proxy_ssl` configuration option is enabled: - -```mermaid -sequenceDiagram - autonumber - participant Client as Client - participant NGINX as NGINX Stream Proxy - participant Praefect as Praefect - - Client->>NGINX: Establish TLS Session (w/ ALPN Extension) - NGINX->>Praefect: Establish New TLS Session - Praefect->>NGINX: Connection failed: missing selected ALPN property -``` - -On step 2, ALPN is not used because [NGINX does not support this](https://mailman.nginx.org/pipermail/nginx-devel/2017-July/010307.html). -For more information, [follow NGINX issue 406](https://github.com/nginx/nginx/issues/406). - -#### ALPN enforcement - -ALPN enforcement was enabled in some versions of GitLab. However, ALPN enforcement broke deployments and so is disabled -[to provide a path to migrate](https://github.com/grpc/grpc-go/issues/7922).
The following versions of GitLab have ALPN enforcement enabled: - -- GitLab 17.7.0 -- GitLab 17.6.0 - 17.6.2 -- GitLab 17.5.0 - 17.5.4 -- GitLab 17.4.x - -With [GitLab 17.5.5, 17.6.3, and 17.7.1](https://about.gitlab.com/releases/2025/01/08/patch-release-gitlab-17-7-1-released/), -ALPN enforcement is disabled again. GitLab 17.4 and earlier never had ALPN enforcement enabled. - -### GitLab - -To complete this section you need: - -- [Configured Praefect node](#praefect) -- [Configured Gitaly nodes](#gitaly) - -The Praefect cluster needs to be exposed as a storage location to the GitLab -application, which is done by updating `gitlab_rails['repositories_storages']`. - -Particular attention should be shown to: - -- the storage name added to `gitlab_rails['repositories_storages']` in this section must match the - storage name under `praefect['configuration'][:virtual_storage]` on the Praefect nodes. This - was set in the [Praefect](#praefect) section of this guide. This document uses - `default` as the Praefect storage name. - -1. SSH into the GitLab node and login as root: - - ```shell - sudo -i - ``` - -1. Configure the `external_url` so that files could be served by GitLab - by proper endpoint access by editing `/etc/gitlab/gitlab.rb`: - - You need to replace `GITLAB_SERVER_URL` with the real external facing - URL on which current GitLab instance is serving: - - ```ruby - external_url 'GITLAB_SERVER_URL' - ``` - -1. Disable the default Gitaly service running on the GitLab host. It isn't needed - because GitLab connects to the configured cluster. - - {{< alert type="warning" >}} - - If you have existing data stored on the default Gitaly storage, - you should [migrate the data to your Gitaly Cluster storage](#migrate-to-gitaly-cluster) - first. - - {{< /alert >}} - - ```ruby - gitaly['enable'] = false - ``` - -1. Add the Praefect cluster as a storage location by editing - `/etc/gitlab/gitlab.rb`. 
- - You need to replace: - - - `PRAEFECT_LOADBALANCER_HOST` with the IP address or hostname of the load - balancer. - - `PRAEFECT_EXTERNAL_TOKEN` with the real secret - - If you are using TLS: - - - The `gitaly_address` should begin with `tls://` instead. - - The port should be changed to `3305`. - - ```ruby - gitlab_rails['repositories_storages'] = { - "default" => { - "gitaly_address" => "tcp://PRAEFECT_LOADBALANCER_HOST:2305", - "gitaly_token" => 'PRAEFECT_EXTERNAL_TOKEN' - } - } - ``` - -1. Configure the GitLab Shell secret token so that callbacks from Gitaly nodes during a `git push` - are properly authenticated. Either: - - - Method 1: - - 1. Copy `/etc/gitlab/gitlab-secrets.json` from the Gitaly client to same path on the Gitaly - servers and any other Gitaly clients. - 1. [Reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation) on Gitaly servers. - - - Method 2: - - 1. Edit `/etc/gitlab/gitlab.rb`. - 1. Replace `GITLAB_SHELL_SECRET_TOKEN` with the real secret. - - ```ruby - gitlab_shell['secret_token'] = 'GITLAB_SHELL_SECRET_TOKEN' - ``` - -1. Add Prometheus monitoring settings by editing `/etc/gitlab/gitlab.rb`. If Prometheus - is enabled on a different node, make edits on that node instead. - - You need to replace: - - - `PRAEFECT_HOST` with the IP address or hostname of the Praefect node - - `GITALY_HOST_*` with the IP address or hostname of each Gitaly node - - ```ruby - prometheus['scrape_configs'] = [ - { - 'job_name' => 'praefect', - 'static_configs' => [ - 'targets' => [ - 'PRAEFECT_HOST:9652', # praefect-1 - 'PRAEFECT_HOST:9652', # praefect-2 - 'PRAEFECT_HOST:9652', # praefect-3 - ] - ] - }, - { - 'job_name' => 'praefect-gitaly', - 'static_configs' => [ - 'targets' => [ - 'GITALY_HOST_1:9236', # gitaly-1 - 'GITALY_HOST_2:9236', # gitaly-2 - 'GITALY_HOST_3:9236', # gitaly-3 - ] - ] - } - ] - ``` - -1. 
Save the changes to `/etc/gitlab/gitlab.rb` and [reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation): - - ```shell - gitlab-ctl reconfigure - ``` - -1. Verify on each Gitaly node the Git Hooks can reach GitLab. On each Gitaly node run: - - For GitLab 15.3 and later, run `sudo -u git -- /opt/gitlab/embedded/bin/gitaly check /var/opt/gitlab/gitaly/config.toml`. - - For GitLab 15.2 and earlier, run `sudo -u git -- /opt/gitlab/embedded/bin/gitaly-hooks check /var/opt/gitlab/gitaly/config.toml`. - -1. Verify that GitLab can reach Praefect: - - ```shell - gitlab-rake gitlab:gitaly:check - ``` - -1. Check that the Praefect storage is configured to store new repositories: - - 1. On the left sidebar, at the bottom, select **Admin**. - 1. On the left sidebar, select **Settings > Repository**. - 1. Expand the **Repository storage** section. - - Following this guide, the `default` storage should have weight 100 to store all new repositories. - -1. Verify everything is working by creating a new project. Check the - "Initialize repository with a README" box so that there is content in the - repository that viewed. If the project is created, and you can see the - README file, it works! - -#### Use TCP for existing GitLab instances - -When adding Gitaly Cluster to an existing Gitaly instance, the existing Gitaly storage -must be listening on TCP/TLS. If `gitaly_address` is not specified, then a Unix socket is used, -which prevents the communication with the cluster. - -For example: - -```ruby -gitlab_rails['repositories_storages'] = { - 'default' => { 'gitaly_address' => 'tcp://old-gitaly.internal:8075' }, - 'cluster' => { - 'gitaly_address' => 'tls://:3305', - 'gitaly_token' => '' - } -} -``` - -See [Mixed Configuration](../configure_gitaly.md#mixed-configuration) for further information on -running multiple Gitaly storages. - -### Grafana - -Grafana is included with GitLab, and can be used to monitor your Praefect -cluster. 
See [Grafana Dashboard Service](../../monitoring/performance/grafana_configuration.md) -for detailed documentation. - -To get started quickly: - -1. SSH into the GitLab node (or whichever node has Grafana enabled) and login as root: - - ```shell - sudo -i - ``` - -1. Enable the Grafana login form by editing `/etc/gitlab/gitlab.rb`. - - ```ruby - grafana['disable_login_form'] = false - ``` - -1. Save the changes to `/etc/gitlab/gitlab.rb` and - [reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation): - - ```shell - gitlab-ctl reconfigure - ``` - -1. Set the Grafana administrator password. This command prompts you to enter a new - password: - - ```shell - gitlab-ctl set-grafana-password - ``` - -1. In your web browser, open `/-/grafana` (such as - `https://gitlab.example.com/-/grafana`) on your GitLab server. - - Login using the password you set, and the username `admin`. - -1. Go to **Explore** and query `gitlab_build_info` to verify that you are - getting metrics from all your machines. - -Congratulations! You've configured an observable fault-tolerant Praefect -cluster. - -## Configure replication factor - -Praefect supports configuring a replication factor on a per-repository basis, by assigning -specific storage nodes to host a repository. - -{{< alert type="warning" >}} - -Configurable replication factors requires [repository-specific primary nodes](#repository-specific-primary-nodes). - -{{< /alert >}} - -Praefect does not store the actual replication factor, but assigns enough storages to host the repository -so the desired replication factor is met. If a storage node is later removed from the virtual storage, -the replication factor of repositories assigned to the storage is decreased accordingly. - -You can configure either: - -- A default replication factor for each virtual storage that is applied to newly created repositories. -- A replication factor for an existing repository with the `set-replication-factor` subcommand. 
- -### Configure default replication factor - -If `default_replication_factor` is unset, the repositories are always replicated on every storage node defined in -`virtual_storages`. If a new storage node is introduced to the virtual storage, both new and existing repositories are -replicated to the node automatically. - -For large Gitaly Cluster deployments with many storage nodes, replicating a repository to every storage node is often not -sensible and can cause problems. A replication factor of 3 is usually sufficient, which means replicate repositories to -three storages even if more are available. Higher replication factors increase the pressure on the primary storage. - -To configure a default replication factor, add configuration to the `/etc/gitlab/gitlab.rb` file: - -```ruby -praefect['configuration'] = { - # ... - virtual_storage: [ - { - # ... - name: 'default', - default_replication_factor: 3, - }, - ], -} -``` - -### Configure replication factor for existing repositories - -The `set-replication-factor` subcommand automatically assigns or unassigns random storage nodes as -necessary to reach the desired replication factor. The repository's primary node is -always assigned first and is never unassigned. - -```shell -sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml set-replication-factor -virtual-storage -repository -replication-factor -``` - -- `-virtual-storage` is the virtual storage the repository is located in. -- `-repository` is the repository's relative path in the storage. -- `-replication-factor` is the desired replication factor of the repository. The minimum value is - `1` because the primary needs a copy of the repository. The maximum replication factor is the number of - storages in the virtual storage. - -On success, the assigned host storages are printed. 
For example: - -```shell -$ sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml set-replication-factor -virtual-storage default -repository @hashed/3f/db/3fdba35f04dc8c462986c992bcf875546257113072a909c162f7e470e581e278.git -replication-factor 2 - -current assignments: gitaly-1, gitaly-2 -``` - -### Repository storage recommendations - -The size of the required storage can vary between instances and depends on the set -[replication factor](#replication-factor). You might want to include implementing -repository storage redundancy. - -For a replication factor: - -- Of `1`: Gitaly and Gitaly Cluster have roughly the same storage requirements. -- More than `1`: The amount of required storage is `used space * replication factor`. `used space` - should include any planned future growth. - -## Repository verification - -{{< history >}} - -- [Introduced](https://gitlab.com/gitlab-org/gitaly/-/issues/4080) in GitLab 15.0. - -{{< /history >}} - -Praefect stores metadata about the repositories in a database. If the repositories are modified on disk -without going through Praefect, the metadata can become inaccurate. For example if a Gitaly node is -rebuilt, rather than being replaced with a new node, repository verification ensures this is detected. - -The metadata is used for replication and routing decisions, so any inaccuracies may cause problems. -Praefect contains a background worker that periodically verifies the metadata against the actual state on the disks. -The worker: - -1. Picks up a batch of replicas to verify on healthy storages. The replicas are either unverified or have exceeded - the configured verification interval. Replicas that have never been verified are prioritized, followed by - the other replicas ordered by longest time since the last successful verification. -1. Checks whether the replicas exist on their respective storages. If the: - - Replica exists, update its last successful verification time. 
- - Replica doesn't exist, remove its metadata record. - - Check failed, the replica is picked up for verification again when the next worker dequeues more work. - -The worker acquires an exclusive verification lease on each of the replicas it is about to verify. This avoids multiple -workers from verifying the same replica concurrently. The worker releases the leases when it has completed its check. -If workers are terminated for some reason without releasing the lease, Praefect contains a background goroutine -that releases stale leases every 10 seconds. - -The worker logs each of the metadata removals prior to executing them. The `perform_deletions` key -indicates whether the invalid metadata records are actually deleted or not. For example: - -```json -{ - "level": "info", - "msg": "removing metadata records of non-existent replicas", - "perform_deletions": false, - "replicas": { - "default": { - "@hashed/6b/86/6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b.git": [ - "praefect-internal-0" - ] - } - } -} -``` - -### Configure the verification worker - -The worker is enabled by default and verifies the metadata records every seven days. The verification -interval is configurable with any valid [Go duration string](https://pkg.go.dev/time#ParseDuration). - -To verify the metadata every three days: - -```ruby -praefect['configuration'] = { - # ... - background_verification: { - # ... - verification_interval: '72h', - }, -} -``` - -Values of 0 and below disable the background verifier. - -```ruby -praefect['configuration'] = { - # ... - background_verification: { - # ... - verification_interval: '0', - }, -} -``` - -#### Enable deletions - -{{< history >}} - -- [Introduced](https://gitlab.com/gitlab-org/gitaly/-/issues/4080) and disabled by default in GitLab 15.0 -- [Default enabled](https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5321) in GitLab 15.9. 
- -{{< /history >}} - -{{< alert type="warning" >}} - -Deletions were disabled by default prior to GitLab 15.9 due to a race condition with repository renames -that can cause incorrect deletions, which is especially prominent in Geo instances as Geo performs more renames -than instances without Geo. In GitLab 15.0 to 15.5, you should enable deletions only if the [`gitaly_praefect_generated_replica_paths` feature flag](#praefect-generated-replica-paths) is enabled. The feature flag was removed in GitLab 15.6 making deletions always safe to enable. - -{{< /alert >}} - -By default, the worker deletes invalid metadata records. It also logs the deleted records and outputs Prometheus -metrics. - -You can disable deleting invalid metadata records with: - -```ruby -praefect['configuration'] = { - # ... - background_verification: { - # ... - delete_invalid_records: false, - }, -} -``` - -### Prioritize verification manually - -You can prioritize verification of some replicas ahead of their next scheduled verification time. -This might be needed after a disk failure, for example, when the administrator knows that the disk contents may have -changed. Praefect would eventually verify the replicas again, but users may encounter errors in the meantime. - -To manually prioritize reverification of some replicas, use the `praefect verify` subcommand. The subcommand marks -replicas as unverified. Unverified replicas are prioritized by the background verification worker. The verification -worker must be enabled for the replicas to be verified. 
- -Prioritize verifying the replicas of a specific repository: - -```shell -sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml verify -repository-id= -``` - -Prioritize verifying all replicas stored on a virtual storage: - -```shell -sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml verify -virtual-storage= -``` - -Prioritize verifying all replicas stored on a storage: - -```shell -sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml verify -virtual-storage= -storage= -``` - -The output includes the number of replicas that were marked unverified. - -## Automatic failover and primary election strategies - -Praefect regularly checks the health of each Gitaly node, which is used to automatically fail over -to a newly-elected primary Gitaly node if the current primary node is found to be unhealthy. - -[Repository-specific primary nodes](#repository-specific-primary-nodes) is the only available election strategy. - -### Repository-specific primary nodes - -Gitaly Cluster elects a primary Gitaly node separately for each repository. Combined with -[configurable replication factors](#configure-replication-factor), you can horizontally scale storage capacity and distribute write load across Gitaly nodes. - -Primary elections are run lazily. Praefect doesn't immediately elect a new primary node if the current -one is unhealthy. A new primary is elected if a request must be served while the current primary is unavailable. - -A valid primary node candidate is a Gitaly node that: - -- Is healthy. A Gitaly node is considered healthy if `>=50%` Praefect nodes have - successfully health checked the Gitaly node in the previous ten seconds. -- Has a fully up to date copy of the repository. - -If there are multiple primary node candidates, Praefect: - -- Picks one of them randomly. 
-- Prioritizes promoting a Gitaly node that is assigned to host the repository. If - there are no assigned Gitaly nodes to elect as the primary, Praefect may temporarily - elect an unassigned one. The unassigned primary is demoted in favor of an assigned - one when one becomes available. - -If there are no valid primary candidates for a repository: - -- The unhealthy primary node is demoted and the repository is left without a primary node. -- Operations that require a primary node fail until a primary is successfully elected. diff --git a/doc/administration/gitaly/praefect/configure.md b/doc/administration/gitaly/praefect/configure.md new file mode 100644 index 00000000000..31bd8bb9b1d --- /dev/null +++ b/doc/administration/gitaly/praefect/configure.md @@ -0,0 +1,1799 @@ +--- +stage: Data Access +group: Gitaly +info: To determine the technical writer assigned to the Stage/Group associated with this page, see https://handbook.gitlab.com/handbook/product/ux/technical-writing/#assignments +title: Configure Gitaly Cluster (Praefect) +--- + +Configure Gitaly Cluster using either: + +- Gitaly Cluster configuration instructions available as part of + [reference architectures](../../reference_architectures/_index.md) for installations of up to: + - [60 RPS or 3,000 users](../../reference_architectures/3k_users.md#configure-gitaly-cluster). + - [100 RPS or 5,000 users](../../reference_architectures/5k_users.md#configure-gitaly-cluster). + - [200 RPS or 10,000 users](../../reference_architectures/10k_users.md#configure-gitaly-cluster). + - [500 RPS or 25,000 users](../../reference_architectures/25k_users.md#configure-gitaly-cluster). + - [1000 RPS or 50,000 users](../../reference_architectures/50k_users.md#configure-gitaly-cluster). +- The custom configuration instructions that follow on this page. + +Smaller GitLab installations may need only [Gitaly itself](../_index.md). 
+ +{{< alert type="note" >}} + +Gitaly Cluster is not yet supported in Kubernetes, Amazon ECS, or similar container environments. For more information, see +[epic 6127](https://gitlab.com/groups/gitlab-org/-/epics/6127). + +{{< /alert >}} + +## Requirements + +The minimum recommended configuration for a Gitaly Cluster requires: + +- 1 load balancer +- 1 PostgreSQL server (a [supported version](../../../install/requirements.md#postgresql)) +- 3 Praefect nodes +- 3 Gitaly nodes (1 primary, 2 secondary) + +{{< alert type="note" >}} + +[Disk requirements](../_index.md#disk-requirements) apply to Gitaly nodes. + +{{< /alert >}} + +You should configure an odd number of Gitaly nodes so that transactions have a tie-breaker in case one of the +Gitaly nodes fails in a mutating RPC call. + +See the [design document](https://gitlab.com/gitlab-org/gitaly/-/blob/master/doc/design_ha.md) +for implementation details. + +{{< alert type="note" >}} + +If not set in GitLab, feature flags are read as false from the console and Praefect uses their +default value. The default value depends on the GitLab version. + +{{< /alert >}} + +### Network latency and connectivity + +Network latency for Gitaly Cluster should ideally be measurable in single-digit milliseconds. Latency is particularly +important for: + +- Gitaly node health checks. Nodes must be able to respond within 1 second. +- Reference transactions that enforce [strong consistency](_index.md#strong-consistency). Lower latencies mean Gitaly + nodes can agree on changes faster. + +Achieving acceptable latency between Gitaly nodes: + +- On physical networks generally means high bandwidth, single location connections. +- On the cloud generally means in the same region, including allowing cross availability zone replication. These links + are designed for this type of synchronization. Latency of less than 2 ms should be sufficient for Gitaly Cluster. 
+ +If you can't provide low network latencies for replication (for example, between distant locations), consider Geo. For +more information, see [Comparison to Geo](_index.md#comparison-to-geo). + +Gitaly Cluster [components](_index.md#components) communicate with each other over many routes. Your firewall rules must +allow the following for Gitaly Cluster to function properly: + +| From | To | Default port | TLS port | +|:-----------------------|:-----------------------|:-------------|:---------| +| GitLab | Praefect load balancer | `2305` | `3305` | +| Praefect load balancer | Praefect | `2305` | `3305` | +| Praefect | Gitaly | `8075` | `9999` | +| Praefect | GitLab (internal API) | `80` | `443` | +| Gitaly | GitLab (internal API) | `80` | `443` | +| Gitaly | Praefect load balancer | `2305` | `3305` | +| Gitaly | Praefect | `2305` | `3305` | +| Gitaly | Gitaly | `8075` | `9999` | + +{{< alert type="note" >}} + +Gitaly does not directly connect to Praefect. However, requests from Gitaly to the Praefect +load balancer may still be blocked unless firewalls on the Praefect nodes allow traffic from +the Gitaly nodes. + +{{< /alert >}} + +### Praefect database storage + +The requirements are relatively low because the database contains only metadata of: + +- Where repositories are located. +- Some queued work. + +It depends on the number of repositories, but a good minimum is 5-10 GB, similar to the main +GitLab application database. + +## Setup Instructions + +If you [installed](https://about.gitlab.com/install/) GitLab using the Linux package +(highly recommended), follow the steps below: + +1. [Preparation](#preparation) +1. [Configuring the Praefect database](#postgresql) +1. [Configuring the Praefect proxy/router](#praefect) +1. [Configuring each Gitaly node](#gitaly) (once for each Gitaly node) +1. [Configure the load balancer](#load-balancer) +1. [Updating the GitLab server configuration](#gitlab) +1. 
[Configure Grafana](#grafana) + +### Preparation + +Before beginning, you should already have a working GitLab instance. +[Learn how to install GitLab](https://about.gitlab.com/install/). + +Provision a PostgreSQL server. You should use the PostgreSQL that is shipped +with the Linux package and use it to configure the PostgreSQL database. You can use an +external PostgreSQL server but you must set it up [manually](#manual-database-setup). + +Prepare all your new nodes by [installing GitLab](https://about.gitlab.com/install/). You need: + +- 1 PostgreSQL node +- 1 PgBouncer node (optional) +- At least 1 Praefect node (minimal storage required) +- 3 Gitaly nodes (high CPU, high memory, fast storage) +- 1 GitLab server + +You also need the IP/host address for each node: + +1. `PRAEFECT_LOADBALANCER_HOST`: the IP/host address of Praefect load balancer +1. `POSTGRESQL_HOST`: the IP/host address of the PostgreSQL server +1. `PGBOUNCER_HOST`: the IP/host address of the PgBouncer server +1. `PRAEFECT_HOST`: the IP/host address of the Praefect server +1. `GITALY_HOST_*`: the IP or host address of each Gitaly server +1. `GITLAB_HOST`: the IP/host address of the GitLab server + +If you are using Google Cloud Platform, SoftLayer, or any other vendor that provides a virtual private cloud (VPC) you can use the private addresses for each cloud instance (corresponds to "internal address" for Google Cloud Platform) for `PRAEFECT_HOST`, `GITALY_HOST_*`, and `GITLAB_HOST`. + +#### Secrets + +The communication between components is secured with different secrets, which +are described below. Before you begin, generate a unique secret for each, and +make note of it. This enables you to replace these placeholder tokens +with secure tokens as you complete the setup process. + +1. `GITLAB_SHELL_SECRET_TOKEN`: this is used by Git hooks to make callback HTTP + API requests to GitLab when accepting a Git push. This secret is shared with + GitLab Shell for legacy reasons. +1.
`PRAEFECT_EXTERNAL_TOKEN`: repositories hosted on your Praefect cluster can + only be accessed by Gitaly clients that carry this token. +1. `PRAEFECT_INTERNAL_TOKEN`: this token is used for replication traffic inside + your Praefect cluster. This token is distinct from `PRAEFECT_EXTERNAL_TOKEN` + because Gitaly clients must not be able to access internal nodes of the + Praefect cluster directly; that could lead to data loss. +1. `PRAEFECT_SQL_PASSWORD`: this password is used by Praefect to connect to + PostgreSQL. +1. `PRAEFECT_SQL_PASSWORD_HASH`: the hash of password of the Praefect user. + Use `gitlab-ctl pg-password-md5 praefect` to generate the hash. The command + asks for the password for `praefect` user. Enter `PRAEFECT_SQL_PASSWORD` + plaintext password. By default, Praefect uses `praefect` user, but you can + change it. +1. `PGBOUNCER_SQL_PASSWORD_HASH`: the hash of password of the PgBouncer user. + PgBouncer uses this password to connect to PostgreSQL. For more details + see [bundled PgBouncer](../../postgresql/pgbouncer.md) documentation. + +We note in the instructions below where these secrets are required. + +{{< alert type="note" >}} + +Linux package installations can use `gitlab-secrets.json` for `GITLAB_SHELL_SECRET_TOKEN`. + +{{< /alert >}} + +### Customize time server setting + +By default, Gitaly and Praefect nodes use the time server at `pool.ntp.org` for time synchronization checks. You can customize this setting by adding the +following to `gitlab.rb` on each node: + +- `gitaly['env'] = { "NTP_HOST" => "ntp.example.com" }`, for Gitaly nodes. +- `praefect['env'] = { "NTP_HOST" => "ntp.example.com" }`, for Praefect nodes. + +### PostgreSQL + +{{< alert type="note" >}} + +Do not store the GitLab application database and the Praefect +database on the same PostgreSQL server if using [Geo](../../geo/_index.md). +The replication state is internal to each instance of GitLab and should +not be replicated. 
+ +{{< /alert >}} + +These instructions help set up a single PostgreSQL database, which creates a single point of failure. To avoid this, you can configure your own clustered +PostgreSQL. +Clustered database support for other databases (for example, Praefect and Geo databases) is proposed in +[issue 7292](https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/7292). + +The following options are available: + +- For non-Geo installations, either: + - Use one of the documented [PostgreSQL setups](../../postgresql/_index.md). + - Use your own third-party database setup. This requires [manual setup](#manual-database-setup). +- For Geo instances, either: + - Set up a separate [PostgreSQL instance](https://www.postgresql.org/docs/16/high-availability.html). + - Use a cloud-managed PostgreSQL service. AWS + [Relational Database Service](https://aws.amazon.com/rds/) is recommended. + +Setting up PostgreSQL creates empty Praefect tables. For more information, see the +[relevant troubleshooting section](troubleshooting.md#relation-does-not-exist-errors). + +#### Running GitLab and Praefect databases on the same server + +The GitLab application database and the Praefect database can be run on the same server. However, Praefect should have +its own database server when using PostgreSQL from the Linux package. If there is a failover, Praefect isn't aware and starts to +fail as the database it's trying to use would either: + +- Be unavailable. +- In read-only mode. + +#### Manual database setup + +To complete this section you need: + +- One Praefect node +- One PostgreSQL node + - A PostgreSQL user with permissions to manage the database server + +In this section, we configure the PostgreSQL database. This can be used for both external +and Linux package-provided PostgreSQL server. + +To run the following instructions, you can use the Praefect node, where `psql` is installed +by the Linux package (`/opt/gitlab/embedded/bin/psql`). 
If you are using the Linux package-provided +PostgreSQL you can use `gitlab-psql` on the PostgreSQL node instead: + +1. Create a new user `praefect` to be used by Praefect: + + ```sql + CREATE ROLE praefect WITH LOGIN PASSWORD 'PRAEFECT_SQL_PASSWORD'; + ``` + + Replace `PRAEFECT_SQL_PASSWORD` with the strong password you generated in the preparation step. + +1. Create a new database `praefect_production` that is owned by `praefect` user. + + ```sql + CREATE DATABASE praefect_production WITH OWNER praefect ENCODING UTF8; + ``` + +When using the Linux package-provided PgBouncer, you need to take the following additional steps. We strongly +recommend using the PostgreSQL that is shipped with the Linux package as the backend. The following +instructions only work on the Linux package-provided PostgreSQL: + +1. For the Linux package-provided PgBouncer, you need to use the hash of `praefect` password instead of the + actual password: + + ```sql + ALTER ROLE praefect WITH PASSWORD 'md5<PRAEFECT_SQL_PASSWORD_HASH>'; + ``` + + Replace `<PRAEFECT_SQL_PASSWORD_HASH>` with the hash of the password you generated in the + preparation step. It is prefixed with `md5` literal. + +1. Create a new user `pgbouncer` to be used by PgBouncer: + + ```sql + CREATE ROLE pgbouncer WITH LOGIN; + ALTER USER pgbouncer WITH password 'md5<PGBOUNCER_SQL_PASSWORD_HASH>'; + ``` + + Replace `PGBOUNCER_SQL_PASSWORD_HASH` with the strong password hash you generated in the preparation step. + +1. The PgBouncer that is shipped with the Linux package is configured to use [`auth_query`](https://www.pgbouncer.org/config.html#generic-settings) + and uses `pg_shadow_lookup` function.
You need to create this function in `praefect_production` + database: + + ```sql + CREATE OR REPLACE FUNCTION public.pg_shadow_lookup(in i_username text, out username text, out password text) RETURNS record AS $$ + BEGIN + SELECT usename, passwd FROM pg_catalog.pg_shadow + WHERE usename = i_username INTO username, password; + RETURN; + END; + $$ LANGUAGE plpgsql SECURITY DEFINER; + + REVOKE ALL ON FUNCTION public.pg_shadow_lookup(text) FROM public, pgbouncer; + GRANT EXECUTE ON FUNCTION public.pg_shadow_lookup(text) TO pgbouncer; + ``` + +The database used by Praefect is now configured. + +You can now configure Praefect to use the database: + +```ruby +praefect['configuration'] = { + # ... + database: { + # ... + host: POSTGRESQL_HOST, + user: 'praefect', + port: 5432, + password: PRAEFECT_SQL_PASSWORD, + dbname: 'praefect_production', + } +} +``` + +If you see Praefect database errors after configuring PostgreSQL, see +[troubleshooting steps](troubleshooting.md#relation-does-not-exist-errors). + +#### Reads distribution caching + +Praefect performance can be improved by additionally configuring the `session_pooled` +settings: + +```ruby +praefect['configuration'] = { + # ... + database: { + # ... + session_pooled: { + # ... + host: POSTGRESQL_HOST, + port: 5432, + + # Use the following to override parameters of direct database connection. + # Comment out where the parameters are the same for both connections. + user: 'praefect', + password: PRAEFECT_SQL_PASSWORD, + dbname: 'praefect_production', + # sslmode: '...', + # sslcert: '...', + # sslkey: '...', + # sslrootcert: '...', + } + } +} +``` + +When configured, this connection is automatically used for the +[SQL LISTEN](https://www.postgresql.org/docs/16/sql-listen.html) feature and +allows Praefect to receive notifications from PostgreSQL for cache invalidation.
+ +Verify this feature is working by looking for the following log entry in the Praefect +log: + +```plaintext +reads distribution caching is enabled by configuration +``` + +#### Use PgBouncer + +To reduce PostgreSQL resource consumption, you should set up and configure [PgBouncer](https://www.pgbouncer.org/) in +front of the PostgreSQL instance. However, PgBouncer isn't required because +Praefect makes a low number of connections. If you choose to use PgBouncer, you can use the same PgBouncer instance for +both the GitLab application database and the Praefect database. + +To configure PgBouncer in front of the PostgreSQL instance, you must point Praefect to PgBouncer by setting database +parameters on the Praefect configuration: + +```ruby +praefect['configuration'] = { + # ... + database: { + # ... + host: PGBOUNCER_HOST, + port: 6432, + user: 'praefect', + password: PRAEFECT_SQL_PASSWORD, + dbname: 'praefect_production', + # sslmode: '...', + # sslcert: '...', + # sslkey: '...', + # sslrootcert: '...', + } +} +``` + +Praefect requires an additional connection to the PostgreSQL that supports the +[LISTEN](https://www.postgresql.org/docs/16/sql-listen.html) feature. With PgBouncer +this feature is only available with `session` pool mode (`pool_mode = session`). +It is not supported in `transaction` pool mode (`pool_mode = transaction`). + +To configure the additional connection, you must either: + +- Configure a new PgBouncer database that uses the same PostgreSQL database endpoint, + but with a different pool mode (`pool_mode = session`). +- Connect Praefect directly to PostgreSQL and bypass PgBouncer. + +##### Configure a new PgBouncer database with `pool_mode = session` + +You should use PgBouncer with `session` pool mode. You can use the +[bundled PgBouncer](../../postgresql/pgbouncer.md) or use an external PgBouncer and +[configure it manually](https://www.pgbouncer.org/config.html).
+ +The following example uses the bundled PgBouncer and sets up two separate connection pools on the PostgreSQL host, +one in `session` pool mode and the other in `transaction` pool mode. For this example to work, +you need to prepare PostgreSQL server as documented in [the setup instructions](#manual-database-setup). + +Then, configure the separate connection pools on the PgBouncer host: + +```ruby +pgbouncer['databases'] = { + # Other database configuration including gitlabhq_production + ... + + praefect_production: { + host: POSTGRESQL_HOST, + # Use `pgbouncer` user to connect to database backend. + user: 'pgbouncer', + password: PGBOUNCER_SQL_PASSWORD_HASH, + pool_mode: 'transaction' + }, + praefect_production_direct: { + host: POSTGRESQL_HOST, + # Use `pgbouncer` user to connect to database backend. + user: 'pgbouncer', + password: PGBOUNCER_SQL_PASSWORD_HASH, + dbname: 'praefect_production', + pool_mode: 'session' + }, + + ... +} + +# Allow the praefect user to connect to PgBouncer +pgbouncer['users'] = { + 'praefect': { + 'password': PRAEFECT_SQL_PASSWORD_HASH, + } +} +``` + +Both `praefect_production` and `praefect_production_direct` use the same database endpoint +(`praefect_production`), but with different pool modes. This translates to the following +`databases` section of PgBouncer: + +```ini +[databases] +praefect_production = host=POSTGRESQL_HOST auth_user=pgbouncer pool_mode=transaction +praefect_production_direct = host=POSTGRESQL_HOST auth_user=pgbouncer dbname=praefect_production pool_mode=session +``` + +Now you can configure Praefect to use PgBouncer for both connections: + +```ruby +praefect['configuration'] = { + # ... + database: { + # ... + host: PGBOUNCER_HOST, + port: 6432, + user: 'praefect', + # `PRAEFECT_SQL_PASSWORD` is the plain-text password of + # Praefect user. Not to be confused with `PRAEFECT_SQL_PASSWORD_HASH`. + password: PRAEFECT_SQL_PASSWORD, + dbname: 'praefect_production', + session_pooled: { + # ... 
+ dbname: 'praefect_production_direct', + # There is no need to repeat the following. Parameters of direct + # database connection will fall back to the values specified in the + # database block. + # + # host: PGBOUNCER_HOST, + # port: 6432, + # user: 'praefect', + # password: PRAEFECT_SQL_PASSWORD, + }, + }, +} +``` + +With this configuration, Praefect uses PgBouncer for both connection types. + +{{< alert type="note" >}} + +Linux package installations handle the authentication requirements (using `auth_query`), but if you are preparing +your databases manually and configuring an external PgBouncer, you must include `praefect` user and +its password in the file used by PgBouncer. For example, `userlist.txt` if the [`auth_file`](https://www.pgbouncer.org/config.html#auth_file) +configuration option is set. For more details, consult the PgBouncer documentation. + +{{< /alert >}} + +##### Configure Praefect to connect directly to PostgreSQL + +As an alternative to configuring PgBouncer with `session` pool mode, Praefect can be configured to use different +connection parameters for direct access to PostgreSQL. This connection supports the `LISTEN` feature. + +An example of Praefect configuration that bypasses PgBouncer and directly connects to PostgreSQL: + +```ruby +praefect['configuration'] = { + # ... + database: { + # ... + session_pooled: { + # ... + host: POSTGRESQL_HOST, + port: 5432, + + # Use the following to override parameters of direct database connection. + # Comment out where the parameters are the same for both connections. + # + user: 'praefect', + password: PRAEFECT_SQL_PASSWORD, + dbname: 'praefect_production', + # sslmode: '...', + # sslcert: '...', + # sslkey: '...', + # sslrootcert: '...', + }, + }, +} +``` + +### Praefect + +If there are multiple Praefect nodes: + +1. Designate one node as the deploy node, and configure it using the following steps. +1. Complete the following steps for each additional node. 
+ +To complete this section you need a [configured PostgreSQL server](#postgresql), including: + +{{< alert type="warning" >}} + +Praefect should be run on a dedicated node. Do not run Praefect on the +application server, or a Gitaly node. + +{{< /alert >}} + +On the Praefect node: + +1. Disable all other services by editing `/etc/gitlab/gitlab.rb`: + + + + ```ruby + # Avoid running unnecessary services on the Praefect server + gitaly['enable'] = false + postgresql['enable'] = false + redis['enable'] = false + nginx['enable'] = false + puma['enable'] = false + sidekiq['enable'] = false + gitlab_workhorse['enable'] = false + prometheus['enable'] = false + alertmanager['enable'] = false + gitlab_exporter['enable'] = false + gitlab_kas['enable'] = false + + # Enable only the Praefect service + praefect['enable'] = true + + # Prevent database migrations from running on upgrade automatically + praefect['auto_migrate'] = false + gitlab_rails['auto_migrate'] = false + ``` + +1. Configure Praefect to listen on network interfaces by editing + `/etc/gitlab/gitlab.rb`: + + ```ruby + praefect['configuration'] = { + # ... + listen_addr: '0.0.0.0:2305', + } + ``` + +1. Configure Prometheus metrics by editing + `/etc/gitlab/gitlab.rb`: + + ```ruby + praefect['configuration'] = { + # ... + # + # Enable Prometheus metrics access to Praefect. You must use firewalls + # to restrict access to this address/port. + # The default metrics endpoint is /metrics + prometheus_listen_addr: '0.0.0.0:9652', + # Some metrics run queries against the database. Enabling separate database metrics allows + # these metrics to be collected when the metrics are + # scraped on a separate /db_metrics endpoint. + prometheus_exclude_database_from_default_metrics: true, + } + ``` + +1. 
Configure a strong authentication token for Praefect by editing + `/etc/gitlab/gitlab.rb`, which is needed by clients outside the cluster + (like GitLab Shell) to communicate with the Praefect cluster: + + ```ruby + praefect['configuration'] = { + # ... + auth: { + # ... + token: 'PRAEFECT_EXTERNAL_TOKEN', + }, + } + ``` + +1. Configure Praefect to [connect to the PostgreSQL database](#postgresql). We + highly recommend using [PgBouncer](#use-pgbouncer) as well. + + If you want to use a TLS client certificate, the options below can be used: + + ```ruby + praefect['configuration'] = { + # ... + database: { + # ... + # + # Connect to PostgreSQL using a TLS client certificate + # sslcert: '/path/to/client-cert', + # sslkey: '/path/to/client-key', + # + # Trust a custom certificate authority + # sslrootcert: '/path/to/rootcert', + }, + } + ``` + + By default, Praefect uses opportunistic TLS to connect to PostgreSQL. This means that Praefect attempts to connect to PostgreSQL using `sslmode` set to + `prefer`. You can override this by uncommenting the following line: + + ```ruby + praefect['configuration'] = { + # ... + database: { + # ... + # sslmode: 'disable', + }, + } + ``` + +1. Configure the Praefect cluster to connect to each Gitaly node in the + cluster by editing `/etc/gitlab/gitlab.rb`. + + The virtual storage's name must match the configured storage name in GitLab + configuration. In a later step, we configure the storage name as `default` + so we use `default` here as well. This cluster has three Gitaly nodes `gitaly-1`, + `gitaly-2`, and `gitaly-3`, which are intended to be replicas of each other. + + {{< alert type="warning" >}} + + If you have data on an already existing storage called + `default`, you should configure the virtual storage with another name and + [migrate the data to the Gitaly Cluster storage](_index.md#migrate-to-gitaly-cluster) + afterwards. 
+ + {{< /alert >}} + + Replace `PRAEFECT_INTERNAL_TOKEN` with a strong secret, which is used by + Praefect when communicating with Gitaly nodes in the cluster. This token is + distinct from the `PRAEFECT_EXTERNAL_TOKEN`. + + Replace `GITALY_HOST_*` with the IP or host address of each Gitaly node. + + More Gitaly nodes can be added to the cluster to increase the number of + replicas. More clusters can also be added for very large GitLab instances. + + {{< alert type="note" >}} + + When adding additional Gitaly nodes to a virtual storage, all storage names + in that virtual storage must be unique. Additionally, all Gitaly node + addresses referenced in the Praefect configuration must be unique. + + {{< /alert >}} + + ```ruby + # Name of storage hash must match storage name in gitlab_rails['repositories_storages'] on GitLab + # server ('default') and in gitaly['configuration'][:storage][INDEX][:name] on Gitaly nodes ('gitaly-1') + praefect['configuration'] = { + # ... + virtual_storage: [ + { + # ... + name: 'default', + node: [ + { + storage: 'gitaly-1', + address: 'tcp://GITALY_HOST_1:8075', + token: 'PRAEFECT_INTERNAL_TOKEN' + }, + { + storage: 'gitaly-2', + address: 'tcp://GITALY_HOST_2:8075', + token: 'PRAEFECT_INTERNAL_TOKEN' + }, + { + storage: 'gitaly-3', + address: 'tcp://GITALY_HOST_3:8075', + token: 'PRAEFECT_INTERNAL_TOKEN' + }, + ], + }, + ], + } + ``` + +1. Save the changes to `/etc/gitlab/gitlab.rb` and + [reconfigure Praefect](../../restart_gitlab.md#reconfigure-a-linux-package-installation): + + ```shell + gitlab-ctl reconfigure + ``` + +1. For: + + - The "deploy node": + 1. Enable Praefect database auto-migration again by setting `praefect['auto_migrate'] = true` in + `/etc/gitlab/gitlab.rb`. + 1. To ensure database migrations are only run during reconfigure and not automatically on + upgrade, run: + + ```shell + sudo touch /etc/gitlab/skip-auto-reconfigure + ``` + + - The other nodes, you can leave the settings as they are. 
Though + `/etc/gitlab/skip-auto-reconfigure` isn't required, you may want to set it to prevent GitLab + running reconfigure automatically when running commands such as `apt-get update`. This way any + additional configuration changes can be done and then reconfigure can be run manually. + +1. Save the changes to `/etc/gitlab/gitlab.rb` and + [reconfigure Praefect](../../restart_gitlab.md#reconfigure-a-linux-package-installation): + + ```shell + gitlab-ctl reconfigure + ``` + +1. To ensure that Praefect + [has updated its Prometheus listen address](https://gitlab.com/gitlab-org/gitaly/-/issues/2734), + [restart Praefect](../../restart_gitlab.md#reconfigure-a-linux-package-installation): + + ```shell + gitlab-ctl restart praefect + ``` + +1. Verify that Praefect can reach PostgreSQL: + + ```shell + sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml sql-ping + ``` + + If the check fails, make sure you have followed the steps correctly. If you + edit `/etc/gitlab/gitlab.rb`, remember to run `sudo gitlab-ctl reconfigure` + again before trying the `sql-ping` command. + +#### Enable TLS support + +Praefect supports TLS encryption. To communicate with a Praefect instance that listens +for secure connections, you must: + +- Ensure Gitaly is [configured for TLS](../tls_support.md) and use a `tls://` URL scheme in the `gitaly_address` + of the corresponding storage entry in the GitLab configuration. +- Bring your own certificates because this isn't provided automatically. The certificate + corresponding to each Praefect server must be installed on that Praefect server. + +Additionally the certificate, or its certificate authority, must be installed on all Gitaly servers +and on all Praefect clients that communicate with it following the procedure described in +[GitLab custom certificate configuration](https://docs.gitlab.com/omnibus/settings/ssl/#install-custom-public-certificates) (and repeated below). 
+ +Note the following: + +- The certificate must specify the address you use to access the Praefect server. You must add the hostname or IP + address as a Subject Alternative Name to the certificate. +- When running Praefect sub-commands such as `dial-nodes` and `list-untracked-repositories` from the command line with + [Gitaly TLS enabled](../tls_support.md), you must set the `SSL_CERT_DIR` or `SSL_CERT_FILE` + environment variable so that the Gitaly certificate is trusted. For example: + + ```shell + SSL_CERT_DIR=/etc/gitlab/trusted-certs sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml dial-nodes + ``` + +- You can configure Praefect servers with both an unencrypted listening address + `listen_addr` and an encrypted listening address `tls_listen_addr` at the same time. + This allows you to do a gradual transition from unencrypted to encrypted traffic, if + necessary. + + To disable the unencrypted listener, set: + + ```ruby + praefect['configuration'] = { + # ... + listen_addr: nil, + } + ``` + +Configure Praefect with TLS. + +For Linux package installations: + +1. Create certificates for Praefect servers. + +1. On the Praefect servers, create the `/etc/gitlab/ssl` directory and copy your key + and certificate there: + + ```shell + sudo mkdir -p /etc/gitlab/ssl + sudo chmod 755 /etc/gitlab/ssl + sudo cp key.pem cert.pem /etc/gitlab/ssl/ + sudo chmod 644 key.pem cert.pem + ``` + +1. Edit `/etc/gitlab/gitlab.rb` and add: + + ```ruby + praefect['configuration'] = { + # ... + tls_listen_addr: '0.0.0.0:3305', + tls: { + # ... + certificate_path: '/etc/gitlab/ssl/cert.pem', + key_path: '/etc/gitlab/ssl/key.pem', + }, + } + ``` + +1. Save the file and [reconfigure](../../restart_gitlab.md#reconfigure-a-linux-package-installation). + +1. 
On the Praefect clients (including each Gitaly server), copy the certificates, + or their certificate authority, into `/etc/gitlab/trusted-certs`: + + ```shell + sudo cp cert.pem /etc/gitlab/trusted-certs/ + ``` + +1. On the Praefect clients (except Gitaly servers), edit `gitlab_rails['repositories_storages']` in + `/etc/gitlab/gitlab.rb` as follows: + + ```ruby + gitlab_rails['repositories_storages'] = { + "default" => { + "gitaly_address" => 'tls://PRAEFECT_LOADBALANCER_HOST:3305', + "gitaly_token" => 'PRAEFECT_EXTERNAL_TOKEN' + } + } + ``` + +1. Save the file and [reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation). + +For self-compiled installations: + +1. Create certificates for Praefect servers. +1. On the Praefect servers, create the `/etc/gitlab/ssl` directory and copy your key and certificate + there: + + ```shell + sudo mkdir -p /etc/gitlab/ssl + sudo chmod 755 /etc/gitlab/ssl + sudo cp key.pem cert.pem /etc/gitlab/ssl/ + sudo chmod 644 key.pem cert.pem + ``` + +1. On the Praefect clients (including each Gitaly server), copy the certificates, + or their certificate authority, into the system trusted certificates: + + ```shell + sudo cp cert.pem /usr/local/share/ca-certificates/praefect.crt + sudo update-ca-certificates + ``` + +1. On the Praefect clients (except Gitaly servers), edit `storages` in + `/home/git/gitlab/config/gitlab.yml` as follows: + + ```yaml + gitlab: + repositories: + storages: + default: + gitaly_address: tls://PRAEFECT_LOADBALANCER_HOST:3305 + ``` + +1. Save the file and [restart GitLab](../../restart_gitlab.md#self-compiled-installations). +1. Copy all Praefect server certificates, or their certificate authority, to the system + trusted certificates on each Gitaly server so the Praefect server trusts the + certificate when called by Gitaly servers: + + ```shell + sudo cp cert.pem /usr/local/share/ca-certificates/praefect.crt + sudo update-ca-certificates + ``` + +1. 
Edit `/home/git/praefect/config.toml` and add: + + ```toml + tls_listen_addr = '0.0.0.0:3305' + + [tls] + certificate_path = '/etc/gitlab/ssl/cert.pem' + key_path = '/etc/gitlab/ssl/key.pem' + ``` + +1. Save the file and [restart GitLab](../../restart_gitlab.md#self-compiled-installations). + +#### Service discovery + +{{< history >}} + +- [Introduced](https://gitlab.com/groups/gitlab-org/-/epics/8971) in GitLab 15.10. + +{{< /history >}} + +Prerequisites: + +- A DNS server. + +GitLab uses service discovery to retrieve a list of Praefect hosts. Service +discovery involves periodic checks of a DNS A or AAAA record, with the IPs +retrieved from the record serving as the addresses of the target nodes. +Praefect does not support service discovery by SRV record. + +By default, the minimum time between checks is 5 minutes, regardless of the +records' TTLs. Praefect does not support customizing this interval. When clients +receive an update, they: + +- Establish new connections to the new IP addresses. +- Keep existing connections to intact IP addresses. +- Drop connections to removed IP addresses. + +In-flight requests on to-be-removed connections are still handled until they +finish. Workhorse has a 10-minute timeout, while other clients do not specify a +graceful timeout. + +The DNS server should return all IP addresses instead of load-balancing itself. +Clients can distribute requests to IP addresses in a round-robin fashion. + +Before updating client configuration, ensure that DNS service discovery works +correctly. It should return the list of IP addresses correctly. `dig` is a good +tool to use to verify. 
+ +```console +❯ dig A praefect.service.consul @127.0.0.1 + +; <<>> DiG 9.10.6 <<>> A praefect.service.consul @127.0.0.1 +;; global options: +cmd +;; Got answer: +;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 29210 +;; flags: qr aa rd ra; QUERY: 1, ANSWER: 3, AUTHORITY: 0, ADDITIONAL: 1 + +;; OPT PSEUDOSECTION: +; EDNS: version: 0, flags:; udp: 4096 +;; QUESTION SECTION: +;praefect.service.consul. IN A + +;; ANSWER SECTION: +praefect.service.consul. 0 IN A 10.0.0.3 +praefect.service.consul. 0 IN A 10.0.0.2 +praefect.service.consul. 0 IN A 10.0.0.1 + +;; Query time: 0 msec +;; SERVER: ::1#53(::1) +;; WHEN: Wed Dec 14 12:53:58 +07 2022 +;; MSG SIZE rcvd: 86 +``` + +##### Configure service discovery + +By default, Praefect delegates DNS resolution to the operating system. In such +cases, the Gitaly address can be set in either of these formats: + +- `dns:[host]:[port]` +- `dns:///[host]:[port]` (note the three slashes) + +You can also appoint an authoritative name server by setting it in this format: + +- `dns://[authority_host]:[authority_port]/[host]:[port]` + +{{< tabs >}} + +{{< tab title="Linux package (Omnibus)" >}} + +1. Add the IP address for each Praefect node to the DNS service discovery address. +1. On the Praefect clients (except Gitaly servers), edit `gitlab_rails['repositories_storages']` in + `/etc/gitlab/gitlab.rb` as follows. Replace `PRAEFECT_SERVICE_DISCOVERY_ADDRESS` + with Praefect service discovery address, such as `praefect.service.consul`. + + ```ruby + gitlab_rails['repositories_storages'] = { + "default" => { + "gitaly_address" => 'dns:PRAEFECT_SERVICE_DISCOVERY_ADDRESS:2305', + "gitaly_token" => 'PRAEFECT_EXTERNAL_TOKEN' + } + } + ``` + +1. Save the file and [reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation). + +{{< /tab >}} + +{{< tab title="Self-compiled (source)" >}} + +1. Install a DNS service discovery service. Register all Praefect nodes with the service. +1. 
On the Praefect clients (except Gitaly servers), edit `storages` in + `/home/git/gitlab/config/gitlab.yml` as follows: + + ```yaml + gitlab: + repositories: + storages: + default: + gitaly_address: dns:PRAEFECT_SERVICE_DISCOVERY_ADDRESS:2305 + ``` + +1. Save the file and [restart GitLab](../../restart_gitlab.md#self-compiled-installations). + +{{< /tab >}} + +{{< /tabs >}} + +##### Configure service discovery with Consul + +If you already have a Consul server in your architecture then you can add +a Consul agent on each Praefect node and register the `praefect` service to it. +This registers each node's IP address to `praefect.service.consul` so it can be found +by service discovery. + +Prerequisites: + +- One or more [Consul](../../consul.md) servers to keep track of the Consul agents. + +1. On each Praefect server, add the following to your `/etc/gitlab/gitlab.rb`: + + ```ruby + consul['enable'] = true + praefect['consul_service_name'] = 'praefect' + + # The following must also be added until this issue is addressed: + # https://gitlab.com/gitlab-org/omnibus-gitlab/-/issues/8321 + consul['monitoring_service_discovery'] = true + praefect['configuration'] = { + # ... + # + prometheus_listen_addr: '0.0.0.0:9652', + } + ``` + +1. Save the file and [reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation). +1. Repeat the previous steps on each Praefect server to use with + service discovery. +1. On the Praefect clients (except Gitaly servers), edit `gitlab_rails['repositories_storages']` in + `/etc/gitlab/gitlab.rb` as follows. Replace `CONSUL_SERVER` with the IP or + address of a Consul server. The default Consul DNS port is `8600`. + + ```ruby + gitlab_rails['repositories_storages'] = { + "default" => { + "gitaly_address" => 'dns://CONSUL_SERVER:8600/praefect.service.consul:2305', + "gitaly_token" => 'PRAEFECT_EXTERNAL_TOKEN' + } + } + ``` + +1. 
Use `dig` from the Praefect clients to confirm that each IP address has been registered to + `praefect.service.consul` with `dig A praefect.service.consul @CONSUL_SERVER -p 8600`. + Replace `CONSUL_SERVER` with the value configured previously and all Praefect node IP addresses + should be present in the output. +1. Save the file and [reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation). + +### Gitaly + +{{< alert type="note" >}} + +Complete these steps for each Gitaly node. + +{{< /alert >}} + +To complete this section you need: + +- [Configured Praefect node](#praefect) +- 3 (or more) servers, with GitLab installed, to be configured as Gitaly nodes. + These should be dedicated nodes, do not run other services on these nodes. + +Every Gitaly server assigned to the Praefect cluster needs to be configured. The +configuration is the same as a standard [standalone Gitaly server](_index.md), +except: + +- The storage names are exposed to Praefect, not GitLab +- The secret token is shared with Praefect, not GitLab + +The configuration of all Gitaly nodes in the Praefect cluster can be identical, +because we rely on Praefect to route operations correctly. + +Particular attention should be shown to: + +- The `gitaly['configuration'][:auth][:token]` configured in this section must match the `token` + value under `praefect['configuration'][:virtual_storage][][:node][][:token]` on the Praefect node. This value was + set in the [previous section](#praefect). This document uses the placeholder `PRAEFECT_INTERNAL_TOKEN` throughout. +- The physical storage names in `gitaly['configuration'][:storage]` configured in this section must match the + physical storage names under `praefect['configuration'][:virtual_storage]` on the Praefect node. This + was set in the [previous section](#praefect). This document uses `gitaly-1`, + `gitaly-2`, and `gitaly-3` as physical storage names. 
+ +For more information on Gitaly server configuration, see our +[Gitaly documentation](../configure_gitaly.md#configure-gitaly-servers). + +1. SSH into the Gitaly node and login as root: + + ```shell + sudo -i + ``` + +1. Disable all other services by editing `/etc/gitlab/gitlab.rb`: + + ```ruby + # Disable all other services on the Gitaly node + postgresql['enable'] = false + redis['enable'] = false + nginx['enable'] = false + puma['enable'] = false + sidekiq['enable'] = false + gitlab_workhorse['enable'] = false + prometheus_monitoring['enable'] = false + gitlab_kas['enable'] = false + + # Enable only the Gitaly service + gitaly['enable'] = true + + # Enable Prometheus if needed + prometheus['enable'] = true + + # Disable database migrations to prevent database connections during 'gitlab-ctl reconfigure' + gitlab_rails['auto_migrate'] = false + ``` + +1. Configure Gitaly to listen on network interfaces by editing + `/etc/gitlab/gitlab.rb`: + + ```ruby + gitaly['configuration'] = { + # ... + # + # Make Gitaly accept connections on all network interfaces. + # Use firewalls to restrict access to this address/port. + listen_addr: '0.0.0.0:8075', + # Enable Prometheus metrics access to Gitaly. You must use firewalls + # to restrict access to this address/port. + prometheus_listen_addr: '0.0.0.0:9236', + } + ``` + +1. Configure a strong `auth_token` for Gitaly by editing + `/etc/gitlab/gitlab.rb`, which is needed by clients to communicate with + this Gitaly node. Typically, this token is the same for all Gitaly + nodes. + + ```ruby + gitaly['configuration'] = { + # ... + auth: { + # ... + token: 'PRAEFECT_INTERNAL_TOKEN', + }, + } + ``` + +1. Configure the GitLab Shell secret token, which is needed for `git push` operations. Either: + + - Method 1: + + 1. Copy `/etc/gitlab/gitlab-secrets.json` from the Gitaly client to the same path on the Gitaly + servers and any other Gitaly clients. + 1. 
[Reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation) on Gitaly servers. + + - Method 2: + + 1. Edit `/etc/gitlab/gitlab.rb`. + 1. Replace `GITLAB_SHELL_SECRET_TOKEN` with the real secret. + + ```ruby + gitlab_shell['secret_token'] = 'GITLAB_SHELL_SECRET_TOKEN' + ``` + +1. Configure an `internal_api_url`, which is also needed for `git push` operations: + + ```ruby + # Configure the gitlab-shell API callback URL. Without this, `git push` will + # fail. This can be your front door GitLab URL or an internal load balancer. + # Examples: 'https://gitlab.example.com', 'http://10.0.2.2' + gitlab_rails['internal_api_url'] = 'https://gitlab.example.com' + ``` + +1. Configure the storage location for Git data by setting `gitaly['configuration'][:storage]` in + `/etc/gitlab/gitlab.rb`. Each Gitaly node should have a unique storage name + (such as `gitaly-1`) and should not be duplicated on other Gitaly nodes. + + ```ruby + gitaly['configuration'] = { + # ... + storage: [ + # Replace with appropriate name for each Gitaly nodes. + { + name: 'gitaly-1', + path: '/var/opt/gitlab/git-data/repositories', + }, + ], + } + ``` + +1. Save the changes to `/etc/gitlab/gitlab.rb` and + [reconfigure Gitaly](../../restart_gitlab.md#reconfigure-a-linux-package-installation): + + ```shell + gitlab-ctl reconfigure + ``` + +1. To ensure that Gitaly + [has updated its Prometheus listen address](https://gitlab.com/gitlab-org/gitaly/-/issues/2734), + [restart Gitaly](../../restart_gitlab.md#reconfigure-a-linux-package-installation): + + ```shell + gitlab-ctl restart gitaly + ``` + +{{< alert type="note" >}} + +The previous steps must be completed for each Gitaly node! + +{{< /alert >}} + +After all Gitaly nodes are configured, run the Praefect connection +checker to verify Praefect can connect to all Gitaly servers in the Praefect +configuration. + +1. 
SSH into each Praefect node and run the Praefect connection checker: + + ```shell + sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml dial-nodes + ``` + +### Load Balancer + +In a fault-tolerant Gitaly configuration, a load balancer is needed to route +internal traffic from the GitLab application to the Praefect nodes. The +specifics on which load balancer to use or the exact configuration are beyond the +scope of the GitLab documentation. + +{{< alert type="note" >}} + +The load balancer must be configured to accept traffic from the Gitaly nodes in +addition to the GitLab nodes. + +{{< /alert >}} + +We hope that if you're managing fault-tolerant systems like GitLab, you have a load balancer +of choice already. Some examples include [HAProxy](https://www.haproxy.org/) +(open-source), [Google Internal Load Balancer](https://cloud.google.com/load-balancing/docs/internal/), +[AWS Elastic Load Balancer](https://aws.amazon.com/elasticloadbalancing/), F5 +Big-IP LTM, and Citrix Net Scaler. This documentation outlines what ports +and protocols you need to configure. + +You should use the equivalent of HAProxy `leastconn` load-balancing strategy because long-running operations (for +example, clones) keep some connections open for extended periods. + +| LB Port | Backend Port | Protocol | +|:--------|:-------------|:---------| +| 2305 | 2305 | TCP | + +You must use a TCP load balancer. Using an HTTP/2 or gRPC load balancer +with Praefect does not work because of [Gitaly sidechannels](https://gitlab.com/gitlab-org/gitaly/-/blob/master/doc/sidechannel.md). +This optimization intercepts the gRPC handshaking process. It redirects all heavy Git operations to a more efficient "channel" than gRPC, +but HTTP/2 or gRPC load balancers do not handle such requests properly. 
+ +If TLS is enabled, [some versions of Praefect](#alpn-enforcement) require that the Application-Layer Protocol Negotiation (ALPN) extension is used per [RFC 7540](https://datatracker.ietf.org/doc/html/rfc7540#section-3.3). +TCP load balancers pass ALPN directly without additional configuration: + +```mermaid +sequenceDiagram + autonumber + participant Client as Client + participant LB as TCP Load Balancer + participant Praefect as Praefect + + Client->>LB: Establish TLS Session (w/ ALPN Extension) + LB->>Praefect: Establish TLS Session (w/ ALPN Extension) + Client->>LB: Encrypted TCP packets + LB->>Praefect: Encrypted TCP packets + Praefect->>LB: Encrypted Response + LB->>Client: Encrypted Response +``` + +Some TCP load balancers can be configured to accept a TLS client connection and +proxy the connection to Praefect with a new TLS connection. However, this only works +if ALPN is supported on both connections. + +For this reason, NGINX's [`ngx_stream_proxy_module`](https://nginx.org/en/docs/stream/ngx_stream_proxy_module.html) +does not work when the `proxy_ssl` configuration option is enabled: + +```mermaid +sequenceDiagram + autonumber + participant Client as Client + participant NGINX as NGINX Stream Proxy + participant Praefect as Praefect + + Client->>NGINX: Establish TLS Session (w/ ALPN Extension) + NGINX->>Praefect: Establish New TLS Session + Praefect->>NGINX: Connection failed: missing selected ALPN property +``` + +On step 2, ALPN is not used because [NGINX does not support this](https://mailman.nginx.org/pipermail/nginx-devel/2017-July/010307.html). +For more information, see [NGINX issue 406](https://github.com/nginx/nginx/issues/406). + +#### ALPN enforcement + +ALPN enforcement was enabled in some versions of GitLab. However, ALPN enforcement broke deployments and so is disabled +[to provide a path to migrate](https://github.com/grpc/grpc-go/issues/7922). 
The following versions of GitLab have ALPN enforcement enabled: + +- GitLab 17.7.0 +- GitLab 17.6.0 - 17.6.2 +- GitLab 17.5.0 - 17.5.4 +- GitLab 17.4.x + +With [GitLab 17.5.5, 17.6.3, and 17.7.1](https://about.gitlab.com/releases/2025/01/08/patch-release-gitlab-17-7-1-released/), +ALPN enforcement is disabled again. GitLab 17.4 and earlier never had ALPN enforcement enabled. + +### GitLab + +To complete this section you need: + +- [Configured Praefect node](#praefect) +- [Configured Gitaly nodes](#gitaly) + +The Praefect cluster needs to be exposed as a storage location to the GitLab +application, which is done by updating `gitlab_rails['repositories_storages']`. + +Particular attention should be shown to: + +- the storage name added to `gitlab_rails['repositories_storages']` in this section must match the + storage name under `praefect['configuration'][:virtual_storage]` on the Praefect nodes. This + was set in the [Praefect](#praefect) section of this guide. This document uses + `default` as the Praefect storage name. + +1. SSH into the GitLab node and login as root: + + ```shell + sudo -i + ``` + +1. Configure the `external_url` so that files could be served by GitLab + by proper endpoint access by editing `/etc/gitlab/gitlab.rb`: + + You need to replace `GITLAB_SERVER_URL` with the real external facing + URL on which current GitLab instance is serving: + + ```ruby + external_url 'GITLAB_SERVER_URL' + ``` + +1. Disable the default Gitaly service running on the GitLab host. It isn't needed + because GitLab connects to the configured cluster. + + {{< alert type="warning" >}} + + If you have existing data stored on the default Gitaly storage, + you should [migrate the data to your Gitaly Cluster storage](_index.md#migrate-to-gitaly-cluster) + first. + + {{< /alert >}} + + ```ruby + gitaly['enable'] = false + ``` + +1. Add the Praefect cluster as a storage location by editing + `/etc/gitlab/gitlab.rb`. 
+ + You need to replace: + + - `PRAEFECT_LOADBALANCER_HOST` with the IP address or hostname of the load + balancer. + - `PRAEFECT_EXTERNAL_TOKEN` with the real secret + + If you are using TLS: + + - The `gitaly_address` should begin with `tls://` instead. + - The port should be changed to `3305`. + + ```ruby + gitlab_rails['repositories_storages'] = { + "default" => { + "gitaly_address" => "tcp://PRAEFECT_LOADBALANCER_HOST:2305", + "gitaly_token" => 'PRAEFECT_EXTERNAL_TOKEN' + } + } + ``` + +1. Configure the GitLab Shell secret token so that callbacks from Gitaly nodes during a `git push` + are properly authenticated. Either: + + - Method 1: + + 1. Copy `/etc/gitlab/gitlab-secrets.json` from the Gitaly client to same path on the Gitaly + servers and any other Gitaly clients. + 1. [Reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation) on Gitaly servers. + + - Method 2: + + 1. Edit `/etc/gitlab/gitlab.rb`. + 1. Replace `GITLAB_SHELL_SECRET_TOKEN` with the real secret. + + ```ruby + gitlab_shell['secret_token'] = 'GITLAB_SHELL_SECRET_TOKEN' + ``` + +1. Add Prometheus monitoring settings by editing `/etc/gitlab/gitlab.rb`. If Prometheus + is enabled on a different node, make edits on that node instead. + + You need to replace: + + - `PRAEFECT_HOST` with the IP address or hostname of the Praefect node + - `GITALY_HOST_*` with the IP address or hostname of each Gitaly node + + ```ruby + prometheus['scrape_configs'] = [ + { + 'job_name' => 'praefect', + 'static_configs' => [ + 'targets' => [ + 'PRAEFECT_HOST:9652', # praefect-1 + 'PRAEFECT_HOST:9652', # praefect-2 + 'PRAEFECT_HOST:9652', # praefect-3 + ] + ] + }, + { + 'job_name' => 'praefect-gitaly', + 'static_configs' => [ + 'targets' => [ + 'GITALY_HOST_1:9236', # gitaly-1 + 'GITALY_HOST_2:9236', # gitaly-2 + 'GITALY_HOST_3:9236', # gitaly-3 + ] + ] + } + ] + ``` + +1. 
Save the changes to `/etc/gitlab/gitlab.rb` and [reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation): + + ```shell + gitlab-ctl reconfigure + ``` + +1. Verify on each Gitaly node the Git Hooks can reach GitLab. On each Gitaly node run: + - For GitLab 15.3 and later, run `sudo -u git -- /opt/gitlab/embedded/bin/gitaly check /var/opt/gitlab/gitaly/config.toml`. + - For GitLab 15.2 and earlier, run `sudo -u git -- /opt/gitlab/embedded/bin/gitaly-hooks check /var/opt/gitlab/gitaly/config.toml`. + +1. Verify that GitLab can reach Praefect: + + ```shell + gitlab-rake gitlab:gitaly:check + ``` + +1. Check that the Praefect storage is configured to store new repositories: + + 1. On the left sidebar, at the bottom, select **Admin**. + 1. On the left sidebar, select **Settings > Repository**. + 1. Expand the **Repository storage** section. + + Following this guide, the `default` storage should have weight 100 to store all new repositories. + +1. Verify everything is working by creating a new project. Check the + "Initialize repository with a README" box so that there is content in the + repository that can be viewed. If the project is created, and you can see the + README file, it works! + +#### Use TCP for existing GitLab instances + +When adding Gitaly Cluster to an existing Gitaly instance, the existing Gitaly storage +must be listening on TCP/TLS. If `gitaly_address` is not specified, then a Unix socket is used, +which prevents the communication with the cluster. + +For example: + +```ruby +gitlab_rails['repositories_storages'] = { + 'default' => { 'gitaly_address' => 'tcp://old-gitaly.internal:8075' }, + 'cluster' => { + 'gitaly_address' => 'tls://:3305', + 'gitaly_token' => '' + } +} +``` + +See [Mixed Configuration](../configure_gitaly.md#mixed-configuration) for further information on +running multiple Gitaly storages. + +### Grafana + +Grafana is included with GitLab, and can be used to monitor your Praefect +cluster.
See [Grafana Dashboard Service](../../monitoring/performance/grafana_configuration.md) +for detailed documentation. + +To get started quickly: + +1. SSH into the GitLab node (or whichever node has Grafana enabled) and login as root: + + ```shell + sudo -i + ``` + +1. Enable the Grafana login form by editing `/etc/gitlab/gitlab.rb`. + + ```ruby + grafana['disable_login_form'] = false + ``` + +1. Save the changes to `/etc/gitlab/gitlab.rb` and + [reconfigure GitLab](../../restart_gitlab.md#reconfigure-a-linux-package-installation): + + ```shell + gitlab-ctl reconfigure + ``` + +1. Set the Grafana administrator password. This command prompts you to enter a new + password: + + ```shell + gitlab-ctl set-grafana-password + ``` + +1. In your web browser, open `/-/grafana` (such as + `https://gitlab.example.com/-/grafana`) on your GitLab server. + + Login using the password you set, and the username `admin`. + +1. Go to **Explore** and query `gitlab_build_info` to verify that you are + getting metrics from all your machines. + +Congratulations! You've configured an observable fault-tolerant Praefect +cluster. + +## Configure replication factor + +Praefect supports configuring a replication factor on a per-repository basis, by assigning +specific storage nodes to host a repository. + +{{< alert type="warning" >}} + +Configurable replication factors require [repository-specific primary nodes](#repository-specific-primary-nodes). + +{{< /alert >}} + +Praefect does not store the actual replication factor, but assigns enough storages to host the repository +so the desired replication factor is met. If a storage node is later removed from the virtual storage, +the replication factor of repositories assigned to the storage is decreased accordingly. + +You can configure either: + +- A default replication factor for each virtual storage that is applied to newly created repositories. +- A replication factor for an existing repository with the `set-replication-factor` subcommand.
+ +### Configure default replication factor + +If `default_replication_factor` is unset, the repositories are always replicated on every storage node defined in +`virtual_storages`. If a new storage node is introduced to the virtual storage, both new and existing repositories are +replicated to the node automatically. + +For large Gitaly Cluster deployments with many storage nodes, replicating a repository to every storage node is often not +sensible and can cause problems. A replication factor of 3 is usually sufficient, which means replicate repositories to +three storages even if more are available. Higher replication factors increase the pressure on the primary storage. + +To configure a default replication factor, add configuration to the `/etc/gitlab/gitlab.rb` file: + +```ruby +praefect['configuration'] = { + # ... + virtual_storage: [ + { + # ... + name: 'default', + default_replication_factor: 3, + }, + ], +} +``` + +### Configure replication factor for existing repositories + +The `set-replication-factor` subcommand automatically assigns or unassigns random storage nodes as +necessary to reach the desired replication factor. The repository's primary node is +always assigned first and is never unassigned. + +```shell +sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml set-replication-factor -virtual-storage -repository -replication-factor +``` + +- `-virtual-storage` is the virtual storage the repository is located in. +- `-repository` is the repository's relative path in the storage. +- `-replication-factor` is the desired replication factor of the repository. The minimum value is + `1` because the primary needs a copy of the repository. The maximum replication factor is the number of + storages in the virtual storage. + +On success, the assigned host storages are printed. 
For example: + +```shell +$ sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml set-replication-factor -virtual-storage default -repository @hashed/3f/db/3fdba35f04dc8c462986c992bcf875546257113072a909c162f7e470e581e278.git -replication-factor 2 + +current assignments: gitaly-1, gitaly-2 +``` + +### Repository storage recommendations + +The size of the required storage can vary between instances and depends on the set +[replication factor](_index.md#replication-factor). You might want to include implementing +repository storage redundancy. + +For a replication factor: + +- Of `1`: Gitaly and Gitaly Cluster have roughly the same storage requirements. +- More than `1`: The amount of required storage is `used space * replication factor`. `used space` + should include any planned future growth. + +## Repository verification + +{{< history >}} + +- [Introduced](https://gitlab.com/gitlab-org/gitaly/-/issues/4080) in GitLab 15.0. + +{{< /history >}} + +Praefect stores metadata about the repositories in a database. If the repositories are modified on disk +without going through Praefect, the metadata can become inaccurate. For example if a Gitaly node is +rebuilt, rather than being replaced with a new node, repository verification ensures this is detected. + +The metadata is used for replication and routing decisions, so any inaccuracies may cause problems. +Praefect contains a background worker that periodically verifies the metadata against the actual state on the disks. +The worker: + +1. Picks up a batch of replicas to verify on healthy storages. The replicas are either unverified or have exceeded + the configured verification interval. Replicas that have never been verified are prioritized, followed by + the other replicas ordered by longest time since the last successful verification. +1. Checks whether the replicas exist on their respective storages. If the: + - Replica exists, update its last successful verification time. 
+ - Replica doesn't exist, remove its metadata record. + - Check failed, the replica is picked up for verification again when the next worker dequeues more work. + +The worker acquires an exclusive verification lease on each of the replicas it is about to verify. This prevents multiple +workers from verifying the same replica concurrently. The worker releases the leases when it has completed its check. +If workers are terminated for some reason without releasing the lease, Praefect contains a background goroutine +that releases stale leases every 10 seconds. + +The worker logs each of the metadata removals prior to executing them. The `perform_deletions` key +indicates whether the invalid metadata records are actually deleted or not. For example: + +```json +{ + "level": "info", + "msg": "removing metadata records of non-existent replicas", + "perform_deletions": false, + "replicas": { + "default": { + "@hashed/6b/86/6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b.git": [ + "praefect-internal-0" + ] + } + } +} +``` + +### Configure the verification worker + +The worker is enabled by default and verifies the metadata records every seven days. The verification +interval is configurable with any valid [Go duration string](https://pkg.go.dev/time#ParseDuration). + +To verify the metadata every three days: + +```ruby +praefect['configuration'] = { + # ... + background_verification: { + # ... + verification_interval: '72h', + }, +} +``` + +Values of 0 and below disable the background verifier. + +```ruby +praefect['configuration'] = { + # ... + background_verification: { + # ... + verification_interval: '0', + }, +} +``` + +#### Enable deletions + +{{< history >}} + +- [Introduced](https://gitlab.com/gitlab-org/gitaly/-/issues/4080) and disabled by default in GitLab 15.0 +- [Default enabled](https://gitlab.com/gitlab-org/gitaly/-/merge_requests/5321) in GitLab 15.9.
+ +{{< /history >}} + +{{< alert type="warning" >}} + +Deletions were disabled by default prior to GitLab 15.9 due to a race condition with repository renames +that can cause incorrect deletions, which is especially prominent in Geo instances as Geo performs more renames +than instances without Geo. In GitLab 15.0 to 15.5, you should enable deletions only if the +[`gitaly_praefect_generated_replica_paths` feature flag](_index.md#praefect-generated-replica-paths) is enabled. The feature flag was removed in GitLab 15.6 making deletions always safe to enable. + +{{< /alert >}} + +By default, the worker deletes invalid metadata records. It also logs the deleted records and outputs Prometheus +metrics. + +You can disable deleting invalid metadata records with: + +```ruby +praefect['configuration'] = { + # ... + background_verification: { + # ... + delete_invalid_records: false, + }, +} +``` + +### Prioritize verification manually + +You can prioritize verification of some replicas ahead of their next scheduled verification time. +This might be needed after a disk failure, for example, when the administrator knows that the disk contents may have +changed. Praefect would eventually verify the replicas again, but users may encounter errors in the meantime. + +To manually prioritize reverification of some replicas, use the `praefect verify` subcommand. The subcommand marks +replicas as unverified. Unverified replicas are prioritized by the background verification worker. The verification +worker must be enabled for the replicas to be verified. 
+ +Prioritize verifying the replicas of a specific repository: + +```shell +sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml verify -repository-id= +``` + +Prioritize verifying all replicas stored on a virtual storage: + +```shell +sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml verify -virtual-storage= +``` + +Prioritize verifying all replicas stored on a storage: + +```shell +sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefect/config.toml verify -virtual-storage= -storage= +``` + +The output includes the number of replicas that were marked unverified. + +## Automatic failover and primary election strategies + +Praefect regularly checks the health of each Gitaly node, which is used to automatically fail over +to a newly-elected primary Gitaly node if the current primary node is found to be unhealthy. + +[Repository-specific primary nodes](#repository-specific-primary-nodes) is the only available election strategy. + +### Repository-specific primary nodes + +Gitaly Cluster elects a primary Gitaly node separately for each repository. Combined with +[configurable replication factors](#configure-replication-factor), you can horizontally scale storage capacity and distribute write load across Gitaly nodes. + +Primary elections are run lazily. Praefect doesn't immediately elect a new primary node if the current +one is unhealthy. A new primary is elected if a request must be served while the current primary is unavailable. + +A valid primary node candidate is a Gitaly node that: + +- Is healthy. A Gitaly node is considered healthy if `>=50%` Praefect nodes have + successfully health checked the Gitaly node in the previous ten seconds. +- Has a fully up to date copy of the repository. + +If there are multiple primary node candidates, Praefect: + +- Picks one of them randomly. 
+- Prioritizes promoting a Gitaly node that is assigned to host the repository. If + there are no assigned Gitaly nodes to elect as the primary, Praefect may temporarily + elect an unassigned one. The unassigned primary is demoted in favor of an assigned + one when one becomes available. + +If there are no valid primary candidates for a repository: + +- The unhealthy primary node is demoted and the repository is left without a primary node. +- Operations that require a primary node fail until a primary is successfully elected. diff --git a/doc/administration/gitaly/praefect/monitoring.md b/doc/administration/gitaly/praefect/monitoring.md index 655b0c92d8a..a0a4261932d 100644 --- a/doc/administration/gitaly/praefect/monitoring.md +++ b/doc/administration/gitaly/praefect/monitoring.md @@ -43,7 +43,7 @@ To monitor [strong consistency](_index.md#strong-consistency), you can use the f - `gitaly_hook_transaction_voting_delay_seconds`, the client-side delay introduced by waiting for the transaction to be committed. -To monitor [repository verification](_index.md#repository-verification), use the following Prometheus metrics: +To monitor [repository verification](configure.md#repository-verification), use the following Prometheus metrics: - `gitaly_praefect_verification_jobs_dequeued_total`, the number of verification jobs picked up by the worker. diff --git a/doc/administration/gitaly/praefect/recovery.md b/doc/administration/gitaly/praefect/recovery.md index cb8e5da010a..d6cef124069 100644 --- a/doc/administration/gitaly/praefect/recovery.md +++ b/doc/administration/gitaly/praefect/recovery.md @@ -16,8 +16,8 @@ You can add and replace Gitaly nodes on a Gitaly Cluster. To add a new Gitaly node: -1. Install the new Gitaly node by following the [documentation](_index.md#gitaly). -1. Add the new node to your [Praefect configuration](_index.md#praefect) under `praefect['virtual_storages']`. +1. Install the new Gitaly node by following the [documentation](configure.md#gitaly). 
+1. Add the new node to your [Praefect configuration](configure.md#praefect) under `praefect['virtual_storages']`. 1. Reconfigure and restart Praefect by running following commands: ```shell @@ -30,8 +30,8 @@ The replication behavior depends on your replication factor setting. #### Custom replication factor If a custom replication factor is set, Praefect doesn't automatically replicate existing repositories to the new Gitaly node. You must set the -[replication factor](_index.md#configure-replication-factor) for each repository using the `set-replication-factor` Praefect command. New repositories are replicated based on -the [replication factor](_index.md#configure-replication-factor). +[replication factor](configure.md#configure-replication-factor) for each repository using the `set-replication-factor` Praefect command. New repositories are replicated based on +the [replication factor](configure.md#configure-replication-factor). #### Default replication factor @@ -46,17 +46,17 @@ You can replace an existing Gitaly node with a new node with either the same nam #### With a node with the same name -To use the same name for the replacement node, use [repository verifier](_index.md#enable-deletions) to scan the storage and remove dangling metadata records. -[Manually prioritize verification](_index.md#prioritize-verification-manually) of the replaced storage to speed up the process. +To use the same name for the replacement node, use [repository verifier](configure.md#enable-deletions) to scan the storage and remove dangling metadata records. +[Manually prioritize verification](configure.md#prioritize-verification-manually) of the replaced storage to speed up the process. 
#### With a node with a different name -The steps use a different name for the replacement node for a Gitaly Cluster depend on if a [replication factor](_index.md#configure-replication-factor) +The steps use a different name for the replacement node for a Gitaly Cluster depend on if a [replication factor](configure.md#configure-replication-factor) is set. ##### Replication factor set -If a custom replication factor is set, use [`praefect set-replication-factor`](_index.md#configure-replication-factor) to set the replication factor per repository again to get +If a custom replication factor is set, use [`praefect set-replication-factor`](configure.md#configure-replication-factor) to set the replication factor per repository again to get new storage assigned. For example, if two nodes in the virtual storage have a replication factor of 2 and a new node (`gitaly-3`) is added, you should increase the replication @@ -70,12 +70,12 @@ current assignments: gitaly-1, gitaly-2, gitaly-3 This ensures that the repository is replicated to the new node and the `repository_assignments` table gets updated with the name of new Gitaly node. -If the [default replication factor](_index.md#configure-replication-factor) is set, new nodes are not automatically included in replication. +If the [default replication factor](configure.md#configure-replication-factor) is set, new nodes are not automatically included in replication. You must follow the steps described previously. After you [verify](#check-for-data-loss) that repository is successfully replicated to the new node: -1. Remove the `gitaly-1` node from the [Praefect configuration](_index.md#praefect) under `praefect['virtual_storages']`. +1. Remove the `gitaly-1` node from the [Praefect configuration](configure.md#praefect) under `praefect['virtual_storages']`. 1. 
Reconfigure and restart Praefect: ```shell @@ -475,7 +475,7 @@ sudo -u git -- /opt/gitlab/embedded/bin/praefect -config /var/opt/gitlab/praefec - `-replica-path` is the relative path on physical storage. Can start with [`@cluster` or match `relative_path`](../../repository_storage_paths.md#gitaly-cluster-storage). - `-authoritative-storage` is the storage we want Praefect to treat as the primary. Required if - [per-repository replication](_index.md#configure-replication-factor) is set as the replication strategy. + [per-repository replication](configure.md#configure-replication-factor) is set as the replication strategy. - `-replicate-immediately` causes the command to replicate the repository to its secondaries immediately. Otherwise, replication jobs are scheduled for execution in the database and are picked up by a Praefect background process. diff --git a/doc/administration/gitaly/praefect/troubleshooting.md b/doc/administration/gitaly/praefect/troubleshooting.md index 62fde962537..0cd4f8ba5cb 100644 --- a/doc/administration/gitaly/praefect/troubleshooting.md +++ b/doc/administration/gitaly/praefect/troubleshooting.md @@ -108,7 +108,7 @@ Some common reasons for the Praefect database to experience elevated CPU usage i - Prometheus metrics scrapes [running an expensive query](https://gitlab.com/gitlab-org/gitaly/-/issues/3796). Set `praefect['configuration'][:prometheus_exclude_database_from_default_metrics] = true` in `gitlab.rb`. -- [Read distribution caching](_index.md#reads-distribution-caching) is disabled, increasing the number of queries made to the +- [Read distribution caching](configure.md#reads-distribution-caching) is disabled, increasing the number of queries made to the database when user traffic is high. Ensure read distribution caching is enabled. ## Determine primary Gitaly node @@ -202,7 +202,7 @@ For each replica, the following metadata is available: | `Generation` | Latest confirmed generation of the replica. It indicates:

- The replica is fully up to date if the generation matches the repository's generation.
- The replica is outdated if the replica's generation is less than the repository's generation.
- `replica not yet created` if the replica does not yet exist at all on the storage. | | `Healthy` | Indicates whether the Gitaly node that is hosting this replica is considered healthy by the consensus of Praefect nodes. | | `Valid Primary` | Indicates whether the replica is fit to serve as the primary node. If the repository's primary is not a valid primary, a failover occurs on the next write to the repository if there is another replica that is a valid primary. A replica is a valid primary if:

- It is stored on a healthy Gitaly node.
- It is fully up to date.
- It is not targeted by a pending deletion job from decreasing replication factor.
- It is assigned. | -| `Verified At` | Indicates last successful verification of the replica by the [verification worker](_index.md#repository-verification). If the replica has not yet been verified, `unverified` is displayed in place of the last successful verification time. Introduced in GitLab 15.0. | +| `Verified At` | Indicates last successful verification of the replica by the [verification worker](configure.md#repository-verification). If the replica has not yet been verified, `unverified` is displayed in place of the last successful verification time. Introduced in GitLab 15.0. | ### Command fails with 'repository not found' @@ -261,8 +261,8 @@ praefect sql-migrate: OK (applied 21 migrations) ## Requests fail with 'repository scoped: invalid Repository' errors This indicates that the virtual storage name used in the -[Praefect configuration](_index.md#praefect) does not match the storage name used in -[`gitaly['configuration'][:storage][][:name]` setting](_index.md#gitaly) for GitLab. +[Praefect configuration](configure.md#praefect) does not match the storage name used in +[`gitaly['configuration'][:storage][][:name]` setting](configure.md#gitaly) for GitLab. Resolve this by matching the virtual storage names used in Praefect and GitLab configuration. 
diff --git a/doc/administration/gitlab_duo_self_hosted/_index.md b/doc/administration/gitlab_duo_self_hosted/_index.md index 1ad7a188566..0ca91ceace0 100644 --- a/doc/administration/gitlab_duo_self_hosted/_index.md +++ b/doc/administration/gitlab_duo_self_hosted/_index.md @@ -107,7 +107,7 @@ For more examples of a question you can ask, see | Feature | Available on GitLab Duo Self-Hosted | GitLab version | Status | | -------------------------------------------------------------------------------------------------------------- | ------------------------------------------- | ---------------------- | --- | | [GitLab Duo for the CLI](../../editor_extensions/gitlab_cli/_index.md#gitlab-duo-for-the-cli) | {{< icon name="check-circle-filled" >}} Yes | GitLab 18.1.2 and later | Beta | -| [GitLab Duo Agent Platform](../../user/duo_agent_platform/_index.md) | {{< icon name="dash-circle" >}} No | GitLab 17.4 and later | Not applicable | +| [GitLab Duo Agent Platform](../../user/duo_agent_platform/_index.md) | {{< icon name="dash-circle" >}} No | Not applicable | Not applicable | | [Vulnerability Resolution](../../user/application_security/vulnerabilities/_index.md#vulnerability-resolution) | {{< icon name="check-circle-filled" >}} Yes | GitLab 18.1.2 and later | Beta | | [AI Impact Dashboard](../../user/analytics/ai_impact_analytics.md) | {{< icon name="check-circle-filled" >}} Yes | GitLab 17.9 and later | Beta | diff --git a/doc/administration/monitoring/prometheus/gitlab_metrics.md b/doc/administration/monitoring/prometheus/gitlab_metrics.md index add0b1b9a0b..896a9b226a7 100644 --- a/doc/administration/monitoring/prometheus/gitlab_metrics.md +++ b/doc/administration/monitoring/prometheus/gitlab_metrics.md @@ -225,7 +225,7 @@ The following metrics can be controlled by feature flags: ## Praefect metrics -You can [configure Praefect](../../gitaly/praefect/_index.md#praefect) to report metrics. 
For information +You can [configure Praefect](../../gitaly/praefect/configure.md#praefect) to report metrics. For information on available metrics, see [Monitoring Gitaly Cluster (Praefect)](../../gitaly/praefect/monitoring.md). ## Sidekiq metrics diff --git a/doc/administration/postgresql/moving.md b/doc/administration/postgresql/moving.md index d9238b2f498..5bc78764e7c 100644 --- a/doc/administration/postgresql/moving.md +++ b/doc/administration/postgresql/moving.md @@ -59,7 +59,7 @@ To move databases from one instance to another: /opt/gitlab/embedded/bin/psql -h $DST_PGHOST -U $DST_PGUSER -f gitlabhq_production.sql postgres ``` -1. Optional. If you migrate from a database that doesn't use PgBouncer to a database that does, you must manually add a [`pg_shadow_lookup` function](../gitaly/praefect/_index.md#manual-database-setup) to the application database (usually `gitlabhq_production`). +1. Optional. If you migrate from a database that doesn't use PgBouncer to a database that does, you must manually add a [`pg_shadow_lookup` function](../gitaly/praefect/configure.md#manual-database-setup) to the application database (usually `gitlabhq_production`). 1. Configure the GitLab application servers with the appropriate connection details for your destination PostgreSQL instance in your `/etc/gitlab/gitlab.rb` file: diff --git a/doc/administration/reference_architectures/10k_users.md b/doc/administration/reference_architectures/10k_users.md index ae91f2751f4..c9643b02f0a 100644 --- a/doc/administration/reference_architectures/10k_users.md +++ b/doc/administration/reference_architectures/10k_users.md @@ -1406,7 +1406,7 @@ the details of each Gitaly node that makes up the cluster. Each storage is also and this name is used in several areas of the configuration. In this guide, the name of the storage will be `default`. Also, this guide is geared towards new installs, if upgrading an existing environment to use Gitaly Cluster, you might have to use a different name. 
-Refer to the [Praefect documentation](../gitaly/praefect/_index.md#praefect) for more information. +Refer to the [Praefect documentation](../gitaly/praefect/configure.md#praefect) for more information. The following IPs will be used as an example: diff --git a/doc/administration/reference_architectures/25k_users.md b/doc/administration/reference_architectures/25k_users.md index 6e0fc723ea1..d7920f70cc0 100644 --- a/doc/administration/reference_architectures/25k_users.md +++ b/doc/administration/reference_architectures/25k_users.md @@ -1414,7 +1414,7 @@ the details of each Gitaly node that makes up the cluster. Each storage is also and this name is used in several areas of the configuration. In this guide, the name of the storage will be `default`. Also, this guide is geared towards new installs, if upgrading an existing environment to use Gitaly Cluster, you might have to use a different name. -Refer to the [Praefect documentation](../gitaly/praefect/_index.md#praefect) for more information. +Refer to the [Praefect documentation](../gitaly/praefect/configure.md#praefect) for more information. The following IPs will be used as an example: diff --git a/doc/administration/reference_architectures/3k_users.md b/doc/administration/reference_architectures/3k_users.md index d6cbb3307ab..6fa4b3009b5 100644 --- a/doc/administration/reference_architectures/3k_users.md +++ b/doc/administration/reference_architectures/3k_users.md @@ -1240,7 +1240,7 @@ the details of each Gitaly node that makes up the cluster. Each storage is also and this name is used in several areas of the configuration. In this guide, the name of the storage will be `default`. Also, this guide is geared towards new installs, if upgrading an existing environment to use Gitaly Cluster, you might have to use a different name. -Refer to the [Praefect documentation](../gitaly/praefect/_index.md#praefect) for more information. 
+Refer to the [Praefect documentation](../gitaly/praefect/configure.md#praefect) for more information. The following IPs will be used as an example: diff --git a/doc/administration/reference_architectures/50k_users.md b/doc/administration/reference_architectures/50k_users.md index 25992bb87c2..2488d9fdb94 100644 --- a/doc/administration/reference_architectures/50k_users.md +++ b/doc/administration/reference_architectures/50k_users.md @@ -1419,7 +1419,7 @@ the details of each Gitaly node that makes up the cluster. Each storage is also and this name is used in several areas of the configuration. In this guide, the name of the storage will be `default`. Also, this guide is geared towards new installs, if upgrading an existing environment to use Gitaly Cluster, you might have to use a different name. -Refer to the [Praefect documentation](../gitaly/praefect/_index.md#praefect) for more information. +Refer to the [Praefect documentation](../gitaly/praefect/configure.md#praefect) for more information. The following IPs will be used as an example: diff --git a/doc/administration/reference_architectures/5k_users.md b/doc/administration/reference_architectures/5k_users.md index e77206b55e2..08e1b29c924 100644 --- a/doc/administration/reference_architectures/5k_users.md +++ b/doc/administration/reference_architectures/5k_users.md @@ -1245,7 +1245,7 @@ the details of each Gitaly node that makes up the cluster. Each storage is also and this name is used in several areas of the configuration. In this guide, the name of the storage is `default`. Also, this guide is geared towards new installs, if upgrading an existing environment to use Gitaly Cluster, you might have to use a different name. -Refer to the [Praefect documentation](../gitaly/praefect/_index.md#praefect) for more information. +Refer to the [Praefect documentation](../gitaly/praefect/configure.md#praefect) for more information. 
The following IPs are used as an example: diff --git a/doc/administration/reference_architectures/_index.md b/doc/administration/reference_architectures/_index.md index 9c90da71c85..37437135f1e 100644 --- a/doc/administration/reference_architectures/_index.md +++ b/doc/administration/reference_architectures/_index.md @@ -434,7 +434,7 @@ architectures are designed to have enough memory in most cases to avoid the need ### Praefect PostgreSQL -[Praefect requires its own database server](../gitaly/praefect/_index.md#postgresql). To achieve full HA, a third-party PostgreSQL database solution is required. +[Praefect requires its own database server](../gitaly/praefect/configure.md#postgresql). To achieve full HA, a third-party PostgreSQL database solution is required. We hope to offer a built-in solution for these restrictions in the future. In the meantime, a non-HA PostgreSQL server can be set up using the Linux package as the specifications reflect. For more details, see the following issues: diff --git a/doc/administration/settings/scim_setup.md b/doc/administration/settings/scim_setup.md index fe9488ada65..6a35123dec4 100644 --- a/doc/administration/settings/scim_setup.md +++ b/doc/administration/settings/scim_setup.md @@ -272,6 +272,7 @@ the user's SCIM identity is reactivated and their GitLab instance access is rest {{< history >}} - [Introduced](https://gitlab.com/groups/gitlab-org/-/epics/15990) in GitLab 18.0 [with a flag](../../administration/feature_flags/_index.md) named `self_managed_scim_group_sync`. Disabled by default. +- [Enabled on GitLab Self-Managed](https://gitlab.com/gitlab-org/gitlab/-/issues/553662) by default in GitLab 18.2. {{< /history >}} diff --git a/doc/api/graphql/reference/_index.md b/doc/api/graphql/reference/_index.md index 462f91fff6e..5573a7506de 100644 --- a/doc/api/graphql/reference/_index.md +++ b/doc/api/graphql/reference/_index.md @@ -183,6 +183,12 @@ This field returns a [connection](#connections). 
It accepts the four standard [pagination arguments](#pagination-arguments): `before: String`, `after: String`, `first: Int`, and `last: Int`. +#### Arguments + +| Name | Type | Description | +| ---- | ---- | ----------- | +| `itemType` | [`AiCatalogItemType`](#aicatalogitemtype) | Type of items to retrieve. | + ### `Query.aiChatContextPresets` Get available GitLab Duo Chat context presets for the current user for a specific URL. @@ -27678,6 +27684,7 @@ GitLab Duo Agent Platform session. | `goal` | [`String`](#string) | Goal of the session. | | `humanStatus` | [`String!`](#string) | Human-readable status of the session. | | `id` | [`ID!`](#id) | ID of the session. | +| `lastExecutorLogsUrl` | [`String`](#string) | URL to the latest executor logs of the workflow. | | `mcpEnabled` | [`Boolean`](#boolean) | Has MCP been enabled for the namespace. | | `preApprovedAgentPrivilegesNames` | [`[String!]`](#string) | Privileges pre-approved for the agent during execution. | | `project` | [`Project!`](#project) | Project that the session is in. | diff --git a/doc/ci/runners/runners_scope.md b/doc/ci/runners/runners_scope.md index 371a80dd437..1672b440cba 100644 --- a/doc/ci/runners/runners_scope.md +++ b/doc/ci/runners/runners_scope.md @@ -184,7 +184,7 @@ To disable instance runners for a project: 1. On the left sidebar, select **Search or go to** and find your project. 1. Select **Settings > CI/CD**. 1. Expand **Runners**. -1. In the **Instance runners** area, enable the **Turn runners for this project** toggle. +1. In the **Instance runners** area, turn off the **Turn on runners for this project** toggle. Instance runners are automatically disabled for a project: diff --git a/doc/development/documentation/workflow.md b/doc/development/documentation/workflow.md index 0733bcefcb2..ac3d423f2ac 100644 --- a/doc/development/documentation/workflow.md +++ b/doc/development/documentation/workflow.md @@ -95,8 +95,8 @@ enhancement. 
They are responsible for: [assigned to the DevOps stage group](https://handbook.gitlab.com/handbook/product/ux/technical-writing/#assignments) that is delivering the new feature or feature enhancements. -The first merge request where a feature can be tested should include the -documentation, even if the feature is behind a feature flag. +When possible, the merge request with the code should include the +documentation. For more information, see the [guidelines](feature_flags.md). The author of this MR, either a frontend or backend developer, should write the documentation. diff --git a/doc/development/testing_guide/end_to_end/best_practices/rspec_metadata_tests.md b/doc/development/testing_guide/end_to_end/best_practices/rspec_metadata_tests.md index 7a5d7c63a80..425a501ee9a 100644 --- a/doc/development/testing_guide/end_to_end/best_practices/rspec_metadata_tests.md +++ b/doc/development/testing_guide/end_to_end/best_practices/rspec_metadata_tests.md @@ -18,7 +18,7 @@ This is a partial list of the [RSpec metadata](https://rspec.info/features/3-12/ | `:external_ai_provider` | The test requires an environment that is integrated with a real external AI provider. | | `:feature_flag` | The test uses a feature flag and therefore requires an administrator account to run. When `scope` is set to `:global`, the test will be skipped on all live .com environments. Otherwise, it will be skipped only on Canary, Production, and Pre-production. See [testing with feature flags](feature_flags.md) for more details. | | `:geo` | The test requires two GitLab Geo instances - a primary and a secondary - to be spun up. | -| `:gitaly_cluster` | The test runs against a GitLab instance where repositories are stored on redundant Gitaly nodes behind a Praefect node. All nodes are [separate containers](../../../../administration/gitaly/praefect/_index.md#requirements). Tests that use this tag have a longer setup time since there are three additional containers that need to be started. 
| +| `:gitaly_cluster` | The test runs against a GitLab instance where repositories are stored on redundant Gitaly nodes behind a Praefect node. All nodes are [separate containers](../../../../administration/gitaly/praefect/configure.md#requirements). Tests that use this tag have a longer setup time since there are three additional containers that need to be started. | | `:github` | The test requires a GitHub personal access token. | | `:group_saml` | The test requires a GitLab instance that has SAML SSO enabled for the group. Interacts with an external SAML identity provider. Paired with the `:orchestrated` tag. | | `:instance_saml` | The test requires a GitLab instance that has SAML SSO enabled for the instance. Interacts with an external SAML identity provider. Paired with the `:orchestrated` tag. | diff --git a/doc/topics/offline/quick_start_guide.md b/doc/topics/offline/quick_start_guide.md index 751db649641..55889338ef4 100644 --- a/doc/topics/offline/quick_start_guide.md +++ b/doc/topics/offline/quick_start_guide.md @@ -225,7 +225,7 @@ for offline environments. ### Configure NTP -Gitaly Cluster assumes `pool.ntp.org` is accessible. If `pool.ntp.org` is not accessible, [customize the time server setting](../../administration/gitaly/praefect/_index.md#customize-time-server-setting) on the Gitaly +Gitaly Cluster assumes `pool.ntp.org` is accessible. If `pool.ntp.org` is not accessible, [customize the time server setting](../../administration/gitaly/praefect/configure.md#customize-time-server-setting) on the Gitaly and Praefect servers so they can use an accessible NTP server. 
On offline instances, the [GitLab Geo check Rake task](../../administration/geo/replication/troubleshooting/common.md#can-geo-detect-the-current-site-correctly) diff --git a/doc/update/versions/gitlab_15_changes.md b/doc/update/versions/gitlab_15_changes.md index 2aed334e771..68ea11ee0ed 100644 --- a/doc/update/versions/gitlab_15_changes.md +++ b/doc/update/versions/gitlab_15_changes.md @@ -205,7 +205,7 @@ if you can't upgrade to 15.11.12 and later. [issue 393216](https://gitlab.com/gitlab-org/gitlab/-/issues/393216). - The second [bug fix](https://gitlab.com/gitlab-org/gitlab/-/issues/394760) ensures it is possible to upgrade directly from 15.4.x. - As part of the [CI Partitioning effort](../../architecture/blueprints/ci_data_decay/pipeline_partitioning.md), a [new Foreign Key](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/107547) was added to `ci_builds_needs`. On GitLab instances with large CI tables, adding this constraint can take longer than usual. -- Praefect's metadata verifier [invalid metadata deletion behavior](../../administration/gitaly/praefect/_index.md#enable-deletions) is now enabled by default. +- Praefect's metadata verifier [invalid metadata deletion behavior](../../administration/gitaly/praefect/configure.md#enable-deletions) is now enabled by default. The metadata verifier processes replica records in the Praefect database and verifies the replicas actually exist on the Gitaly nodes. If the replica doesn't exist, its metadata record is deleted. This enables Praefect to fix situations where a replica has a metadata record indicating it's fine but, in reality, it doesn't exist on disk. @@ -216,7 +216,7 @@ if you can't upgrade to 15.11.12 and later. unavailable repositories in the metrics and `praefect dataloss` sub-command because of the replica records being removed. If you encounter such repositories, remove the repository using `praefect remove-repository` to remove the repository's remaining records. 
- You can find repositories with invalid metadata records prior in GitLab 15.0 and later by searching for the log records outputted by the verifier. [Read more about repository verification, and to see an example log entry](../../administration/gitaly/praefect/_index.md#repository-verification). + You can find repositories with invalid metadata records prior in GitLab 15.0 and later by searching for the log records outputted by the verifier. [Read more about repository verification, and to see an example log entry](../../administration/gitaly/praefect/configure.md#repository-verification). - Praefect configuration changes significantly for Linux package instances in GitLab 16.0. You can begin migrating to the new structure in GitLab 15.9 while backwards compatibility is maintained in the lead up to GitLab 16.0. [Read more about this change](gitlab_16_changes.md#praefect-configuration-structure-change). @@ -741,7 +741,7 @@ potentially cause downtime. - GitLab 15.4.0 includes a [batched background migration](../background_migrations.md#batched-background-migrations) to [remove incorrect values from `expire_at` in `ci_job_artifacts` table](https://gitlab.com/gitlab-org/gitlab/-/merge_requests/89318). This migration might take hours or days to complete on larger GitLab instances. - By default, Gitaly and Praefect nodes use the time server at `pool.ntp.org`. If your instance can not connect to `pool.ntp.org`, - [configure the `NTP_HOST` variable](../../administration/gitaly/praefect/_index.md#customize-time-server-setting) otherwise, there can be `ntp: read udp ... i/o timeout` errors + [configure the `NTP_HOST` variable](../../administration/gitaly/praefect/configure.md#customize-time-server-setting) otherwise, there can be `ntp: read udp ... i/o timeout` errors in the logs and the output of `gitlab-rake gitlab:gitaly:check`. However, if the Gitaly hosts' times are in sync, these errors can be ignored. 
- GitLab 15.4.0 introduced a default [Sidekiq routing rule](../../administration/sidekiq/processing_specific_job_classes.md#routing-rules) that routes all jobs to the `default` queue. For instances using [queue selectors](https://archives.docs.gitlab.com/17.0/ee/administration/sidekiq/processing_specific_job_classes.html#queue-selectors-deprecated), this causes [performance problems](https://gitlab.com/gitlab-com/gl-infra/scalability/-/issues/1991) as some Sidekiq processes will be idle. - The default routing rule has been reverted in 15.4.5, so upgrading to that version or later will return to the previous behavior. diff --git a/doc/update/versions/gitlab_16_changes.md b/doc/update/versions/gitlab_16_changes.md index 20524d00ed9..b5f2ddbc52e 100644 --- a/doc/update/versions/gitlab_16_changes.md +++ b/doc/update/versions/gitlab_16_changes.md @@ -1313,7 +1313,7 @@ Migrate by moving your existing configuration under the new structure. `git_data If you are running Gitaly cluster, [migrate Praefect to the new configuration structure **first**](#praefect-configuration-structure-change). Once this change is tested, proceed with your Gitaly nodes. -If Gitaly is misconfigured as part of the configuration structure change, [repository verification](../../administration/gitaly/praefect/_index.md#repository-verification) +If Gitaly is misconfigured as part of the configuration structure change, [repository verification](../../administration/gitaly/praefect/configure.md#repository-verification) will [delete metadata required for Gitaly cluster to work](https://gitlab.com/gitlab-org/gitaly/-/issues/5529). To protect against configuration mistakes, temporarily disable repository verification in Praefect. @@ -1331,7 +1331,7 @@ To protect against configuration mistakes, temporarily disable repository verifi 1. Apply the change with `gitlab-ctl reconfigure`. 1. Test Git repository functionality in GitLab. 1. 
Remove the old keys from the configuration once migrated, and then re-run `gitlab-ctl reconfigure`. -1. Recommended, if you're running Gitaly Cluster. Reinstate Praefect [repository verification](../../administration/gitaly/praefect/_index.md#repository-verification) +1. Recommended, if you're running Gitaly Cluster. Reinstate Praefect [repository verification](../../administration/gitaly/praefect/configure.md#repository-verification) by removing `verification_interval: 0`. The new structure is documented in the following script with the old keys described in a comment above the new keys. @@ -1505,7 +1505,7 @@ Migrate by moving your existing configuration under the new structure. The new s Migrate Praefect to the new configuration structure **first**. Once this change is tested, [proceed with your Gitaly nodes](#gitaly-configuration-structure-change). -If Gitaly is misconfigured as part of the configuration structure change, [repository verification](../../administration/gitaly/praefect/_index.md#repository-verification) +If Gitaly is misconfigured as part of the configuration structure change, [repository verification](../../administration/gitaly/praefect/configure.md#repository-verification) will [delete metadata required for Gitaly cluster to work](https://gitlab.com/gitlab-org/gitaly/-/issues/5529). To protect against configuration mistakes, temporarily disable repository verification in Praefect. diff --git a/doc/update/versions/gitlab_17_changes.md b/doc/update/versions/gitlab_17_changes.md index 139f3725469..9cc2f5b4dc3 100644 --- a/doc/update/versions/gitlab_17_changes.md +++ b/doc/update/versions/gitlab_17_changes.md @@ -556,7 +556,7 @@ The OpenSSL 3 upgrade has been postponed to GitLab 17.7.0. Then run `gitlab-ctl reconfigure`. - ALPN enforcement has been disabled again in [GitLab 17.5.5 and other versions](../../administration/gitaly/praefect/_index.md#alpn-enforcement). 
+ ALPN enforcement has been disabled again in [GitLab 17.5.5 and other versions](../../administration/gitaly/praefect/configure.md#alpn-enforcement). Upgrading to one of those versions removes the need to set `GRPC_ENFORCE_ALPN_ENABLED`. ## 17.3.0 diff --git a/doc/update/zero_downtime.md b/doc/update/zero_downtime.md index 7fc29f59e47..d5420a6b1d1 100644 --- a/doc/update/zero_downtime.md +++ b/doc/update/zero_downtime.md @@ -191,7 +191,7 @@ In the future this functionality may be changed, [refer to this Epic](https://gi {{< alert type="note" >}} -This section focuses exclusively on the Praefect component, not its [required PostgreSQL database](../administration/gitaly/praefect/_index.md#postgresql). The [GitLab Linux package does not offer HA](https://gitlab.com/groups/gitlab-org/-/epics/7814) and subsequently Zero Downtime support for the Praefect database. A third party database solution is required to avoid downtime. +This section focuses exclusively on the Praefect component, not its [required PostgreSQL database](../administration/gitaly/praefect/configure.md#postgresql). The [GitLab Linux package does not offer HA](https://gitlab.com/groups/gitlab-org/-/epics/7814) and subsequently Zero Downtime support for the Praefect database. A third party database solution is required to avoid downtime. 
{{< /alert >}} diff --git a/lib/gitlab/utils/class_name_converter.rb b/lib/gitlab/utils/class_name_converter.rb new file mode 100644 index 00000000000..2722978e943 --- /dev/null +++ b/lib/gitlab/utils/class_name_converter.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module Gitlab + module Utils + # Converts a class into the string representation of its name + # Example: `ClassNameConverter.new(Ci::SecureFile).string_representation` returns "ci_secure_file" + class ClassNameConverter + def initialize(klass) + @klass = klass + end + + def string_representation + klass.name.underscore.tr('/', '_') + end + + private + + attr_reader :klass + end + end +end diff --git a/locale/gitlab.pot b/locale/gitlab.pot index a5b54cd1dfa..7781453ca85 100644 --- a/locale/gitlab.pot +++ b/locale/gitlab.pot @@ -56277,6 +56277,9 @@ msgstr "" msgid "SecurityLabels|Security labels" msgstr "" +msgid "SecurityLabels|Security labels help classify and organize your projects. Labels are managed at the group level. You can add or remove labels to this project as needed." 
+msgstr "" + msgid "SecurityLabels|Selection type" msgstr "" diff --git a/qa/qa/tools/delete_projects.rb b/qa/qa/tools/delete_projects.rb index 8267b665dd7..7856cb7aa0d 100644 --- a/qa/qa/tools/delete_projects.rb +++ b/qa/qa/tools/delete_projects.rb @@ -1,35 +1,34 @@ # frozen_string_literal: true -# This script deletes all projects directly under a group specified by ENV['TOP_LEVEL_GROUP_NAME'] +# This script deletes all projects directly under all 'gitlab-e2e-sandbox-group-<#0-7>' groups OR a group specified by +# ENV['TOP_LEVEL_GROUP_NAME'] # - If `dry_run` is true the script will list projects to be deleted, but it won't delete them # Required environment variables: GITLAB_QA_ACCESS_TOKEN and GITLAB_ADDRESS -# Optional environment variables: TOP_LEVEL_GROUP_NAME (default: 'gitlab-e2e-sandbox-group-'), -# CLEANUP_ALL_E2E_SANDBOX_GROUPS (default: false), +# Optional environment variables: TOP_LEVEL_GROUP_NAME, # PERMANENTLY_DELETE (default: false), -# DELETE_BEFORE (default: 1 day ago) +# DELETE_BEFORE - YYYY-MM-DD, YYYY-MM-DD HH:MM:SS, or YYYY-MM-DDT00:00:00Z + # - Set TOP_LEVEL_GROUP_NAME to the name of the e2e sandbox group that you would like to delete projects under. -# - Set CLEANUP_ALL_E2E_SANDBOX_GROUPS to true if you would like to delete projects under all -# 'gitlab-e2e-sandbox-group-*' groups. Otherwise, this will fall back to TOP_LEVEL_GROUP_NAME. +# Otherwise, this will fall back to deleting projects under all 'gitlab-e2e-sandbox-group-<#0-7>' groups. # - Set PERMANENTLY_DELETE to true if you would like to permanently delete subgroups on an environment with # deletion protection enabled. Otherwise, subgroups will remain available during the retention period specified # in admin settings. On environments with deletion protection disabled, subgroups will always be permanently deleted.
-# - Set DELETE_BEFORE to only delete projects that were created before a given date, otherwise defaults to 1 day ago +# - Set DELETE_BEFORE to only delete projects that were created before a given date, otherwise defaults to 2 hours ago # Run `rake delete_projects` module QA module Tools class DeleteProjects < DeleteResourceBase - # @example mark projects for deletion under 'gitlab-e2e-sandbox-group-' older than 1 day + # @example mark projects for deletion that are older than 2 hours under all gitlab-e2e-sandbox-group-<#0-7> groups # GITLAB_ADDRESS=
\ # GITLAB_QA_ACCESS_TOKEN= bundle exec rake delete_projects # - # @example permanently delete projects older than 1 day under all gitlab-e2e-sandbox-group-* groups + # @example permanently delete projects older than 2 hours under all gitlab-e2e-sandbox-group-<#0-7> groups # GITLAB_ADDRESS=
\ # GITLAB_QA_ACCESS_TOKEN= \ - # CLEANUP_ALL_E2E_SANDBOX_GROUPS=true \ # PERMANENTLY_DELETE=true bundle exec rake delete_projects # # @example mark projects for deletion under 'gitlab-e2e-sandbox-group-2' created before 2023-01-01 @@ -48,14 +47,14 @@ module QA end def run - if ENV['CLEANUP_ALL_E2E_SANDBOX_GROUPS'] + if ENV['TOP_LEVEL_GROUP_NAME'] + group_id = fetch_group_id(@api_client, ENV['TOP_LEVEL_GROUP_NAME']) + results = delete_projects(group_id) + else results = SANDBOX_GROUPS.flat_map do |name| group_id = fetch_group_id(@api_client, name) delete_projects(group_id) end.compact - else - group_id = fetch_group_id(@api_client) - results = delete_projects(group_id) end log_results(results) diff --git a/qa/qa/tools/delete_resource_base.rb b/qa/qa/tools/delete_resource_base.rb index b715eb50a9e..36f510286fd 100644 --- a/qa/qa/tools/delete_resource_base.rb +++ b/qa/qa/tools/delete_resource_base.rb @@ -12,7 +12,8 @@ module QA ITEMS_PER_PAGE = '100' PAGE_CUTOFF = '10' - SANDBOX_GROUPS = %w[gitlab-e2e-sandbox-group-1 + SANDBOX_GROUPS = %w[gitlab-e2e-sandbox-group-0 + gitlab-e2e-sandbox-group-1 gitlab-e2e-sandbox-group-2 gitlab-e2e-sandbox-group-3 gitlab-e2e-sandbox-group-4 @@ -27,7 +28,7 @@ module QA @api_client = Runtime::API::Client.new(ENV['GITLAB_ADDRESS'], personal_access_token: ENV['GITLAB_QA_ACCESS_TOKEN']) - @delete_before = Date.parse(ENV['DELETE_BEFORE'] || (Date.today - 1).to_s) + @delete_before = Time.parse(ENV['DELETE_BEFORE'] || (Time.now - (2 * 3600)).to_s).utc.iso8601(3) @dry_run = dry_run @permanently_delete = !!(ENV['PERMANENTLY_DELETE'].to_s =~ /true|1|y/i) @type = nil @@ -164,7 +165,7 @@ module QA ).url if response.code == HTTP_STATUS_OK - resources.concat(parse_body(response).select { |r| Date.parse(r[:created_at]) < @delete_before }) + resources.concat(parse_body(response).select { |r| Time.parse(r[:created_at]) < @delete_before }) else logger.error("Request for #{@type} returned (#{response.code}): `#{response}` ") exit 1 if 
fatal_response?(response.code) diff --git a/qa/qa/tools/delete_subgroups.rb b/qa/qa/tools/delete_subgroups.rb index a6ed85422a8..1049ed14a6f 100644 --- a/qa/qa/tools/delete_subgroups.rb +++ b/qa/qa/tools/delete_subgroups.rb @@ -1,36 +1,33 @@ # frozen_string_literal: true -# This script deletes all subgroups of a group specified by ENV['TOP_LEVEL_GROUP_NAME'] -# - If `dry_run` is true the script will list groups to be deleted, but it won't delete them +# This script deletes all subgroups of all 'gitlab-e2e-sandbox-group-<#0-7>' groups OR all subgroups of a group +# specified by ENV['TOP_LEVEL_GROUP_NAME'] +# - If `dry_run` is true the script will list subgroups to be deleted, but it won't delete them # Required environment variables: GITLAB_QA_ACCESS_TOKEN and GITLAB_ADDRESS -# Optional environment variables: TOP_LEVEL_GROUP_NAME (default: 'gitlab-e2e-sandbox-group-'), -# CLEANUP_ALL_E2E_SANDBOX_GROUPS (default: false), +# Optional environment variables: TOP_LEVEL_GROUP_NAME, # PERMANENTLY_DELETE (default: false), -# DELETE_BEFORE (default: 1 day ago) -# - Set TOP_LEVEL_GROUP_NAME to override the default group name determination logic. -# If not set, the default group name will be: -# - All 'gitlab-e2e-sandbox-group-*' groups when CLEANUP_ALL_E2E_SANDBOX_GROUPS is true -# - 'gitlab-e2e-sandbox-group-' when CLEANUP_ALL_E2E_SANDBOX_GROUPS is false +# DELETE_BEFORE - YYYY-MM-DD, YYYY-MM-DD HH:MM:SS, or YYYY-MM-DDT00:00:00Z +# - Set TOP_LEVEL_GROUP_NAME to only delete subgroups under the given group. +# If not set, subgroups of all 'gitlab-e2e-sandbox-group-<#0-7>' groups will be deleted. # - Set PERMANENTLY_DELETE to true if you would like to permanently delete subgroups on an environment with # deletion protection enabled. Otherwise, subgroups will remain available during the retention period specified # in admin settings. On environments with deletion protection disabled, subgroups will always be permanently deleted. 
-# - Set DELETE_BEFORE to only delete snippets that were created before a given date, otherwise defaults to 1 day ago +# - Set DELETE_BEFORE to only delete subgroups that were created before a given date, otherwise defaults to 2 hours ago # Run `rake delete_subgroups` module QA module Tools class DeleteSubgroups < DeleteResourceBase - # @example mark subgroups for deletion that are older than 1 day under 'gitlab-e2e-sandbox-group-' + # @example mark subgroups for deletion that are older than 2 hours under all gitlab-e2e-sandbox-group-<#0-7> groups # GITLAB_ADDRESS=
\ # GITLAB_QA_ACCESS_TOKEN= bundle exec rake delete_subgroups # - # @example permanently delete subgroups older than 1 day under all gitlab-e2e-sandbox-group-* groups + # @example permanently delete subgroups older than 2 hours under all gitlab-e2e-sandbox-group-<#0-7> groups # GITLAB_ADDRESS=
\ # GITLAB_QA_ACCESS_TOKEN= \ - # CLEANUP_ALL_E2E_SANDBOX_GROUPS=true \ # PERMANENTLY_DELETE=true bundle exec rake delete_subgroups # # @example mark subgroups for deletion under 'gitlab-e2e-sandbox-group-2' created before 2023-01-01 @@ -49,14 +46,14 @@ module QA end def run - if ENV['CLEANUP_ALL_E2E_SANDBOX_GROUPS'] && !ENV['TOP_LEVEL_GROUP_NAME'] + if ENV['TOP_LEVEL_GROUP_NAME'] + group_id = fetch_group_id(@api_client, ENV['TOP_LEVEL_GROUP_NAME']) + results = delete_subgroups(group_id) + else results = SANDBOX_GROUPS.flat_map do |name| group_id = fetch_group_id(@api_client, name) delete_subgroups(group_id) end.compact - else - group_id = fetch_group_id(@api_client) - results = delete_subgroups(group_id) end log_results(results) diff --git a/qa/qa/tools/delete_test_snippets.rb b/qa/qa/tools/delete_test_snippets.rb index bba57f73f05..dbb0acfa923 100644 --- a/qa/qa/tools/delete_test_snippets.rb +++ b/qa/qa/tools/delete_test_snippets.rb @@ -6,15 +6,15 @@ # Required environment variables: GITLAB_QA_ACCESS_TOKEN and GITLAB_ADDRESS # - GITLAB_QA_ACCESS_TOKEN should have API access and belong to the user whose snippets will be deleted -# Optional environment variables: DELETE_BEFORE (default: 1 day ago) -# - Set DELETE_BEFORE to only delete snippets that were created before a given date, otherwise defaults to 1 day ago +# Optional environment variables: DELETE_BEFORE - YYYY-MM-DD, YYYY-MM-DD HH:MM:SS, or YYYY-MM-DDT00:00:00Z +# - Set DELETE_BEFORE to only delete snippets that were created before a given date, otherwise defaults to 2 hours ago # Run `rake delete_test_snippets` module QA module Tools class DeleteTestSnippets < DeleteResourceBase - # @example delete snippets older than 1 day for the user associated with the given access token + # @example delete snippets older than 2 hours for the user associated with the given access token # GITLAB_ADDRESS=
\ # GITLAB_QA_ACCESS_TOKEN= bundle exec rake delete_test_snippets # diff --git a/qa/qa/tools/delete_test_ssh_keys.rb b/qa/qa/tools/delete_test_ssh_keys.rb index f1d3a5634f5..883a54a296d 100644 --- a/qa/qa/tools/delete_test_ssh_keys.rb +++ b/qa/qa/tools/delete_test_ssh_keys.rb @@ -8,8 +8,8 @@ # Required environment variables: GITLAB_QA_ACCESS_TOKEN and GITLAB_ADDRESS # - GITLAB_QA_ACCESS_TOKEN should have API access and belong to the user whose keys will be deleted -# Optional environment variables: DELETE_BEFORE (default: 1 day ago) -# - Set DELETE_BEFORE to only delete snippets that were created before a given date, otherwise defaults to 1 day ago +# Optional environment variables: DELETE_BEFORE - YYYY-MM-DD, YYYY-MM-DD HH:MM:SS, or YYYY-MM-DDT00:00:00Z +# - Set DELETE_BEFORE to only delete SSH keys that were created before a given date, otherwise defaults to 2 hours ago # Run `rake delete_test_ssh_keys` diff --git a/qa/qa/tools/delete_test_users.rb b/qa/qa/tools/delete_test_users.rb index efcbf053187..2192d18706a 100644 --- a/qa/qa/tools/delete_test_users.rb +++ b/qa/qa/tools/delete_test_users.rb @@ -7,8 +7,8 @@ # Required environment variables: GITLAB_QA_ADMIN_ACCESS_TOKEN, GITLAB_QA_ACCESS_TOKEN, and GITLAB_ADDRESS # - GITLAB_QA_ADMIN_ACCESS_TOKEN must have admin API access -# Optional environment variables: DELETE_BEFORE (default: 1 day ago) -# - Set DELETE_BEFORE to only delete users that were created before a given date, otherwise defaults to 1 day ago +# Optional environment variables: DELETE_BEFORE - YYYY-MM-DD, YYYY-MM-DD HH:MM:SS, or YYYY-MM-DDT00:00:00Z +# - Set DELETE_BEFORE to only delete users that were created before a given date, otherwise defaults to 2 hours ago # Run `rake delete_test_users` diff --git a/qa/qa/tools/delete_user_groups.rb b/qa/qa/tools/delete_user_groups.rb index 5a40fb7c84e..2dc9e523977 100644 --- a/qa/qa/tools/delete_user_groups.rb +++ b/qa/qa/tools/delete_user_groups.rb @@ -4,15 +4,16 @@ # - If `dry_run` is true the script will
list groups to be deleted, but it won't delete them # Required environment variables: GITLAB_QA_ACCESS_TOKEN, GITLAB_ADDRESS -# Optional environment variables: DELETE_BEFORE -# - Set DELETE_BEFORE to delete only groups that were created before the given date (default: 1 day ago) +# Optional environment variables: DELETE_BEFORE - YYYY-MM-DD, YYYY-MM-DD HH:MM:SS, or YYYY-MM-DDT00:00:00Z +# - Set DELETE_BEFORE to delete only groups that were created before the given date (default: 2 hours ago) # Run `rake delete_user_groups` module QA module Tools class DeleteUserGroups < DeleteResourceBase - EXCLUDE_GROUPS = %w[gitlab-e2e-sandbox-group-1 + EXCLUDE_GROUPS = %w[gitlab-e2e-sandbox-group-0 + gitlab-e2e-sandbox-group-1 gitlab-e2e-sandbox-group-2 gitlab-e2e-sandbox-group-3 gitlab-e2e-sandbox-group-4 @@ -30,7 +31,7 @@ module QA qa-perf-testing remote-development].freeze - # @example - delete user groups older than 1 day + # @example - delete user groups older than 2 hours # GITLAB_ADDRESS=
\ # GITLAB_QA_ACCESS_TOKEN= \ # bundle exec rake delete_user_groups diff --git a/qa/qa/tools/delete_user_projects.rb b/qa/qa/tools/delete_user_projects.rb index d6270e91ccf..4df6e00f625 100644 --- a/qa/qa/tools/delete_user_projects.rb +++ b/qa/qa/tools/delete_user_projects.rb @@ -8,8 +8,8 @@ # OR # - USER_ID to the id of the user whose projects are to be deleted. -# Optional environment variables: DELETE_BEFORE -# - Set DELETE_BEFORE to delete only projects that were created before the given date (default: 1 day ago) +# Optional environment variables: DELETE_BEFORE - YYYY-MM-DD, YYYY-MM-DD HH:MM:SS, or YYYY-MM-DDT00:00:00Z +# - Set DELETE_BEFORE to delete only projects that were created before the given date (default: 2 hours ago) # Run `rake delete_user_projects` @@ -26,7 +26,7 @@ module QA gitlab-qa-user5 gitlab-qa-user6].freeze - # @example - delete the given users projects older than 3 days + # @example - delete the given users projects older than 2 hours # GITLAB_ADDRESS=
\ # GITLAB_QA_ACCESS_TOKEN= \ # USER_ID= bundle exec rake delete_user_projects diff --git a/qa/qa/tools/lib/group.rb b/qa/qa/tools/lib/group.rb index 36e0497d97c..82e2762f934 100644 --- a/qa/qa/tools/lib/group.rb +++ b/qa/qa/tools/lib/group.rb @@ -5,9 +5,7 @@ module QA module Lib module Group include Support::API - def fetch_group_id(api_client, name = ENV['TOP_LEVEL_GROUP_NAME']) - group_name = name || "gitlab-e2e-sandbox-group-#{Time.now.wday + 1}" - + def fetch_group_id(api_client, group_name) logger.info("Fetching group #{group_name}...") group_search_response = get Runtime::API::Request.new(api_client, "/groups/#{group_name}").url diff --git a/spec/migrations/20250514055316_migrate_anonymous_searches_flag_to_application_settings_v2_spec.rb b/spec/migrations/20250514055316_migrate_anonymous_searches_flag_to_application_settings_v2_spec.rb deleted file mode 100644 index 9e57154d14a..00000000000 --- a/spec/migrations/20250514055316_migrate_anonymous_searches_flag_to_application_settings_v2_spec.rb +++ /dev/null @@ -1,53 +0,0 @@ -# frozen_string_literal: true - -require 'spec_helper' -require_migration! - -RSpec.describe MigrateAnonymousSearchesFlagToApplicationSettingsV2, feature_category: :global_search do - let!(:application_setting) { table(:application_settings).create! 
} - - describe '#down' do - let(:migration) { described_class.new } - - context 'when search settings is already set' do - it 'removes the global search settings' do - migration.up - expected_search = application_setting.reload.search - expected_search.delete('anonymous_searches_allowed') - expect { migration.down }.to change { application_setting.reload.search }.to(expected_search) - end - end - end - - describe '#up' do - context 'when ff is enabled' do - it 'migrates search from the feature flags in the application_settings successfully' do - search_settings = application_setting.reload.search - expected_settings = { 'anonymous_searches_allowed' => true } - - expected_search = search_settings.merge(expected_settings) - expect { migrate! }.to change { - application_setting.reload.search - }.to eq(expected_search) - end - end - - context 'when both ff are disabled' do - before do - stub_feature_flags(allow_anonymous_searches: false) - end - - it 'migrates search from the feature flags in the application_settings successfully' do - search_settings = application_setting.reload.search - expected_settings = { - 'anonymous_searches_allowed' => false - } - - expected_search = search_settings.merge(expected_settings) - expect { migrate! 
}.to change { - application_setting.reload.search - }.to eq(expected_search) - end - end - end -end diff --git a/spec/models/ci/workloads/workload_spec.rb b/spec/models/ci/workloads/workload_spec.rb index 4928b709fdd..4d920d5838d 100644 --- a/spec/models/ci/workloads/workload_spec.rb +++ b/spec/models/ci/workloads/workload_spec.rb @@ -47,4 +47,13 @@ RSpec.describe Ci::Workloads::Workload, feature_category: :continuous_integratio let!(:model) { create(:ci_workload, project: parent) } end end + + describe '#logs_url' do + it 'returns the pipeline url' do + allow(Gitlab::Routing).to receive_message_chain(:url_helpers, :project_pipeline_url) + .with(workload.project, workload.pipeline).and_return('log-url') + + expect(workload.logs_url).to eq('log-url') + end + end end diff --git a/spec/requests/api/graphql/mutations/work_items/bulk_update_spec.rb b/spec/requests/api/graphql/mutations/work_items/bulk_update_spec.rb index 0e932202139..e8d77c2ddb6 100644 --- a/spec/requests/api/graphql/mutations/work_items/bulk_update_spec.rb +++ b/spec/requests/api/graphql/mutations/work_items/bulk_update_spec.rb @@ -263,7 +263,7 @@ RSpec.describe 'Bulk update work items', feature_category: :team_planning do } end - it 'updates all specified attributes' do + it 'updates all specified attributes', quarantine: 'https://gitlab.com/gitlab-org/gitlab/-/issues/553628' do expect do post_graphql_mutation(mutation, current_user: current_user) updatable_work_items.each(&:reload) diff --git a/spec/routing/project_routing_spec.rb b/spec/routing/project_routing_spec.rb index 6133c712e99..719a783f4b3 100644 --- a/spec/routing/project_routing_spec.rb +++ b/spec/routing/project_routing_spec.rb @@ -412,6 +412,25 @@ RSpec.describe 'project routing', feature_category: :groups_and_projects do it 'to #show' do expect(get('/gitlab/gitlabhq/-/commits/master.atom')).to route_to('projects/commits#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'master.atom') + 
expect(get('/gitlab/gitlabhq/-/commits/master')).to route_to('projects/commits#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'master') + expect(get('/gitlab/gitlabhq/-/commits/master?format=json')).to route_to('projects/commits#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'master', format: 'json') + + # complex branch name without encoding (incorrect format) + expect(get('/gitlab/gitlabhq/-/commits/feature/name')).to route_to('projects/commits#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'feature/name') + expect(get('/gitlab/gitlabhq/-/commits/feature/name?format=json')).to route_to('projects/commits#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'feature/name', format: 'json') + + # complex branch name with encoding + expect(get('/gitlab/gitlabhq/-/commits/feature%2Fname')).to route_to('projects/commits#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'feature/name') + expect(get('/gitlab/gitlabhq/-/commits/feature%2Fname?format=json')).to route_to('projects/commits#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'feature/name', format: 'json') + + # collision with signatures route + expect(get('/gitlab/gitlabhq/-/commits/feature/signatures')).to route_to('projects/commits#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'feature/signatures') + expect(get('/gitlab/gitlabhq/-/commits/feature/signatures?format=json&offset=40')).to route_to('projects/commits#show', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'feature/signatures', format: 'json', offset: '40') + end + + it 'to #signatures' do + expect(get('/gitlab/gitlabhq/-/commits/feature/signatures?format=json')).to route_to('projects/commits#signatures', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 'feature', format: 'json') + expect(get('/gitlab/gitlabhq/-/commits/feature%2Fsignatures/signatures?format=json')).to route_to('projects/commits#signatures', namespace_id: 'gitlab', project_id: 'gitlabhq', id: 
'feature/signatures', format: 'json') end it_behaves_like 'redirecting a legacy path', "/gitlab/gitlabhq/commits/master", "/gitlab/gitlabhq/-/commits/master" diff --git a/spec/support/shared_examples/ci/deployable_shared_examples.rb b/spec/support/shared_examples/ci/deployable_shared_examples.rb index abbb93f461c..55b308a6082 100644 --- a/spec/support/shared_examples/ci/deployable_shared_examples.rb +++ b/spec/support/shared_examples/ci/deployable_shared_examples.rb @@ -741,7 +741,7 @@ RSpec.shared_examples 'a deployable job' do end def factory_type - described_class.name.underscore.tr('/', '_') + ::Gitlab::Utils::ClassNameConverter.new(described_class).string_representation end end # rubocop:enable Layout/LineLength diff --git a/spec/support/shared_examples/ci/deployable_shared_examples_ee.rb b/spec/support/shared_examples/ci/deployable_shared_examples_ee.rb index 803af36eabb..86eec5464cd 100644 --- a/spec/support/shared_examples/ci/deployable_shared_examples_ee.rb +++ b/spec/support/shared_examples/ci/deployable_shared_examples_ee.rb @@ -29,6 +29,6 @@ RSpec.shared_examples 'a deployable job in EE' do end def factory_type - described_class.name.underscore.tr('/', '_') + ::Gitlab::Utils::ClassNameConverter.new(described_class).string_representation end end diff --git a/spec/support/shared_examples/models/concerns/incident_management/escalatable_shared_examples.rb b/spec/support/shared_examples/models/concerns/incident_management/escalatable_shared_examples.rb index 6d10dd6ac4f..af393179690 100644 --- a/spec/support/shared_examples/models/concerns/incident_management/escalatable_shared_examples.rb +++ b/spec/support/shared_examples/models/concerns/incident_management/escalatable_shared_examples.rb @@ -272,6 +272,6 @@ RSpec.shared_examples 'a model including Escalatable' do private def factory_from_class(klass) - klass.name.underscore.tr('/', '_') + ::Gitlab::Utils::ClassNameConverter.new(klass).string_representation end end diff --git 
a/spec/support/shared_examples/services/measurable_service_shared_examples.rb b/spec/support/shared_examples/services/measurable_service_shared_examples.rb index 206c25e49af..ecf591071be 100644 --- a/spec/support/shared_examples/services/measurable_service_shared_examples.rb +++ b/spec/support/shared_examples/services/measurable_service_shared_examples.rb @@ -27,6 +27,6 @@ RSpec.shared_examples 'measurable service' do end def described_class_name - described_class.name.underscore.tr('/', '_') + ::Gitlab::Utils::ClassNameConverter.new(described_class).string_representation end end