gitlab-foss/po_linter.rb at as-if-foss/seat-assignment-create-worker

mirror of https://gitlab.com/gitlab-org/gitlab-foss.git synced 2025-08-03 16:04:30 +00:00

Files

GitLab Bot 8bfaf69a3b Add latest changes from gitlab-org/gitlab@master

2025-04-29 21:14:09 +00:00

364 lines

12 KiB

Ruby

Raw Permalink Blame History

 # frozen_string_literal: true
 require 'securerandom'
 module Gitlab
   module I18n
     class PoLinter
       include Gitlab::Utils::StrongMemoize
       attr_reader :po_path, :translation_entries, :metadata_entry, :locale
       VARIABLE_REGEX = /%{\w*}|%[a-z]/
       # Remembers which PO file to lint and for which locale.
       #
       # @param po_path path of the PO file; handed to the parser in `parse_po`
       # @param locale locale used while validating, defaults to the current
       #   `I18n.locale` converted to a String
       def initialize(po_path:, locale: I18n.locale.to_s)
         @locale = locale
         @po_path = po_path
       end
       # Runs `validate_po` on first use and caches its (truthy) result in
       # `@errors`, so later calls do not validate again.
       #
       # @return the value produced by `validate_po`
       def errors
         return @errors if @errors

         @errors = validate_po
       end
       # Parses the PO file and, when that succeeds, validates every entry
       # inside a `Gitlab::I18n.with_locale(locale)` block.
       #
       # @return [Hash] `{ 'PO-syntax errors' => [message] }` when `parse_po`
       #   reported a problem, otherwise the result of `validate_entries`
       def validate_po
         parse_error = parse_po
         return { 'PO-syntax errors' => [parse_error] } if parse_error

         Gitlab::I18n.with_locale(locale) { validate_entries }
       end
       # Parses the PO file at `po_path` and populates `@metadata_entry` and
       # `@translation_entries`.
       #
       # @return [String, nil] a message describing why the file could not be
       #   used (parser error or missing metadata entry), or nil on success
       def parse_po
         entries = SimplePoParser.parse(po_path)

         # The first entry is the metadata entry if there is one: it is the
         # entry with an empty `msgid`. When the parser returned no entries at
         # all there is no first entry, which also means no metadata entry.
         first_entry = entries.first
         return 'Missing metadata entry.' unless first_entry && first_entry[:msgid].empty?

         @metadata_entry = Gitlab::I18n::MetadataEntry.new(entries.shift)

         @translation_entries = entries.map do |entry_data|
           Gitlab::I18n::TranslationEntry.new(
             entry_data: entry_data,
             nplurals: metadata_entry.expected_forms
           )
         end

         nil
       rescue SimplePoParser::ParserError => e
         # Leave an empty list behind so readers of `translation_entries`
         # get an Array even though parsing failed.
         @translation_entries = []
         e.message
       end
       # Validates every translation entry and collects the failures.
       #
       # @return [Hash] maps an entry's `msgid` to its list of error messages;
       #   entries without errors are left out
       def validate_entries
         translation_entries.each_with_object({}) do |entry, failures|
           entry_errors = validate_entry(entry)
           failures[entry.msgid] = entry_errors if entry_errors.any?
         end
       end
       # Runs every individual check against a single entry.
       #
       # Each check appends its messages to the same array, so the order of the
       # calls below is the order of the messages in the result.
       #
       # @param entry the translation entry to check
       # @return [Array] the collected error messages (empty when all is well)
       def validate_entry(entry)
         [].tap do |errors|
           validate_flags(errors, entry)
           validate_variables(errors, entry)
           validate_newlines(errors, entry)
           validate_number_of_plurals(errors, entry)
           validate_unescaped_chars(errors, entry)
           validate_html(errors, entry)
           validate_translation(errors, entry)
           validate_namespace(errors, entry)
           validate_spaces(errors, entry)
         end
       end
       # Adds a message for each whitespace problem the entry reports about
       # its translations (leading, trailing, inconsistent multiple spaces).
       #
       # @param errors [Array] message list that is appended to
       # @param entry the translation entry to check
       def validate_spaces(errors, entry)
         errors << 'has leading space. Remove it from the translation' if entry.translations_contain_leading_space?
         errors << 'has trailing space. Remove it from the translation' if entry.translations_contain_trailing_space?
         return unless entry.translations_contain_multiple_spaces?

         errors << 'has different sets of consecutive multiple spaces. Make them consistent with source string'
       end
       # Adds a message when the entry reports that a translation still
       # contains the namespace.
       #
       # @param errors [Array] message list that is appended to
       # @param entry the translation entry to check
       def validate_namespace(errors, entry)
         return unless entry.translations_contain_namespace?

         errors << 'contains a namespace. Remove it from the translation. For more information see ' \
                   'https://docs.gitlab.com/ee/development/i18n/translation.html#namespaced-strings'
       end
       # Adds a message for each place (msgid, plural id, translations) where
       # the entry reports an unescaped `%`.
       #
       # @param errors [Array] message list that is appended to
       # @param entry the translation entry to check
       def validate_unescaped_chars(errors, entry)
         errors << 'contains unescaped `%`, escape it using `%%`' if entry.msgid_contains_unescaped_chars?
         errors << 'plural id contains unescaped `%`, escape it using `%%`' if entry.plural_id_contains_unescaped_chars?
         return unless entry.translations_contain_unescaped_chars?

         errors << 'translation contains unescaped `%`, escape it using `%%`'
       end
       # Adds a message for each place (msgid, plural id, translations) where
       # the entry reports potential HTML.
       #
       # @param errors [Array] message list that is appended to
       # @param entry the translation entry to check
       def validate_html(errors, entry)
         html_hint = 'contains < or >. Use variables to include HTML in the string, or the &lt; and &gt; codes ' \
           'for the symbols. For more info see: https://docs.gitlab.com/ee/development/i18n/externalization.html#html'

         errors << html_hint if entry.msgid_contains_potential_html?
         errors << "plural id #{html_hint}" if entry.plural_id_contains_potential_html?
         return unless entry.translations_contain_potential_html?

         errors << "translation #{html_hint}"
       end
       # Checks that a translated plural entry has exactly as many
       # translations as the metadata entry expects.
       #
       # Nothing is checked when the expected number of forms is unknown or
       # when the entry is not translated.
       #
       # @param errors [Array] message list that is appended to
       # @param entry the translation entry to check
       def validate_number_of_plurals(errors, entry)
         expected_forms = metadata_entry&.expected_forms
         return unless expected_forms && entry.translated?
         return unless entry.has_plural? && entry.all_translations.size != expected_forms

         errors << "should have #{expected_forms} #{'translations'.pluralize(expected_forms)}"
       end
       # Adds a message for each part of the entry (msgid, plural id,
       # translations) that is reported as spanning multiple lines.
       #
       # @param errors [Array] message list that is appended to
       # @param entry the translation entry to check
       def validate_newlines(errors, entry)
         errors << 'is defined over multiple lines, this breaks some tooling.' if entry.msgid_has_multiple_lines?
         errors << 'plural is defined over multiple lines, this breaks some tooling.' if entry.plural_id_has_multiple_lines?
         return unless entry.translations_have_multiple_lines?

         errors << 'has translations defined over multiple lines, this breaks some tooling.'
       end
       # Checks the interpolation variables of the entry.
       #
       # With a singular translation, both the msgid itself and the singular
       # translation are checked against the msgid. With a plural, the
       # singular/plural ids are checked together and every plural
       # translation is checked against the plural id.
       #
       # @param errors [Array] message list that is appended to
       # @param entry the translation entry to check
       def validate_variables(errors, entry)
         if entry.has_singular_translation?
           source = entry.msgid
           validate_variables_in_message(errors, source, source)
           validate_variables_in_message(errors, source, entry.singular_translation)
         end

         return unless entry.has_plural?

         validate_single_and_plural_variables(errors, entry)
         entry.plural_translations.each do |plural_translation|
           validate_variables_in_message(errors, entry.plural_id, plural_translation)
         end
       end
       # Extracts the variables used in `message_id` and checks that they are
       # well formed and that `message_translation` uses exactly those.
       #
       # @param errors [Array] message list that is appended to
       # @param message_id [String] source string the variables are read from
       # @param message_translation [String] translation to compare against
       def validate_variables_in_message(errors, message_id, message_translation)
         expected_variables = message_id.scan(VARIABLE_REGEX)

         validate_unnamed_variables(errors, expected_variables)
         validate_variable_usage(errors, message_translation, expected_variables)
       end
       # rubocop: disable Style/AsciiComments -- Need for clarity
       # Don't allow mixing named and positional variables in singular
       # and plural forms for languages such as Japanese. For example:
       #
       # msgid "GlobalSearch|Showing 1 code result for %{term}"
       # msgid_plural "GlobalSearch|Showing %{resultsTotal} code results for %{term}"
       # msgstr[0] "%{term}の%{resultsTotal}個のコード結果を表示しています" variables
       #
       # Here we see that both `term` and `resultsTotal` are needed in
       # the final translation. If we mix named and positional variables
       # in the singular and plural forms, it could be ambiguous as to which
       # variables belong where.
       # rubocop: enable Style/AsciiComments
       def validate_single_and_plural_variables(errors, entry)
         # Union of the variables of both ids, duplicates removed, so that
         # mixing is detected across the singular and the plural form.
         combined_variables = entry.msgid.scan(VARIABLE_REGEX) | entry.plural_id.scan(VARIABLE_REGEX)
         validate_unnamed_variables(errors, combined_variables)

         # The plural id is also checked against itself.
         validate_variables_in_message(errors, entry.plural_id, entry.plural_id)
       end
       # Tries to actually translate the entry (plural or singular) and
       # records a message when doing so raises one of the rescued errors.
       #
       # @param errors [Array] message list that is appended to
       # @param entry the translation entry to check
       def validate_translation(errors, entry)
         entry.has_plural? ? translate_plural(entry) : translate_singular(entry)
       # `sprintf` could raise an `ArgumentError` when passing something other
       # than a Hash while using named variables.
       #
       # `sprintf` could raise a `TypeError` when passing a wrong type while
       # using unnamed variables.
       #
       # `FastGettext::Translation` could raise a `RuntimeError` (raised as a
       # string), or its subclasses `NoTextDomainConfigured` & `InvalidFormat`.
       #
       # `FastGettext::Translation` could raise an `ArgumentError` as its
       # subclasses `InvalidEncoding`, `IllegalSequence` & `InvalidCharacter`.
       rescue ArgumentError, TypeError, RuntimeError => e
         errors << "Failure translating to #{locale} in #{po_path}: #{e.message}"
       end
       # Translates the singular form of the entry and interpolates generated
       # values for the variables found in its msgid.
       #
       # A msgid containing `|` is translated with `s_`, any other with `_`.
       #
       # @param entry the translation entry to translate
       # @return [String, nil] the interpolated translation, or nil when the
       #   msgid contains no variables
       # @raise [RuntimeError] when interpolation fails with a `KeyError`
       def translate_singular(entry)
         placeholders = entry.msgid.scan(VARIABLE_REGEX)
         values = fill_in_variables(placeholders)

         translated =
           if entry.msgid.include?('|')
             FastGettext::Translation.s_(entry.msgid)
           else
             FastGettext::Translation._(entry.msgid)
           end

         return unless placeholders.any?

         translated % values
       rescue KeyError => e
         raise "Failed translation '#{translated}' with variables #{values.keys}: #{e}"
       end
       # Translates the entry once for every number returned by
       # `numbers_covering_all_plurals` and interpolates generated values for
       # the variables found in the msgid and the plural id.
       #
       # @param entry the translation entry to translate
       # @return [Array] one element per tested number: the interpolated
       #   translation, or nil when there was nothing to interpolate
       # @raise [RuntimeError] when interpolation fails with a `KeyError`
       def translate_plural(entry)
         numbers_covering_all_plurals.map do |count|
           translated = FastGettext::Translation.n_(entry.msgid, entry.plural_id, count)

           # Variables from both forms, duplicates removed.
           placeholders = entry.msgid.scan(VARIABLE_REGEX) | entry.plural_id.scan(VARIABLE_REGEX)
           values = fill_in_variables(placeholders)
           next unless values.any?

           begin
             translated % values
           rescue KeyError => e
             raise "Failed translation '#{translated}' with variables #{values.keys}: #{e}"
           end
         end
       end
       # Memoized list of numbers to translate with; computed by
       # `calculate_numbers_covering_all_plurals` on first use.
       def numbers_covering_all_plurals
         return @numbers_covering_all_plurals if @numbers_covering_all_plurals

         @numbers_covering_all_plurals = calculate_numbers_covering_all_plurals
       end
       # Counts up from 0 and keeps the first number that maps to each
       # distinct plural index.
       #
       # The search ends once `metadata_entry.forms_to_test` distinct indexes
       # were seen, or when the counter reaches
       # `Gitlab::I18n::MetadataEntry::MAX_FORMS_TO_TEST`.
       #
       # @return [Array<Integer>] the selected numbers, in ascending order
       def calculate_numbers_covering_all_plurals
         seen_indexes = []
         selected_numbers = []
         candidate = 0

         until seen_indexes.size >= metadata_entry.forms_to_test ||
             candidate >= Gitlab::I18n::MetadataEntry::MAX_FORMS_TO_TEST
           plural_index = index_for_pluralization(candidate)

           unless seen_indexes.include?(plural_index)
             seen_indexes << plural_index
             selected_numbers << candidate
           end

           candidate += 1
         end

         selected_numbers
       end
       # Returns the index of the plural form used for the given number.
       #
       # The pluralization rule can return a boolean (`false` represents 0,
       # `true` represents 1) or an integer that specifies the plural form to
       # be used for the given number. Booleans are normalized to their
       # integer index so that every caller works with one kind of value.
       #
       # @param counter [Integer] the number to find the plural form for
       # @return 0 for `false`, 1 for `true`, otherwise the rule's own result
       def index_for_pluralization(counter)
         pluralization_result = FastGettext.pluralisation_rule.call(counter)

         case pluralization_result
         when false then 0
         when true then 1
         else pluralization_result
         end
       end
       # Builds values that can be interpolated into a translation using the
       # given variables.
       #
       # @param variables [Array<String>] variables as matched by VARIABLE_REGEX
       # @return [Array, Hash] an empty Array when there are no variables; an
       #   Array of values (a number for `%d`, a string otherwise) when any
       #   variable is unnamed; otherwise a Hash keyed by variable name
       def fill_in_variables(variables)
         return [] if variables.empty?

         if variables.any? { |variable| unnamed_variable?(variable) }
           return variables.map { |variable| variable == '%d' ? random_number : random_string }
         end

         # The keys must be symbols for Ruby string interpolation to work:
         # "Hello, %{world}!" % { world: 'hi' }      # Works correctly
         # "Hello, %{world}!" % { 'world' => 'hi' }  # Fails with KeyError
         variables.to_h { |variable| [variable[/\w+/].to_sym, random_string] }
       end
       # Random integer from 0 up to (but not including) 1000, used as the
       # value for a `%d` variable.
       def random_number
         Random.rand(1_000)
       end
       # Random 64-character alphanumeric string, used as the value for
       # string variables.
       def random_string
         length = 64
         SecureRandom.alphanumeric(length)
       end
       # Flags variable lists that mix named and unnamed variables, and lists
       # that contain more than one unnamed variable.
       #
       # @param errors [Array] message list that is appended to
       # @param variables [Array<String>] variables as matched by VARIABLE_REGEX
       def validate_unnamed_variables(errors, variables)
         unnamed_count = variables.count { |name| unnamed_variable?(name) }
         named_count = variables.size - unnamed_count

         if unnamed_count.positive? && named_count.positive?
           errors << 'is combining named variables with unnamed variables'
         end

         return unless unnamed_count > 1

         errors << 'is combining multiple unnamed variables'
       end
       # Compares the variables used in a translation with the required ones
       # and reports both missing and unknown variables.
       #
       # @param errors [Array] message list that is appended to
       # @param translation [String] the translated text
       # @param required_variables [Array<String>] variables of the source string
       def validate_variable_usage(errors, translation, required_variables)
         # An empty translation needs no validation: in that case we fall back
         # to the default, which has all the required variables.
         return if translation.empty?

         used_variables = translation.scan(VARIABLE_REGEX)

         missing = required_variables - used_variables
         errors << "<#{translation}> is missing: [#{missing.to_sentence}]" if missing.any?

         unknown = used_variables - required_variables
         return unless unknown.any?

         errors << "<#{translation}> is using unknown variables: [#{unknown.to_sentence}]"
       end
       # A variable is unnamed when it is not written as `%{...}`.
       #
       # @param variable_name [String] a variable as matched by VARIABLE_REGEX
       # @return [Boolean]
       def unnamed_variable?(variable_name)
         return false if variable_name.start_with?('%{')

         true
       end
       # Adds a message when the entry carries a flag.
       #
       # @param errors [Array] message list that is appended to
       # @param entry the translation entry to check
       def validate_flags(errors, entry)
         flag = entry.flag
         return unless flag

         errors << "is marked #{flag}"
       end
     end
   end
 end

364 lines 12 KiB Ruby Raw Permalink Blame History

364 lines

12 KiB

Ruby

Raw Permalink Blame History