mirror of
https://gitlab.com/gitlab-org/gitlab-foss.git
synced 2025-08-03 16:04:30 +00:00
364 lines
12 KiB
Ruby
364 lines
12 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require 'securerandom'
|
|
|
|
module Gitlab
|
|
module I18n
|
|
class PoLinter
|
|
include Gitlab::Utils::StrongMemoize
|
|
|
|
attr_reader :po_path, :translation_entries, :metadata_entry, :locale
|
|
|
|
VARIABLE_REGEX = /%{\w*}|%[a-z]/
|
|
|
|
# Builds a linter for a single PO file.
#
# po_path - location of the PO file to lint
# locale  - locale to activate while the entries are validated; defaults to
#           the current `I18n.locale` converted to a String
def initialize(po_path:, locale: I18n.locale.to_s)
  @locale = locale
  @po_path = po_path
end
|
|
|
|
# Returns the validation result for the PO file, running the validation only
# on the first call and reusing the stored result afterwards.
def errors
  return @errors if @errors

  @errors = validate_po
end
|
|
|
|
# Parses the PO file and validates every entry in it.
#
# When parsing fails, the parser message is returned under the
# 'PO-syntax errors' key and no entries are validated. Otherwise the entries
# are validated with `locale` activated through `Gitlab::I18n.with_locale`.
def validate_po
  parse_error = parse_po
  return { 'PO-syntax errors' => [parse_error] } if parse_error

  Gitlab::I18n.with_locale(locale) { validate_entries }
end
|
|
|
|
# Parses the PO file at `po_path` and stores the result in `@metadata_entry`
# and `@translation_entries`.
#
# Returns nil on success, or a String describing why the file could not be
# used: the parser's message for syntax errors, or 'Missing metadata entry.'
# when the file does not start with a metadata entry.
def parse_po
  entries = SimplePoParser.parse(po_path)

  # The metadata entry, when present, is the first entry and has an empty
  # `msgid`. `entries.first` is nil when the parser returned no entries at
  # all, which is reported as a missing metadata entry instead of raising.
  first_entry = entries.first
  return 'Missing metadata entry.' unless first_entry && first_entry[:msgid].empty?

  @metadata_entry = Gitlab::I18n::MetadataEntry.new(entries.shift)

  @translation_entries = entries.map do |entry_data|
    Gitlab::I18n::TranslationEntry.new(
      entry_data: entry_data,
      nplurals: metadata_entry.expected_forms
    )
  end

  nil
rescue SimplePoParser::ParserError => e
  @translation_entries = []
  e.message
end
|
|
|
|
# Validates every translation entry and returns a Hash mapping the `msgid` of
# each entry that has problems to the list of its error messages. Entries
# without problems are left out.
def validate_entries
  translation_entries.each_with_object({}) do |entry, errors_by_msgid|
    entry_errors = validate_entry(entry)
    errors_by_msgid[entry.msgid] = entry_errors if entry_errors.any?
  end
end
|
|
|
|
# Runs every validation against a single entry and returns the collected
# error messages as an Array. Each validator appends its messages to the
# shared array, so the order below is the order of the reported messages.
def validate_entry(entry)
  [].tap do |errors|
    validate_flags(errors, entry)
    validate_variables(errors, entry)
    validate_newlines(errors, entry)
    validate_number_of_plurals(errors, entry)
    validate_unescaped_chars(errors, entry)
    validate_html(errors, entry)
    validate_translation(errors, entry)
    validate_namespace(errors, entry)
    validate_spaces(errors, entry)
  end
end
|
|
|
|
# Appends an error for each whitespace problem the entry reports for its
# translations: leading space, trailing space, and runs of multiple spaces.
def validate_spaces(errors, entry)
  errors << 'has leading space. Remove it from the translation' if entry.translations_contain_leading_space?
  errors << 'has trailing space. Remove it from the translation' if entry.translations_contain_trailing_space?

  return unless entry.translations_contain_multiple_spaces?

  errors << 'has different sets of consecutive multiple spaces. Make them consistent with source string'
end
|
|
|
|
# Appends an error when the entry reports that one of its translations still
# contains a namespace.
def validate_namespace(errors, entry)
  return unless entry.translations_contain_namespace?

  errors << 'contains a namespace. Remove it from the translation. For more information see ' \
    'https://docs.gitlab.com/ee/development/i18n/translation.html#namespaced-strings'
end
|
|
|
|
# Appends an error for each part of the entry (msgid, plural id,
# translations) that the entry reports as containing an unescaped `%`.
def validate_unescaped_chars(errors, entry)
  errors << 'contains unescaped `%`, escape it using `%%`' if entry.msgid_contains_unescaped_chars?
  errors << 'plural id contains unescaped `%`, escape it using `%%`' if entry.plural_id_contains_unescaped_chars?
  errors << 'translation contains unescaped `%`, escape it using `%%`' if entry.translations_contain_unescaped_chars?
end
|
|
|
|
# Appends an error for each part of the entry (msgid, plural id,
# translations) that the entry reports as containing potential HTML.
#
# The message points translators at the two accepted alternatives: passing
# HTML through variables, or writing the symbols as the `&lt;` / `&gt;`
# character references.
def validate_html(errors, entry)
  common_message = 'contains < or >. Use variables to include HTML in the string, or the &lt; and &gt; codes ' \
    'for the symbols. For more info see: https://docs.gitlab.com/ee/development/i18n/externalization.html#html'

  errors << common_message if entry.msgid_contains_potential_html?
  errors << "plural id #{common_message}" if entry.plural_id_contains_potential_html?
  errors << "translation #{common_message}" if entry.translations_contain_potential_html?
end
|
|
|
|
# Appends an error when a translated plural entry does not provide exactly as
# many translations as the metadata entry expects.
#
# Nothing is checked when the expected number of forms is unknown or when the
# entry has not been translated.
def validate_number_of_plurals(errors, entry)
  expected_forms = metadata_entry&.expected_forms
  return unless expected_forms
  return unless entry.translated?
  return unless entry.has_plural?
  return if entry.all_translations.size == expected_forms

  errors << "should have #{expected_forms} #{'translations'.pluralize(expected_forms)}"
end
|
|
|
|
# Appends an error for each part of the entry (msgid, plural id,
# translations) that the entry reports as being defined over multiple lines.
def validate_newlines(errors, entry)
  errors << 'is defined over multiple lines, this breaks some tooling.' if entry.msgid_has_multiple_lines?
  errors << 'plural is defined over multiple lines, this breaks some tooling.' if entry.plural_id_has_multiple_lines?

  return unless entry.translations_have_multiple_lines?

  errors << 'has translations defined over multiple lines, this breaks some tooling.'
end
|
|
|
|
# Checks the variables used by the entry's ids and translations.
#
# For an entry with a singular translation, the msgid is checked against
# itself and then against the singular translation. For a plural entry, the
# singular/plural ids are checked together, and every plural translation is
# checked against the plural id.
def validate_variables(errors, entry)
  if entry.has_singular_translation?
    [entry.msgid, entry.singular_translation].each do |message|
      validate_variables_in_message(errors, entry.msgid, message)
    end
  end

  return unless entry.has_plural?

  validate_single_and_plural_variables(errors, entry)

  entry.plural_translations.each do |translation|
    validate_variables_in_message(errors, entry.plural_id, translation)
  end
end
|
|
|
|
# Collects the variables used in `message_id` and checks that they do not mix
# named and unnamed styles, then that `message_translation` uses exactly the
# same variables.
def validate_variables_in_message(errors, message_id, message_translation)
  message_id.scan(VARIABLE_REGEX).then do |required_variables|
    validate_unnamed_variables(errors, required_variables)
    validate_variable_usage(errors, message_translation, required_variables)
  end
end
|
|
|
|
# rubocop: disable Style/AsciiComments -- Need for clarity
# Rejects mixing named and positional variables across the singular and
# plural ids. This matters for languages such as Japanese, for example:
#
#   msgid "GlobalSearch|Showing 1 code result for %{term}"
#   msgid_plural "GlobalSearch|Showing %{resultsTotal} code results for %{term}"
#   msgstr[0] "%{term}の%{resultsTotal}個のコード結果を表示しています"
#
# The single translation needs both `term` and `resultsTotal`. If named and
# positional variables were mixed between the singular and plural ids, it
# would be ambiguous which variable belongs where.
# rubocop: enable Style/AsciiComments
def validate_single_and_plural_variables(errors, entry)
  plural_id = entry.plural_id
  combined_variables = entry.msgid.scan(VARIABLE_REGEX) | plural_id.scan(VARIABLE_REGEX)

  validate_unnamed_variables(errors, combined_variables)
  validate_variables_in_message(errors, plural_id, plural_id)
end
|
|
|
|
# Tries to actually translate the entry and records a failure as an error.
#
# The rescued classes cover the ways translating can fail:
#
# - `sprintf` raises `ArgumentError` when something other than a Hash is
#   passed for named variables, and `TypeError` when a value of the wrong
#   type is passed for unnamed variables.
# - `FastGettext::Translation` raises `RuntimeError` (raised from a String)
#   or its subclasses `NoTextDomainConfigured` and `InvalidFormat`, and
#   `ArgumentError` through the subclasses `InvalidEncoding`,
#   `IllegalSequence` and `InvalidCharacter`.
def validate_translation(errors, entry)
  entry.has_plural? ? translate_plural(entry) : translate_singular(entry)
rescue ArgumentError, TypeError, RuntimeError => e
  errors << "Failure translating to #{locale} in #{po_path}: #{e.message}"
end
|
|
|
|
# Translates the singular msgid and interpolates generated values for its
# variables.
#
# Ids containing `|` are looked up with `s_`, all others with `_`. Returns
# the interpolated translation, or nil when the msgid uses no variables.
# A `KeyError` is re-raised as a `RuntimeError` naming the translation and
# the variables that were supplied.
def translate_singular(entry)
  msgid = entry.msgid
  used_variables = msgid.scan(VARIABLE_REGEX)
  variables = fill_in_variables(used_variables)

  translation = msgid.include?('|') ? FastGettext::Translation.s_(msgid) : FastGettext::Translation._(msgid)

  return if used_variables.empty?

  translation % variables
rescue KeyError => e
  raise "Failed translation '#{translation}' with variables #{variables.keys}: #{e}"
end
|
|
|
|
# Translates the entry once for every number returned by
# `numbers_covering_all_plurals` and interpolates generated values for the
# variables used in the singular and plural ids.
#
# Returns an Array with one element per number: the interpolated translation,
# or nil when there are no variables to fill in. A `KeyError` is re-raised as
# a `RuntimeError` naming the translation and the variables supplied.
def translate_plural(entry)
  # The variables depend only on the entry, so both ids are scanned once
  # instead of once per plural number.
  used_variables = (entry.msgid.scan(VARIABLE_REGEX) + entry.plural_id.scan(VARIABLE_REGEX)).uniq

  numbers_covering_all_plurals.map do |number|
    translation = FastGettext::Translation.n_(entry.msgid, entry.plural_id, number)
    # Fresh values are still generated for every plural number.
    variables = fill_in_variables(used_variables)

    begin
      translation % variables if variables.any?
    rescue KeyError => e
      raise "Failed translation '#{translation}' with variables #{variables.keys}: #{e}"
    end
  end
end
|
|
|
|
# Returns the numbers used to exercise the plural forms, computing them on
# the first call and reusing the stored list afterwards.
def numbers_covering_all_plurals
  return @numbers_covering_all_plurals if @numbers_covering_all_plurals

  @numbers_covering_all_plurals = calculate_numbers_covering_all_plurals
end
|
|
|
|
# Finds, counting up from 0, the first number that maps to each distinct
# plural form index.
#
# The search stops once `metadata_entry.forms_to_test` distinct indexes were
# found, or after `MetadataEntry::MAX_FORMS_TO_TEST` numbers were tried.
def calculate_numbers_covering_all_plurals
  forms_to_test = metadata_entry.forms_to_test
  seen_indexes = []
  numbers = []

  (0...Gitlab::I18n::MetadataEntry::MAX_FORMS_TO_TEST).each do |number|
    break if seen_indexes.size >= forms_to_test

    plural_index = index_for_pluralization(number)
    next if seen_indexes.include?(plural_index)

    seen_indexes << plural_index
    numbers << number
  end

  numbers
end
|
|
|
|
# Returns the plural form index for the given number.
#
# `FastGettext.pluralisation_rule` may answer with a boolean or an integer:
# `false` stands for index 0, `true` for index 1, and any other value is
# passed through as the index.
def index_for_pluralization(counter)
  rule_result = FastGettext.pluralisation_rule.call(counter)
  boolean_indexes = { false => 0, true => 1 }

  boolean_indexes.fetch(rule_result, rule_result)
end
|
|
|
|
# Builds values to interpolate into a translation for the given variables.
#
# - no variables: an empty Array
# - at least one unnamed variable: an Array with one value per variable, a
#   number for `%d` and a string for anything else
# - only named variables: a Hash keyed by the variable names
def fill_in_variables(variables)
  return [] if variables.empty?

  if variables.any? { |variable| unnamed_variable?(variable) }
    return variables.map { |variable| variable == '%d' ? random_number : random_string }
  end

  # The keys must be symbols for Ruby string interpolation to work:
  #   "Hello, %{world}!" % { world: 'hi' }      # works
  #   "Hello, %{world}!" % { 'world' => 'hi' }  # fails with KeyError
  variables.to_h { |variable| [variable[/\w+/].to_sym, random_string] }
end
|
|
|
|
# Returns a random Integer from 0 up to (but not including) 1000, used as the
# value for `%d` variables.
def random_number
  Random.rand(0...1000)
end
|
|
|
|
# Returns a random alphanumeric String of 64 characters, used as the value
# for string variables.
def random_string
  length = 64

  SecureRandom.alphanumeric(length)
end
|
|
|
|
# Appends an error when the given variables mix named and unnamed styles, and
# another when more than one unnamed variable is used.
def validate_unnamed_variables(errors, variables)
  unnamed_count = variables.count { |name| unnamed_variable?(name) }
  named_count = variables.size - unnamed_count

  if unnamed_count.positive? && named_count.positive?
    errors << 'is combining named variables with unnamed variables'
  end

  errors << 'is combining multiple unnamed variables' if unnamed_count > 1
end
|
|
|
|
# Compares the variables found in `translation` with `required_variables`
# and appends an error for the missing ones and another for the unknown ones.
#
# An empty translation is skipped: it falls back to the default message,
# which has all the required variables.
def validate_variable_usage(errors, translation, required_variables)
  return if translation.empty?

  used_variables = translation.scan(VARIABLE_REGEX)
  missing_variables = required_variables - used_variables
  unknown_variables = used_variables - required_variables

  unless missing_variables.empty?
    errors << "<#{translation}> is missing: [#{missing_variables.to_sentence}]"
  end

  return if unknown_variables.empty?

  errors << "<#{translation}> is using unknown variables: [#{unknown_variables.to_sentence}]"
end
|
|
|
|
# Tells whether the variable is unnamed, i.e. it does not start with `%{`
# the way named variables such as `%{name}` do.
def unnamed_variable?(variable_name)
  named = variable_name.start_with?('%{')

  !named
end
|
|
|
|
# Appends an error naming the entry's flag when the entry has one.
def validate_flags(errors, entry)
  return unless entry.flag

  errors << "is marked #{entry.flag}"
end
|
|
end
|
|
end
|
|
end
|