From 0221ceeccd467f21a48f18d4e493174eef9a5b03 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Thu, 19 Dec 2024 09:11:10 -0500 Subject: [PATCH 1/4] ext/gettext/gettext.c: handle NULLs from bindtextdomain() According to POSIX, bindtextdomain() returns "the implementation- defined default directory pathname used by the gettext family of functions" when its second parameter is NULL (i.e. when you are querying the directory corresponding to some text domain and that directory has not yet been set). Its PHP counterpart is feeding that result direclty to RETURN_STRING, but this can go wrong in two ways: 1. If an error occurs, even POSIX-compliant implementations may return NULL. 2. At least one non-compliant implementation (musl) lacks a default directory and returns NULL whenever the domain has not yet been bound. In either of those cases, PHP segfaults on the NULL string. In this commit we check for the NULL, and RETURN_FALSE when it happens rather than crashing. This partially addresses GH #13696 --- ext/gettext/gettext.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/ext/gettext/gettext.c b/ext/gettext/gettext.c index 15af3cb9b57..eba2a591e9f 100644 --- a/ext/gettext/gettext.c +++ b/ext/gettext/gettext.c @@ -167,7 +167,7 @@ PHP_FUNCTION(bindtextdomain) char *domain; size_t domain_len; zend_string *dir = NULL; - char *retval, dir_name[MAXPATHLEN]; + char *retval, dir_name[MAXPATHLEN], *btd_result; if (zend_parse_parameters(ZEND_NUM_ARGS(), "sS!", &domain, &domain_len, &dir) == FAILURE) { RETURN_THROWS(); @@ -181,7 +181,16 @@ PHP_FUNCTION(bindtextdomain) } if (dir == NULL) { - RETURN_STRING(bindtextdomain(domain, NULL)); + btd_result = bindtextdomain(domain, NULL); + if (btd_result == NULL) { + /* POSIX-compliant implementations can return + * NULL if an error occured. On musl you will + * also get NULL if the domain is not yet + * bound, because musl has no default directory + * to return in that case. */ + RETURN_FALSE; + } + RETURN_STRING(btd_result); } if (ZSTR_LEN(dir) != 0 && !zend_string_equals_literal(dir, "0")) { From bfb0e367f2a2823149be67fad00b66511750f2bf Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Thu, 19 Dec 2024 09:17:16 -0500 Subject: [PATCH 2/4] ext/gettext/tests: fix libintl return values under musl Musl has two quirks that are leading to failed internationalization tests. First is that the return value of bindtextdomain(..., NULL) will always be false, rather than an "implementation-defined default directory," because musl does not have an implementation-defined default directory. One test needs a special case for this. Second is that the musl implementation of bind_textdomain_codeset() always returns NULL. The POSIX-correctness of this is debatable, but it is roughly equivalent to correct, because musl only support UTF-8, so the NULL value indicating that the codeset is unchanged from the locale's codeset (UTF-8) is accurate. PHP's bind_textdomain_codeset() function however treats NULL as failure, unconditionally: * https://github.com/php/doc-en/issues/4311 * https://github.com/php/php-src/issues/17163 This unfortunately causes false to be returned consistently on musl -- even when nothing unexpected has happened -- and naturally this is affecting several tests. For now we change two tests to accept "false" in addition to "UTF-8" so that they may pass on musl. If PHP's bind_textdomain_codeset() is updated to differentiate between NULL and NULL-with-errno-set, these tests can also be updated once again to reject the NULL-with-errno result. This partially addresses GH #13696 --- ext/gettext/tests/bug53251.phpt | 28 +++++++++++++------ ...ettext_bind_textdomain_codeset-retval.phpt | 12 ++++++-- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/ext/gettext/tests/bug53251.phpt b/ext/gettext/tests/bug53251.phpt index 6f37642925d..d568be6bc07 100644 --- a/ext/gettext/tests/bug53251.phpt +++ b/ext/gettext/tests/bug53251.phpt @@ -8,18 +8,28 @@ if (getenv('SKIP_REPEAT')) die('skip gettext leaks global state across requests' ?> --FILE-- --EXPECT-- bool(true) -bool(true) -bool(false) -string(5) "UTF-8" -string(5) "UTF-8" diff --git a/ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt b/ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt index 47c821648fc..941bab79bff 100644 --- a/ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt +++ b/ext/gettext/tests/gettext_bind_textdomain_codeset-retval.phpt @@ -5,13 +5,21 @@ gettext --FILE-- --EXPECT-- bool(false) -string(5) "UTF-8" +bool(true) Done --CREDITS-- Florian Holzhauer fh-pt@fholzhauer.de From 471e94ce612ebf9b2f34490804f32c1ad89b6f33 Mon Sep 17 00:00:00 2001 From: Michael Orlitzky Date: Thu, 19 Dec 2024 09:20:06 -0500 Subject: [PATCH 3/4] ext/gettext/config.m4: symlink en_US.UTF-8 test bits to en_US for musl The gettext() family of functions under musl does not support codeset suffixes like ".UTF-8", because the only codeset it understands is UTF-8. (Yes, it is annoying that it doesn't support the suffix for the codeset that it does understand; no, I am not in charge.) Thanks to this, we have six failing tests on musl, * FAIL Gettext basic test with en_US locale that should be on nearly every system [ext/gettext/tests/gettext_basic-enus.phpt] * FAIL Test if bindtextdomain() returns string id if no directory path is set( if directory path is 'null') [ext/gettext/tests/gettext_bindtextdomain-cwd.phpt] * FAIL Test dcgettext() functionality [ext/gettext/tests/gettext_dcgettext.phpt] * FAIL Test dgettext() functionality [ext/gettext/tests/gettext_dgettext.phpt] * FAIL Test if dngettext() returns the correct translations (optionally plural). [ext/gettext/tests/gettext_dngettext-plural.phpt] * FAIL Test ngettext() functionality [ext/gettext/tests/gettext_ngettext.phpt] These are all fixed by symlinking the en_US.UTF-8 message data to en_US, where musl is able to find it. This does not make the situation any better for developers (who don't know what libc their users will be running), but that problem is inhereted from C and is not the fault of the gettext extension. This partially addresses GH #13696 --- .gitignore | 3 +++ ext/gettext/config.m4 | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/.gitignore b/.gitignore index 49acc9f2e17..449963153f3 100644 --- a/.gitignore +++ b/.gitignore @@ -177,6 +177,9 @@ php /ext/*/configure.ac /ext/*/run-tests.php +# Generated by ./configure if libc might be musl +/ext/gettext/tests/locale/en_US + # ------------------------------------------------------------------------------ # Generated by Windows build system # ------------------------------------------------------------------------------ diff --git a/ext/gettext/config.m4 b/ext/gettext/config.m4 index 9e304d82b8d..b3a6c35d6c0 100644 --- a/ext/gettext/config.m4 +++ b/ext/gettext/config.m4 @@ -25,6 +25,17 @@ if test "$PHP_GETTEXT" != "no"; then AC_CHECK_LIB(c, bindtextdomain, [ GETTEXT_LIBS= GETTEXT_CHECK_IN_LIB=c + + dnl If libintl.h is provided by libc, it's possible that libc is musl. + dnl The gettext family of functions under musl ignores the codeset + dnl suffix on directories like "en_US.UTF-8"; instead they look only + dnl in "en_US". To accomodate that, we symlink some test data from one + dnl to the other. + AC_MSG_NOTICE([symlinking en_US.UTF-8 messages to en_US in case you are on musl]) + _linkdest="${srcdir%/}"/ext/gettext/tests/locale/en_US + AS_IF([test ! -e "${_linkdest}"],[ + ln -s en_US.UTF-8 "${_linkdest}" + ]) ],[ AC_MSG_ERROR(Unable to find required gettext library) ]) From a23ecc0a7592621916e974048b0f918c39478c5c Mon Sep 17 00:00:00 2001 From: Arnaud Le Blanc Date: Thu, 19 Dec 2024 18:30:17 +0100 Subject: [PATCH 4/4] NEWS for GH-17168 --- NEWS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS b/NEWS index 5c6b7bbe4f6..00332140e87 100644 --- a/NEWS +++ b/NEWS @@ -27,6 +27,10 @@ PHP NEWS . Fixed bug GH-16255 (Unexpected nan value in ext/gd/libgd/gd_filter.c). (nielsdos, cmb) +- Gettext: + . Fixed bug GH-17202 (Segmentation fault ext/gettext/gettext.c + bindtextdomain()). (Michael Orlitzky) + - Iconv: . Fixed bug GH-17047 (UAF on iconv filter failure). (nielsdos)