const up hunspell

Change-Id: I8d432279aeaadbc18f95df1c9e99237583503353
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/187820
Reviewed-by: Caolán McNamara <caolan.mcnamara@collabora.com>
Tested-by: Jenkins
This commit is contained in:
Caolán McNamara
2025-07-13 19:28:34 +01:00
parent caf9187455
commit eccfd182df
2 changed files with 376 additions and 0 deletions

View File

@ -0,0 +1,375 @@
From f9b326c5fd84ad976eaaa88582bb57d47d22ffb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Caol=C3=A1n=20McNamara?= <caolan.mcnamara@collabora.com>
Date: Sun, 13 Jul 2025 19:17:55 +0100
Subject: [PATCH] move iscii_devanagari_tbl, etc out of .data section
---
src/hunspell/affixmgr.hxx | 2 +-
src/hunspell/csutil.cxx | 52 ++++++++++++++++++-------------------
src/hunspell/csutil.hxx | 4 +--
src/hunspell/hashmgr.hxx | 2 +-
src/hunspell/hunspell.cxx | 8 +++---
src/hunspell/hunspell.hxx | 2 +-
src/hunspell/suggestmgr.cxx | 8 +++---
src/hunspell/suggestmgr.hxx | 2 +-
8 files changed, 40 insertions(+), 40 deletions(-)
diff --git a/src/hunspell/affixmgr.hxx b/src/hunspell/affixmgr.hxx
index 0fc22a3..84caed1 100644
--- a/src/hunspell/affixmgr.hxx
+++ b/src/hunspell/affixmgr.hxx
@@ -99,7 +99,7 @@ class AffixMgr {
std::string keystring;
std::string trystring;
std::string encoding;
- struct cs_info* csconv;
+ const struct cs_info* csconv;
int utf8;
int complexprefixes;
FLAG compoundflag;
diff --git a/src/hunspell/csutil.cxx b/src/hunspell/csutil.cxx
index 4589e77..92eb014 100644
--- a/src/hunspell/csutil.cxx
+++ b/src/hunspell/csutil.cxx
@@ -685,7 +685,7 @@ char* get_stored_pointer(const char* s) {
// encodings supported
// supplying isupper, tolower, and toupper
-static struct cs_info iso1_tbl[] = {
+const struct cs_info iso1_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -773,7 +773,7 @@ static struct cs_info iso1_tbl[] = {
{0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
{0x00, 0xff, 0xff}};
-static struct cs_info iso2_tbl[] = {
+const struct cs_info iso2_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -861,7 +861,7 @@ static struct cs_info iso2_tbl[] = {
{0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
{0x00, 0xff, 0xff}};
-static struct cs_info iso3_tbl[] = {
+const struct cs_info iso3_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -949,7 +949,7 @@ static struct cs_info iso3_tbl[] = {
{0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
{0x00, 0xff, 0xff}};
-static struct cs_info iso4_tbl[] = {
+const struct cs_info iso4_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -1037,7 +1037,7 @@ static struct cs_info iso4_tbl[] = {
{0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
{0x00, 0xff, 0xff}};
-static struct cs_info iso5_tbl[] = {
+const struct cs_info iso5_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -1125,7 +1125,7 @@ static struct cs_info iso5_tbl[] = {
{0x00, 0xfc, 0xac}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xae},
{0x00, 0xff, 0xaf}};
-static struct cs_info iso6_tbl[] = {
+const struct cs_info iso6_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -1213,7 +1213,7 @@ static struct cs_info iso6_tbl[] = {
{0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
{0x00, 0xff, 0xff}};
-static struct cs_info iso7_tbl[] = {
+const struct cs_info iso7_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -1301,7 +1301,7 @@ static struct cs_info iso7_tbl[] = {
{0x00, 0xfc, 0xbc}, {0x00, 0xfd, 0xbe}, {0x00, 0xfe, 0xbf},
{0x00, 0xff, 0xff}};
-static struct cs_info iso8_tbl[] = {
+const struct cs_info iso8_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -1389,7 +1389,7 @@ static struct cs_info iso8_tbl[] = {
{0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
{0x00, 0xff, 0xff}};
-static struct cs_info iso9_tbl[] = {
+const struct cs_info iso9_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -1477,7 +1477,7 @@ static struct cs_info iso9_tbl[] = {
{0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0x49}, {0x00, 0xfe, 0xde},
{0x00, 0xff, 0xff}};
-static struct cs_info iso10_tbl[] = {
+const struct cs_info iso10_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -1565,7 +1565,7 @@ static struct cs_info iso10_tbl[] = {
{0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
{0x00, 0xff, 0xff}};
-static struct cs_info koi8r_tbl[] = {
+const struct cs_info koi8r_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -1653,7 +1653,7 @@ static struct cs_info koi8r_tbl[] = {
{0x01, 0xdc, 0xfc}, {0x01, 0xdd, 0xfd}, {0x01, 0xde, 0xfe},
{0x01, 0xdf, 0xff}};
-static struct cs_info koi8u_tbl[] = {
+const struct cs_info koi8u_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -1743,7 +1743,7 @@ static struct cs_info koi8u_tbl[] = {
{0x01, 0xda, 0xfa}, {0x01, 0xdb, 0xfb}, {0x01, 0xdc, 0xfc},
{0x01, 0xdd, 0xfd}, {0x01, 0xde, 0xfe}, {0x01, 0xdf, 0xff}};
-static struct cs_info cp1251_tbl[] = {
+const struct cs_info cp1251_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -1831,7 +1831,7 @@ static struct cs_info cp1251_tbl[] = {
{0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
{0x00, 0xff, 0xdf}};
-static struct cs_info iso13_tbl[] = {
+const struct cs_info iso13_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -1919,7 +1919,7 @@ static struct cs_info iso13_tbl[] = {
{0x00, 0xFC, 0xDC}, {0x00, 0xFD, 0xDD}, {0x00, 0xFE, 0xDE},
{0x00, 0xFF, 0xFF}};
-static struct cs_info iso14_tbl[] = {
+const struct cs_info iso14_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -2007,7 +2007,7 @@ static struct cs_info iso14_tbl[] = {
{0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
{0x00, 0xff, 0xff}};
-static struct cs_info iso15_tbl[] = {
+const struct cs_info iso15_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -2095,7 +2095,7 @@ static struct cs_info iso15_tbl[] = {
{0x00, 0xfc, 0xdc}, {0x00, 0xfd, 0xdd}, {0x00, 0xfe, 0xde},
{0x00, 0xff, 0xbe}};
-static struct cs_info iscii_devanagari_tbl[] = {
+const struct cs_info iscii_devanagari_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -2183,7 +2183,7 @@ static struct cs_info iscii_devanagari_tbl[] = {
{0x00, 0xfc, 0xfc}, {0x00, 0xfd, 0xfd}, {0x00, 0xfe, 0xfe},
{0x00, 0xff, 0xff}};
-static struct cs_info tis620_tbl[] = {
+const struct cs_info tis620_tbl[] = {
{0x00, 0x00, 0x00}, {0x00, 0x01, 0x01}, {0x00, 0x02, 0x02},
{0x00, 0x03, 0x03}, {0x00, 0x04, 0x04}, {0x00, 0x05, 0x05},
{0x00, 0x06, 0x06}, {0x00, 0x07, 0x07}, {0x00, 0x08, 0x08},
@@ -2273,10 +2273,10 @@ static struct cs_info tis620_tbl[] = {
struct enc_entry {
const char* enc_name;
- struct cs_info* cs_table;
+ const struct cs_info* cs_table;
};
-static struct enc_entry encds[] = {
+const struct enc_entry encds[] = {
{"iso88591", iso1_tbl}, // ISO-8859-1
{"iso88592", iso2_tbl}, // ISO-8859-2
{"iso88593", iso3_tbl}, // ISO-8859-3
@@ -2323,11 +2323,11 @@ static void toAsciiLowerAndRemoveNonAlphanumeric(const char* pName,
*pBuf = '\0';
}
-struct cs_info* get_current_cs(const std::string& es) {
+const struct cs_info* get_current_cs(const std::string& es) {
char* normalized_encoding = new char[es.size() + 1];
toAsciiLowerAndRemoveNonAlphanumeric(es.c_str(), normalized_encoding);
- struct cs_info* ccs = NULL;
+ const struct cs_info* ccs = NULL;
for (const auto& encd : encds) {
if (strcmp(normalized_encoding, encd.enc_name) == 0) {
ccs = encd.cs_table;
@@ -2350,7 +2350,7 @@ struct cs_info* get_current_cs(const std::string& es) {
// XXX This function was rewritten for mozilla. Instead of storing the
// conversion tables static in this file, create them when needed
// with help the mozilla backend.
-struct cs_info* get_current_cs(const std::string& es) {
+const struct cs_info* get_current_cs(const std::string& es) {
struct cs_info* ccs = new cs_info[256];
// Initialze the array with dummy data so that we wouldn't need
// to return null in case of failures.
@@ -2437,7 +2437,7 @@ struct cs_info* get_current_cs(const std::string& es) {
// primitive isalpha() replacement for tokenization
std::string get_casechars(const char* enc) {
- struct cs_info* csconv = get_current_cs(enc);
+ const struct cs_info* csconv = get_current_cs(enc);
std::string expw;
for (int i = 0; i <= 255; ++i) {
if (cupper(csconv, i) != clower(csconv, i)) {
@@ -2457,7 +2457,7 @@ struct lang_map {
int num;
};
-static struct lang_map lang2enc[] =
+const struct lang_map lang2enc[] =
{{"ar", LANG_ar}, {"az", LANG_az},
{"az_AZ", LANG_az}, // for back-compatibility
{"bg", LANG_bg}, {"ca", LANG_ca},
@@ -2527,7 +2527,7 @@ int unicodeisalpha(unsigned short c) {
}
/* get type of capitalization */
-int get_captype(const std::string& word, cs_info* csconv) {
+int get_captype(const std::string& word, const cs_info* csconv) {
// now determine the capitalization type of the first nl letters
size_t ncap = 0;
size_t nneutral = 0;
diff --git a/src/hunspell/csutil.hxx b/src/hunspell/csutil.hxx
index 808087d..fd11266 100644
--- a/src/hunspell/csutil.hxx
+++ b/src/hunspell/csutil.hxx
@@ -189,7 +189,7 @@ LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c,
int langnum);
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
-LIBHUNSPELL_DLL_EXPORTED struct cs_info* get_current_cs(const std::string& es);
+LIBHUNSPELL_DLL_EXPORTED const struct cs_info* get_current_cs(const std::string& es);
// get language identifiers of language codes
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const std::string& lang);
@@ -230,7 +230,7 @@ LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
mkallcap_utf(std::vector<w_char>& u, int langnum);
// get type of capitalization
-LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*);
+LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, const cs_info*);
// get type of capitalization (UTF-8)
LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum);
diff --git a/src/hunspell/hashmgr.hxx b/src/hunspell/hashmgr.hxx
index 792b2f1..9e0975a 100644
--- a/src/hunspell/hashmgr.hxx
+++ b/src/hunspell/hashmgr.hxx
@@ -96,7 +96,7 @@ class HashMgr {
int langnum;
std::string enc;
std::string lang;
- struct cs_info* csconv;
+ const struct cs_info* csconv;
std::string ignorechars;
std::vector<w_char> ignorechars_utf16;
std::vector<unsigned short*> aliasf; // flag vector `compression' with aliases
diff --git a/src/hunspell/hunspell.cxx b/src/hunspell/hunspell.cxx
index 51e0937..2bb1b94 100644
--- a/src/hunspell/hunspell.cxx
+++ b/src/hunspell/hunspell.cxx
@@ -112,7 +112,7 @@ public:
int add_with_affix(const std::string& word, const std::string& example);
int remove(const std::string& word);
const std::string& get_version_cpp() const;
- struct cs_info* get_csconv();
+ const struct cs_info* get_csconv() const;
int spell(const char* word, int* info = NULL, char** root = NULL);
int suggest(char*** slst, const char* word);
@@ -134,7 +134,7 @@ private:
SuggestMgr* pSMgr;
std::string affixpath;
std::string encoding;
- struct cs_info* csconv;
+ const struct cs_info* csconv;
int langnum;
int utf8;
int complexprefixes;
@@ -1505,7 +1505,7 @@ const std::string& HunspellImpl::get_version_cpp() const {
return pAMgr->get_version();
}
-struct cs_info* HunspellImpl::get_csconv() {
+const struct cs_info* HunspellImpl::get_csconv() const {
return csconv;
}
@@ -2186,7 +2186,7 @@ const std::string& Hunspell::get_version_cpp() const {
return m_Impl->get_version_cpp();
}
-struct cs_info* Hunspell::get_csconv() {
+const struct cs_info* Hunspell::get_csconv() const {
return m_Impl->get_csconv();
}
diff --git a/src/hunspell/hunspell.hxx b/src/hunspell/hunspell.hxx
index 142017f..267a745 100644
--- a/src/hunspell/hunspell.hxx
+++ b/src/hunspell/hunspell.hxx
@@ -217,7 +217,7 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
const std::string& get_wordchars_cpp() const;
const std::vector<w_char>& get_wordchars_utf16() const;
- struct cs_info* get_csconv();
+ const struct cs_info* get_csconv() const;
const char* get_version() const;
const std::string& get_version_cpp() const;
diff --git a/src/hunspell/suggestmgr.cxx b/src/hunspell/suggestmgr.cxx
index 5f480f9..82b53d2 100644
--- a/src/hunspell/suggestmgr.cxx
+++ b/src/hunspell/suggestmgr.cxx
@@ -1207,7 +1207,7 @@ void SuggestMgr::ngsuggest(std::vector<std::string>& wlst,
// set character based ngram suggestion for words with non-BMP Unicode
// characters
- struct cs_info* origconv = csconv;
+ const struct cs_info* origconv = csconv;
if (n == -1) {
utf8 = 0; // XXX not state-free
if (!csconv)
diff --git a/src/hunspell/suggestmgr.hxx b/src/hunspell/suggestmgr.hxx
index 7fa1a6b..0f8bcc5 100644
--- a/src/hunspell/suggestmgr.hxx
+++ b/src/hunspell/suggestmgr.hxx
@@ -103,7 +103,7 @@ class SuggestMgr {
AffixMgr* pAMgr;
unsigned int maxSug;
- struct cs_info* csconv;
+ const struct cs_info* csconv;
int utf8;
int langnum;
int nosplitsugs;
--
2.49.0

View File

@ -23,6 +23,7 @@ $(eval $(call gb_UnpackedTarball_add_patches,hunspell, \
external/hunspell/0001-fix-LibreOffice-build-problem-with-basic_string-appe.patch \
external/hunspell/0001-Resolves-rhbz-2158548-allow-longer-words-for-hunspel.patch \
external/hunspell/0001-Keep-only-REP-ph-or-2-word-dictionary-phrase-suggest.patch \
external/hunspell/0001-move-iscii_devanagari_tbl-etc-out-of-.data-section.patch \
external/hunspell/bit_cast.patch.0 \
external/hunspell/clock-monotonic.patch.1 \
))