Various fixes:

- Removed the UTF-8 part of the parser; instead, a Python bytes object is used for parameter substitution (text mode)
- Removed memory leaks
- Fixed a crash when reusing a cursor with a different number of placeholders
2025-07-27 13:01:19 +00:00 · 2022-06-08 13:43:20 +02:00
parent ad4937dc96
commit 5420fe39d9
6 changed files with 128 additions and 175 deletions
--- a/mariadb/cursors.py
+++ b/mariadb/cursors.py
@ -17,7 +17,7 @@
 # 51 Franklin St., Fifth Floor, Boston, MA 02110, USA
 #

-import mariadb, collections
+import mariadb, collections, datetime
 from numbers import Number
 from mariadb.constants import *
 from typing import Sequence
@ -104,7 +104,7 @@ class Cursor(mariadb._mariadb.cursor):
        will be used.
        """

-        new_stmt= self.statement
+        new_stmt= self.statement.encode("utf8")
        replace_diff= 0
        if self._paramlist:
            for i in range (0,len(self._paramlist)):
@ -129,7 +129,7 @@ class Cursor(mariadb._mariadb.cursor):
                            replace= "\"%s\"" % self.connection.escape_string(val.__str__())
                ofs= self._paramlist[i] + replace_diff
                
-                new_stmt= new_stmt[:ofs] + replace.__str__() + new_stmt[ofs+1:]
+                new_stmt= new_stmt[:ofs] + replace.__str__().encode("utf8") + new_stmt[ofs+1:]
                replace_diff+= len(replace) - 1
        return new_stmt

@ -277,7 +277,7 @@ class Cursor(mariadb._mariadb.cursor):
           self._text= False

        for val in data:
-            if isinstance(val, (bytes, bytearray)):
+            if isinstance(val, (bytes, bytearray, datetime.datetime, datetime.date, datetime.time)):
                self._text= False
                break

--- a/mariadb/mariadb_codecs.c
+++ b/mariadb/mariadb_codecs.c
@ -1193,7 +1193,8 @@ mariadb_check_execute_parameters(MrdbCursor *self,
        goto error;
    }

-    if (!(self->value= PyMem_RawCalloc(self->parseinfo.paramcount, sizeof(MrdbParamValue))))
+    if (!self->value &&
+       !(self->value= PyMem_RawCalloc(self->parseinfo.paramcount, sizeof(MrdbParamValue))))
    {
        mariadb_throw_exception(NULL, Mariadb_InterfaceError, 0,
                "Not enough memory (tried to allocated %lld bytes)",
--- a/mariadb/mariadb_cursor.c
+++ b/mariadb/mariadb_cursor.c
@ -448,6 +448,17 @@ PyObject *MrdbCursor_clear_result(MrdbCursor *self)
    Py_RETURN_NONE;
 }

+static void MrdbCursor_FreeValues(MrdbCursor *self)
+{
+  uint32_t i;
+  if (!self->value)
+    return;
+  for (i= 0; i < self->parseinfo.paramcount; i++)
+    if (self->value[i].free_me)
+      MARIADB_FREE_MEM(self->value[i].buffer);
+  MARIADB_FREE_MEM(self->value);
+}
+
 /* {{{ MrdbCursor_clear
   Resets statement attributes  and frees
   associated memory
@ -483,6 +494,7 @@ void MrdbCursor_clear(MrdbCursor *self, uint8_t new_stmt)
    self->fields= NULL;
    self->row_count= 0;
    self->affected_rows= 0;
+    MrdbCursor_FreeValues(self);
    MrdbCursor_clearparseinfo(&self->parseinfo);
    MARIADB_FREE_MEM(self->values);
    MARIADB_FREE_MEM(self->bind);
@ -976,9 +988,11 @@ MrdbCursor_parse(MrdbCursor *self, PyObject *args)
    Py_ssize_t statement_len= 0;
    MrdbParser *parser= NULL;
    char errmsg[128];
+    uint32_t old_paramcount= 0;

    if (self->parseinfo.statement)
    {
+      old_paramcount= self->parseinfo.paramcount;
      MrdbCursor_clearparseinfo(&self->parseinfo);
    }
 
@ -1002,7 +1016,13 @@ MrdbCursor_parse(MrdbCursor *self, PyObject *args)
    }

    /* cleanup and save some parser stuff */
-
+    if (parser->param_count && parser->param_count != old_paramcount)
+    {
+      MARIADB_FREE_MEM(self->params);
+      MrdbCursor_FreeValues(self);
+      MARIADB_FREE_MEM(self->values);
+      MARIADB_FREE_MEM(self->bind);
+    }
    self->parseinfo.paramcount= parser->param_count;
    self->parseinfo.paramstyle= parser->paramstyle;
    if (self->parseinfo.statement)
--- a/mariadb/mariadb_parser.c
+++ b/mariadb/mariadb_parser.c
@ -28,73 +28,6 @@ const char *comment_start= "/*";
 const char *comment_end= "*/";
 const char literals[3]= {'\'', '\"', '`'};

-typedef struct {
-  const char *start;
-  char *pos;
-  size_t byte_len;
-  size_t char_len;
-  size_t char_pos;
-} utf8_str;
-
-#define isutf8(c) (((c)&0xC0)!=0x80)
-
-uint8_t utf8_len(char *c)
-{
-  uint8_t i= 0;
-  (void)(isutf8(c[++(i)]) || isutf8(c[++(i)]) ||
-         isutf8(c[++(i)]) || ++(i));
-  return i;
-}
-
-size_t utf8_char_cnt(const char *start, size_t bytes)
-{
-  size_t char_cnt= 0;
-  char *tmp= (char *)start;
-
-  while (tmp < start + bytes)
-  {
-    tmp+= utf8_len(tmp);
-    char_cnt++;
-  }
-  return char_cnt;
-}
-
-static void utf8_str_init(utf8_str *u8, const char *stmt, size_t stmt_len)
-{
-  u8->start= stmt;
-  u8->pos= (char *)stmt;
-  u8->byte_len= stmt_len;
-  u8->char_pos= 0;
-  u8->char_len= utf8_char_cnt(u8->start, u8->byte_len);
-}
-
-static void utf8_next(utf8_str *u8, size_t inc)
-{
-  size_t i;
-
-  for (i=0; i < inc; i++)
-  {
-    u8->pos+= utf8_len(u8->pos);
-    u8->char_pos++;
-  }
-}
-
-static inline uint8_t utf8_chk_size(utf8_str *u8, size_t size)
-{
-  return (u8->char_pos + size < u8->char_len);
-}
-
-static char *utf8_val(utf8_str *u8, size_t offset)
-{
-  size_t i;
-  char *tmp= u8->pos;
-  for (i=0; i < offset; i++)
-  {
-    tmp+= utf8_len(tmp);
-  }
-  return tmp;
-}
-
 static struct {
    enum enum_binary_command command;
    MrdbString str;
@ -188,14 +121,15 @@ parser_error(char *errmsg, size_t errmsg_len, const char *errstr)
    }
 }

+#define isutf8(c) (((c)&0xC0)!=0x80)
+
 uint8_t
 MrdbParser_parse(MrdbParser *p, uint8_t is_batch,
                 char *errmsg, size_t errmsg_len)
 {
-    char *end;
+    char *a, *end;
    char lastchar= 0;
    uint8_t i;
-    utf8_str u8;

    if (errmsg_len)
        *errmsg= 0;
@ -211,83 +145,78 @@ MrdbParser_parse(MrdbParser *p, uint8_t is_batch,
        parser_error(errmsg, errmsg_len, "Invalid (empty) statement");
        return 1;
    }
+    a= p->statement.str;
+    end= a + p->statement.length - 1;

-    utf8_str_init(&u8, p->statement.str, p->statement.length);
-    end= p->statement.str + p->statement.length;
-
-    while (u8.pos <= end)
+    while (a <= end)
    {
 cont:
-        /* we are only interested in ascii chars, so all multibyte characterss
-           will be ignored */
-        if (utf8_len(u8.pos) > 1)
-        {
-          utf8_next(&u8, 1);
+/*        if (isutf8(*a)) {
+          a++;
          continue;
-        }
+        } */
        /* check literals */
        for (i=0; i < 3; i++)
        {
-            if (*u8.pos == literals[i])
+            if (*a == literals[i])
            {
                p->in_literal[i]= !(p->in_literal[i]);
-                utf8_next(&u8, 1);
+                a++;
                goto cont;
            }
        }
        /* nothing to do, if we are inside a comment or literal */
        if (IN_LITERAL(p))
        {
-            utf8_next(&u8,1);
+            a++;
            continue;
        }
        /* check comment */
        if (!p->in_comment)
        {
            /* Style 1 */
-            if (utf8_chk_size(&u8, 1) && *u8.pos == '/' && *utf8_val(&u8, 1) == '*')
+            if (*a == '/' && *(a + 1) == '*')
            {
-                utf8_next(&u8, 2);
-                if (utf8_chk_size(&u8, 1) && *u8.pos == '!')
+                a+= 2;
+                if (a+1 < end && *a == '!')
                {
                    /* check special syntax: 1. comment followed by '!' and whitespace */
-                    if (isspace(*utf8_val(&u8,1)))
+                    if (isspace(*(a+1)))
                    {
-                      utf8_next(&u8, 2);
+                      a+= 2;
                      continue;
                    }
                    /* check special syntax: 3. comment followed by '!' 5 or 6 digit version number */
-                    if (utf8_chk_size(&u8, 7) && isdigit(*utf8_val(&u8,1)))
+                    if (a + 7 < end && isdigit(*(a+1)))
                    {
-                        char *end_number;
-                        unsigned long version_number= strtol(utf8_val(&u8,1), &end_number, 10);
+                        char *x;
+                        unsigned long version_number= strtol(a+1, &x, 10);
+                        a= x;
                        if ((version_number >= 50700 && version_number <= 99999) ||
                            !(version_number <= mysql_get_server_version(p->mysql)))
                        {
                          p->in_comment= 1;
                        }
-                        utf8_next(&u8, end_number - u8.pos);
                        continue;
                    }
                }
-                if (utf8_chk_size(&u8, 2) && 
-                    *u8.pos == 'M' && *utf8_val(&u8, 1) == '!')
+                if (a+2 < end && *a == 'M' && *(a+1) == '!')
                {
-                    utf8_next(&u8, 2);
+                    a+= 2;
                    /* check special syntax: 2. comment followed by 'M! ' (MariaDB only) */
-                    if (isspace(*(u8.pos)))
+                    if (isspace(*(a)))
                        continue;

                    /* check special syntax: 2. comment followed by 'M!' and version number */
-                    if (utf8_chk_size(&u8, 6) && isdigit(*u8.pos))
+                    if (a + 6 < end && isdigit(*a))
                    {
-                      char *end_number;
-                      unsigned long version_number= strtol(u8.pos, &end_number, 10);
+                      char *x;
+                      unsigned long version_number= strtol(a, &x, 10);
+                      a= x;
                      if (!(version_number <= mysql_get_server_version(p->mysql)))
                      {
                          p->in_comment= 1;
                      }
-                      utf8_next(&u8, end_number - u8.pos);
                      continue;
                    }
                }
@ -295,48 +224,45 @@ cont:
                continue;
            }
            /* Style 2 */
-            if (*u8.pos == '#')
+            if (*a == '#')
            {
-                utf8_next(&u8, 1);
+                a++;
                p->comment_eol= 1;
-                continue;
            }
            /* Style 3 */
-            if (utf8_chk_size(&u8, 1) && *u8.pos == '-' && *(utf8_val(&u8,1)) == '-')
+            if (*a == '-' && *(a+1) == '-')
            {
-                if (utf8_chk_size(&u8, 3) && *(utf8_val(&u8,2)) == ' ')
+                if (((a+2) < end) && *(a+2) == ' ')
                {
-                    utf8_next(&u8, 3);
+                    a+= 3;
                    p->comment_eol= 1;
-                    continue;
                }
            }
        } else
        {
-            if (utf8_chk_size(&u8, 1) &&
-                *u8.pos == '*' && *(utf8_val(&u8, 1)) == '/')
+            if (*a == '*' && *(a + 1) == '/')
            {
-                utf8_next(&u8, 2);
+                a+= 2;
                p->in_comment= 0;
                continue;
            } else {
-                utf8_next(&u8, 1);
+                a++;
                continue;
            } 
        }
        if (p->comment_eol) {
-            if (*u8.pos == '\0' || *u8.pos == '\n')
+            if (*a == '\0' || *a == '\n')
            {
-                utf8_next(&u8, 1);
+                a++;
                p->comment_eol= 0;
                continue;
            }
-            utf8_next(&u8, 1);
+            a++;
            continue;
        }
        /* checking for different paramstyles */
        /* parmastyle = qmark */
-        if (*u8.pos == '?')
+        if (*a == '?')
        {
            PyObject *tmp;
            if (p->paramstyle && p->paramstyle != QMARK)
@ -347,18 +273,17 @@ cont:
            }
            p->paramstyle= QMARK;
            p->param_count++;
-            tmp= PyLong_FromLong((long)u8.char_pos);
+            tmp= PyLong_FromLong((long)(a - p->statement.str));
            PyList_Append(p->param_list, tmp);
            Py_DECREF(tmp);
-            utf8_next(&u8, 1);
+            a++;
            continue;
        }

-        if (*u8.pos == '%' && lastchar != '\\')
+        if (*a == '%' && lastchar != '\\')
        {
            /* paramstyle format */
-            if (utf8_chk_size(&u8, 1) && 
-               (*utf8_val(&u8, 1) == 's' || *utf8_val(&u8, 1) == 'd'))
+            if (*(a+1) == 's' || *(a+1) == 'd')
            {
                PyObject *tmp;
                if (p->paramstyle && p->paramstyle != FORMAT)
@ -368,30 +293,26 @@ cont:
                    return 1;
                }
                p->paramstyle= FORMAT;
-                *u8.pos= '?';
-                memmove(u8.pos +1, u8.pos + 2, end - u8.pos);
-                u8.char_len--;
-                u8.byte_len--;
+                *a= '?';
+                memmove(a+1, a+2, end - a);
                end--;

-                tmp= PyLong_FromLong((long)(u8.char_pos));
+                tmp= PyLong_FromLong((long)(a - p->statement.str));
                PyList_Append(p->param_list, tmp);
                Py_DECREF(tmp);
-                utf8_next(&u8,1);
+                a++;
                p->param_count++;
                continue;
            }
-            if (utf8_chk_size(&u8,3) && *(utf8_val(&u8, 1)) == '(')
+            if (*(a+1) == '(')
            {
-                /* named parameter: %(param_name)s */
-                char *val_end= strstr(u8.pos+1, ")s");
+                char *val_end= strstr(a+1, ")s");
                PyObject *tmp;
-                MrdbString *m;
+

                if (val_end)
                {
-                    ssize_t keylen= val_end - u8.pos + 1;
-                    ssize_t char_len= utf8_char_cnt(u8.pos + 1, keylen);
+                    ssize_t keylen= val_end - a + 1;
                    if (p->paramstyle && p->paramstyle != PYFORMAT)
                    {
                        parser_error(errmsg, errmsg_len, 
@ -399,11 +320,14 @@ cont:
                        return 1;
                    }
                    p->paramstyle= PYFORMAT;
-                    *u8.pos= '?';
+                    *a= '?';
                    p->param_count++;
-                    tmp= PyLong_FromLong((long)u8.char_pos);
+                    tmp= PyLong_FromLong((long)(a - p->statement.str));
                    PyList_Append(p->param_list, tmp);
                    Py_DECREF(tmp);
+                    if (p->keys)
+                    {
+                        MrdbString *m;
                        if (!(m= PyMem_RawRealloc(p->keys, 
                             p->param_count * sizeof(MrdbString))))
                        {
@ -412,20 +336,26 @@ cont:
                            return 1;
                        }
                        p->keys= m;
-
+                    }
+                    else {
+                        if (!(p->keys= PyMem_RawMalloc(sizeof(MrdbString))))
+                        {
+                            parser_error(errmsg, errmsg_len, 
+                                         "Not enough memory");
+                            return 1;
+                        }
+                    }
                    if (!(p->keys[p->param_count - 1].str= 
                        PyMem_RawCalloc(1, keylen - 2)))
                    {
                        parser_error(errmsg, errmsg_len, "Not enough memory");
                        return 1;
                    }
-                    memcpy(p->keys[p->param_count - 1].str, u8.pos + 2, keylen - 3);
-                    p->keys[p->param_count - 1].length= keylen - 3;
+                    memcpy(p->keys[p->param_count - 1].str, a + 2, keylen - 3);

-                    memmove(u8.pos+1, val_end+2, end - u8.pos - keylen + 1);
-                    u8.byte_len-= keylen;
-                    u8.char_len-= char_len;
-                    utf8_next(&u8,1);
+                    p->keys[p->param_count - 1].length= keylen - 3;
+                    memmove(a+1, val_end+2, end - a - keylen + 1);
+                    a+= 1;
                    end -= keylen;
                    continue;
                }
@ -435,21 +365,21 @@ cont:
        if (is_batch)
        {
            /* Do we have an insert statement ? */
-            if (!p->is_insert && check_keyword(u8.pos, end, "INSERT", 6))
+            if (!p->is_insert && check_keyword(a, end, "INSERT", 6))
            {
                if (lastchar == 0 ||
                    (IS_WHITESPACE(lastchar)) ||
                     lastchar == '/')
                {
                    p->is_insert = 1;
-                    utf8_next(&u8, 7);
+                    a += 7;
                }
            }

-            if (p->is_insert && check_keyword(u8.pos, end, "VALUES", 6))
+            if (p->is_insert && check_keyword(a, end, "VALUES", 6))
            {
-                p->value_ofs = u8.pos + 7;
-                utf8_next(&u8, 7);
+                p->value_ofs = a + 7;
+                a += 7;
                continue;
            }
        } 
@ -459,7 +389,7 @@ cont:
          {
            for (uint8_t i=0; binary_command[i].str.str; i++)
            {
-              if (check_keyword(u8.pos, end, binary_command[i].str.str,
+              if (check_keyword(a, end, binary_command[i].str.str,
                  binary_command[i].str.length))
              {
                p->command= binary_command[i].command;
@ -471,8 +401,8 @@ cont:
          }

        }
-        lastchar= *u8.pos;
-        utf8_next(&u8, 1);
+        lastchar= *a;
+        a++;
    }
    /* Update length */
    p->statement.length= end - p->statement.str + 1;
--- a/testing/test/integration/test_cursor.py
+++ b/testing/test/integration/test_cursor.py
@ -1113,19 +1113,6 @@ class TestCursor(unittest.TestCase):
        self.assertEqual(row[0], x)
        del cursor

-    def test_unicode_parsing(self):
-        conn= create_connection()
-        cursor= conn.cursor()
-
-        cursor.execute("create temporary table Unitéble2 ( 測試 int, méil int)");
-        cursor.execute("insert into Unitéble2 values (%(測試)s, %(méil)s)",
-                        {"測試" : 1, "méil" : 2})
-        self.assertEqual(cursor.rowcount, 1);
-        cursor.execute("SELECT `Unitéble2`.`測試` AS `Unitéble2_測試`, `Unitéble2`.`méil` AS `Unitéble2_méil` FROM `Unitéble2` WHERE ? = `Unitéble2`.`測試`",(1,))
-        rows= cursor.fetchall()
-        self.assertEqual(cursor.rowcount, 1)
-        del cursor
-
    def test_conpy133(self):
        if is_mysql():
            self.skipTest("Skip (MySQL)")
@ -1287,6 +1274,21 @@ class TestCursor(unittest.TestCase):
        except mariadb.ProgrammingError as err:
            self.assertEqual(err.errno, ERR.ER_PARSE_ERROR)

+    def test_unicode_parsing(self):
+        conn= create_connection()
+        cursor= conn.cursor()
+
+        cursor.execute("create temporary table Unitéble2 ( 測試 int, méil int)");
+        cursor.execute("insert into Unitéble2 values (%(測試)s, %(méil)s)",
+                        {"測試" : 1, "méil" : 2})
+        self.assertEqual(cursor.rowcount, 1);
+        cursor.execute("SELECT `Unitéble2`.`測試` AS `Unitéble2_測試`, `Unitéble2`.`méil` AS `Unitéble2_méil` FROM `Unitéble2` WHERE ? = `Unitéble2`.`測試`",(1,))
+        rows= cursor.fetchall()
+        self.assertEqual(cursor.rowcount, 1)
+        del cursor
+
+
+
    def test_conpy91(self):
        with create_connection() as connection:
            with connection.cursor() as cursor:
--- a/testing/test/integration/test_pooling.py
+++ b/testing/test/integration/test_pooling.py
@ -118,7 +118,7 @@ class TestPooling(unittest.TestCase):
            cursor.execute("select database()")
            row = cursor.fetchone()
            self.assertEqual(row[0], "中文考试")
-            cursor.execute("CREATE TABLE t1 (a varchar(255)) character set utf8mb4")
+            cursor.execute("CREATE TEMPORARY TABLE t1 (a varchar(255)) character set utf8mb4")
            cursor.execute("insert into t1 values (?)", ("123.45 中文考试",))
            cursor.execute("select a from t1", buffered=True)
            row = cursor.fetchone()