Various fixes:

- removed the utf8 part of the parser; instead we use a Python binary
  (bytes) object for parameter substitution (text mode)
- removed memory leaks
- fixed crash when reusing cursor with different number of
  placeholders
This commit is contained in:
Georg Richter
2022-06-08 13:43:20 +02:00
parent ad4937dc96
commit 5420fe39d9
6 changed files with 128 additions and 175 deletions

View File

@ -17,7 +17,7 @@
# 51 Franklin St., Fifth Floor, Boston, MA 02110, USA
#
import mariadb, collections
import mariadb, collections, datetime
from numbers import Number
from mariadb.constants import *
from typing import Sequence
@ -104,7 +104,7 @@ class Cursor(mariadb._mariadb.cursor):
will be used.
"""
new_stmt= self.statement
new_stmt= self.statement.encode("utf8")
replace_diff= 0
if self._paramlist:
for i in range (0,len(self._paramlist)):
@ -129,7 +129,7 @@ class Cursor(mariadb._mariadb.cursor):
replace= "\"%s\"" % self.connection.escape_string(val.__str__())
ofs= self._paramlist[i] + replace_diff
new_stmt= new_stmt[:ofs] + replace.__str__() + new_stmt[ofs+1:]
new_stmt= new_stmt[:ofs] + replace.__str__().encode("utf8") + new_stmt[ofs+1:]
replace_diff+= len(replace) - 1
return new_stmt
@ -277,7 +277,7 @@ class Cursor(mariadb._mariadb.cursor):
self._text= False
for val in data:
if isinstance(val, (bytes, bytearray)):
if isinstance(val, (bytes, bytearray, datetime.datetime, datetime.date, datetime.time)):
self._text= False
break

View File

@ -1193,7 +1193,8 @@ mariadb_check_execute_parameters(MrdbCursor *self,
goto error;
}
if (!(self->value= PyMem_RawCalloc(self->parseinfo.paramcount, sizeof(MrdbParamValue))))
if (!self->value &&
!(self->value= PyMem_RawCalloc(self->parseinfo.paramcount, sizeof(MrdbParamValue))))
{
mariadb_throw_exception(NULL, Mariadb_InterfaceError, 0,
"Not enough memory (tried to allocated %lld bytes)",

View File

@ -448,6 +448,17 @@ PyObject *MrdbCursor_clear_result(MrdbCursor *self)
Py_RETURN_NONE;
}
/* MrdbCursor_FreeValues
   Releases the per-parameter value array of a cursor.

   Does nothing when self->value is NULL. Otherwise the first
   self->parseinfo.paramcount entries are visited, the buffer of every
   entry flagged with free_me is passed to MARIADB_FREE_MEM, and finally
   the array itself is passed to MARIADB_FREE_MEM.

   NOTE(review): the loop bound is read from self->parseinfo.paramcount
   at call time, so this presumably has to run before the parse info is
   reset -- confirm against the callers.
*/
static void MrdbCursor_FreeValues(MrdbCursor *self)
{
    uint32_t idx= 0;

    if (self->value)
    {
        while (idx < self->parseinfo.paramcount)
        {
            /* only buffers marked as owned by the value are released */
            if (self->value[idx].free_me)
            {
                MARIADB_FREE_MEM(self->value[idx].buffer);
            }
            idx++;
        }
        MARIADB_FREE_MEM(self->value);
    }
}
/* {{{ MrdbCursor_clear
Resets statement attributes and frees
associated memory
@ -483,6 +494,7 @@ void MrdbCursor_clear(MrdbCursor *self, uint8_t new_stmt)
self->fields= NULL;
self->row_count= 0;
self->affected_rows= 0;
MrdbCursor_FreeValues(self);
MrdbCursor_clearparseinfo(&self->parseinfo);
MARIADB_FREE_MEM(self->values);
MARIADB_FREE_MEM(self->bind);
@ -976,9 +988,11 @@ MrdbCursor_parse(MrdbCursor *self, PyObject *args)
Py_ssize_t statement_len= 0;
MrdbParser *parser= NULL;
char errmsg[128];
uint32_t old_paramcount= 0;
if (self->parseinfo.statement)
{
old_paramcount= self->parseinfo.paramcount;
MrdbCursor_clearparseinfo(&self->parseinfo);
}
@ -1002,7 +1016,13 @@ MrdbCursor_parse(MrdbCursor *self, PyObject *args)
}
/* cleanup and save some parser stuff */
if (parser->param_count && parser->param_count != old_paramcount)
{
MARIADB_FREE_MEM(self->params);
MrdbCursor_FreeValues(self);
MARIADB_FREE_MEM(self->values);
MARIADB_FREE_MEM(self->bind);
}
self->parseinfo.paramcount= parser->param_count;
self->parseinfo.paramstyle= parser->paramstyle;
if (self->parseinfo.statement)

View File

@ -28,73 +28,6 @@ const char *comment_start= "/*";
const char *comment_end= "*/";
const char literals[3]= {'\'', '\"', '`'};
/* Read cursor over a UTF-8 encoded statement buffer; filled in by
   utf8_str_init() and advanced by utf8_next(). */
typedef struct {
/* beginning of the buffer (utf8_str_init stores its stmt argument here) */
const char *start;
/* current byte position inside the buffer; utf8_next moves it forward */
char *pos;
/* buffer length in bytes (the stmt_len passed to utf8_str_init) */
size_t byte_len;
/* result of utf8_char_cnt(start, byte_len) at init time */
size_t char_len;
/* number of steps taken by utf8_next so far; 0 right after init */
size_t char_pos;
} utf8_str;
/* A byte starts a new character unless it has the form 10xxxxxx
   (UTF-8 continuation byte). */
#define isutf8(c) (((c)&0xC0)!=0x80)

/* Returns the length in bytes (1..4) of the UTF-8 sequence starting at c.

   The length is found by scanning forward for the next byte that is not
   a continuation byte; after three continuation bytes the scan stops and
   4 is reported. The NUL terminator is not a continuation byte, so a
   scan that starts on a real character never runs past the end of a
   NUL-terminated buffer.

   c must point into a NUL-terminated buffer. When c points at the
   terminator itself, 1 is returned without inspecting c[1], which would
   lie outside the buffer.
*/
uint8_t utf8_len(char *c)
{
    uint8_t len;

    /* nothing follows the terminator: do not read beyond it */
    if (*c == '\0')
        return 1;

    for (len= 1; len < 4; len++)
    {
        if (isutf8(c[len]))
            break;
    }
    return len;
}
/* Counts the characters in the first `bytes` bytes of `start`.

   The buffer is walked in steps of utf8_len(); every step counts as one
   character. Returns 0 when bytes is 0.
*/
size_t utf8_char_cnt(const char *start, size_t bytes)
{
    const char *limit= start + bytes;
    char *cursor;
    size_t total= 0;

    for (cursor= (char *)start; cursor < limit; cursor+= utf8_len(cursor))
        total++;

    return total;
}
/* Initializes u8 to read stmt from its first byte.

   start and pos are set to stmt, byte_len to stmt_len, char_pos to 0 and
   char_len to the character count reported by utf8_char_cnt().
*/
static void utf8_str_init(utf8_str *u8, const char *stmt, size_t stmt_len)
{
    u8->char_pos= 0;
    u8->byte_len= stmt_len;
    u8->start= stmt;
    u8->pos= (char *)stmt;
    u8->char_len= utf8_char_cnt(stmt, stmt_len);
}
/* Advances u8 by inc characters.

   For every character pos moves forward by utf8_len(pos) bytes and
   char_pos is incremented by one. No bounds check is performed here.
*/
static void utf8_next(utf8_str *u8, size_t inc)
{
    size_t remaining= inc;

    while (remaining > 0)
    {
        u8->pos+= utf8_len(u8->pos);
        u8->char_pos++;
        remaining--;
    }
}
/* Returns 1 if char_pos + size is still smaller than char_len,
   0 otherwise. */
static inline uint8_t utf8_chk_size(utf8_str *u8, size_t size)
{
    size_t wanted= u8->char_pos + size;

    if (wanted < u8->char_len)
        return 1;
    return 0;
}
/* Returns a pointer to the character that lies `offset` characters after
   the current position of u8. The position stored in u8 is not changed. */
static char *utf8_val(utf8_str *u8, size_t offset)
{
    char *ptr= u8->pos;
    size_t left= offset;

    while (left > 0)
    {
        ptr+= utf8_len(ptr);
        left--;
    }
    return ptr;
}
static struct {
enum enum_binary_command command;
MrdbString str;
@ -188,14 +121,15 @@ parser_error(char *errmsg, size_t errmsg_len, const char *errstr)
}
}
#define isutf8(c) (((c)&0xC0)!=0x80)
uint8_t
MrdbParser_parse(MrdbParser *p, uint8_t is_batch,
char *errmsg, size_t errmsg_len)
{
char *end;
char *a, *end;
char lastchar= 0;
uint8_t i;
utf8_str u8;
if (errmsg_len)
*errmsg= 0;
@ -211,83 +145,78 @@ MrdbParser_parse(MrdbParser *p, uint8_t is_batch,
parser_error(errmsg, errmsg_len, "Invalid (empty) statement");
return 1;
}
a= p->statement.str;
end= a + p->statement.length - 1;
utf8_str_init(&u8, p->statement.str, p->statement.length);
end= p->statement.str + p->statement.length;
while (u8.pos <= end)
while (a <= end)
{
cont:
/* we are only interested in ascii chars, so all multibyte characters
will be ignored */
if (utf8_len(u8.pos) > 1)
{
utf8_next(&u8, 1);
/* if (isutf8(*a)) {
a++;
continue;
}
} */
/* check literals */
for (i=0; i < 3; i++)
{
if (*u8.pos == literals[i])
if (*a == literals[i])
{
p->in_literal[i]= !(p->in_literal[i]);
utf8_next(&u8, 1);
a++;
goto cont;
}
}
/* nothing to do, if we are inside a comment or literal */
if (IN_LITERAL(p))
{
utf8_next(&u8,1);
a++;
continue;
}
/* check comment */
if (!p->in_comment)
{
/* Style 1 */
if (utf8_chk_size(&u8, 1) && *u8.pos == '/' && *utf8_val(&u8, 1) == '*')
if (*a == '/' && *(a + 1) == '*')
{
utf8_next(&u8, 2);
if (utf8_chk_size(&u8, 1) && *u8.pos == '!')
a+= 2;
if (a+1 < end && *a == '!')
{
/* check special syntax: 1. comment followed by '!' and whitespace */
if (isspace(*utf8_val(&u8,1)))
if (isspace(*(a+1)))
{
utf8_next(&u8, 2);
a+= 2;
continue;
}
/* check special syntax: 3. comment followed by '!' 5 or 6 digit version number */
if (utf8_chk_size(&u8, 7) && isdigit(*utf8_val(&u8,1)))
if (a + 7 < end && isdigit(*(a+1)))
{
char *end_number;
unsigned long version_number= strtol(utf8_val(&u8,1), &end_number, 10);
char *x;
unsigned long version_number= strtol(a+1, &x, 10);
a= x;
if ((version_number >= 50700 && version_number <= 99999) ||
!(version_number <= mysql_get_server_version(p->mysql)))
{
p->in_comment= 1;
}
utf8_next(&u8, end_number - u8.pos);
continue;
}
}
if (utf8_chk_size(&u8, 2) &&
*u8.pos == 'M' && *utf8_val(&u8, 1) == '!')
if (a+2 < end && *a == 'M' && *(a+1) == '!')
{
utf8_next(&u8, 2);
a+= 2;
/* check special syntax: 2. comment followed by 'M! ' (MariaDB only) */
if (isspace(*(u8.pos)))
if (isspace(*(a)))
continue;
/* check special syntax: 2. comment followed by 'M!' and version number */
if (utf8_chk_size(&u8, 6) && isdigit(*u8.pos))
if (a + 6 < end && isdigit(*a))
{
char *end_number;
unsigned long version_number= strtol(u8.pos, &end_number, 10);
char *x;
unsigned long version_number= strtol(a, &x, 10);
a= x;
if (!(version_number <= mysql_get_server_version(p->mysql)))
{
p->in_comment= 1;
}
utf8_next(&u8, end_number - u8.pos);
continue;
}
}
@ -295,48 +224,45 @@ cont:
continue;
}
/* Style 2 */
if (*u8.pos == '#')
if (*a == '#')
{
utf8_next(&u8, 1);
a++;
p->comment_eol= 1;
continue;
}
/* Style 3 */
if (utf8_chk_size(&u8, 1) && *u8.pos == '-' && *(utf8_val(&u8,1)) == '-')
if (*a == '-' && *(a+1) == '-')
{
if (utf8_chk_size(&u8, 3) && *(utf8_val(&u8,2)) == ' ')
if (((a+2) < end) && *(a+2) == ' ')
{
utf8_next(&u8, 3);
a+= 3;
p->comment_eol= 1;
continue;
}
}
} else
{
if (utf8_chk_size(&u8, 1) &&
*u8.pos == '*' && *(utf8_val(&u8, 1)) == '/')
if (*a == '*' && *(a + 1) == '/')
{
utf8_next(&u8, 2);
a+= 2;
p->in_comment= 0;
continue;
} else {
utf8_next(&u8, 1);
a++;
continue;
}
}
if (p->comment_eol) {
if (*u8.pos == '\0' || *u8.pos == '\n')
if (*a == '\0' || *a == '\n')
{
utf8_next(&u8, 1);
a++;
p->comment_eol= 0;
continue;
}
utf8_next(&u8, 1);
a++;
continue;
}
/* checking for different paramstyles */
/* paramstyle = qmark */
if (*u8.pos == '?')
if (*a == '?')
{
PyObject *tmp;
if (p->paramstyle && p->paramstyle != QMARK)
@ -347,18 +273,17 @@ cont:
}
p->paramstyle= QMARK;
p->param_count++;
tmp= PyLong_FromLong((long)u8.char_pos);
tmp= PyLong_FromLong((long)(a - p->statement.str));
PyList_Append(p->param_list, tmp);
Py_DECREF(tmp);
utf8_next(&u8, 1);
a++;
continue;
}
if (*u8.pos == '%' && lastchar != '\\')
if (*a == '%' && lastchar != '\\')
{
/* paramstyle format */
if (utf8_chk_size(&u8, 1) &&
(*utf8_val(&u8, 1) == 's' || *utf8_val(&u8, 1) == 'd'))
if (*(a+1) == 's' || *(a+1) == 'd')
{
PyObject *tmp;
if (p->paramstyle && p->paramstyle != FORMAT)
@ -368,30 +293,26 @@ cont:
return 1;
}
p->paramstyle= FORMAT;
*u8.pos= '?';
memmove(u8.pos +1, u8.pos + 2, end - u8.pos);
u8.char_len--;
u8.byte_len--;
*a= '?';
memmove(a+1, a+2, end - a);
end--;
tmp= PyLong_FromLong((long)(u8.char_pos));
tmp= PyLong_FromLong((long)(a - p->statement.str));
PyList_Append(p->param_list, tmp);
Py_DECREF(tmp);
utf8_next(&u8,1);
a++;
p->param_count++;
continue;
}
if (utf8_chk_size(&u8,3) && *(utf8_val(&u8, 1)) == '(')
if (*(a+1) == '(')
{
/* named parameter: %(param_name)s */
char *val_end= strstr(u8.pos+1, ")s");
char *val_end= strstr(a+1, ")s");
PyObject *tmp;
MrdbString *m;
if (val_end)
{
ssize_t keylen= val_end - u8.pos + 1;
ssize_t char_len= utf8_char_cnt(u8.pos + 1, keylen);
ssize_t keylen= val_end - a + 1;
if (p->paramstyle && p->paramstyle != PYFORMAT)
{
parser_error(errmsg, errmsg_len,
@ -399,11 +320,14 @@ cont:
return 1;
}
p->paramstyle= PYFORMAT;
*u8.pos= '?';
*a= '?';
p->param_count++;
tmp= PyLong_FromLong((long)u8.char_pos);
tmp= PyLong_FromLong((long)(a - p->statement.str));
PyList_Append(p->param_list, tmp);
Py_DECREF(tmp);
if (p->keys)
{
MrdbString *m;
if (!(m= PyMem_RawRealloc(p->keys,
p->param_count * sizeof(MrdbString))))
{
@ -412,20 +336,26 @@ cont:
return 1;
}
p->keys= m;
}
else {
if (!(p->keys= PyMem_RawMalloc(sizeof(MrdbString))))
{
parser_error(errmsg, errmsg_len,
"Not enough memory");
return 1;
}
}
if (!(p->keys[p->param_count - 1].str=
PyMem_RawCalloc(1, keylen - 2)))
{
parser_error(errmsg, errmsg_len, "Not enough memory");
return 1;
}
memcpy(p->keys[p->param_count - 1].str, u8.pos + 2, keylen - 3);
p->keys[p->param_count - 1].length= keylen - 3;
memcpy(p->keys[p->param_count - 1].str, a + 2, keylen - 3);
memmove(u8.pos+1, val_end+2, end - u8.pos - keylen + 1);
u8.byte_len-= keylen;
u8.char_len-= char_len;
utf8_next(&u8,1);
p->keys[p->param_count - 1].length= keylen - 3;
memmove(a+1, val_end+2, end - a - keylen + 1);
a+= 1;
end -= keylen;
continue;
}
@ -435,21 +365,21 @@ cont:
if (is_batch)
{
/* Do we have an insert statement ? */
if (!p->is_insert && check_keyword(u8.pos, end, "INSERT", 6))
if (!p->is_insert && check_keyword(a, end, "INSERT", 6))
{
if (lastchar == 0 ||
(IS_WHITESPACE(lastchar)) ||
lastchar == '/')
{
p->is_insert = 1;
utf8_next(&u8, 7);
a += 7;
}
}
if (p->is_insert && check_keyword(u8.pos, end, "VALUES", 6))
if (p->is_insert && check_keyword(a, end, "VALUES", 6))
{
p->value_ofs = u8.pos + 7;
utf8_next(&u8, 7);
p->value_ofs = a + 7;
a += 7;
continue;
}
}
@ -459,7 +389,7 @@ cont:
{
for (uint8_t i=0; binary_command[i].str.str; i++)
{
if (check_keyword(u8.pos, end, binary_command[i].str.str,
if (check_keyword(a, end, binary_command[i].str.str,
binary_command[i].str.length))
{
p->command= binary_command[i].command;
@ -471,8 +401,8 @@ cont:
}
}
lastchar= *u8.pos;
utf8_next(&u8, 1);
lastchar= *a;
a++;
}
/* Update length */
p->statement.length= end - p->statement.str + 1;

View File

@ -1113,19 +1113,6 @@ class TestCursor(unittest.TestCase):
self.assertEqual(row[0], x)
del cursor
def test_unicode_parsing(self):
    """Statements with non-ASCII identifiers and parameter names parse.

    Creates a temporary table whose table and column names contain
    multi-byte characters, inserts one row through pyformat
    placeholders whose keys are non-ASCII, and reads the row back with
    a qmark placeholder that follows multi-byte text in the statement.
    """
    # Context managers close cursor and connection even when an
    # assertion fails, so the test does not leak a connection.
    with create_connection() as conn:
        with conn.cursor() as cursor:
            cursor.execute("create temporary table Unitéble2 ( 測試 int, méil int)")
            cursor.execute("insert into Unitéble2 values (%(測試)s, %(méil)s)",
                           {"測試": 1, "méil": 2})
            self.assertEqual(cursor.rowcount, 1)
            cursor.execute("SELECT `Unitéble2`.`測試` AS `Unitéble2_測試`, `Unitéble2`.`méil` AS `Unitéble2_méil` FROM `Unitéble2` WHERE ? = `Unitéble2`.`測試`", (1,))
            rows = cursor.fetchall()
            self.assertEqual(cursor.rowcount, 1)
            # exactly the row inserted above must come back
            self.assertEqual(len(rows), 1)
def test_conpy133(self):
if is_mysql():
self.skipTest("Skip (MySQL)")
@ -1287,6 +1274,21 @@ class TestCursor(unittest.TestCase):
except mariadb.ProgrammingError as err:
self.assertEqual(err.errno, ERR.ER_PARSE_ERROR)
def test_unicode_parsing(self):
    """Statements with non-ASCII identifiers and parameter names parse.

    Creates a temporary table whose table and column names contain
    multi-byte characters, inserts one row through pyformat
    placeholders whose keys are non-ASCII, and reads the row back with
    a qmark placeholder that follows multi-byte text in the statement.
    """
    # Context managers close cursor and connection even when an
    # assertion fails, so the test does not leak a connection.
    with create_connection() as conn:
        with conn.cursor() as cursor:
            cursor.execute("create temporary table Unitéble2 ( 測試 int, méil int)")
            cursor.execute("insert into Unitéble2 values (%(測試)s, %(méil)s)",
                           {"測試": 1, "méil": 2})
            self.assertEqual(cursor.rowcount, 1)
            cursor.execute("SELECT `Unitéble2`.`測試` AS `Unitéble2_測試`, `Unitéble2`.`méil` AS `Unitéble2_méil` FROM `Unitéble2` WHERE ? = `Unitéble2`.`測試`", (1,))
            rows = cursor.fetchall()
            self.assertEqual(cursor.rowcount, 1)
            # exactly the row inserted above must come back
            self.assertEqual(len(rows), 1)
def test_conpy91(self):
with create_connection() as connection:
with connection.cursor() as cursor:

View File

@ -118,7 +118,7 @@ class TestPooling(unittest.TestCase):
cursor.execute("select database()")
row = cursor.fetchone()
self.assertEqual(row[0], "中文考试")
cursor.execute("CREATE TABLE t1 (a varchar(255)) character set utf8mb4")
cursor.execute("CREATE TEMPORARY TABLE t1 (a varchar(255)) character set utf8mb4")
cursor.execute("insert into t1 values (?)", ("123.45 中文考试",))
cursor.execute("select a from t1", buffered=True)
row = cursor.fetchone()