mirror of
https://github.com/MariaDB/server.git
synced 2025-07-28 23:27:19 +00:00
MDEV-22224: Support JSON Path negative index
This patch can be viewed as combination of two parts: 1) Enabling '-' in the path so that the parser does not give out a warning. 2) Setting the negative index to a correct value and returning the appropriate value. 1) To enable using the negative index in the path: To make the parser not return warning when negative index is used in path '-' needs to be allowed in json path characters. P_NEG is added to enable this and is made recognizable by setting the 45th index of json_path_chr_map[] to P_NEG (instead of previous P_ETC) because 45 corresponds to '-' in unicode. When the path is being parsed and '-' is encountered, the parser should recognize it as parsing '-' sign, so a new json state PS_NEG is required. When the state is PS_NEG, it means that a negative integer is going to be parsed so set is_negative_index of current step to 1 and n_item is set accordingly when integer is encountered after '-'. Next proceed with parsing rest of the path and get the correct path. Next thing is parsing the json and returning correct value. 2) Setting the negative index to a correct value and returning the value: While parsing json if we encounter array and the path step for the array is a negative index (n_item < 0), then we can count the number of elements in the array and set n_item to correct corresponding value. This is done in json_skip_array_and_count.
This commit is contained in:
@ -985,6 +985,7 @@ enum json_path_chr_classes {
|
||||
P_LSQRB, /* [ */
|
||||
P_RSQRB, /* ] */
|
||||
P_POINT, /* . */
|
||||
P_NEG, /* hyphen (for negative index in path) */
|
||||
P_ZERO, /* 0 */
|
||||
P_DIGIT, /* 123456789 */
|
||||
P_L, /* l (for "lax") */
|
||||
@ -1006,7 +1007,7 @@ static enum json_path_chr_classes json_path_chr_map[128] = {
|
||||
P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR,
|
||||
|
||||
P_SPACE, P_ETC, P_QUOTE, P_ETC, P_USD, P_ETC, P_ETC, P_ETC,
|
||||
P_ETC, P_ETC, P_ASTER, P_ETC, P_ETC, P_ETC, P_POINT, P_ETC,
|
||||
P_ETC, P_ETC, P_ASTER, P_ETC, P_ETC, P_NEG, P_POINT, P_ETC,
|
||||
P_ZERO, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT,
|
||||
P_DIGIT, P_DIGIT, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC,
|
||||
|
||||
@ -1029,6 +1030,7 @@ enum json_path_states {
|
||||
PS_AR, /* Parse array step. */
|
||||
PS_SAR, /* space after the '['. */
|
||||
PS_AWD, /* Array wildcard. */
|
||||
PS_NEG, /* Parse '-' (hyphen) */
|
||||
PS_Z, /* '0' (as an array item number). */
|
||||
PS_INT, /* Parse integer (as an array item number). */
|
||||
PS_AS, /* Space. */
|
||||
@ -1039,6 +1041,7 @@ enum json_path_states {
|
||||
PS_DWD, /* Double wildcard. */
|
||||
PS_KEYX, /* Key started with quote ("). */
|
||||
PS_KNMX, /* Parse quoted key name. */
|
||||
PS_LAST, /* Parse 'last' keyword */
|
||||
N_PATH_STATES, /* Below are states that aren't in the transitions table. */
|
||||
PS_SCT, /* Parse the 'strict' keyword. */
|
||||
PS_EKY, /* '.' after the keyname so next step is the key. */
|
||||
@ -1054,67 +1057,72 @@ enum json_path_states {
|
||||
static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
|
||||
{
|
||||
/*
|
||||
EOS $, * [ ] . 0
|
||||
EOS $, * [ ] . - 0
|
||||
1..9 L S SPACE \ " ETC
|
||||
ERR BAD
|
||||
*/
|
||||
/* GO */ { JE_EOS, PS_PT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
|
||||
/* GO */ { JE_EOS, PS_PT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_SYN, PS_LAX, PS_SCT, PS_GO, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
|
||||
/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_SYN, PS_LAX, JE_SYN, PS_GO, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* PT */ { PS_OK, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY, JE_SYN, JE_SYN,
|
||||
/* PT */ { PS_OK, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* AR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, PS_Z,
|
||||
PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
|
||||
/* AR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, PS_NEG,PS_Z,
|
||||
PS_INT, PS_LAST, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT, JE_SYN, PS_Z,
|
||||
PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
|
||||
/* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT, JE_SYN,JE_SYN, PS_Z,
|
||||
PS_INT, PS_LAST, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
|
||||
/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN,JE_SYN, JE_SYN,
|
||||
JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* Z */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
|
||||
/* NEG */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,JE_SYN,
|
||||
PS_INT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* Z */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN,JE_SYN, JE_SYN,
|
||||
JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_INT,
|
||||
/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN,JE_SYN, PS_INT,
|
||||
PS_INT, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* AS */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, JE_SYN, PS_KNM,
|
||||
/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, JE_SYN,JE_SYN, PS_KNM,
|
||||
PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KEYX, PS_KNM,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_EKY, PS_KNM,
|
||||
/* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_EKY, JE_SYN,PS_KNM,
|
||||
PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, PS_KNM,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* KWD */ { PS_OK, JE_SYN, JE_SYN, PS_AR, JE_SYN, PS_EKY, JE_SYN,
|
||||
/* KWD */ { PS_OK, JE_SYN, JE_SYN, PS_AR, JE_SYN, PS_EKY, JE_SYN,JE_SYN,
|
||||
JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
|
||||
/* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN,JE_SYN,
|
||||
JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY, JE_SYN, JE_SYN,
|
||||
/* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY,JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
|
||||
/* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, JE_SYN,PS_KNMX,
|
||||
PS_KNMX,PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
|
||||
/* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, JE_SYN,PS_KNMX,
|
||||
PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,PS_ESCX, PS_EKYX, PS_KNMX,
|
||||
JE_NOT_JSON_CHR, JE_BAD_CHR},
|
||||
/* LAST */{ JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_NEG, JE_SYN,
|
||||
JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
|
||||
JE_SYN, JE_BAD_CHR}
|
||||
};
|
||||
|
||||
|
||||
int json_path_setup(json_path_t *p,
|
||||
CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
|
||||
{
|
||||
int c_len, t_next, state= PS_GO;
|
||||
int c_len, t_next, state= PS_GO, is_negative_index= 0, is_last= 0, prev_value=0;
|
||||
enum json_path_step_types double_wildcard= JSON_PATH_KEY_NULL;
|
||||
|
||||
json_string_setup(&p->s, i_cs, str, end);
|
||||
|
||||
p->steps[0].type= JSON_PATH_ARRAY_WILD;
|
||||
@ -1153,8 +1161,20 @@ int json_path_setup(json_path_t *p,
|
||||
p->types_used|= JSON_PATH_WILD;
|
||||
continue;
|
||||
case PS_INT:
|
||||
p->last_step->n_item*= 10;
|
||||
p->last_step->n_item+= p->s.c_next - '0';
|
||||
if (is_last)
|
||||
{
|
||||
prev_value*= 10;
|
||||
prev_value-= p->s.c_next - '0';
|
||||
p->last_step->n_item= -1 + prev_value;
|
||||
}
|
||||
else
|
||||
{
|
||||
p->last_step->n_item*= 10;
|
||||
if (is_negative_index)
|
||||
p->last_step->n_item-= p->s.c_next - '0';
|
||||
else
|
||||
p->last_step->n_item+= p->s.c_next - '0';
|
||||
}
|
||||
continue;
|
||||
case PS_EKYX:
|
||||
p->last_step->key_end= p->s.c_str - c_len;
|
||||
@ -1166,6 +1186,7 @@ int json_path_setup(json_path_t *p,
|
||||
/* fall through */
|
||||
case PS_KEY:
|
||||
p->last_step++;
|
||||
is_negative_index= 0;
|
||||
if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
|
||||
return p->s.error= JE_DEPTH;
|
||||
p->types_used|= p->last_step->type= JSON_PATH_KEY | double_wildcard;
|
||||
@ -1180,11 +1201,14 @@ int json_path_setup(json_path_t *p,
|
||||
/* fall through */
|
||||
case PS_AR:
|
||||
p->last_step++;
|
||||
is_last= 0;
|
||||
if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
|
||||
return p->s.error= JE_DEPTH;
|
||||
p->types_used|= p->last_step->type= JSON_PATH_ARRAY | double_wildcard;
|
||||
double_wildcard= JSON_PATH_KEY_NULL;
|
||||
p->last_step->n_item= 0;
|
||||
prev_value= 0;
|
||||
is_negative_index= 0;
|
||||
continue;
|
||||
case PS_ESC:
|
||||
if (json_handle_esc(&p->s))
|
||||
@ -1203,6 +1227,19 @@ int json_path_setup(json_path_t *p,
|
||||
case PS_DWD:
|
||||
double_wildcard= JSON_PATH_DOUBLE_WILD;
|
||||
continue;
|
||||
case PS_NEG:
|
||||
p->types_used|= JSON_PATH_NEGATIVE_INDEX;
|
||||
is_negative_index= 1;
|
||||
if (is_last)
|
||||
p->last_step->n_item= 0;
|
||||
continue;
|
||||
case PS_LAST:
|
||||
if ((p->s.error= skip_string_verbatim(&p->s, "ast")))
|
||||
return 1;
|
||||
p->types_used|= JSON_PATH_NEGATIVE_INDEX;
|
||||
is_last= 1;
|
||||
p->last_step->n_item= -1;
|
||||
continue;
|
||||
};
|
||||
} while (state != PS_OK);
|
||||
|
||||
@ -1242,6 +1279,15 @@ int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped)
|
||||
}
|
||||
|
||||
|
||||
int json_skip_array_and_count(json_engine_t *je, int *n_items)
|
||||
{
|
||||
json_engine_t j= *je;
|
||||
*n_items= 0;
|
||||
|
||||
return json_skip_level_and_count(&j, n_items);
|
||||
}
|
||||
|
||||
|
||||
int json_skip_key(json_engine_t *j)
|
||||
{
|
||||
if (json_read_value(j))
|
||||
@ -1254,7 +1300,7 @@ int json_skip_key(json_engine_t *j)
|
||||
}
|
||||
|
||||
|
||||
#define SKIPPED_STEP_MARK ((uint) ~0)
|
||||
#define SKIPPED_STEP_MARK ((int) ~0)
|
||||
|
||||
/*
|
||||
Current step of the patch matches the JSON construction.
|
||||
@ -1262,7 +1308,7 @@ int json_skip_key(json_engine_t *j)
|
||||
step of the path.
|
||||
*/
|
||||
static int handle_match(json_engine_t *je, json_path_t *p,
|
||||
json_path_step_t **p_cur_step, uint *array_counters)
|
||||
json_path_step_t **p_cur_step, int *array_counters)
|
||||
{
|
||||
json_path_step_t *next_step= *p_cur_step + 1;
|
||||
|
||||
@ -1299,13 +1345,28 @@ static int handle_match(json_engine_t *je, json_path_t *p,
|
||||
} while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0);
|
||||
}
|
||||
|
||||
|
||||
array_counters[next_step - p->steps]= 0;
|
||||
|
||||
if ((int) je->value_type !=
|
||||
(int) (next_step->type & JSON_PATH_KEY_OR_ARRAY))
|
||||
return json_skip_level(je);
|
||||
|
||||
if (next_step->type == JSON_PATH_ARRAY)
|
||||
{
|
||||
int array_size;
|
||||
if (next_step->n_item >= 0)
|
||||
array_size= 0;
|
||||
else
|
||||
{
|
||||
json_engine_t j2= *je;
|
||||
if (json_skip_array_and_count(&j2, &array_size))
|
||||
{
|
||||
*je= j2;
|
||||
return 1;
|
||||
}
|
||||
array_size= -array_size;
|
||||
}
|
||||
array_counters[next_step - p->steps]= array_size;
|
||||
}
|
||||
|
||||
*p_cur_step= next_step;
|
||||
return 0;
|
||||
}
|
||||
@ -1330,7 +1391,7 @@ int json_key_matches(json_engine_t *je, json_string_t *k)
|
||||
|
||||
int json_find_path(json_engine_t *je,
|
||||
json_path_t *p, json_path_step_t **p_cur_step,
|
||||
uint *array_counters)
|
||||
int *array_counters)
|
||||
{
|
||||
json_string_t key_name;
|
||||
|
||||
@ -1773,9 +1834,10 @@ int json_get_path_next(json_engine_t *je, json_path_t *p)
|
||||
int json_path_parts_compare(
|
||||
const json_path_step_t *a, const json_path_step_t *a_end,
|
||||
const json_path_step_t *b, const json_path_step_t *b_end,
|
||||
enum json_value_types vt)
|
||||
enum json_value_types vt, const int *array_sizes)
|
||||
{
|
||||
int res, res2;
|
||||
const json_path_step_t *temp_b= b;
|
||||
|
||||
while (a <= a_end)
|
||||
{
|
||||
@ -1798,7 +1860,9 @@ int json_path_parts_compare(
|
||||
{
|
||||
if (b->type & JSON_PATH_ARRAY)
|
||||
{
|
||||
if ((a->type & JSON_PATH_WILD) || a->n_item == b->n_item)
|
||||
if ((a->type & JSON_PATH_WILD) ||
|
||||
(a->n_item >= 0 ? a->n_item == b->n_item :
|
||||
a->n_item == b->n_item - array_sizes[b-temp_b]))
|
||||
goto step_fits;
|
||||
goto step_failed;
|
||||
}
|
||||
@ -1833,11 +1897,13 @@ step_fits:
|
||||
}
|
||||
|
||||
/* Double wild handling needs recursions. */
|
||||
res= json_path_parts_compare(a+1, a_end, b, b_end, vt);
|
||||
res= json_path_parts_compare(a+1, a_end, b, b_end, vt,
|
||||
array_sizes + (b - temp_b));
|
||||
if (res == 0)
|
||||
return 0;
|
||||
|
||||
res2= json_path_parts_compare(a, a_end, b, b_end, vt);
|
||||
res2= json_path_parts_compare(a, a_end, b, b_end, vt,
|
||||
array_sizes + (b - temp_b));
|
||||
|
||||
return (res2 >= 0) ? res2 : res;
|
||||
|
||||
@ -1849,11 +1915,13 @@ step_fits_autowrap:
|
||||
}
|
||||
|
||||
/* Double wild handling needs recursions. */
|
||||
res= json_path_parts_compare(a+1, a_end, b+1, b_end, vt);
|
||||
res= json_path_parts_compare(a+1, a_end, b+1, b_end, vt,
|
||||
array_sizes + (b - temp_b));
|
||||
if (res == 0)
|
||||
return 0;
|
||||
|
||||
res2= json_path_parts_compare(a, a_end, b+1, b_end, vt);
|
||||
res2= json_path_parts_compare(a, a_end, b+1, b_end, vt,
|
||||
array_sizes + (b - temp_b));
|
||||
|
||||
return (res2 >= 0) ? res2 : res;
|
||||
|
||||
@ -1864,10 +1932,10 @@ step_fits_autowrap:
|
||||
|
||||
|
||||
int json_path_compare(const json_path_t *a, const json_path_t *b,
|
||||
enum json_value_types vt)
|
||||
enum json_value_types vt, const int *array_size)
|
||||
{
|
||||
return json_path_parts_compare(a->steps+1, a->last_step,
|
||||
b->steps+1, b->last_step, vt);
|
||||
b->steps+1, b->last_step, vt, array_size);
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user