MDEV-22224: Support JSON Path negative index

This patch can be viewed as a combination of two parts:
1) Enabling '-' in the path so that the parser does not give out a warning.
2) Setting the negative index to a correct value and returning the
   appropriate value.

1) To enable using the negative index in the path:
To keep the parser from returning a warning when a negative index is used
in a path, '-' needs to be allowed among the JSON path characters. P_NEG is
added for this and is made recognizable by setting index 45 of
json_path_chr_map[] to P_NEG (instead of the previous P_ETC),
because 45 is the code point of '-' in ASCII/Unicode.
When the path is being parsed and '-' is encountered, the parser should
recognize that it is parsing a '-' sign, so a new JSON path state PS_NEG is
required. The PS_NEG state means that a negative integer is about to be
parsed, so is_negative_index is set to 1 for the current step and
n_item is set accordingly when an integer is encountered after the '-'.
Parsing then proceeds with the rest of the path to obtain the complete path.
The next step is parsing the JSON and returning the correct value.

2) Setting the negative index to a correct value and returning the value:
While parsing the JSON, if we encounter an array and the path step for that
array is a negative index (n_item < 0), we can count the number of elements
in the array and set n_item to the corresponding correct value. The counting
is done in json_skip_array_and_count.
This commit is contained in:
Rucha Deodhar
2021-11-22 22:59:30 +05:30
parent e98013cb5c
commit dfcbb30a92
7 changed files with 682 additions and 76 deletions

View File

@ -985,6 +985,7 @@ enum json_path_chr_classes {
P_LSQRB, /* [ */
P_RSQRB, /* ] */
P_POINT, /* . */
P_NEG, /* hyphen (for negative index in path) */
P_ZERO, /* 0 */
P_DIGIT, /* 123456789 */
P_L, /* l (for "lax") */
@ -1006,7 +1007,7 @@ static enum json_path_chr_classes json_path_chr_map[128] = {
P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR, P_ERR,
P_SPACE, P_ETC, P_QUOTE, P_ETC, P_USD, P_ETC, P_ETC, P_ETC,
P_ETC, P_ETC, P_ASTER, P_ETC, P_ETC, P_ETC, P_POINT, P_ETC,
P_ETC, P_ETC, P_ASTER, P_ETC, P_ETC, P_NEG, P_POINT, P_ETC,
P_ZERO, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT, P_DIGIT,
P_DIGIT, P_DIGIT, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC, P_ETC,
@ -1029,6 +1030,7 @@ enum json_path_states {
PS_AR, /* Parse array step. */
PS_SAR, /* space after the '['. */
PS_AWD, /* Array wildcard. */
PS_NEG, /* Parse '-' (hyphen) */
PS_Z, /* '0' (as an array item number). */
PS_INT, /* Parse integer (as an array item number). */
PS_AS, /* Space. */
@ -1039,6 +1041,7 @@ enum json_path_states {
PS_DWD, /* Double wildcard. */
PS_KEYX, /* Key started with quote ("). */
PS_KNMX, /* Parse quoted key name. */
PS_LAST, /* Parse 'last' keyword */
N_PATH_STATES, /* Below are states that aren't in the transitions table. */
PS_SCT, /* Parse the 'strict' keyword. */
PS_EKY, /* '.' after the keyname so next step is the key. */
@ -1054,67 +1057,72 @@ enum json_path_states {
/*
  Transition table of the JSON path parser.
  It is declared as [N_PATH_STATES][N_PATH_CLASSES]: one row per parser
  state (the row label comments name the PS_* state), one column per
  character class in the order given by the legend inside the table.
  Each entry is either a PS_* state or a JE_* error code.
*/
static int json_path_transitions[N_PATH_STATES][N_PATH_CLASSES]=
{
/*
EOS $, * [ ] . 0
EOS $, * [ ] . - 0
1..9 L S SPACE \ " ETC
ERR BAD
*/
/* GO */ { JE_EOS, PS_PT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
/* GO */ { JE_EOS, PS_PT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
JE_SYN, PS_LAX, PS_SCT, PS_GO, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
/* LAX */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
JE_SYN, PS_LAX, JE_SYN, PS_GO, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* PT */ { PS_OK, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY, JE_SYN, JE_SYN,
/* PT */ { PS_OK, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY, JE_SYN, JE_SYN, JE_SYN,
JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* AR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, PS_Z,
PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
/* AR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, JE_SYN, JE_SYN, PS_NEG,PS_Z,
PS_INT, PS_LAST, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT, JE_SYN, PS_Z,
PS_INT, JE_SYN, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
/* SAR */ { JE_EOS, JE_SYN, PS_AWD, JE_SYN, PS_PT, JE_SYN,JE_SYN, PS_Z,
PS_INT, PS_LAST, JE_SYN, PS_SAR, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
/* AWD */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN,JE_SYN, JE_SYN,
JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* Z */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN,
/* NEG */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,JE_SYN,
PS_INT, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* Z */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN,JE_SYN, JE_SYN,
JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_INT,
/* INT */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN,JE_SYN, PS_INT,
PS_INT, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/*
  NOTE(review): the AS row below lists 16 entries, while every row that was
  rewritten for the added '-' column lists 17. As written, its entries from
  the '-' column onward sit one column to the left of the legend (PS_AS
  lands under S instead of SPACE, and the BAD column is left to the implicit
  zero initializer). It looks like a JE_SYN for '-' is missing - confirm.
*/
/* AS */ { JE_EOS, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, JE_SYN, JE_SYN,
JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/*
  NOTE(review): in the KEY, KNM, KEYX and KNMX rows the new '-' column is
  JE_SYN. Before this change '-' was classified as P_ETC, which these rows
  map to PS_KNM / PS_KNMX, so key names containing a hyphen (quoted or not)
  appear to turn into syntax errors - confirm this is intended.
*/
/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, JE_SYN, PS_KNM,
/* KEY */ { JE_EOS, PS_KNM, PS_KWD, JE_SYN, PS_KNM, JE_SYN,JE_SYN, PS_KNM,
PS_KNM, PS_KNM, PS_KNM, PS_KNM, JE_SYN, PS_KEYX, PS_KNM,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_EKY, PS_KNM,
/* KNM */ { PS_KOK, PS_KNM, PS_AST, PS_EAR, PS_KNM, PS_EKY, JE_SYN,PS_KNM,
PS_KNM, PS_KNM, PS_KNM, PS_KNM, PS_ESC, PS_KNM, PS_KNM,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KWD */ { PS_OK, JE_SYN, JE_SYN, PS_AR, JE_SYN, PS_EKY, JE_SYN,
/* KWD */ { PS_OK, JE_SYN, JE_SYN, PS_AR, JE_SYN, PS_EKY, JE_SYN,JE_SYN,
JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
/* AST */ { JE_SYN, JE_SYN, PS_DWD, JE_SYN, JE_SYN, JE_SYN, JE_SYN,JE_SYN,
JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY, JE_SYN, JE_SYN,
/* DWD */ { JE_SYN, JE_SYN, PS_AST, PS_AR, JE_SYN, PS_KEY,JE_SYN, JE_SYN, JE_SYN,
JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN, JE_SYN,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
/* KEYX*/ { JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, JE_SYN,PS_KNMX,
PS_KNMX,PS_KNMX, PS_KNMX, PS_KNMX, PS_ESCX, PS_EKYX, PS_KNMX,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,
/* KNMX */{ JE_EOS, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX, JE_SYN,PS_KNMX,
PS_KNMX, PS_KNMX, PS_KNMX, PS_KNMX,PS_ESCX, PS_EKYX, PS_KNMX,
JE_NOT_JSON_CHR, JE_BAD_CHR},
/*
  LAST: reached from AR/SAR on the 'l' class. From here ']' goes to PS_PT,
  '-' goes to PS_NEG and a space goes to PS_AS; everything else is an error.
  NOTE(review): the ERR column is JE_SYN here but JE_NOT_JSON_CHR in the
  other rows - confirm the difference is deliberate.
*/
/* LAST */{ JE_SYN, JE_SYN, JE_SYN, JE_SYN, PS_PT, JE_SYN, PS_NEG, JE_SYN,
JE_SYN, JE_SYN, JE_SYN, PS_AS, JE_SYN, JE_SYN, JE_SYN,
JE_SYN, JE_BAD_CHR}
};
int json_path_setup(json_path_t *p,
CHARSET_INFO *i_cs, const uchar *str, const uchar *end)
{
int c_len, t_next, state= PS_GO;
int c_len, t_next, state= PS_GO, is_negative_index= 0, is_last= 0, prev_value=0;
enum json_path_step_types double_wildcard= JSON_PATH_KEY_NULL;
json_string_setup(&p->s, i_cs, str, end);
p->steps[0].type= JSON_PATH_ARRAY_WILD;
@ -1153,8 +1161,20 @@ int json_path_setup(json_path_t *p,
p->types_used|= JSON_PATH_WILD;
continue;
case PS_INT:
p->last_step->n_item*= 10;
p->last_step->n_item+= p->s.c_next - '0';
if (is_last)
{
prev_value*= 10;
prev_value-= p->s.c_next - '0';
p->last_step->n_item= -1 + prev_value;
}
else
{
p->last_step->n_item*= 10;
if (is_negative_index)
p->last_step->n_item-= p->s.c_next - '0';
else
p->last_step->n_item+= p->s.c_next - '0';
}
continue;
case PS_EKYX:
p->last_step->key_end= p->s.c_str - c_len;
@ -1166,6 +1186,7 @@ int json_path_setup(json_path_t *p,
/* fall through */
case PS_KEY:
p->last_step++;
is_negative_index= 0;
if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
return p->s.error= JE_DEPTH;
p->types_used|= p->last_step->type= JSON_PATH_KEY | double_wildcard;
@ -1180,11 +1201,14 @@ int json_path_setup(json_path_t *p,
/* fall through */
case PS_AR:
p->last_step++;
is_last= 0;
if (p->last_step - p->steps >= JSON_DEPTH_LIMIT)
return p->s.error= JE_DEPTH;
p->types_used|= p->last_step->type= JSON_PATH_ARRAY | double_wildcard;
double_wildcard= JSON_PATH_KEY_NULL;
p->last_step->n_item= 0;
prev_value= 0;
is_negative_index= 0;
continue;
case PS_ESC:
if (json_handle_esc(&p->s))
@ -1203,6 +1227,19 @@ int json_path_setup(json_path_t *p,
case PS_DWD:
double_wildcard= JSON_PATH_DOUBLE_WILD;
continue;
case PS_NEG:
p->types_used|= JSON_PATH_NEGATIVE_INDEX;
is_negative_index= 1;
if (is_last)
p->last_step->n_item= 0;
continue;
case PS_LAST:
if ((p->s.error= skip_string_verbatim(&p->s, "ast")))
return 1;
p->types_used|= JSON_PATH_NEGATIVE_INDEX;
is_last= 1;
p->last_step->n_item= -1;
continue;
};
} while (state != PS_OK);
@ -1242,6 +1279,15 @@ int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped)
}
/*
  Run json_skip_level_and_count() on a private copy of the engine, so the
  engine passed in by the caller is left where it was.

  je       engine to copy; it is only read here.
  n_items  reset to 0, then handed to json_skip_level_and_count() as the
           counter it fills in.

  Returns the result of json_skip_level_and_count().
*/
int json_skip_array_and_count(json_engine_t *je, int *n_items)
{
  json_engine_t probe;
  int rc;

  probe= *je;
  *n_items= 0;
  rc= json_skip_level_and_count(&probe, n_items);
  return rc;
}
int json_skip_key(json_engine_t *j)
{
if (json_read_value(j))
@ -1254,7 +1300,7 @@ int json_skip_key(json_engine_t *j)
}
#define SKIPPED_STEP_MARK ((uint) ~0)
#define SKIPPED_STEP_MARK ((int) ~0)
/*
Current step of the path matches the JSON construction.
@ -1262,7 +1308,7 @@ int json_skip_key(json_engine_t *j)
step of the path.
*/
static int handle_match(json_engine_t *je, json_path_t *p,
json_path_step_t **p_cur_step, uint *array_counters)
json_path_step_t **p_cur_step, int *array_counters)
{
json_path_step_t *next_step= *p_cur_step + 1;
@ -1299,13 +1345,28 @@ static int handle_match(json_engine_t *je, json_path_t *p,
} while (next_step->type == JSON_PATH_ARRAY && next_step->n_item == 0);
}
array_counters[next_step - p->steps]= 0;
if ((int) je->value_type !=
(int) (next_step->type & JSON_PATH_KEY_OR_ARRAY))
return json_skip_level(je);
if (next_step->type == JSON_PATH_ARRAY)
{
int array_size;
if (next_step->n_item >= 0)
array_size= 0;
else
{
json_engine_t j2= *je;
if (json_skip_array_and_count(&j2, &array_size))
{
*je= j2;
return 1;
}
array_size= -array_size;
}
array_counters[next_step - p->steps]= array_size;
}
*p_cur_step= next_step;
return 0;
}
@ -1330,7 +1391,7 @@ int json_key_matches(json_engine_t *je, json_string_t *k)
int json_find_path(json_engine_t *je,
json_path_t *p, json_path_step_t **p_cur_step,
uint *array_counters)
int *array_counters)
{
json_string_t key_name;
@ -1773,9 +1834,10 @@ int json_get_path_next(json_engine_t *je, json_path_t *p)
int json_path_parts_compare(
const json_path_step_t *a, const json_path_step_t *a_end,
const json_path_step_t *b, const json_path_step_t *b_end,
enum json_value_types vt)
enum json_value_types vt, const int *array_sizes)
{
int res, res2;
const json_path_step_t *temp_b= b;
while (a <= a_end)
{
@ -1798,7 +1860,9 @@ int json_path_parts_compare(
{
if (b->type & JSON_PATH_ARRAY)
{
if ((a->type & JSON_PATH_WILD) || a->n_item == b->n_item)
if ((a->type & JSON_PATH_WILD) ||
(a->n_item >= 0 ? a->n_item == b->n_item :
a->n_item == b->n_item - array_sizes[b-temp_b]))
goto step_fits;
goto step_failed;
}
@ -1833,11 +1897,13 @@ step_fits:
}
/* Double wild handling needs recursions. */
res= json_path_parts_compare(a+1, a_end, b, b_end, vt);
res= json_path_parts_compare(a+1, a_end, b, b_end, vt,
array_sizes + (b - temp_b));
if (res == 0)
return 0;
res2= json_path_parts_compare(a, a_end, b, b_end, vt);
res2= json_path_parts_compare(a, a_end, b, b_end, vt,
array_sizes + (b - temp_b));
return (res2 >= 0) ? res2 : res;
@ -1849,11 +1915,13 @@ step_fits_autowrap:
}
/* Double wild handling needs recursions. */
res= json_path_parts_compare(a+1, a_end, b+1, b_end, vt);
res= json_path_parts_compare(a+1, a_end, b+1, b_end, vt,
array_sizes + (b - temp_b));
if (res == 0)
return 0;
res2= json_path_parts_compare(a, a_end, b+1, b_end, vt);
res2= json_path_parts_compare(a, a_end, b+1, b_end, vt,
array_sizes + (b - temp_b));
return (res2 >= 0) ? res2 : res;
@ -1864,10 +1932,10 @@ step_fits_autowrap:
/*
  Compare two parsed paths by delegating to json_path_parts_compare().
  The comparison covers steps+1 .. last_step of each path, i.e. element 0 of
  the steps array is left out on both sides. vt and array_size are passed
  through unchanged.
*/
int json_path_compare(const json_path_t *a, const json_path_t *b,
enum json_value_types vt)
enum json_value_types vt, const int *array_size)
{
return json_path_parts_compare(a->steps+1, a->last_step,
b->steps+1, b->last_step, vt);
b->steps+1, b->last_step, vt, array_size);
}