MDEV-37230 Incorrect handling of NULL join conditions when using split-materialized

During the optimization phase, when determining whether or not to materialize
a derived table all at once or in groups (split materialization), all key
equalities that reference the derived table have their cond Items pushed into
JOIN::spl_opt_info->inj_cond_list.
From there they are filtered (tables in join prefix) and injected into the
JOIN conditions (in JOIN::inject_best_splitting_cond()), where they might
be removed because they are involved ref access and they aren't needed.
These pushed items conditions were always Item_func_eq, whether or or
the key usage specifies null safe equality (<=>) or not.

The fix is to create an Item_func_equal condition when the key equality
is specified using <=>.

approved by Sergei Petrunia (sergey@mariadb.com) PR#4198
This commit is contained in:
Rex Johnston
2025-07-14 13:31:50 +11:00
committed by Rex Johnston
parent aa3578aa8a
commit a7d8c97952
3 changed files with 309 additions and 3 deletions

View File

@ -989,7 +989,7 @@ explain $query;
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t2 range t2_IDX t2_IDX 4 NULL 1 Using index condition
1 PRIMARY <derived2> ref key0 key0 10 test.t2.a,test.t2.b 1 Using where
2 LATERAL DERIVED t1 ref t1_IDX t1_IDX 9 test.t2.a,test.t2.b 1
2 LATERAL DERIVED t1 ref t1_IDX t1_IDX 9 test.t2.a,test.t2.b 1 Using index condition
$query;
a b name total_amt
1 NULL A 10
@ -998,4 +998,242 @@ set statement optimizer_switch='split_materialized=off' for $query;
a b name total_amt
1 NULL A 10
DROP TABLE t1,t2;
#
# MDEV-37230 Incorrect handling of NULL join conditions when using
# split-materialized
#
create table t1
(
a int not null,
b int,
c int,
d int,
amount decimal,
key t1_ix1 (a,b)
) engine=innodb;
insert into t1 values (0, NULL, 0, NULL, 10.0000), (1, 1, 1, 1, 10.0000),
(2, 2, 2, 2, 20.0000), (3, 3, 3, 3, 30.0000), (4, 4, 4, 4, 40.0000),
(5, 5, 5, 5, NULL), (6, 6, 6, 6, NULL), (7, 7, 7, 7, 70.0000),
(8, 8, 8, 8, 80.0000);
create table t2
(
a int NOT NULL,
b int,
name varchar(50),
key t2_ix1 (a,b)
) engine=innodb;
insert into t2 values (0, NULL, 'a'), (1, NULL, 'A'), (2, 2, 'B'), (3,3, 'C'),
(4,4, 'D'), (5,5, NULL), (6,6, NULL), (7,7, 'E'), (8,8, 'F'), (9,9, 'G'),
(10,10,'H'), (11,11, NULL), (12,12, NULL);
create table t3
(
a int not null,
b int,
description varchar(50),
key t3_ix1 (a,b)
);
insert into t3 values (1, 1, 'bar'),(2,2,'buz'),(0,NULL, 'gold');
insert into t3 select seq, seq, 'junk' from seq_3_to_13;
analyze table t1, t2, t3;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK
test.t3 analyze status Engine-independent statistics collected
test.t3 analyze status OK
set optimizer_switch='default';
set statement optimizer_switch='split_materialized=on' for explain format=json select * from t1
join t2 on t1.a = t2.a and t1.b <=> t2.b
join
(
select a, b, description from t3 group by a, b
) dt on dt.a = t1.a and dt.b <=> t1.b and dt.b <=> t2.b
where dt.a < 1;
EXPLAIN
{
"query_block": {
"select_id": 1,
"cost": "REPLACED",
"nested_loop": [
{
"table": {
"table_name": "t1",
"access_type": "range",
"possible_keys": ["t1_ix1"],
"key": "t1_ix1",
"key_length": "4",
"used_key_parts": ["a"],
"loops": 1,
"rows": 1,
"cost": "REPLACED",
"filtered": 100,
"index_condition": "t1.a < 1"
}
},
{
"table": {
"table_name": "t2",
"access_type": "ref",
"possible_keys": ["t2_ix1"],
"key": "t2_ix1",
"key_length": "9",
"used_key_parts": ["a", "b"],
"ref": ["test.t1.a", "test.t1.b"],
"loops": 1,
"rows": 1,
"cost": "REPLACED",
"filtered": 100,
"index_condition": "t1.b <=> t2.b"
}
},
{
"table": {
"table_name": "<derived3>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "9",
"used_key_parts": ["a", "b"],
"ref": ["test.t1.a", "test.t1.b"],
"loops": 1,
"rows": 1,
"cost": "REPLACED",
"filtered": 100,
"attached_condition": "dt.b <=> t1.b and dt.b <=> t2.b",
"materialized": {
"lateral": 1,
"query_block": {
"select_id": 3,
"cost": "REPLACED",
"nested_loop": [
{
"table": {
"table_name": "t3",
"access_type": "ref",
"possible_keys": ["t3_ix1"],
"key": "t3_ix1",
"key_length": "9",
"used_key_parts": ["a", "b"],
"ref": ["test.t1.a", "test.t1.b"],
"loops": 1,
"rows": 1,
"cost": "REPLACED",
"filtered": 100,
"index_condition": "t3.a < 1 and t3.b <=> t1.b and t3.b <=> t2.b and t3.a = t2.a"
}
}
]
}
}
}
}
]
}
}
set statement optimizer_switch='split_materialized=on' for select * from t1
join t2 on t1.a = t2.a and t1.b <=> t2.b
join
(
select a, b, description from t3 group by a, b
) dt on dt.a = t1.a and dt.b <=> t1.b and dt.b <=> t2.b
where dt.a < 1;
a b c d amount a b name a b description
0 NULL 0 NULL 10 0 NULL a 0 NULL gold
set statement optimizer_switch='split_materialized=off' for explain format=json select * from t1
join t2 on t1.a = t2.a and t1.b <=> t2.b
join
(
select a, b, description from t3 group by a, b
) dt on dt.a = t1.a and dt.b <=> t1.b and dt.b <=> t2.b
where dt.a < 1;
EXPLAIN
{
"query_block": {
"select_id": 1,
"cost": "REPLACED",
"nested_loop": [
{
"table": {
"table_name": "t1",
"access_type": "range",
"possible_keys": ["t1_ix1"],
"key": "t1_ix1",
"key_length": "4",
"used_key_parts": ["a"],
"loops": 1,
"rows": 1,
"cost": "REPLACED",
"filtered": 100,
"index_condition": "t1.a < 1"
}
},
{
"table": {
"table_name": "t2",
"access_type": "ref",
"possible_keys": ["t2_ix1"],
"key": "t2_ix1",
"key_length": "9",
"used_key_parts": ["a", "b"],
"ref": ["test.t1.a", "test.t1.b"],
"loops": 1,
"rows": 1,
"cost": "REPLACED",
"filtered": 100,
"index_condition": "t1.b <=> t2.b"
}
},
{
"table": {
"table_name": "<derived3>",
"access_type": "ref",
"possible_keys": ["key0"],
"key": "key0",
"key_length": "9",
"used_key_parts": ["a", "b"],
"ref": ["test.t1.a", "test.t1.b"],
"loops": 1,
"rows": 1,
"cost": "REPLACED",
"filtered": 100,
"attached_condition": "dt.b <=> t1.b and dt.b <=> t2.b",
"materialized": {
"query_block": {
"select_id": 3,
"cost": "REPLACED",
"nested_loop": [
{
"table": {
"table_name": "t3",
"access_type": "range",
"possible_keys": ["t3_ix1"],
"key": "t3_ix1",
"key_length": "4",
"used_key_parts": ["a"],
"loops": 1,
"rows": 1,
"cost": "REPLACED",
"filtered": 100,
"index_condition": "t3.a < 1"
}
}
]
}
}
}
}
]
}
}
set statement optimizer_switch='split_materialized=off' for select * from t1
join t2 on t1.a = t2.a and t1.b <=> t2.b
join
(
select a, b, description from t3 group by a, b
) dt on dt.a = t1.a and dt.b <=> t1.b and dt.b <=> t2.b
where dt.a < 1;
a b c d amount a b name a b description
0 NULL 0 NULL 10 0 NULL a 0 NULL gold
drop table t1, t2, t3;
SET GLOBAL innodb_stats_persistent=@save_innodb_stats_persistent;

View File

@ -603,5 +603,67 @@ evalp set statement optimizer_switch='split_materialized=off' for $query;
DROP TABLE t1,t2;
--echo #
--echo # MDEV-37230 Incorrect handling of NULL join conditions when using
--echo # split-materialized
--echo #
create table t1
(
a int not null,
b int,
c int,
d int,
amount decimal,
key t1_ix1 (a,b)
) engine=innodb;
insert into t1 values (0, NULL, 0, NULL, 10.0000), (1, 1, 1, 1, 10.0000),
(2, 2, 2, 2, 20.0000), (3, 3, 3, 3, 30.0000), (4, 4, 4, 4, 40.0000),
(5, 5, 5, 5, NULL), (6, 6, 6, 6, NULL), (7, 7, 7, 7, 70.0000),
(8, 8, 8, 8, 80.0000);
create table t2
(
a int NOT NULL,
b int,
name varchar(50),
key t2_ix1 (a,b)
) engine=innodb;
insert into t2 values (0, NULL, 'a'), (1, NULL, 'A'), (2, 2, 'B'), (3,3, 'C'),
(4,4, 'D'), (5,5, NULL), (6,6, NULL), (7,7, 'E'), (8,8, 'F'), (9,9, 'G'),
(10,10,'H'), (11,11, NULL), (12,12, NULL);
create table t3
(
a int not null,
b int,
description varchar(50),
key t3_ix1 (a,b)
);
insert into t3 values (1, 1, 'bar'),(2,2,'buz'),(0,NULL, 'gold');
insert into t3 select seq, seq, 'junk' from seq_3_to_13;
let $q=
select * from t1
join t2 on t1.a = t2.a and t1.b <=> t2.b
join
(
select a, b, description from t3 group by a, b
) dt on dt.a = t1.a and dt.b <=> t1.b and dt.b <=> t2.b
where dt.a < 1;
analyze table t1, t2, t3;
set optimizer_switch='default';
--source include/analyze-format.inc
eval set statement optimizer_switch='split_materialized=on' for explain format=json $q;
eval set statement optimizer_switch='split_materialized=on' for $q;
--source include/analyze-format.inc
eval set statement optimizer_switch='split_materialized=off' for explain format=json $q;
eval set statement optimizer_switch='split_materialized=off' for $q;
drop table t1, t2, t3;
# End of 11.4 tests;
SET GLOBAL innodb_stats_persistent=@save_innodb_stats_persistent;

View File

@ -605,13 +605,19 @@ void TABLE::add_splitting_info_for_key_field(KEY_FIELD *key_field)
THD *thd= in_use;
Item *left_item= spl_field->producing_item->build_clone(thd);
Item *right_item= key_field->val->build_clone(thd);
Item_func_eq *eq_item= 0;
Item_bool_func *eq_item= 0;
if (left_item && right_item)
{
right_item->walk(&Item::set_fields_as_dependent_processor,
false, join->select_lex);
right_item->update_used_tables();
eq_item= new (thd->mem_root) Item_func_eq(thd, left_item, right_item);
// Item_func::EQUAL_FUNC is null-safe, others can use Item_func_eq()
if (key_field->cond->type() == Item::FUNC_ITEM &&
((Item_func*)key_field->cond)->functype() == Item_func::EQUAL_FUNC)
eq_item= new (thd->mem_root) Item_func_equal(thd, left_item, right_item);
else
eq_item= new (thd->mem_root) Item_func_eq(thd, left_item, right_item);
}
if (!eq_item)
return;