From 47feb5795ca272c1b4d0ff9968a658afc12b2003 Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Wed, 10 Apr 2024 19:06:22 +0200 Subject: [PATCH] Support named items in dimension handling for HTMLCollection Closes GH-13937. --- ext/dom/html_collection.c | 93 ++++++++++++++++--- ext/dom/html_collection.h | 23 +++++ ext/dom/nodelist.c | 2 +- ext/dom/nodelist.h | 2 +- ext/dom/php_dom.c | 8 +- .../HTMLCollection_dimension_errors.phpt | 32 +++++++ .../HTMLCollection_named_reads.phpt | 55 +++++++++-- 7 files changed, 193 insertions(+), 22 deletions(-) create mode 100644 ext/dom/html_collection.h create mode 100644 ext/dom/tests/modern/html/interactions/HTMLCollection_dimension_errors.phpt diff --git a/ext/dom/html_collection.c b/ext/dom/html_collection.c index dfcc304e8c7..3b74074559a 100644 --- a/ext/dom/html_collection.c +++ b/ext/dom/html_collection.c @@ -21,29 +21,33 @@ #include "php.h" #if defined(HAVE_LIBXML) && defined(HAVE_DOM) #include "php_dom.h" +#include "nodelist.h" +#include "html_collection.h" #include "namespace_compat.h" +typedef struct _dom_named_item { + dom_object *context_intern; + xmlNodePtr node; +} dom_named_item; + /* https://dom.spec.whatwg.org/#dom-htmlcollection-nameditem-key */ -PHP_METHOD(DOM_HTMLCollection, namedItem) +static dom_named_item dom_html_collection_named_item(zend_string *key, zend_object *zobj) { - zend_string *key; - ZEND_PARSE_PARAMETERS_START(1, 1) - Z_PARAM_PATH_STR(key) - ZEND_PARSE_PARAMETERS_END(); + dom_named_item ret = {NULL, NULL}; /* 1. If key is the empty string, return null. */ if (ZSTR_LEN(key) == 0) { - RETURN_NULL(); + return ret; } - dom_object *intern = Z_DOMOBJ_P(ZEND_THIS); + dom_object *intern = php_dom_obj_from_obj(zobj); dom_nnodemap_object *objmap = intern->ptr; /* 2. 
Return the first element in the collection for which at least one of the following is true: */ xmlNodePtr basep = dom_object_get_node(objmap->baseobj); if (basep != NULL) { int cur = 0; - int next = cur; + int next = cur; /* not +1, otherwise we skip the first candidate */ xmlNodePtr candidate = basep->children; while (candidate != NULL) { candidate = dom_get_elements_by_tag_name_ns_raw(basep, candidate, objmap->ns, objmap->local, objmap->local_lower, &cur, next); @@ -55,20 +59,85 @@ PHP_METHOD(DOM_HTMLCollection, namedItem) /* it has an ID which is key; */ if ((attr = xmlHasNsProp(candidate, BAD_CAST "id", NULL)) != NULL && dom_compare_value(attr, BAD_CAST ZSTR_VAL(key))) { - DOM_RET_OBJ(candidate, objmap->baseobj); - return; + ret.context_intern = objmap->baseobj; + ret.node = candidate; + return ret; } /* it is in the HTML namespace and has a name attribute whose value is key; */ else if (php_dom_ns_is_fast(candidate, php_dom_ns_is_html_magic_token)) { if ((attr = xmlHasNsProp(candidate, BAD_CAST "name", NULL)) != NULL && dom_compare_value(attr, BAD_CAST ZSTR_VAL(key))) { - DOM_RET_OBJ(candidate, objmap->baseobj); - return; + ret.context_intern = objmap->baseobj; + ret.node = candidate; + return ret; } } next = cur + 1; } } + + return ret; +} + +static void dom_html_collection_named_item_into_zval(zval *return_value, zend_string *key, zend_object *zobj) +{ + dom_named_item named_item = dom_html_collection_named_item(key, zobj); + if (named_item.node != NULL) { + DOM_RET_OBJ(named_item.node, named_item.context_intern); + } else { + RETURN_NULL(); + } +} + +PHP_METHOD(DOM_HTMLCollection, namedItem) +{ + zend_string *key; + ZEND_PARSE_PARAMETERS_START(1, 1) + Z_PARAM_PATH_STR(key) /* keep rejecting embedded NUL bytes: key is compared as a C string via ZSTR_VAL() */ + ZEND_PARSE_PARAMETERS_END(); + dom_html_collection_named_item_into_zval(return_value, key, Z_OBJ_P(ZEND_THIS)); +} + +zval *dom_html_collection_read_dimension(zend_object *object, zval *offset, int type, zval *rv) +{ + if (UNEXPECTED(!offset)) { + zend_throw_error(NULL, "Cannot append to 
%s", ZSTR_VAL(object->ce->name)); + return NULL; + } + + dom_nodelist_dimension_index index = dom_modern_nodelist_get_index(offset); + if (UNEXPECTED(index.type == DOM_NODELIST_DIM_ILLEGAL)) { + zend_illegal_container_offset(object->ce->name, offset, type); + return NULL; + } + + if (index.type == DOM_NODELIST_DIM_STRING) { + dom_html_collection_named_item_into_zval(rv, index.str, object); + } else { + ZEND_ASSERT(index.type == DOM_NODELIST_DIM_LONG); + php_dom_nodelist_get_item_into_zval(php_dom_obj_from_obj(object)->ptr, index.lval, rv); + } + + return rv; +} + +int dom_html_collection_has_dimension(zend_object *object, zval *member, int check_empty) +{ + /* If it exists, it cannot be empty because nodes aren't empty. */ + ZEND_IGNORE_VALUE(check_empty); + + dom_nodelist_dimension_index index = dom_modern_nodelist_get_index(member); + if (UNEXPECTED(index.type == DOM_NODELIST_DIM_ILLEGAL)) { + zend_illegal_container_offset(object->ce->name, member, BP_VAR_IS); + return 0; + } + + if (index.type == DOM_NODELIST_DIM_STRING) { + return dom_html_collection_named_item(index.str, object).node != NULL; + } else { + ZEND_ASSERT(index.type == DOM_NODELIST_DIM_LONG); + return index.lval >= 0 && index.lval < php_dom_get_nodelist_length(php_dom_obj_from_obj(object)); + } } #endif diff --git a/ext/dom/html_collection.h b/ext/dom/html_collection.h new file mode 100644 index 00000000000..a94daa1aae8 --- /dev/null +++ b/ext/dom/html_collection.h @@ -0,0 +1,23 @@ +/* + +----------------------------------------------------------------------+ + | Copyright (c) The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | https://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it 
through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Niels Dossche | + +----------------------------------------------------------------------+ +*/ + +#ifndef PHP_HTML_COLLECTION_H +#define PHP_HTML_COLLECTION_H + +zval *dom_html_collection_read_dimension(zend_object *object, zval *offset, int type, zval *rv); +int dom_html_collection_has_dimension(zend_object *object, zval *member, int check_empty); + +#endif diff --git a/ext/dom/nodelist.c b/ext/dom/nodelist.c index beda8939e55..b615705b621 100644 --- a/ext/dom/nodelist.c +++ b/ext/dom/nodelist.c @@ -250,7 +250,7 @@ ZEND_METHOD(DOMNodeList, getIterator) zend_create_internal_iterator_zval(return_value, ZEND_THIS); } -dom_nodelist_dimension_index dom_modern_nodelist_get_index(zval *offset) +dom_nodelist_dimension_index dom_modern_nodelist_get_index(const zval *offset) { dom_nodelist_dimension_index ret; diff --git a/ext/dom/nodelist.h b/ext/dom/nodelist.h index af76e34e41d..72264d683f3 100644 --- a/ext/dom/nodelist.h +++ b/ext/dom/nodelist.h @@ -33,7 +33,7 @@ typedef struct _dom_nodelist_dimension_index { void php_dom_nodelist_get_item_into_zval(dom_nnodemap_object *objmap, zend_long index, zval *return_value); int php_dom_get_nodelist_length(dom_object *obj); -dom_nodelist_dimension_index dom_modern_nodelist_get_index(zval *offset); +dom_nodelist_dimension_index dom_modern_nodelist_get_index(const zval *offset); zval *dom_modern_nodelist_read_dimension(zend_object *object, zval *offset, int type, zval *rv); int dom_modern_nodelist_has_dimension(zend_object *object, zval *member, int check_empty); diff --git a/ext/dom/php_dom.c b/ext/dom/php_dom.c index c6cead3578d..978fbe96dec 100644 --- a/ext/dom/php_dom.c +++ b/ext/dom/php_dom.c @@ -24,6 +24,7 @@ #if defined(HAVE_LIBXML) && defined(HAVE_DOM) #include "php_dom.h" #include "nodelist.h" +#include "html_collection.h" 
#include "namespace_compat.h" #include "internal_helpers.h" #include "php_dom_arginfo.h" @@ -90,6 +91,7 @@ static zend_object_handlers dom_nnodemap_object_handlers; static zend_object_handlers dom_nodelist_object_handlers; static zend_object_handlers dom_modern_nnodemap_object_handlers; static zend_object_handlers dom_modern_nodelist_object_handlers; +static zend_object_handlers dom_html_collection_object_handlers; static zend_object_handlers dom_object_namespace_node_handlers; static zend_object_handlers dom_modern_domimplementation_object_handlers; #ifdef LIBXML_XPATH_ENABLED @@ -715,6 +717,10 @@ PHP_MINIT_FUNCTION(dom) dom_modern_nodelist_object_handlers.read_dimension = dom_modern_nodelist_read_dimension; dom_modern_nodelist_object_handlers.has_dimension = dom_modern_nodelist_has_dimension; + memcpy(&dom_html_collection_object_handlers, &dom_modern_nodelist_object_handlers, sizeof(zend_object_handlers)); + dom_html_collection_object_handlers.read_dimension = dom_html_collection_read_dimension; + dom_html_collection_object_handlers.has_dimension = dom_html_collection_has_dimension; + memcpy(&dom_object_namespace_node_handlers, &dom_object_handlers, sizeof(zend_object_handlers)); dom_object_namespace_node_handlers.offset = XtOffsetOf(dom_object_namespace_node, dom.std); dom_object_namespace_node_handlers.free_obj = dom_object_namespace_node_free_storage; @@ -927,7 +933,7 @@ PHP_MINIT_FUNCTION(dom) dom_html_collection_class_entry = register_class_DOM_HTMLCollection(zend_ce_aggregate, zend_ce_countable); dom_html_collection_class_entry->create_object = dom_nnodemap_objects_new; - dom_html_collection_class_entry->default_object_handlers = &dom_modern_nodelist_object_handlers; + dom_html_collection_class_entry->default_object_handlers = &dom_html_collection_object_handlers; dom_html_collection_class_entry->get_iterator = php_dom_get_iterator; zend_hash_add_new_ptr(&classes, dom_html_collection_class_entry->name, &dom_nodelist_prop_handlers); diff --git 
a/ext/dom/tests/modern/html/interactions/HTMLCollection_dimension_errors.phpt b/ext/dom/tests/modern/html/interactions/HTMLCollection_dimension_errors.phpt new file mode 100644 index 00000000000..e68d1ede386 --- /dev/null +++ b/ext/dom/tests/modern/html/interactions/HTMLCollection_dimension_errors.phpt @@ -0,0 +1,32 @@ +--TEST-- +HTMLCollection dimension handling errors (append and illegal offset types) +--EXTENSIONS-- +dom +--FILE-- +'); + +try { + $dom->getElementsByTagName('root')[][1] = 1; +} catch (Error $e) { + echo $e->getMessage(), "\n"; +} + +try { + $dom->getElementsByTagName('root')[true]; +} catch (Error $e) { + echo $e->getMessage(), "\n"; +} + +try { + isset($dom->getElementsByTagName('root')[true]); +} catch (Error $e) { + echo $e->getMessage(), "\n"; +} + +?> +--EXPECT-- +Cannot append to DOM\HTMLCollection +Cannot access offset of type bool on DOM\HTMLCollection +Cannot access offset of type bool in isset or empty diff --git a/ext/dom/tests/modern/html/interactions/HTMLCollection_named_reads.phpt b/ext/dom/tests/modern/html/interactions/HTMLCollection_named_reads.phpt index 708f618d1a9..82883e4ceec 100644 --- a/ext/dom/tests/modern/html/interactions/HTMLCollection_named_reads.phpt +++ b/ext/dom/tests/modern/html/interactions/HTMLCollection_named_reads.phpt @@ -22,20 +22,61 @@ $xml = <<getElementsByTagName('node')->namedItem('foo')?->textContent); -var_dump($dom->getElementsByTagName('node')->namedItem('')?->textContent); -var_dump($dom->getElementsByTagName('node')->namedItem('does not exist')?->textContent); -var_dump($dom->getElementsByTagName('node')->namedItem('wrong')?->textContent); -var_dump($dom->getElementsByTagName('node')->namedItem('bar')?->textContent); -var_dump($dom->getElementsByTagName('x')->namedItem('foo')?->textContent); -var_dump($dom->getElementsByTagName('x')->namedItem('footest')?->textContent); + +function test($obj, $name) { + echo "--- Query \"$name\" ---\n"; + var_dump($obj->namedItem($name)?->textContent); + 
var_dump($obj[$name]?->textContent); + var_dump(isset($obj[$name])); + + // Search to check for dimension access consistency + $node = $obj[$name]; + if ($node) { + $found = false; + for ($i = 0; $i < $obj->length && !$found; $i++) { + $found = $obj[$i] === $node; + } + if (!$found) { + throw new Error('inconsistency in dimension access'); + } + } +} + +test($dom->getElementsByTagName('node'), 'foo'); +test($dom->getElementsByTagName('node'), ''); +test($dom->getElementsByTagName('node'), 'does not exist'); +test($dom->getElementsByTagName('node'), 'wrong'); +test($dom->getElementsByTagName('node'), 'bar'); +test($dom->getElementsByTagName('x'), 'foo'); +test($dom->getElementsByTagName('x'), 'footest'); ?> --EXPECT-- +--- Query "foo" --- string(1) "5" +string(1) "5" +bool(true) +--- Query "" --- NULL NULL +bool(false) +--- Query "does not exist" --- +NULL +NULL +bool(false) +--- Query "wrong" --- string(1) "4" +string(1) "4" +bool(true) +--- Query "bar" --- string(12) "with html ns" +string(12) "with html ns" +bool(true) +--- Query "foo" --- string(1) "2" +string(1) "2" +bool(true) +--- Query "footest" --- string(13) "2 with entity" +string(13) "2 with entity" +bool(true)