/*
   +----------------------------------------------------------------------+
   | Copyright (c) The PHP Group                                          |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | https://www.php.net/license/3_01.txt                                 |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
   | Authors: Niels Dossche <nielsdos@php.net>                            |
   +----------------------------------------------------------------------+
*/

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "php.h"
#if defined(HAVE_LIBXML) && defined(HAVE_DOM)
#include "php_dom.h"
#include "namespace_compat.h"
#include "private_data.h"
#include "internal_helpers.h"

/* Magic tokens: unique identity pointers that are stored in the `_private` field of an xmlNs,
 * so that a namespace can be recognised with a pointer comparison.
 * They need to be pointers because the `_private` field is a pointer, however we can choose the contents ourselves.
 * Each token points to a struct whose first member holds the namespace URI; the slow path of
 * php_dom_ns_is_fast_ex() reads the token as that URI string to compare it against the href.
 * We need to keep these at least 4-byte aligned because the pointer may be tagged (although for now 2 byte alignment works too).
 * We use a trick: we declare a struct with a double member to force the alignment. */
#define DECLARE_NS_TOKEN(name, uri)			\
	static const struct {                   \
		char val[sizeof(uri)];				\
		double align;						\
	} decl_##name = { uri, 0.0 };			\
	PHP_DOM_EXPORT const php_dom_ns_magic_token *(name) = (const php_dom_ns_magic_token *) &decl_##name;
DECLARE_NS_TOKEN(php_dom_ns_is_html_magic_token, DOM_XHTML_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_mathml_magic_token, DOM_MATHML_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_svg_magic_token, DOM_SVG_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_xlink_magic_token, DOM_XLINK_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_xml_magic_token, DOM_XML_NS_URI);
DECLARE_NS_TOKEN(php_dom_ns_is_xmlns_magic_token, DOM_XMLNS_NS_URI);

/* Element destructor of a prefix map: frees the stored pointer, but only
 * for entries that are marked as owned. Unowned entries are left alone. */
static void php_dom_libxml_ns_mapper_prefix_map_element_dtor(zval *zv)
{
	if (!DOM_Z_IS_OWNED(zv)) {
		return;
	}

	efree(Z_PTR_P(zv));
}

/* Returns the prefix map that belongs to `*uri`, creating and registering an
 * empty one when the URI is not known to the mapper yet.
 * When the URI was already registered, `*uri` is redirected to the key string
 * stored in the mapper's table. */
static HashTable *php_dom_libxml_ns_mapper_ensure_prefix_map(php_dom_libxml_ns_mapper *mapper, zend_string **uri)
{
	zval *existing = zend_hash_find(&mapper->uri_to_prefix_map, *uri);

	if (existing != NULL) {
		/* The zval-to-Bucket cast below only works if the value is the first member of the bucket.
		 * A static assert would be nicer, but we're stuck at C99. */
		ZEND_ASSERT(XtOffsetOf(Bucket, val) == 0);
		ZEND_ASSERT(Z_TYPE_P(existing) == IS_ARRAY);
		/* Hand back the key string, as that one lives long enough. */
		*uri = ((Bucket *) existing)->key;
		return Z_ARRVAL_P(existing);
	}

	/* First time this URI is seen: register a fresh, empty prefix map for it. */
	HashTable *created = emalloc(sizeof(HashTable));
	zend_hash_init(created, 0, NULL, php_dom_libxml_ns_mapper_prefix_map_element_dtor, false);

	zval created_zv;
	ZVAL_ARR(&created_zv, created);
	zend_hash_add_new(&mapper->uri_to_prefix_map, *uri, &created_zv);

	return created;
}

/* Lazily fills the cache slot `*ptr` with the prefixless namespace for `uri`
 * (of `length` bytes) and tags the new namespace with `token`.
 * Returns the cached namespace. */
static xmlNsPtr php_dom_libxml_ns_mapper_ensure_cached_ns(php_dom_libxml_ns_mapper *mapper, xmlNsPtr *ptr, const char *uri, size_t length, const php_dom_ns_magic_token *token)
{
	xmlNsPtr cached = *ptr;
	if (EXPECTED(cached != NULL)) {
		return cached;
	}

	zend_string *uri_str = zend_string_init(uri, length, false);
	cached = php_dom_libxml_ns_mapper_get_ns(mapper, NULL, uri_str);
	*ptr = cached;
	/* Store the token right away so later namespace checks can take the pointer-comparison path. */
	cached->_private = (void *) token;
	zend_string_release_ex(uri_str, false);

	return cached;
}

/* Returns the mapper's cached prefixless namespace for the XHTML namespace URI, creating it on first use. */
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_ensure_html_ns(php_dom_libxml_ns_mapper *mapper)
{
	xmlNsPtr *cache_slot = &mapper->html_ns;
	const size_t uri_len = sizeof(DOM_XHTML_NS_URI) - 1;

	return php_dom_libxml_ns_mapper_ensure_cached_ns(mapper, cache_slot, DOM_XHTML_NS_URI, uri_len, php_dom_ns_is_html_magic_token);
}

/* Returns the mapper's cached prefixless namespace for the xmlns namespace URI, creating it on first use. */
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_ensure_prefixless_xmlns_ns(php_dom_libxml_ns_mapper *mapper)
{
	xmlNsPtr *cache_slot = &mapper->prefixless_xmlns_ns;
	const size_t uri_len = sizeof(DOM_XMLNS_NS_URI) - 1;

	return php_dom_libxml_ns_mapper_ensure_cached_ns(mapper, cache_slot, DOM_XMLNS_NS_URI, uri_len, php_dom_ns_is_xmlns_magic_token);
}

/* Allocates a zeroed xmlNs of type XML_LOCAL_NAMESPACE whose prefix and href
 * point into the given strings. No copy of the string data is made.
 * An empty prefix is represented as a NULL prefix. */
static xmlNsPtr dom_create_owned_ns(zend_string *prefix, zend_string *uri)
{
	ZEND_ASSERT(prefix != NULL);
	ZEND_ASSERT(uri != NULL);

	xmlNsPtr ns = emalloc(sizeof(*ns));
	memset(ns, 0, sizeof(*ns));
	ns->type = XML_LOCAL_NAMESPACE;

	/* The prefix and uri strings are kept alive because they're the hash table keys that lead to this entry. */
	if (ZSTR_LEN(prefix) != 0) {
		ns->prefix = BAD_CAST ZSTR_VAL(prefix);
	} else {
		ns->prefix = NULL;
	}
	ns->href = BAD_CAST ZSTR_VAL(uri);

	/* Note ns->context is unused in libxml2 at the moment, and if it were used it would be for
	 * LIBXML_NAMESPACE_DICT which is opt-in anyway. */
	return ns;
}

/* Looks up the namespace for the (prefix, uri) pair in the mapper, creating and
 * registering it when it does not exist yet.
 * NULL arguments are treated as empty strings. Returns NULL when both the
 * prefix and the uri are empty. */
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_get_ns(php_dom_libxml_ns_mapper *mapper, zend_string *prefix, zend_string *uri)
{
	if (prefix == NULL) {
		prefix = zend_empty_string;
	}
	if (uri == NULL) {
		uri = zend_empty_string;
	}

	if (ZSTR_LEN(prefix) == 0 && ZSTR_LEN(uri) == 0) {
		return NULL;
	}

	/* Note: this may swap `uri` for the key string stored in the mapper. */
	HashTable *prefix_map = php_dom_libxml_ns_mapper_ensure_prefix_map(mapper, &uri);

	xmlNsPtr ns = zend_hash_find_ptr(prefix_map, prefix);
	if (ns == NULL) {
		/* Not registered yet: create it and let the prefix map own it. */
		ns = dom_create_owned_ns(prefix, uri);

		zval owned_zv;
		DOM_Z_OWNED(&owned_zv, ns);
		zend_hash_add_new(prefix_map, prefix, &owned_zv);
	}

	return ns;
}

/* Same as php_dom_libxml_ns_mapper_get_ns(), but the prefix is given as a raw
 * buffer of `prefix_len` bytes instead of a zend_string. */
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_get_ns_raw_prefix_string(php_dom_libxml_ns_mapper *mapper, const xmlChar *prefix, size_t prefix_len, zend_string *uri)
{
	/* Fast path: no temporary string is needed for an empty prefix. */
	if (prefix_len == 0) {
		return php_dom_libxml_ns_mapper_get_ns(mapper, zend_empty_string, uri);
	}

	zend_string *tmp_prefix = zend_string_init((const char *) prefix, prefix_len, false);
	xmlNsPtr result = php_dom_libxml_ns_mapper_get_ns(mapper, tmp_prefix, uri);
	zend_string_release_ex(tmp_prefix, false);

	return result;
}

/* Same as php_dom_libxml_ns_mapper_get_ns(), but both the prefix and the uri
 * are given as raw buffers with explicit lengths. */
static xmlNsPtr php_dom_libxml_ns_mapper_get_ns_raw_strings_ex(php_dom_libxml_ns_mapper *mapper, const char *prefix, size_t prefix_len, const char *uri, size_t uri_len)
{
	zend_string *tmp_prefix = zend_string_init(prefix, prefix_len, false);
	zend_string *tmp_uri = zend_string_init(uri, uri_len, false);

	xmlNsPtr result = php_dom_libxml_ns_mapper_get_ns(mapper, tmp_prefix, tmp_uri);

	/* Drop our references to the temporaries created above. */
	zend_string_release_ex(tmp_prefix, false);
	zend_string_release_ex(tmp_uri, false);

	return result;
}

/* Variant of the raw strings lookup for NUL-terminated strings.
 * Both `prefix` and `uri` are passed to strlen(), so neither may be NULL. */
static zend_always_inline xmlNsPtr php_dom_libxml_ns_mapper_get_ns_raw_strings(php_dom_libxml_ns_mapper *mapper, const char *prefix, const char *uri)
{
	const size_t prefix_len = strlen(prefix);
	const size_t uri_len = strlen(uri);

	return php_dom_libxml_ns_mapper_get_ns_raw_strings_ex(mapper, prefix, prefix_len, uri, uri_len);
}

/* Variant of the raw strings lookup that accepts NULL for `prefix` and/or `uri`;
 * a NULL argument is treated as the empty string. */
PHP_DOM_EXPORT xmlNsPtr php_dom_libxml_ns_mapper_get_ns_raw_strings_nullsafe(php_dom_libxml_ns_mapper *mapper, const char *prefix, const char *uri)
{
	const char *safe_prefix = prefix != NULL ? prefix : "";
	const char *safe_uri = uri != NULL ? uri : "";

	return php_dom_libxml_ns_mapper_get_ns_raw_strings(mapper, safe_prefix, safe_uri);
}

/* Registers an existing namespace `ns` in the mapper under its href and prefix.
 * The entry is stored as unowned, so the prefix map's destructor will not free it.
 * If an entry for that href/prefix pair already exists, it is kept as is. */
static void php_dom_libxml_ns_mapper_store_and_normalize_parsed_ns(php_dom_libxml_ns_mapper *mapper, xmlNsPtr ns)
{
	ZEND_ASSERT(ns != NULL);

	/* The lookup may redirect `lookup_href` to the key stored in the mapper,
	 * so remember the temporary separately to be able to release it. */
	zend_string *tmp_href = zend_string_init((const char *) ns->href, xmlStrlen(ns->href), false);
	zend_string *lookup_href = tmp_href;
	HashTable *prefix_map = php_dom_libxml_ns_mapper_ensure_prefix_map(mapper, &lookup_href);
	zend_string_release_ex(tmp_href, false);

	/* A missing prefix is stored under the empty string. */
	const char *prefix = "";
	size_t prefix_len = 0;
	if (ns->prefix != NULL) {
		prefix = (const char *) ns->prefix;
		prefix_len = xmlStrlen(ns->prefix);
	}

	zval unowned_zv;
	DOM_Z_UNOWNED(&unowned_zv, ns);
	zend_hash_str_add(prefix_map, prefix, prefix_len, &unowned_zv);
}

/* State shared by the per-node helpers during a single php_dom_libxml_reconcile_modern() run. */
typedef struct {
	/* Fast lookup for created mappings.
	 * Keyed by the mangled pointer of the original namespace; the value is the namespace that replaces it. */
	HashTable old_ns_to_new_ns_ptr;
	/* It is common that the last created mapping will be used for a while,
	 * cache it too to bypass the hash table. */
	xmlNsPtr last_mapped_src, last_mapped_dst;
	/* Mapper from which the replacement namespaces are obtained. */
	php_dom_libxml_ns_mapper *ns_mapper;
} dom_libxml_reconcile_ctx;

/* Returns a pointer to the namespace mapper embedded in the private data of `object`. */
PHP_DOM_EXPORT php_dom_libxml_ns_mapper *php_dom_get_ns_mapper(dom_object *object)
{
	return &php_dom_get_private_data(object)->ns_mapper;
}

/* Sets an attribute on `node` that represents the namespace declaration `ns`:
 * - with a prefix: an attribute named after the prefix, in the "xmlns"-prefixed xmlns namespace;
 * - without a prefix: an attribute named "xmlns", in the prefixless xmlns namespace.
 * The attribute value is the namespace href. Returns the result of xmlSetNsProp(). */
PHP_DOM_EXPORT xmlAttrPtr php_dom_ns_compat_mark_attribute(php_dom_libxml_ns_mapper *mapper, xmlNodePtr node, xmlNsPtr ns)
{
	const xmlChar *attr_name;
	xmlNsPtr attr_ns;

	if (ns->prefix == NULL) {
		attr_ns = php_dom_libxml_ns_mapper_ensure_prefixless_xmlns_ns(mapper);
		attr_name = BAD_CAST "xmlns";
	} else {
		attr_ns = php_dom_libxml_ns_mapper_get_ns_raw_strings(mapper, "xmlns", DOM_XMLNS_NS_URI);
		attr_name = ns->prefix;
	}

	ZEND_ASSERT(attr_ns != NULL);

	return xmlSetNsProp(node, attr_ns, attr_name, ns->href);
}

/* Converts every namespace declaration in node->nsDef into an attribute on `node`,
 * registers each namespace in the mapper, and hands each one over to
 * php_libxml_set_old_ns() for the node's document.
 * The declaration attributes end up in front of the attributes the node already had,
 * and node->nsDef is cleared afterwards. */
PHP_DOM_EXPORT void php_dom_ns_compat_mark_attribute_list(php_dom_libxml_ns_mapper *mapper, xmlNodePtr node)
{
	xmlNsPtr cur_ns = node->nsDef;
	if (cur_ns == NULL) {
		return;
	}

	/* We want to prepend at the front, but in order of the namespace definitions.
	 * So temporarily unlink the existing properties and add them again at the end. */
	xmlAttrPtr saved_attrs = node->properties;
	node->properties = NULL;

	xmlAttrPtr tail = NULL;
	while (cur_ns != NULL) {
		tail = php_dom_ns_compat_mark_attribute(mapper, node, cur_ns);
		php_dom_libxml_ns_mapper_store_and_normalize_parsed_ns(mapper, cur_ns);

		/* Detach the declaration from the list before handing it over. */
		xmlNsPtr following = cur_ns->next;
		cur_ns->next = NULL;
		php_libxml_set_old_ns(node->doc, cur_ns);

		cur_ns = following;
	}

	if (tail == NULL) {
		/* Nothing added, so nothing changed. Only really possible on OOM. */
		node->properties = saved_attrs;
	} else if (saved_attrs != NULL) {
		/* node->properties now points to the first namespace declaration attribute,
		 * chain the original attributes behind the last one. */
		tail->next = saved_attrs;
		saved_attrs->prev = tail;
	}

	node->nsDef = NULL;
}

/* Checks whether `ns` is the namespace identified by `magic_token`.
 * The answer is cached in ns->_private where possible, so that repeated checks
 * come down to a pointer comparison. */
PHP_DOM_EXPORT bool php_dom_ns_is_fast_ex(xmlNsPtr ns, const php_dom_ns_magic_token *magic_token)
{
	ZEND_ASSERT(ns != NULL);

	void *stored = ns->_private;

	/* Cached for fast checking. */
	if (stored == magic_token) {
		return true;
	}

	/* A non-NULL value with a clear low bit is another token, so this can't be a match. */
	if (stored != NULL && ((uintptr_t) stored & 1) == 0) {
		return false;
	}

	/* Slow path: compare the href against the URI string the token points to. */
	if (!xmlStrEqual(ns->href, BAD_CAST magic_token)) {
		return false;
	}

	/* Only overwrite the private data if there is no other data stored. */
	if (stored == NULL) {
		ns->_private = (void *) magic_token;
	}
	return true;
}

/* Checks whether the namespace of `nodep` is the one identified by `magic_token`.
 * A node without a namespace never matches. */
PHP_DOM_EXPORT bool php_dom_ns_is_fast(const xmlNode *nodep, const php_dom_ns_magic_token *magic_token)
{
	ZEND_ASSERT(nodep != NULL);

	xmlNsPtr node_ns = nodep->ns;
	return node_ns != NULL && php_dom_ns_is_fast_ex(node_ns, magic_token);
}

/* Returns true only if `nodep` belongs to a document of type XML_HTML_DOCUMENT_NODE
 * and the node itself is in the HTML namespace. */
PHP_DOM_EXPORT bool php_dom_ns_is_html_and_document_is_html(const xmlNode *nodep)
{
	ZEND_ASSERT(nodep != NULL);

	if (!nodep->doc) {
		return false;
	}
	if (nodep->doc->type != XML_HTML_DOCUMENT_NODE) {
		return false;
	}

	return php_dom_ns_is_fast(nodep, php_dom_ns_is_html_magic_token);
}

/* Fixes up the namespace of a freshly inserted attribute.
 * If the parent has a declaration in scope with the same prefix and href, the attribute is linked to it.
 * Otherwise, for prefixed namespaces, the legacy libxml2 reconciliation is run on the parent,
 * which will rename prefixes if there is a declaration with the same prefix but different uri. */
PHP_DOM_EXPORT void php_dom_reconcile_attribute_namespace_after_insertion(xmlAttrPtr attrp)
{
	ZEND_ASSERT(attrp != NULL);

	xmlNsPtr attr_ns = attrp->ns;
	if (attr_ns == NULL) {
		return;
	}

	/* Try to link to an existing namespace. If that won't work, reconcile. */
	xmlNodePtr parent = attrp->parent;
	xmlNsPtr in_scope = xmlSearchNs(parent->doc, parent, attr_ns->prefix);

	if (in_scope && xmlStrEqual(in_scope->href, attr_ns->href)) {
		/* Doesn't leak because this doesn't define the declaration. */
		attrp->ns = in_scope;
		return;
	}

	if (attr_ns->prefix != NULL) {
		/* Note: explicitly use the legacy reconciliation as it mostly (i.e. as good as it gets for legacy DOM)
		 * does the right thing for attributes. */
		xmlReconciliateNs(parent->doc, parent);
	}
}

/* Replaces node->ns by the namespace the mapper provides for the same prefix/href pair.
 * Mappings are remembered in `ctx` so each source namespace is only resolved once.
 * The node must have a namespace. */
static zend_always_inline void php_dom_libxml_reconcile_modern_single_node(dom_libxml_reconcile_ctx *ctx, xmlNodePtr node)
{
	ZEND_ASSERT(node->ns != NULL);

	xmlNsPtr src_ns = node->ns;

	/* Most recent mapping: bypasses the hash table. */
	if (src_ns == ctx->last_mapped_src) {
		node->ns = ctx->last_mapped_dst;
		return;
	}

	xmlNsPtr dst_ns = zend_hash_index_find_ptr(&ctx->old_ns_to_new_ns_ptr, dom_mangle_pointer_for_key(src_ns));
	if (dst_ns == NULL) {
		/* No mapping yet: fetch the namespace from the mapper and record the mapping. */
		const char *src_prefix = (const char *) src_ns->prefix;
		const char *src_href = (const char *) src_ns->href;
		dst_ns = php_dom_libxml_ns_mapper_get_ns_raw_strings_nullsafe(ctx->ns_mapper, src_prefix, src_href);
		zend_hash_index_add_new_ptr(&ctx->old_ns_to_new_ns_ptr, dom_mangle_pointer_for_key(src_ns), dst_ns);
		ctx->last_mapped_src = src_ns;
		ctx->last_mapped_dst = dst_ns;
	}

	/* If the mapped namespace equals the current one, this store changes nothing. */
	node->ns = dst_ns;
}

/* Tells whether reconciliation can be skipped for an element: it has no children,
 * no attributes, and its namespace is its own first declaration (or both are NULL). */
static zend_always_inline bool dom_libxml_reconcile_fast_element_skip(xmlNodePtr node)
{
	ZEND_ASSERT(node->type == XML_ELEMENT_NODE);

	if (node->children != NULL) {
		return false;
	}
	if (node->properties != NULL) {
		return false;
	}
	return node->ns == node->nsDef;
}

/* Reconciles the namespace of one element and of each of its attributes.
 * Nodes without a namespace are left untouched. */
static zend_always_inline void php_dom_libxml_reconcile_modern_single_element_node(dom_libxml_reconcile_ctx *ctx, xmlNodePtr node)
{
	ZEND_ASSERT(node->type == XML_ELEMENT_NODE);

	/* Since this is modern DOM, the declarations are not on the node and thus there's nothing to add from nsDef. */
	ZEND_ASSERT(node->nsDef == NULL);

	if (node->ns != NULL) {
		php_dom_libxml_reconcile_modern_single_node(ctx, node);
	}

	xmlAttrPtr attr = node->properties;
	while (attr != NULL) {
		if (attr->ns != NULL) {
			php_dom_libxml_reconcile_modern_single_node(ctx, (xmlNodePtr) attr);
		}
		attr = attr->next;
	}
}

/* Makes the namespaces used in `node` come from `ns_mapper`.
 * - Attribute node: only the attribute's own namespace is replaced.
 * - Element node: the element, its attributes, and all descendant elements with their attributes are processed.
 * - Any other node type: nothing happens. */
PHP_DOM_EXPORT void php_dom_libxml_reconcile_modern(php_dom_libxml_ns_mapper *ns_mapper, xmlNodePtr node)
{
	if (node->type == XML_ATTRIBUTE_NODE) {
		xmlNsPtr attr_ns = node->ns;
		if (attr_ns != NULL) {
			node->ns = php_dom_libxml_ns_mapper_get_ns_raw_strings_nullsafe(ns_mapper, (const char *) attr_ns->prefix, (const char *) attr_ns->href);
		}
		return;
	}

	if (node->type != XML_ELEMENT_NODE) {
		return;
	}
	if (dom_libxml_reconcile_fast_element_skip(node)) {
		return;
	}

	dom_libxml_reconcile_ctx ctx;
	ctx.ns_mapper = ns_mapper;
	ctx.last_mapped_src = NULL;
	ctx.last_mapped_dst = NULL;
	zend_hash_init(&ctx.old_ns_to_new_ns_ptr, 0, NULL, NULL, false);

	/* The subtree root first, then everything below it in tree order. */
	php_dom_libxml_reconcile_modern_single_element_node(&ctx, node);

	for (xmlNodePtr cur = node->children; cur != NULL; cur = php_dom_next_in_tree_order(cur, node)) {
		ZEND_ASSERT(cur != node);

		if (cur->type == XML_ELEMENT_NODE) {
			php_dom_libxml_reconcile_modern_single_element_node(&ctx, cur);
		}
	}

	zend_hash_destroy(&ctx.old_ns_to_new_ns_ptr);
}

/* Collects the namespaces in scope for `node` by walking from the node up through its ancestors.
 * For every element on that path, the element's own prefixed namespace (unless `ignore_elements`
 * is set) and the namespaces declared by its prefixed xmlns attributes are gathered.
 * The first namespace found for a prefix wins.
 * The returned list is allocated with the Zend allocator and flagged with origin_is_ns_compat. */
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns(php_dom_libxml_ns_mapper *ns_mapper, const xmlNode *node, bool ignore_elements)
{
	ZEND_ASSERT(node != NULL);

	/* libxml fetches all nsDef items from bottom to top - left to right, ignoring prefixes already in the list.
	 * We don't have nsDef, but we can use the ns pointer (as that is necessarily in scope),
	 * and check the xmlns attributes. */
	HashTable prefix_table;
	zend_hash_init(&prefix_table, 0, NULL, NULL, false);
	zend_hash_real_init_mixed(&prefix_table);

	const xmlNode *cur = node;
	while (cur != NULL) {
		if (cur->type == XML_ELEMENT_NODE) {
			/* Register namespace of element */
			if (!ignore_elements && cur->ns != NULL && cur->ns->prefix != NULL) {
				const char *element_prefix = (const char *) cur->ns->prefix;
				zend_hash_str_add_ptr(&prefix_table, element_prefix, strlen(element_prefix), cur->ns);
			}

			/* Register xmlns attributes */
			for (const xmlAttr *attr = cur->properties; attr != NULL; attr = attr->next) {
				if (attr->ns == NULL || attr->ns->prefix == NULL) {
					continue;
				}
				if (!php_dom_ns_is_fast_ex(attr->ns, php_dom_ns_is_xmlns_magic_token)) {
					continue;
				}
				if (attr->children == NULL || attr->children->content == NULL) {
					continue;
				}

				/* This attribute declares a namespace, get the relevant instance.
				 * The declared namespace is not the same as the namespace of this attribute (which is xmlns). */
				const char *declared_prefix = (const char *) attr->name;
				const char *declared_uri = (const char *) attr->children->content;
				xmlNsPtr declared_ns = php_dom_libxml_ns_mapper_get_ns_raw_strings(ns_mapper, declared_prefix, declared_uri);
				zend_hash_str_add_ptr(&prefix_table, declared_prefix, strlen(declared_prefix), declared_ns);
			}
		}

		cur = cur->parent;
	}

	php_dom_in_scope_ns in_scope_ns;
	in_scope_ns.origin_is_ns_compat = true;
	in_scope_ns.count = zend_hash_num_elements(&prefix_table);
	in_scope_ns.list = safe_emalloc(in_scope_ns.count, sizeof(xmlNsPtr), 0);

	/* Flatten the table into the result list. */
	size_t out = 0;
	xmlNsPtr entry;
	ZEND_HASH_MAP_FOREACH_PTR(&prefix_table, entry) {
		in_scope_ns.list[out] = entry;
		out++;
	} ZEND_HASH_FOREACH_END();

	zend_hash_destroy(&prefix_table);

	return in_scope_ns;
}

/* Collects the namespaces in scope for `node` through libxml2's xmlGetNsList().
 * The list is NULL-terminated; its entries are counted here. A NULL list yields a count of zero.
 * The result is flagged as not originating from the ns compat code. */
PHP_DOM_EXPORT php_dom_in_scope_ns php_dom_get_in_scope_ns_legacy(const xmlNode *node)
{
	ZEND_ASSERT(node != NULL);

	php_dom_in_scope_ns result;
	result.origin_is_ns_compat = false;
	result.count = 0;
	result.list = xmlGetNsList(node->doc, node);

	if (result.list == NULL) {
		return result;
	}

	/* Count up to the NULL terminator. */
	while (result.list[result.count] != NULL) {
		result.count++;
	}

	return result;
}

/* Frees the list of an in-scope namespace result with the deallocator that matches its origin:
 * xmlFree() for lists not produced by the ns compat code, efree() otherwise. */
PHP_DOM_EXPORT void php_dom_in_scope_ns_destroy(php_dom_in_scope_ns *in_scope_ns)
{
	ZEND_ASSERT(in_scope_ns != NULL);

	if (!in_scope_ns->origin_is_ns_compat) {
		xmlFree(in_scope_ns->list);
		return;
	}

	efree(in_scope_ns->list);
}

#endif  /* HAVE_LIBXML && HAVE_DOM */
