/*
 * uriparser - RFC 3986 URI parsing library
 *
 * Copyright (C) 2025, Sebastian Pipping <sebastian@pipping.org>
 * All rights reserved.
 *
 * Redistribution and use in source  and binary forms, with or without
 * modification, are permitted provided  that the following conditions
 * are met:
 *
 *     1. Redistributions  of  source  code   must  retain  the  above
 *        copyright notice, this list  of conditions and the following
 *        disclaimer.
 *
 *     2. Redistributions  in binary  form  must  reproduce the  above
 *        copyright notice, this list  of conditions and the following
 *        disclaimer  in  the  documentation  and/or  other  materials
 *        provided with the distribution.
 *
 *     3. Neither the  name of the  copyright holder nor the  names of
 *        its contributors may be used  to endorse or promote products
 *        derived from  this software  without specific  prior written
 *        permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND  ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING, BUT NOT
 * LIMITED TO,  THE IMPLIED WARRANTIES OF  MERCHANTABILITY AND FITNESS
 * FOR  A  PARTICULAR  PURPOSE  ARE  DISCLAIMED.  IN  NO  EVENT  SHALL
 * THE  COPYRIGHT HOLDER  OR CONTRIBUTORS  BE LIABLE  FOR ANY  DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO,  PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA,  OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT  LIABILITY,  OR  TORT (INCLUDING  NEGLIGENCE  OR  OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* What encodings are enabled? */
#include <uriparser/UriDefsConfig.h>
#if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
/* Include SELF twice */
#  ifdef URI_ENABLE_ANSI
#    define URI_PASS_ANSI 1
#    include "UriSetPath.c"
#    undef URI_PASS_ANSI
#  endif
#  ifdef URI_ENABLE_UNICODE
#    define URI_PASS_UNICODE 1
#    include "UriSetPath.c"
#    undef URI_PASS_UNICODE
#  endif
#else
#  ifdef URI_PASS_ANSI
#    include <uriparser/UriDefsAnsi.h>
#  else
#    include <uriparser/UriDefsUnicode.h>
#    include <wchar.h>
#  endif

#  ifndef URI_DOXYGEN
#    include <uriparser/Uri.h>
#    include "UriCommon.h"
#    include "UriMemory.h"
#    include "UriSets.h"
#  endif

#  include <assert.h>

/* Checks whether the characters in [first, afterLast) are acceptable as the
 * path of a URI.
 *
 * first      Pointer to the first character of the candidate path
 * afterLast  Pointer to the character right after the last one
 * hasHost    URI_TRUE if the path is meant for a URI with a host; such a
 *            path must be non-empty and start with a slash
 *
 * Returns URI_TRUE if the input passes the (simplified) check described in
 * the body, and URI_FALSE otherwise -- including when either pointer is NULL
 * or when the range is inverted (first > afterLast).
 */
UriBool URI_FUNC(IsWellFormedPath)(const URI_CHAR * first, const URI_CHAR * afterLast,
                                   UriBool hasHost) {
    if ((first == NULL) || (afterLast == NULL)) {
        return URI_FALSE;
    }

    /* An inverted range is not a path at all.  Without this check the loop
     * below would simply not run and the input would be reported as a
     * well-formed empty path, leaving callers to compute a negative length
     * from (afterLast - first). */
    if (first > afterLast) {
        return URI_FALSE;
    }

    if ((hasHost == URI_TRUE) && ((first >= afterLast) || (first[0] != _UT('/')))) {
        return URI_FALSE;
    }

    /* The related part of the grammar in RFC 3986 (section 3.3) reads:
     *
     *   path          = path-abempty    ; begins with "/" or is empty
     *                 / path-absolute   ; begins with "/" but not "//"
     *                 / path-noscheme   ; begins with a non-colon segment
     *                 / path-rootless   ; begins with a segment
     *                 / path-empty      ; zero characters
     *
     *   path-abempty  = *( "/" segment )
     *   path-absolute = "/" [ segment-nz *( "/" segment ) ]
     *   path-noscheme = segment-nz-nc *( "/" segment )
     *   path-rootless = segment-nz *( "/" segment )
     *   path-empty    = 0<pchar>
     *
     *   segment       = *pchar
     *   segment-nz    = 1*pchar
     *   segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
     *                 ; non-zero-length segment without any colon ":"
     *
     *   pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
     *
     * The check below simplifies this to ..
     *
     *   path          = *( unreserved / pct-encoded / sub-delims / ":" / "@" / "/" )
     *
     * .. and leaves the rest to pre-return removal of ambiguity
     * from cases like "path1:/path2" and "//path1/path2" inside SetPath.
     */
    while (first < afterLast) {
        switch (first[0]) {
        case URI_SET_PCHAR_WITHOUT_PERCENT(_UT):
            break;

        /* pct-encoded: "%" must be followed by exactly two hex digits */
        case _UT('%'):
            if (afterLast - first < 3) {
                return URI_FALSE;
            }
            switch (first[1]) {
            case URI_SET_HEXDIG(_UT):
                break;
            default:
                return URI_FALSE;
            }
            switch (first[2]) {
            case URI_SET_HEXDIG(_UT):
                break;
            default:
                return URI_FALSE;
            }
            /* Skip the two hex digits; the "%" itself is skipped below */
            first += 2;
            break;

        case _UT('/'):
            break;

        default:
            return URI_FALSE;
        }

        first++;
    }
    return URI_TRUE;
}

/* Unlinks the first path segment of `uri` from the path list and frees the
 * segment node through `memory`.
 *
 * The caller must guarantee (checked by assertions) that a first segment
 * exists and that its text range is empty.  Only the node itself is freed;
 * its text pointers are merely reset to NULL beforehand. */
static void URI_FUNC(DropEmptyFirstPathSegment)(URI_TYPE(Uri) * uri,
                                                UriMemoryManager * memory) {
    assert(uri != NULL);
    assert(memory != NULL);
    assert(uri->pathHead != NULL);
    assert(uri->pathHead->text.first == uri->pathHead->text.afterLast);

    URI_TYPE(PathSegment) * const originalHead = uri->pathHead;

    uri->pathHead = originalHead->next;

    /* If the dropped segment had no successor, the list is empty now and
     * the tail pointer (expected to have referenced the same node) must not
     * be left dangling after the free below. */
    if (uri->pathHead == NULL) {
        uri->pathTail = NULL;
    }

    originalHead->text.first = NULL;
    originalHead->text.afterLast = NULL;
    memory->free(memory, originalHead);
}

/* For URIs without a host, a leading slash of the path is expressed through
 * .absolutePath == URI_TRUE rather than through a path segment.
 *
 * If `uri` has no host and its first path segment is empty (which would show
 * up as a leading slash when the URI is turned back into a string), this
 * function removes that segment and sets .absolutePath to URI_TRUE instead.
 * In every other case the URI is left untouched. */
static void URI_FUNC(TransformEmptyLeadPathSegments)(URI_TYPE(Uri) * uri,
                                                     UriMemoryManager * memory) {
    assert(uri != NULL);
    assert(memory != NULL);

    /* URIs with a host keep their path segments as they are */
    if (URI_FUNC(HasHost)(uri) == URI_TRUE) {
        return;
    }

    /* No path, no leading segment to inspect */
    if (uri->pathHead == NULL) {
        return;
    }

    /* A non-empty first segment needs no transformation */
    if (uri->pathHead->text.first != uri->pathHead->text.afterLast) {
        return;
    }

    assert(uri->absolutePath == URI_FALSE);

    URI_FUNC(DropEmptyFirstPathSegment)(uri, memory);
    uri->absolutePath = URI_TRUE;
}

/* Installs the path given by [first, afterLast) into `destUri`, whose path
 * must already have been cleared by the caller (SetPathMm).
 *
 * The input is prefixed with "///", parsed as a URI of its own, and the
 * resulting list of path segments is then moved over into `destUri`.
 *
 * Returns URI_SUCCESS, URI_ERROR_SYNTAX or URI_ERROR_MALLOC. */
static int URI_FUNC(InternalSetPath)(URI_TYPE(Uri) * destUri, const URI_CHAR * first,
                                     const URI_CHAR * afterLast,
                                     UriMemoryManager * memory) {
    assert(destUri != NULL);
    assert(first != NULL);
    assert(afterLast != NULL);
    assert(memory != NULL);

    /* State established by SetPathMm right before the call */
    assert(destUri->pathHead == NULL);
    assert(destUri->pathTail == NULL);
    assert(destUri->absolutePath == URI_FALSE);

    if (URI_FUNC(HasHost)(destUri) == URI_TRUE) {
        /* With a host, the leading slash is dropped so that the remaining
         * text maps 1:1 onto the path segments transferred further down.
         * Its presence is guaranteed because SetPathMm has called
         * IsWellFormedPath earlier. */
        assert((afterLast - first >= 1) && (first[0] == _UT('/')));
        first++;
    } else if (first == afterLast) {
        /* Empty path without host: returning right here saves all the
         * work below and also avoids ending up with a single empty path
         * segment that would have to be detected and repaired afterwards */
        return URI_SUCCESS;
    }

    /* Build the "///" + input + terminator string that gets parsed below,
     * guarding both the length addition and the byte count multiplication
     * against wrap-around */
    const size_t sizeMax = (size_t)-1;
    const size_t prefixLenChars = 3;
    const size_t pathLenChars = (size_t)(afterLast - first);

    if (sizeMax - pathLenChars < prefixLenChars + 1) {
        return URI_ERROR_MALLOC;
    }

    const size_t wrappedLenChars = prefixLenChars + pathLenChars;

    if (sizeMax / sizeof(URI_CHAR) < wrappedLenChars + 1) {
        return URI_ERROR_MALLOC;
    }

    URI_CHAR * const wrapped =
        memory->malloc(memory, (wrappedLenChars + 1) * sizeof(URI_CHAR));
    if (wrapped == NULL) {
        return URI_ERROR_MALLOC;
    }

    memcpy(wrapped, _UT("///"), prefixLenChars * sizeof(URI_CHAR));
    memcpy(wrapped + prefixLenChars, first, pathLenChars * sizeof(URI_CHAR));
    wrapped[wrappedLenChars] = _UT('\0');

    /* Let the regular RFC 3986 parser split the text into path segments */
    URI_TYPE(Uri) parsedUri;
    int status = URI_FUNC(ParseSingleUriExMm)(&parsedUri, wrapped,
                                              wrapped + wrappedLenChars, NULL, memory);
    assert((status == URI_SUCCESS) || (status == URI_ERROR_SYNTAX)
           || (status == URI_ERROR_MALLOC));
    if (status != URI_SUCCESS) {
        memory->free(memory, wrapped);
        return status;
    }

    /* The parse is supposed to have produced host and path only */
    assert(parsedUri.query.first == NULL);
    assert(parsedUri.fragment.first == NULL);

    /* The segment strings need to be owned by `parsedUri` before the path
     * list can be taken out of it and kept, and before the parsed string
     * (`wrapped`) can be released */
    status = URI_FUNC(MakeOwnerMm)(&parsedUri, memory);
    assert((status == URI_SUCCESS) || (status == URI_ERROR_MALLOC));

    if (status == URI_SUCCESS) {
        assert(parsedUri.owner == URI_TRUE);

        /* Hand the path list over to the destination URI;
         * .absolutePath is always URI_FALSE for URIs with host */
        assert(parsedUri.absolutePath == URI_FALSE);
        destUri->pathHead = parsedUri.pathHead;
        destUri->pathTail = parsedUri.pathTail;
        destUri->absolutePath = URI_FALSE;

        parsedUri.pathHead = NULL;
        parsedUri.pathTail = NULL;
    }

    /* On success and failure alike: release whatever is left in the
     * temporary URI, then the string it was parsed from */
    URI_FUNC(FreeUriMembersMm)(&parsedUri, memory);
    memory->free(memory, wrapped);

    if (status != URI_SUCCESS) {
        return status;
    }

    /* Bring back the use of .absolutePath where needed */
    URI_FUNC(TransformEmptyLeadPathSegments)(destUri, memory);

    /* Remove ambiguity where needed */
    if (URI_FUNC(FixPathNoScheme)(destUri, memory) == URI_FALSE) {
        return URI_ERROR_MALLOC;
    }

    if (URI_FUNC(EnsureThatPathIsNotMistakenForHost)(destUri, memory) == URI_FALSE) {
        return URI_ERROR_MALLOC;
    }

    return URI_SUCCESS;
}

/* Replaces the path of `uri` with the text in [first, afterLast), using the
 * given memory manager.
 *
 * Passing NULL for both `first` and `afterLast` clears the path without
 * setting a new one; passing NULL for only one of them is an error.
 *
 * Returns URI_SUCCESS on success, URI_ERROR_NULL for missing arguments,
 * URI_ERROR_SYNTAX for a malformed path, or any error code reported by the
 * memory manager check or by the helpers called along the way. */
int URI_FUNC(SetPathMm)(URI_TYPE(Uri) * uri, const URI_CHAR * first,
                        const URI_CHAR * afterLast, UriMemoryManager * memory) {
    /* All validation happens before the URI is touched */
    if (uri == NULL) {
        return URI_ERROR_NULL;
    }
    if ((first == NULL) != (afterLast == NULL)) {
        return URI_ERROR_NULL;
    }

    URI_CHECK_MEMORY_MANAGER(memory); /* may return */

    const UriBool clearOnly = (first == NULL) ? URI_TRUE : URI_FALSE;

    if (clearOnly == URI_FALSE) {
        const UriBool hasHost = URI_FUNC(HasHost)(uri);
        if (URI_FUNC(IsWellFormedPath)(first, afterLast, hasHost) == URI_FALSE) {
            return URI_ERROR_SYNTAX;
        }
    }

    /* Drop the previous path */
    int status = URI_FUNC(FreeUriPath)(uri, memory);
    if (status != URI_SUCCESS) {
        return status;
    }
    uri->absolutePath = URI_FALSE;

    /* Without a new value, clearing was all there is to do */
    if (clearOnly == URI_TRUE) {
        return URI_SUCCESS;
    }

    /* The URI must be an owner before the new path is installed */
    if (uri->owner == URI_FALSE) {
        status = URI_FUNC(MakeOwnerMm)(uri, memory);
        if (status != URI_SUCCESS) {
            return status;
        }
    }

    assert(uri->owner == URI_TRUE);

    /* Install the new path */
    status = URI_FUNC(InternalSetPath)(uri, first, afterLast, memory);
    assert((status == URI_SUCCESS) || (status == URI_ERROR_SYNTAX)
           || (status == URI_ERROR_MALLOC));
    return status;
}

/* Variant of SetPathMm for callers without a memory manager of their own:
 * forwards all arguments and passes NULL as the memory manager. */
int URI_FUNC(SetPath)(URI_TYPE(Uri) * uri, const URI_CHAR * first,
                      const URI_CHAR * afterLast) {
    UriMemoryManager * const noCustomMemory = NULL;

    return URI_FUNC(SetPathMm)(uri, first, afterLast, noCustomMemory);
}

#endif
