/* Yet Another CPAN Grep */

/* URI-Fast/Fast.xs */

#define PERL_NO_GET_CONTEXT
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
#include "ppport.h"

/*------------------------------------------------------------------------------
 *
 * Macros and definitions
 *
 -----------------------------------------------------------------------------*/

// Permitted characters. Each set is handed to uri_encode() as its 'allow'
// argument by the setter for the matching component; characters found in the
// set are copied through without being percent-encoded.
#define URI_CHARS_AUTH          "!$&'()*+,;:=@"
#define URI_CHARS_USER          "!$&'()*+,;="
#define URI_CHARS_PATH          "!$&'()*+,;:=@/"
#define URI_CHARS_PATH_SEGMENT  "!$&'()*+,;:=@"
#define URI_CHARS_HOST          "!$&'()[]*+,.;=@/"
#define URI_CHARS_QUERY         ":@?/&=;"
#define URI_CHARS_FRAG          ":@?/"

// Returns the uri_t* referenced by the blessed URI::Fast object in the SV ref.
// Croaks if the SV does not point to a URI::Fast object.
//
// Both arms of the conditional are void expressions (croak() returns void), so
// the check is well-formed C; the stored IV is turned back into a pointer with
// INT2PTR rather than a raw cast. Note that obj is evaluated more than once,
// so it must not have side effects.
#define URI(obj) \
  (((sv_isobject((obj)) && sv_derived_from((obj), "URI::Fast")) ? (void) 0 : croak("error: expected instance of URI::Fast")), \
    INT2PTR(uri_t*, SvIV(SvRV((obj)))))

// Size constants, in bytes, one per uri_t member. They are looked up by member
// name through URI_SIZE(); URI_SIZE_auth is also used as the maximum accepted
// length of an authority string in set_auth() and set_raw_auth().
#define URI_SIZE_scheme 32UL
#define URI_SIZE_usr    32UL
#define URI_SIZE_pwd    32UL
#define URI_SIZE_host   64UL
#define URI_SIZE_port    8UL
#define URI_SIZE_path   64UL
#define URI_SIZE_query  64UL
#define URI_SIZE_frag   32UL

// Enough to fit all pieces + 3 chars for separators (2 colons + @)
#define URI_SIZE_auth (3 + URI_SIZE_usr + URI_SIZE_pwd + URI_SIZE_host + URI_SIZE_port)

// Returns the size of the member in bytes (token-pastes the member name onto
// the URI_SIZE_ prefix).
#define URI_SIZE(member) (URI_SIZE_##member)

// Defines a clearer method: a static function clear_<member>(pTHX_ SV *uri)
// that resolves the uri_t behind the SV with URI() (which croaks when the SV
// is not a URI::Fast object) and empties the member slot with str_clear().
#define URI_SIMPLE_CLEARER(member) \
static void clear_##member(pTHX_ SV *uri) { \
  str_clear(aTHX_ URI(uri)->member); \
}

// Returns a (non-mortal) SV from a uri_str_t. A zero-length string is copied
// from the literal "" instead of (str)->string, which str_init() leaves NULL
// until the first str_set().
#define URI_STR_2SV(str) (newSVpvn((str)->length == 0 ? "" : (str)->string, (str)->length))

// Defines a setter method, set_<member>(pTHX_ SV *sv_uri, SV *sv_value), that
// accepts an unencoded value, encodes it, ignoring characters in string
// 'allowed', and copies the encoded value into slot 'member'. An undefined
// value clears the slot instead.
//
// The scratch buffer holds three bytes per input byte plus a terminating nul,
// which is the worst case when every byte becomes a %XX escape.
// NOTE(review): the scratch buffer is a variable-length array on the stack
// whose size is driven by the caller's value.
#define URI_SIMPLE_SETTER(member, allowed) \
static void set_##member(pTHX_ SV *sv_uri, SV *sv_value) { \
  uri_t *uri = URI(sv_uri); \
  if (is_defined(aTHX_ sv_value)) { \
    size_t len_value, len_enc; \
    const char *value = SvPV_const(sv_value, len_value); \
    char enc[len_value * 3 + 1]; \
    len_enc = uri_encode(value, len_value, enc, allowed, uri->is_iri); \
    str_set(aTHX_ uri->member, enc, len_enc); \
  } \
  else { \
    str_clear(aTHX_ uri->member); \
  } \
}

// Defines a setter method, set_raw_<member>(pTHX_ SV *sv_uri, SV *sv_value),
// that accepts an arbitrary string value and copies it into the slot 'member'
// without changing it in any way (no percent-encoding is applied). An
// undefined value clears the slot instead.
#define URI_RAW_SETTER(member) \
static void set_raw_##member(pTHX_ SV *sv_uri, SV *sv_value) { \
  uri_t *uri = URI(sv_uri); \
  if (is_defined(aTHX_ sv_value)) { \
    size_t len_value; \
    const char *value = SvPV_const(sv_value, len_value); \
    str_set(aTHX_ uri->member, value, len_value); \
  } \
  else { \
    str_clear(aTHX_ uri->member); \
  } \
}

// Defines a getter method, get_raw_<member>(pTHX_ SV *sv_uri), that returns
// the raw, encoded value of the member slot as a new SV. If the object is an
// IRI, the value is passed through uri_decode_utf8() (which only decodes
// escapes for bytes above 127) and sv_utf8_decode() is applied to the result;
// otherwise the stored bytes are returned unchanged. An empty slot yields an
// empty string in either mode.
#define URI_RAW_GETTER(member) \
static SV* get_raw_##member(pTHX_ SV *sv_uri) { \
  uri_t *uri = URI(sv_uri); \
  uri_str_t *str = uri->member; \
  if (uri->is_iri) { \
    if (str->length == 0) return newSVpvn("", 0); \
    char decoded[ str->length + 1 ]; \
    size_t len = uri_decode_utf8(str->string, str->length, decoded); \
    SV *out = newSVpvn(decoded, len); \
    sv_utf8_decode(out); \
    return out; \
  } else { \
    return URI_STR_2SV(str); \
  } \
}

// Defines a getter method, get_<member>(pTHX_ SV *uri), that returns the
// decoded value of the member slot as a new SV. Decoding is done by
// uri_decode() with an empty 'ignore' set, so every valid %XX escape is
// decoded and '+' becomes a space; sv_utf8_decode() is then applied to the
// result. An empty slot yields an empty string.
#define URI_SIMPLE_GETTER(member) \
static SV* get_##member(pTHX_ SV *uri) { \
  uri_str_t *str = URI(uri)->member; \
  if (str->length == 0) return newSVpvn("", 0); \
  char decoded[ str->length + 1 ]; \
  size_t len = uri_decode(str->string, str->length, decoded, ""); \
  SV *out = newSVpvn(decoded, len); \
  sv_utf8_decode(out); \
  return out; \
}

// Defines a getter method, get_<member>(pTHX_ SV *uri), for a structured field
// that returns the value of the member slot with non-ASCII characters decoded,
// while leaving reserved characters encoded. Decoding is done by
// uri_decode_utf8(), which only decodes escapes for bytes above 127;
// sv_utf8_decode() is then applied to the result. An empty slot yields an
// empty string.
#define URI_COMPOUND_GETTER(member) \
static SV* get_##member(pTHX_ SV *uri) { \
  uri_str_t *str = URI(uri)->member; \
  if (str->length == 0) return newSVpvn("", 0); \
  char decoded[ str->length + 1 ]; \
  size_t len = uri_decode_utf8(str->string, str->length, decoded); \
  SV *out = newSVpvn(decoded, len); \
  sv_utf8_decode(out); \
  return out; \
}

// Warns out info about a uri_str_t (pass a uri_str_t*). The argument is
// parenthesised so that any pointer expression may be passed, and the size_t
// members are cast to unsigned long to match the %lu conversions. The trailing
// semicolon is kept for compatibility with existing call sites.
#define URI_STR_DEBUG(str) \
  (warn( \
    "STRING< chunk=%lu, allocated=%lu, length=%lu, string='%.*s' >\n", \
    (unsigned long) (str)->chunk, \
    (unsigned long) (str)->allocated, \
    (unsigned long) (str)->length, \
    (int) (str)->length, \
    (str)->string \
  ));

/*
 * Compatibility shim: allocate memory with Newx if it is available. On an
 * older perl that does not define Newx, fall back to New (whose first
 * argument is an unused id, passed as 0 here).
 */
#ifndef Newx
#define Newx(v,n,t) New(0,v,n,t)
#endif

// av_top_index not available on Perls < 5.18; fall back to av_len there.
#ifndef av_top_index
#define av_top_index(av) av_len(av)
#endif

/*------------------------------------------------------------------------------
 *
 * Internal API
 *
 -----------------------------------------------------------------------------*/

/*------------------------------------------------------------------------------
 * Utilities
 -----------------------------------------------------------------------------*/

// Returns the class name of a blessed object. Gets magic before evaluating.
// Returns an empty string if the SV is not defined, is not a reference, refers
// to something that is not blessed, or is blessed into a stash without a name.
static
const char* class_name(pTHX_ SV *sv) {
  const char *name;

  SvGETMAGIC(sv);

  // Only a blessed referent carries a stash; reading SvSTASH from an unblessed
  // referent is not valid.
  if (!SvOK(sv) || !SvROK(sv) || !SvOBJECT(SvRV(sv))) {
    return "";
  }

  name = HvNAME(SvSTASH(SvRV(sv)));

  return name == NULL ? "" : name;
}

// Definedness test for an SV. Get-magic is triggered first so that the check
// sees the SV's current value.
static
bool is_defined(pTHX_ SV *sv) {
  SvGETMAGIC(sv);

  if (SvOK(sv)) {
    return 1;
  }

  return 0;
}

// Reference test for an SV. Get-magic is triggered first so that the check
// sees the SV's current value.
static
bool is_ref(pTHX_ SV *sv) {
  SvGETMAGIC(sv);

  if (SvROK(sv)) {
    return 1;
  }

  return 0;
}

// Replacement for strspn that is length-aware: returns the length of the
// leading run of s, at most s_len bytes long, made up only of characters found
// in the nul-terminated set c. Never reads s at or beyond index s_len, so s
// does not need to be nul-terminated. As with strspn, a nul byte inside s ends
// the run.
static
size_t strnspn(const char *s, size_t s_len, const char *c)
{
  size_t n = 0;

  while (n < s_len && s[n] != '\0' && strchr(c, s[n]) != NULL) {
    ++n;
  }

  return n;
}

// Replacement for strcspn that is length-aware: returns the length of the
// leading run of s, at most s_len bytes long, that contains none of the
// characters in the nul-terminated set c. Never reads s at or beyond index
// s_len, so s does not need to be nul-terminated. As with strcspn, a nul byte
// inside s ends the run.
static
size_t strncspn(const char *s, size_t s_len, const char *c)
{
  size_t n = 0;

  while (n < s_len && s[n] != '\0' && strchr(c, s[n]) == NULL) {
    ++n;
  }

  return n;
}

// Membership test: true when c occurs in set before its terminating nul. The
// terminator itself never matches. The caller must make sure that *set is
// nul-terminated.
static inline
bool char_in_str(const char c, const char *set) {
  const char *cursor = set;

  while (*cursor != '\0') {
    if (*cursor++ == c) {
      return 1;
    }
  }

  return 0;
}

// True for the six ASCII whitespace characters: space, horizontal tab,
// carriage return, line feed, form feed and vertical tab.
static inline
bool my_isspace(const char c) {
  return c == ' '  || c == '\t'
      || c == '\r' || c == '\n'
      || c == '\f' || c == '\v';
}

// Smaller of two sizes.
static inline
size_t minnum(size_t x, size_t y) {
  if (y < x) {
    return y;
  }

  return x;
}

// Larger of two sizes.
static inline
size_t maxnum(size_t x, size_t y) {
  if (y > x) {
    return y;
  }

  return x;
}

/*------------------------------------------------------------------------------
 * Resizable strings
 -----------------------------------------------------------------------------*/
// A growable, nul-terminated byte string. The buffer starts out NULL (see
// str_init) and is allocated in multiples of 'chunk' bytes by str_set and
// str_append.
typedef struct {
  size_t chunk;     // bytes to allocate at a time
  size_t allocated; // number of currently allocated bytes
  size_t length;    // length of the string within the allocated buffer
  char *string;     // pointer to the allocated string
} uri_str_t;

// Length, in bytes, of the string held by a uri_str_t*.
#define str_len(str) ((str)->length)
// Read-only pointer to the string held by a uri_str_t*. An empty string yields
// the literal "" because the buffer may still be NULL. The argument is
// parenthesised so that any pointer expression (e.g. &local) may be passed.
#define str_get(str) (str_len(str) == 0 ? "" : (const char*) (str)->string)

// Cuts the string off at the right-most occurrence of r_char: that byte is
// overwritten with nul and becomes the new end of the string. Bytes after it
// are left as they were. Does nothing when r_char does not occur.
static
void str_rtrim(pTHX_ uri_str_t *str, const char r_char) {
  size_t pos = str->length;

  while (pos > 0) {
    --pos;

    if (str->string[pos] == r_char) {
      str->string[pos] = '\0';
      str->length = pos;
      return;
    }
  }
}

// Sets str to the first len chars of value and nul-terminates it. Allocates
// the buffer on first use and reallocates it when it cannot hold len bytes
// plus the terminating nul. If value is NULL, the first len + 1 bytes of the
// buffer are zeroed and the length is set to 0 instead.
static
void str_set(pTHX_ uri_str_t *str, const char *value, size_t len) {
  // Room for len bytes plus the nul, rounded up to a whole number of chunks.
  size_t allocate = str->chunk * (((len + 1) / str->chunk) + 1);

  if (str->string == NULL) {
    Newx(str->string, allocate, char);
    str->allocated = allocate;
  }
  // The terminator needs a byte too: a buffer of exactly len bytes is one byte
  // too small.
  else if (len + 1 > str->allocated) {
    Renew(str->string, allocate, char);
    str->allocated = allocate;
  }

  if (value == NULL) {
    Zero(str->string, len + 1, char);
    str->length = 0;
  }
  else {
    Copy(value, str->string, len, char);
    str->string[len] = '\0';
    str->length = len;
  }
}

// Appends the first len chars of value to the end of str and nul-terminates
// the result, resizing the buffer when the chunk-rounded size for the new
// length differs from the current allocation. A str with no buffer yet is
// handed to str_set. A NULL value or a zero len leaves an existing string
// untouched.
static
void str_append(pTHX_ uri_str_t *str, const char *value, size_t len) {
  size_t total, wanted;

  if (str->string == NULL) {
    str_set(aTHX_ str, value, len);
    return;
  }

  if (value == NULL || len == 0) {
    return;
  }

  total  = str->length + len;
  wanted = str->chunk * (((total + 1) / str->chunk) + 1);

  if (wanted != str->allocated) {
    Renew(str->string, wanted, char);
    str->allocated = wanted;
  }

  Copy(value, str->string + str->length, len, char);
  str->string[total] = '\0';
  str->length = total;
}

// Zeroes out the contents of str. Does not release memory. Implemented as
// str_set with a NULL value and a length of 0, which writes a nul to the first
// byte and resets the length; the buffer is allocated if it did not exist yet.
static
void str_clear(pTHX_ uri_str_t *str) {
  str_set(aTHX_ str, NULL, 0);
}

// Copies the contents of from into to. Does not clear to first, but will set
// the terminating nul and length (via str_set).
// NOTE(review): if from has never been set, from->string is NULL and str_set
// treats a NULL value as a request to clear.
static
void str_copy(pTHX_ uri_str_t *from, uri_str_t *to) {
  str_set(aTHX_ to, from->string, from->length);
}

// Initializes a uri_str_t: records alloc_size as the allocation chunk size and
// leaves the string empty with no buffer allocated (string is NULL until the
// first str_set or str_append).
static
void str_init(pTHX_ uri_str_t *str, size_t alloc_size) {
  str->chunk = alloc_size;
  str->allocated = 0;
  str->length = 0;
  str->string = NULL;
}

// Allocates and initializes a new uri_str_t with chunk size alloc_size. The
// struct is allocated with Newx; release it with str_free.
static
uri_str_t* str_new(pTHX_ size_t alloc_size) {
  uri_str_t *str;
  Newx(str, 1, uri_str_t);
  str_init(aTHX_ str, alloc_size);
  return str;
}

// Releases an allocated uri_str_t: frees its string buffer, if one was ever
// allocated, and then the struct itself. str must not be NULL.
static
void str_free(pTHX_ uri_str_t *str) {
  if (str->string != NULL) {
    Safefree(str->string);
  }

  Safefree(str);
}


/*-------------------------------------------------------------------------------
 * Percent encoding
 -----------------------------------------------------------------------------*/

// Taken with respect from URI::Escape::XS. Adapted to accept a configurable
// string of permissible characters.
//
// Lookup table indexed by octet value, four bytes per entry. An entry is
// either the three-character percent escape for that octet followed by a nul,
// or four nul bytes (the _______ placeholder) for an octet that is emitted
// as-is. uri_encode() reads each entry as a single U32 and treats zero as "no
// escape needed".
#define _______ "\0\0\0\0"
static const char uri_encode_tbl[ sizeof(U32) * 0x100 ] = {
/*  0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f                        */
    "%00\0" "%01\0" "%02\0" "%03\0" "%04\0" "%05\0" "%06\0" "%07\0" "%08\0" "%09\0" "%0A\0" "%0B\0" "%0C\0" "%0D\0" "%0E\0" "%0F\0"  /* 0:   0 ~  15 */
    "%10\0" "%11\0" "%12\0" "%13\0" "%14\0" "%15\0" "%16\0" "%17\0" "%18\0" "%19\0" "%1A\0" "%1B\0" "%1C\0" "%1D\0" "%1E\0" "%1F\0"  /* 1:  16 ~  31 */
    "%20\0" "%21\0" "%22\0" "%23\0" "%24\0" "%25\0" "%26\0" "%27\0" "%28\0" "%29\0" "%2A\0" "%2B\0" "%2C\0" _______ _______ "%2F\0"  /* 2:  32 ~  47 */
    _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ "%3A\0" "%3B\0" "%3C\0" "%3D\0" "%3E\0" "%3F\0"  /* 3:  48 ~  63 */
    "%40\0" _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______  /* 4:  64 ~  79 */
    _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ "%5B\0" "%5C\0" "%5D\0" "%5E\0" _______  /* 5:  80 ~  95 */
    "%60\0" _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______  /* 6:  96 ~ 111 */
    _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ _______ "%7B\0" "%7C\0" "%7D\0" _______ "%7F\0"  /* 7: 112 ~ 127 */
    "%80\0" "%81\0" "%82\0" "%83\0" "%84\0" "%85\0" "%86\0" "%87\0" "%88\0" "%89\0" "%8A\0" "%8B\0" "%8C\0" "%8D\0" "%8E\0" "%8F\0"  /* 8: 128 ~ 143 */
    "%90\0" "%91\0" "%92\0" "%93\0" "%94\0" "%95\0" "%96\0" "%97\0" "%98\0" "%99\0" "%9A\0" "%9B\0" "%9C\0" "%9D\0" "%9E\0" "%9F\0"  /* 9: 144 ~ 159 */
    "%A0\0" "%A1\0" "%A2\0" "%A3\0" "%A4\0" "%A5\0" "%A6\0" "%A7\0" "%A8\0" "%A9\0" "%AA\0" "%AB\0" "%AC\0" "%AD\0" "%AE\0" "%AF\0"  /* A: 160 ~ 175 */
    "%B0\0" "%B1\0" "%B2\0" "%B3\0" "%B4\0" "%B5\0" "%B6\0" "%B7\0" "%B8\0" "%B9\0" "%BA\0" "%BB\0" "%BC\0" "%BD\0" "%BE\0" "%BF\0"  /* B: 176 ~ 191 */
    "%C0\0" "%C1\0" "%C2\0" "%C3\0" "%C4\0" "%C5\0" "%C6\0" "%C7\0" "%C8\0" "%C9\0" "%CA\0" "%CB\0" "%CC\0" "%CD\0" "%CE\0" "%CF\0"  /* C: 192 ~ 207 */
    "%D0\0" "%D1\0" "%D2\0" "%D3\0" "%D4\0" "%D5\0" "%D6\0" "%D7\0" "%D8\0" "%D9\0" "%DA\0" "%DB\0" "%DC\0" "%DD\0" "%DE\0" "%DF\0"  /* D: 208 ~ 223 */
    "%E0\0" "%E1\0" "%E2\0" "%E3\0" "%E4\0" "%E5\0" "%E6\0" "%E7\0" "%E8\0" "%E9\0" "%EA\0" "%EB\0" "%EC\0" "%ED\0" "%EE\0" "%EF\0"  /* E: 224 ~ 239 */
    "%F0\0" "%F1\0" "%F2\0" "%F3\0" "%F4\0" "%F5\0" "%F6\0" "%F7\0" "%F8\0" "%F9\0" "%FA\0" "%FB\0" "%FC\0" "%FD\0" "%FE\0" "%FF"    /* F: 240 ~ 255 */
};
#undef _______

// Percent-encodes the first len bytes of in into out and returns the number of
// bytes written, not counting the terminating nul that is always appended.
//
//   in          input bytes; need not be nul-terminated
//   len         number of input bytes
//   out         output buffer; must have room for len * 3 + 1 bytes
//   allow       nul-terminated set of characters copied through unescaped
//   allow_utf8  when true, bytes with the high bit set are copied through raw
//               (a UTF-8 lead byte together with its continuation bytes)
//
// Bytes whose entry in uri_encode_tbl is empty are always copied through.
static
size_t uri_encode(const char* in, size_t len, char* out, const char* allow, int allow_utf8) {
  size_t i = 0;
  size_t j = 0;
  size_t skip;
  U8 octet;
  const char *code;

  while (i < len) {
    octet = (U8) in[i];

    // Only non-ASCII bytes may bypass escaping. Testing against 0xC0 would
    // also match ASCII 0x40-0x7F and let characters such as '{' or '|' through
    // unescaped.
    if (allow_utf8 && (octet & 0x80)) {
      skip = UTF8SKIP(&in[i]);

      // A truncated sequence must not read past the end of the input.
      if (skip > len - i) {
        skip = len - i;
      }

      Copy(&in[i], &out[j], skip, char);
      i += skip;
      j += skip;
      continue;
    }

    if (char_in_str((char) octet, allow)) {
      out[j++] = (char) octet;
    }
    else {
      // Each table entry is four bytes wide; an empty entry starts with nul.
      // Copy bytewise instead of through a U32* to avoid unaligned access.
      code = &uri_encode_tbl[sizeof(U32) * octet];

      if (code[0] != '\0') {
        Copy(code, &out[j], 3, char);
        j += 3;
      }
      else {
        out[j++] = (char) octet;
      }
    }

    ++i;
  }

  out[j] = '\0';

  return j;
}

/*-------------------------------------------------------------------------------
 * Percent decoding
 -----------------------------------------------------------------------------*/

// Lookup table mapping a byte to the value of the hexadecimal digit it
// represents ('0'-'9', 'A'-'F', 'a'-'f'). Every other byte maps to 0xFF (the
// __ placeholder), which unhex() uses to detect an invalid escape.
#define __ 0xFF
static const unsigned char hex[0x100] = {
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 00-0F */
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 10-1F */
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 20-2F */
   0, 1, 2, 3, 4, 5, 6, 7, 8, 9,__,__,__,__,__,__, /* 30-3F */
  __,10,11,12,13,14,15,__,__,__,__,__,__,__,__,__, /* 40-4F */
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 50-5F */
  __,10,11,12,13,14,15,__,__,__,__,__,__,__,__,__, /* 60-6F */
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 70-7F */
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 80-8F */
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* 90-9F */
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* A0-AF */
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* B0-BF */
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* C0-CF */
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* D0-DF */
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* E0-EF */
  __,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__, /* F0-FF */
};
#undef __

static inline
char unhex(const char *in) {
  unsigned char v1 = hex[ (unsigned char) in[0] ];
  unsigned char v2 = hex[ (unsigned char) in[1] ];

  /* skip invalid hex sequences */
  if ((v1 | v2) != 0xFF) {
    return (v1 << 4) | v2;
  }

  return '\0';
}

// Percent-decodes the first len bytes of in into out and returns the number of
// bytes written, not counting the terminating nul that is always appended.
// out must have room for len + 1 bytes.
//
// '+' is decoded to a space and %XX to the byte it encodes, unless the decoded
// character appears in the nul-terminated set ignore, in which case the input
// is copied through unchanged. Invalid escapes are copied through as-is, and
// literal nul bytes in the input are dropped.
static
size_t uri_decode(const char *in, size_t len, char *out, const char *ignore) {
  size_t i = 0, j = 0;
  char decoded;

  while (i < len) {
    decoded = '\0';

    switch (in[i]) {
      case '+':
        // An ignored space keeps its '+' form. It must not fall into the '%'
        // branch, which would treat the next two bytes as a hex escape.
        decoded = char_in_str(' ', ignore) ? '+' : ' ';
        ++i;
        break;
      case '%':
        if (i + 2 < len) {
          decoded = unhex( &in[i + 1] );
          if (decoded != '\0' && !char_in_str(decoded, ignore)) {
            i += 3;
            break;
          }
        }
        /* fallthrough: not a decodable escape, copy the '%' itself */
      default:
        decoded = in[i++];
    }

    if (decoded != '\0') {
      out[j++] = decoded;
    }
  }

  out[j] = '\0';

  return j;
}

// Decodes only those %XX escapes in the first len bytes of in that encode a
// byte above 127; everything else, including escapes for ASCII characters, is
// copied through unchanged. Literal nul bytes in the input are dropped.
// Returns the number of bytes written to out, not counting the terminating nul
// that is always appended. out must have room for len + 1 bytes.
static
size_t uri_decode_utf8(const char *in, size_t len, char *out) {
  size_t rd = 0, wr = 0;

  while (rd < len) {
    const char current = in[rd];

    if (current == '%' && rd + 2 < len) {
      const char byte = unhex( &in[rd + 1] );

      if (byte != '\0' && (U32)byte > 127) {
        out[wr++] = byte;
        rd += 3;
        continue;
      }
    }

    ++rd;

    if (current != '\0') {
      out[wr++] = current;
    }
  }

  out[wr] = '\0';

  return wr;
}

// EOT (end of theft)

/*
 * External API for encode/decode.
 */

// Returns a new SV holding the percent-encoded form of in. Characters in
// sv_allowed (which may be NULL for "none") are left unescaped. An undefined
// input yields an empty string.
//
// The encoded bytes are written straight into the result SV's own buffer, so
// no stack space proportional to the input length is needed.
static
SV* encode(pTHX_ SV *in, SV *sv_allowed) {
  size_t ilen, olen, alen;
  const char *src;
  const char *allowed;
  SV* out;

  if (!is_defined(aTHX_ in)) {
    return newSVpvn("", 0);
  }

  src = SvPV_nomg_const(in, ilen);

  if (sv_allowed == NULL) {
    allowed = "";
  } else {
    allowed = SvPV_nomg_const(sv_allowed, alen);
  }

  // Worst case is three output bytes per input byte plus the terminating nul.
  out = newSV((ilen * 3) + 1);
  SvPOK_only(out);

  olen = uri_encode(src, ilen, SvPVX(out), allowed, 0);
  SvCUR_set(out, olen);
  sv_utf8_downgrade(out, FALSE);

  return out;
}

// Returns a new SV holding the percent-decoded form of in, with
// sv_utf8_decode() applied to the result. An undefined input yields an empty
// string. Input carrying the UTF-8 flag is first copied and downgraded to
// octets; croaks if it contains a wide character.
//
// The decoded bytes are written straight into the result SV's own buffer, so
// no stack space proportional to the input length is needed.
static
SV* decode(pTHX_ SV *in) {
  size_t ilen, olen;
  const char *src;
  SV *out;

  if (!is_defined(aTHX_ in)) {
    return newSVpvn("", 0);
  }

  if (DO_UTF8(in)) {
    // Work on a mortal copy so the caller's SV is not modified.
    in = sv_mortalcopy(in);
    SvUTF8_on(in);

    if (!sv_utf8_downgrade(in, TRUE)) {
      croak("decode: wide character in input octet string");
    }
  }

  src = SvPV_nomg_const(in, ilen);

  // Decoding never grows the input: ilen bytes plus the terminating nul.
  out = newSV(ilen + 1);
  SvPOK_only(out);

  olen = uri_decode(src, ilen, SvPVX(out), "");
  SvCUR_set(out, olen);
  sv_utf8_decode(out);

  return out;
}


/*------------------------------------------------------------------------------
 * Query string parsing
 -----------------------------------------------------------------------------*/

// Kind of token produced by query_scanner_next().
typedef enum {
  KEY   = 1, // a key that is not followed by '='
  PARAM = 2, // a key followed by '=' and a (possibly empty) value
  DONE  = 3, // end of input reached; no key or value
} uri_query_token_type_t;

// One token of a query string. key and value point into the scanner's source
// string; they are not copies, not decoded, and not nul-terminated at their
// stated length.
typedef struct {
  uri_query_token_type_t type;
  const char *key;   size_t key_length;
  const char *value; size_t value_length; // only present when type=PARAM
} uri_query_token_t;

// Scanner state: the source string, its length, and the offset of the next
// byte to be scanned.
typedef struct {
  size_t length;
  size_t cursor;
  const char *source;
} uri_query_scanner_t;

// Initializes a uri_query_scanner_t with input string *source of at least
// length characters and resets the cursor to the start. The scanner stores the
// pointer without copying, so it is the caller's responsibility to ensure the
// lifetime of source matches the lifetime of the scanner.
void query_scanner_init(
    uri_query_scanner_t *scanner,
    const char *source,
    size_t length
  )
{
  scanner->source = source;
  scanner->length = length;
  scanner->cursor = 0;
}

// Returns true if the scanner has reached the end of the input string, i.e.
// the cursor is at or beyond the source length (query_scanner_next may step
// one past the end when it consumes a separator).
static
int query_scanner_done(uri_query_scanner_t *scanner) {
  return scanner->cursor >= scanner->length;
}

/*
 * Fills the token struct with the next token information. Does not decode
 * any values.
 *
 * Keys end at '&', ';' or '='; values end at '&' or ';'. Tokens with an empty
 * key (for example "&&" or "=value") are skipped. When the input is exhausted
 * the token type is DONE and its key and value are NULL. For a KEY token the
 * value is NULL with length 0. The scanner never reads source at or beyond
 * its stated length when testing for '='.
 */
static
void query_scanner_next(uri_query_scanner_t *scanner, uri_query_token_t *token) {
  size_t brk;
  const char key_sep[4] = {'&', ';', '=', '\0'};
  const char val_sep[4] = {'&', ';', '\0'};

  do {
    if (scanner->cursor >= scanner->length) {
      token->key   = NULL; token->key_length   = 0;
      token->value = NULL; token->value_length = 0;
      token->type  = DONE;
      return;
    }

    // Scan to end of key
    brk = strncspn(
      &scanner->source[ scanner->cursor ],
      scanner->length - scanner->cursor,
      key_sep
    );

    // Set key members in token struct
    token->key = &scanner->source[ scanner->cursor ];
    token->key_length = brk;

    // Move cursor to end of key
    scanner->cursor += brk;

    // If there is an associated value, add it to the token
    if (scanner->cursor < scanner->length
     && scanner->source[ scanner->cursor ] == '=')
    {
      // Advance past '='
      ++scanner->cursor;

      // Find the end of the value
      brk = strncspn(
        &scanner->source[ scanner->cursor ],
        scanner->length - scanner->cursor,
        val_sep
      );

      // Set the value and token type
      token->value = &scanner->source[ scanner->cursor ];
      token->value_length = brk;
      token->type = PARAM;

      // Move cursor to the end of the value, eating the separator ending it
      scanner->cursor += brk + 1;
    }
    // No value assigned to this key
    else {
      token->value = NULL;
      token->value_length = 0;
      token->type = KEY;
      ++scanner->cursor; // advance past terminating separator
    }

    // No key was found; try again
  } while (token->key_length == 0);
}


/*------------------------------------------------------------------------------
 * URI parsing
 -----------------------------------------------------------------------------*/

// A parsed URI. Each component is held in its own resizable string; the
// authority is not stored as a whole but as its usr, pwd, host and port parts.
typedef struct {
  U8         is_iri; // when true, setters pass it to uri_encode() as allow_utf8 and raw getters decode utf8 escapes
  uri_str_t *scheme;
  uri_str_t *query;
  uri_str_t *path;
  uri_str_t *host;
  uri_str_t *port;
  uri_str_t *frag;
  uri_str_t *usr;
  uri_str_t *pwd;
} uri_t;

/*
 * Scans the authorization portion of the URI string,
 *
 *   [usr[:pwd]@]host[:port]
 *
 * and stores the pieces found in uri->usr, uri->pwd, uri->host and uri->port.
 * Slots for pieces that are absent are left untouched. A host starting with
 * '[' is taken up to and including the closing ']' (IPv6 literal). Only the
 * first len bytes of auth are examined; auth need not be nul-terminated at
 * len.
 */
static
void uri_scan_auth(pTHX_ uri_t* uri, const char* auth, const size_t len) {
  size_t idx  = 0;
  size_t brk1 = 0;
  size_t brk2 = 0;
  unsigned char flag;

  if (len > 0) {
    // Credentials
    brk1 = strncspn(&auth[idx], len - idx, "@");

    // An '@' was found, and not as the very first character
    if (brk1 > 0 && brk1 != (len - idx)) {
      brk2 = strncspn(&auth[idx], len - idx, ":");

      if (brk2 > 0 && brk2 < brk1) {
        // user
        str_set(aTHX_ uri->usr, &auth[idx], brk2);
        idx += brk2 + 1;

        // password
        str_set(aTHX_ uri->pwd, &auth[idx], brk1 - brk2 - 1);
        idx += brk1 - brk2;
      }
      else {
        // user only
        str_set(aTHX_ uri->usr, &auth[idx], brk1);
        idx += brk1 + 1;
      }
    }

    // Location

    // Maybe an IPV6 address
    flag = 0;
    if (idx < len && auth[idx] == '[') {
      brk1 = strncspn(&auth[idx], len - idx, "]");

      if (idx + brk1 < len && auth[idx + brk1] == ']') {
        // Copy, including the square brackets
        str_set(aTHX_ uri->host, &auth[idx], brk1 + 1);
        idx += brk1 + 1;
        flag = 1;
      }
    }

    if (flag == 0) {
      brk1 = strncspn(&auth[idx], len - idx, ":");

      if (brk1 > 0) {
        str_set(aTHX_ uri->host, &auth[idx], brk1);
        idx += brk1;
      }
    }

    // Everything after the colon is the port
    if (idx < len && auth[idx] == ':') {
      ++idx;
      str_set(aTHX_ uri->port, &auth[idx], len - idx);
    }
  }
}

/*
 * Scans a URI string and populates the uri_t struct.
 *
 * Correct:
 *   scheme:[//[usr[:pwd]@]host[:port]]path[?query][#fragment]
 *
 * Incorrect but supported:
 *   /path[?query][#fragment]
 *
 * Leading and trailing ASCII whitespace is ignored. Slots for components that
 * are absent are left untouched. Only the first len bytes of src are
 * examined, so an empty or all-whitespace string is handled without reading
 * outside the buffer.
 */
static
void uri_scan(pTHX_ uri_t *uri, const char *src, size_t len) {
  size_t idx = 0;
  size_t brk;

  while (idx < len && my_isspace(src[idx]) == 1)     ++idx; // Trim leading whitespace
  while (len > idx && my_isspace(src[len - 1]) == 1) --len; // Trim trailing whitespace

  // scheme
  brk = strncspn(&src[idx], len - idx, ":/@?#");

  if (brk > 0 && idx + brk < len && src[idx + brk] == ':') {
    str_set(aTHX_ uri->scheme, &src[idx], brk);
    idx += brk;
    ++idx; // skip past ":"
  }

  // authority
  if (idx + 1 < len         // src is long enough to hold two slashes
   && src[idx]     == '/'   // next char is a slash
   && src[idx + 1] == '/')  // char after that is a slash
  {
    idx += 2;               // skip past the double slashes

    brk = strncspn(&src[idx], len - idx, "/?#");
    uri_scan_auth(aTHX_ uri, &src[idx], brk);

    if (brk > 0) {
      idx += brk;
    }
  }

  // path
  brk = strncspn(&src[idx], len - idx, "?#");
  if (brk > 0) {
    str_set(aTHX_ uri->path, &src[idx], brk);
    idx += brk;
  }

  // query
  if (idx < len && src[idx] == '?') {
    ++idx; // skip past ?
    brk = strncspn(&src[idx], len - idx, "#");
    if (brk > 0) {
      str_set(aTHX_ uri->query, &src[idx], brk);
      idx += brk;
    }
  }

  // fragment
  if (idx < len && src[idx] == '#') {
    ++idx; // skip past #
    brk = len - idx;
    if (brk > 0) {
      str_set(aTHX_ uri->frag, &src[idx], brk);
    }
  }
}

/*
 * Helper predicate: non-zero when at least one of the authority pieces (host,
 * user, password, port) is non-empty.
 */
static inline
int has_authority(pTHX_ uri_t *uri) {
  if (uri->host->length > 0) return 1;
  if (uri->usr->length  > 0) return 1;
  if (uri->pwd->length  > 0) return 1;

  return uri->port->length > 0;
}

/*------------------------------------------------------------------------------
 *
 * Perl API
 *
 -----------------------------------------------------------------------------*/

/*------------------------------------------------------------------------------
 * Clearers
 -----------------------------------------------------------------------------*/

// Instantiate clear_<member>() for every member of uri_t that holds a string.
URI_SIMPLE_CLEARER(scheme);
URI_SIMPLE_CLEARER(path);
URI_SIMPLE_CLEARER(query);
URI_SIMPLE_CLEARER(frag);
URI_SIMPLE_CLEARER(usr);
URI_SIMPLE_CLEARER(pwd);
URI_SIMPLE_CLEARER(host);
URI_SIMPLE_CLEARER(port);

// Clears the whole authority section. The authority is not stored as a single
// field, so this clears each of its parts: user, password, host and port.
static
void clear_auth(pTHX_ SV *uri_obj) {
  clear_usr(aTHX_ uri_obj);
  clear_pwd(aTHX_ uri_obj);
  clear_host(aTHX_ uri_obj);
  clear_port(aTHX_ uri_obj);
}

/*------------------------------------------------------------------------------
 * Raw getters
 -----------------------------------------------------------------------------*/

// Raw getters: instantiate get_raw_<member>() for every member of uri_t that
// holds a string.
URI_RAW_GETTER(scheme);
URI_RAW_GETTER(usr);
URI_RAW_GETTER(pwd);
URI_RAW_GETTER(host);
URI_RAW_GETTER(port);
URI_RAW_GETTER(path);
URI_RAW_GETTER(query);
URI_RAW_GETTER(frag);

// Assembles the authority string, [usr[:pwd]@]host[:port], from the stored
// (still encoded) parts and returns it as a new SV. The password is emitted
// only when a user is present, and the port only when a host is present. The
// result carries the UTF-8 flag when the object is an IRI.
static
SV* get_raw_auth(pTHX_ SV *uri_obj) {
  uri_t *uri = URI(uri_obj);
  SV *out = newSVpvn("", 0);

  if (uri->is_iri) {
    SvUTF8_on(out);
  }

  // Credentials: "usr@" or "usr:pwd@"
  if (str_len(uri->usr) > 0) {
    sv_catpvn(out, str_get(uri->usr), str_len(uri->usr));

    if (str_len(uri->pwd) > 0) {
      sv_catpvn(out, ":", 1);
      sv_catpvn(out, str_get(uri->pwd), str_len(uri->pwd));
    }

    sv_catpvn(out, "@", 1);
  }

  // Location: "host" or "host:port"
  if (str_len(uri->host) > 0) {
    sv_catpvn(out, str_get(uri->host), str_len(uri->host));

    if (str_len(uri->port) > 0) {
      sv_catpvn(out, ":", 1);
      sv_catpvn(out, str_get(uri->port), str_len(uri->port));
    }
  }

  return out;
}

/*------------------------------------------------------------------------------
 * Decoding getters
 -----------------------------------------------------------------------------*/

// Fully decoding getters for the simple members.
URI_SIMPLE_GETTER(scheme);
URI_SIMPLE_GETTER(usr);
URI_SIMPLE_GETTER(pwd);
URI_SIMPLE_GETTER(host);
URI_SIMPLE_GETTER(port);
URI_SIMPLE_GETTER(frag);

// Path and query are structured fields, so their getters only decode
// non-ASCII escapes and leave reserved characters encoded.
URI_COMPOUND_GETTER(path);
URI_COMPOUND_GETTER(query);

// Assembles the authority string, [usr[:pwd]@]host[:port], from the decoded
// values of its parts and returns it as a new SV. The password is emitted only
// when a user is present, and the port only when a host is present. The
// result carries the UTF-8 flag when the object is an IRI.
static
SV* get_auth(pTHX_ SV *uri_obj) {
  uri_t *uri = URI(uri_obj);
  SV *out = newSVpvn("", 0);

  if (uri->is_iri) {
    SvUTF8_on(out);
  }

  // Credentials: "usr@" or "usr:pwd@"
  if (str_len(uri->usr) > 0) {
    sv_catsv(out, sv_2mortal(get_usr(aTHX_ uri_obj)));

    if (str_len(uri->pwd) > 0) {
      sv_catpvn(out, ":", 1);
      sv_catsv(out, sv_2mortal(get_pwd(aTHX_ uri_obj)));
    }

    sv_catpvn(out, "@", 1);
  }

  // Location: "host" or "host:port"
  if (str_len(uri->host) > 0) {
    sv_catsv(out, sv_2mortal(get_host(aTHX_ uri_obj)));

    if (str_len(uri->port) > 0) {
      sv_catpvn(out, ":", 1);
      sv_catsv(out, sv_2mortal(get_port(aTHX_ uri_obj)));
    }
  }

  return out;
}

// Splits the path on '/' and returns a reference to a new array of the decoded
// segments. A leading '/' is skipped; when include_leading is true it is
// represented by an empty string as the first element. An empty path gives an
// empty array.
static
SV* split_path(pTHX_ SV* sv_uri, int include_leading) {
  uri_t *uri = URI(sv_uri);
  AV *segments = newAV();
  const char *path = uri->path->string;
  const size_t path_len = uri->path->length;
  size_t pos = 0;

  if (path_len > 0 && path[0] == '/') {
    if (include_leading) {
      av_push(segments, newSVpvn("", 0));
    }

    pos = 1; // step over the leading /
  }

  while (pos < path_len) {
    // Length of the segment up to the next separator
    const size_t raw_len = strcspn(path + pos, "/");

    // Decoding never grows the segment
    char decoded[raw_len + 1];
    const size_t decoded_len = uri_decode(path + pos, raw_len, decoded, "");

    SV *item = newSVpvn(decoded, decoded_len);
    sv_utf8_decode(item);
    av_push(segments, item);

    pos += raw_len + 1; // step over the segment and its separator
  }

  return newRV_noinc((SV*) segments);
}

// Returns a reference to a new hash whose keys are the decoded keys present in
// the query string; every value is undef. Keys are stored as UTF-8 (negative
// key length).
static
SV* get_query_keys(pTHX_ SV* sv_uri) {
  uri_str_t *str_query = URI(sv_uri)->query;
  const char *query = str_query->string;
  size_t klen, qlen = str_query->length;
  HV* out = newHV();
  SV* val;
  uri_query_scanner_t scanner;
  uri_query_token_t token;

  query_scanner_init(&scanner, query, qlen);

  while (!query_scanner_done(&scanner)) {
    query_scanner_next(&scanner, &token);
    if (token.type == DONE) continue;
    char key[token.key_length + 1];
    klen = uri_decode(token.key, token.key_length, key, "");

    // Store a fresh undef rather than the shared, read-only &PL_sv_undef, and
    // negate the length as a signed value to mark the key as UTF-8.
    val = newSV(0);
    if (hv_store(out, key, -(I32) klen, val, 0) == NULL) {
      SvREFCNT_dec(val);
    }
  }

  return newRV_noinc((SV*) out);
}

// Returns a reference to a new hash mapping each decoded query key to a
// reference to an array of its decoded values, in order of appearance. A key
// that only ever appears without a value maps to an empty array. Keys are
// looked up and stored as UTF-8 (negative key length) consistently, so a
// repeated non-ASCII key accumulates its values instead of replacing them.
static
SV* query_hash(pTHX_ SV *sv_uri) {
  uri_t *uri = URI(sv_uri);
  SV *tmp, *ref, **refval;
  AV *arr;
  HV *out = newHV();
  size_t klen, vlen;

  uri_query_scanner_t scanner;
  uri_query_token_t token;

  query_scanner_init(&scanner, uri->query->string, uri->query->length);

  while (!query_scanner_done(&scanner)) {
    query_scanner_next(&scanner, &token);
    if (token.type == DONE) continue;

    // Get decoded key
    char key[token.key_length + 1];
    klen = uri_decode(token.key, token.key_length, key, "");

    // Values are stored in an array; this block is the rough equivalent of:
    //   $out{$key} = [] unless exists $out{$key};
    refval = hv_fetch(out, key, -(I32) klen, 0);

    if (refval == NULL) {
      arr = newAV();
      ref = newRV_noinc((SV*) arr);

      if (hv_store(out, key, -(I32) klen, ref, 0) == NULL) {
        // Not stored: release the reference (and with it the array) and skip
        SvREFCNT_dec(ref);
        continue;
      }
    }
    else {
      arr = (AV*) SvRV(*refval);
    }

    // Get decoded value if there is one
    if (token.type == PARAM) {
      char val[token.value_length + 1];
      vlen = uri_decode(token.value, token.value_length, val, "");
      tmp = newSVpvn(val, vlen);
      sv_utf8_decode(tmp);
      av_push(arr, tmp);
    }
  }

  return newRV_noinc((SV*) out);
}

// Returns a reference to a new array holding the decoded value of every
// occurrence of sv_key in the query string, in order of appearance. An
// occurrence of the key without a value contributes undef. Croaks if sv_key is
// undefined.
//
// The search key is encoded the same way as stored query keys and compared
// byte for byte against each key in the query.
static
SV* get_param(pTHX_ SV* sv_uri, SV* sv_key) {
  uri_t *uri = URI(sv_uri);
  size_t klen, vlen, elen;
  const char *key;
  uri_query_scanner_t scanner;
  uri_query_token_t token;
  AV* out;
  SV* value;

  // Read key to search
  if (!is_defined(aTHX_ sv_key)) {
    croak("get_param: expected key to search");
  }

  // Copy input string *before* calling DO_UTF8() in case the SV is an object
  // with string overloading, which may trigger the utf8 flag.
  key = SvPV_const(sv_key, klen);

  if (!DO_UTF8(sv_key)) {
    sv_key = sv_2mortal(newSVpvn(key, klen));
    sv_utf8_encode(sv_key);
    key = SvPV_const(sv_key, klen);
  }

  char enc_key[(klen * 3) + 2];
  elen = uri_encode(key, klen, enc_key, ":@?/", uri->is_iri);

  // Allocate the result only once nothing above can croak, so that it cannot
  // be leaked.
  out = newAV();

  query_scanner_init(&scanner, uri->query->string, uri->query->length);

  while (!query_scanner_done(&scanner)) {
    query_scanner_next(&scanner, &token);
    if (token.type == DONE) continue;

    // Token keys are not nul-terminated at key_length, so compare the lengths
    // first and then exactly that many bytes.
    if (elen == token.key_length && memcmp(enc_key, token.key, elen) == 0) {
      if (token.type == PARAM) {
        char val[token.value_length + 1];
        vlen = uri_decode(token.value, token.value_length, val, "");
        value = newSVpvn(val, vlen);
        sv_utf8_decode(value);
        av_push(out, value);
      }
      else {
        av_push(out, newSV(0));
      }
    }
  }

  return newRV_noinc((SV*) out);
}

/*------------------------------------------------------------------------------
 * Raw setters
 -----------------------------------------------------------------------------*/

// Instantiate set_raw_<member>() for every member of uri_t that holds a
// string.
URI_RAW_SETTER(scheme);
URI_RAW_SETTER(path);
URI_RAW_SETTER(query);
URI_RAW_SETTER(frag);
URI_RAW_SETTER(usr);
URI_RAW_SETTER(pwd);
URI_RAW_SETTER(host);
URI_RAW_SETTER(port);

// Replaces the authority section with the parts scanned from sv_value, which
// is used as given (no encoding is applied). The user, password, host and port
// slots are always cleared first; an undefined value therefore just clears the
// authority. Croaks when the value is longer than URI_SIZE_auth bytes.
static
void set_raw_auth(pTHX_ SV *sv_uri, SV *sv_value) {
  uri_t *uri = URI(sv_uri);
  size_t raw_len;
  const char *raw;

  str_clear(aTHX_ uri->usr);
  str_clear(aTHX_ uri->pwd);
  str_clear(aTHX_ uri->host);
  str_clear(aTHX_ uri->port);

  if (!is_defined(aTHX_ sv_value)) {
    return;
  }

  raw = SvPV_const(sv_value, raw_len);

  if (raw_len > URI_SIZE_auth) {
    croak("set_auth: size of auth string exceeds max of %lu", URI_SIZE_auth);
  }

  // The authority has no slot of its own, so its parts are re-derived by
  // scanning the new string.
  uri_scan_auth(aTHX_ uri, raw, raw_len);
}

/*------------------------------------------------------------------------------
 * Setters
 -----------------------------------------------------------------------------*/

// Instantiate the encoding setters. Per the URI_SIMPLE_SETTER definition, each
// one accepts an unencoded value, encodes it while leaving the characters in
// the second argument untouched, and copies the result into the named member.
// The scheme passes an empty set, so no extra characters are exempted for it.
URI_SIMPLE_SETTER(scheme, "");
URI_SIMPLE_SETTER(path,   URI_CHARS_PATH);
URI_SIMPLE_SETTER(query,  URI_CHARS_QUERY);
URI_SIMPLE_SETTER(frag,   URI_CHARS_FRAG);
URI_SIMPLE_SETTER(usr,    URI_CHARS_USER);
URI_SIMPLE_SETTER(pwd,    URI_CHARS_USER);
URI_SIMPLE_SETTER(host,   URI_CHARS_HOST);

/*
 * Sets the port member. An undefined value clears the port; any other value
 * is stringified and stored as given (no encoding is applied here).
 */
static
void set_port(pTHX_ SV *sv_uri, SV *sv_value) {
  uri_t *uri = URI(sv_uri);

  if (!is_defined(aTHX_ sv_value)) {
    str_clear(aTHX_ uri->port);
    return;
  }

  size_t vlen;
  const char *value = SvPV_const(sv_value, vlen);
  str_set(aTHX_ uri->port, value, vlen);
}

/*
 * Replaces the authority section from an unencoded string. The four
 * authority members are always emptied first; an undefined value therefore
 * simply clears the authority. Otherwise the value is encoded (leaving the
 * characters in URI_CHARS_AUTH untouched) and rescanned into usr, pwd, host
 * and port. Croaks when the unencoded input is longer than URI_SIZE_auth.
 */
static
void set_auth(pTHX_ SV *sv_uri, SV *sv_value) {
  uri_t *uri = URI(sv_uri);

  str_clear(aTHX_ uri->usr);
  str_clear(aTHX_ uri->pwd);
  str_clear(aTHX_ uri->host);
  str_clear(aTHX_ uri->port);

  if (is_defined(aTHX_ sv_value)) {
    size_t vlen;
    const char *value = SvPV_const(sv_value, vlen);

    if (vlen > URI_SIZE_auth) {
      croak("set_auth: size of auth string exceeds max of %lu", URI_SIZE_auth);
    }

    // auth isn't stored as an individual field, so encode to local array and
    // rescan. Encoding may turn every input byte into a three-byte escape, so
    // the scratch buffer must hold three times the largest accepted input
    // (plus terminator); a buffer of only URI_SIZE_auth bytes would overflow.
    char auth[(URI_SIZE_auth * 3) + 2];
    size_t len = uri_encode(value, vlen, auth, URI_CHARS_AUTH, uri->is_iri);

    uri_scan_auth(aTHX_ uri, auth, len);
  }
}

/*
 * Rebuilds the path from an array of unencoded segments. Every element
 * produces a leading "/" followed by the encoded segment; missing or
 * undefined elements produce an empty segment. An undefined sv_path clears
 * the path. Croaks when sv_path is defined but is not an array reference.
 */
static
void set_path_array(pTHX_ SV *sv_uri, SV *sv_path) {
  uri_t *uri = URI(sv_uri);
  SV **refval, *tmp;
  AV *av_path;
  SSize_t i, top;
  size_t seg_len;
  const char *seg;
  uri_str_t *path = uri->path;

  if (!is_defined(aTHX_ sv_path)) {
    str_clear(aTHX_ path);
    return;
  }

  // Dereferencing anything other than an array ref below would be undefined
  // behavior, so reject it up front (before the existing path is discarded).
  if (!is_ref(aTHX_ sv_path) || SvTYPE(SvRV(sv_path)) != SVt_PVAV) {
    croak("set_path_array: expected array ref");
  }

  str_clear(aTHX_ path);

  // Inspect input array. av_top_index() is signed and yields -1 for an empty
  // array, in which case the loop below does not run.
  av_path = (AV*) SvRV(sv_path);
  top     = av_top_index(av_path);

  // Build the new path
  for (i = 0; i <= top; ++i) {
    // Add separator. If the next value fetched from the array is invalid, it
    // just gets an empty segment.
    str_append(aTHX_ path, "/", 1);

    // Fetch next segment
    refval = av_fetch(av_path, i, 0);
    if (refval == NULL || !is_defined(aTHX_ *refval)) continue;

    seg = SvPV_nomg_const(*refval, seg_len);

    // Convert octets to utf8 if necessary, using a mortal copy so the
    // caller's value is left untouched
    if (!DO_UTF8(*refval)) {
      tmp = sv_2mortal(newSVpvn(seg, seg_len));
      sv_utf8_encode(tmp);
      seg = SvPV_const(tmp, seg_len);
    }

    // Sized for the case where every byte becomes a 3-byte escape
    char out[seg_len * 3 + 1];
    size_t out_len = uri_encode(seg, seg_len, out, URI_CHARS_PATH_SEGMENT, uri->is_iri);
    str_append(aTHX_ path, out, out_len);
  }
}

/*
 * Updates the query string from a hash of key => flag pairs. Tokens already
 * in the query whose key maps to a false value are dropped; all other tokens
 * are copied over unchanged. Afterwards every hash key with a true value is
 * appended (key only, no value). Pairs are joined with sv_separator, or "&"
 * when it is undefined. Croaks when sv_key_set is not a hash reference.
 */
static
void update_query_keyset(pTHX_ SV *sv_uri, SV *sv_key_set, SV *sv_separator) {
  uri_t  *uri = URI(sv_uri);
  HE     *ent;
  HV     *keys, *enc_keys;
  I32    iterlen, i, klen;
  SV     *val, **refval;
  bool   copy;
  char   *key;
  size_t off = 0;
  uri_str_t *query = uri->query;
  uri_str_t *dest;

  size_t slen = 1;
  const char *separator = is_defined(aTHX_ sv_separator) ? SvPV_const(sv_separator, slen) : "&";

  uri_query_scanner_t scanner;
  uri_query_token_t   token;

  // Validate reference parameters. This happens before any allocation so the
  // croak() path has nothing to leak.
  if (!is_ref(aTHX_ sv_key_set) || SvTYPE(SvRV(sv_key_set)) != SVt_PVHV) {
    croak("set_query_keys: expected hash ref");
  }

  dest = str_new(aTHX_ URI_SIZE_query);

  // Dereference key set hash
  keys = (HV*) SvRV(sv_key_set);

  // Create new HV with all keys uri-encoded. It is mortal so that it is
  // released once the caller's temporaries are freed, even if something
  // below croaks.
  enc_keys = (HV*) sv_2mortal((SV*) newHV());
  iterlen = hv_iterinit(keys);

  for (i = 0; i < iterlen; ++i) {
    ent = hv_iternext(keys);
    key = hv_iterkey(ent, &klen);
    val = hv_iterval(keys, ent);

    SvGETMAGIC(val);

    if (klen > 0) {
      char enc_key[(3 * klen) + 1];
      klen = uri_encode(key, klen, enc_key, ":@?/", uri->is_iri);

      // hv_store() takes ownership of one reference to val, which is still
      // owned by the caller's hash: hand it a reference of its own and give
      // that reference back if the store fails. A negative key length marks
      // the key as utf8.
      if (hv_store(enc_keys, enc_key, klen * (uri->is_iri ? -1 : 1), SvREFCNT_inc(val), 0) == NULL) {
        SvREFCNT_dec(val);
      }
    }
  }

  // Begin building the new query string from the existing one. As each key is
  // encountered in the query string, exclude ones with a falsish value in the
  // hash and keep the ones with a truish value. Any not present in the hash
  // are kept unchanged.
  query_scanner_init(&scanner, str_get(query), str_len(query));

  while (!query_scanner_done(&scanner)) {
    query_scanner_next(&scanner, &token);
    if (token.type == DONE) continue;

    // Use the encoded keys hash to decide whether to copy this key (and
    // value if present) over to dest.
    // NOTE(review): a key present in both the query and the hash with a true
    // value is copied here AND appended again by the final loop below -
    // confirm whether that duplication is intended.
    copy = 1;
    if (hv_exists(enc_keys, token.key, token.key_length * (uri->is_iri ? -1 : 1))) {
      refval = hv_fetch(enc_keys, token.key, token.key_length * (uri->is_iri ? -1 : 1), 0);
      // NULL shouldn't be possible since this is guarded with hv_exists, but
      // perlguts, amirite?
      copy = refval == NULL || SvTRUE(*refval);
    }

    if (copy) {
      // 'off' only tracks whether anything has been written yet, so that a
      // separator is emitted between pairs but never first
      if (off > 0) {
        str_append(aTHX_ dest, separator, slen);
        off += slen;
      }

      str_append(aTHX_ dest, token.key, token.key_length);
      off += token.key_length;

      if (token.type == PARAM) {
        str_append(aTHX_ dest, "=", 1);
        str_append(aTHX_ dest, token.value, token.value_length);
        off += token.value_length;
      }
    }
  }

  // Walk through the encoded-key hash, adding remaining keys.
  iterlen = hv_iterinit(enc_keys);

  for (i = 0; i < iterlen; ++i) {
    ent = hv_iternext(enc_keys);
    key = hv_iterkey(ent, &klen);
    val = hv_iterval(enc_keys, ent);

    if (SvTRUE(val)) {
      // Add separator if the new query string is not empty
      if (off > 0) {
        str_append(aTHX_ dest, separator, slen);
        off += slen;
      }

      str_append(aTHX_ dest, key, klen);
      off += klen;
    }
  }

  // Swap the rebuilt query in for the old one
  str_free(aTHX_ query);
  uri->query = dest;
}

/*
 * Replaces every occurrence of a query parameter. Existing tokens whose key
 * matches sv_key are dropped, all others are copied over unchanged, and then
 * one "key=value" pair is appended per element of the sv_values array,
 * stopping at the first missing or undefined element. Pairs are joined with
 * sv_separator, or "&" when it is undefined. Croaks when sv_key is undefined
 * or sv_values is not an array reference.
 */
static
void set_param(pTHX_ SV *sv_uri, SV *sv_key, SV *sv_values, SV *sv_separator) {
  uri_t *uri = URI(sv_uri);
  const char *strval;
  size_t vlen, reflen, off = 0;
  SSize_t i, top;
  AV *av_values;
  SV **refval;
  uri_str_t *dest;
  uri_query_scanner_t scanner;
  uri_query_token_t token;

  size_t slen = 1;
  const char *separator = is_defined(aTHX_ sv_separator) ? SvPV_const(sv_separator, slen) : "&";

  // Build encoded key string
  if (!is_defined(aTHX_ sv_key)) {
    croak("set_param: expected key");
  }

  // Encode using the length reported by the SV rather than strlen(), so a key
  // containing an embedded NUL byte is not silently truncated.
  size_t klen;
  const char *key = SvPV_const(sv_key, klen);
  char enc_key[(3 * klen) + 1];
  klen = uri_encode(key, klen, enc_key, ":@?/", uri->is_iri);

  // Get array of values to set
  if (!is_ref(aTHX_ sv_values) || SvTYPE(SvRV(sv_values)) != SVt_PVAV) {
    croak("set_param: expected array of values");
  }

  // av_top_index() is signed and yields -1 for an empty array
  av_values = (AV*) SvRV(sv_values);
  top = av_top_index(av_values);

  // Allocated only after all validation so the croak() paths leak nothing
  dest = str_new(aTHX_ URI_SIZE_query);

  // Begin building the new query string from the existing one, skipping
  // keys (and their values, if any) matching sv_key.
  query_scanner_init(&scanner, uri->query->string, uri->query->length);

  while (!query_scanner_done(&scanner)) {
    query_scanner_next(&scanner, &token);
    if (token.type == DONE) continue;

    // The key does not match the key being set
    if (strncmp(enc_key, token.key, maxnum(klen, token.key_length)) != 0) {
      // Add separator if this is not the first key being written
      if (off > 0) {
        str_append(aTHX_ dest, separator, slen);
        off += slen;
      }

      // Write the key to the buffer
      str_append(aTHX_ dest, token.key, token.key_length);
      off += token.key_length;

      // The key has a value
      if (token.type == PARAM) {
        str_append(aTHX_ dest, "=", 1);

        // If the value's length is 0, it was parsed from "key=", so the value
        // is not written after the '=' is added above.
        if (token.value_length > 0) {
          // Otherwise, write the value to the buffer
          str_append(aTHX_ dest, token.value, token.value_length);
          off += token.value_length;
        }
      }
    }
  }

  // Add the new values to the query
  for (i = 0; i <= top; ++i) {
    // Fetch next value from the array; the first hole or undefined element
    // ends the list
    refval = av_fetch(av_values, i, 0);
    if (refval == NULL) break;
    if (!is_defined(aTHX_ *refval)) break;

    // Add separator if needed to separate pairs
    if (off > 0) {
      str_append(aTHX_ dest, separator, slen);
      off += slen;
    }

    // Copy key over
    str_append(aTHX_ dest, enc_key, klen);
    off += klen;

    str_append(aTHX_ dest, "=", 1);

    // Copy value over
    SvGETMAGIC(*refval);
    strval = SvPV_const(*refval, reflen);

    // Sized for the case where every byte becomes a 3-byte escape
    char tmp[reflen * 3 + 1];
    vlen = uri_encode(strval, reflen, tmp, ":@?/", uri->is_iri);
    str_append(aTHX_ dest, tmp, vlen);
    off += vlen;
  }

  // Swap the rebuilt query in for the old one
  str_free(aTHX_ uri->query);
  uri->query = dest;
}

/*------------------------------------------------------------------------------
 * Other stuff
 -----------------------------------------------------------------------------*/

/*
 * Assembles the string form of the uri: "scheme:", then "//" and the raw
 * authority when one is present, then the path, "?query" and "#frag". The
 * returned SV is new (non-mortal) and carries the utf8 flag for IRIs.
 */
static
SV* to_string(pTHX_ SV *uri_obj) {
  uri_t *uri = URI(uri_obj);
  SV *auth = sv_2mortal(get_raw_auth(aTHX_ uri_obj));
  SV *out = newSVpvn("", 0);

  // Decide whether an authority is present by its length, not its truthiness:
  // the string "0" is false in Perl, so a truth test would silently drop an
  // authority consisting of the host "0".
  const bool has_auth = SvPOK(auth) && SvCUR(auth) > 0;

  if (uri->is_iri) {
    SvUTF8_on(out);
  }

  if (str_len(uri->scheme) > 0) {
    sv_catpvn(out, str_get(uri->scheme), str_len(uri->scheme));
    sv_catpvn(out, ":", 1);

    if (has_auth) {
      // When the authority section is present, the scheme must be followed by
      // two forward slashes
      sv_catpvn(out, "//", 2);
    }
  }

  if (has_auth) {
    sv_catsv(out, auth);

    // When the authority section is present, any path must be separated from
    // the authority section by a forward slash
    if (str_len(uri->path) > 0 && (str_get(uri->path))[0] != '/') {
      sv_catpvn(out, "/", 1);
    }
  }

  sv_catpvn(out, str_get(uri->path), str_len(uri->path));

  if (str_len(uri->query) > 0) {
    sv_catpvn(out, "?", 1);
    sv_catpvn(out, str_get(uri->query), str_len(uri->query));
  }

  if (str_len(uri->frag) > 0) {
    sv_catpvn(out, "#", 1);
    sv_catpvn(out, str_get(uri->frag), str_len(uri->frag));
  }

  return out;
}

/*
 * Prints every member of the uri to stdout, one per line, with the authority
 * members grouped under an "auth:" heading.
 */
static
void explain(pTHX_ SV* sv_uri) {
  const uri_t *u = URI(sv_uri);

  printf(
    "scheme: %s\n"
    "auth:\n"
    "  -usr: %s\n"
    "  -pwd: %s\n"
    "  -host: %s\n"
    "  -port: %s\n"
    "path: %s\n"
    "query: %s\n"
    "frag: %s\n",
    u->scheme->string,
    u->usr->string,
    u->pwd->string,
    u->host->string,
    u->port->string,
    u->path->string,
    u->query->string,
    u->frag->string
  );
}

/*
 * Emits every member of the uri through warn(), one warning per line, with
 * the authority members grouped under an "auth:" heading.
 */
static
void debug(pTHX_ SV* sv_uri) {
  const uri_t *u = URI(sv_uri);

  warn("scheme: %s\n", u->scheme->string);

  // Authority members
  warn("auth:\n");
  warn("  -usr: %s\n", u->usr->string);
  warn("  -pwd: %s\n", u->pwd->string);
  warn("  -host: %s\n", u->host->string);
  warn("  -port: %s\n", u->port->string);

  // Remaining sections
  warn("path: %s\n", u->path->string);
  warn("query: %s\n", u->query->string);
  warn("frag: %s\n", u->frag->string);
}

/*
 * Allocates a uri_t, wraps it in an SV blessed into 'class', and fills it by
 * scanning uri_str. An undefined uri_str yields an empty uri. 'is_iri' is
 * stored on the struct. Returns the new blessed reference (non-mortal).
 */
static
SV* new(pTHX_ const char* class, SV* uri_str, int is_iri) {
  const char* src;
  size_t len;
  uri_t* uri;
  SV*    obj;
  SV*    obj_ref;

  // Initialize the struct
  Newx(uri, 1, uri_t);
  Zero(uri, 1, uri_t);

  uri->is_iri = is_iri;
  uri->scheme = str_new(aTHX_ URI_SIZE_scheme);
  uri->usr    = str_new(aTHX_ URI_SIZE_usr);
  uri->pwd    = str_new(aTHX_ URI_SIZE_pwd);
  uri->host   = str_new(aTHX_ URI_SIZE_host);
  uri->port   = str_new(aTHX_ URI_SIZE_port);
  uri->path   = str_new(aTHX_ URI_SIZE_path);
  uri->query  = str_new(aTHX_ URI_SIZE_query);
  uri->frag   = str_new(aTHX_ URI_SIZE_frag);

  // Build the blessed instance: the struct pointer is kept in an IV, and the
  // reference to that IV is what gets blessed
  obj = newSViv((IV) uri);
  obj_ref = newRV_noinc(obj);
  sv_bless(obj_ref, gv_stashpv(class, GV_ADD));

  // Scan the input string to fill the struct. Only an undefined value is
  // treated as "no input": testing truthiness instead would also discard the
  // perfectly valid input "0". Magic is fetched once here because the string
  // is read with the _nomg variant below.
  SvGETMAGIC(uri_str);

  if (!SvOK(uri_str)) {
    src = "";
    len = 0;
  }
  else {
    // Copy input string *before* calling DO_UTF8() in case the SV is an object
    // with string overloading, which may trigger the utf8 flag.
    src = SvPV_nomg_const(uri_str, len);

    // Ensure the pv bytes are utf8-encoded, working on a mortal copy so the
    // caller's SV is left untouched
    if (!DO_UTF8(uri_str)) {
      uri_str = sv_2mortal(newSVpvn(src, len));
      sv_utf8_encode(uri_str);
      src = SvPV_const(uri_str, len);
    }
  }

  uri_scan(aTHX_ uri, src, len);

  return obj_ref;
}

/*
 * Releases every member string owned by the uri and then the struct itself.
 */
static
void DESTROY(pTHX_ SV *sv_uri) {
  uri_t *uri = URI(sv_uri);

  // Same release order as the members are listed in the struct's constructor
  uri_str_t *members[] = {
    uri->scheme, uri->usr,  uri->pwd,   uri->host,
    uri->port,   uri->path, uri->query, uri->frag
  };
  size_t idx;

  for (idx = 0; idx < sizeof(members) / sizeof(members[0]); ++idx) {
    str_free(aTHX_ members[idx]);
  }

  Safefree(uri);
}

/*
 * Extras
 */

/*
 * Splits a uri string into its component sections: scheme, authority, path,
 * query, fragment. Pushes those values directly onto the results stack.
 *
 * For a URI::Fast object the five values come from its getters. For any other
 * defined value, the string is split textually: an absent scheme, authority,
 * query or fragment is pushed as undef, while the path is always pushed as a
 * string (possibly empty). For an undefined value nothing is pushed.
 *
 * NOTE(review): this function declares its own XS stack frame with dXSARGS,
 * so it presumably must be called from a wrapper that sets up the mark stack
 * accordingly - confirm against the XSUB that invokes it.
 */
static
void uri_split(pTHX_ SV *uri) {
  dXSARGS;
  // Rewind the local stack pointer to the mark so that the values pushed
  // below start at the base of this call's argument list
  sp = mark;

  // If the object has already been parsed, there is no need to reparse it.
  if (sv_isobject(uri) && sv_derived_from(uri, "URI::Fast")) {
    XPUSHs(sv_2mortal(get_scheme(aTHX_ uri)));
    XPUSHs(sv_2mortal(get_auth(aTHX_ uri)));
    XPUSHs(sv_2mortal(get_path(aTHX_ uri)));
    XPUSHs(sv_2mortal(get_query(aTHX_ uri)));
    XPUSHs(sv_2mortal(get_frag(aTHX_ uri)));
  }
  // The object is defined
  else if (SvOK(uri)) {
    // idx: offset of the next unparsed byte; brk: length of the current piece
    size_t idx = 0;
    size_t brk = 0;

    // Read the string from the SV
    const char *src;
    size_t len;

    if (sv_len(uri) == 0) {
      src = "";
      len = 0;
    }
    else {
      // Copy string into new SV
      // NOTE(review): the copy is taken from ST(0) rather than from 'uri';
      // presumably these are the same SV - confirm.
      SV* str = sv_2mortal(newSV(0));
      sv_copypv(str, ST(0));
      src = SvPV_const(str, len);

      // Upgrade the private copy to utf8 octets when it is not already utf8
      if (!DO_UTF8(str)) {
        sv_utf8_encode(str);
        src = SvPV_const(str, len);
      }
    }

    // scheme: everything before the first of ":/@?#", but only when that
    // delimiter is a colon and the scheme is not empty
    brk = strcspn(&src[idx], ":/@?#");

    if (brk > 0 && src[idx + brk] == ':') {
      XPUSHs(sv_2mortal(newSVpvn(&src[idx], brk)));
      idx += brk;
      ++idx; // skip past ":"
    }
    else {
      XPUSHs(&PL_sv_undef);
    }

    // authority
    if (idx + 1 < len         // src is long enough to hold two slashes
     && src[idx]     == '/'   // next char is a slash
     && src[idx + 1] == '/')  // char after that is a slash
    {
      idx += 2;               // skip past the double slashes

      // The authority runs up to the start of the path, query or fragment
      brk = strcspn(&src[idx], "/?#");

      if (brk > 0) {
        XPUSHs(sv_2mortal(newSVpvn(&src[idx], brk)));
        idx += brk;
      }
      else {
        // "//" was present but nothing followed it: empty, not undef
        XPUSHs(sv_2mortal(newSVpvn("", 0)));
      }
    }
    else {
      XPUSHs(&PL_sv_undef);
    }

    // path: runs up to the query or fragment; never undef
    brk = strcspn(&src[idx], "?#");
    if (brk > 0) {
      XPUSHs(sv_2mortal(newSVpvn(&src[idx], brk)));
      idx += brk;
    } else {
      XPUSHs(sv_2mortal(newSVpvn("", 0)));
    }

    // query
    if (src[idx] == '?') {
      ++idx; // skip past ?
      brk = strcspn(&src[idx], "#");
      if (brk > 0) {
        XPUSHs(sv_2mortal(newSVpvn(&src[idx], brk)));
        idx += brk;
      } else {
        XPUSHs(sv_2mortal(newSVpvn("", 0)));
      }
    } else {
      XPUSHs(&PL_sv_undef);
    }

    // fragment: everything that remains after the '#'
    if (src[idx] == '#') {
      ++idx; // skip past #
      brk = len - idx;
      if (brk > 0) {
        XPUSHs(sv_2mortal(newSVpvn(&src[idx], brk)));
      } else {
        XPUSHs(sv_2mortal(newSVpvn("", 0)));
      }
    } else {
      XPUSHs(&PL_sv_undef);
    }
  }

  // Publish the local stack pointer back to the interpreter
  PUTBACK;
}

/*
 * Collapses dotted segments in a path string based on the rules defined in RFC
 * 3986 section 5.2.
 *
 * Reads 'len' bytes from 'path' and appends the collapsed result to 'out'.
 * Nothing is appended when 'len' is 0. The input is not modified.
 */
static
void remove_dot_segments(pTHX_ uri_str_t *out, const char *path, size_t len) {
  if (len == 0) {
    return;
  }

  size_t brk, idx = 0;

  // Work on a private, NUL-terminated copy: some rules below edit the buffer
  // in place, and the strncmp() calls depend on the terminator to stop at the
  // end of the input. The buffer therefore needs len + 1 bytes; the terminator
  // is written explicitly rather than copied from the caller's memory.
  char in[len + 1];
  Copy(path, in, len, char);
  in[len] = '\0';

  while (idx < len) {
    // in begins with "./" or "../": ignore prefix completely
    if (strncmp(&in[idx], "./", 2) == 0) {
      idx += 2;
    }
    else if (strncmp(&in[idx], "../", 3) == 0) {
      idx += 3;
    }

    // in begins with /./: replace with /
    else if (strncmp(&in[idx], "/./", 3) == 0) {
      idx += 2; // inc to the final / in /./ instead of editing the buffer
    }

    // in begins with /. and . is a complete segment: replace with /
    else if (strncmp(&in[idx], "/.", 2) == 0 && idx + 2 == len) {
      idx += 1;
      in[idx] = '/';
    }

    // in begins with /../: replace with /, remove final segment from out
    else if (strncmp(&in[idx], "/../", 4) == 0) {
      idx += 3; // inc to the final / in /../ instead of editing the buffer
      str_rtrim(aTHX_ out, '/');
    }

    // in begins with /.. and .. is a complete $in segment: replace with /, remove final segment from out
    else if (strncmp(&in[idx], "/..", 3) == 0 && idx + 3 == len) {
      idx += 2;
      in[idx] = '/';
      str_rtrim(aTHX_ out, '/');
    }

    // in is "." or "..": done
    else if ((in[idx] == '.' && idx + 1 == len)
          || (in[idx] == '.' && in[idx + 1] == '.' && idx + 2 == len)) {
      break;
    }

    // else copy everything up to but not including the next '/' from in to out
    else {
      if (in[idx] == '/') {
        // Keep the leading slash with the segment that follows it; minnum()
        // caps the span at the end of the input
        brk = minnum(len - idx, 1 + strncspn(&in[idx + 1], len - idx, "/"));
      }
      else {
        brk = strncspn(&in[idx], len - idx, "/");
      }

      str_append(aTHX_ out, &in[idx], brk);
      idx += brk;
    }
  }
}

/*------------------------------------------------------------------------------
 * Absolution
 *
 * As defined in https://www.rfc-editor.org/rfc/rfc3986.txt section 5.2
 *
 * Resolves the reference sv_uri against sv_base and writes the result into
 * the members of sv_target. All three arguments must be URI::Fast objects.
 * The path is appended to target's path, so target is expected to start out
 * empty (the callers in this file pass a freshly constructed empty uri).
 *----------------------------------------------------------------------------*/
static
void absolute(pTHX_ SV *sv_target, SV *sv_uri, SV *sv_base) {
  uri_t *target = URI(sv_target);
  uri_t *rel    = URI(sv_uri);
  uri_t *base   = URI(sv_base);

  const char *class = class_name(aTHX_ sv_target);

  // Relative URIs may begin with // to indicate an authority section without a
  // scheme, which is illegal in standard URI syntax (authority may only come
  // after a scheme, which is required, separated by //). This workaround helps
  // the parser along by identifying the authority section as such.
  if (rel->scheme->length == 0
   && rel->host->length == 0
   && rel->path->length >= 2
   && strncmp(rel->path->string, "//", 2) == 0)
  {
    // Reparse the reference with a placeholder scheme ("x:") in front of it,
    // then drop the placeholder again. From here on 'rel' points at the
    // mortal reparsed copy rather than at the caller's object.
    SV *fixed = newSVpvn("x:", 2);
    sv_catsv(fixed, sv_2mortal(to_string(aTHX_ sv_uri)));

    SV *sv_tmp = sv_2mortal(new(aTHX_ class, sv_2mortal(fixed), 0));
    rel = URI(sv_tmp);

    str_clear(aTHX_ rel->scheme);
  }

  // The reference has its own scheme: everything except the fragment's
  // handling below comes from the reference itself
  if (rel->scheme->length != 0) {
    remove_dot_segments(aTHX_ target->path, rel->path->string, rel->path->length);
    str_copy(aTHX_ rel->scheme, target->scheme);
    str_copy(aTHX_ rel->usr,    target->usr);
    str_copy(aTHX_ rel->pwd,    target->pwd);
    str_copy(aTHX_ rel->host,   target->host);
    str_copy(aTHX_ rel->port,   target->port);
    str_copy(aTHX_ rel->query,  target->query);
  }
  else {
    // No scheme, but the reference carries a user or host: authority, path
    // and query come from the reference
    if (rel->usr->length > 0 || rel->host->length > 0) {
      remove_dot_segments(aTHX_ target->path, rel->path->string, rel->path->length);
      str_copy(aTHX_ rel->usr,    target->usr);
      str_copy(aTHX_ rel->pwd,    target->pwd);
      str_copy(aTHX_ rel->host,   target->host);
      str_copy(aTHX_ rel->port,   target->port);
      str_copy(aTHX_ rel->query,  target->query);
    }
    else {
      if (rel->path->length == 0) {
        // Empty reference path: keep the base path, and the base query too
        // unless the reference supplies a query of its own
        str_copy(aTHX_ base->path, target->path);

        if (rel->query->length != 0) {
          str_copy(aTHX_ rel->query, target->query);
        } else {
          str_copy(aTHX_ base->query, target->query);
        }
      }
      else {
        if (rel->path->string[0] == '/') {
          // Absolute reference path: used on its own
          remove_dot_segments(aTHX_ target->path, rel->path->string, rel->path->length);
        }
        else {
          // Relative reference path: merge it with the base path first
          uri_str_t *merged = str_new(aTHX_ rel->path->length + base->path->length);

          if (base->scheme->length > 0 && base->path->length == 0) {
            str_append(aTHX_ merged, "/", 1);
            str_append(aTHX_ merged, rel->path->string, rel->path->length);
          }
          else {
            if (base->path->length > 0 && strstr(base->path->string, "/") != NULL) {
              // truncate base path at right-most /, inclusive
              str_append(aTHX_ merged, base->path->string, base->path->length);
              str_rtrim(aTHX_ merged, '/');
            } else {
              // if there is no / in the base path, truncate it completely
            }

            str_append(aTHX_ merged, "/", 1);
            str_append(aTHX_ merged, rel->path->string, rel->path->length);
          }

          remove_dot_segments(aTHX_ target->path, merged->string, merged->length);
          str_free(aTHX_ merged);
        }

        str_copy(aTHX_ rel->query, target->query);
      }

      // Without an authority in the reference, the authority is the base's
      str_copy(aTHX_ base->usr,  target->usr);
      str_copy(aTHX_ base->pwd,  target->pwd);
      str_copy(aTHX_ base->host, target->host);
      str_copy(aTHX_ base->port, target->port);
    }

    // Without a scheme in the reference, the scheme is the base's
    str_copy(aTHX_ base->scheme, target->scheme);
  }

  // The fragment always comes from the reference
  str_copy(aTHX_ rel->frag, target->frag);
}

/*
 * Re-encodes a uri_str_t in place by decoding it and encoding the result
 * again, leaving the characters in 'permitted_chars' unencoded. Strings that
 * are empty, or that contain neither '+' nor '%', are left untouched.
 */
// unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
//       41-5A / 61-7A / 30-39 / 2D  / 2E  / 5F  / 7E
static inline
void normalize_encoding(pTHX_ uri_str_t *str, char *permitted_chars, int allow_utf8) {
  // Nothing stored, nothing to do
  if (str->length == 0) {
    return;
  }

  // Without a '+' or '%' there is nothing for the decoder to act on
  if (strpbrk(str->string, "+%") == NULL) {
    return;
  }

  // Decoding never grows the string
  char plain[str->length + 1];
  size_t plain_len = uri_decode(str->string, str->length, plain, "");

  // Encoding can grow every byte into a 3-byte escape
  char escaped[(plain_len * 3) + 2];
  size_t escaped_len = uri_encode(plain, plain_len, escaped, permitted_chars, allow_utf8);

  str_set(aTHX_ str, escaped, escaped_len);
}

/*
 * Performs minimal normalization in place: lower cases the scheme and host,
 * collapses dot segments in the path, re-encodes every member except the
 * scheme and port, and makes an empty path "/" when an authority is present.
 */
static
void normalize(pTHX_ SV *uri_obj) {
  uri_t *uri = URI(uri_obj);
  uri_str_t *scheme = uri->scheme;
  uri_str_t *host   = uri->host;
  size_t remaining;

  // (6.2.2.1) lower case scheme
  remaining = scheme->length;
  while (remaining-- > 0) {
    scheme->string[remaining] = toLOWER(scheme->string[remaining]);
  }

  // (6.2.2.1) lower case hostname
  remaining = host->length;
  while (remaining-- > 0) {
    host->string[remaining] = toLOWER(host->string[remaining]);
  }

  // (6.2.2) remove dot segments from path
  // Collapsing is costly, so it only runs for a non-empty path that actually
  // contains a dot.
  if (uri->path->length > 0 && strchr(uri->path->string, '.') != NULL) {
    uri_str_t *collapsed = str_new(aTHX_ URI_SIZE_path);
    remove_dot_segments(aTHX_ collapsed, uri->path->string, uri->path->length);

    // The collapsed copy replaces the original path
    str_free(aTHX_ uri->path);
    uri->path = collapsed;
  }

  // (6.2.2.1) upper case hex codes in each section of the uri
  // (6.2.2.2) decode any percent-encoded sequences decoding to unreserved chars
  normalize_encoding(aTHX_ uri->usr,   URI_CHARS_USER,  uri->is_iri);
  normalize_encoding(aTHX_ uri->pwd,   URI_CHARS_USER,  uri->is_iri);
  normalize_encoding(aTHX_ uri->host,  URI_CHARS_HOST,  uri->is_iri);
  normalize_encoding(aTHX_ uri->path,  URI_CHARS_PATH,  uri->is_iri);
  normalize_encoding(aTHX_ uri->query, URI_CHARS_QUERY, uri->is_iri);
  normalize_encoding(aTHX_ uri->frag,  URI_CHARS_FRAG,  uri->is_iri);

  // (6.2.3) empty path should be represented as "/" when authority is present
  if (uri->path->length == 0 && has_authority(aTHX_ uri)) {
    str_set(aTHX_ uri->path, "/", 1);
  }
}

/*
 * Returns a new (non-mortal) copy of the uri string with tabs, line feeds,
 * and carriage returns removed and backslashes turned into forward slashes.
 * When the input starts with "//" and a base with a non-empty scheme is
 * given, the base's scheme and a colon are placed in front of the result.
 * The result is flagged utf8 when the input was.
 */
SV* html_url(pTHX_ SV *uri, SV *base) {
  SV *result;
  size_t pos;
  size_t len;
  const char *in = SvPV_const(uri, len);
  uri_str_t *buf = str_new(aTHX_ 32);

  // Scheme-less "//host/..." input borrows the scheme of the base, if any
  if (in[0] == '/' && in[1] == '/'
   && base != NULL && (SvOK(base) || SvROK(base)))
  {
    uri_t *base_uri = URI(base);

    if (base_uri->scheme->length > 0) {
      str_append(aTHX_ buf, base_uri->scheme->string, base_uri->scheme->length);
      str_append(aTHX_ buf, ":", 1);
    }
  }

  // Filter the input one byte at a time
  for (pos = 0; pos < len; ++pos) {
    const char c = in[pos];

    // Tabs, carriage returns and line feeds are dropped entirely
    if (c == '\t' || c == '\r' || c == '\n') {
      continue;
    }

    if (c == '\\') {
      // A backslash is emitted as a forward slash
      str_append(aTHX_ buf, "/", 1);
    }
    else {
      str_append(aTHX_ buf, &in[pos], 1);
    }
  }

  result = URI_STR_2SV(buf);
  str_free(aTHX_ buf);

  // Carry the utf8 flag of the input over to the output
  if (DO_UTF8(uri)) {
    sv_utf8_decode(result);
  }

  return result;
}


MODULE = URI::Fast  PACKAGE = URI::Fast

PROTOTYPES: DISABLE

FALLBACK: TRUE

#-------------------------------------------------------------------------------
# URL-encoding
#-------------------------------------------------------------------------------
SV* encode(in, ...)
  SV *in
  ALIAS:
    uri_encode = 1
    url_encode = 2
  PREINIT:
    SV *temp = NULL;
  CODE:
    // The optional second argument is forwarded untouched to the C-level
    // encode(); NULL is passed when the caller supplied only the input.
    // NOTE(review): presumably it names the characters to leave unencoded -
    // confirm against encode().
    if (items > 1) {
      temp = ST(1);
    }
    RETVAL = encode(aTHX_ in, temp);
  OUTPUT:
    RETVAL

SV* decode(in)
  SV* in
  ALIAS:
    uri_decode = 1
    url_decode = 2
  CODE:
    // All three names (decode, uri_decode, url_decode) behave identically:
    // the alias index is not consulted.
    RETVAL = decode(aTHX_ in);
  OUTPUT:
    RETVAL

#-------------------------------------------------------------------------------
# Constructors and destructors
#-------------------------------------------------------------------------------
void DESTROY(uri_obj)
  SV *uri_obj
  CODE:
    // Frees the member strings and the underlying struct of the object
    DESTROY(aTHX_ uri_obj);

SV* new(class, uri_str)
  const char *class
  SV* uri_str
  ALIAS:
    new_iri = 1
  CODE:
    // ix is the alias index: 1 when invoked as new_iri. That form ignores the
    // class argument and always builds a URI::Fast::IRI with the IRI flag set;
    // the plain form blesses into the given class with the flag cleared.
    if (ix == 1) {
      RETVAL = new(aTHX_ "URI::Fast::IRI", uri_str, 1);
    } else {
      RETVAL = new(aTHX_ class, uri_str, 0);
    }
  OUTPUT:
    RETVAL

SV* new_abs(class, rel, base)
  const char *class
  SV *rel
  SV *base
  PREINIT:
    SV *abs;
  CODE:
    // Arguments that are not already objects derived from 'class' are parsed
    // into temporary (mortal) objects of that class first.
    if (!sv_isobject(rel) || !sv_derived_from(rel, class)) {
      rel = sv_2mortal(new(aTHX_ class, rel, 0));
    }

    if (!sv_isobject(base) || !sv_derived_from(base, class)) {
      base = sv_2mortal(new(aTHX_ class, base, 0));
    }

    // Start from an empty uri and let absolute() fill it in
    abs = new(aTHX_ class, sv_2mortal(newSVpvn("", 0)), 0);
    absolute(aTHX_ abs, rel, base);
    RETVAL = abs;
  OUTPUT:
    RETVAL

SV* new_html_url(class, url, ...)
  const char *class
  SV *url
  PREINIT:
    SV *base = NULL;
    SV *rel;
    SV *abs;
  CODE:
    if (items > 2) {
      // A base was supplied: parse it unless it already is a URI::Fast object
      if (!sv_isobject(ST(2)) || !sv_derived_from(ST(2), "URI::Fast")) {
        base = sv_2mortal(new(aTHX_ "URI::Fast", ST(2), 0));
      } else {
        base = ST(2);
      }

      // Clean the url, resolve it against the base, then normalize the result.
      // NOTE(review): this branch always creates URI::Fast objects, whereas
      // the branch without a base honors 'class' - confirm this is intended.
      rel = new(aTHX_ "URI::Fast", sv_2mortal(html_url(aTHX_ url, base)), 0);
      abs = new(aTHX_ "URI::Fast", sv_2mortal(newSVpvn("", 0)), 0);
      absolute(aTHX_ abs, sv_2mortal(rel), base);
      normalize(aTHX_ abs);
      RETVAL = abs;
    }
    else {
      // No base: clean the url, parse it as 'class' and normalize it
      rel = new(aTHX_ class, sv_2mortal(html_url(aTHX_ url, base)), 0);
      normalize(aTHX_ rel);
      RETVAL = rel;
    }

  OUTPUT:
    RETVAL

#-------------------------------------------------------------------------------
# Short-hand constructors
#-------------------------------------------------------------------------------
SV* uri(...)
  ALIAS:
    iri   = 1
    clone = 2
  PREINIT:
    SV *uri_str;
  CODE:
    // With no arguments an empty uri is built; otherwise the first argument
    // is the source.
    if (items == 0) {
      uri_str = sv_2mortal(newSVpvn("", 0));
    } else {
      uri_str = ST(0);
    }

    // Only the iri alias (ix == 1) builds a URI::Fast::IRI. Both uri and
    // clone (ix == 2) take the other branch and build a plain URI::Fast.
    if (ix == 1) {
      RETVAL = new(aTHX_ "URI::Fast::IRI", uri_str, 1);
    } else {
      RETVAL = new(aTHX_ "URI::Fast", uri_str, 0);
    }
  OUTPUT:
    RETVAL

SV* abs_uri(rel, base)
  SV *rel
  SV *base
  PREINIT:
    SV *abs;
  CODE:
    // Arguments that are not already URI::Fast objects are parsed into
    // temporary (mortal) ones first.
    if (!sv_isobject(rel) || !sv_derived_from(rel, "URI::Fast")) {
      rel = sv_2mortal(new(aTHX_ "URI::Fast", rel, 0));
    }

    if (!sv_isobject(base) || !sv_derived_from(base, "URI::Fast")) {
      base = sv_2mortal(new(aTHX_ "URI::Fast", base, 0));
    }

    // Start from an empty uri and let absolute() fill it in
    abs = new(aTHX_ "URI::Fast", sv_2mortal(newSVpvn("", 0)), 0);
    absolute(aTHX_ abs, rel, base);
    RETVAL = abs;
  OUTPUT:
    RETVAL

SV* html_url(url, ...)
  SV *url
  PREINIT:
    SV *base = NULL;
    SV *abs;
    SV *rel;
  CODE:
    if (items > 1) {
      // A base was supplied: parse it unless it already is a URI::Fast object
      if (!sv_isobject(ST(1)) || !sv_derived_from(ST(1), "URI::Fast")) {
        base = sv_2mortal(new(aTHX_ "URI::Fast", ST(1), 0));
      } else {
        base = ST(1);
      }

      // Clean the url, resolve it against the base, then normalize the result
      rel = new(aTHX_ "URI::Fast", sv_2mortal(html_url(aTHX_ url, base)), 0);
      abs = new(aTHX_ "URI::Fast", sv_2mortal(newSVpvn("", 0)), 0);
      absolute(aTHX_ abs, sv_2mortal(rel), base);
      normalize(aTHX_ abs);
      RETVAL = abs;
    }
    else {
      // No base: clean the url, parse it and normalize it
      rel = new(aTHX_ "URI::Fast", sv_2mortal(html_url(aTHX_ url, base)), 0);
      normalize(aTHX_ rel);
      RETVAL = rel;
    }
  OUTPUT:
    RETVAL

#-------------------------------------------------------------------------------
# Clearers
#-------------------------------------------------------------------------------
void clear_scheme(uri_obj)
  SV* uri_obj
  CODE:
    // Each clearer in this group is a thin wrapper that forwards to the
    // C-level function of the same name and returns nothing.
    clear_scheme(aTHX_ uri_obj);

void clear_path(uri_obj)
  SV* uri_obj
  CODE:
    // Forwards to the C-level clear_path()
    clear_path(aTHX_ uri_obj);

void clear_query (uri_obj)
  SV* uri_obj
  CODE:
    // Forwards to the C-level clear_query()
    clear_query(aTHX_ uri_obj);

void clear_frag(uri_obj)
  SV* uri_obj
  CODE:
    // Forwards to the C-level clear_frag()
    clear_frag(aTHX_ uri_obj);

void clear_usr(uri_obj)
  SV* uri_obj
  CODE:
    // Forwards to the C-level clear_usr()
    clear_usr(aTHX_ uri_obj);

void clear_pwd(uri_obj)
  SV* uri_obj
  CODE:
    // Forwards to the C-level clear_pwd()
    clear_pwd(aTHX_ uri_obj);

void clear_host(uri_obj)
  SV* uri_obj
  CODE:
    // Forwards to the C-level clear_host()
    clear_host(aTHX_ uri_obj);

void clear_port(uri_obj)
  SV* uri_obj
  CODE:
    // Forwards to the C-level clear_port()
    clear_port(aTHX_ uri_obj);

void clear_auth(uri_obj)
  SV* uri_obj
  CODE:
    // Forwards to the C-level clear_auth()
    clear_auth(aTHX_ uri_obj);

#-------------------------------------------------------------------------------
# Raw accessors
#-------------------------------------------------------------------------------
SV* raw_scheme(uri_obj, ...)
  SV *uri_obj
  CODE:
    // Every raw accessor in this group follows the same pattern: when a
    // second argument is given it is stored through the raw setter first, and
    // the current raw value is returned in either case.
    if (items > 1) set_raw_scheme(aTHX_ uri_obj, ST(1));
    RETVAL = get_raw_scheme(aTHX_ uri_obj);
  OUTPUT:
    RETVAL

SV* raw_auth(uri_obj, ...)
  SV *uri_obj
  CODE:
    // Optional set, then get
    if (items > 1) set_raw_auth(aTHX_ uri_obj, ST(1));
    RETVAL = get_raw_auth(aTHX_ uri_obj);
  OUTPUT:
    RETVAL

SV* raw_path(uri_obj, ...)
  SV *uri_obj
  CODE:
    // Optional set, then get
    if (items > 1) set_raw_path(aTHX_ uri_obj, ST(1));
    RETVAL = get_raw_path(aTHX_ uri_obj);
  OUTPUT:
    RETVAL

SV* raw_query(uri_obj, ...)
  SV *uri_obj
  CODE:
    // Optional set, then get
    if (items > 1) set_raw_query(aTHX_ uri_obj, ST(1));
    RETVAL = get_raw_query(aTHX_ uri_obj);
  OUTPUT:
    RETVAL

SV* raw_frag(uri_obj, ...)
  SV *uri_obj
  CODE:
    // Optional set, then get
    if (items > 1) set_raw_frag(aTHX_ uri_obj, ST(1));
    RETVAL = get_raw_frag(aTHX_ uri_obj);
  OUTPUT:
    RETVAL

SV* raw_usr(uri_obj, ...)
  SV *uri_obj
  CODE:
    // Optional set, then get
    if (items > 1) set_raw_usr(aTHX_ uri_obj, ST(1));
    RETVAL = get_raw_usr(aTHX_ uri_obj);
  OUTPUT:
    RETVAL

SV* raw_pwd(uri_obj, ...)
  SV *uri_obj
  CODE:
    // Optional set, then get
    if (items > 1) set_raw_pwd(aTHX_ uri_obj, ST(1));
    RETVAL = get_raw_pwd(aTHX_ uri_obj);
  OUTPUT:
    RETVAL

SV* raw_host(uri_obj, ...)
  SV *uri_obj
  CODE:
    // Optional set, then get
    if (items > 1) set_raw_host(aTHX_ uri_obj, ST(1));
    RETVAL = get_raw_host(aTHX_ uri_obj);
  OUTPUT:
    RETVAL

SV* raw_port(uri_obj, ...)
  SV *uri_obj
  CODE:
    // Optional set, then get
    if (items > 1) set_raw_port(aTHX_ uri_obj, ST(1));
    RETVAL = get_raw_port(aTHX_ uri_obj);
  OUTPUT:
    RETVAL


#-------------------------------------------------------------------------------
# Compound getters
#-------------------------------------------------------------------------------
SV* get_path(uri_obj)
  SV *uri_obj
  CODE:
    // The getters in this group are thin wrappers that return whatever the
    // C-level function they call returns.
    RETVAL = get_path(aTHX_ uri_obj);
  OUTPUT:
    RETVAL

SV* get_query(uri_obj)
  SV *uri_obj
  CODE:
    // Forwards to the C-level get_query()
    RETVAL = get_query(aTHX_ uri_obj);
  OUTPUT:
    RETVAL

SV* get_auth(uri_obj)
  SV *uri_obj
  CODE:
    // Forwards to the C-level get_auth()
    RETVAL = get_auth(aTHX_ uri_obj);
  OUTPUT:
    RETVAL

SV* split_path(uri)
  SV* uri
  ALIAS:
    split_path_compat = 1
  CODE:
    // The second argument is 1 when called through the split_path_compat
    // alias and 0 otherwise.
    RETVAL = split_path(aTHX_ uri, ix == 1 ? 1 : 0);
  OUTPUT:
    RETVAL

SV* get_query_keys(uri)
  SV* uri
  CODE:
    // Forwards to the C-level get_query_keys()
    RETVAL = get_query_keys(aTHX_ uri);
  OUTPUT:
    RETVAL

SV* get_query_hash(uri)
  SV* uri
  CODE:
    // Note the differing name: the C-level function is query_hash()
    RETVAL = query_hash(aTHX_ uri);
  OUTPUT:
    RETVAL

SV* get_param(uri, sv_key)
  SV* uri
  SV* sv_key
  CODE:
    // Returns a reference to an array of the values stored under sv_key
    RETVAL = get_param(aTHX_ uri, sv_key);
  OUTPUT:
    RETVAL


#-------------------------------------------------------------------------------
# Compound setters
#-------------------------------------------------------------------------------
# Passes uri_obj and value straight through to the C function set_auth;
# nothing is returned to Perl.
void
set_auth(uri_obj, value)
  SV *uri_obj
  SV *value
  CODE:
    set_auth(aTHX_ uri_obj, value);

# Passes uri_obj and value straight through to the C function set_path;
# nothing is returned to Perl.
void
set_path(uri_obj, value)
  SV *uri_obj
  SV *value
  CODE:
    set_path(aTHX_ uri_obj, value);

# Passes uri_obj and segments straight through to the C function
# set_path_array; nothing is returned to Perl.
void
set_path_array(uri_obj, segments)
  SV *uri_obj
  SV *segments
  CODE:
    set_path_array(aTHX_ uri_obj, segments);

# Passes uri_obj and value straight through to the C function set_query;
# nothing is returned to Perl.
void
set_query(uri_obj, value)
  SV *uri_obj
  SV *value
  CODE:
    set_query(aTHX_ uri_obj, value);

# Passes all four arguments (uri, key, values, separator) straight through to
# the C function set_param; nothing is returned to Perl. All four are required.
void
set_param(uri, sv_key, sv_values, sv_separator)
  SV *uri
  SV *sv_key
  SV *sv_values
  SV *sv_separator
  CODE:
    set_param(aTHX_ uri, sv_key, sv_values, sv_separator);

# Forwards self and sv_key_set to update_query_keyset together with a
# separator. The separator is the optional third argument; when it is not
# supplied, a mortal SV holding "&" is used instead.
void
query_keyset(self, sv_key_set, ...)
  SV *self
  SV *sv_key_set
  PREINIT:
    SV *sv_separator;
  CODE:
    if (items > 2) {
      sv_separator = ST(2);
    }
    else {
      sv_separator = sv_2mortal(newSVpvn("&", 1));
    }
    update_query_keyset(aTHX_ self, sv_key_set, sv_separator);


#-------------------------------------------------------------------------------
# Unified accessors
#-------------------------------------------------------------------------------
# Unified accessor. With a second argument, that value is handed to
# set_scheme first; the result of get_scheme is always returned.
SV *
scheme(self, ...)
  SV *self
  CODE:
    if (items > 1) {
      set_scheme(aTHX_ self, ST(1));
    }
    RETVAL = get_scheme(aTHX_ self);
  OUTPUT:
    RETVAL

# Unified accessor. With a second argument, that value is handed to set_usr
# first; the result of get_usr is always returned.
SV *
usr(self, ...)
  SV *self
  CODE:
    if (items > 1) {
      set_usr(aTHX_ self, ST(1));
    }
    RETVAL = get_usr(aTHX_ self);
  OUTPUT:
    RETVAL

# Unified accessor. With a second argument, that value is handed to set_pwd
# first; the result of get_pwd is always returned.
SV *
pwd(self, ...)
  SV *self
  CODE:
    if (items > 1) {
      set_pwd(aTHX_ self, ST(1));
    }
    RETVAL = get_pwd(aTHX_ self);
  OUTPUT:
    RETVAL

# Unified accessor. With a second argument, that value is handed to set_host
# first; the result of get_host is always returned.
SV *
host(self, ...)
  SV *self
  CODE:
    if (items > 1) {
      set_host(aTHX_ self, ST(1));
    }
    RETVAL = get_host(aTHX_ self);
  OUTPUT:
    RETVAL

# Unified accessor. With a second argument, that value is handed to set_port
# first; the result of get_port is always returned.
SV *
port(self, ...)
  SV *self
  CODE:
    if (items > 1) {
      set_port(aTHX_ self, ST(1));
    }
    RETVAL = get_port(aTHX_ self);
  OUTPUT:
    RETVAL

# Unified accessor, also installed under the name fragment. With a second
# argument, that value is handed to set_frag first; the result of get_frag is
# always returned. Both names behave the same: the alias index is not consulted.
SV *
frag(self, ...)
  SV *self
  ALIAS:
    fragment = 1
  CODE:
    if (items > 1) {
      set_frag(aTHX_ self, ST(1));
    }
    RETVAL = get_frag(aTHX_ self);
  OUTPUT:
    RETVAL


#-------------------------------------------------------------------------------
# Extras
#-------------------------------------------------------------------------------
# Returns the result of the C function to_string for self. Also installed as
# as_string and TO_JSON, and registered as the "" (stringification) overload.
# Arguments after self are accepted and not used; presumably this is so the
# overload handler, which Perl calls with extra arguments, passes the
# argument-count check.
SV *
to_string(self, ...)
  SV *self
  ALIAS:
    as_string = 1
    TO_JSON = 2
  OVERLOAD:
    to_string \"\"
  CODE:
    RETVAL = to_string(aTHX_ self);
  OUTPUT:
    RETVAL

# Runs the C function normalize() on uri. Also installed under the name
# canonical; the alias index is not consulted.
# NOTE(review): RETVAL is never assigned and the OUTPUT section names the uri
# parameter instead. Presumably this makes the call hand back the invocant
# itself so that calls can be chained -- confirm against the generated C.
SV* normalize(uri)
  SV *uri
  ALIAS:
    canonical = 1
  CODE:
    normalize(aTHX_ uri);
  OUTPUT:
    uri

# Creates a new object of the same class as rel, passes it to the C function
# absolute() together with rel and base, and returns it. Also installed under
# the name abs. A base that is not already an object derived from rel's class
# is first passed through new() to build one.
SV *
absolute(rel, base)
  SV *rel
  SV *base
  ALIAS:
    abs = 1
  PREINIT:
    const char *klass;
    SV *resolved;
  CODE:
    klass = class_name(aTHX_ rel);

    // Fresh object built from an empty string; handed to absolute() as its
    // first argument below.
    resolved = new(aTHX_ klass, sv_2mortal(newSVpvn("", 0)), 0);

    // Wrap base in a (mortal) object of the same class unless it already is one.
    if (!(sv_isobject(base) && sv_derived_from(base, klass))) {
      base = sv_2mortal(new(aTHX_ klass, base, 0));
    }

    absolute(aTHX_ resolved, rel, base);
    RETVAL = resolved;
  OUTPUT:
    RETVAL

# Calls the C function explain() on uri_obj; nothing is returned to Perl.
void
explain(uri_obj)
  SV *uri_obj
  CODE:
    explain(aTHX_ uri_obj);

# Calls the C function debug() on uri_obj; nothing is returned to Perl.
void
debug(uri_obj)
  SV *uri_obj
  CODE:
    debug(aTHX_ uri_obj);

# Invokes the C function uri_split() on uri from a PPCODE section, bracketing
# the call with a check of the interpreter's mark stack pointer.
# NOTE(review): presumably uri_split() pushes its results onto the Perl stack
# itself, which is why no values are pushed here -- confirm against its
# definition.
void uri_split(uri)
  SV* uri
  PREINIT:
    I32* temp;
  PPCODE:
    // Remember the current mark stack position, then advance it for the call.
    temp = PL_markstack_ptr++;
    uri_split(aTHX_ uri);

    // The mark stack is not back at the remembered position: restore it and
    // return an empty list.
    if (PL_markstack_ptr != temp) {
      PL_markstack_ptr = temp;
      XSRETURN_EMPTY;
    }

    // Otherwise return with the stack exactly as the callee left it.
    return;