Group
Extension

JSON-Relaxed/lib/JSON/Relaxed/Parser.pm

#! perl

use v5.26;
use Object::Pad;
use utf8;

package JSON::Relaxed::Parser;

our $VERSION = "0.098";

class JSON::Relaxed::Parser;

# Instance data.
field $data    :mutator;	# RJSON string being parser
field @pretoks;			# string in pre-tokens
field @tokens;			# string as tokens

# Instance properties.

# Enforce strictness to official standard.
# Strict true -> RJSON conformant.
# Strict false (default) -> RRJSON. Everything goes :).
field $strict		   :mutator :param = 0;

# Allow extra stuff after the JSON structure.
# Strict mode only.
field $extra_tokens_ok	   :mutator :param = 0;

# Define the values to be used for true and false.
field $booleans		   :mutator :param = 1;

# Signal error with exceptions.
field $croak_on_error	   :mutator :param = 1;
field $croak_on_error_internal;

# Some non-strict extensions can be controlled individually.
# This may disappear in some futer version, so do not use.
# Extension: a.b:c -> a:{b:c}
## Non-strict only.
field $combined_keys	   :mutator :param = 1;

# Extension: a:b -> {a:b} (if outer)
## Non-strict only.
field $implied_outer_hash  :mutator :param = 1;

# Extension: = as :, and optional before {, off/on as false/true
## Non-strict only.
field $prp		    :mutator :param = 1;

# Formatted output.
field $pretty		    :mutator :param = 0;

# Retain key order. Warning: adds a key " key order " to each hash!
## Non-strict only.
field $key_order	    :mutator :param = 0;

# Error indicators.
field $err_id		    :accessor;
field $err_msg		    :accessor;
field $err_pos		    :accessor;

method decode( $str ) {
    $croak_on_error_internal = $croak_on_error;
    $self->_decode($str);
}

# Legacy.
method parse( $str ) {
    $croak_on_error_internal = 0;
    $self->_decode($str);
}

method _decode( $str ) {

    $data = $str;
    return $self->error('missing-input')
      unless defined $data && length $data;

    undef $err_id;
    $err_pos = -1;
    undef $err_msg;

    $self->pretokenize;
    return if $self->is_error;

    $self->tokenize;
    return $self->error('empty-input') unless @tokens;

    $self->structure( top => 1 );
}

################ Character classifiers ################

# Reserved characters.
#    '['  beginning of array
#    ']'  end of array
#    '{'  beginning of hash
#    '}'  end of hash
#    ':'  delimiter between name and value of hash element
#    ','  separator between elements in hashes and arrays

my $p_reserved = q<[,:{}\[\]]>;

method is_reserved ($c) {
    $c =~ /^$p_reserved$/;
}

# Newlines. CRLF (Windows), CR (MacOS) and newline (sane systems).

my $p_newlines = q{(?:\r\n|\r|\n|\\\n)};

method is_newline ($c) {
    $c =~ /^$p_newlines$/o;
}

# Quotes. Single, double and backtick.

my $p_quotes = q{["'`]};

method is_quote ($c) {
    $c =~ /^$p_quotes$/o;
}

# Numbers. A special case of unquoted strings.
my $p_number = q{[+-]?\d*\.?\d+(?:[Ee][+-]?\d+)?};

method pretokenize {

    # \u escape (4 hexits)
    my @p = ( qq<\\\\u[[:xdigit:]]{4}> );

    # Any escaped char (strict mode).
    if ( $strict ) {
	push( @p, qq<\\\\.> );
    }

    # Otherwise, match \u{ ... } also.
    else {
	push( @p, qq<\\\\u\\{[[:xdigit:]]+\\}>, qq<\\\\[^u]> ); # escaped char
    }

    if ( $prp && !$strict ) {
	# Add = to the reserved characters
        $p_reserved = q<[,=:{}\[\]]>;
	# Massage # comments into // comments without affecting position.
        $data =~ s/^(\s*)#.(.*)$/$1\/\/$2/gm;
        $data =~ s/^(\s*)#$/$1 /gm;
    }

    push( @p, $p_newlines,
	  qq< // [^\\n]* \\n >,	  # line comment
	  qq< /\\* .*? \\*/ >,	  # comment start
	  qq< /\\* >,		  # comment start
          qq< $p_reserved >,	  # reserved chars
	  qq< "(?:\\\\.|.)*?" >,  # "string"
	  qq< `(?:\\\\.|.)*?` >,  # `string`
	  qq< '(?:\\\\.|.)*?' >,  # 'string'
	  qq< $p_quotes >,	  # stringquote
	  qq< \\s+ > );		  # whitespace

    my $p = join( "|", @p );

    @pretoks = split( m< ( $p ) >sox, $data );

    # Remove empty strings.
    @pretoks = grep { length($_) } @pretoks;

    return;
}

# Accessor for @pretoks.
method pretoks() { \@pretoks }

method tokenize {

    @tokens = ();
    my $offset = 0;		# token offset in input

    if ( $booleans ) {
	if ( ref($booleans) ne 'ARRAY' ) {
	    $booleans = [ $JSON::Boolean::false, $JSON::Boolean::true ];
	}
    }
    else {
	$booleans = [ 0, 1 ];
    }

    my $glue = 0;		# can glue strings
    my $uq_open = 0;		# collecting pretokens for unquoted string

    # Loop through characters.
    while ( @pretoks ) {
	my $pretok = shift(@pretoks);

	# White space: ignore.
	if ( $pretok !~ /\S/ ) {
	    $offset += length($pretok);
	    $uq_open = 0;
	    next;
	}

	if ( $pretok eq "\\\n" ) {
	    $glue++ if $glue;
	    $uq_open = 0;
	    $offset += length($pretok);
	    next;
	}

	# Strings.
	if ( $pretok =~ /^(["'`])(.*?)\1$/s ) {
	    my ( $quote, $content ) = ( $1, $2 );
	    if ( $glue > 1 ) {
		$tokens[-1]->append($content);
	    }
	    else {
		$self->addtok( $content, 'Q', $offset, $quote );
		$glue = 1 unless $strict;
	    }
	    $offset += length($pretok);
	    $uq_open = 0;
	    next;
	}
	$glue = 0;

	# // comment.
	if ( $pretok =~ m<^//(.*)> ) {
	    # $self->addtok( $1, 'L', $offset );
	    $offset += length($pretok);
	    $uq_open = 0;
	}

	# /* comment */
	elsif ( $pretok =~ m<^/\*.+>s ) {
	    $offset += length($pretok);
	    $uq_open = 0;
	}

	elsif ( $pretok eq '/*' ) {
	    return $self->error('unclosed-inline-comment');
	}

	# Reserved characters.
	elsif ( $self->is_reserved($pretok) ) {
	    $self->addtok( $pretok, 'C', $offset );
	    $offset += length($pretok);
	    $uq_open = 0;
	}


	# Numbers.
	elsif ( $pretok =~ /^$p_number$/ ) {
	    $self->addtok( 0+$pretok, 'N', $offset );
	    $offset += length($pretok);
	    $uq_open = 0;
	}

	# Quotes
	# Can't happen -- should be an encosed string.
	elsif ( $self->is_quote($pretok) ) {
	    $offset += length($pretok);
	    $self->addtok( $pretok, '?', $offset );
	    return $self->error('unclosed-quote', $tokens[-1] );
	}

	# Else it's an unquoted string.
	else {
	    if ( $uq_open ) {
		$tokens[-1]->append($pretok);
	    }
	    else {
		$self->addtok( $pretok, 'U', $offset );
		$uq_open++;
	    }
	    $offset += length($pretok);
	}
    }
    return;
}

# Accessor for @tokens,
method tokens() { \@tokens }

# Add a new token to @tokens.
method addtok( $tok, $typ, $off, $quote=undef ) {

    push( @tokens,
	  $typ eq 'U' || $typ eq 'N'
	  ? JSON::Relaxed::String::Unquoted->new( token   => $tok,
						  content => $tok,
						  type    => $typ,
						  parent  => $self,
						  offset  => $off )
	  : $typ eq 'Q'
	    ? JSON::Relaxed::String::Quoted->new( token   => $tok,
						  type    => $typ,
						  content => $tok,
						  quote   => $quote,
						  parent  => $self,
						  offset  => $off )
	    : JSON::Relaxed::Token->new( token  => $tok,
					 parent => $self,
					 type   => $typ,
					 offset => $off ) );
}

# Build the result structure out of the tokens.
method structure( %opts ) {

    @tokens = @{$opts{tokens}} if $opts{tokens}; # for debugging

    if ( $implied_outer_hash && !$strict ) {
	# Note that = can only occur with $prp.
	if ( @tokens > 2 && $tokens[0]->is_string
	     && $tokens[1]->token =~ /[:={]/ ) {
	    $self->addtok( '}', 'C', $tokens[-1]->offset );
	    $self->addtok( '{', 'C', $tokens[0]->offset );
	    unshift( @tokens, pop(@tokens ));
	}
    }

    my $this = shift(@tokens) // return;
    my $rv;

    if ( $this->is_string ) { # (un)quoted string
	$rv = $this->as_perl;
    }
    else {
	my $t = $this->token;
	if ( $t eq '{' ) {
	    $rv = $self->build_hash;
	}
	elsif ( $t eq '[' ) {
	    $rv = $self->build_array;
	}
	else {
	    return $self->error( 'invalid-structure-opening-character',
				 $this );
	}
    }

    # If this is the outer structure, then no tokens should remain.
    if ( $opts{top}
	 && @tokens
	 && ( $strict || !$extra_tokens_ok )
	 && !$self->is_error
       ) {
	return $self->error( 'multiple-structures', $tokens[0] );
    }

    return $rv;
}


method error( $id, $aux = undef ) {
    require JSON::Relaxed::ErrorCodes;
    $err_id = $id;
    $err_pos = $aux ? $aux->offset : -1;
    $err_msg = JSON::Relaxed::ErrorCodes->message( $id, $aux );

    die( $err_msg, "\n" ) if $croak_on_error_internal;
    return;			# undef
}

method is_error() {
    $err_id;
}

# For debugging.
method dump_tokens() {
    my $tokens = \@tokens;
    return unless require DDP;
    if ( -t STDERR ) {
	DDP::p($tokens);
    }
    else {
	warn DDP::np($tokens), "\n";
    }
}

method build_hash() {

    my $rv = {};
    my @ko;			# order of keys

    while ( @tokens ) {
	my $this = shift(@tokens);
	# What is allowed after opening brace:
	#	closing brace
	#	comma
	#	string

	# If closing brace, return.
	my $t = $this->token;
	if ( $t eq '}' ) {
	    $rv->{" key order "} = \@ko
	      if $key_order && !$strict && @ko > 1;
	    return $rv;
	}

	# If comma, do nothing.
	next if $t eq ',';

	# String
	# If the token is a string then it is a key. The token after that
	# should be a value.
	if ( $this->is_string ) {
	    my ( $key, $value );

	    # Set key using string.
	    $key = $this->as_perl( always_string => 1 );
	    $self->set_value( $rv, $key );
	    if ( $key_order ) {
		if ( $combined_keys && !$strict ) {
		    push( @ko, $key =~ s/\..*//r );
		}
		else {
		    push( @ko, $key );
		}
	    }

	    my $next = $tokens[0];
	    # If anything follows the string.
	    last unless defined $next;

	    # A comma or closing brace is acceptable after a string.
	    next if $next->token eq ',' || $next->token eq '}';

	    # If next token is a colon or equals then it should be followed by a value.
	    # Note that = can only occur with $prp.
	    if ( $next->token =~ /^[:=]$/ ) {
		# Step past the colon.
		shift(@tokens);

		# If at end of token array, exit loop.
		last unless @tokens;

		# Get hash value.
		$value = $self->get_value;

		# If there is a global error, return undef.
		return undef if $self->is_error;
	    }

	    # Extension (prp): Implied colon.
	    elsif ( $prp && $next->token eq '{' ) {
		# Get hash value.
		$value = $self->get_value;

		# If there is a global error, return undef.
		return undef if $self->is_error;
	    }

	    # Anything else is an error.
	    else {
		return $self->error('unknown-token-after-key', $next );
	    }

	    # Set key and value in return hash.
	    $self->set_value( $rv, $key, $value );
	}

	# Anything else is an error.
	else {
	    return $self->error('unknown-token-for-hash-key', $this );
	}
    }

    # If we get this far then unclosed brace.
    return $self->error('unclosed-hash-brace');

}

method get_value() {

    # Get token.
    my $this = shift(@tokens);

    # Token must be string, array, or hash.

    # String.
    if ( $this->is_string ) {
	return $this->as_perl;
    }

    # Token opens a hash or array.
    elsif ( $this->is_list_opener ) {
	unshift( @tokens, $this );
	return $self->structure;
    }

    # At this point it's an illegal token.
    return $self->error('unexpected-token-after-colon', $this );
}

method set_value ( $rv, $key, $value = undef ) {
    return $rv->{$key} = $value
      unless $combined_keys && !$strict && $key =~ /\./s;

    my @keys = split(/\./, $key, -1 );
    my $c = \$rv;
    for ( @keys ) {
	if ( /^[+-]?\d+$/ ) {
	    $c = \( $$c->[$_] );
	}
	else {
	    $c = \( $$c->{$_} );
	}
    }
    $$c = $value;
}

method build_array() {

    my $rv = [];

    # Build array. Work through tokens until closing brace.
    while ( @tokens ) {
	my $this = shift(@tokens);

	my $t = $this->token;
	# Closing brace: we're done building this array.
	return $rv if $t eq ']';

	# Comma: if we get to a comma at this point, and we have
	# content, do nothing with it in strict mode. Ignore otherwise.
	if ( $t eq ',' && (!$strict || @$rv) ) {
	}

	# Opening brace of hash or array.
	elsif ( $this->is_list_opener ) {
	    unshift( @tokens, $this );
	    my $object = $self->structure;
	    defined($object) or return undef;
	    push( @$rv, $object );
	}

	# if string, add it to the array
	elsif ( $this->is_string ) {
	    # add the string to the array
	    push( @$rv, $this->as_perl );

	    # Check following token.
	    if ( @tokens ) {
		my $next = $tokens[0] || '';
		# Spec say: Commas are optional between objects pairs
		# and array items.
		# The next element must be a comma or the closing brace,
		# or a string or list.
		# Anything else is an error.
		unless ( $next->token =~ /^[,\]]$/
			 || $next->is_string
			 || $next->is_list_opener ) {
		    return $self->error( 'missing_comma-between-array-elements',
					 $next );
		}
	    }
	}

	# Else unkown object or character, so throw error.
	else {
	    return $self->error( 'unknown-array-token', $this );
	}
    }

    # If we get this far then unclosed brace.
    return $self->error('unclosed-array-brace');
}

method is_comment_opener( $pretok ) {
    $pretok eq '//' || $pretok eq '/*';
}

use List::Util qw( min max uniqstr );

method encode(%opts) {
    my $schema  = $opts{schema};
    my $level   = $opts{level}              // 0;
    my $rv      = $opts{data};			# allow undef
    my $indent  = $opts{indent}             // 2;
    my $impoh   = $opts{implied_outer_hash} // $implied_outer_hash;
    my $ckeys   = $opts{combined_keys}      // $combined_keys;
    my $prpmode = $opts{prp}                // $prp;
    my $pretty  = $opts{pretty}             // $pretty;
    my $strict  = $opts{strict}             // $strict;
    my $nouesc  = $opts{nounicodeescapes}   // 0;

    if ( $strict ) {
	$ckeys = $prpmode = $impoh = 0;
    }

    $schema = resolve( $schema, $schema ) if $schema;

    my $s = "";
    my $i = 0;
    my $props = $schema->{properties};
    #warn("L$level - ", join(" ", sort keys(%$props)),"\n");

    # Add comments from schema, if any.
    my $comments = sub( $p ) {
	my $s = "";
	my $did = 0;#$level;
	for my $topic ( qw( title description ) ) {
	    next unless $p->{$topic};
	    $s .= "\n" unless $did++;
	    $s .= (" " x $i) . "// $_\n"
	      for split( /\s*<br\/?>|\\n|\n/, $p->{$topic} );
	}
	return $s;
    };

    if ( !$level ) {
	$s .= $comments->($schema);
    }

    # Format a string value.
    my $pr_string = sub ( $str, $force = 0 ) {

	# Reserved strings.
	if ( !defined($str) ) {
	    return "null";
	}

	if ( UNIVERSAL::isa( $str, 'JSON::Boolean' )
	     || UNIVERSAL::isa( $str, 'JSON::PP::Boolean' ) ) {
	    return (qw(false true))[$str];	# force string result
	}

	my $v = $str;

	# Escapes.
	$v =~ s/\\/\\\\/g;
	$v =~ s/\n/\\n/g;
	$v =~ s/\r/\\r/g;
	$v =~ s/\f/\\f/g;
	$v =~ s/\013/\\v/g;
	$v =~ s/\010/\\b/g;
	$v =~ s/\t/\\t/g;
	$v =~ s/([^ -ÿ])/sprintf( ord($1) < 0xffff ? "\\u%04x" : "\\u{%x}", ord($1))/ge unless $nouesc;

	# Force quotes unless the string can be represented as unquoted.
	if ( # contains escapes
	     $v ne $str
	     # not value-formed numeric
	     || ( $v =~ /^$p_number$/ && 0+$v ne $v )
	     # contains reserved, quotes or spaces
	     || $v =~ $p_reserved
	     || $v =~ $p_quotes
	     || $v =~ /\s/
	     || $v =~ /^(true|false|null)$/
	     || !length($v)
	   ) {
	    if ( $v !~ /\"/ ) {
		return '"' . $v . '"';
	    }
	    if ( $v !~ /\'/ ) {
		return "'" . $v . "'";
	    }
	    if ( $v !~ /\`/ ) {
		return "`" . $v . "`";
	    }
	    return '"' . ($v =~ s/(["'`])/\\$1/rg) . '"';
	}

	# Just a string.
	return $v;
    };

    # Format an array value.
    my $pr_array = sub ( $rv, $level=0, $props = {} ) {
	return "[]" unless @$rv;

	# Gather list of formatted values.
	my @v = map { $self->encode( %opts,
				     data   => $_,
				     level  => $level+1,
				     schema => $props,
				   ) } @$rv;

	return "[".join(",",@v)."]" unless $pretty;

	# If sufficiently short, put it on one line.
	if ( $i + length("@v") < 72
	     && join("",@v) !~ /\s|$p_newlines/ ) {
	    return "[ @v ]";
	}

	# Put the values on separate lines.
	my $s = "[\n";
	$s .= s/^/(" " x ($i+$indent))/gemr . "\n" for @v;
	$s .= (" " x $i) . "]";

	return $s;
    };

    # Format a hash value.
    my $pr_hash; $pr_hash = sub ( $rv, $level=0, $props = {} ) {
	return "{}" unless keys(%$rv);

	my $s = "";

	# Opening brace.
	if ( $level || !$impoh ) {
	    $s .= $pretty ? "{\n" : "{";
	    $i += $indent;
	}

	# If we have a key order, use this and delete.
	my @ko = $rv->{" key order "}
	  ? @{ delete($rv->{" key order "}) }
	  : sort(keys(%$rv));

	# Dedup.
	@ko = uniqstr(@ko);

	my $ll = 0;
	for ( @ko ) {
	    # This may be wrong if \ escapes or combined keys are involved.
	    $ll = length($_) if length($_) > $ll;
	}

	for ( @ko ) {
	    my $k = $_;

	    # Gather comments, if available.
	    my $comment;
	    if ( $props->{$k} ) {
		$comment = $comments->($props->{$k});
		$s .= $comment if $comment;
	    }

	    my $v = $rv->{$k};
	    my $key = $k;	# final key
	    # Combine keys if allowed and possible.
	    while ( $ckeys && ref($v) eq 'HASH' && keys(%$v) == 1 ) {
		my $k = (keys(%$v))[0];
		$key .= ".$k";	# append to final key
		$v = $v->{$k};	# step to next
	    }

	    $s .= (" " x $i) if $pretty;

	    # Format the key, try to align on length. NEEDS WORK
	    my $t = $pr_string->($key);
	    my $l = length($t);
	    $s .= $t;
	    my $in = $comment ? "" : " " x max( 0, $ll-length($t) );

	    # Handle object serialisation.
	    my $r = UNIVERSAL::can( $v, "TO_JSON" ) // UNIVERSAL::can( $v, "FREEZE" );
	    $r = $r ? $v->$r : $v;

	    # Format the value.
	    if ( ref($r) eq 'HASH' ) {
		# Make up and recurse.
		if ( $pretty ) {
		    $s .= $prpmode ? " " : " : ";
		}
		elsif ( !$prpmode ) {
		    $s .=  ":";
		}

		$s .= $pr_hash->( $r, $level+1, $props->{$k}->{properties} );
	    }

	    elsif ( ref($r) eq 'ARRAY' ) {
		$s .= $pretty ? "$in : " : ":";
		$s .= $pr_array->( $r, $level+1, $props->{$k}->{items} );
	    }

	    elsif ( $pretty ) {
		my $t = $pr_string->($r);
		$s .= "$in : ";

		# Break quoted strings that contain pseudo-newlines.
		if ( $t =~ /^["'`].*\\n/ ) {
		    # Remove the quotes/
		    my $quote = substr( $t, 0, 1, '');
		    chop($t);

		    # Determine current indent.
		    $s =~ /^(.*)\Z/m;
		    my $sep = " \\\n" . (" " x length($1));

		    # Get string parts.
		    my @a = split( /\\n/, $t, -1 );
		    while ( @a ) {
			$s .= $quote.shift(@a);
			$s .= "\\n" if @a;
			$s .= $quote;
			$s .= $sep if @a;
		    }
		}

		# Just a string.
		else {
		    $s .= $t;
		}
	    }
	    else {
		$s .= ":" . $pr_string->($r) . ",";
	    }
	    $s .= "\n" if $pretty;
	}

	# Strip final comma.
	$s =~ s/,$// unless $pretty;

	# Closing brace,.
	if ( $level || !$impoh ) {
	    $i -= $indent;
	    $s .= (" " x $i) if $pretty;
	    $s .= "}";
	}
	else {
	    $s =~ s/\n+$//;
	}

	return $s;
    };

    # Handle object serialisation.
    my $r = UNIVERSAL::can( $rv, "TO_JSON" ) // UNIVERSAL::can( $rv, "FREEZE" );
    $r = $r ? $rv->$r : $rv;

    # From here it is straight forward.
    if ( ref($r) eq 'HASH' ) {
	$s .= $pr_hash->( $r, $level, $props );
    }
    elsif ( ref($r) eq 'ARRAY' ) {
	$s .= $pr_array->( $r, $level );
    }
    else {
	$s .= $pr_string->($r);
    }

    # Final make-up.
    $s =~ s/^ +$//gm;
    if ( $pretty && !$level ) {
	$s =~ s/^\n*//s;
	$s .= "\n" if $s !~ /\n$/;
    }
    return $s;
}

################ Subroutines ################

# resolve processes $ref, allOf etc nodes.

sub resolve( $d, $schema ) {

    if ( is_hash($d) ) {
	while ( my ($k,$v) = each %$d ) {
	    if ( $k eq 'allOf' ) {
		delete $d->{$k}; # yes, safe to do
		$d = merge( resolve( $_, $schema ), $d ) for @$v;
	    }
	    elsif ( $k eq 'oneOf' || $k eq 'anyOf' ) {
		delete $d->{$k}; # yes, safe to do
		$d = merge( resolve( $v->[0], $schema ), $d );
	    }
	    elsif ( $k eq '$ref' ) {
		delete $d->{$k}; # yes, safe to do
		if ( $v =~ m;^#/definitions/(.*); ) {
		    $d = merge( resolve( $schema->{definitions}->{$1}, $schema ), $d );
		}
		else {
		    die("Invalid \$ref: $v\n");
		}
	    }
	    else {
		$d->{$k} = resolve( $v, $schema );
	    }
	}
    }
    elsif ( is_array($d) ) {
	$d = [ map { resolve( $_, $schema ) } @$d ];
    }
    else {
    }

    return $d;
}

sub is_hash($o)  { UNIVERSAL::isa( $o, 'HASH'  ) }
sub is_array($o) { UNIVERSAL::isa( $o, 'ARRAY' ) }

sub merge ( $left, $right ) {

    return $left unless $right;

    my %merged = %$left;

    for my $key ( keys %$right ) {

        my ($hr, $hl) = map { is_hash($_->{$key}) } $right, $left;

        if ( $hr and $hl ) {
            $merged{$key} = merge( $left->{$key}, $right->{$key} );
        }
        else {
            $merged{$key} = $right->{$key};
        }
    }

    return \%merged;
}

################ Tokens ################

class JSON::Relaxed::Token;

field $parent :accessor :param;
field $token  :accessor :param;
field $type   :accessor :param;
field $offset :accessor :param;

method is_string() {
    $type =~ /[QUN]/
}

method is_list_opener() {
    $type eq 'C' && $token =~ /[{\[]/;
}

method as_perl( %options ) {	# for values
    $token->as_perl(%options);
}

method _data_printer( $ddp ) {	# for DDP
    my $res = "Token(";
    if ( !defined $token ) {
	$res .= "null";
    }
    elsif ( $self->is_string ) {
	$res .= $token->_data_printer($ddp);
    }
    else {
	$res .= "\"$token\"";
    }
    $res .= ", $type";
    $res . ", $offset)";
}

method as_string {		# for messages
    my $res = "";
    if ( $self->is_string ) {
	$res = '"' . ($self->content =~ s/"/\\"/gr) . '"';
    }
    else {
	$res .= "\"$token\"";
    }
    $res;
}

=begin heavily_optimized_alternative

package JSON::Relaxed::XXToken;
our @ISA = qw(JSON::Relaxed::Parser);

sub new {
    my ( $pkg, %opts ) = @_;
    my $self = bless [] => $pkg;
    push( @$self,
	  delete(%opts{parent}),
	  delete(%opts{token}),
	  delete(%opts{type}),
	  delete(%opts{offset}),
    );
    $self;
}

sub parent { $_[0]->[0] }
sub token  { $_[0]->[1]  }
sub type   { $_[0]->[2]   }
sub offset { $_[0]->[3] }

sub is_string { $_[0]->[2] =~ /[QUN]/  }
sub is_list_opener { $_[0]->[2] eq 'C' && $_[0]->[1] =~ /[{\[]/ }
sub as_perl {	# for values
    return shift->[1]->as_perl(@_);
}

sub _data_printer {	# for DDP
    my ( $self, $ddp ) = @_;
    my $res = "Token(";
    if ( $self->is_string ) {
	$res .= $self->[1]->_data_printer($ddp);
    }
    else {
	$res .= "\"".$self->[1]."\"";
    }
    $res .= ", " . $self->[2];
    $res . ", " . $self->[3] . ")";
}

sub as_string {		# for messages
    if ( $_[0]->is_string ) {
	return '"' . ($_[0]->[1]->content =~ s/"/\\"/gr) . '"';
    }
    "\"" . $_[0]->[1] . "\"";
}

=cut

################ Strings ################

class JSON::Relaxed::String :isa(JSON::Relaxed::Token);

field $content	:param = undef;
field $quote	:accessor :param = undef;

# Quoted strings are assembled from complete substrings, so escape
# processing is done on the substrings. This prevents ugly things
# when unicode escapes are split across substrings.
# Unquotes strings are collected token by token, so escape processing
# can only be done on the complete string (on output).

ADJUST {
    $content = $self->unescape($content) if defined($quote);
};

method append ($str) {
    $str = $self->unescape($str) if defined $quote;
    $content .= $str;
}

method content {
    defined($quote) ? $content : $self->unescape($content);
}

# One regexp to match them all...
my $esc_quoted = qr/
	       \\([tnrfb])				# $1 : one char
	     | \\u\{([[:xdigit:]]+)\}			# $2 : \u{XX...}
	     | \\u([Dd][89abAB][[:xdigit:]]{2})		# $3 : \uDXXX hi
	       \\u([Dd][c-fC-F][[:xdigit:]]{2})		# $4 : \uDXXX lo
	     | \\u([[:xdigit:]]{4})			# $5 : \uXXXX
	     | \\?(.)					# $6
	   /xs;

# Special escapes (quoted strings only).
my %esc = (
    'b'   => "\b",    #  Backspace
    'f'   => "\f",    #  Form feed
    'n'   => "\n",    #  New line
    'r'   => "\r",    #  Carriage return
    't'   => "\t",    #  Tab
    'v'   => chr(11), #  Vertical tab
);

method unescape ($str) {
    return $str unless $str =~ /\\/;

    my $convert = sub {
	# Specials. Only for quoted strings.
	if ( defined($1) ) {
	    return defined($quote) ? $esc{$1} : $1;
	}

	# Extended \u{XXX} character.
	defined($2) and return chr(hex($2));

	# Pair of surrogates.
	defined($3) and return pack( 'U*',
				     0x10000 + (hex($3) - 0xD800) * 0x400
				     + (hex($4) - 0xDC00) );

	# Standard \uXXXX character.
	defined($5) and return chr(hex($5));

	# Anything else.
	defined($6) and return $6;

	return '';
    };

    while( $str =~ s/\G$esc_quoted/$convert->()/gxse) {
        last unless defined pos($str);
    }

    return $str;
}

################ Quoted Strings ################

class JSON::Relaxed::String::Quoted :isa(JSON::Relaxed::String);

method as_perl( %options ) {
    $self->content;
}

method _data_printer( $ddp ) {
    "Token(" . $self->quote . $self->content . $self->quote . ", " .
      $self->type . ", " . $self->offset . ")";
}

################ Unquoted Strings ################

class JSON::Relaxed::String::Unquoted :isa(JSON::Relaxed::String);

# If the option always_string is set, bypass the reserved strings.
# This is used for hash keys.
method as_perl( %options ) {
    my $content = $self->content;

    # If used as a key, always return a string.
    return $content if $options{always_string};

    # Return boolean specials if appropriate.
    if ( $content =~ /^(?:true|false)$/ ) {
	return $self->parent->booleans->[ $content eq 'true' ? 1 : 0 ];
    }
    if ( $self->parent->prp && $content =~ /^(?:on|off)$/ ) {
	return $self->parent->booleans->[ $content eq 'on' ? 1 : 0 ];
    }

    # null -> undef
    elsif ( $content eq "null" ) {
	return undef;
    }

    # Return as string.
    $content;
}

method _data_printer( $ddp ) {
    "Token(«" . $self->content . "», " .
      $self->type . ", " . $self->offset . ")";
}

################ Booleans ################

# This class distinguises booleans true and false from numeric 1 and 0.

use JSON::PP ();

package JSON::Boolean {

    sub as_perl( $self, %options ) { $self }

    sub _data_printer( $self, $ddp ) { "Bool($self)" }

    use overload '""'     => sub { ${$_[0]} ? "true" : "false" },
		 "0+"     => sub { ${$_[0]} },
		 "bool"   => sub { !!${$_[0]} },
		 fallback => 1;

    # For JSON::PP export.
    sub TO_JSON { ${$_[0]} ? $JSON::PP::true : $JSON::PP::false }

    # Boolean values.
    our $true  = do { bless \(my $dummy = 1) => __PACKAGE__ };
    our $false = do { bless \(my $dummy = 0) => __PACKAGE__ };

}

################

1;


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.