Group
Extension

Parse-JCONF/lib/Parse/JCONF.pm

package Parse::JCONF;

use strict;
use Carp;
use Parse::JCONF::Boolean qw(TRUE FALSE);
use Parse::JCONF::Error;

our $VERSION = '0.05';
our $HashClass = 'Tie::IxHash';

sub new {
	my ($class, %opts) = @_;
	
	my $self = {
		autodie    => delete $opts{autodie},
		keep_order => delete $opts{keep_order}
	};
	
	%opts and croak 'unrecognized options: ', join(', ', keys %opts);
	
	if ($self->{keep_order}) {
		eval "require $HashClass"
			or croak "you need to install $HashClass for `keep_order' option";
	}
	
	bless $self, $class;
}

sub parse {
	my ($self, $data) = @_;
	
	$self->_err(undef);
	
	my %rv;
	if ($self->{keep_order}) {
		tie %rv, $HashClass;
	}
	
	my $offset = 0;
	my $line = 1;
	my $len = length $data;
	
	while ($offset < $len && $self->_parse_space_and_comments(\$data, \$offset, \$line)) {
		$self->_parse_bareword(\$data, \$offset, \$line, \my $key)
			or return;
		$self->_parse_eq_sign(\$data, \$offset, \$line)
			or return;
		$self->_parse_value(\$data, \$offset, \$line, \my $val)
			or return;
		$self->_parse_delim(undef, \$data, \$offset, \$line)
			or return;
		
		$rv{$key} = $val;
	}
	
	return \%rv;
}

sub _parse_space_and_comments {
	my ($self, $data_ref, $offset_ref, $line_ref) = @_;
	
	pos($$data_ref) = $$offset_ref;
	
	while ($$data_ref =~ /\G(?:(\n+)|\s|#[^\n]*)/gc) {
		if (defined $1) {
			$$line_ref += length $1;
		}
	}
	
	$$offset_ref = pos($$data_ref);
	return $$offset_ref < length $$data_ref;
}

sub _parse_bareword {
	my ($self, $data_ref, $offset_ref, $line_ref, $rv_ref) = @_;
	
	$self->_parse_space_and_comments($data_ref, $offset_ref, $line_ref)
		or return $self->_err(
			Parser => "Unexpected end of data, expected bareword at line $$line_ref"
		);
	
	pos($$data_ref) = $$offset_ref;
	
	$$data_ref =~ /\G(\w+)/g
		or return $self->_err(
			Parser => "Expected bareword at line $$line_ref:\n" . _parser_msg($data_ref, $$offset_ref)
		);
	
	$$rv_ref = $1;
	$$offset_ref = pos($$data_ref);
	
	1;
}

sub _parse_bareword_or_string {
	my ($self, $data_ref, $offset_ref, $line_ref, $rv_ref) = @_;
	
	$self->_parse_space_and_comments($data_ref, $offset_ref, $line_ref)
		or return $self->_err(
			Parser => "Unexpected end of data, expected bareword or string at line $$line_ref"
		);
	
	pos($$data_ref) = $$offset_ref;
	
	if (substr($$data_ref, $$offset_ref, 1) eq '"') {
		$self->_parse_string($data_ref, $offset_ref, $line_ref, $rv_ref);
	}
	else {
		$self->_parse_bareword($data_ref, $offset_ref, $line_ref, $rv_ref);
	}
}

sub _parse_delim {
	my ($self, $ok_if, $data_ref, $offset_ref, $line_ref) = @_;
	
	my $line_was = $$line_ref;
	my $has_data = $self->_parse_space_and_comments($data_ref, $offset_ref, $line_ref);
	
	if ($has_data && substr($$data_ref, $$offset_ref, 1) eq ',') {
		# comma delimiter
		$$offset_ref++;
		return 1;
	}
	
	if ($line_was != $$line_ref) {
		# newline delimiter
		return 1;
	}
	
	if (!defined $ok_if && !$has_data) {
		# we may not have delimiter at the end of data
		return 1;
	}
	
	if ($has_data && substr($$data_ref, $$offset_ref, 1) eq $ok_if) {
		# we may not have delimiter at the end of object, array
		return 1;
	}
	
	$self->_err(
		Parser => "Expected delimiter `,' at line $$line_ref:\n" . _parser_msg($data_ref, $$offset_ref)
	);
}

sub _parse_eq_sign {
	my ($self, $data_ref, $offset_ref, $line_ref) = @_;
	
	$self->_parse_space_and_comments($data_ref, $offset_ref, $line_ref)
		or return $self->_err(
			Parser => "Unexpected end of data, expected equals sign `=' at line $$line_ref"
		);
	
	unless (substr($$data_ref, $$offset_ref, 1) eq '=') {
		return $self->_err(
			Parser => "Expected equals sign `=' at line $$line_ref:\n" . _parser_msg($data_ref, $$offset_ref)
		);
	}
	
	$$offset_ref++;
	1;
}

sub _parse_value {
	my ($self, $data_ref, $offset_ref, $line_ref, $rv_ref) = @_;
	
	$self->_parse_space_and_comments($data_ref, $offset_ref, $line_ref)
		or return $self->_err(
			Parser => "Unexpected end of data, expected value at line $$line_ref"
		);
	
	my $c = substr($$data_ref, $$offset_ref, 1);
	if ($c eq '{') {
		$self->_parse_object($data_ref, $offset_ref, $line_ref, $rv_ref);
	}
	elsif ($c eq '[') {
		$self->_parse_array($data_ref, $offset_ref, $line_ref, $rv_ref);
	}
	elsif ($c eq 't') {
		$self->_parse_constant('true', TRUE, $data_ref, $offset_ref, $line_ref, $rv_ref);
	}
	elsif ($c eq 'f') {
		$self->_parse_constant('false', FALSE, $data_ref, $offset_ref, $line_ref, $rv_ref);
	}
	elsif ($c eq 'n') {
		$self->_parse_constant('null', undef, $data_ref, $offset_ref, $line_ref, $rv_ref);
	}
	elsif ($c eq '"') {
		$self->_parse_string($data_ref, $offset_ref, $line_ref, $rv_ref);
	}
	elsif ($c =~ /-|\d/) {
		$self->_parse_number($data_ref, $offset_ref, $line_ref, $rv_ref);
	}
	else {
		$self->_err(
			Parser => "Unexpected value, expected array/object/string/number/true/false/null at line $$line_ref:\n" . 
						_parser_msg($data_ref, $$offset_ref)
		);
	}
}

sub _parse_constant {
	my ($self, $constant, $constant_val, $data_ref, $offset_ref, $line_ref, $rv_ref) = @_;
	
	my $len = length $constant;
	substr($$data_ref, $$offset_ref, $len) eq $constant && 
	($len + $$offset_ref == length $$data_ref || substr($$data_ref, $$offset_ref+$len, 1) =~ /[\s,\]}]/)
		or return $self->_err(
			Parser => "Unexpected value, expected `$constant' at line $$line_ref:\n" .
						_parser_msg($data_ref, $$offset_ref)
		);
	
	$$offset_ref += $len;
	$$rv_ref = $constant_val;
	
	1;
}

sub _parse_number {
	my ($self, $data_ref, $offset_ref, $line_ref, $rv_ref) = @_;
	
	$$data_ref =~ /\G(-?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+-]?\d+)?)/gc
		or return $self->_err(
			Parser => "Unexpected value, expected number at line $$line_ref:\n" .
						_parser_msg($data_ref, $$offset_ref)
		);
	
	my $num = $1;
	$$rv_ref = $num + 0; # WTF: $1 + 0 is string if we can believe Data::Dumper, so use temp var
	$$offset_ref = pos($$data_ref);
	
	1;
}

sub _parse_array {
	my ($self, $data_ref, $offset_ref, $line_ref, $rv_ref) = @_;
	
	$$offset_ref++;
	my @rv;
	
	while (1) {
		$self->_parse_space_and_comments($data_ref, $offset_ref, $line_ref)
			or return $self->_err(
				Parser => "Unexpected end of data, expected end of array `]' at line $$line_ref"
			);
			
		substr($$data_ref, $$offset_ref, 1) eq ']'
			and last;
		$self->_parse_value($data_ref, $offset_ref, $line_ref, \my $val)
			or return;
		$self->_parse_delim(']', $data_ref, $offset_ref, $line_ref)
			or return;
		
		push @rv, $val;
	}
	
	$$rv_ref = \@rv;
	$$offset_ref++;
	
	1;
}

sub _parse_object {
	my ($self, $data_ref, $offset_ref, $line_ref, $rv_ref) = @_;
	
	$$offset_ref++;
	my %rv;
	if ($self->{keep_order}) {
		tie %rv, $HashClass;
	}
	
	while (1) {
		$self->_parse_space_and_comments($data_ref, $offset_ref, $line_ref)
			or return $self->_err(
				Parser => "Unexpected end of data, expected end of object `}' at line $$line_ref"
			);
		
		substr($$data_ref, $$offset_ref, 1) eq '}'
			and last;
		$self->_parse_bareword_or_string($data_ref, $offset_ref, $line_ref, \my $key)
			or return;
		$self->_parse_colon_sign($data_ref, $offset_ref, $line_ref)
			or return;
		$self->_parse_value($data_ref, $offset_ref, $line_ref, \my $val)
			or return;
		$self->_parse_delim('}', $data_ref, $offset_ref, $line_ref)
			or return;
		
		$rv{$key} = $val;
	}
	
	$$rv_ref = \%rv;
	$$offset_ref++;
	
	1;
}

sub _parse_colon_sign {
	my ($self, $data_ref, $offset_ref, $line_ref) = @_;
	
	$self->_parse_space_and_comments($data_ref, $offset_ref, $line_ref)
		or return $self->_err(
			Parser => "Unexpected end of data, expected colon sign `:' at line $$line_ref"
		);
	
	unless (substr($$data_ref, $$offset_ref, 1) eq ':') {
		return $self->_err(
			Parser => "Expected colon sign `:' at line $$line_ref:\n" . _parser_msg($data_ref, $$offset_ref)
		);
	}
	
	$$offset_ref++;
	1;
}

my %ESCAPES = (
	'b'  => "\b",
	'f'  => "\f",
	'n'  => "\n",
	'r'  => "\r",
	't'  => "\t",
	'"'  => '"',
	'\\' => '\\'
);

sub _parse_string {
	my ($self, $data_ref, $offset_ref, $line_ref, $rv_ref) = @_;
	
	pos($$data_ref) = ++$$offset_ref;
	my $str = '';
	
	while ($$data_ref =~ /\G(?:(\n+)|\\((?:[bfnrt"\\]))|\\u([0-9a-fA-F]{4})|([^\\"\x{0}-\x{8}\x{A}-\x{C}\x{E}-\x{1F}]+))/gc) {
		if (defined $1) {
			$$line_ref += length $1;
			$str .= $1;
		}
		elsif (defined $2) {
			$str .= $ESCAPES{$2};
		}
		elsif (defined $3) {
			$str .= pack 'U', hex $3;
		}
		else {
			$str .= $4;
		}
	}
	
	$$offset_ref = pos($$data_ref);
	if ($$offset_ref == length $$data_ref) {
		return $self->_err(
			Parser => "Unexpected end of data, expected string terminator `\"' at line $$line_ref"
		);
	}
	
	if ((my $c = substr($$data_ref, $$offset_ref, 1)) ne '"') {
		if ($c eq '\\') {
			return $self->_err(
				Parser => "Unrecognized escape sequence in string at line $$line_ref:\n" .
							_parser_msg($data_ref, $$offset_ref)
			);
		}
		else {
			my $hex = sprintf('"\x%02x"', ord $c);
			return $self->_err(
				Parser => "Bad character $hex in string at line $$line_ref:\n" .
							_parser_msg($data_ref, $$offset_ref)
			);
		}
	}
	
	$$offset_ref++;
	$$rv_ref = $str;
	
	1;
}

sub parse_file {
	my ($self, $path) = @_;
	
	$self->_err(undef);
	
	open my $fh, '<:utf8', $path
		or return $self->_err(IO => "open `$path': $!");
	
	my $data = do {
		local $/;
		<$fh>;
	};
	
	close $fh;
	
	$self->parse($data);
}

sub last_error {
	return $_[0]->{last_error};
}

sub _err {
	my ($self, $err_type, $msg) = @_;
	
	unless (defined $err_type) {
		$self->{last_error} = undef;
		return;
	}
	
	$self->{last_error} = "Parse::JCONF::Error::$err_type"->new($msg);
	if ($self->{autodie}) {
		$self->{last_error}->throw();
	}
	
	return;
}

sub _parser_msg {
	my ($data_ref, $offset) = @_;
	
	my $msg = '';
	my $non_space_chars = 0;
	my $c;
	my $i;
	
	for ($i=$offset; $i>=0; $i--) {
		$c = substr($$data_ref, $i, 1);
		if ($c eq "\n") {
			last;
		}
		elsif ($c eq "\t") {
			$c = '  ';
		}
		elsif (ord $c < 32) {
			$c = ' ';
		}
		
		substr($msg, 0, 0) = $c;
		
		if ($c =~ /\S/) {
			if (++$non_space_chars > 5) {
				last;
			}
		}
	}
	
	substr($msg, 0, 0) = ' ';
	my $bad_char = length $msg;
	
	my $len = length $$data_ref;
	$non_space_chars = 0;
	
	for ($i=$offset+1; $i<$len; $i++) {
		$c = substr($$data_ref, $i, 1);
		if ($c eq "\n") {
			last;
		}
		elsif ($c eq "\t") {
			$c = '  ';
		}
		elsif (ord $c < 32) {
			$c = ' ';
		}
		
		substr($msg, length $msg) = $c;
		
		if ($c =~ /\S/) {
			if (++$non_space_chars > 3) {
				last;
			}
		}
	}
	
	substr($msg, length $msg) = "\n" . ' 'x($bad_char-1).'^';
	return $msg;
}

1;

__END__

=pod

=head1 NAME

Parse::JCONF - Parse JCONF (JSON optimized for configs)

=head1 SYNOPSIS

    use strict;
    use Parse::JCONF;
    
    my $raw_cfg = do { local $/; <DATA> };
    my $parser = Parse::JCONF->new(autodie => 1);
    
    $cfg = $parser->parse($raw_cfg);
    
    $cfg->{modules}{Mo}[1]; # 0.08
    $cfg->{enabled}; # Parse::JCONF::Boolean::TRUE or "1" in string context
    $cfg->{enabled} == Parse::JCONF::Boolean::TRUE; # yes
    $cfg->{enabled} == 1; # no
    if ($cfg->{enabled}) { 1 }; # yes
    $cfg->{data}[0]; # Test data
    $cfg->{query}; # SELECT * from pkg
                   # LEFT JOIN ver ON pkg.id=ver.pkg_id
                   # WHERE pkg.name IN ("Moose", "Mouse", "Moo", "Mo")
    __DATA__
    modules = {
        Moose: 1,
        Mouse: 0.91,
        Moo: 0.05, # some comment here about version
        Mo: [0.01, 0.08],
    }
    
    enabled = true
    data = ["Test data", "Production data"] # some comment about data
    
    query = "SELECT * from pkg
             LEFT JOIN ver ON pkg.id=ver.pkg_id
             WHERE pkg.name IN (\"Moose\", \"Mouse\", \"Moo\", \"Mo\")"

=head1 DESCRIPTION

JSON is good, but not very handy for configuration files. JCONF intended to fix this.

It has several differences with JSON format:

=over

=item bareword - the word which matches /^\w+$/

    some_word   # valid
    some word   # invalid
    "some_word" # invalid

=item bareword may be used only as object key or root key

=item object key may be bareword or string

    {test: 1}   # valid
    {"test": 1} # valid

=item JCONF root always consists of 0 or more trines: root key (bareword), equals sign (=), any valid JCONF value (number/string/true/false/null/object/array)

    value1 = [1,2] # root trine: root key (bareword), equals sign (=), any valid JCONF value

=item values in the object/array or root trines may be devided with comma "," (like in JSON) or with new line (or even several)

    val = [1,2,3,4] # with comma
    
    val = [         # with new line
        1
        2
        3
        4
    ]
    
    val = {         # several newlines are ok
        a: 1
        
        b: 2
    }
    
    val = {        # comma and newlines are ok
        a: 1,
        b: 2
    }
    
    val = {       # invalid, several commas is not ok
        a: 1,,b:2
    }

=item comma separator allowed after last element

    [1,2,3,4,] # ok
    {a:1,b:2,} # ok

=item new lines, carriage return, tabs are valid symbols in the string

    str = "This is valid multiline
    JCONF string"

=item # - is start of the comment, all from this symbol to the end of line will be interpreted as comment

    obj = {
        bool: false # this is comment
    }

=back

=head1 METHODS

=head2 new

This is parser object constructor. Available parameters are:

=over

=item autodie

Throw exception on any error if true, default is false (in this case parser methods will return undef on error
and error may be found with L</last_error> method)

=item keep_order

Store key/value pairs in the hash which keeps order if true, default is false. This is useful when you need to
store your configuration back to the file (for example with C<JCONF::Writer>) and want to save same order as it
was before. You must have $Parse::JCONF::HashClass installed which default value is Tie::IxHash.

=back

=head2 parse

Parses string provided as parameter. Expected string encoding is utf8. On success returns reference to hash.
On fail returns undef/throws exception (according to C<autodie> option in the constructor).
Exception will be of type C<Parse::JCONF::Error::Parser>.

=head2 parse_file

Parses content of the file which path provided as parameter. Expected file content encoding is utf8. On success
returns reference to hash. On fail returns undef/throws exception (according to C<autodie> option in the constructor).
Exception will be of type C<Parse::JCONF::Error::IO> or C<Parse::JCONF::Error::Parser>.

=head2 last_error

Returns error occured for last parse() or parse_file() call. Error will be one of C<Parse::JCONF::Error> subclass or undef
(if there was no error).

=head1 SEE ALSO

L<Parse::JCONF::Error>, L<Parse::JCONF::Boolean>, L<JCONF::Writer>

=head1 COPYRIGHT

Copyright Oleg G <oleg@cpan.org>.

This library is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.