Group
Extension

MarpaX-RFC-RFC3629/lib/MarpaX/RFC/RFC3629.pm

use strict;
use warnings FATAL => 'all';

package MarpaX::RFC::RFC3629;

# ABSTRACT: Marpa parsing of UTF-8 byte sequences as per RFC3629

our $VERSION = '0.001'; # VERSION

our $AUTHORITY = 'cpan:JDDPAUSE'; # AUTHORITY


use Encode qw/decode/;
use Marpa::R2;
use Moo;
use Types::Standard -all;
use Types::Encodings qw/Bytes/;

# Octets to parse. "trigger => 1" makes Moo call _trigger_input() whenever
# the attribute is set, including at construction time.
has input => (
  is       => 'ro',
  isa      => Bytes,
  required => 1,
  trigger  => 1,
);

# Result of the parse. Read-only from the outside ("rwp"); filled in through
# the private writer _set_output() by _trigger_input().
has output => (
  is  => 'rwp',
  isa => Str|Undef,
);

# Grammar source: the whole __DATA__ section of this file, read in one go.
our $DATA = do {
  local $/ = undef;    # slurp mode, restored when the block exits
  readline(*DATA);
};

# Grammar compiled once when the module is loaded and shared by every parse.
our $G = Marpa::R2::Scanless::G->new({ source => \$DATA });

# Normalize constructor arguments into the hash reference Moo expects.
#
# Accepted call forms:
#   ->new($octets)               positional octets, stored under "input"
#   ->new(input => $octets)      named parameters as a list
#   ->new({ input => $octets })  named parameters as a hash reference
#
# Returns a fresh hash reference of named parameters.
sub BUILDARGS {
  my ($class, @args) = @_;

  # A lone unblessed hash reference is a set of named parameters, exactly as
  # with Moo's default BUILDARGS; it can never be valid octets. Copy it so
  # that the caller's hash is not shared with the object under construction.
  return { %{ $args[0] } } if @args == 1 && ref($args[0]) eq 'HASH';

  # An odd number of arguments means the octets came first, positionally.
  unshift @args, 'input' if @args % 2 == 1;
  return { @args };
}

# Trigger for the "input" attribute: parse the octets with the shared grammar
# $G, decode the parse value as strict UTF-8 (croaking on malformed data) and
# store the result through the private writer of "output".
sub _trigger_input {
  my ($self, $octets) = @_;

  my $value_ref = $G->parse(\$octets);
  my $string    = decode('UTF-8', ${$value_ref}, Encode::FB_CROAK);

  return $self->_set_output($string);
}
# Default semantic action of the grammar: discard the first argument and
# return all remaining arguments concatenated into a single string.
sub _concat {
  my (undef, @pieces) = @_;

  return join q{}, @pieces;
}


1;

=pod

=encoding UTF-8

=head1 NAME

MarpaX::RFC::RFC3629 - Marpa parsing of UTF-8 byte sequences as per RFC3629

=head1 VERSION

version 0.001

=head1 SYNOPSIS

    use strict;
    use warnings FATAL => 'all';
    use MarpaX::RFC::RFC3629;
    use Encode qw/encode/;
    use Data::HexDump;
    #
    # Parse octets
    #
    my $orig = "\x{0041}\x{2262}\x{0391}\x{002E}";
    my $octets = encode('UTF-8', $orig, Encode::FB_CROAK);
    my $string = MarpaX::RFC::RFC3629->new($octets)->output;
    print STDERR "Octets:\n" . HexDump($octets) . "\n";
    print STDERR "String:\n" . HexDump($string) . "\n";

=head1 DESCRIPTION

This module parses UTF-8 byte sequences as per RFC3629. It croaks if parsing fails.

=head1 SUBROUTINES/METHODS

=head2 new(ClassName $class: Bytes $octets --> InstanceOf['MarpaX::RFC::RFC3629'])

Instantiate a new object. Takes the octets to parse as its parameter.

=head2 output(InstanceOf['MarpaX::RFC::RFC3629'] $self --> Str)

Returns the string decoded from the octets (Perl's internal utf8 flag may or may not be on, depending on the content).

=head1 SEE ALSO

L<Syntax of UTF-8 Byte Sequences|https://tools.ietf.org/html/rfc3629#section-4>

L<Marpa::R2>

=for :stopwords cpan testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan

=head1 SUPPORT

=head2 Bugs / Feature Requests

Please report any bugs or feature requests through the issue tracker
at L<https://rt.cpan.org/Public/Dist/Display.html?Name=MarpaX-RFC-RFC3629>.
You will be notified automatically of any progress on your issue.

=head2 Source Code

This is open source software.  The code repository is available for
public review and contribution under the terms of the license.

L<https://github.com/jddurand/marpax-rfc-rfc3629>

  git clone git://github.com/jddurand/marpax-rfc-rfc3629.git

=head1 AUTHOR

Jean-Damien Durand <jeandamiendurand@free.fr>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2015 by Jean-Damien Durand.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut

__DATA__
lexeme default = latm => 1
:default ::= action => MarpaX::RFC::RFC3629::_concat
:start        ::= <UTF8 octets>
<UTF8 octets> ::= <UTF8 char>*
<UTF8 char>   ::= <UTF8 1>
                | <UTF8 2>
                | <UTF8 3>
                | <UTF8 4>
<UTF8 1>        ~                                                 [\x{00}-\x{7F}]
<UTF8 2>        ~                                 [\x{C2}-\x{DF}]     <UTF8 tail>
<UTF8 3>        ~                        [\x{E0}] [\x{A0}-\x{BF}]     <UTF8 tail>
                |                 [\x{E1}-\x{EC}]     <UTF8 tail>     <UTF8 tail>
                |                        [\x{ED}] [\x{80}-\x{9F}]     <UTF8 tail>
                |                 [\x{EE}-\x{EF}]     <UTF8 tail>     <UTF8 tail>
<UTF8 4>        ~        [\x{F0}] [\x{90}-\x{BF}]     <UTF8 tail>     <UTF8 tail>
                | [\x{F1}-\x{F3}]     <UTF8 tail>     <UTF8 tail>     <UTF8 tail>
                |        [\x{F4}] [\x{80}-\x{8F}]     <UTF8 tail>     <UTF8 tail>
<UTF8 tail>     ~ [\x{80}-\x{BF}]


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.