Yet Another CPAN Grep

Data-Section-Pluggable/lib/Data/Section/Pluggable.pm

use warnings;
use 5.020;
use true;
use experimental qw( signatures );
use stable qw( postderef );

package Data::Section::Pluggable 0.08 {

    # ABSTRACT: Read structured data from __DATA__


    use Class::Tiny qw( package prefer_filesystem filename _formats _cache );
    use Exporter qw( import );
    use Ref::Util qw( is_ref is_plain_hashref is_coderef is_plain_arrayref is_blessed_ref );
    use MIME::Base64 qw( decode_base64 encode_base64 );
    use Path::Tiny 0.130 ();
    use Carp ();

    our @EXPORT_OK = qw( get_data_section );

    sub BUILDARGS ($class, @args) {
        if(@args == 1) {
            return $args[0] if is_plain_hashref $args[0];
            return { package => $args[0] };
        } else {
            my %args = @args;
            return \%args;
        }
    }

    sub BUILD ($self, $) {
        unless(defined $self->package) {
            my $package = caller 2;
            $self->package($package);
        }
        foreach my $attr (qw( prefer_filesystem filename )) {
            if(defined $self->$attr) {
                unless(is_blessed_ref($self->$attr) && $self->$attr->isa('Path::Tiny')) {
                    $self->$attr(Path::Tiny->new($self->$attr)->absolute);
                }
            }
        }
        $self->_formats({});
    }


    sub get_data_section ($self=undef, $name=undef) {

        # handle being called as a function instead of
        # a method.
        unless(is_ref $self) {
            $name = $self;
            $self = __PACKAGE__->new(scalar caller);
        }

        my $all = $self->_get_all_data_sections;
        return undef unless $all;

        if (defined $name) {
            if(exists $all->{$name}) {
                return $self->_format($name, $all->{$name});
            }
            return undef;
        } else {
            return $self->_format_all($all);
        }
    }

    sub _format_all ($self, $all) {
        my %new;
        foreach my $key (keys %$all) {
            $new{$key} = $self->_format($key, $all->{$key});
        }
        \%new;
    }

    sub _format ($self, $name, $content) {
        $content = $self->_decode($content->@*);
        if($name =~ /\.(.*?)\z/ ) {
            my $ext = $1;
            if($self->_formats->{$ext}) {
                $content = $_->($self, $content) for $self->_formats->{$ext}->@*;
            }
        }
        return $content;
    }

    sub _decode ($self, $content, $encoding) {
        return $content unless $encoding;
        if($encoding ne 'base64') {
            Carp::croak("unknown encoding: $encoding");
        }
        return decode_base64($content);
    }

    sub _get_all_data_sections ($self) {
        return $self->_cache if $self->_cache;

        my $fh;

        if($self->filename) {
            $fh = $self->filename->openr_raw;
        } else {
            $fh = do { no strict 'refs'; \*{$self->package."::DATA"} };
        }

        return undef unless defined fileno $fh;

        # Question: does this handle corner case where perl
        # file is just __DATA__ section?  turns out, yes!
        # added test t/data_section_pluggable__data_only.t
        seek $fh, 0, 0;
        my $content = do { local $/; <$fh> };
        $content =~ s/^.*\n__DATA__\n/\n/s; # for win32
        $content =~ s/\n__END__\n.*$/\n/s;

        my @data = split /^@@\s+(.+?)\s*\r?\n/m, $content;

        # extra at start whitespace, or __DATA_ for data only file
        shift @data;

        my $all = {};
        while (@data) {
            my ($name_encoding, $content) = splice @data, 0, 2;
            my ($name, $encoding);
            if($name_encoding =~ /^(.*)\s+\((.*?)\)$/) {
                $name = $1;
                $encoding = $2;
            } else {
                $name = $name_encoding;
            }
            if($self->prefer_filesystem && -f (my $path = $self->prefer_filesystem->child($name))) {
                $content = $encoding ? encode_base64($path->slurp_raw) : $path->slurp_utf8;
            }
            $all->{$name} = [ $content, $encoding ];
        }

        return $self->_cache($all);
    }


    sub add_format ($self, $ext, $cb) {
        Carp::croak("callback is not a code reference") unless is_coderef $cb;
        push $self->_formats->{$ext}->@*, $cb;
        return $self;
    }


    sub add_plugin ($self, $name, %args) {

        Carp::croak("plugin name must match [a-z][a-z0-9_]+, got $name")
            unless $name =~ /^[a-z][a-z0-9_]+\z/;

        my $class = join '::', 'Data', 'Section', 'Pluggable', 'Plugin', ucfirst($name =~ s/_(.)/uc($1)/egr);
        my $pm    = ($class =~ s!::!/!gr) . ".pm";

        require $pm unless $self->_valid_plugin($class);

        my $plugin;
        if($class->can("new")) {
            $plugin = $class->new(%args);
        } else {
            if(%args) {
                Carp::croak("extra arguments are not allowed for class plugins (hint create constructor)");
            }
            $plugin = $class;
        }

        Carp::croak("$class is not a valid Data::Section::Pluggable plugin")
            unless $self->_valid_plugin($plugin);

        if($plugin->does('Data::Section::Pluggable::Role::ContentProcessorPlugin')) {
            my @extensions = $plugin->extensions;
            @extensions = $extensions[0]->@* if is_plain_arrayref $extensions[0];
            die "extensions method for $class returned no extensions" unless @extensions;

            my $cb = sub ($self, $content) {
                return $plugin->process_content($self, $content);
            };

            $self->add_format($_, $cb) for @extensions;

        };

        return $self;
    }

    sub _valid_plugin ($self, $plugin) {
        $plugin->can('does') && $plugin->does('Data::Section::Pluggable::Role::ContentProcessorPlugin');
    }


    sub extract ($self, $dir=undef) {
        $dir = Path::Tiny->new($dir // '.');

        my $all = $self->_get_all_data_sections;

        foreach my $key (keys %$all) {
            my $path = $dir->child($key);
            $path->parent->mkdir;
            my($content,$encoding) = $all->{$key}->@*;

            if(defined $encoding) {
                if($encoding eq 'base64') {
                    $path->spew_raw(decode_base64($content));
                } else {
                    Carp::croak("unknown encoding: $encoding");
                }
            } else {
                $path->spew_utf8($content);
            }
        }
    }

}

__END__

=pod

=encoding UTF-8

=head1 NAME

Data::Section::Pluggable - Read structured data from __DATA__

=head1 VERSION

version 0.08

=head1 SYNOPSIS

 use Data::Section::Pluggable;
 
 my $dsp = Data::Section::Pluggable->new
                                   ->add_plugin('trim')
                                   ->add_plugin('json');
 
 # prints "Welcome to Perl" without prefix
 # or trailing white space.
 say $dsp->get_data_section('hello.txt');
 
 # also prints "Welcome to Perl"
 say $dsp->get_data_section('hello.json')->{message};
 
 # prints "This is base64 encoded.\n"
 say $dsp->get_data_section('hello.bin');
 
 __DATA__
 
 @@ hello.txt
   Welcome to Perl
 
 
 @@ hello.json
 {"message":"Welcome to Perl"}
 
 @@ hello.bin (base64)
 VGhpcyBpcyBiYXNlNjQgZW5jb2RlZC4K

=head1 DESCRIPTION

Data::Section::Simple is a module to extract data from C<__DATA__> section of Perl source file.
This module started out as a fork of L<Data::Section::Simple> (itself based on L<Mojo::Loader>),
and includes some of its tests to ensure compatibility, but it also includes features not
available in either of those modules.

This module caches the result of reading the C<__DATA__> section in the object if you use the OO
interface.  It doesn't do any caching of the processing required of "formats" (see below).

This module also supports C<base64> encoding using the same mechanism as L<Mojo::Loader>, which
is helpful for putting binary sections in C<__DATA__>.

As mentioned, this module aims to be and is largely a drop in replacement for L<Data::Section::Simple>
with some extra features.  Here are the known ways in which it is not compatible:

=over 4

=item

Because L<Data::Section::Simple> does not support C<base64> encoded data, these data sections
would include the C< (base64)> in the filename instead of decoding the content.

=item

When a section is not found L<Data::Section::Simple> return the empty list from C<get_data_section>,
where as this module returns C<undef>, in order to keep the return value more consistent.

=back

=head1 CONSTRUCTOR

 my $dsp = Data::Section::Pluggable->new($package);
 my $dsp = Data::Section::Pluggable->new(\%attributes);
 my $dsp = Data::Section::Pluggable->new(%attributes);

=head1 ATTRIBUTES

=head2 package

The name of the package to read from C<__DATA__>.  If not specified, then
the current package will be used.

=head2 prefer_filesystem

If provided, this is a directory containing files from where  content will be
preferred over what is in the C<__DATA__> section, if available.  This file
still must still exist in the C<__DATA__> section to be found. This can be
useful to do local testing with files on the filesystem, but release a script
or test a just one combined file.

=head2 filename

Read from the C<__DATA__> section of the given file instead of the current
Perl process.  This can be useful for reading the C<__DATA__> section of a
Perl script or module without parsing or running it first.

=head1 METHODS

=head2 get_data_section

 my $hash = get_data_section;
 my $data = get_data_section $name;
 my $hash = $dsp->get_data_section;
 my $data = $dsp->get_data_section($name);

Gets data from C<__DATA_>.  This can be called either as a function (which is
optionally exported from this module), or as an object method.  Creating an
instance of L<Data::Section::Pluggable> allows you to use packages other than
the default or use plugins.

=head2 add_format

 $dsp->add_format( $ext, sub ($dsp, $content) { return ... } );

Adds a content processor to the given filename extension.  The extension should be a filename
extension without the C<.>, for example C<txt> or C<json>.

The callback takes the L<Data::Section::Pluggable> instance as its first argument and the content
to be processed as the second.  This callback should return the processed content as a scalar.

You can chain multiple content processors to the same filename extension, and they will be
called in the order that they were added.

=head2 add_plugin

 $dsp->add_plugin( $name, %args );

Applies the plugin with C<$name>.  If the plugin supports instance mode (that is: it has a constructor
named C<new>), then C<%args> will be passed to the constructor.  For included plugins see L</CORE PLUGINS>.
To write your own see L</PLUGIN ROLES>.

=head2 extract

 $dsp->extract($dir);
 $dsp->extract;

Extract all files in Data section to the given C<$dir>, or if not provided the current directory.

=head1 CORE PLUGINS

=head2 json

Automatically decode json into Perl data structures.
See L<Data::Section::Pluggable::Plugin::Json>.

=head2 trim

Automatically trim leading and trailing white space.
See L<Data::Section::Pluggable::Plugin::Trim>.

=head1 PLUGIN ROLES

=head2 ContentProcessorPlugin

Used for adding content processors for specific formats.  This
is essentially a way to wrap the L<add_format method|/add_format>
as a module.  See L<Data::Section::Pluggable::Role::ContentProcessorPlugin>.

=head1 SEE ALSO

These are some alternative modules that do a similar thing, each
with their own feature set and limitations.

=over 4

=item L<Data::Section>

=item L<Data::Section::Simple>

=item L<Data::Section::Writer>

=item L<Mojo::Loader>

=back

=head1 AUTHOR

Graham Ollis <plicease@cpan.org>

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2024 by Graham Ollis.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.