Yet Another CPAN Grep

Mir-FileHandler/lib/Mir/FileHandler.pm

# ABSTRACT: A module to handle files and folders
package Mir::FileHandler;

use 5.018;
use Moose;
no warnings 'experimental::smartmatch';
use DirHandle;
use File::Find;
use File::Basename qw( dirname );
use Log::Log4perl;
use Digest::MD5 'md5_base64';
use Cache::FileCache;

=head1 NAME

Mir::FileHandler - An Mir module to handle files and folders...

=head1 VERSION

Version 0.05

=cut

our $VERSION = '0.05';
our @rec_files = ();
our $files_as_hash = {};
our @types = ();

=head1 SYNOPSIS

    use Mir::FileHandler;

    # the oo way...
    # get a new FileHandler obj for the passed root directory
    my $o = Mir::FileHandler->new( 
        path => $path, # root folder to start with 
    );

    # get plain files list inside the root directory
    my $list = $o->plainfiles(); # or pass a path in input

    # get plain files list from folder and sub-folders
    my $list = $o->plainfiles_recursive( $path, $suffix, \&found );

    # Traverses a directory tree and exec code for each file
    $o->dir_walk(
        path => $path,
        code => $code,
    );

    # Traverses a directory tree and exec code for each file
    # Stops after max success code execution
    # the sub pointed by $code has to return 1 in
    # case of success
    # if cached_dir is set, at each iteration starts from 
    # what stored in cache, if something has been stored,
    # otherwise starts from path
    $o->clear_cache(); # to clear current dir stored in cache
    $o->dir_walk_max(
        code => $code, # not mandatory, code to exec for each file found
        max  => $max  # not mandatory, max files successfully processed
    );

=head1 EXPORT

=over 

=item plainfiles

=item plainfiles_recursive

=back

=head1 SUBROUTINES/METHODS

=cut

#=============================================================
has 'path' => ( 
    is => 'rw', 
    isa => 'Str', 
    required => 1 
);

has 'cache_key' => (
    is  => 'rw',
    isa => 'Str',
);

has 'cache' => (
    is  => 'ro',
    isa => 'Cache::FileCache',
    lazy    => 1,
    default => sub {
        my $self = shift;
        my $o = Cache::FileCache->new();
        return $o;
    }
);

has 'log' => (
    is  => 'rw',
    isa => 'Log::Log4perl::Logger',
    lazy => 1,
    default => sub {
        return Log::Log4perl->get_logger( __PACKAGE__ );
    }
);

sub BUILD {
    my $self = shift;
    $self->cache_key( md5_base64( $self->path ) );
}

#=============================================================

=head2 plainfiles

=head3 INPUT

    $path: path to look for (not mandatory, eventually takes the
            one passed at construction time)

=head3 OUTPUT

An ArrayRef

=head3 DESCRIPTION

Returns the list of folder plain files.

=cut

#=============================================================
sub plainfiles {
    my $self = shift;

    return undef unless ( -e $self->path );
    return [ _pf( $self->path ) ];
}

#=============================================================

=head2 _pf

=head3 INPUT

    $path: the path to look for docs

=head3 OUTPUT

A sorted list of docs.

=head3 DESCRIPTION

Private function. Returns the sorted list of regular
files in current folder.

=cut

#=============================================================
sub _pf {
    my $dir = shift or return undef;
    my $dh = DirHandle->new($dir) or die "can't opendir $dir: $!";
    return sort
           grep { -f       }
           map { "$dir/$_" }
           grep { !/^\./   }
           $dh->read();
}

#=============================================================

=head2 _get_path_from_cache

=head3 INPUT

None

=head3 OUTPUT

The cache path or undef

=head3 DESCRIPTION

Tries to retrieve a current path cached for the root one or 
returns undef

=cut

#=============================================================
sub _get_path_from_cache {
    my $self = shift;
    return $self->cache->get( $self->cache_key );
}

sub _set_path_in_cache {
    my ( $self, $path ) = @_;
    return $self->clear_cache() unless ( $path );
    return $self->cache->set( $self->cache_key, $path );
}

sub clear_cache {
    my $self = shift;
    $self->cache->remove( $self->cache_key );
    return 1;
}

#=============================================================

=head2 dir_walk - traverses recursively a folder

=head3 INPUT

An hash with keys:
    code: a coderef to apply to each file

=head3 OUTPUT

-1 if root file or folder are not existent, else
the number of valid files.

=head3 DESCRIPTION

Traverse a directory, triggering a sub for each file found.
The sub should return 1 if file is good, 0 otherwise.
It is meant to recursively process a folder in one shot.
This is the minimal logic to traverse a tree, for added,
features look at dir_walk_max.

=cut

#=============================================================
sub dir_walk {
    my ( $self, %params ) = @_;
    my $code = $params{code};
    return undef unless $code;
    return $self->_walk( $self->path, $code, 0);
}

#=============================================================

=head2 _walk

=head3 INPUT

    $top: subtree root
    $code: code to execute against each valid file
    $count: number of valid files till now...

=head3 OUTPUT

The number of valid files.

=head3 DESCRIPTION

Traverse the subtree.

=cut

#=============================================================
sub _walk {
    my ( $self, $top, $code, $count ) = @_;
    my $DIR;
    unless (opendir $DIR, $top) {
        $self->log->error( "Couldn’t open directory $top: $!" );
        return;
    }
    my $file;
    while ($file = readdir $DIR) {
        my $item = "$top/$file";
        if ( -f $item ) {
            $count += $code->($item);
        } elsif (-d $item ) {
            next if $file eq '.' || $file eq '..';
            $count = $self->_walk( $item, $code, $count );
        }
    }
    closedir $DIR;
    return $count;
}

#=============================================================

=head2 dir_walk_max - traverses recursively a folder, stops after
                      max valid files found or all tree has been
                      traversed

=head3 INPUT

An hash with keys:
    code:   a coderef to apply to
    max:    max number of items to evaluate

=head3 OUTPUT

-1 if root file or folder are not existent, else
the number of valid files.

=head3 DESCRIPTION

Traverse a directory, triggering a sub for each file found.
The sub should return 1 if file is good, 0 otherwise.
The method stops when all files are consumed or max number
of good files is reached.


=cut

#=============================================================
sub dir_walk_max {
    my ( $self, %params ) = @_;
    my $code = $params{code};
    my $max  = $params{max};
    return undef unless ( $code && $max );
    my $rel_path = $self->_get_path_from_cache();
    return $self->_walk_max( $self->path, 0, $max, $code, $rel_path );
}


#=============================================================

=head2 _walk_max

=head3 INPUT

root     : the root folder to start with
count    : number of valid items processed
rel_path : the relative path from root
max      : max number of valid items
code     : code to run against each item

=head3 OUTPUT

The number of valid items processed

=head3 DESCRIPTION

Inner loop to analize each item of the tree.
it calls recursively itself for each sub-tree.
Ends when 1 of these conditions is met:
- no more files found
- max number of valid items reached
this number is set to be 10 times the max number of valid items.


=cut

#=============================================================
sub _walk_max {
    my ( $self, $root, $count, $max, $code, $rel_path ) = @_;

    $self->log->info(sprintf("[%06d]:Scanning %s", $count, $rel_path)) if( $rel_path );
    my $path = ( $rel_path ) ? "$root/$rel_path" : $root;
    $self->_set_path_in_cache( $rel_path );

    my $DIR;
    unless (opendir $DIR, $path) {
        $self->log->error( "Couldn’t open directory $path $!" );
        return undef;
    }

    my $file;
    while ( defined($file = readdir $DIR) && ( $count < $max ) ) {
        my $item = "$path/$file";
        if ( -f $item ) {
            $count += $code->($item);
        } elsif (-d $item ) {
            next if $file eq '.' || $file eq '..';
            $count = $self->_walk_max( $root, $count, $max, $code, ( $rel_path ) ? $rel_path.'/'.$file : $file );
        }
    }
    closedir $DIR;
    undef $DIR;
    unless ( $file ) {
        ( $rel_path ) ?  $self->_set_path_in_cache( join('/', splice(@{[split(m|/|, $rel_path)]},0,-1))) :
                         $self->clear_cache();
    }
    undef $file;
    return $count;
}

#=============================================================

=head2 plainfiles_recursive

=head3 INPUT

    $path:      a path to start from.
    $avoid:     arrayref con lista risorse da evitare
    $suffix:    arrayref con lista suffissi da processare
    $found:     ref sub callback per ogni risorsa

=head3 OUTPUT

    An arrayref.

=head3 DESCRIPTION

    Returns recursively the list of all files from passed folder.

=cut

#=============================================================
sub plainfiles_recursive {
    my ( $self, $path, $found ) = @_;

    my $dir = ( $self && ref $self ) ? $path || $self->{path} : $path;
    return undef unless $dir;

    $found = \&_found unless $found;

    @rec_files = ();
    find ( $found, ( $dir ) );
    return \@rec_files;
}

sub _found {
    return if ( -d );
    if ( -f  && !/^\./ && !/\~$/ ) {
        push @rec_files, $File::Find::name;
    }
}

sub _collect_as_hash {
    return if ( -d && /\.svn/ );
    if ( -f && !/^\./ && !/\~$/ ) {
        if ( m|(\w+)\.(\w+)$| ) {
            push @{ $files_as_hash->{ $2 } }, $File::Find::name;
        }
    }
}

sub _collect_of_type {
    return if ( -d && /\.\w+/ );
    if ( -f && !/^\./ && !/\~$/ ) {
        if ( m|(\w+)\.(\w+)$| ) {
            if ( $2 ~~ @types ) {
            push @rec_files, $File::Find::name;
            }
        }
    }
}

#=============================================================

=head2 plainfiles_recursive_as_hash

=head3 INPUT

    $path : root path (not mandatory if already passed at 
            construction time)

=head3 OUTPUT

An HashRef.

=head3 DESCRIPTION

Recursively collects files in an hash indexed by file suffix.

=cut

#=============================================================
sub plainfiles_recursive_as_hash {
    my ( $self, $path ) = @_;

    my $dir = ( $self && ref $self ) ? $path || $self->{path} : $path;
    return undef unless $dir;

    $files_as_hash = {};
    $self->plainfiles_recursive( $dir, \&_collect_as_hash );

#    foreach my $file ( @$rec_files ) {
#        if ( $file =~ m|(\w+)\.(\w+)$| ) {
#            push @{ $files_as_hash->{ $2 } }, $file;
#        }
#    }

    return $files_as_hash;
}

#=============================================================

=head2 plainfiles_recursive_of_type

=head3 INPUT

    @types: list of valid file suffixes

=head3 OUTPUT

    An ArrayRef or undef in case of errors.

=head3 DESCRIPTION

    Recursively collects all files with valid suffixes

=cut

#=============================================================
sub plainfiles_recursive_of_type {
    my ( $self, @ptypes ) = @_;

    return undef unless $self->{path};

    @types = @ptypes;
    @rec_files = ();
    $self->plainfiles_recursive( $self->{path}, \&_collect_of_type );
    return \@rec_files;
}

#=============================================================

=head2 process_dir

=head3 INPUT

    $dir    : dir to start with
    $suffix : arrayref of valid file suffixes
    $depth  : depth in processing subdirs

=head3 OUTPUT

    An arrayref

=head3 DESCRIPTION

    Workflow:
    get a single dir as input and the level of recursions
    get the list of valid dir files and process them
    get the list of dir direct subdirs
    depth-- 
    if depth > 0
        call process_dir foreach subdir

=cut

#=============================================================
sub process_dir ($$) {

}

=head1 AUTHOR

Marco Masetti, C<< <marco.masetti at softeco.it> >>

=head1 BUGS

Please report any bugs or feature requests to C<bug-ishare-filehandler at rt.cpan.org>, or through
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Mir-FileHandler>.  I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

    perldoc Mir::FileHandler

=head1 LICENSE AND COPYRIGHT

Copyright 2013 Marco Masetti.

This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.

See http://dev.perl.org/licenses/ for more information.

=cut

no Moose;
__PACKAGE__->meta->make_immutable;
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.