Group
Extension

Distribution-Metadata/lib/Distribution/Metadata.pm

package Distribution::Metadata 0.10;
use v5.16;
use warnings;

use CPAN::DistnameInfo;
use CPAN::Meta;
use Config ();
use Cwd ();
use ExtUtils::Packlist;
use File::Basename ();
use File::Find ();
use File::Spec;
use JSON ();
use Module::Metadata;

use constant DEBUG => $ENV{PERL_DISTRIBUTION_METADATA_DEBUG};

my $SEP = qr{/|\\}; # path separater
my $ARCHNAME = $Config::Config{archname};

our $CACHE;

sub new_from_file {
    my ($class, $file, %option) = @_;
    $class->_new(%option, _module => {file => $file});
}

sub new_from_module {
    my ($class, $module, %option) = @_;
    $class->_new(%option, _module => {name => $module});
}

sub _new {
    my ($class, %option) = @_;
    my $module = $option{_module};
    my $inc = $option{inc} || \@INC;
    $inc = $class->_abs_path($inc);
    $inc = $class->_fill_archlib($inc) if $option{fill_archlib};
    my $metadata = $module->{file}
        ? Module::Metadata->new_from_file($module->{file}, inc => $inc)
        : Module::Metadata->new_from_module($module->{name}, inc => $inc);

    my $self = bless {}, $class;
    return $self unless $metadata;

    $module->{file} = $metadata->filename;
    $module->{name} = $metadata->name;
    $module->{version} = $metadata->version;

    my ($packlist, $files) = $class->_find_packlist($module->{file}, $inc);
    if ($packlist) {
        $self->{packlist} = $packlist;
        $self->{files}    = $files;
    } else {
        return $self;
    }

    my ($main_module, $lib) = $self->_guess_main_module($packlist);
    if ($main_module) {
        $self->{main_module} = $main_module;
        if ($main_module eq "perl") {
            $self->{main_module_version} = $^V;
            $self->{main_module_file} = $^X;
            $self->{dist} = "perl";
            my $version = "" . $^V;
            $version =~ s/v//;
            $self->{distvname} = "perl-$version";
            $self->{version} = $version;
            return $self;
        }
    } else {
        return $self;
    }

    my $archlib = File::Spec->catdir($lib, $ARCHNAME);
    my $main_metadata = Module::Metadata->new_from_module(
        $main_module, inc => [$archlib, $lib]
    );

    my ($find_module, $find_version);
    if ($main_metadata) {
        $self->{main_module_version} = $main_metadata->version;
        $self->{main_module_file} = $main_metadata->filename;
        $find_module = $main_metadata->name;
        $find_version = $main_metadata->version;
    } else {
        $find_module = $module->{name};
        $find_version = $module->{version};
    }

    my ($meta_directory, $install_json, $install_json_hash, $mymeta_json) = $class->_find_meta(
        $main_module, $find_module, $find_version,
        File::Spec->catdir($archlib, ".meta")
    );
    $self->{meta_directory}    = $meta_directory;
    $self->{install_json}      = $install_json;
    $self->{install_json_hash} = $install_json_hash;
    $self->{mymeta_json}       = $mymeta_json;
    $self;
}

sub _guess_main_module {
    my ($self, $packlist) = @_;
    my @piece = File::Spec->splitdir( File::Basename::dirname($packlist) );
    if ($piece[-1] eq $ARCHNAME) {
        return ("perl", undef);
    }

    my (@module, @lib);
    for my $i ( 1 .. ($#piece-2) ) {
        if ($piece[$i] eq $ARCHNAME && $piece[$i+1] eq "auto") {
            @module = @piece[ ($i+2) .. $#piece ];
            @lib    = @piece[ 0      .. ($i-1)  ];
            last;
        }
    }
    return unless @module;
    return ( _fix_module_name( join("::", @module) ), File::Spec->catdir(@lib) );
}

# ugly workaround for case insensitive filesystem
# eg: if you install 'Version::Next' module and later 'version' module,
# then version's packlist is located at Version/.packlist! (capital V!)
# Maybe there are a lot of others...
my @fix_module_name = qw(version Version::Next);
sub _fix_module_name {
    my $module_name = shift;
    if (my ($fix) = grep { $module_name =~ /^$_$/i } @fix_module_name) {
        $fix;
    } else {
        $module_name;
    }
}

sub _fill_archlib {
    my ($class, $incs) = @_;
    my %incs = map { $_ => 1 } @$incs;
    my @out;
    for my $inc (@$incs) {
        push @out, $inc;
        next if $inc =~ /$ARCHNAME$/o;
        my $archlib = File::Spec->catdir($inc, $ARCHNAME);
        if (-d $archlib && !$incs{$archlib}) {
            push @out, $archlib;
        }
    }
    \@out;
}

my $decode_install_json = sub {
    my $file = shift;
    my $content = do { open my $fh, "<", $file or next; local $/; <$fh> };
    JSON::decode_json($content);
};
sub _decode_install_json {
    my ($class, $file, $dir) = @_;
    if ($CACHE) {
        $CACHE->{install_json}{$dir}{$file} ||= $decode_install_json->($file);
    } else {
        $decode_install_json->($file);
    }
}

sub _find_meta {
    my ($class, $main_module, $module, $version, $dir) = @_;
    return unless -d $dir;

    my @install_json;
    if ($CACHE and $CACHE->{install_json_collected}{$dir}) {
        @install_json = keys %{$CACHE->{install_json}{$dir}};
    } else {
        @install_json = do {
            opendir my $dh, $dir or die "opendir $dir: $!";
            my @meta_dir = grep { !/^[.]{1,2}$/ } readdir $dh;
            grep -f, map { File::Spec->catfile($dir, $_, "install.json") } @meta_dir;
        };
        if ($CACHE) {
            $CACHE->{install_json}{$dir}{$_} ||= undef for @install_json;
            $CACHE->{install_json_collected}{$dir}++;
        }
    }

    # to speed up, first try distribution which just $module =~ s/::/-/gr;
    my $naive = do { my $dist = $main_module; $dist =~ s/::/-/g; $dist };
    @install_json = (
        (sort { $b cmp $a } grep {  /^$naive/ } @install_json),
        (sort { $b cmp $a } grep { !/^$naive/ } @install_json),
    );

    my ($meta_directory, $install_json, $install_json_hash, $mymeta_json);
    INSTALL_JSON_LOOP:
    for my $file (@install_json) {
        my $hash = $class->_decode_install_json($file, $dir);

        # name VS target ? When LWP, name is LWP, and target is LWP::UserAgent
        # So name is main_module!
        my $name = $hash->{name} || "";
        next if $name ne $main_module;
        my $provides = $hash->{provides} || +{};
        for my $provide (sort keys %$provides) {
            if ($provide eq $module
                && ($provides->{$provide}{version} || "") eq $version) {
                $meta_directory = File::Basename::dirname($file);
                $install_json = $file;
                $mymeta_json  = File::Spec->catfile($meta_directory, "MYMETA.json");
                $install_json_hash = $hash;
                last INSTALL_JSON_LOOP;
            }
        }
        DEBUG and warn "==> failed to find $module $version in $file\n";
    }

    return ($meta_directory, $install_json, $install_json_hash, $mymeta_json);
}

sub _naive_packlist {
    my ($class, $module_file, $inc) = @_;
    for my $i (@$inc) {
        if (my ($path) = $module_file =~ /$i $SEP (.+)\.pm /x) {
            my $archlib = $i =~ /$ARCHNAME$/o ? $i : File::Spec->catdir($i, $ARCHNAME);
            my $try = File::Spec->catfile( $archlib, "auto", $path, ".packlist" );
            return $try if -f $try;
        }
    }
    return;
}

# It happens that .packlist files are symlink path.
# eg: OSX,
# in .packlist: /var/folders/...
# but /var/folders/.. is a symlink to /private/var/folders
my $extract_files = sub {
    my $packlist = shift;
    [
        map  { Cwd::abs_path($_) } grep { -f }
        sort keys %{ ExtUtils::Packlist->new($packlist) || +{} }
    ];
};
sub _extract_files {
    my ($class, $packlist) = @_;
    if ($CACHE) {
        $CACHE->{packlist}{$packlist} ||= $extract_files->($packlist);
    } else {
        $extract_files->($packlist);
    }
}

sub _core_packlist {
    my ($self, $inc) = @_;
    for my $dir (grep -d, @$inc) {
        opendir my $dh, $dir or die "Cannot open dir $dir: $!\n";
        my ($packlist) = map { File::Spec->catfile($dir, $_) } grep {$_ eq ".packlist"} readdir $dh;
        return $packlist if $packlist;
    }
    return;
}

sub _find_packlist {
    my ($class, $module_file, $inc) = @_;

    if ($CACHE and my $core_packlist = $CACHE->{core_packlist}) {
        my $files = $class->_extract_files($core_packlist);
        if (grep {$module_file eq $_} @$files) {
            return ($core_packlist, $files);
        }
    }

    # to speed up, first try packlist which is naively guessed by $module_file
    if (my $naive_packlist = $class->_naive_packlist($module_file, $inc)) {
        my $files = $class->_extract_files($naive_packlist);
        if ( grep { $module_file eq $_ } @$files ) {
            DEBUG and warn "-> naively found packlist: $module_file\n";
            return ($naive_packlist, $files);
        }
    }

    my @packlists;
    if ($CACHE and $CACHE->{packlist_collected}) {
        @packlists = keys %{ $CACHE->{packlist} };
    } else {
        if (my $core_packlist = $class->_core_packlist($inc)) {
            push @packlists, $core_packlist;
            $CACHE->{core_packlist} = $core_packlist if $CACHE;
        }
        File::Find::find sub {
            return unless -f;
            return unless $_ eq ".packlist";
            push @packlists, $File::Find::name;
        }, grep -d, map { File::Spec->catdir($_, "auto") } @{$class->_fill_archlib($inc)};
        if ($CACHE) {
            $CACHE->{packlist}{$_} ||= undef for @packlists;
            $CACHE->{packlist_collected}++;
        }
    }

    for my $try (@packlists) {
        my $files = $class->_extract_files($try);
        if (grep { $module_file eq $_ } @$files) {
            return ($try, $files);
        }
    }
    return;
}

sub _abs_path {
    my ($class, $dirs) = @_;
    my @out;
    for my $dir (grep -d, @$dirs) {
        my $abs = Cwd::abs_path($dir);
        $abs =~ s/$SEP+$//;
        push @out, $abs if $abs;
    }
    \@out;
}

sub packlist            { shift->{packlist} }
sub meta_directory      { shift->{meta_directory} }
sub install_json        { shift->{install_json} }
sub mymeta_json         { shift->{mymeta_json} }
sub main_module         { shift->{main_module} }
sub main_module_version { shift->{main_module_version} }
sub main_module_file    { shift->{main_module_file} }
sub files               { shift->{files} }
sub install_json_hash   { shift->{install_json_hash} }

sub mymeta_json_hash {
    my $self = shift;
    return unless my $mymeta_json = $self->mymeta_json;
    $self->{mymeta_json_hash} ||= CPAN::Meta->load_file($mymeta_json)->as_struct;
}

sub _distnameinfo {
    my $self = shift;
    return unless my $hash = $self->install_json_hash;
    $self->{_distnameinfo} ||= CPAN::DistnameInfo->new( $hash->{pathname} );
}

for my $attr (qw(dist version cpanid distvname pathname)) {
    no strict 'refs';
    *$attr = sub {
        my $self = shift;
        return $self->{$attr} if exists $self->{$attr}; # for 'perl' distribution
        return unless $self->_distnameinfo;
        $self->_distnameinfo->$attr;
    };
}

# alias
sub name   { shift->dist }
sub author { shift->cpanid }

1;

__END__

=for stopwords .packlist inc pathname eg archname eq archlibs vname libwww-perl

=encoding utf-8

=head1 NAME

Distribution::Metadata - gather distribution metadata in local

=head1 SYNOPSIS

    use Distribution::Metadata;

    my $info = Distribution::Metadata->new_from_module("LWP::UserAgent");

    print $info->name;      # libwww-perl
    print $info->version;   # 6.13
    print $info->distvname; # libwww-perl-6.13
    print $info->author;    # ETHER
    print $info->pathname;  # E/ET/ETHER/libwww-perl-6.13.tar.gz

    print $info->main_module;         # LWP
    print $info->main_module_version; # 6.13
    print $info->main_module_file;    # path of LWP.pm

    print $info->packlist;       # path of .packlist
    print $info->meta_directory; # path of .meta directory
    print $info->install_json;   # path of install.json
    print $info->mymeta_json;    # path of MYMETA.json

    my $files = $info->files; # files which are listed in .packlist

    my $install_json_hash = $info->install_json_hash;
    my $mymeta_json_hash  = $info->mymeta_json_hash;

=head1 DESCRIPTION

(B<CAUTION>: This module is still in development phase. API will change without notice.)

Sometimes we want to know:
I<Where this module comes from? Which distribution does this module belong to?>

Since L<cpanm> 1.5000 (released 2011.10.13),
it installs not only modules but also their meta data.
So we can answer that questions!

Distribution::Metadata gathers distribution metadata in local.
That is, this module tries to gather

=over 4

=item * main module name, version, file

=item * C<.packlist> file

=item * C<.meta> directory

=item * C<install.json> file

=item * C<MYMETA.json> file

=back

Please note that as mentioned above, B<this module deeply depends on cpanm behavior>.
If you install cpan modules by hands or some cpan clients other than cpanm,
this module won't work.

=head1 HOW IT WORKS

Let me explain how C<< $class->new_from_module($module, inc => $inc) >> works.

=over 4

=item * Get C<$module_file> by

    Module::Metadata->new_from_module($module, inc => $inc)->filename.

=item * Find C<$packlist> in which C<$module_file> is listed.

=item * From C<$packlist> pathname (eg: ...auto/LWP/.packlist), determine C<$main_module> and main module search directory C<$lib>.

=item * Get C<$main_module_version> by

    Module::Metadata->new_from_module($main_module, inc => [$lib, "$lib/$Config{archname}"])->version

=item * Find install.json that has "name" eq C<$main_module>, and provides C<$main_module> with version C<$main_module_version>.

=item * Get .meta directory and MYMETA.json with install.json.

=back

=head2 CONSTRUCTORS

=over 4

=item C<< my $info = $class->new_from_module($module, inc => \@dirs, fill_archlib => $bool) >>

Create Distribution::Metadata instance from module name.

You can append C<inc> argument
to specify module/packlist/meta search paths. Default is C<\@INC>.

Also you can append C<fill_archlib> argument
so that archlibs are automatically added to C<inc> if missing.

Please note that, even if the module cannot be found,
C<new_from_module> returns a Distribution::Metadata instance.
However almost all methods returns false for such objects.
If you want to know whether the distribution was found or not, try:

    my $info = $class->new_from_module($module);

    if ($info->packlist) {
        # found
    } else {
        # not found
    }

=item C<< my $info = $class->new_from_file($file, inc => \@dirs, fill_archlib => $bool) >>

Create Distribution::Metadata instance from file path.
You can append C<inc> and C<fill_archlib> arguments too.

Also C<new_from_file> retunes a Distribution::Metadata instance,
even if file cannot be found.

=back

=head2 METHODS

Please note that the following methods return false
when appropriate modules or files cannot be found.

=over 4

=item C<< my $name = $info->name (alias: $info->dist) >>

distribution name (eg: C<libwww-perl>)

=item C<< my $version = $info->version >>

distribution version (eg: C<6.13>)

=item C<< my $distvname = $info->distvname >>

distribution vname (eg: C<libwww-perl-6.13>)

=item C<< my $author = $info->author (alias: $info->cpanid) >>

distribution author (eg: C<ETHER>)

=item C<< my $pathname = $info->pathname >>

distribution pathname (eg: C<E/ET/ETHER/libwww-perl-6.13.tar.gz>)

=item C<< my $file = $info->packlist >>

C<.packlist> file path

=item C<< my $dir = $info->meta_directory >>

C<.meta> directory path

=item C<< my $file = $info->install_json >>

C<install.json> file path

=item C<< my $file = $info->mymeta_json >>

C<MYMETA.json> file path

=item C<< my $main_module = $info->main_module >>

main module name

=item C<< my $version = $info->main_module_version >>

main module version

=item C<< my $file = $info->main_module_file >>

main module file path

=item C<< my $files = $info->files >>

file paths which is listed in C<.packlist> file,
note that paths are acutually C<< Cwd::abs_path() >>-ed

=item C<< my $hash = $info->install_json_hash >>

a hash reference for C<install.json>

    my $info = Distribution::Metadata->new_from_module("LWP::UserAgent");
    my $install = $info->install_json_hash;
    $install->{version};  # 6.13
    $install->{dist};     # libwww-perl-6.13
    $install->{provides}; # a hash reference of providing modules
    ...

=item C<< my $hash = $info->mymeta_json_hash >>

a hash reference for C<MYMETA.json>

    my $info = Distribution::Metadata->new_from_module("LWP::UserAgent");
    my $meta = $info->mymeta_hash;
    $meta->{version};  # 6.13
    $meta->{abstract}; # The World-Wide Web library for Perl
    $meta->{prereqs};  # prereq hash
    ...

=back

=head1 SEE ALSO

L<Module::Metadata>

L<App::cpanminus>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2015 Shoichi Kaji E<lt>skaji@cpan.orgE<gt>

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.