ORDB-CPANMeta-Generator/lib/ORDB/CPANMeta/Generator.pm
package ORDB::CPANMeta::Generator;
=pod
=head1 NAME
ORDB::CPANMeta::Generator - Generator for the CPAN Meta database
=head1 DESCRIPTION
This is the module that is used to generate the "CPAN Meta" database.
For more information, and to access this database as a consumer, see
the L<ORDB::CPANMeta> module.
The bulk of the work done in this module is actually achieved with:
L<CPAN::Mini> - Fetching the index and dist tarballs
L<CPAN::Mini::Visit> - Expanding and processing the tarballs
L<Xtract> - Preparing the SQLite database for distribution
=head1 METHODS
=cut
use 5.008005;
use strict;
use Carp ();
use File::Spec 3.29 ();
use File::Path 2.07 ();
use File::Remove 1.42 ();
use File::HomeDir 0.86 ();
use File::Basename 0 ();
use Module::CoreList 2.46 ();
use Parse::CPAN::Meta 1.4200 ();
use Params::Util 1.00 ();
use Getopt::Long 2.34 ();
use DBI 1.609 ();
use CPAN::Meta 2.112621 ();
use CPAN::Mini 0.576 ();
use CPAN::Mini::Visit 1.14 ();
use Xtract::Publish 0.12 ();
our $VERSION = '0.12';
use Object::Tiny 1.06 qw{
minicpan
sqlite
publish
visit
trace
delta
prefer_bin
warnings
dbh
};
######################################################################
# Constructor and Accessors
=pod
=head2 new
The C<new> constructor creates a new processor/generator.
=cut
sub new {
my $self = shift->SUPER::new(@_);
# Set the default path to the database
unless ( defined $self->sqlite ) {
$self->{sqlite} = File::Spec->catdir(
File::HomeDir->my_data,
($^O eq 'MSWin32' ? 'Perl' : '.perl'),
'ORDB-CPANMeta-Generator',
'metadb.sqlite',
);
}
# Set the default path to the publishing location
unless ( exists $self->{publish} ) {
$self->{publish} = 'cpanmeta';
}
return $self;
}
=pod
=head2 dir
The C<dir> method returns the directory that the SQLite
database will be written into.
=cut
sub dir {
File::Basename::dirname($_[0]->sqlite);
}
=pod
=head2 dsn
The C<dsn> method returns the L<DBI> DSN that is used to connect
to the generated database.
=cut
sub dsn {
"DBI:SQLite:" . $_[0]->sqlite
}
######################################################################
# Main Methods
=pod
=head2 run
The C<run> method executes the process that will produce and fill the
final database.
=cut
sub run {
my $self = shift;
# Normalise
$self->{prefer_bin} = $self->prefer_bin ? 1 : 0;
# Create the output directory
File::Path::make_path($self->dir);
unless ( -d $self->dir ) {
Carp::croak("Failed to create '" . $self->dir . "'");
}
# Clear the database if it already exists
unless ( $self->delta ) {
if ( -f $self->sqlite ) {
File::Remove::remove($self->sqlite);
}
if ( -f $self->sqlite ) {
Carp::croak("Failed to clear " . $self->sqlite);
}
}
# Update the minicpan if needed
if ( Params::Util::_HASH($self->minicpan) ) {
CPAN::Mini->update_mirror(
trace => $self->trace,
no_conn_cache => 1,
%{$self->minicpan},
);
$self->{minicpan} = $self->minicpan->{local};
}
# Connect to the database
my $dbh = DBI->connect($self->dsn);
unless ( $dbh ) {
Carp::croak("connect: \$DBI::errstr");
}
# Create the tables
$dbh->do(<<'END_SQL');
CREATE TABLE IF NOT EXISTS meta_distribution (
release TEXT NOT NULL,
meta INTEGER,
meta_name TEXT,
meta_version TEXT,
meta_abstract TEXT,
meta_generated TEXT,
meta_from TEXT,
meta_license TEXT
);
END_SQL
$dbh->do(<<'END_SQL');
CREATE TABLE IF NOT EXISTS meta_dependency (
release TEXT NOT NULL,
module TEXT NOT NULL,
version TEXT NULL,
phase TEXT NOT NULL,
core REAL NULL
)
END_SQL
### NOTE: This does nothing right now but will later.
# Build the index of seen archives.
# While building the index, remove entries
# that are no longer in the minicpan.
my $ignore = undef;
if ( $self->delta ) {
$dbh->begin_work;
my %seen = ();
my $dists = $dbh->selectcol_arrayref(
'SELECT DISTINCT release FROM meta_distribution'
);
foreach my $dist ( @$dists ) {
my $one = substr($dist, 0, 1);
my $two = substr($dist, 0, 2);
my $path = File::Spec->catfile(
$self->minicpan,
'authors', 'id',
$one, $two,
split /\//, $dist,
);
if ( -f $path ) {
# Add to the ignore list
$seen{$dist} = 1;
next;
}
# Clear the release from the database
$dbh->do(
'DELETE FROM meta_distribution WHERE release = ?',
{}, $dist,
);
}
$dbh->do(
'DELETE FROM meta_dependency WHERE release NOT IN '
. '( SELECT release FROM meta_distribution )',
);
$dbh->commit;
# NOW we need to start ignoring something
$ignore = [
sub {
$seen{ $_[0]->{dist} }
}
];
}
# Clear indexes for speed
$self->drop_indexes( $dbh );
# Run the visitor to generate the database
$dbh->begin_work;
my @meta_dist = ();
my @meta_deps = ();
my $visitor = CPAN::Mini::Visit->new(
acme => 1,
warnings => $self->warnings,
minicpan => $self->minicpan,
# This does nothing now but will later
ignore => $ignore,
prefer_bin => $self->prefer_bin,
callback => sub {
print STDERR "$_[0]->{dist}\n" if $self->trace;
my $the = shift;
my $meta = undef;
my @deps = ();
my $dist = {
release => $the->{dist},
meta => 0,
};
my $yaml_file = File::Spec->catfile(
$the->{tempdir}, 'META.yml',
);
my $json_file = File::Spec->catfile(
$the->{tempdir}, 'META.json',
);
if ( -f $json_file ) {
$meta = eval {
CPAN::Meta->load_file($json_file)
};
} elsif ( -f $yaml_file ) {
$meta = eval {
CPAN::Meta->load_file($yaml_file)
};
}
unless ( $@ or not defined $meta ) {
$dist->{meta} = 1;
$dist->{meta_name} = $meta->name;
$dist->{meta_version} = $meta->version;
$dist->{meta_abstract} = $meta->abstract;
$dist->{meta_generated} = $meta->generated_by;
$dist->{meta_generated} =~ s/,.+//;
$dist->{meta_license} = join ', ', $meta->licenses;
$dist->{meta_from} = undef;
# Fetch the dependency blocks
my $core = $meta->effective_prereqs;
foreach my $when ( qw{ configure build test runtime } ) {
my $requires = $core->requirements_for($when, 'requires');
my $hash = $requires->as_string_hash;
push @deps, map { +{
release => $the->{dist},
phase => $when,
module => $_,
version => $hash->{$_},
} } sort keys %$hash;
}
}
$dbh->do(
'INSERT INTO meta_distribution VALUES ( ?, ?, ?, ?, ?, ?, ?, ? )', {},
$dist->{release},
$dist->{meta},
$dist->{meta_name},
$dist->{meta_version},
$dist->{meta_abstract},
$dist->{meta_generated},
$dist->{meta_from},
$dist->{meta_license},
);
$dbh->do(
'INSERT INTO meta_dependency VALUES ( ?, ?, ?, ?, ? )', {},
$_->{release},
$_->{module},
$_->{version},
$_->{phase},
$_->{module} eq 'perl'
? $_->{version}
: scalar Module::CoreList->first_release(
$_->{module}, $_->{version},
),
) foreach @deps;
unless ( $the->{counter} % 100 ) {
$dbh->commit;
$dbh->begin_work;
}
},
);
$visitor->run;
$dbh->commit;
# Generate the indexes
$self->create_indexes( $dbh );
# Clean and optimise the database
$dbh->do('PRAGMA user_version = 10');
$dbh->do('VACUUM');
$dbh->do('ANALYZE main');
# Publish the database to the current directory
if ( defined $self->publish ) {
print STDERR "Publishing the generated database...\n" if $self->trace;
Xtract::Publish->new(
from => $self->sqlite,
sqlite => $self->publish,
trace => $self->trace,
raw => 0,
gz => 1,
bz2 => 1,
lz => 1,
)->run;
}
return 1;
}
######################################################################
# Index Management
use constant INDEX => (
[ 'meta_distribution', 'release' ],
[ 'meta_dependency', 'release' ],
[ 'meta_dependency', 'phase' ],
[ 'meta_dependency', 'module' ],
);
sub drop_indexes {
my $self = shift;
my $dbh = shift;
foreach my $i ( INDEX ) {
$dbh->do("DROP INDEX IF EXISTS $i->[0]__$i->[1]");
}
return 1;
}
sub create_indexes {
my $self = shift;
my $dbh = shift;
foreach my $i ( INDEX ) {
$self->create_index( $dbh, @$i );
}
return 1;
}
sub create_index {
$_[1]->do("CREATE INDEX IF NOT EXISTS $_[2]__$_[3] on $_[2] ( $_[3] )");
}
1;
=pod
=head1 SUPPORT
Bugs should be reported via the CPAN bug tracker at
L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=ORDB-CPANMeta-Generator>
For other issues, contact the author.
=head1 AUTHOR
Adam Kennedy E<lt>adamk@cpan.orgE<gt>
=head1 COPYRIGHT
Copyright 2009 - 2012 Adam Kennedy.
This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
The full text of the license can be found in the
LICENSE file included with this module.
=cut