Yet Another CPAN Grep

CPAN-Testers-Schema/lib/CPAN/Testers/Schema.pm

package CPAN::Testers::Schema;
our $VERSION = '0.028';
# ABSTRACT: Schema for CPANTesters database processed from test reports

#pod =head1 SYNOPSIS
#pod
#pod     my $schema = CPAN::Testers::Schema->connect( $dsn, $user, $pass );
#pod     my $rs = $schema->resultset( 'Stats' )->search( { dist => 'Test-Simple' } );
#pod     for my $row ( $rs->all ) {
#pod         if ( $row->state eq 'fail' ) {
#pod             say sprintf "Fail report from %s: http://cpantesters.org/cpan/report/%s",
#pod                 $row->tester, $row->guid;
#pod         }
#pod     }
#pod
#pod =head1 DESCRIPTION
#pod
#pod This is a L<DBIx::Class> Schema for the CPANTesters statistics database.
#pod This database is generated by processing the incoming data from L<the
#pod CPANTesters Metabase|http://metabase.cpantesters.org>, and extracting
#pod the useful fields like distribution, version, platform, and others (see
#pod L<CPAN::Testers::Schema::Result::Stats> for a full list).
#pod
#pod This is its own distribution so that it can be shared by the backend
#pod processing, data APIs, and the frontend web application.
#pod
#pod =head1 SEE ALSO
#pod
#pod L<CPAN::Testers::Schema::Result::Stats>, L<DBIx::Class>
#pod
#pod =cut

use CPAN::Testers::Schema::Base;
use File::Share qw( dist_dir );
use Path::Tiny qw( path );
use List::Util qw( uniq );
use base 'DBIx::Class::Schema';
use Mojo::UserAgent;
use DateTime::Format::ISO8601;

__PACKAGE__->load_namespaces;
__PACKAGE__->load_components(qw/Schema::Versioned/);
__PACKAGE__->upgrade_directory( dist_dir( 'CPAN-Testers-Schema' ) );

#pod =method connect_from_config
#pod
#pod     my $schema = CPAN::Testers::Schema->connect_from_config( %extra_conf );
#pod
#pod Connect to the MySQL database using a local MySQL configuration file
#pod in C<$HOME/.cpanstats.cnf>. This configuration file should look like:
#pod
#pod     [client]
#pod     host     = ""
#pod     database = cpanstats
#pod     user     = my_usr
#pod     password = my_pwd
#pod
#pod See L<DBD::mysql/mysql_read_default_file>.
#pod
#pod C<%extra_conf> will be added to the L<DBIx::Class::Schema/connect>
#pod method in the C<%dbi_attributes> hashref (see
#pod L<DBIx::Class::Storage::DBI/connect_info>).
#pod
#pod =cut

# Convenience connect method
sub connect_from_config ( $class, %config ) {
    my $schema = $class->connect(
        "DBI:mysql:mysql_read_default_file=$ENV{HOME}/.cpanstats.cnf;".
        "mysql_read_default_group=application;mysql_enable_utf8=1",
        undef,  # user
        undef,  # pass
        {
            AutoCommit => 1,
            RaiseError => 1,
            mysql_enable_utf8 => 1,
            quote_char => '`',
            name_sep   => '.',
            %config,
        },
    );
    return $schema;
}

#pod =method ordered_schema_versions
#pod
#pod Get the available schema versions by reading the files in the share
#pod directory. These versions can then be upgraded to using the
#pod L<cpantesters-schema> script.
#pod
#pod =cut

sub ordered_schema_versions( $self ) {
    my @versions =
        uniq sort
        map { /[\d.]+-([\d.]+)/ }
        grep { /CPAN-Testers-Schema-[\d.]+-[\d.]+-MySQL[.]sql/ }
        path( dist_dir( 'CPAN-Testers-Schema' ) )->children;
    return '0.000', @versions;
}

#pod =method populate_from_api
#pod
#pod     $schema->populate_from_api( \%search, @tables );
#pod
#pod Populate the given tables from the CPAN Testers API (L<http://api.cpantesters.org>).
#pod C<%search> has the following keys:
#pod
#pod =over
#pod
#pod =item dist
#pod
#pod A distribution to populate
#pod
#pod =item version
#pod
#pod A distribution version to populate
#pod
#pod =item author
#pod
#pod Populate an author's data
#pod
#pod =back
#pod
#pod The available C<@tables> are:
#pod
#pod =over
#pod
#pod =item * upload
#pod
#pod =item * release
#pod
#pod =item * summary
#pod
#pod =item * report
#pod
#pod =back
#pod
#pod =cut

sub populate_from_api( $self, $search, @tables ) {
    my $ua = $self->{_ua} ||= Mojo::UserAgent->new;
    $ua->inactivity_timeout( 120 );
    my $base_url = $self->{_url} ||= 'http://api.cpantesters.org/v3';
    my $dtf = DateTime::Format::ISO8601->new();

    # Establish dependencies
    my @order = qw( upload summary release report );
    my $match_tables = join '|', @order;
    if ( my @unknown = grep { !/^(?:$match_tables)$/ } @tables ) {
        die 'Unknown table(s): ', join ', ', @unknown;
    }

    my %tables = map {; $_ => 1 } @tables;
    # release depends on data in uploads and summary
    if ( $tables{ release } ) {
        @tables{qw( upload summary )} = ( 1, 1 );
    }
    # In order to link the report from the dist via the API, we need
    # to get the summaries first
    if ( $tables{ report } ) {
        @tables{qw( summary )} = ( 1 );
    }
    # summary depends on data in uploads
    if ( $tables{ summary } ) {
        @tables{qw( upload )} = ( 1 );
    }

    # ; use Data::Dumper;
    # ; say "Fetching tables: " . Dumper \%tables;

    for my $table ( @order ) {
        next unless $tables{ $table };
        my $url = $base_url;
        if ( $table eq 'upload' ) {
            $url .= '/upload';
            if ( $search->{dist} ) {
                $url .= '/dist/' . $search->{dist};
            }
            elsif ( $search->{author} ) {
                $url .= '/author/' . $search->{author};
            }
            my $tx = $ua->get( $url );
            if ( my $err = $tx->error ) {
                die sprintf q{Error fetching table '%s': (%s) %s}, $table, $err->{code} // 'XXX', $err->{message};
            }
            my @rows = map {
                $_->{released} = $dtf->parse_datetime( $_->{released} )->epoch;
                $_->{type} = 'cpan';
                $_;
            } $tx->res->json->@*;
            $self->resultset( 'Upload' )->update_or_create( $_ ) for @rows;
        }

        if ( $table eq 'summary' ) {
            $url .= '/summary';
            if ( $search->{dist} ) {
                $url .= '/' . $search->{dist};
                if ( $search->{version} ) {
                    $url .= '/' . $search->{version};
                }
            }
            my $tx = $ua->get( $url );
            if ( my $err = $tx->error ) {
                die sprintf q{Error fetching table '%s': (%s) %s}, $table, $err->{code} // 'XXX', $err->{message};
            }
            my @rows = map {
                my $dt = $dtf->parse_datetime( delete $_->{date} );
                $_->{postdate} = $dt->strftime( '%Y%m' );
                $_->{fulldate} = $dt->strftime( '%Y%m%d%H%M' );
                $_->{state} = delete $_->{grade};
                $_->{type} = 2;
                $_->{tester} = delete $_->{reporter};
                $_->{uploadid} = $self->resultset( 'Upload' )
                                 ->search({ $_->%{qw( dist version )} })
                                 ->first->id;
                $_;
            } $tx->res->json->@*;
            # ; use Data::Dumper;
            # ; say "Populate summary: " . Dumper \@rows;
            for my $perl ( uniq map { $_->{perl} } @rows ) {
                $self->resultset( 'PerlVersion' )->find_or_create({
                    version => $perl,
                });
            }
            $self->resultset( 'Stats' )->update_or_create( $_, { key => 'guid' } ) for @rows;
        }

        if ( $table eq 'release' ) {
            $url .= '/release';
            if ( $search->{dist} ) {
                $url .= '/dist/' . $search->{dist};
                if ( $search->{version} ) {
                    $url .= '/' . $search->{version};
                }
            }
            elsif ( $search->{author} ) {
                $url .= '/author/' . $search->{author};
            }
            my $tx = $ua->get( $url );
            if ( my $err = $tx->error ) {
                die sprintf q{Error fetching table '%s': (%s) %s}, $table, $err->{code} // 'XXX', $err->{message};
            }
            my @results = $search->{version} ? ( $tx->res->json ) : $tx->res->json->@*;
            my @rows = map {
                delete $_->{author}; # Author is from Upload
                my $stats_rs = $self->resultset( 'Stats' )
                           ->search({ $_->%{qw( dist version )} });
                $_->{id} = $stats_rs->get_column( 'id' )->max;
                $_->{guid} = $stats_rs->find( $_->{id} )->guid;
                my $upload = $self->resultset( 'Upload' )
                             ->search({ $_->%{qw( dist version )} })
                             ->first;
                $_->{oncpan} = $upload->type eq 'cpan';
                $_->{uploadid} = $upload->id;
                # XXX These are just wrong
                $_->{distmat} = 1;
                $_->{perlmat} = 1;
                $_->{patched} = 1;
                $_;
            } @results;
            # ; use Data::Dumper;
            # ; say "Populate release: " . Dumper \@rows;
            $self->resultset( 'Release' )->update_or_create( $_ ) for @rows;
        }

        if ( $table eq 'report' ) {
            $url .= '/report';

            # There is no direct API to get reports by dist/version, BUT
            # we already have summaries loaded in the database so we can
            # get the GUIDs out of there.
            Mojo::Promise->map(
                { concurrency => 8 },
                sub( $summary ) {
                    my $report_url = join '/', $url, $summary->guid;
                    #; say "Getting report $report_url";
                    return $ua->get_p( $report_url )->then(
                        # Success
                        sub {
                            my ( $tx ) = @_;
                            if ( my $err = $tx->error ) {
                                die sprintf q{Error fetching table '%s': (%s) %s}, $table, $err->{code} // 'XXX', $err->{message};
                            }
                            my $report = $tx->res->json;
                            #; say "Writing $report->{id}";
                            $self->resultset( 'TestReport' )->update_or_create({
                                id => $report->{id},
                                report => $report,
                            });
                        },
                        # Failure
                        sub {
                            warn 'Problem fetching report: ' . join ' ', @_;
                        },
                    );
                },
                $self->resultset( 'Stats' )->search( $search )->all,
            )->then(
                undef,
                sub { warn 'Problem fetching reports: ' . join ' ', @_ },
            )->wait;
        }
    }
}

1;

__END__

=pod

=head1 NAME

CPAN::Testers::Schema - Schema for CPANTesters database processed from test reports

=head1 VERSION

version 0.028

=head1 SYNOPSIS

    my $schema = CPAN::Testers::Schema->connect( $dsn, $user, $pass );
    my $rs = $schema->resultset( 'Stats' )->search( { dist => 'Test-Simple' } );
    for my $row ( $rs->all ) {
        if ( $row->state eq 'fail' ) {
            say sprintf "Fail report from %s: http://cpantesters.org/cpan/report/%s",
                $row->tester, $row->guid;
        }
    }

=head1 DESCRIPTION

This is a L<DBIx::Class> Schema for the CPANTesters statistics database.
This database is generated by processing the incoming data from L<the
CPANTesters Metabase|http://metabase.cpantesters.org>, and extracting
the useful fields like distribution, version, platform, and others (see
L<CPAN::Testers::Schema::Result::Stats> for a full list).

This is its own distribution so that it can be shared by the backend
processing, data APIs, and the frontend web application.

=head1 METHODS

=head2 connect_from_config

    my $schema = CPAN::Testers::Schema->connect_from_config( %extra_conf );

Connect to the MySQL database using a local MySQL configuration file
in C<$HOME/.cpanstats.cnf>. This configuration file should look like:

    [client]
    host     = ""
    database = cpanstats
    user     = my_usr
    password = my_pwd

See L<DBD::mysql/mysql_read_default_file>.

C<%extra_conf> will be added to the L<DBIx::Class::Schema/connect>
method in the C<%dbi_attributes> hashref (see
L<DBIx::Class::Storage::DBI/connect_info>).

=head2 ordered_schema_versions

Get the available schema versions by reading the files in the share
directory. These versions can then be upgraded to using the
L<cpantesters-schema> script.

=head2 populate_from_api

    $schema->populate_from_api( \%search, @tables );

Populate the given tables from the CPAN Testers API (L<http://api.cpantesters.org>).
C<%search> has the following keys:

=over

=item dist

A distribution to populate

=item version

A distribution version to populate

=item author

Populate an author's data

=back

The available C<@tables> are:

=over

=item * upload

=item * release

=item * summary

=item * report

=back

=head1 SEE ALSO

L<CPAN::Testers::Schema::Result::Stats>, L<DBIx::Class>

=head1 AUTHORS

=over 4

=item *

Oriol Soriano <oriolsoriano@gmail.com>

=item *

Doug Bell <preaction@cpan.org>

=back

=head1 CONTRIBUTORS

=for stopwords Breno G. de Oliveira Joel Berger Mohammad S Anwar Nick Tonkin Paul Cochrane

=over 4

=item *

Breno G. de Oliveira <garu@cpan.org>

=item *

Joel Berger <joel.a.berger@gmail.com>

=item *

Mohammad S Anwar <mohammad.anwar@yahoo.com>

=item *

Nick Tonkin <1nickt@users.noreply.github.com>

=item *

Paul Cochrane <paul@liekut.de>

=back

=head1 COPYRIGHT AND LICENSE

This software is copyright (c) 2018 by Oriol Soriano, Doug Bell.

This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.

=cut
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.