Group
Extension

Zonemaster-Backend/share/patch/patch_db_zonemaster_backend_ver_11.1.0.pl

use strict;
use warnings;

use List::MoreUtils qw(zip_unflatten);
use JSON::PP;
use Try::Tiny;
use File::Temp qw(tempfile);
use Encode qw(find_encoding);

use Zonemaster::Backend::Config;
use Zonemaster::Engine;

my $config = Zonemaster::Backend::Config->load_config();

my %module_mapping;
for my $module ( Zonemaster::Engine->modules ) {
    $module_mapping{lc $module} = $module;
}

my %patch = (
    mysql       => \&patch_db_mysql,
    postgresql  => \&patch_db_postgresql,
    sqlite      => \&patch_db_sqlite,
);

my $db_engine = $config->DB_engine;
print "Configured database engine: $db_engine\n";

if ( $db_engine =~ /^(MySQL|PostgreSQL|SQLite)$/ ) {
    print( "Starting database migration\n" );
    $patch{ lc $db_engine }();
    print( "\nMigration done\n" );
}
else {
    die "Unknown database engine configured: $db_engine\n";
}

# depending on the resources available to select all data in database
# update $row_count to your needs
sub _update_data_result_entries {
    my ( $dbh, $row_count ) = @_;

    my $json = JSON::PP->new->allow_blessed->convert_blessed->canonical;

    # update only jobs with results
    my ( $row_total ) = $dbh->selectrow_array( 'SELECT count(*) FROM test_results WHERE results IS NOT NULL' );
    print "Will update $row_total rows\n";

    my %levels = Zonemaster::Engine::Logger::Entry->levels();

    my $row_done = 0;
    while ( $row_done < $row_total ) {
        print "Progress update: $row_done / $row_total\n";
        my $row_updated = 0;
        my $sth1 = $dbh->prepare( 'SELECT hash_id, results FROM test_results WHERE results IS NOT NULL ORDER BY id ASC LIMIT ? OFFSET ?' );
        $sth1->execute( $row_count, $row_done );
        while ( my $row = $sth1->fetchrow_arrayref ) {
            my ( $hash_id, $results ) = @$row;

            next unless $results;

            my @records;
            my $entries = $json->decode( $results );

            foreach my $m ( @$entries ) {
                my $module = $module_mapping{ lc $m->{module} } // ucfirst lc $m->{module};
                my $testcase =
                  ( !defined $m->{testcase} or $m->{testcase} eq 'UNSPECIFIED' )
                  ? 'Unspecified'
                  : $m->{testcase} =~ s/[a-z_]*/$module/ir;

                if ($testcase eq 'Delegation01' and $m->{tag} =~ /^(NOT_)?ENOUGH_IPV[46]_NS_(CHILD|DEL)$/) {
                    my @ips = split( /;/, delete $m->{args}{ns_ip_list} );
                    my @names = split( /;/, delete $m->{args}{nsname_list} );
                    my @ns_list = map { join( '/', @$_ ) } zip_unflatten(@names, @ips);
                    $m->{args}{ns_list} = join( ';', @ns_list );
                }

                my $r = [
                    $hash_id,
                    $levels{ $m->{level} },
                    $module,
                    $testcase,
                    $m->{tag},
                    $m->{timestamp},
                    $json->encode( $m->{args} // {} ),
                ];

                push @records, $r;
            }

            my $query_values = join ", ", ("(?, ?, ?, ?, ?, ?, ?)") x @records;
            my $query = "INSERT INTO result_entries (hash_id, level, module, testcase, tag, timestamp, args) VALUES $query_values";
            my $sth = $dbh->prepare( $query );
            $sth = $sth->execute( map { @$_ } @records );

            $row_updated += $dbh->do( "UPDATE test_results SET results = NULL WHERE hash_id = ?", undef, $hash_id );
        }

        # increase by min(row_updated, row_count)
        $row_done += ( $row_updated < $row_count ) ? $row_updated : $row_count;
    }
    print "Progress update: $row_done / $row_total\n";
}

sub _update_data_normalize_domains {
    my ( $db ) = @_;

    my ( $row_total ) = $db->dbh->selectrow_array( 'SELECT count(*) FROM test_results' );
    print "Will update $row_total rows\n";


    my $sth1 = $db->dbh->prepare( 'SELECT hash_id, params FROM test_results' );
    $sth1->execute;

    my $row_done = 0;
    my $progress = 0;

    while ( my $row = $sth1->fetchrow_hashref ) {
        my $hash_id = $row->{hash_id};
        eval {
            my $raw_params = decode_json($row->{params});
            my $domain = $raw_params->{domain};

            # This has never been cleaned
            delete $raw_params->{user_ip};

            my $params = $db->encode_params( $raw_params );
            my $fingerprint = $db->generate_fingerprint( $raw_params );

            $domain = Zonemaster::Backend::DB::_normalize_domain( $domain );

            $db->dbh->do('UPDATE test_results SET domain = ?, params = ?, fingerprint = ? where hash_id = ?', undef, $domain, $params, $fingerprint, $hash_id);
        };
        if ($@) {
            warn "Caught error while updating record with hash id $hash_id, ignoring: $@\n";
        }
        $row_done += 1;
        my $new_progress = int(($row_done / $row_total) * 100);
        if ( $new_progress != $progress ) {
            $progress = $new_progress;
            print("$progress%\n");
        }
    }
}

sub patch_db_mysql {
    use Zonemaster::Backend::DB::MySQL;

    my $db = Zonemaster::Backend::DB::MySQL->from_config( $config );
    my $dbh = $db->dbh;

    $dbh->{AutoCommit} = 0;

    try {
        $db->create_schema();

        print( "\n-> (1/2) Populating new result_entries table\n" );
        _update_data_result_entries( $dbh, 50000 );

        print( "\n-> (2/2) Normalizing domain names\n" );
        _update_data_normalize_domains( $db );

        $dbh->commit();
    } catch {
        print( "\nCould not upgrade database:  " . $_ );

        $dbh->rollback();
    };
}

sub _patch_db_postgresql_step1 {
    my ($dbh, $chunk_size) = @_;
    $chunk_size //= 100_000;

    # This is used later for backslash-escaping data supplied to COPY … FROM
    # STDIN commands.
    my %conv = ( 8 => '\b', 9 => '\t', 10 => '\n', 11 => '\v', 12 => '\f', 13 => '\r', 92 => '\\\\' );

    my $utf8 = find_encoding('utf8');

    # Why a cursor instead of a plain SELECT statement? Because DBD::Pg does
    # not use server-side cursors itself when reading the result of a SELECT
    # query.
    #
    # And why is that a problem? That’s because the DBMS will try to compute
    # the entire result set before handing it to the client. With large
    # Zonemaster setups with years of history and millions of tests, this
    # SELECT statement will generate hundreds of millions of rows. So without
    # the appropriate precautions, a plain SELECT query like this one will
    # definitely take out the machine it is running on!
    print("Starting up\n");
    $dbh->do(q[
        DECLARE curs NO SCROLL CURSOR FOR
        SELECT
          test_results.hash_id,
          log_level.value AS level,
          CASE res.module
            WHEN 'DNSSEC' THEN res.module
            ELSE initcap(res.module)
          END AS module,
          CASE
            WHEN res.testcase IS NULL THEN ''
            WHEN res.testcase LIKE 'DNSSEC%' THEN res.testcase
            ELSE initcap(res.testcase)
          END AS testcase,
          res.tag AS tag,
          res.timestamp AS timestamp,
          COALESCE(migrated_args.args, '{}') AS args
          FROM test_results,
               json_to_recordset(results)
                 AS res(module TEXT, testcase TEXT, tag TEXT, level TEXT, timestamp REAL, args JSONB)
          LEFT JOIN log_level ON (res.level = log_level.level)
          LEFT JOIN LATERAL (
            SELECT CASE WHEN res.testcase = 'DELEGATION01'
                AND res.tag ~ '^(NOT_)?ENOUGH_IPV[46]_NS_(CHILD|DEL)$'
                AND (NOT res.args ? 'ns_list')
            THEN (
              SELECT res.args
                   - ARRAY['ns_ip_list', 'nsname_list']
                  || jsonb_build_object('ns_list', string_agg(name || '/' || ip, ';'))
                FROM unnest(
                  string_to_array(res.args->>'ns_ip_list', ';'),
                  string_to_array(res.args->>'nsname_list', ';'))
                       AS unnest(ip, name))
            ELSE res.args
            END) AS migrated_args(args) ON TRUE]);

    # I’ve tried to avoid hardcoding numbers but FETCH statements somehow
    # don’t like being parameterized with placeholders. This will have to do.
    my $read_sth = $dbh->prepare(sprintf(q[FETCH FORWARD %d FROM curs], $chunk_size));
    my $row_inserted = 0;

    while ($read_sth->execute(), (my $row_count = $read_sth->rows()) > 0) {
        my @copydata = ();

        print("Progress update: ${row_inserted} rows inserted\n");
        $row_inserted += $row_count;

        $dbh->do(q[COPY result_entries FROM STDIN]);
        while (my $row = $read_sth->fetchrow_arrayref) {
            my @columns = map {
                if (defined $_) {
                    # Replaces invalid UTF-8 sequences with U+FFFD and escapes
                    # characters as required by PostgreSQL’s text COPY data
                    # format.
                    $utf8->encode($utf8->decode($_) =~ s/[\x08-\x0D\\]/$conv{ord $&}/aegr);
                } else {
                    '\N';
                }
            } @$row;
            my $line = join("\t", @columns) . "\n";
            push @copydata, $line;
            $dbh->pg_putcopydata( $line );
        }

        try {
            $dbh->pg_putcopyend();
        }
        catch {
            print("An error occurred while trying to copy some data.\n");
            my ($fh, $filename) = tempfile();
            print $fh @copydata;
            close $fh;
            print("The data supplied to COPY causing the failure has been ",
                  "stored in $filename for inspection\n");
            die $_;
        }
    }
    $dbh->do(q[CLOSE curs]);
    print("Done inserting ${row_inserted} rows\n");
}

sub patch_db_postgresql {
    use Zonemaster::Backend::DB::PostgreSQL;

    my $db = Zonemaster::Backend::DB::PostgreSQL->from_config( $config );
    my $dbh = $db->dbh;

    $dbh->{AutoCommit} = 0;

    try {
        $db->create_schema();

        # Make sure the planner knows that log_level is a small table
        # so it can optimize step 1 appropriately
        $dbh->do(q[ANALYZE log_level]);

        print( "\n-> (1/2) Populating new result_entries table\n" );
        _patch_db_postgresql_step1( $dbh );

        $dbh->do(
            'UPDATE test_results SET results = NULL WHERE results IS NOT NULL'
        );

        print( "\n-> (2/2) Normalizing domain names\n" );
        _update_data_normalize_domains( $db );

        $dbh->commit();
    } catch {
        print( "\nCould not upgrade database:  " . $_ );

        $dbh->rollback();
    };
}

sub patch_db_sqlite {
    use Zonemaster::Backend::DB::SQLite;

    my $db = Zonemaster::Backend::DB::SQLite->from_config( $config );
    my $dbh = $db->dbh;

    $dbh->{AutoCommit} = 0;

    try {
        $db->create_schema();

        print( "\n-> (1/2) Populating new result_entries table\n" );
        _update_data_result_entries( $dbh, 142 );

        print( "\n-> (2/2) Normalizing domain names\n" );
        _update_data_normalize_domains( $db );

        $dbh->commit();
    } catch {
        print( "\nError while upgrading database:  " . $_ );

        $dbh->rollback();
    };
}


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.