Group
Extension

Statistics-Covid/script/db-load-data-from-fetched-json-data-files.pl

#!/usr/bin/env perl

use strict;
use warnings;

our $VERSION = '0.23';

use Statistics::Covid;

use Getopt::Long;
use Data::Dump qw/pp/;

# Command-line settings and their defaults.
my $configfile1 = undef; # --config-file-source (required)
my $configfile2 = undef; # --config-file-destination (optional)
my $tablename = 'Datum';
my $DEBUG = 0;
my $conditions = undef;
my $attributes = undef;
my $clear_db_before = 0;
# default values
my %providers = (
	'UK::BBC'=>1,
	'UK::GOVUK'=>1,
	'World::JHU'=>1
);

if( ! Getopt::Long::GetOptions(
	'config-file-source=s' => \$configfile1,
	'config-file-destination=s' => \$configfile2,
	'provider=s' => sub {
		# TODO: add a check for removing providers
		# each --provider only ever adds to the default set above
		$providers{$_[1]} = 1;
	},
	'conditions=s' => \$conditions,
	'attributes=s' => \$attributes,
	'tablename=s' => \$tablename,
	'clear' => \$clear_db_before,
	'debug=i' => \$DEBUG,
) ){ die usage() . "\n\nerror in command line."; }

die usage() . "\n\nA configuration file (--config-file-source) is required." unless defined $configfile1;

# --clear is accepted on the command line but nothing in this script acts
# on it. Tell the user instead of silently leaving the destination intact.
warn "$0 : warning, --clear was specified but it is not implemented by this script, the destination database will NOT be erased.\n"
	if $clear_db_before;

# start time, used for the elapsed-seconds report at the end
my $ts = time;

# The table name maps to an IO package, e.g. 'Datum' -> Statistics::Covid::Datum::IO,
# which is loaded at runtime from its file path form.
my $package = 'Statistics::Covid::'.$tablename.'::IO';
my $packagefile = $package.'.pm'; $packagefile =~ s|\:\:|/|g;
my @providers = sort keys %providers;

# Test eval's return value (the block ends with a true value) rather than
# $@ afterwards, so a failure is detected even if $@ gets clobbered.
eval { require $packagefile; 1; }
	or die "failed to load packagefile '$packagefile'. Most likely table '$tablename' is unknown or was wrongly capitalised, e.g. the 'Datum' table is correct : $@";

# NOTE(review): $db_select_params is populated below but nothing visible in
# this script passes it on; the parsing effectively only validates the
# --conditions / --attributes syntax. Confirm whether it should be used.
#
# SECURITY: --conditions and --attributes are run through string-eval, i.e.
# they are executed as arbitrary Perl code. This is only acceptable because
# they come from the invoking user's own command line; never feed these
# options from untrusted input.
my $db_select_params = {};
if( defined $conditions ){
	my $pvc = eval $conditions;
	# include perl's own error text (if any) so the user can see what is wrong
	die "Syntax errors in the specified conditions '$conditions'".($@ ? " : $@" : '')
		unless defined $pvc;
	$db_select_params->{'conditions'} = $pvc;
}
if( defined $attributes ){
	my $pva = eval $attributes;
	die "Syntax errors in the specified attributes '$attributes'".($@ ? " : $@" : '')
		unless defined $pva;
	$db_select_params->{'attributes'} = $pva;
}

my $covid = Statistics::Covid->new({
	'config-file' => $configfile1,
	'debug' => $DEBUG,
	'providers' => \@providers,
	'save-to-file' => 0,
	'save-to-db' => 0,
});
die "call to Statistics::Covid->new() has failed (1)."
	unless defined $covid;
# read all data files from all the datadirs of each of our loaded providers
# we get a hashref key=providerstr, value=arrayref of datum objs
my $datumObjs = $covid->read_data_from_files();
die "call to read_data_from_files() has failed."
	unless defined $datumObjs;

if( defined $configfile2 ){
	# we are saving to a different data collection, so replace the
	# source object with one built from the destination configuration
	$covid = Statistics::Covid->new({
		'config-file' => $configfile2,
		'debug' => $DEBUG,
		'providers' => \@providers,
		'save-to-file' => 0,
		'save-to-db' => 0,
	});
	die "call to Statistics::Covid->new() has failed (2)."
		unless defined $covid;
}
# row count before anything is inserted, for the final summary
my $count1 = $covid->db_datums_count();
# save to db, one provider at a time, reporting the row counts around each insert
for my $k (@providers){
	my $count_before = $covid->db_datums_count();
#	print "$0 : provider '$k' has ".scalar(@{$datumObjs->{$k}})." items from file.\n";
	my $ret = $covid->db_datums_insert_bulk($datumObjs->{$k});
	die "call to db_datums_insert_bulk() has failed."
		unless defined $ret;
	my $count_after = $covid->db_datums_count();
	print "$0 : success, destination database for '$k':\n" . pp($ret) . "\n";
	print "$0 : rows in '$tablename' before : $count_before\n";
	print "$0 : rows in '$tablename' after  : $count_after\n";
}
my $count_end = $covid->db_datums_count();
print "$0 : rows in '$tablename' when started : $count1\n";
print "$0 : rows in '$tablename' at the end   : $count_end\n";
print "$0 : success, done in ".(time-$ts)." seconds.\n";
# db disconnects on $covid destruction

#### end

# Build and return the usage/help text for this script as a single string.
# Takes no parameters and prints nothing itself; callers prepend it to
# their die() message. The option names listed here must match the
# Getopt::Long specification used when parsing the command line.
sub usage {
	return "Usage : $0 <options>\n"
	. " --conditions C : specify SELECT conditions as a string representing a Perl hashref adhering to the search-conditions expected by SQL::Abstract. For example, \"{'name' => {'like'=>'%ABC'}}\" See https://metacpan.org/pod/SQL::Abstract#WHERE-CLAUSES\n"
	. " --config-file-source C : specify a configuration file for reading the fetched data files of the source data collection.\n"
	. " --config-file-destination C : specify a configuration file for doing IO with the destination database. If omitted, the source configuration is used for the destination too.\n"
	. " --tablename T : specify the tablename for the SELECT, this corresponds to a package- name : Statistics::Covid::<tablename>::IO, so use the exact same capitalisation (e.g. 'Datum' and not 'datum').\n"
	. "[--provider P : add a data provider to the built-in default ones, can be specified more than once.]\n"
	. "[--clear : erase all contents of the destination database, if any and if it does indeed exist.]\n"
	. "[--attributes A : specify SELECT attributes as a string representing a Perl hashref adhering to the search-attributes expected by SQL::Abstract. In order to limit the number of rows selected use: '{rows=>10}', see https://metacpan.org/pod/DBIx::Class::ResultSet#ATTRIBUTES]\n"
	. "[--debug Level : specify a debug level, anything >0 is verbose.]\n"
	. "\n\nThis program will read all the previously fetched data files of the specified providers, as located via the source configuration, and then insert their contents into the destination database.\n"
	. "\nExample usage:\n"
# non-interpolating heredoc: the example line is emitted verbatim
. <<'EXA'
db-load-data-from-fetched-json-data-files.pl --config-file-source config/config.json --config-file-destination config/destination.json --tablename 'Datum' --provider 'World::JHU'
EXA
	. "\nProgram by Andreas Hadjiprocopis (andreashad2\@gmail.com / bliako\@cpan.org)\n"
	;
}



Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.