Virani/lib/Virani.pm
package Virani;
use 5.006;
use strict;
use warnings;
use TOML;
use File::Slurp;
use Net::Subnet;
use File::Find::IncludesTimeRange;
use File::Find::Rule;
use Digest::MD5 qw(md5_hex);
use File::Spec;
use IPC::Cmd qw(run);
use File::Copy "cp";
use Sys::Syslog;
use JSON;
use Time::Piece;
=head1 NAME
Virani - PCAP retrieval for a FPC setup writing to PCAP files.
=head1 VERSION
Version 1.2.0
=cut
our $VERSION = '1.2.0';
=head1 SYNOPSIS
use Virani;
my $virani = Virani->new();
...
=head1 METHODS
=head2 new_from_conf
Initiates the Virani object from the specified file.
- conf :: The config TOML to use.
- Default :: /usr/local/etc/virani.toml
=cut
sub new_from_conf {
my ( $blank, %opts ) = @_;
if ( !defined( $opts{conf} ) ) {
$opts{conf} = '/usr/local/etc/virani.toml';
}
if ( !-f $opts{conf} ) {
die( "'" . $opts{conf} . "' is not a file or does not exist" );
}
my $raw_toml;
eval { $raw_toml = read_file( $opts{conf} ); };
if ( $@ || !defined($raw_toml) ) {
my $error = 'Failed to read config file, "' . $opts{conf} . '"';
if ($@) {
$error = $error . ' ' . $@;
}
die($error);
}
my $toml;
eval { $toml = from_toml($raw_toml); };
if ($@) {
die($@);
}
return Virani->new( %{$toml} );
} ## end sub new_from_conf
=head2 new
Initiates the object.
- allowed_subnets :: The allowed subnets for fetching PCAPs for mojo-varini.
Defaults :: [ '192.168.0.0/', '127.0.0.1/8', '::1/127', '172.16.0.0/12' ]
- apikey :: Optional API key for mojo-varini.
Defaults :: undef
- auth_by_IP_only :: Auth by IP only and don't use a API key.
Default :: 1
- default_set :: The default set to use.
Default :: default
- cache :: Cache directory to write to.
Default :: /var/cache/virani
- default_regex :: The regex to use for getting the timestamp. The regex to pass to
File::Find::IncludesTimeRange for finding PCAP files with timestamps
that include the range in question.
Default :: (?<timestamp>\\d\\d\\d\\d\\d\\d+)(\\.pcap|(?<subsec>\\.\\d+)\\.pcap)$
- verbose_to_syslog :: Send verbose items to syslog. This is used by mojo-virani.
Default :: 0
- pcap_glob :: The glob to use for matching files.
Default :: *.pcap*
- ts_is_unixtime :: The timestamp is unixtime and does not requires additional processing.
Default :: 1
- verbose :: Print verbose info.
Default :: 1
- type :: Either tcpdump, tshark, or bpf2tshark, which to use for filtering PCAP files in the
specified time slot. tcpdump is faster, but in general will not nicely handles
some VLAN types. For that tshark is needed, but it is signfigantly slower. bpf2tshark
is handled via Virani->bpf2tshark and that should be seen for more info on that.
Default :: tcpdump
- padding :: How many seconds to add to the start and end time stamps to ensure the specified
time slot is definitely included.
Default :: 5
- sets :: A hash of hashes of available sets.
Default :: { default => { path => '/var/log/daemonlogger' } }
For sets, the following keys are usable, of which only path is required.
- path :: The base path of which the PCAPs are located.
- padding :: Padding value for this set.
- regex :: The timestamp regex to use with this set.
- type :: The default filter type to use with this set.
- ts_is_unixtime :: The timestamp is unixtime and does not requires additional processing.
=cut
sub new {
my ( $blank, %opts ) = @_;
my $self = {
allowed_subnets => [ '192.168.0.0/', '127.0.0.1/8', '::1/127', '172.16.0.0/12' ],
apikey => undef,
auth_by_IP_only => 1,
default_set => 'default',
cache => '/var/cache/virani',
default_regex => '(?<timestamp>\\d\\d\\d\\d\\d\\d+)(\\.pcap|(?<subsec>\\.\\d+)\\.pcap)$',
default_max_time => '3600',
verbose_to_syslog => 0,
verbose => 1,
type => 'tcpdump',
padding => 5,
ts_is_unixtime => 1,
pcap_glob => '*.pcap*',
sets => {
default => {
path => '/var/log/daemonlogger',
}
},
};
bless $self;
if ( defined( $opts{allowed_subnets} ) && ref( $opts{allowed_subnets} ) eq 'ARRAY' ) {
$self->{allowed_subnets} = $opts{allowed_subnets};
} elsif ( defined( $opts{allowed_subnets} ) && ref( $opts{allowed_subnets} ) ne 'ARRAY' ) {
die("$opts{allowed_subnets} defined, but not a array");
}
if ( defined( $opts{sets} ) && ref( $opts{sets} ) eq 'HASH' ) {
$self->{sets} = $opts{sets};
} elsif ( defined( $opts{sets} ) && ref( $opts{allowed_subnets} ) ne 'HASH' ) {
die("$opts{sets} defined, but not a hash");
}
# real in basic values
my @real_in = (
'apikey', 'default_set', 'cache', 'padding',
'default_max_time', 'verbose_to_syslog', 'verbose', 'auth_by_IP_only',
'type', 'ts_is_unixtime', 'pcap_glob', 'default_regex'
);
for my $key (@real_in) {
if ( defined( $opts{$key} ) ) {
$self->{$key} = $opts{$key};
}
}
return $self;
} ## end sub new
=head2 bpf2tshark
Does a quick and dumb conversion of a BPF filter to tshark.
my $tshark=$virani->bpf2tshark($bpf);
() -> ()
not () -> !()
icmp -> icmp
tcp -> tcp
udp -> udp
port $port -> ( tcp.port == $port or udp.port == $port )
not port $port -> ( tcp.port != $port or udp.port != $port )
dst port $port -> ( tcp.dstport == $port or udp.dstport == $port )
not dst port $port -> ( tcp.dstport != $port or udp.dstport != $port )
src port $port -> ( tcp.srcport == $port or udp.srcport == $port )
not src port $port -> ( tcp.srcport != $port or udp.srcport != $port )
host $host -> ip.addr == $host
not host $host -> ip.addr != $host
dst host $host -> ip.dst == $host
not dst host $host -> ip.dst != $host
src host $host -> ip.src == $host
not src host $host -> ip.src != $host
dst $host -> ip.dst == $host
not host $host -> ip.dst != $host
src src $host -> ip.src == $host
not src $host -> ip.src != $host
=cut
sub bpf2tshark {
my $self = $_[0];
my $bpf = $_[1];
if ( !defined($bpf) ) {
return '';
}
# make sure that () have spaces on either side
$bpf =~ s/\(/\ \(\ /g;
$bpf =~ s/\)/\ \)\ /g;
my @bpf_split = split( /[\ \t]+/, $bpf );
my @tshark_args;
my @previous;
my $not = 0;
foreach my $item (@bpf_split) {
# sets the equality operator based of if not is true or not
my $equality = '==';
if ($not) {
$equality = '!=';
}
# tcp/udp/icmp
if ( $item eq 'tcp' || $item eq 'udp' || $item eq 'icmp' ) {
push( @tshark_args, $item );
$not = 0;
@previous = ();
}
# handle negation
elsif ( $item eq 'not' ) {
$not = 1;
}
# handles closing )
elsif ( $item eq ')' ) {
$not = 0;
push( @tshark_args, ')' );
@previous = ();
}
# handles opening (
elsif ( $item eq ')' ) {
if ($not) {
push( @tshark_args, '!(' );
} else {
push( @tshark_args, '(' );
}
$not = 0;
@previous = ();
}
# and/or
elsif ( $item eq 'or' || $item eq 'and' ) {
# make sure we not add it twice
if ( $tshark_args[$#tshark_args] ne 'and' && $tshark_args[$#tshark_args] ne 'or' ) {
push( @tshark_args, $item );
}
$not = 0;
@previous = ();
}
# start of src/dst
elsif ( !defined( $previous[0] ) && ( $item eq 'src' || $item eq 'dst' ) ) {
push( @previous, $item );
}
# start of ether
elsif ( !defined( $previous[0] ) && $item eq 'ether' ) {
push( @previous, $item );
}
# adding src/dst/host to ether
elsif (defined( $previous[0] )
&& $previous[0] eq 'ether'
&& ( $item eq 'src' || $item eq 'dst' || $item eq 'host' ) )
{
push( @previous, $item );
}
# generic host/port
elsif ( !defined( $previous[0] ) && ( $item eq 'port' || $item eq 'host' ) ) {
push( @previous, $item );
}
# adding host/port to src/dst
elsif (defined( $previous[0] )
&& ( $previous[0] eq 'src' || $previous[0] eq 'dst' )
&& ( $item eq 'host' || $item eq 'port' ) )
{
push( @previous, $item );
}
# add ether src $ether
elsif (defined( $previous[0] )
&& defined( $previous[1] )
&& $previous[0] eq 'ether'
&& $previous[1] eq 'src' )
{
push( @tshark_args, 'etc.src', $equality, $item );
$not = 0;
@previous = ();
}
# add ether src $ether
elsif (defined( $previous[0] )
&& defined( $previous[1] )
&& $previous[0] eq 'ether'
&& $previous[1] eq 'dst' )
{
push( @tshark_args, 'etc.dst', $equality, $item );
$not = 0;
@previous = ();
}
# add ether host $ether
elsif (defined( $previous[0] )
&& defined( $previous[1] )
&& $previous[0] eq 'ether'
&& $previous[1] eq 'host' )
{
push( @tshark_args, 'etc.addr', $equality, $item );
$not = 0;
@previous = ();
}
# add src port $port
elsif (defined( $previous[0] )
&& defined( $previous[1] )
&& $previous[0] eq 'src'
&& $previous[1] eq 'port' )
{
push( @tshark_args, '(', 'tcp.srcport', $equality, $item, 'or', 'udp.srcport', $equality, $item, ')' );
$not = 0;
@previous = ();
}
# add dst port $port
elsif (defined( $previous[0] )
&& defined( $previous[1] )
&& $previous[0] eq 'dst'
&& $previous[1] eq 'port' )
{
push( @tshark_args, '(', 'tcp.dstport', $equality, $item, 'or', 'udp.dstport', $equality, $item, ')' );
$not = 0;
@previous = ();
}
# add src host $host
elsif (defined( $previous[0] )
&& defined( $previous[1] )
&& $previous[0] eq 'src'
&& $previous[1] eq 'host' )
{
push( @tshark_args, 'ip.src', $equality, $item );
$not = 0;
@previous = ();
}
# add dst host $host
elsif (defined( $previous[0] )
&& defined( $previous[1] )
&& $previous[0] eq 'dst'
&& $previous[1] eq 'host' )
{
push( @tshark_args, 'ip.dst', $equality, $item );
$not = 0;
@previous = ();
}
# add port $port
elsif ( defined( $previous[0] ) && !defined( $previous[1] ) && $previous[0] eq 'port' ) {
push( @tshark_args, '(', 'tcp.port', $equality, $item, 'or', 'udp.port', $equality, $item, ')' );
$not = 0;
@previous = ();
}
# add host $host
elsif ( defined( $previous[0] ) && !defined( $previous[1] ) && $previous[0] eq 'host' ) {
push( @tshark_args, 'ip.addr', $equality, $item );
$not = 0;
@previous = ();
}
# add src $host
elsif ( defined( $previous[0] ) && !defined( $previous[1] ) && $previous[0] eq 'src' ) {
push( @tshark_args, 'ip.src', $equality, $item );
$not = 0;
@previous = ();
}
# add dst $host
elsif ( defined( $previous[0] ) && !defined( $previous[1] ) && $previous[0] eq 'dst' ) {
push( @tshark_args, 'ip.dst', $equality, $item );
$not = 0;
@previous = ();
}
# if anything else is found, skip it
else {
$not = 0;
@previous = ();
}
} ## end foreach my $item (@bpf_split)
return join( ' ', @tshark_args );
} ## end sub bpf2tshark
=head2 filter_clean
Removes starting and trailing whitespace as well as collapsing
consecutive whitespace to a single space.
The purpose for this is to make sure that tshark/BPF filters passed
are consistent for cacheing, even if their white space differs.
A undef passed to it will return ''.
Will die if the filter matches /^\w*\-/ as it starts with a '-', which
tcpdump will interpret as a switch.
my $cleaned_bpf=$virani->filter_clean($bpf);
=cut
sub filter_clean {
my $self = $_[0];
my $string = $_[1];
if ( !defined($string) ) {
return '';
}
if ( $string =~ /^\w*\-/ ) {
die( 'The filter, "' . $string . '", begins with a "-", which dieing for safety reasons' );
}
# remove white space at the start and end
$string =~ s/^\s*//g;
$string =~ s/\s+$//g;
# replace all multiple white space characters with a single space
$string =~ s/\s\s+/ /g;
return $string;
} ## end sub filter_clean
=head1 check_apikey
Checks the API key.
If auth_via_IP_only is 1, this will always return true.
my $apikey=$c->param('apikey');
if (!$virani->check_apikey($apikey)) {
$c->render( text => "Invalid API key\n", status=>403, );
return;
}
=cut
sub check_apikey {
my $self = $_[0];
my $apikey = $_[1];
if ( $self->{auth_by_IP_only} ) {
return 1;
}
if ( !defined($apikey) ) {
return 0;
}
if ( !defined( $self->{apikey} ) || $self->{apikey} eq '' ) {
return 0;
}
if ( $apikey ne $self->{apikey} ) {
return 0;
}
return 1;
} ## end sub check_apikey
=head1 check_remote_ip
Checks if the remote IP is allowed or not.
if ( ! $virani->check_remote_ip( $c->{tx}{original_remote_address} )){
$c->render( text => "IP or subnet not allowed\n", status=>403, );
return;
}
=cut
sub check_remote_ip {
my $self = $_[0];
my $ip = $_[1];
if ( !defined($ip) ) {
return 0;
}
if ( !defined( $self->{allowed_subnets}[0] ) ) {
return 0;
}
my $allowed_subnets;
eval { $allowed_subnets = subnet_matcher( @{ $self->{allowed_subnets} } ); };
if ($@) {
die( 'Failed it init subnet matcher... ' . $@ );
} elsif ( !defined($allowed_subnets) ) {
die('Failed it init subnet matcher... sub_matcher returned undef');
}
if ( $allowed_subnets->($ip) ) {
return 1;
}
return 0;
} ## end sub check_remote_ip
=head1 check_type
Verify if the check is valid or note
Returns 0/1 based on if it a known type or not.
if ( ! $virani->check_type( $type )){
print $type." is not known\n";
}
=cut
sub check_type {
my $self = $_[0];
my $type = $_[1];
if ( !defined($type) ) {
return 0;
}
if ( $type ne 'tshark' && $type ne 'tcpdump' && $type ne 'bpf2tshark' ) {
return 0;
}
return 1;
} ## end sub check_type
=head2 get_default_set
Returns the deefault set to use.
my $set=$virani->get_default_set;
=cut
sub get_default_set {
my ($self) = @_;
return $self->{default_set};
}
=head2 get_cache_file
Takes the same args as get_pcap_lcal.
Returns the path to the file.
my $cache_file=$virani->get_cache_file(%opts);
if (! -f $cache_file.'json'){
echo "Cache file metadata does not exist, so either get_pcap_local died or it has not been ran\n";
}
=cut
sub get_cache_file {
my ( $self, %opts ) = @_;
# make sure we have something for type and check to make sure it is sane
if ( !defined( $opts{type} ) ) {
$opts{type} = $self->{type};
if ( defined( $self->{sets}{ $opts{set} }{type} ) ) {
$opts{type} = $self->{sets}{ $opts{set} }{type};
}
}
# check it here incase the config includes something off
if ( !$self->check_type( $opts{type} ) ) {
die( 'type "' . $opts{type} . '" is not a supported type, tcpdump or tshark,' );
}
# basic sanity checking
if ( !defined( $opts{start} ) ) {
die('$opts{start} not defined');
} elsif ( !defined( $opts{end} ) ) {
die('$opts{start} not defined');
} elsif ( ref( $opts{start} ) ne 'Time::Piece' ) {
die('$opts{start} is not a Time::Piece object');
} elsif ( ref( $opts{end} ) ne 'Time::Piece' ) {
die('$opts{end} is not a Time::Piece object');
} elsif ( defined( $opts{padding} ) && $opts{padding} !~ /^\d+/ ) {
die('$opts{padding} is not numeric');
}
if ( !defined( $opts{auto_no_cache} ) ) {
$opts{auto_no_cache} = 1;
}
if ( !defined( $opts{set} ) || $opts{set} eq '' ) {
$opts{set} = $self->get_default_set;
}
# make sure the set exists
if ( !defined( $self->{sets}->{ $opts{set} } ) ) {
die( 'The set "' . $opts{set} . '" is not defined' );
} elsif ( !defined( $self->{sets}->{ $opts{set} }{path} ) ) {
die( 'The path for set "' . $opts{set} . '" is not defined' );
} elsif ( !-d $self->{sets}->{ $opts{set} }{path} ) {
die( 'The path for set "'
. $opts{set} . '", "'
. $self->{sets}->{ $opts{set} }{path}
. '" is not exist or is not a directory' );
}
# get the paddimg, make sure it is sane, and apply it
if ( !defined( $opts{padding} ) ) {
$opts{padding} = $self->{padding};
if ( defined( $self->{sets}{ $opts{set} }{padding} ) ) {
$opts{padding} = $self->{sets}{ $opts{set} }{padding};
}
}
# clean the filter
$opts{filter} = $self->filter_clean( $opts{filter} );
my $cache_file;
if ( defined( $opts{file} ) ) {
my ( $volume, $directories, $file ) = File::Spec->splitpath( $opts{file} );
# make sure the directory the output file is using exists
if ( $directories ne '' && !-d $directories ) {
die( '$opts{file} is set to "'
. $opts{file}
. '" but the directory part,"'
. $directories
. '", does not exist' );
}
# figure what what to use as the cache file
if ( $opts{no_cache} ) {
$cache_file = $opts{file};
} elsif ( $opts{auto_no_cache} && ( !-d $self->{cache} || !-w $self->{cache} ) ) {
$cache_file = $opts{file};
} elsif ( $opts{auto_no_cache} && ( -d $self->{cache} || -w $self->{cache} ) ) {
$cache_file
= $self->{cache} . '/'
. $opts{set} . '-'
. $opts{type} . '-'
. $opts{start}->epoch . '-'
. $opts{end}->epoch . "-"
. lc( md5_hex( $opts{filter} ) );
} elsif ( !$opts{auto_no_cache} && ( !-d $self->{cache} || !-w $self->{cache} ) ) {
die( '$opts{auto_no_cache} is false and $opts{no_cache} is false, but the cache dir "'
. $self->{dir}
. '" does not exist, is not a dir, or is not writable' );
}
} else {
# make sure the cache is usable
if ( !-d $self->{cache} ) {
die( 'Cache dir,"' . $self->{cache} . '", does not exist or is not a dir' );
} elsif ( !-w $self->{cache} ) {
die( 'Cache dir,"' . $self->{cache} . '", is not writable' );
}
$cache_file
= $self->{cache} . '/'
. $opts{set} . '-'
. $opts{start}->epoch . '-'
. $opts{type} . '-'
. $opts{end}->epoch . "-"
. lc( md5_hex( $opts{filter} ) );
} ## end else [ if ( defined( $opts{file} ) ) ]
return $cache_file;
} ## end sub get_cache_file
=head2 get_pcap_local
Generates a PCAP locally and returns the path to it.
- start :: A L<Time::Piece> object of when to start looking.
- Default :: undef
- end :: A L<Time::Piece> object of when to stop looking.
- Default :: undef
- padding :: Number of seconds to pad the start and end with.
- Default :: 5
- filter :: The BPF or tshark filter to use.
- Default :: ''
- set :: The PCAP set to use. Will use what ever the default is set to if undef or blank.
- Default :: $virani->get_default_set
- file :: The file to output to. If undef it just returns the path to
the cache file.
- Default :: undef
- no_cache :: If cached, don't return that, but regen and if applicable re-cache.
- Default :: 0
- auto_no_cache :: If the cache dir is being used and not writeable and a file
as been specified, don't die, but use the output file name
as the basis of for the tmp file.
- Default :: 1
- type :: 'tcpdump' or 'tshark', depending on what one wants the filter todo.
- Default :: tcpdump
The return is a hash reference that includes the following keys.
- pcaps :: A array of PCAPs used.
- pcap_count :: A count of used PCAPs.
- pcap_glob :: The value of pcap_glob used.
- ts_is_unixtime :: The value of ts_is_unixtime used.
- failed :: A hash of PCAPs that failed. PCAP path as key and value being the reason.
- failed_count :: A count of failed PCAPs.
- path :: The path to the results file. If undef, unable it was unable
to process any of them.
- success_found :: A count of successfully processed PCAPs.
- filter :: The used filter.
- total_size :: The size of all PCAP files checked.
- failed_size :: The size of the PCAP files that failed.
- success_size :: the size of the PCAP files that successfully processed
- type :: The value of $opts{type}
- padding :: The value of padding.
- start_s :: Start time in seconds since epoch, not including pading.
- end :: Send time in the format '%Y-%m-%dT%H:%M:%S%z'.
- end_s :: End time in seconds since epoch, not including pading.
- end :: End time in the format '%Y-%m-%dT%H:%M:%S%z'.
- using_cache :: If the cache was used or not.
- req_start :: Timestamp of when the it started. In the format
%Y-%m-%dT%H:%M:%S%z
- req_start_s :: Same as req_start, but unixtime.
- req_end :: Timestamp of when the it finished. In the format
%Y-%m-%dT%H:%M:%S%z
- req_end_s :: Same as req_end, but unixtime.
- req_time :: Number of seconds it took.
=cut
sub get_pcap_local {
my ( $self, %opts ) = @_;
# start of the request
my $req_start = localtime;
# if set is undef or blank, use the default
if ( !defined( $opts{set} ) || $opts{set} eq '' ) {
$opts{set} = $self->get_default_set;
}
$self->verbose( 'info', 'Set: ' . $opts{set} );
# make sure we have something for type and check to make sure it is sane
if ( !defined( $opts{type} ) ) {
$opts{type} = $self->{type};
if ( defined( $self->{sets}{ $opts{set} }{type} ) ) {
$opts{type} = $self->{sets}{ $opts{set} }{type};
}
}
# figure out what to use for $ts_is_unixtime
my $ts_is_unixtime;
if ( defined( $self->{sets}{ $opts{set} }{ts_is_unixtime} ) ) {
$ts_is_unixtime = $self->{sets}{ $opts{set} }{ts_is_unixtime};
} else {
$ts_is_unixtime = $self->{ts_is_unixtime};
}
# figure out what to use for $pcap_glob
my $pcap_glob;
if ( defined( $self->{sets}{ $opts{set} }{pcap_glob} ) ) {
$pcap_glob = $self->{sets}{ $opts{set} }{pcap_glob};
} else {
$pcap_glob = $self->{pcap_glob};
}
$self->verbose( 'info', 'PCAP Glob: ' . $pcap_glob );
# check it here incase the config includes something off
if ( !$self->check_type( $opts{type} ) ) {
die( 'type "' . $opts{type} . '" is not a supported type, tcpdump or tshark,' );
}
$self->verbose( 'info', 'Type: ' . $opts{type} );
# basic sanity checking
if ( !defined( $opts{start} ) ) {
die('$opts{start} not defined');
} elsif ( !defined( $opts{end} ) ) {
die('$opts{start} not defined');
} elsif ( ref( $opts{start} ) ne 'Time::Piece' ) {
die('$opts{start} is not a Time::Piece object');
} elsif ( ref( $opts{end} ) ne 'Time::Piece' ) {
die('$opts{end} is not a Time::Piece object');
} elsif ( defined( $opts{padding} ) && $opts{padding} !~ /^\d+$/ ) {
die('$opts{padding} is not numeric');
}
$self->verbose( 'info', 'Start: ' . $opts{start}->strftime('%Y-%m-%dT%H:%M:%S%z') . ', ' . $opts{start}->epoch );
$self->verbose( 'info', 'End: ' . $opts{end}->strftime('%Y-%m-%dT%H:%M:%S%z') . ', ' . $opts{end}->epoch );
if ( !defined( $opts{auto_no_cache} ) ) {
$opts{auto_no_cache} = 1;
}
$self->verbose( 'info', 'auto_no_cache: ' . $opts{auto_no_cache} );
if ( !defined( $opts{no_cache} ) ) {
$opts{no_cache} = 0;
}
$self->verbose( 'info', 'no_cache: ' . $opts{no_cache} );
# make sure the set exists
if ( !defined( $self->{sets}->{ $opts{set} } ) ) {
die( 'The set "' . $opts{set} . '" is not defined' );
} elsif ( !defined( $self->{sets}->{ $opts{set} }{path} ) ) {
die( 'The path for set "' . $opts{set} . '" is not defined' );
} elsif ( !-d $self->{sets}->{ $opts{set} }{path} ) {
die( 'The path for set "'
. $opts{set} . '", "'
. $self->{sets}->{ $opts{set} }{path}
. '" is not exist or is not a directory' );
}
# get the paddimg, make sure it is sane, and apply it
if ( !defined( $opts{padding} ) ) {
$opts{padding} = $self->{padding};
if ( defined( $self->{sets}{ $opts{set} }{padding} ) ) {
$opts{padding} = $self->{sets}{ $opts{set} }{padding};
}
}
# clean the filter
$opts{filter} = $self->filter_clean( $opts{filter} );
$self->verbose( 'info', 'Filter: ' . $opts{filter} );
# get the cache file to use
my $cache_file;
eval { $cache_file = $self->get_cache_file(%opts); };
if ($@) {
die( '$self->get_cache_files(%opts) failed... ' . $@ );
}
# if applicable return the cache file
my $return_cache = 0;
if ( defined( $opts{file} )
&& $opts{file} ne $cache_file
&& !$opts{no_cache}
&& -f $cache_file
&& -f $cache_file . '.json' )
{
$return_cache = 1;
} elsif ( !defined( $opts{file} ) && !$opts{no_cache} && -f $cache_file && -f $cache_file . '.json' ) {
$return_cache = 1;
}
if ($return_cache) {
my $cache_message = 'Already cached... "' . $cache_file . '"';
if ( defined( $opts{file} ) && $opts{file} ne $cache_file ) {
$cache_message = $cache_message . ' -> "' . $opts{file} . '"';
}
$self->verbose( 'info', $cache_message );
if ( defined( $opts{file} ) && $opts{file} ne $cache_file ) {
cp( $cache_file, $opts{file} );
}
my $to_return;
eval {
my $cache_meta_raw = read_file( $cache_file . '.json' );
$to_return = decode_json($cache_meta_raw);
};
if ($@) {
die( 'Failed to read cache metadata JSON, "' . $cache_file . '.json"' );
}
$to_return->{using_cache} = 1;
return $to_return;
} ## end if ($return_cache)
# check it here incase the config includes something off
if ( $opts{padding} !~ /^[0-9]+$/ ) {
die( '"' . $opts{padding} . '" is not a numeric' );
}
# set the padding
my $start = $opts{start} - $opts{padding};
my $end = $opts{end} + $opts{padding};
$self->verbose( 'info', 'Padded Start: ' . $start->strftime('%Y-%m-%dT%H:%M:%S%z') . ', ' . $start->epoch );
$self->verbose( 'info', 'Padded End: ' . $end->strftime('%Y-%m-%dT%H:%M:%S%z') . ', ' . $end->epoch );
# get the set
my $set_path = $self->get_set_path( $opts{set} );
if ( !defined($set_path) ) {
die( 'The set "' . $opts{set} . '" does not either exist or the path value for it is undef' );
}
# get the pcaps
my @pcaps = File::Find::Rule->file()->name($pcap_glob)->in($set_path);
# get the ts_regexp to use
my $ts_regexp;
if ( defined( $self->{sets}{ $opts{set} }{regex} ) ) {
$ts_regexp = $self->{sets}{ $opts{set} }{regex};
} else {
$ts_regexp = $self->{default_regex};
}
$self->verbose( 'info', 'Timestamp Regexp: ' . $ts_regexp );
my $to_check = File::Find::IncludesTimeRange->find(
items => \@pcaps,
start => $start,
end => $end,
regex => $ts_regexp,
ts_is_unixtime => $ts_is_unixtime,
);
# The return hash and what will be used for the cache JSON
# req_end stuff set later
my $to_return = {
pcaps => $to_check,
pcap_glob => $pcap_glob,
pcap_count => 0,
failed => {},
failed_count => 0,
success_count => 0,
path => $cache_file,
filter => $opts{filter},
total_size => 0,
failed_size => 0,
success_size => 0,
tmp_size => 0,
final_size => 0,
type => $opts{type},
padding => $opts{padding},
start_s => $opts{start}->epoch,
start => $opts{start}->strftime('%Y-%m-%dT%H:%M:%S%z'),
end_s => $opts{end}->epoch,
end => $opts{end}->strftime('%Y-%m-%dT%H:%M:%S%z'),
req_start => $req_start->strftime('%Y-%m-%dT%H:%M:%S%z'),
req_start_s => $req_start->epoch,
ts_is_unixtime => $ts_is_unixtime,
};
# used for tracking the files to cleanup
my @tmp_files;
# puts together the tshark filter if needed
my $tshark_filter = $opts{filter};
if ( $opts{type} eq 'bpf2tshark' ) {
$tshark_filter = $self->bpf2tshark( $opts{filter} );
$to_return->{filter_translated} = $tshark_filter;
$self->verbose( 'info', 'Translated Filter ' . $tshark_filter );
}
# the merge command
my $to_merge = [ 'mergecap', '-w', $cache_file ];
foreach my $pcap ( @{$to_check} ) {
# get stat info for the file
my ( $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks )
= stat($pcap);
$to_return->{total_size} += $size;
$self->verbose( 'info', 'Processing ' . $pcap . ", size=" . $size . " ..." );
my $tmp_file = $cache_file . '-' . $to_return->{pcap_count};
my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf );
if ( $opts{type} eq 'tcpdump' ) {
( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) = run(
command => [ 'tcpdump', '-r', $pcap, '-w', $tmp_file, $opts{filter} ],
verbose => 0
);
} else {
( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) = run(
command => [ 'tshark', '-r', $pcap, '-w', $tmp_file, $tshark_filter ],
verbose => 0
);
}
if ($success) {
$to_return->{success_count}++;
$to_return->{success_size} += $size;
push( @{$to_merge}, $tmp_file );
push( @tmp_files, $tmp_file );
# get stat info for the tmp file
( $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks )
= stat($tmp_file);
$to_return->{tmp_size} += $size;
} else {
$to_return->{failed}{$pcap} = $error_message;
$to_return->{failed_count}++;
$to_return->{failed_size} += $size;
$self->verbose( 'warning', 'Failed ' . $pcap . " ... " . $error_message );
unlink $tmp_file;
}
$to_return->{pcap_count}++;
} ## end foreach my $pcap ( @{$to_check} )
# only try merging if we had more than one success
if ( $to_return->{success_count} > 0 ) {
$self->verbose( 'info', "Merging PCAPs... " . join( ' ', @{$to_merge} ) );
my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) = run(
command => $to_merge,
verbose => 0
);
if ($success) {
$self->verbose( 'info', "PCAPs merged into " . $cache_file );
} else {
# if verbose print different messages if mergecap generated a ouput file or not when it fialed
if ( -f $cache_file ) {
$self->verbose( 'warning', "PCAPs partially(output file generated) failed " . $error_message );
} else {
$self->verbose( 'err', "PCAPs merge completely(output file not generated) failed " . $error_message );
}
}
# remove each tmp file
foreach my $tmp_file (@tmp_files) {
unlink($tmp_file);
}
# don't bother checking size if the file was not generated
if ( -f $cache_file ) {
my ( $dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size, $atime, $mtime, $ctime, $blksize, $blocks )
= stat($cache_file);
$to_return->{final_size} = $size;
}
} else {
$self->verbose( 'err', "No PCAPs to merge" );
}
$self->verbose( 'info',
"PCAP sizes... failed_size="
. $to_return->{failed_size}
. " success_size="
. $to_return->{success_size}
. " total_size="
. $to_return->{total_size}
. " tmp_size="
. $to_return->{tmp_size}
. " final_size="
. $to_return->{final_size} );
# finalize info on how long the request took
my $req_end = localtime;
$to_return->{req_end} = $req_end->strftime('%Y-%m-%dT%H:%M:%S%z');
$to_return->{req_end_s} = $req_end->epoch;
$to_return->{req_time} = $req_end->epoch - $req_start->epoch;
$self->verbose( 'info', 'Creating metadata JSON at "' . $cache_file . '.json" ' );
my $json = JSON->new->allow_nonref->pretty->canonical(1);
my $raw_json = $json->encode($to_return);
write_file( $cache_file . '.json', $raw_json );
# if the file and cache file are the same, then the cache dir not accessing, so no need to copy it
if ( defined( $opts{file} ) && $cache_file ne $opts{file} ) {
$self->verbose( 'info', 'Copying "' . $cache_file . '" to "' . $opts{file} . '"' );
cp( $cache_file, $opts{file} );
}
$to_return->{using_cache} = 0;
return $to_return;
} ## end sub get_pcap_local
=head2 get_set_path
Returns the path to a set.
If no set is given, the default is used.
Will return undef if the set does not exist or if the set does not have a path defined.
my $path=$virani->get_set_path($set);
=cut
sub get_set_path {
my ( $self, $set ) = @_;
if ( !defined($set) ) {
$set = $self->get_default_set;
}
if ( !defined( $self->{sets}{$set} ) ) {
return undef;
}
if ( !defined( $self->{sets}{$set}{path} ) ) {
return undef;
}
return $self->{sets}{$set}{path};
} ## end sub get_set_path
=head2 set_verbose
Set if it should be verbose or not.
# be verbose
$virani->verbose(1);
# do not be verbose
$virani->verbose(0);
=cut
sub set_verbose {
my ( $self, $verbose ) = @_;
$self->{verbose} = $verbose;
}
=head2 set_verbose_to_syslog
Set if it should be verbose or not.
# send verbose messages to syslog
$virani->set_verbose_to_syslog(1);
# do not send verbose messages to syslog
$virani->set_verbose_to_syslog(0);
=cut
sub set_verbose_to_syslog {
my ( $self, $to_syslog ) = @_;
$self->{verbose_to_syslog} = $to_syslog;
}
=head2 verbose
Prints out error messages. This is inteded to be internal.
Only sends the string if verbose is enabled.
There is no need to add a "\n" as it will automatically if not sending to syslog.
Two variables are taken. The first is level the second is the message. Level is only used
for syslog. Default level is info.
- Levels :: emerg, alert, crit, err, warning, notice, info, debug
$self->verbose('info', 'some string');
=cut
sub verbose {
my ( $self, $level, $string ) = @_;
if ( !defined($string) || $string eq '' ) {
return;
}
if ( !defined($level) ) {
$level = 'info';
}
if ( $self->{verbose} ) {
if ( $self->{verbose_to_syslog} ) {
openlog( 'virani', undef, 'daemon' );
syslog( $level, $string );
closelog();
} else {
print $string. "\n";
}
}
return;
} ## end sub verbose
=head2 CONFIG
The config format used toml, processed via L<TOML>.
'new_from_conf' will initiate virani by reading it in and feeding it to 'new'.
=head2 DAEMONLOGGER ON FREEBSD
With daemonlogger setup along the lines of like below...
daemonlogger_enable="YES"
daemonlogger_flags="-f /usr/local/etc/daemonlogger.bpf -d -l /var/log/daemonlogger -t 120"
The following can be made available via mojo-varini or locally via varini with the set name of
default as below.
allowed_subnets=["192.168.14.0/23", "127.0.0.1/8"]
[sets.default]
path='/var/log/daemonlogger'
If you want to use 'init/freebsd' to start mojo-virani, you just need to copy it
it to '/usr/local/etc/rc.d/virani' and add the following or the like to '/etc/rc.conf'.
virani_enable="YES"
virani_flags="daemon -m production -l http://127.0.0.1:8080 -l http://192.168.14.1:8080"
See the script for information on the various possible config args for it.
=head1 AUTHOR
Zane C. Bowers-Hadley, C<< <vvelox at vvelox.net> >>
=head1 BUGS
Please report any bugs or feature requests to C<bug-virani at rt.cpan.org>, or through
the web interface at L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=Virani>. I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
=head1 SUPPORT
You can find documentation for this module with the perldoc command.
perldoc Virani
You can also look for information at:
=over 4
=item * RT: CPAN's request tracker (report bugs here)
L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=Virani>
=item * Search CPAN
L<https://metacpan.org/release/Virani>
=back
=head1 ACKNOWLEDGEMENTS
=head1 LICENSE AND COPYRIGHT
This software is Copyright (c) 2024 by Zane C. Bowers-Hadley.
This is free software, licensed under:
The GNU Lesser General Public License, Version 2.1, February 1999
=cut
1; # End of Virani