Group
Extension

App-ElasticSearch-Utilities/scripts/es-index-fields.pl

#!perl
# PODNAME: es-index-fields.pl
# ABSTRACT: Show information on the fields storage usage
use strict;
use warnings;

use App::ElasticSearch::Utilities qw(:all);
use CLI::Helpers qw(:all);
use Const::Fast;
use JSON::MaybeXS;
use Getopt::Long::Descriptive;
use Pod::Usage;

#------------------------------------------------------------------------#
# Argument Collection
# Defaults live in one read-only hash so the help text and the option spec
# always agree on the value.
const my %DEFAULT => (
    top => 10,
);
# $opt holds the parsed options, $usage renders the option summary for --help.
my ($opt,$usage) = describe_options('%c %o',
    ['top|limit|size|n=i', "Show the top N fields, defaults to $DEFAULT{top}",
        { default => $DEFAULT{top} },
    ],
    ['no-meta-fields|N', "Disable showing meta fields starting with an underscore"],
    [],
    ['help', 'Display this message', { shortcircuit => 1 }],
    ['manual', 'Display full manual', { shortcircuit => 1 }],
);

#------------------------------------------------------------------------#
# Documentation requests are answered first and end the program
if( $opt->help ) {
    # Short option summary built from the option spec
    my $summary = $usage->text;
    print $summary;
    exit 0;
}
elsif( $opt->manual ) {
    # Full manual rendered from the POD at the end of this file
    pod2usage(-verbose => 2, -exitstatus => 0);
}

#------------------------------------------------------------------------#
# Indexes to inspect, keyed by name.  The value is the age in days reported
# by es_index_days_old() (0 when it returns nothing); only the keys are
# iterated by the rest of this script.
my %indices = map { $_ => (es_index_days_old($_) || 0) } es_indices();

# Storage types broken out for every field.  For each name the script reads
# "<name>" (display value) and "<name>_in_bytes" from the disk usage data.
const my @FieldStores => qw(
    doc_values
    norms
    stored_fields
    term_vectors
    points
);

# Running total of bytes per field name, summed over every index processed
my %Fields = ();

# Print one key/value line for each storage type that actually uses space.
#   $indent - indent level handed to output()
#   $stats  - hash ref holding "<store>" and "<store>_in_bytes" entries
my $show_stores = sub {
    my ($indent, $stats) = @_;
    foreach my $k ( @FieldStores ) {
        # A store missing from the data counts as empty rather than
        # tripping an "uninitialized value" warning.
        my $bytes = $stats->{"${k}_in_bytes"} // 0;
        next unless $bytes > 0;
        output({indent=>$indent,kv=>1,color=>color_pick($bytes)},
            $k => $stats->{$k} // es_human_size($bytes));
    }
};

# For every selected index: request its disk usage analysis, print the
# largest fields with a per-store breakdown, and add each field's size to
# the cross-index totals in %Fields.
foreach my $idx ( sort keys %indices ) {
    output({clear=>1, color=>"cyan"}, "Getting field data for $idx");

    # Get Field Data; a failed request is reported and the run continues
    my $result;
    eval {
        $result = es_request('_disk_usage', {
            method => 'POST',
            index  => $idx,
            uri_param => {
                run_expensive_tasks => 'true'
            },
        });
        1;
    } or do {
        my $err = $@;
        output({indent=>1, color=>'red'}, "Request Failed: $err");
    };
    $result //= {};

    # Section of the response for this index, looked up without
    # autovivifying an entry in $result.
    my $usage = $result->{$idx} // {};

    if( my $fields = $usage->{fields} ) {
        # Largest first; ties are broken by name so repeated runs list the
        # fields in the same order.
        my @ranked = sort {
            ( $fields->{$b}{total_in_bytes} // 0 ) <=> ( $fields->{$a}{total_in_bytes} // 0 )
                or $a cmp $b
        } keys %{ $fields };

        my $shown = 0;
        foreach my $field ( @ranked ) {
            # Skip meta fields
            next if $opt->no_meta_fields && $field =~ /^_/;

            # Collect field totals
            my $data  = $fields->{$field};
            my $bytes = $data->{total_in_bytes} // 0;
            $Fields{$field} += $bytes;

            # Every field feeds the totals, only the first --top are printed
            next if ++$shown > $opt->top;

            # Fall back to a computed size when no display string is present
            output({indent=>1,kv=>1,color=>color_pick($bytes)},
                $field => $data->{total} // es_human_size($bytes));
            $show_stores->(2, $data);
        }
    }
    else {
        output({indent=>1, color=>'red'}, "Failed retrieving field storage information");
    }

    # Index-wide summary across all fields, when the response provides one
    if ( my $totals = $usage->{all_fields} ) {
        my $bytes = $totals->{total_in_bytes} // 0;
        output({clear=>1,indent=>1,color=>'cyan'}, "All Fields ($idx):");
        output({indent=>2,kv=>1,color=>color_pick($bytes)},
            total => $totals->{total} // es_human_size($bytes));
        $show_stores->(3, $totals);
    }
}

# Grand totals: the largest fields by bytes summed over every index.
output({clear=>1,color=>'yellow'}, "Totals for fields in all indexes");

# Largest first; ties are broken by name so the listing is repeatable.
my @by_total = sort { ( $Fields{$b} <=> $Fields{$a} ) || ( $a cmp $b ) } keys %Fields;

# Apply --top before printing, matching the per-index listing: a limit of
# zero or less shows no rows instead of always printing the first one.
my $limit = $opt->top;
$limit = @by_total if $limit > @by_total;
foreach my $k ( $limit > 0 ? @by_total[0 .. $limit - 1] : () ) {
    output({indent=>1,kv=>1,color=>color_pick($Fields{$k})}, $k, es_human_size($Fields{$k}));
}

# Choose the display color for a size given in bytes:
#   more than 10 GiB -> 'red', more than 1 GiB -> 'yellow', otherwise 'green'.
sub color_pick {
    my ($bytes) = @_;
    my $gib = 1024 * 1024 * 1024;

    return 'red'    if $bytes > $gib * 10;
    return 'yellow' if $bytes > $gib;
    return 'green';
}

__END__

=pod

=head1 NAME

es-index-fields.pl - Show information on the fields storage usage

=head1 VERSION

version 8.8

=head1 SYNOPSIS

es-index-fields.pl --index my-index-001

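Options:

    --top               Show the top N fields, defaults to 10 (aliases: --limit, --size, -n)
    --no-meta-fields    Disable showing meta fields starting with an underscore (alias: -N)
    --help              print help
    --manual            print full manual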

From App::ElasticSearch::Utilities:

    --local         Use localhost as the elasticsearch host
    --host          ElasticSearch host to connect to
    --port          HTTP port for your cluster
    --proto         Defaults to 'http', can also be 'https'
    --http-username HTTP Basic Auth username
    --password-exec Script to run to get the users password
    --insecure      Don't verify TLS certificates
    --cacert        Specify the TLS CA file
    --capath        Specify the directory with TLS CAs
    --cert          Specify the path to the client certificate
    --key           Specify the path to the client private key file
    --noop          Any operations other than GET are disabled, can be negated with --no-noop
    --timeout       Timeout to ElasticSearch, default 10
    --keep-proxy    Do not remove any proxy settings from %ENV
    --index         Index to run commands against
    --base          For daily indexes, reference only those starting with "logstash"
                     (same as --pattern logstash-* or logstash-DATE)
    --pattern       Use a pattern to operate on the indexes
    --days          If using a pattern or base, how many days back to go, default: 1

See also the "CONNECTION ARGUMENTS" and "INDEX SELECTION ARGUMENTS" sections from App::ElasticSearch::Utilities.

From CLI::Helpers:

    --data-file         Path to a file to write lines tagged with 'data => 1'
    --tags              A comma separated list of tags to display
    --color             Boolean, enable/disable color, default use git settings
    --verbose           Incremental, increase verbosity (Alias is -v)
    --debug             Show developer output
    --debug-class       Show debug messages originating from a specific package, default: main
    --quiet             Show no output (for cron)
    --syslog            Generate messages to syslog as well
    --syslog-facility   Default "local0"
    --syslog-tag        The program name, default is the script name
    --syslog-debug      Enable debug messages to syslog if in use, default false
    --nopaste           Use App::Nopaste to paste output to configured paste service
    --nopaste-public    Defaults to false, specify to use public paste services
    --nopaste-service   Comma-separated App::Nopaste service, defaults to Shadowcat

=head1 DESCRIPTION

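This script shows how much storage each field uses in the selected indexes,
based on the C<_disk_usage> API.  For every index it lists the largest fields
with a breakdown by storage type, followed by totals per field across all
indexes.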

=head1 AUTHOR

Brad Lhotsky <brad@divisionbyzero.net>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2024 by Brad Lhotsky.

This is free software, licensed under:

  The (three-clause) BSD License

=cut


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.