Group
Extension

App-Greple-jq/lib/App/Greple/jq.pm

=encoding utf-8

=head1 NAME

greple -Mjq - greple module to search JSON data with jq

=head1 SYNOPSIS

greple -Mjq --glob JSON-DATA --IN label pattern

=head1 VERSION

Version 0.06

=head1 DESCRIPTION

This is an experimental module for L<App::Greple> to search JSON
formatted text using L<jq(1)> as a backend.

Search for top-level JSON objects which include both C<Marvin> and
C<Zaphod> somewhere in their text representation.

    greple -Mjq 'Marvin Zaphod'

You can search for objects whose C<.commit.author.name> includes
C<Marvin> like this:

    greple -Mjq --IN .commit.author.name Marvin

Search first C<name> field including C<Marvin> under C<.commit>:

    greple -Mjq --IN .commit..name Marvin

Search any C<author.name> field including C<Marvin>:

    greple -Mjq --IN author.name Marvin

Search for objects whose C<name> is C<Marvin> and C<type> is C<Robot>
or C<Android>:

    greple -Mjq --IN name Marvin --IN type 'Robot|Android'

Please be aware that this is just a text matching tool for indented
result of L<jq(1)> command.  So, for example, C<.commit.author>
includes everything under it and it matches C<committer> field name.
Use L<jq(1)> filter for more complex and precise operation.

=head1 CAUTION

The L<greple(1)> command reads the entire input before processing.  So
it should not be used for gigantic data or an infinite stream.

=head1 INSTALL

=head2 CPANMINUS

    $ cpanm App::Greple::jq

=head1 OPTIONS

=over 7

=item B<--IN> I<label> I<pattern>

Search I<pattern> included in I<label> field.

Character C<%> can be used as a wildcard in the I<label> string.  So
C<%name> matches labels ending with C<name>, and C<name%> matches
labels starting with C<name>.

If the label is simple string like C<name>, it matches any level of
JSON data.

If the label string contains a period (C<.>), it is considered as
nested labels.  Name C<.name> matches only a C<name> label at the top
level.  Name C<process.name> matches only a C<name> entry of some
C<process> hash.

If labels are separated by two or more dots (C<..>), they don't have
to have direct relationship.

=item B<--NOT> I<label> I<pattern>

Specify negative condition.

=item B<--MUST> I<label> I<pattern>

Specify a required condition.  If there are one or more required
conditions, all other positive rules become optional.  They are not
required, but are highlighted if they exist.

=back

=head1 LABEL SYNTAX

=over 15

=item B<.file>

C<file> at the top level.

=item B<.file.path>

C<path> under C<.file>.

=item B<.file..path>

C<path> in descendants of C<.file>.

=item B<path>

C<path> at any level.

=item B<file.path>

C<file.path> at any level.

=item B<file..path>

Some C<path> in descendants of some C<file>.

=item B<%path>

Any label ending with C<path>.

=item B<path%>

Any label starting with C<path>.

=item B<%path%>

Any label including C<path>.

=back

=head1 EXAMPLES

Search from any C<name> labels.

    greple -Mjq --IN name _mina

Search from C<.process.name> label.

    greple -Mjq --IN .process.name _mina

Object C<.process.name> contains C<_mina> and C<.event> contains
C<EXEC>.

    greple -Mjq --IN .process.name _mina --IN .event EXEC

Object C<ppid> is 803 and C<.event> contains C<FORK> or C<EXEC>.

    greple -Mjq --IN ppid 803 --IN event 'FORK|EXEC'

Object C<name> is C<_mina> and C<.event> contains C<CREATE>.

    greple -Mjq --IN name _mina --IN event 'CREATE'

Object C<ancestors> contains C<1132> and C<.event> contains C<EXEC>
with C<arguments> highlighted.

    greple -Mjq --IN ancestors 1132 --IN event EXEC --IN arguments .

Object with a C<%pid> label (any label ending with C<pid>) containing 803.

    greple -Mjq --IN %pid 803

Object with any C<path> under C<.file> containing C<_mina>, and
C<.event> containing C<WRITE>.

    greple -Mjq --IN .file..path _mina --IN .event WRITE

=head1 TIPS

=over 2

=item *

Use C<--all> option to show entire data.

=item *

Use C<--nocolor> option or set C<NO_COLOR=1> to disable colored
output.

=item *

Use C<-o> option to show only matched part.

=item *

Use C<--blockend=> option to cancel showing block separator.

=item *

Since this module implements its own search function, the L<greple(1)>
B<-i> option does not take effect.  Set a modifier in the regex, like
C<(?i)pattern>, if you want a case-insensitive match.

=item *

Use C<-Mjq::set=debug> to see actual regex.

=item *

Use C<-Mjq::set=noif> if you don't have to use L<jq(1)> as an input
filter.  Data have to be well-formatted in that case.

=item *

Use C<--color=always> and set C<LESSANSIENDCHARS=mK> if you want to
see the output using L<less(1)>.  Put next line in your F<~/.greplerc>
to enable colored output always.

    option default --color=always

=back

=head1 SEE ALSO

L<App::Greple>, L<https://github.com/kaz-utashiro/greple>

L<https://stedolan.github.io/jq/>

=head1 AUTHOR

Kazumasa Utashiro

=head1 LICENSE

Copyright ©︎ 2022-2024 Kazumasa Utashiro

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

package App::Greple::jq;

use 5.014;
use strict;
use warnings;
use Carp;

our $VERSION = "0.06";

use Exporter 'import';
our @EXPORT = qw(&jq_filter);

use App::Greple::Common;
use App::Greple::Regions qw(match_regions merge_regions);
use Data::Dumper;

## Module settings, changed through the functions below (for example
## -Mjq::set=debug, as described in the TIPS section of the POD).
my %config;

## set(KEY => VALUE, ...): merge the given settings into %config.
## Keys that are not mentioned keep their current values, so debug()
## and noif() can be combined in any order without one call wiping
## out the setting made by the other.
sub set   { %config = (%config, @_) }

## Turn on the "debug" setting; IN() then prints the generated regex.
sub debug { set debug => 1 }

## Turn on the "noif" setting; finalize() then drops --jq-filter from
## the default options.
sub noif  { set noif  => 1 }

## Indentation unit (two spaces) used by the regexes below to step one
## nesting level deeper, together with its compiled form.
my $indent    = ' ' x 2;
my $indent_re = qr/$indent/;

## finalize(MODULE, ARGV)
##
## When the "noif" setting is on, rewrite the module's "default" option
## so that it no longer contains --jq-filter.  ARGV is accepted but not
## used here.
## NOTE(review): presumably called by greple once the module has been
## loaded -- confirm against the greple module interface.
sub finalize {
    my($module, $args) = @_;
    if ($config{noif}) {
	my @kept = grep { $_ ne '--jq-filter' } $module->default;
	$module->setopt(default => @kept);
    }
}

## re(PATTERN)
##
## Compile PATTERN and return the resulting regex object.
##
## Dies with "PATTERN: pattern error - REASON\n" when PATTERN does not
## compile, where REASON is the text of Perl's error message up to the
## first semicolon.
sub re {
    my $pattern = shift;
    my $re = eval { qr/$pattern/ };
    if ($@) {
	## Hand $pattern to sprintf as an argument, never as part of the
	## format: a pattern may contain "%", which would otherwise be
	## taken as a conversion and garble the message.
	die sprintf("%s: pattern error - %s\n",
		    $pattern, $@ =~ /(.*?(?=;|$))/);
    }
    return $re;
}

## prefix_regex(PATH)
##
## Turn the dotted prefix of a label (everything up to and including
## the last dot, as split off by IN) into a list of regex fragments.
##
## PATH is consumed one "LABEL followed by dots" component at a time:
##
##   - "%" in LABEL is converted to ".*" before compiling it with re().
##   - An empty LABEL followed by a single dot (a leading ".") yields
##     an assertion that the line starts with exactly one indent unit
##     followed by a non-space character.  An empty LABEL followed by
##     two or more dots yields nothing.
##   - A non-empty LABEL yields a fragment matching the "LABEL": line
##     and then, lazily, any number of entries indented below it.
##
## The capture group holding the indentation of the last component is
## named "level" when that component ends with a single dot; otherwise
## a dummy "level" group that never participates is appended, because
## IN refers to that group name unconditionally.
##
## Returns the list of fragments.
sub prefix_regex {
    my $path = shift;
    my @prefix_re;
    ## Becomes '?<level>' to turn the indentation capture into a named one.
    my $level = '';
    ## Strip one LABEL + dots component from the front per iteration.
    while ($path =~ s/^([^.\n]*?)(\.+)//) {
	my($label, $dot) = ($1, $2);
	$label =~ s/%/.*/g;
	my $label_re = re($label);
	## Extra assertion inserted before skipped single-line entries.
	my $start_with = '';
	my $prefix_re = do {
	    if ($label eq '') {
		length($dot) > 1 ? '' : qr{ ^ (?= $indent_re \S) }xm;
	    } else {
		if (length($dot) == 1) {
		    ## Reusing the same capture group name is not a good
		    ## idea, so name only the group of the last component.
		    $level      = '?<level>' if $path eq '';
		    ## With a single dot, skipped single-line entries must
		    ## begin exactly one indent unit deeper than the label.
		    $start_with = qr/(?=\S)/;
		}
		## \g{-1} below refers to the indentation captured on the
		## label line, so the skipped entries are all indented
		## relative to that label.
		qr{
		    ^ (${level} $indent_re*) "$label_re": .* \n
		    (?:
			## single line key-value pair
			\g{-1} $indent_re $start_with .++ \n
		    |
			## indented array/hash
			\g{-1} $indent_re \S .* [\[\{] \n
			(?: \g{-1} $indent_re \s .*+ \n) *+
			\g{-1} $indent_re [\]\}] ,? \n
		    ) *?
		}xm;
	    }
	};
	## The empty string produced for a leading ".." is dropped here.
	push @prefix_re, $prefix_re if $prefix_re;
    }
    if ($level eq '') {
	## Referring to a named capture group causes an error if the
	## name is not defined anywhere in the regex, so add a dummy
	## group which always fails and is therefore never set.
	push @prefix_re, qr/(?<level>(?!))?/;
    }
    @prefix_re
}

## IN(label => LABEL, pattern => PATTERN, ...)
##
## Search function behind the --IN, --MUST and --NOT options (see the
## CALL_IN definition in the __DATA__ section).
##
## The argument list is a key/value list.  It must contain a true
## value under the FILELABEL key (the sub dies otherwise), which is
## removed from the options and not used any further here.
##
## LABEL is split at its last dot: the leading part is converted by
## prefix_regex() and the remaining part is the label to look for.
## "%" in that label is converted to ".*".  The label and PATTERN are
## compiled by re(), which dies on an invalid pattern.
##
## Returns whatever match_regions() returns for the assembled regex.
## NOTE(review): match_regions presumably matches against the text
## currently being processed by greple -- confirm in
## App::Greple::Regions.
sub IN {
    my %opt = @_;
    my $target = delete $opt{&FILELABEL} or die;
    my($label, $pattern) = @opt{qw(label pattern)};
    ## The substitution always succeeds because its group is optional;
    ## $1 is the prefix up to and including the last dot (maybe empty).
    my @prefix_re = $label =~ s/^((?:.*\.)?)// && prefix_regex($1);
    $label =~ s/%/.*/g;
    my($label_re, $pattern_re) = map re($_), $label, $pattern;
    ## @prefix_re is interpolated joined by spaces, which are ignored
    ## under /x.  \K discards the prefix from the reported match.
    my $re = qr{
	@prefix_re \K
	^
	(?(<level>) (?= \g{level} $indent_re \S ) )	# required level
	(?<in> [ ]*) "$label_re": [ ]*+			# find given label
	(?: . | \n\g{in} \s++ ) *			# and look for ...
	$pattern_re					# pattern
	(?: . | \n\g{in} (?: \s++ | [\]\}] ) ) *	# and take the rest
    }xm;
    ## With the "debug" setting on, show the regex actually used.
    warn "$re\n" if $config{debug};
    match_regions pattern => $re;
}

1;

__DATA__

# A line consisting of optional spaces and an opening brace, through
# the closing brace line with the same indentation (optionally
# followed by a comma).
define JSON-OBJECTS ^([ ]*)\{(?s:.*?)^\g{-1}\},?\n

# Options enabled by default.  finalize() removes --jq-filter from
# this list when the "noif" setting is on.
option default --json-block --jq-filter

# Pass the input through jq; a top-level array is expanded into its
# elements, anything else is passed as is.
option --jq-filter --if='jq "if type == \"array\" then .[] else . end"'

# Use the regions matched by JSON-OBJECTS as blocks.
option --json-block --block JSON-OBJECTS

# Call IN() of this package.  The label and pattern values are
# presumably taken from the two arguments following the option
# (TODO confirm the placeholder semantics in Getopt::EX).
define CALL_IN __PACKAGE__::IN(label=$<shift>,pattern=$<shift>)

# --AND is an alias of --IN.  --IN, --MUST and --NOT are described in
# the OPTIONS section of the POD.
option --AND  --IN
option --IN   --le  &CALL_IN --face +E
option --MUST --le +&CALL_IN --face +E
option --NOT  --le -&CALL_IN

#  LocalWords:  JSON jq json Zaphod greple CPANMINUS cpanm
#  LocalWords:  perl pid regex LESSANSIENDCHARS greplerc Kazumasa Mjq
#  LocalWords:  Utashiro Android committer mina ppid blockend


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.