Catmandu-MARC/lib/Catmandu/MARC.pm
package Catmandu::MARC;
use Catmandu::Sane;
use Catmandu::Util;
use Catmandu::Exporter::MARC::XML;
use MARC::Spec::Parser;
use List::Util;
use Memoize;
use Carp;
use Moo;
with 'MooX::Singleton';
memoize('compile_marc_path');
memoize('parse_marc_spec');
memoize('_get_index_range');
our $VERSION = '1.33';
sub marc_map {
my $self = $_[0];
# $_[2] : marc_path
my $context
= ref( $_[2] )
? $_[2]
: $self->compile_marc_path( $_[2], subfield_wildcard => 1 );
confess "invalid marc path" unless $context;
# $_[1] : data record
my $record = $_[1]->{'record'};
return wantarray ? () : undef
unless ( defined $record && ref($record) eq 'ARRAY' );
# $_[3] : opts
my $split = $_[3]->{'-split'} // 0;
my $join_char = $_[3]->{'-join'} // '';
my $pluck = $_[3]->{'-pluck'} // 0;
my $value_set = $_[3]->{'-value'} // undef;
my $nested_arrays = $_[3]->{'-nested_arrays'} // 0;
my $append = $_[3]->{'-force_array'} // undef;
# Do an implicit split for nested_arrays , except when no-implicit-split is set
if ( $nested_arrays == 1 ) {
$split = 1 unless $_[3]->{'-no-implicit-split'};
}
my $vals;
for my $field (@$record) {
next
if (
( $context->{is_regex_field} == 0
&& $field->[0] ne $context->{field}
)
|| ( defined $context->{ind1}
&& ( !defined $field->[1] || $field->[1] ne $context->{ind1} )
)
|| ( defined $context->{ind2}
&& ( !defined $field->[2] || $field->[2] ne $context->{ind2} )
)
|| ( $context->{is_regex_field} == 1
&& $field->[0] !~ $context->{field_regex} )
);
my $v;
if ($value_set) {
for ( my $i = 3; $i < @{$field}; $i += 2 ) {
my $subfield_regex = $context->{subfield_regex};
if ( $field->[$i] =~ $subfield_regex ) {
$v = $value_set;
last;
}
}
}
else {
$v = [];
if ($pluck) {
# Treat the subfield as a hash index
my $_h = {};
for ( my $i = $context->{start}; $i < @{$field}; $i += 2 ) {
push @{ $_h->{ $field->[$i] } }, $field->[ $i + 1 ];
}
my $subfield = $context->{subfield};
$subfield =~ s{[^a-zA-Z0-9]}{}g;
for my $c ( split( '', $subfield ) ) {
my $val = $_h->{$c} // [undef];
push @$v, @{$val};
}
}
else {
for ( my $i = $context->{start}; $i < @{$field}; $i += 2 ) {
my $subfield_regex = $context->{subfield_regex};
if ( $field->[$i] =~ $subfield_regex ) {
push( @$v, $field->[ $i + 1 ] );
}
}
}
if (@$v) {
if ( !$split ) {
my @defined_values = grep { defined($_) } @$v;
$v = join $join_char, @defined_values;
}
if ( defined( my $off = $context->{from} ) ) {
if ( ref $v eq 'ARRAY' ) {
my @defined_values = grep { defined($_) } @$v;
$v = join $join_char, @defined_values;
}
my $len = $context->{len};
if ( length( ${v} ) > $off ) {
$v = substr( $v, $off, $len );
}
else {
$v = undef;
}
}
}
else {
$v = undef;
}
}
if ( defined $v ) {
if ($split) {
$v = [$v] unless ( defined($v) && ref($v) eq 'ARRAY' );
if ( defined($vals) && ref($vals) eq 'ARRAY' ) {
# With the nested arrays option a split will
# always return an array of array of values.
# This was the old behavior of Inline marc_map functions
if ( $nested_arrays == 1 ) {
push @$vals, $v;
}
else {
push @$vals, @$v;
}
}
else {
if ( $nested_arrays == 1 ) {
$vals = [$v];
}
else {
$vals = [@$v];
}
}
}
else {
push @$vals, $v;
}
}
}
if ( $split && defined $vals ) {
$vals = [$vals];
}
elsif ($append) {
# we got a $append
}
elsif ( defined $vals ) {
$vals = join $join_char, @$vals;
}
else {
# no result
}
$vals;
}
sub marc_add {
my ( $self, $data, $marc_path, @subfields ) = @_;
my %subfields = @subfields;
my $marc = $data->{'record'} // [];
if ( $marc_path =~ /^\w{3}$/ ) {
my @field = ();
push @field, $marc_path;
my $ind1 = $subfields{ind1} // ' ';
my $ind2 = $subfields{ind2} // ' ';
if ($ind1 =~ /^\$\.(\S+)$/) {
my $path = $1;
$ind1 = Catmandu::Util::data_at( $path, $data );
}
if ($ind2 =~ /^\$\.(\S+)$/) {
my $path = $1;
$ind2 = Catmandu::Util::data_at( $path, $data );
}
push @field, $ind1;
push @field, $ind2;
for ( my $i = 0; $i < @subfields; $i += 2 ) {
my $code = $subfields[$i];
next unless length $code == 1;
my $value = $subfields[ $i + 1 ];
if ( $value =~ /^\$\.(\S+)$/ ) {
my $path = $1;
$value = Catmandu::Util::data_at( $path, $data );
}
if ( Catmandu::Util::is_array_ref $value) {
for (@$value) {
push @field, $code;
push @field, $_;
}
}
elsif ( Catmandu::Util::is_hash_ref $value) {
for ( keys %$value ) {
push @field, $code;
push @field, $value->{$_};
}
}
elsif ( Catmandu::Util::is_value($value) && length($value) > 0 ) {
push @field, $code;
push @field, $value;
}
}
push @{$marc}, \@field if @field > 3;
}
$data->{'record'} = $marc;
$data;
}
sub marc_append {
my ( $self, $data, $marc_path, $value ) = @_;
my $record = $data->{'record'};
return $data unless defined $record;
if ( $value =~ /^\$\.(\S+)/ ) {
my $path = $1;
$value = Catmandu::Util::data_at( $path, $data );
}
if ( Catmandu::Util::is_array_ref $value) {
$value = $value->[-1];
}
elsif ( Catmandu::Util::is_hash_ref $value) {
my $last;
for ( keys %$value ) {
$last = $value->{$_};
}
$value = $last;
}
my $context = $self->compile_marc_path($marc_path);
confess "invalid marc path" unless $context;
for my $field (@$record) {
my ( $tag, $ind1, $ind2, @subfields ) = @$field;
if ( $context->{is_regex_field} ) {
next unless $tag =~ $context->{field_regex};
}
else {
next unless $tag eq $context->{field};
}
if ( defined $context->{ind1} ) {
if ( !defined $ind1 || $ind1 ne $context->{ind1} ) {
next;
}
}
if ( defined $context->{ind2} ) {
if ( !defined $ind2 || $ind2 ne $context->{ind2} ) {
next;
}
}
if ( $context->{subfield} ) {
for ( my $i = 0; $i < @subfields; $i += 2 ) {
if ( $subfields[$i] =~ $context->{subfield} ) {
$field->[ $i + 4 ] .= $value;
}
}
}
else {
$field->[-1] .= $value;
}
}
$data;
}
sub marc_replace_all {
my ( $self, $data, $marc_path, $regex, $value ) = @_;
my $record = $data->{'record'};
return $data unless defined $record;
if ( $value =~ /^\$\.(\S+)/ ) {
my $path = $1;
$value = Catmandu::Util::data_at( $path, $data );
}
if ( Catmandu::Util::is_array_ref $value) {
$value = $value->[-1];
}
elsif ( Catmandu::Util::is_hash_ref $value) {
my $last;
for ( keys %$value ) {
$last = $value->{$_};
}
$value = $last;
}
my $context
= $self->compile_marc_path( $marc_path, subfield_wildcard => 1 );
confess "invalid marc path" unless $context;
for my $field (@$record) {
my ( $tag, $ind1, $ind2, @subfields ) = @$field;
if ( $context->{is_regex_field} ) {
next unless $tag =~ $context->{field_regex};
}
else {
next unless $tag eq $context->{field};
}
if ( defined $context->{ind1} ) {
if ( !defined $ind1 || $ind1 ne $context->{ind1} ) {
next;
}
}
if ( defined $context->{ind2} ) {
if ( !defined $ind2 || $ind2 ne $context->{ind2} ) {
next;
}
}
for ( my $i = 0; $i < @subfields; $i += 2 ) {
if ( $subfields[$i] =~ $context->{subfield} ) {
# Trick to double eval the right hand side
$field->[ $i + 4 ] =~ s{$regex}{"\"$value\""}eeg;
}
}
}
$data;
}
sub marc_set {
my ( $self, $data, $marc_path, $value, %opts ) = @_;
my $record = $data->{'record'};
return $data unless defined $record;
if ( $value =~ /^\$\.(\S+)/ ) {
my $path = $1;
$value = Catmandu::Util::data_at( $path, $data );
}
if ( Catmandu::Util::is_array_ref $value) {
$value = $value->[-1];
}
elsif ( Catmandu::Util::is_hash_ref $value) {
my $last;
for ( keys %$value ) {
$last = $value->{$_};
}
$value = $last;
}
my $context
= $self->compile_marc_path( $marc_path, subfield_default => 1 );
confess "invalid marc path" unless $context;
for my $field (@$record) {
my ( $tag, $ind1, $ind2, @subfields ) = @$field;
if ( $context->{is_regex_field} ) {
next unless $tag =~ $context->{field_regex};
}
else {
next unless $tag eq $context->{field};
}
if ( defined $context->{ind1} ) {
if ( !defined $ind1 || $ind1 ne $context->{ind1} ) {
next;
}
}
if ( defined $context->{ind2} ) {
if ( !defined $ind2 || $ind2 ne $context->{ind2} ) {
next;
}
}
my $found = 0;
for ( my $i = 0; $i < @subfields; $i += 2 ) {
if ( $subfields[$i] =~ $context->{subfield} ) {
if ( defined $context->{from} ) {
substr( $field->[ $i + 4 ],
$context->{from}, $context->{len} )
= $value;
}
else {
$field->[ $i + 4 ] = $value;
}
$found = 1;
}
}
if ( $found == 0 ) {
push( @$field, $context->{subfield}, $value );
}
}
$data;
}
sub marc_remove {
my ( $self, $data, $marc_path, %opts ) = @_;
my $record = $data->{'record'};
my $new_record;
my $context = $self->compile_marc_path($marc_path);
confess "invalid marc path" unless $context;
for my $field (@$record) {
my $field_size = int(@$field);
if (( $context->{is_regex_field} == 0
&& $field->[0] eq $context->{field}
)
|| ( $context->{is_regex_field} == 1
&& $field->[0] =~ $context->{field_regex} )
)
{
my $ind_match = undef;
if ( defined $context->{ind1} && defined $context->{ind2} ) {
$ind_match = 1
if ( defined $field->[1]
&& $field->[1] eq $context->{ind1}
&& defined $field->[2]
&& $field->[2] eq $context->{ind2} );
}
elsif ( defined $context->{ind1} ) {
$ind_match = 1
if ( defined $field->[1]
&& $field->[1] eq $context->{ind1} );
}
elsif ( defined $context->{ind2} ) {
$ind_match = 1
if ( defined $field->[2]
&& $field->[2] eq $context->{ind2} );
}
else {
$ind_match = 1;
}
if ( $ind_match && !defined $context->{subfield_regex} ) {
next;
}
if ( defined $context->{subfield_regex} ) {
my $subfield_regex = $context->{subfield_regex};
my $new_subf = [];
for ( my $i = $context->{start}; $i < $field_size; $i += 2 ) {
unless ( $field->[$i] =~ $subfield_regex ) {
push @$new_subf, $field->[$i];
push @$new_subf, $field->[ $i + 1 ];
}
}
splice @$field, $context->{start}, int(@$field), @$new_subf
if $ind_match;
}
}
push @$new_record, $field;
}
$data->{'record'} = $new_record;
return $data;
}
sub marc_spec {
my $self = $_[0];
# $_[1] : data record
my $data = $_[1];
my $record = $data->{'record'};
# $_[2] : spec
my ( $ms, $spec );
if ( ref $_[2] ) {
$ms = $_[2];
$spec = $ms->to_string();
}
else {
$ms = $self->parse_marc_spec( $_[2] ); # memoized
$spec = $_[2];
}
my $EMPTY = q{};
# $_[3] : opts
my $split = $_[3]->{'-split'} // 0;
my $join_char = $_[3]->{'-join'} // $EMPTY;
my $pluck = $_[3]->{'-pluck'} // 0;
my $value_set = $_[3]->{'-value'} // undef;
my $invert = $_[3]->{'-invert'} // 0;
my $nested_arrays = $_[3]->{'-nested_arrays'} // 0;
my $append = $_[3]->{'-force_array'} // 0;
if ($nested_arrays) {
$split = 1;
}
# filter by tag
my @fields = ();
my $field_spec = $ms->field;
my $tag_spec = $field_spec->tag;
@fields = grep { $_->[0] =~ /$tag_spec/ } @{$record};
return unless @fields;
# calculate char start
my $chst = sub {
my ($sp) = @_;
my $char_start;
if ( $sp->has_char_start ) {
$char_start
= ( '#' eq $sp->char_start )
? $sp->char_length * -1
: $sp->char_start;
}
return $char_start;
};
# vars we need only for subfields
my ( @sf_spec, $invert_level, $codes, $invert_chars );
if ( $ms->has_subfields ) {
# set the order of subfields
@sf_spec = map {$_} @{ $ms->subfields };
unless ($pluck) {
@sf_spec = sort { $a->code cmp $b->code } @sf_spec;
}
# set invert level default
$invert_level = 4;
if ($invert) {
$codes = '[^';
$codes .= join $EMPTY, map { $_->code } @sf_spec;
$codes .= ']';
}
$invert_chars = sub {
my ( $str, $start, $length ) = @_;
for ( substr $str, $start, $length ) {
$_ = $EMPTY;
}
return $str;
};
}
else {
# return $value_set ASAP
return $value_set if defined $value_set;
}
# vars we need for fields and subfields
my ( $referred, $char_start, $prev_tag, $index_range );
my $current_tag = $EMPTY;
my $tag_index = 0;
my $index_start = $field_spec->index_start;
my $index_end = $field_spec->index_end;
my $to_referred = sub {
my (@values) = @_;
if ($nested_arrays) {
push @{$referred}, \@values;
}
elsif ($split) {
push @{$referred}, @values;
}
else {
push @{$referred}, join $join_char, @values;
}
};
if ( defined $field_spec->index_start ) {
$index_range = _get_index_range( $field_spec->index_start,
$field_spec->index_end, $#fields );
}
# iterate over fields
for my $field (@fields) {
$prev_tag = $current_tag;
$current_tag = $field->[0];
$tag_index
= ( $prev_tag eq $current_tag and defined $tag_index )
? ++$tag_index
: 0; #: $field_spec->index_start;
# filter by index
if ( defined $index_range ) {
next
unless (
Catmandu::Util::array_includes( $index_range, $tag_index ) );
}
# filter field by subspec
if ( $field_spec->has_subspecs ) {
my $valid = $self->_it_subspecs( $data, $current_tag,
$field_spec->subspecs, $tag_index );
next unless $valid;
}
my @subfields = ();
if ( $ms->has_subfields ) { # now we dealing with subfields
for my $sf (@sf_spec) {
# set invert level
if ( $invert && !$sf->has_subspecs ) {
if ( -1 == $sf->index_length && !$sf->has_char_start ) {
next
if ( $invert_level == 3 )
; # skip subfield spec it's already covered
$invert_level = 3;
}
elsif ( $sf->has_char_start ) {
$invert_level = 1;
}
else {
$invert_level = 2;
}
}
my @subfield = ();
my $code = ( $invert_level == 3 ) ? $codes : $sf->code;
$code = qr/$code/;
for ( my $i = 3; $i < @{$field}; $i += 2 ) {
if ( $field->[$i] =~ /$code/ ) {
push @subfield, $field->[ $i + 1 ];
}
}
if ( $invert_level == 3 ) { # no index or charpos
if (@subfield) {
push @subfields, @subfield;
}
if ( $referred && $value_set ) { # return $value_set ASAP
return $value_set;
}
next;
}
next unless (@subfield);
# filter by index
if ( defined $sf->index_start ) {
my $sf_range
= _get_index_range( $sf->index_start, $sf->index_end,
$#subfield );
if ( $invert_level == 2 ) { # inverted
@subfield = map {
Catmandu::Util::array_includes( $sf_range, $_ )
? ()
: $subfield[$_];
} 0 .. $#subfield;
}
else { # without invert
@subfield = map {
defined $subfield[$_] ? $subfield[$_] : ();
} @{$sf_range};
}
next unless (@subfield);
}
# return $value_set ASAP
return $value_set if $value_set;
# filter subfield by subspec
if ( $sf->has_subspecs ) {
my $valid = $self->_it_subspecs( $data, $field_spec->tag,
$sf->subspecs, $tag_index );
next unless $valid;
}
# get substring
$char_start = $chst->($sf);
if ( defined $char_start ) {
if ( $invert_level == 1 ) { # inverted
@subfield = map {
$invert_chars->(
$_, $char_start, $sf->char_length
);
} @subfield;
}
else {
@subfield
= map { substr $_, $char_start, $sf->char_length; }
@subfield;
}
}
next unless @subfield;
push @subfields, @subfield;
} # end of subfield iteration
$to_referred->(@subfields) if @subfields;
} # end of subfield handling
elsif ( $ms->has_indicator ) {
# filter field by subspec
if ( $ms->indicator->has_subspecs ) {
my $valid = $self->_it_subspecs( $data, $current_tag,
$ms->indicator->subspecs, $tag_index );
next unless $valid;
}
my @indicators = ();
push @indicators, $field->[ $ms->indicator->position ]
if defined $field->[ $ms->indicator->position ];
$to_referred->(@indicators);
}
else { # no particular subfields requested
my @contents = ();
for ( my $i = 4; $i < @{$field}; $i += 2 ) {
# get substring
$char_start = $chst->($field_spec);
my $content = ( defined $char_start )
? substr $field->[$i], $char_start,
$field_spec->char_length
: $field->[$i];
push @contents, $content;
}
next unless (@contents);
$to_referred->(@contents);
} # end of field handling
} # end of field iteration
return unless ($referred);
if ($append) {
return [$referred] if $split;
return $referred;
}
elsif ($split) {
return [$referred];
}
return join $join_char, @{$referred};
}
sub _it_subspecs {
my ( $self, $data, $tag, $subspecs, $tag_index ) = @_;
my $set_index = sub {
my ($subspec) = @_;
foreach my $side ( ( 'left', 'right' ) ) {
next if ( ref $subspec->$side eq 'MARC::Spec::Comparisonstring' );
# only set new index if subspec field tag equals spec field tag!!
my $spec_tag = $subspec->$side->field->tag;
next unless ( $tag =~ /$spec_tag/ );
$subspec->$side->field->set_index_start_end($tag_index);
}
};
my $valid = 1;
foreach my $subspec ( @{$subspecs} ) {
if ( ref $subspec eq 'ARRAY' ) { # chained subSpecs (OR)
foreach my $or_subspec ( @{$subspec} ) {
$set_index->($or_subspec);
$valid = $self->_validate_subspec( $or_subspec, $data, $tag );
# at least one of them is true (OR)
last if $valid;
}
}
else { # repeated SubSpecs (AND)
$set_index->($subspec);
$valid = $self->_validate_subspec( $subspec, $data, $tag );
# all of them have to be true (AND)
last unless $valid;
}
}
return $valid;
}
sub _validate_subspec {
my ( $self, $subspec, $data, $tag ) = @_;
my ( $left_subterm, $right_subterm );
if ( '!' ne $subspec->operator && '?' ne $subspec->operator ) {
if ( ref $subspec->left ne 'MARC::Spec::Comparisonstring' ) {
my $new_spec = $subspec->left->to_string();
$new_spec =~ s/^\.\.\./$tag/;
$left_subterm
= $self->marc_spec( $data, $new_spec, { '-split' => 1 } )
; # split should result in an array ref
return 0 unless defined $left_subterm;
}
else {
push @{$left_subterm}, $subspec->left->comparable;
}
}
if ( ref $subspec->right ne 'MARC::Spec::Comparisonstring' ) {
my $new_spec = $subspec->right->to_string();
$new_spec =~ s/^\.\.\./$tag/;
$right_subterm
= $self->marc_spec( $data, $new_spec, { '-split' => 1 } )
; # split should result in an array ref
unless ( defined $right_subterm ) {
$right_subterm = [];
}
}
else {
push @{$right_subterm}, $subspec->right->comparable;
}
if ( $subspec->operator eq '?' ) {
return ( @{$right_subterm} ) ? 1 : 0;
}
if ( $subspec->operator eq '!' ) {
return ( @{$right_subterm} ) ? 0 : 1;
}
if ( $subspec->operator eq '=' ) {
foreach my $v ( @{ $left_subterm->[0] } ) {
return 1 if List::Util::any { $v eq $_ } @{$right_subterm};
}
}
if ( $subspec->operator eq '!=' ) {
foreach my $v ( @{ $left_subterm->[0] } ) {
return 0 if List::Util::any { $v eq $_ } @{$right_subterm};
}
return 1;
}
if ( $subspec->operator eq '~' ) {
foreach my $v ( @{ $left_subterm->[0] } ) {
return 1 if List::Util::any { $v =~ /$_/ } @{$right_subterm};
}
}
if ( $subspec->operator eq '!~' ) {
foreach my $v ( @{ $left_subterm->[0] } ) {
return 0 if List::Util::any { $v =~ /$_/ } @{$right_subterm};
}
return 1;
}
return 0;
}
sub parse_marc_spec {
my ( $self, $marc_spec ) = @_;
return MARC::Spec::Parser->new($marc_spec)->marcspec;
}
sub _get_index_range {
my ( $index_start, $index_end, $last_index ) = @_;
if ( '#' eq $index_start ) {
if ( '#' eq $index_end or 0 == $index_end ) { return [$last_index]; }
$index_start = $last_index;
$index_end = $last_index - $index_end;
if ( 0 > $index_end ) { $index_end = 0; }
}
else {
if ( $last_index < $index_start ) {
return [$index_start];
} # this will result to no hits
}
if ( '#' eq $index_end or $index_end > $last_index ) {
$index_end = $last_index;
}
return ( $index_start <= $index_end )
? [ $index_start .. $index_end ]
: [ $index_end .. $index_start ];
}
sub marc_xml {
my ( $self, $data, %opts ) = @_;
if ( $opts{reverse} ) {
my $record
= Catmandu->import_from_string( $data, 'MARC', type => 'XML' );
return $record->[0]->{record} if $record;
return undef;
}
else {
my $xml;
my $exporter = Catmandu::Exporter::MARC::XML->new(
file => \$xml,
xml_declaration => 0,
collection => 0
);
$exporter->add( { record => $data } );
$exporter->commit;
return $xml;
}
}
sub marc_record_to_json {
my ( $self, $data, %opts ) = @_;
if ( my $marc = delete $data->{'record'} ) {
for my $field (@$marc) {
my ( $tag, $ind1, $ind2, @subfields ) = @$field;
if ( $tag eq 'LDR' ) {
shift @subfields;
$data->{leader} = join "", @subfields;
}
elsif ( $tag eq 'FMT' || substr( $tag, 0, 2 ) eq '00' ) {
shift @subfields;
push @{ $data->{fields} ||= [] },
{ $tag => join "", @subfields };
}
else {
my @sf;
my $start = !defined( $subfields[0] )
|| $subfields[0] eq '_' ? 2 : 0;
for ( my $i = $start; $i < @subfields; $i += 2 ) {
push @sf, { $subfields[$i] => $subfields[ $i + 1 ] };
}
push @{ $data->{fields} ||= [] },
{ $tag =>
{ subfields => \@sf, ind1 => $ind1, ind2 => $ind2 } };
}
}
}
$data;
}
sub marc_json_to_record {
my ( $self, $data, %opts ) = @_;
my $record = [];
if ( Catmandu::Util::is_string( $data->{leader} ) ) {
push @$record, [ 'LDR', undef, undef, '_', $data->{leader} ],;
}
if ( Catmandu::Util::is_array_ref( $data->{fields} ) ) {
for my $field ( @{ $data->{fields} } ) {
next unless Catmandu::Util::is_hash_ref($field);
my ($tag) = keys %$field;
my $val = $field->{$tag};
if ( $tag eq 'FMT' || substr( $tag, 0, 2 ) eq '00' ) {
push @$record, [ $tag, undef, undef, '_', $val ],;
}
elsif ( Catmandu::Util::is_hash_ref($val) ) {
my $ind1 = $val->{ind1};
my $ind2 = $val->{ind2};
next unless Catmandu::Util::is_array_ref( $val->{subfields} );
my $sfs = [ '_', '' ];
for my $sf ( @{ $val->{subfields} } ) {
next unless Catmandu::Util::is_hash_ref($sf);
my ($code) = keys %$sf;
my $sval = $sf->{$code};
push @$sfs, [ $code, $sval ];
}
push @$record, [ $tag, $ind1, $ind2, @$sfs ];
}
}
}
if ( @$record > 0 ) {
delete $data->{fields};
delete $data->{leader};
$data->{'record'} = $record;
}
$data;
}
sub marc_decode_dollar_subfields {
my ( $self, $data, %opts ) = @_;
my $old_record = $data->{'record'};
my $new_record = [];
for my $field (@$old_record) {
my ( $tag, $ind1, $ind2, @subfields ) = @$field;
my $fixed_field = [ $tag, $ind1, $ind2 ];
for ( my $i = 0; $i < @subfields; $i += 2 ) {
my $code = $subfields[$i];
my $value = $subfields[ $i + 1 ];
# If a subfield contains fields coded like: data$xmore$yevenmore
# chunks = (data,x,y,evenmore)
my @chunks = split( /\$([a-z])/, $value );
my $real_value = shift @chunks;
push @$fixed_field, ( $code, $real_value );
while (@chunks) {
push @$fixed_field, ( splice @chunks, 0, 2 );
}
}
push @$new_record, $fixed_field;
}
$data->{'record'} = $new_record;
$data;
}
sub compile_marc_path {
my ( $self, $marc_path, %opts ) = @_;
my ($field, $field_regex, $ind1, $ind2,
$subfield, $subfield_regex, $from, $to,
$len, $is_regex_field
);
my $MARC_PATH_REGEX
= qr/(\S{1,3})(\[([^,])?,?([^,])?\])?([\$_a-zA-Z0-9^-]+)?(\/([0-9]+)(-([0-9]+))?)?/;
if ( $marc_path =~ $MARC_PATH_REGEX ) {
$field = $1;
$ind1 = $3;
$ind2 = $4;
$subfield = $5;
$field = "0" x ( 3 - length($field) )
. $field; # fixing 020 treated as 20 bug
if ( defined($subfield) ) {
$subfield =~ s{\$}{}g;
unless ( $subfield =~ /^[a-zA-Z0-9]$/ ) {
$subfield = "[$subfield]";
}
}
elsif ( $opts{subfield_default} ) {
$subfield = $field =~ /^0|LDR|FMT/ ? '_' : 'a';
}
elsif ( $opts{subfield_wildcard} ) {
$subfield = '[a-z0-9_]';
}
if ( defined($subfield) ) {
$subfield_regex = qr/^(?:${subfield})$/;
}
$from = $7;
$to = $9;
$len = defined $to ? $to - $from + 1 : 1;
}
else {
return undef;
}
if ( $field =~ /[\*\.]/ ) {
$field_regex = $field;
$field_regex =~ s/[\*\.]/(?:[A-Z0-9])/g;
$is_regex_field = 1;
$field_regex = qr/^$field_regex$/;
}
else {
$is_regex_field = 0;
}
return {
field => $field,
field_regex => $field_regex,
is_regex_field => $is_regex_field,
subfield => $subfield,
subfield_regex => $subfield_regex,
ind1 => $ind1,
ind2 => $ind2,
start => 3,
from => $from,
to => $to,
len => $len
};
}
sub marc_copy {
my $self = $_[0];
my $data = $_[1];
my $marc_path = $_[2];
my $marc_value = $_[3];
my $is_cut = $_[4];
# $_[2] : marc_path
my $context
= ref($marc_path)
? $marc_path
: $self->compile_marc_path( $_[2], subfield_wildcard => 0 );
confess "invalid marc path" unless $context;
# $_[1] : data record
my $record = $data->{'record'};
return wantarray ? () : undef
unless ( defined $record && ref($record) eq 'ARRAY' );
# When is_cut is on, we need to create a new record containing the remaining fields
my @new_record = ();
my $fields = [];
for my $field (@$record) {
my ( $tag, $ind1, $ind2, @subfields ) = @$field;
if (( $context->{is_regex_field} == 0 && $tag ne $context->{field} )
|| ( $context->{is_regex_field} == 1
&& $tag !~ $context->{field_regex} )
)
{
push @new_record, $field if $is_cut;
next;
}
if ( defined $context->{ind1} ) {
if ( !defined $ind1 || $ind1 ne $context->{ind1} ) {
push @new_record, $field if $is_cut;
next;
}
}
if ( defined $context->{ind2} ) {
if ( !defined $ind2 || $ind2 ne $context->{ind2} ) {
push @new_record, $field if $is_cut;
next;
}
}
if ( $context->{subfield} ) {
my $found = 0;
for ( my $i = 0; $i < @subfields; $i += 2 ) {
if ( $subfields[$i] =~ $context->{subfield} ) {
if ( defined($marc_value) ) {
$found = 1 if $subfields[ $i + 1 ] =~ /$marc_value/;
}
else {
$found = 1;
}
}
}
unless ($found) {
push @new_record, $field if $is_cut;
next;
}
}
else {
if ( defined($marc_value) ) {
my @sf = ();
for ( my $i = 0; $i < @subfields; $i += 2 ) {
push @sf, $subfields[ $i + 1 ];
}
my $string = join "", @sf;
unless ( $string =~ /$marc_value/ ) {
push @new_record, $field if $is_cut;
next;
}
}
}
my $f = {};
$f->{tag} = $field->[0];
# indicator 1
if ( defined $field->[1] ) {
$f->{ind1} = $field->[1];
}
else {
$f->{ind1} = undef;
}
# indicator 2
if ( defined $field->[2] ) {
$f->{ind2} = $field->[2];
}
else {
$f->{ind2} = undef;
}
# fixed fields
if ( $field->[3] eq '_' ) {
$f->{content} = $field->[4];
push( @$fields, $f );
next;
}
# subfields
for ( my $i = $context->{start}; $i < @{$field}; $i += 2 ) {
push(
@{ $f->{subfields} },
{ $field->[$i] => $field->[ $i + 1 ] }
);
}
push( @$fields, $f );
}
if ($is_cut) {
$data->{record} = \@new_record;
}
[$fields];
}
sub marc_paste {
my $self = $_[0];
my $data = $_[1];
my $json_path = $_[2];
my $marc_path = $_[3];
my $marc_value = $_[4];
my $value = Catmandu::Util::data_at( $json_path, $data );
return $data
unless Catmandu::Util::is_array_ref($value)
|| Catmandu::Util::is_hash_ref($value);
$value = [$value] unless Catmandu::Util::is_array_ref($value);
my @new_parts;
for my $part (@$value) {
return $data
unless Catmandu::Util::is_hash_ref($part)
&& exists $part->{tag}
&& exists $part->{ind1}
&& exists $part->{ind2}
&& ( exists $part->{content} || exists $part->{subfields} );
my $tag = $part->{tag};
my $ind1 = $part->{ind1} // ' ';
my $ind2 = $part->{ind2} // ' ';
my $content = $part->{content};
my $subfields = $part->{subfields};
if ( defined($content) ) {
push @new_parts, [ $tag, $ind1, $ind2, '_', $content ];
}
elsif ( defined($subfields)
&& Catmandu::Util::is_array_ref($subfields) )
{
my @tmp = ( $tag, $ind1, $ind2 );
for my $sf (@$subfields) {
while ( my ( $key, $value ) = each %$sf ) {
push @tmp, $key, $value;
}
}
push @new_parts, [@tmp];
}
else {
# Illegal input
return $data;
}
}
if ( defined($marc_path) ) {
my $context
= $self->compile_marc_path( $marc_path, subfield_wildcard => 0 );
confess "invalid marc path" unless $context;
my @record = @{ $data->{record} };
my $found_match = undef;
my $field_position = -1;
for my $field (@record) {
$field_position++;
my ( $tag, $ind1, $ind2, @subfields ) = @$field;
if ( $context->{is_regex_field} ) {
next unless $tag =~ $context->{field_regex};
}
else {
next unless $tag eq $context->{field};
}
if ( defined $context->{ind1} ) {
if ( !defined $ind1 || $ind1 ne $context->{ind1} ) {
next;
}
}
if ( defined $context->{ind2} ) {
if ( !defined $ind2 || $ind2 ne $context->{ind2} ) {
next;
}
}
if ( $context->{subfield} ) {
for ( my $i = 0; $i < @subfields; $i += 2 ) {
if ( $subfields[$i] =~ $context->{subfield} ) {
if ( defined($marc_value) ) {
$found_match = $field_position
if $subfields[ $i + 1 ] =~ /$marc_value/;
}
else {
$found_match = $field_position;
}
}
}
}
else {
if ( defined($marc_value) ) {
my @sf = ();
for ( my $i = 0; $i < @subfields; $i += 2 ) {
push @sf, $subfields[ $i + 1 ];
}
my $string = join "", @sf;
if ( $string =~ /$marc_value/ ) {
$found_match = $field_position;
}
else {
# don't match anything
}
}
else {
$found_match = $field_position;
}
}
}
if ( defined $found_match ) {
my @new_record = (
@record[ 0 .. $found_match ],
@new_parts, @record[ $found_match + 1 .. $#record ]
);
$data->{record} = \@new_record;
}
}
else {
push @{ $data->{record} }, @new_parts;
}
$data;
}
sub marc_sort {
my ( $self, $data ) = @_;
if ( defined $data->{record} ) {
my @record = map { $_->[0] } sort { $a->[1] <=> $b->[1] } map {
[ $_,
$_->[0] =~ m/^FMT$/ ? 0.1
: $_->[0] =~ m/^LDR$/ ? 0.2
: $_->[0] =~ m/^[A-Z]/ ? 99 . ord( substr( $_->[0], 0 ) )
: $_->[0]
]
} @{ $data->{record} };
$data->{record} = \@record;
}
return $data;
}
1;
__END__
=head1 NAME
Catmandu::MARC - Catmandu modules for working with MARC data
=begin markdown
# STATUS
[](https://travis-ci.org/LibreCat/Catmandu-MARC)
[](https://github.com/LibreCat/Catmandu-MARC/actions/workflows/linux.yml)
[](https://coveralls.io/r/LibreCat/Catmandu-MARC)
[](http://cpants.cpanauthors.org/dist/Catmandu-MARC)
=end markdown
=head1 SYNOPSIS
# On the command line
$ catmandu convert MARC to JSON < data.mrc
$ catmandu convert MARC --type MiJ to YAML < data.marc_in_json
$ catmandu convert MARC --fix "marc_map(245,title)" < data.mrc
$ catmandu convert MARC --fix myfixes.txt < data.mrc
myfixes:
marc_map("245a", title)
marc_map("5**", note.$append)
marc_map('710','my.authors.$append')
marc_map('008_/35-35','my.language')
remove_field(record)
add_field(my.funny.field,'test123')
$ catmandu import MARC --fix myfixes.txt to ElasticSearch --index_name 'catmandu' < data.marc
# In perl
use Catmandu;
my $importer = Catmandu->importer('MARC', file => 'data.mrc' );
my $fixer = Catmandu->fixer('myfixes.txt');
my $store = Catmandu->store('ElasticSearch', index_name => 'catmandu');
$store->add_many(
$fixer->fix($importer)
);
=head1 MODULES
=over
=item * L<Catmandu::MARC::Tutorial>
=item * L<Catmandu::Importer::MARC>
=item * L<Catmandu::Importer::MARC::ALEPHSEQ>
=item * L<Catmandu::Importer::MARC::ISO>
=item * L<Catmandu::Importer::MARC::Line>
=item * L<Catmandu::Importer::MARC::Lint>
=item * L<Catmandu::Importer::MARC::MARCMaker>
=item * L<Catmandu::Importer::MARC::MicroLIF>
=item * L<Catmandu::Importer::MARC::MiJ>
=item * L<Catmandu::Importer::MARC::RAW>
=item * L<Catmandu::Importer::MARC::Record>
=item * L<Catmandu::Importer::MARC::XML>
=item * L<Catmandu::Exporter::MARC>
=item * L<Catmandu::Exporter::MARC::ALEPHSEQ>
=item * L<Catmandu::Exporter::MARC::ISO>
=item * L<Catmandu::Exporter::MARC::Line>
=item * L<Catmandu::Exporter::MARC::MARCMaker>
=item * L<Catmandu::Exporter::MARC::MiJ>
=item * L<Catmandu::Exporter::MARC::XML>
=item * L<Catmandu::Fix::marc_add>
=item * L<Catmandu::Fix::marc_append>
=item * L<Catmandu::Fix::marc_copy>
=item * L<Catmandu::Fix::marc_cut>
=item * L<Catmandu::Fix::marc_decode_dollar_subfields>
=item * L<Catmandu::Fix::marc_in_json>
=item * L<Catmandu::Fix::marc_map>
=item * L<Catmandu::Fix::marc_paste>
=item * L<Catmandu::Fix::marc_remove>
=item * L<Catmandu::Fix::marc_replace_all>
=item * L<Catmandu::Fix::marc_set>
=item * L<Catmandu::Fix::marc_sort>
=item * L<Catmandu::Fix::marc_spec>
=item * L<Catmandu::Fix::marc_xml>
=item * L<Catmandu::Fix::Bind::marc_each>
=item * L<Catmandu::Fix::Condition::marc_all_match>
=item * L<Catmandu::Fix::Condition::marc_any_match>
=item * L<Catmandu::Fix::Condition::marc_has>
=item * L<Catmandu::Fix::Condition::marc_has_many>
=item * L<Catmandu::Fix::Condition::marc_match>
=item * L<Catmandu::Fix::Condition::marc_spec_has>
=item * L<Catmandu::Fix::Inline::marc_map>
=item * L<Catmandu::Fix::Inline::marc_add>
=item * L<Catmandu::Fix::Inline::marc_remove>
=item * L<Catmandu::Fix::Inline::marc_set>
=back
=head1 DESCRIPTION
With Catmandu, LibreCat tools abstract digital library and research services as data
warehouse processes. As stores we reuse MongoDB or ElasticSearch providing us with
developer friendly APIs. Catmandu works with international library standards such as
MARC, MODS and Dublin Core, protocols such as OAI-PMH, SRU and open repositories such
as DSpace and Fedora. And, of course, we speak the evolving Semantic Web.
Follow us on L<http://librecat.org> and read an introduction into Catmandu data
processing at L<https://github.com/LibreCat/Catmandu/wiki>.
=head1 SEE ALSO
L<Catmandu>,
L<Catmandu::Importer>,
L<Catmandu::Fix>,
L<Catmandu::Store>,
L<MARC::Spec>
=head1 AUTHOR
Patrick Hochstenbach, C<< <patrick.hochstenbach at ugent.be> >>
=head1 CONTRIBUTORS
=over
=item * Nicolas Steenlant, C<< <nicolas.steenlant at ugent.be> >>
=item * Nicolas Franck, C<< <nicolas.franck at ugent.be> >>
=item * Johann Rolschewski, C<< jorol at cpan.org >>
=item * Chris Cormack
=item * Robin Sheat
=item * Carsten Klee, C<< klee at cpan.org >>
=back
=head1 COPYRIGHT
Copyright 2012- Patrick Hochstenbach , Carsten Klee, Johann Rolschewski
=head1 LICENSE
This program is free software; you can redistribute it and/or modify it
under the terms of either: the GNU General Public License as published
by the Free Software Foundation; or the Artistic License.
See L<http://dev.perl.org/licenses/> for more information.
=cut