Group
Extension

Encoding-HandleUtf8/lib/Encoding/HandleUtf8.pm

package Encoding::HandleUtf8;
# ABSTRACT: Fix the encoding for Perl value store (input) and general output (output) to a console or the web.
#
# This file is part of Encoding-HandleUtf8
#
# This software is Copyright (c) 2014 by BURNERSK <burnersk@cpan.org>.
#
# This is free software, licensed under:
#
#   The MIT (X11) License
#
use strict;
use warnings FATAL => 'all';

BEGIN {
  our $VERSION = '0.004'; # VERSION: generated by Dist::Zilla
}

use Carp qw( carp croak );
use Encoding::FixLatin qw( fix_latin );
use Clone 'clone';
use List::Util qw( first );

############################################################################
# Prototype definition - required for the recursion, otherwise it will not
# find itself because it is not already in the internal symbol list.

sub fix_encoding ($\[$@%];$);
sub fix_encoding_return ($$;$);

############################################################################
# Setup exporter.

our @EXPORT_OK;
our @IGNORE_LIST;

BEGIN {
  use base 'Exporter';
  @EXPORT_OK = qw( &fix_encoding &fix_encoding_return );
  @IGNORE_LIST = qw( Cpanel::JSON::XS::Boolean JSON::XS::Boolean JSON::PP::Boolean JSON::Boolean )
    unless scalar @IGNORE_LIST;
}

############################################################################
# Utility function to fix the encoding for Perl value store (input) and
# general output (output) to a console or the web.
# Second parameter is the object indended which should be fixed. Due to
# prototypes it automatically turns into an reference.
sub fix_encoding ($\[$@%];$) {
  my ( $direction, $obj, $skip_latin ) = @_;

  my $ref = ref $obj;
  my $obj_ref = $ref eq 'REF' ? ref ${$obj} : $ref;

  # Check encoding direction.
  croak sprintf q{invalid direction '%s' (input or output)}, $direction
    if !$direction || ( $direction ne 'input' && $direction ne 'output' );

  # If $obj is just a string everything is very basic.
  if ( $ref eq 'SCALAR' ) {

    # Fix possible mixed encodings.
    ${$obj} = fix_latin ${$obj} unless $skip_latin;

    # Final encoding it to UTF-8 (output) or Unicode (input).
    if ( $direction eq 'output' ) {
      utf8::encode ${$obj} if defined ${$obj} && utf8::is_utf8 ${$obj};
    }
    else {
      utf8::decode ${$obj} if defined ${$obj} && !utf8::is_utf8 ${$obj};
    }
  }

  # Iterate over an array reference.
  elsif ( $ref eq 'ARRAY' ) {
    fix_encoding $direction, $_ for ( @{$obj} );
  }

  # Iterate over the keys of a hash reference.
  elsif ( $ref eq 'HASH' ) {
    fix_encoding $direction, $obj->{$_} for ( keys %{$obj} );
  }

  # Otherwise if $obj is a reference we have to use some recursive magic.
  elsif ( $ref eq 'REF' ) {

    # Iterate over an array reference.
    if ( $obj_ref eq 'ARRAY' ) {
      fix_encoding $direction, $_ for ( @{ ${$obj} } );
    }

    # Iterate over the keys of a hash reference.
    elsif ( $obj_ref eq 'HASH' ) {
      fix_encoding $direction, ${$obj}->{$_} for ( keys %{ ${$obj} } );
    }

    # Everything else is not supported.
    else {
      carp sprintf q{unsupported reference '%s'}, $obj_ref
        unless first { $_ eq $obj_ref } @IGNORE_LIST;
    }

  }

  # w00t - this shouldn't ever happen!
  else {
    carp sprintf q{unknown object reference '%s'}, $ref
      unless first { $_ eq $ref } @IGNORE_LIST;
  }

  return $ref eq 'REF' || $ref eq 'SCALAR' ? ${$obj} : $obj;
}


############################################################################
# Does the same like fix_encoding but do not touches original reference.
sub fix_encoding_return ($$;$) {
  my ( $direction, $obj, $skip_latin ) = @_;
  my $obj_cloned = clone $obj;
  fix_encoding $direction, $obj_cloned, $skip_latin // 0;
  return $obj_cloned;
}



############################################################################
1;
############################################################################

__END__

=pod

=encoding UTF-8

=head1 NAME

Encoding::HandleUtf8 - Fix the encoding for Perl value store (input) and general output (output) to a console or the web.

=head1 VERSION

version 0.004

=head1 SYNOPSIS

    use Encoding::HandleUtf8 qw( fix_encoding fix_encoding_return );
    
    ########################################################################
    # Simple usage - CAUTION: clones the object.
    
    printf "%s\n", fix_encoding_return 'input', 'Here are some German umlauts: äÄüÜöÖßẞ';
    
    ########################################################################
    # Working with strings.
    
    my $string = 'Here are some German umlauts: äÄüÜöÖßẞ';
    
    # Fix the encoding of a input string to handle them safe within Perl.
    fix_encoding 'input', $string;
    
    # Fix the encoding of a Perl variable for output.
    fix_encoding 'output', $string;
    print "$string\n";
    
    ########################################################################
    # Working with hashes.
    
    my %hash = ( a => 'äÄ', u => 'üÜ', o => 'öÖ', ss => 'ßẞ' );
    
    # Fix the encoding of a hash to handle them safe within Perl.
    fix_encoding 'input', %hash;
    
    # Fix the encoding of a Perl hash for output.
    fix_encoding 'output', %hash;
    print "$_: $hash{$_}\n" for( keys %hash );
    
    ########################################################################
    # Working with hash references.
    
    my $hash = { a => 'äÄ', u => 'üÜ', o => 'öÖ', ss => 'ßẞ' };
    
    # Fix the encoding of a hash reference to handle them safe within Perl.
    fix_encoding 'input', $hash;
    
    # Fix the encoding of a Perl hash reference for output.
    fix_encoding 'output', %hash;
    print "$_: $hash->{$_}\n" for( keys %{ $hash } );
    
    ########################################################################
    # Working with arrays.
    
    my @array = ( 'äÄ', 'üÜ', 'öÖ', 'ßẞ' );
    
    # Fix the encoding of an array to handle them safe within Perl.
    fix_encoding 'input', @array;
    
    # Fix the encoding of a Perl array for output.
    fix_encoding 'output', @array;
    print "$_: $hash{$_}\n" for( @array );
    
    ########################################################################
    # Working with array references.
    
    my $array = [ 'äÄ', 'üÜ', 'öÖ', 'ßẞ' ];
    
    # Fix the encoding of an array reference to handle them safe within Perl.
    fix_encoding 'input', $array;
    
    # Fix the encoding of a Perl array reference for output.
    fix_encoding 'output', @array;
    print "$_: $hash{$_}\n" for( @{ $array } );

=head1 DESCRIPTION

Fix the encoding for Perl value store (input) and general output (output) to
e.g. a console or the web.

=head1 METHODS

=head2 fix_encoding

Takes an direction and a object and fixes the encoding.

=over

=item B<Required parameters>

=over

=item [0] C<$direction>

The direction in which the object should be fixed. Either C<input> to work
safely with inputs (convert to Unicode) or C<output> to output (convert to
UTF-8) them to e.g. a console or the web.

=item [1] C<$obj>

The actual object which should be fixed. Can either be a C<SCALAR>, C<HASH>
or C<ARRAY> (including but not mandentory: references).

=back

=item B<Optional parameters>

=over

=item [2] C<$skip_latin>

Skips L<Encoding::FixLatin>'s L<fix_latin|Encoding::FixLatin/fix_latin>
call on scalars when C<$skip_latin> is set to a true value.

=back

=back

=head2 fix_encoding_return

Does and takes exactly the same as L</fix_encoding> but instead touching the
original supplied object it will clone it an return the new encoded object.

=head1 CONFIGURATION

Encoding::HandleUtf8 warns about references it does not know. You can turn
off this warnings by adding the reference name to the C<@IGNORE_LIST>
array.

    push @Encoding::HandleUtf8::IGNORE_LIST, 'JSON::Boolean';

By default the following references will be ignored (no warnings):

=over

=item Cpanel::JSON::XS::Boolean

=item JSON::XS::Boolean

=item JSON::PP::Boolean

=item JSON::Boolean

=back

=head1 AUTHOR

BURNERSK <burnersk@cpan.org>

=head1 COPYRIGHT AND LICENSE

This software is Copyright (c) 2014 by BURNERSK <burnersk@cpan.org>.

This is free software, licensed under:

  The MIT (X11) License

=cut


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.