Group
Extension

TeX-Hyphen-Pattern/tools/build_catalog_from_ctan.pl

#!/usr/bin/env perl
# -*- cperl; cperl-indent-level: 4 -*-
# Copyright (C) 2009-2021, Roland van Ipenburg
use strict;
use warnings;

use utf8;
use 5.014000;

use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
use Encode qw(encode);
use English q{-no_match_vars};
use File::Basename;
use File::Spec;
use File::Temp qw(tempfile);
use Getopt::Long;
use IO::File;
use File::Slurp qw(read_file);
use Log::Log4perl qw(:easy get_logger);
use HTTP::Tiny::Cache;
use Pod::Usage;
use Perl::Tidy;
use Progress::Any;
use Progress::Any::Output q{TermProgressBarColor};

our $VERSION = v1.1.8;
use Readonly;

## no critic qw(prohibitCallsToUnexportedSubs)
Readonly::Scalar my $ZIP =>
  q{http://mirror.ctan.org/language/hyphenation-utf8.zip};
Readonly::Scalar my $PREFIX      => q{../};
Readonly::Scalar my $TARGET      => q{lib/TeX/Hyphen/Pattern/};
Readonly::Scalar my $TARGET_PATH => File::Spec->catdir( $PREFIX, $TARGET );
Readonly::Scalar my $PM_EXT      => q{.pm};
Readonly::Scalar my $EMPTY       => q{};
Readonly::Scalar my $SPACE       => q{ };
Readonly::Scalar my $DASH        => q{-};
Readonly::Scalar my $UNDERSCORE  => q{_};
Readonly::Scalar my $NEWLINE     => qq{\n};
Readonly::Scalar my $CACHE       => 3600;
Readonly::Scalar my $KEY         => q{HTTP_TINY_CACHE_MAX_AGE};
Readonly::Scalar my $LOG_CONF    => q{build_catalog_log.conf};
Readonly::Array my @DEBUG_LEVELS => ( $WARN, $INFO, $DEBUG, $TRACE );

Readonly::Scalar my $FIND_LOCALE => qr{ .*/hyph-([^./]+)[.]tex$ }smx;

Readonly::Array my @GETOPT_CONFIG =>
  qw(no_ignore_case bundling auto_version auto_help);
Readonly::Array my @GETOPTIONS  => ( q{verbose|v+}, q{max_age|m=i} );
Readonly::Hash my %OPTS_DEFAULT => ( 'max_age' => $CACHE );
Readonly::Hash my %LOG          => (
    'DOWNLOAD_STARTED'  => q{Started download of resource '%s'},
    'DOWNLOAD_FINISHED' => q{Finished download of resource '%s'},
    'DOWNLOAD_WRITE'    => q{Writing download temporarily to file '%s'},
    'ERROR_WRITE'       => q{Error writing to file '%s', stopped %s},
    'ERROR_OPEN'        => q{Error opening file '%s', stopped %s},
    'ERROR_CLOSE'       => q{Error closing file '%s', stopped %s},
    'ERROR_DOWNLOAD'    => q{Error downloading resource '%s'},
    'ERROR_ZIP'         => q{Error reading file '%s' as ZIP archive},
    'PACKAGE'           => q{Adding package '%s' to catalog},
    'FILE'              => q{Extracting data from file '%s' for locale '%s'},
    'CONTENTS'          => qq{Contents of file '%s':\n%s},
);
## use critic

Getopt::Long::Configure(@GETOPT_CONFIG);
my %opts = %OPTS_DEFAULT;
Getopt::Long::GetOptions( \%opts, @GETOPTIONS ) or Pod::Usage::pod2usage(2);

if ( -r $LOG_CONF ) {
## no critic qw(ProhibitCallsToUnexportedSubs)
    Log::Log4perl::init_and_watch($LOG_CONF);
## use critic
}
else {
## no critic qw(ProhibitCallsToUnexportedSubs)
    Log::Log4perl::easy_init($ERROR);
## use critic
}
my $log = Log::Log4perl->get_logger( File::Basename::basename $PROGRAM_NAME );
$log->level(
    $DEBUG_LEVELS[
      (
          ( $opts{'verbose'} || 0 ) > $#DEBUG_LEVELS
          ? $#DEBUG_LEVELS
          : $opts{'verbose'}
      )
      || 0
    ],
);

my $zip;
my ( $fh, $zip_name ) = File::Temp::tempfile();
$log->info( sprintf $LOG{'DOWNLOAD_STARTED'}, $ZIP );
## no critic (RequireLocalizedPunctuationVars)
$ENV{$KEY} = ( $opts{'max_age'} ) // $ENV{$KEY} // $CACHE;
## use critic
my $response = HTTP::Tiny::Cache->new->get($ZIP);
if ( ${$response}{'success'} ) {
    $log->info( sprintf $LOG{'DOWNLOAD_FINISHED'}, $ZIP );
    $log->debug( sprintf $LOG{'DOWNLOAD_WRITE'}, $zip_name );
    print {$fh} ${$response}{'content'}
      or $log->logdie( sprintf $LOG{'ERROR_WRITE'}, $zip_name, $ERRNO );
    close $fh or $log->error( sprintf $LOG{'ERROR_CLOSE'}, $zip_name, $ERRNO );
    $zip = Archive::Zip->new();
## no critic qw(prohibitCallsToUnexportedSubs)
    if ( $zip->read($zip_name) != Archive::Zip::AZ_OK ) {
## use critic
        unlink $zip_name;
        $log->logdie( sprintf $LOG{'ERROR_ZIP'}, $zip_name );
    }
}
else {
    $log->logdie( sprintf $LOG{'ERROR_DOWNLOAD'}, $ZIP );
}

## no critic qw(prohibitCallsToUnexportedSubs)
my $template = File::Slurp::read_file( \*DATA, 'binmode' => ':utf8' );
## use critic

sub get_locale {
    my $filename = shift;
    if ( $filename =~ s{$FIND_LOCALE}{$1}smx ) {
        return $filename;
    }
    return ();
}

my @locales =
  sort map { get_locale( $_->fileName ) } $zip->membersMatching($FIND_LOCALE);
$log->info( +@locales . q{ locales found in the zip: } . join $SPACE,
    @locales );

sub get_data {
    my $locale = shift;
    my $file   = qq{/hyph-$locale.tex};
    $log->debug( sprintf $LOG{'FILE'}, $file, $locale );
    my $member =
      shift @{ [ $zip->membersMatching($file) ] };
    $log->trace( sprintf $LOG{'CONTENTS'}, $file, $member->contents || $EMPTY );
    my $re_starts = qr{(%|\\(?:message|bgroup|lccode|begingroup|def|edef))}smx;
    my $re_lic    = qr{(?<lic>(?:(?:$re_starts\N*.)|\v)+)}smx;
    my $re_pat    = qr{(?<pat>\\(patterns[{].*[}]|input[ ]\S+))?}smx;
    my $re_hyp    = qr{\s*(?<hyp>\\hyphenation[{].*[}])?}smx;
    Encode::decode( q{utf8}, ( $member->contents || $EMPTY ) ) =~
      m{$re_lic$re_pat$re_hyp}gsmx;
    Encode::decode( q{utf8}, ( $member->contents || $EMPTY ) ) =~
      m{$re_lic$re_pat$re_hyp}gsmx;
    my $lic = $LAST_PAREN_MATCH{'lic'} || $EMPTY;
    my $pat = $LAST_PAREN_MATCH{'pat'} || $EMPTY;
    my $hyp = $LAST_PAREN_MATCH{'hyp'} || $EMPTY;
    $lic =~ s{^%[ ]?}{}gsmx;
## no critic qw(RequireLineBoundaryMatching)
    $lic =~ s{\s+$}{}gsx;
## use critic
    return ( $lic, $pat, $hyp );
}

sub generate {
    my $filename;
    my $package;

    my $progress;
    ( $log->is_info() && !$log->is_debug() )
      && (
        $progress = Progress::Any->get_indicator(
            'task'   => q{generate},
            'pos'    => 0,
            'target' => ~~ @locales,
        )
      );

    while ( my $locale = shift @locales ) {
        $package = ucfirst $locale;
        $package =~ s/$DASH/$UNDERSCORE/xmgis;
        $filename = File::Spec->catdir( $TARGET_PATH, $package . $PM_EXT );
        my $target = IO::File->new( q{> } . $filename );
        $progress
          && $progress->update(
            'message' => sprintf $LOG{'PACKAGE'},
            $package,
          );
        $target->binmode(q{utf8});
        if ( defined $target ) {
            my $source = sprintf $template,
              (
                $package, $::VERSION, $package, $package, $package,
                $package, get_data($locale),
              );
            my $destination;
            my $error = Perl::Tidy::perltidy(
                'source'      => \$source,
                'destination' => \$destination,
            );
            print {$target} $destination
              or $log->logdie( sprintf $LOG{'ERROR_WRITE'}, $target, $ERRNO );
            $target->close
              or $log->error( sprintf $LOG{'ERROR_CLOSE'}, $filename, $ERRNO );
        }
        else {
            $log->logdie( sprintf $LOG{'ERROR_OPEN'}, $filename, $ERRNO );
        }
    }
    $progress && $progress->finish();
    return;
}

generate();

unlink $zip_name;

## no critic qw(RequirePodAtEnd)

=encoding utf8

=for stopwords CTAN Ipenburg MERCHANTABILITY Readonly

=head1 NAME

build_catalog_from_ctan.pl - generate the pattern module files from the CTAN
upstream source

=head1 USAGE

./build_catalog_from_ctan.pl

=head1 DESCRIPTION

For package maintainers and to adhere to the open source licenses of the
patterns, this script can be used to generate the pattern files from their
upstream source on L<CTAN|https:://www.ctan.org>.

=head1 REQUIRED ARGUMENTS

There are no required arguments.

=head1 OPTIONS

=over 4

=item * B<max_age>: The maximum time in seconds the remote file stays cached.
Default one hour.

=item * B<verbose>: Be more verbose.

=back

=head1 DIAGNOSTICS

The script uses L<Log::Log4perl> for logging and that can be configured in a
file named C<build_catalog_log.conf>.

=head1 EXIT STATUS

=head1 CONFIGURATION

The environment variable I<HTTP_TINY_CACHE_MAX_AGE> is used to set the default
caching period for requests to 3600 seconds if it wasn't set already, or to
the value given by the I<max_age> option.

=head1 DEPENDENCIES

=over 4

=item * L<Archive::Zip|Archive::Zip>
=item * L<Encode|Encode>
=item * L<English|English>
=item * L<File::Basename|File::Basename>
=item * L<File::Spec|File::Spec>
=item * L<File::Temp|File::Temp>
=item * L<Getopt::Long|Getopt::Long>
=item * L<IO::File|IO::File>
=item * L<File::Slurp|File::Slurp>
=item * L<Log::Log4perl|Log::Log4perl>
=item * L<HTTP::Tiny::Cache|HTTP::Tiny::Cache>
=item * L<Pod::Usage|Pod::Usage>
=item * L<Perl::Tidy|Perl::Tidy>
=item * L<Readonly|Readonly>
=item * L<Progress::Any|Readonly>
=item * L<Progress::Any::Output::TermProgressBarColor|Progress::Any::Output::TermProgressBarColor>

=back

=head1 INCOMPATIBILITIES

This script only aims to get basic TeX stuff needed for hyphenation, it
doesn't properly parse TeX syntax, so complicated TeX pattern are likely to
fail, but also not very likely to occur.

=head1 BUGS AND LIMITATIONS

Please report any bugs or feature requests at
L<Bitbucket|
https://bitbucket.org/rolandvanipenburg/tex-hyphen-pattern/issues>.

=head1 AUTHOR

Roland van Ipenburg  C<< <roland@rolandvanipenburg.com> >>

=head1 LICENSE AND COPYRIGHT

Copyright 2009-2021 by Roland van Ipenburg

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.12.2 or,
at your option, any later version of Perl 5 you may have available.

=head1 DISCLAIMER OF WARRANTY

BECAUSE THIS SOFTWARE IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE SOFTWARE, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE SOFTWARE "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE SOFTWARE IS WITH
YOU. SHOULD THE SOFTWARE PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
NECESSARY SERVICING, REPAIR, OR CORRECTION.

IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE SOFTWARE AS PERMITTED BY THE ABOVE LICENSE, BE
LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL,
OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE
THE SOFTWARE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
FAILURE OF THE SOFTWARE TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

=cut

__DATA__
## no critic qw(RequirePodSections)    # -*- cperl -*-
# This file is auto-generated by the Perl TeX::Hyphen::Pattern Suite hyphen
# pattern catalog generator. This code generator comes with the
# TeX::Hyphen::Pattern module distribution in the tools/ directory
#
# Do not edit this file directly.

package TeX::Hyphen::Pattern::%s v%vd;
use strict;
use warnings;
use 5.014000;
use utf8;

use Moose;

my $pattern_file = q{};
while (<DATA>) {
	 $pattern_file .= $_;
}

sub pattern_data {
	return $pattern_file;
}

sub version {
	return $TeX::Hyphen::Pattern::%s::VERSION;
}

1;
## no critic qw(RequirePodAtEnd RequireASCII ProhibitFlagComments)
=encoding utf8

=for stopwords CTAN Ipenburg %s

=head1 NAME

TeX::Hyphen::Pattern::%s - class for hyphenation in locale %s

=head1 SUBROUTINES/METHODS

=over 4

=item $pattern-E<gt>pattern_data();

Returns the pattern data.

=item $pattern-E<gt>version();

Returns the version of the pattern package.

=back

=head1 COPYRIGHT

=begin text

%s

=end text

=cut

__DATA__
%s
%s


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.