Group
Extension

Task-HTML5/lib/Task/HTML5/Examples/htmltidy.pod

=head1 NAME

Task::HTML5::Examples::htmltidy - fix and tidy HTML

=head1 DESCRIPTION

This simple example uses L<HTML::HTML5::Parser> to read some HTML,
L<HTML::HTML5::Sanity> to fix it up a little (including canonicalising
language tags), L<XML::LibXML::PrettyPrint> to indent it neatly, and
L<HTML::HTML5::Writer>.

HTML::HTML5::Writer outputs some fairly idiomatic HTML by default,
leaving out optional start and end tags, avoiding quoting attribute
values, etc. Passing the command-line option C<--markup=xhtml> (or
just C<-mxml> for short) will output strict XHTML, so should improve
matters.

=head1 EXAMPLE

  #!/usr/bin/perl
  
  use 5.010;
  use strict;
  
  use Getopt::Long 0 qw(:config permute bundling no_auto_abbrev);
  use HTML::HTML5::Parser 0.107 qw();
  use HTML::HTML5::Sanity 0.101 qw(fix_document);
  use XML::LibXML::PrettyPrint 0.001 qw();
  use HTML::HTML5::Writer 0.104 qw();
  
  # Read command-line options
  my %options;
  Getopt::Long::GetOptions(\%options,
    'markup|m=s',
    'polyglot=i',
    'doctype=s',
    'charset=s',
    'quote_attributes=s',
    'voids=s',
    'start_tags=s',
    'end_tags=s',
    'refs=s',
    'indent_string=s',
    );
  my $input  = shift // '-';
  my $output = shift // '-';
  
  # Create a parser object and parse input HTML
  my $parser = HTML::HTML5::Parser->new;
  my $dom = ($input eq '-')
    ? $parser->parse_string(do { local $/ = <STDIN> })
    : $parser->parse_file($input);
  
  # Use HTML::HTML5::Sanity to fix up HTML quirks
  $HTML::HTML5::Sanity::FIX_LANG_ATTRIBUTES = 2;
  my $fixed_dom = fix_document($dom);
  
  # Pretty indentation
  XML::LibXML::PrettyPrint
    ->new_for_html(indent_string => (delete $options{indent_string} // "\t"))
    ->pretty_print($fixed_dom);
  
  # Create a writer object
  my $writer = HTML::HTML5::Writer
    ->new(%options);
  
  if ($output eq '-')
  {
    # Output to STDOUT
    say $writer->document($fixed_dom);
  }
  else
  {
    # Output to FILE
    open my($fh), '>:encoding(UTF-8)', $output;
    say $fh $writer->document($fixed_dom);
    close $fh;
  }

=head1 AUTHOR

Toby Inkster E<lt>tobyink@cpan.orgE<gt>.

=head1 COPYRIGHT

Copyright 2011 Toby Inkster.

This library is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut



Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.