Group
Extension

Wiki-JSON/lib/Wiki/JSON/HTML.pm

package Wiki::JSON::HTML;

use v5.16.3;

use strict;
use warnings;

use Moo;
use Mojo::Util qw/xml_escape/;
use Mojo::URL;

has _wiki_json => ( is => 'lazy' );

sub pre_html_json {
    my ( $self, $wiki_text, $template_callback, $options ) = @_;
    $options //= {};
    my @dom;
    push @dom,
      $self->_open_html_element( 'article', 0, { class => 'wiki-article' } );
    my $json =
      $self->_wiki_json->parse( $wiki_text, { track_lines_for_errors => 1 } );

    #    print Data::Dumper::Dumper $json;

    push @dom, @{ $self->_parse_output( $json, $template_callback, $options ) };
    push @dom, $self->_close_html_element('article');
    return \@dom;
}

sub _build__wiki_json {
    my $self = shift;
    require Wiki::JSON;
    return Wiki::JSON->new;
}

sub _open_html_element {
    if ( @_ < 2 ) {
        die '_open_html_element needs $self and $tag at least as arguments';
    }
    my ( $self, $tag, $self_closing, $attributes ) = @_;
    $self_closing //= 0;
    $attributes   //= {};
    if ( 'HASH' ne ref $attributes ) {
        die 'HTML attributes are not a HASHREF';
    }
    return {
        tag    => $tag,
        status => $self_closing ? 'self-close' : 'open',
        attrs  => $attributes,
    };
}

sub _close_html_element {
    if ( @_ != 2 ) {
        die
'_close_html_element accepts exactly the following arguments $self and $tag';
    }
    my ( $self, $tag ) = @_;
    return {
        tag    => $tag,
        status => 'close',
    };
}

sub _html_string_content_to_pushable {
    my ( $self, $content ) = @_;
    $content =~ s/(?:\r|\n)/ /gs;
    $content =~ s/ +/ /gs;
    return $content;
}

sub _parse_output_try_parse_plain_text {
    if ( @_ != 6 ) {
        die
'_parse_output_try_parse_plain_text needs $self, $dom, $element, $last_element_inline_element, $needs_closing_parragraph, $options';
    }
    my ( $self, $dom, $element, $last_element_inline_element,
        $needs_closing_parragraph, $options )
      = @_;
    my $needs_next = 0;
    my $found_text;
    if ( 'HASH' ne ref $element ) {
        $found_text = 1;
        if ( !$last_element_inline_element ) {
            ($needs_closing_parragraph) =
              $self->_close_parragraph( $dom, $needs_closing_parragraph,
                $options );
        }
        if ($element) {
            if ( !$last_element_inline_element ) {
                ($needs_closing_parragraph) =
                  $self->_open_parragraph( $dom, $needs_closing_parragraph, 0,
                    $options );
            }
            push @$dom, $self->_html_string_content_to_pushable($element);
        }
        $needs_next = 1;
    }
    return ( $needs_next, $needs_closing_parragraph, $found_text );
}

sub _parse_output_try_parse_italic {
    if ( @_ < 7 ) {
        die 'Incorrect arguments _parse_output_try_parse_italic';
    }
    my ( $self, $dom, $element, $found_inline_element,
        $needs_closing_parragraph, $template_callback, $options )
      = @_;
    my $needs_next;
    if ( $element->{type} eq 'italic' ) {
        $found_inline_element = 1;
        ($needs_closing_parragraph) =
          $self->_open_parragraph( $dom, $needs_closing_parragraph,
            $found_inline_element, $options );
        push @$dom, $self->_open_html_element('i');
        push @$dom,
          @{
            $self->_parse_output(
                $element->{output}, $template_callback,
                { %$options, inside_inline_element => 1 }
            )
          };
        push @$dom, $self->_close_html_element('i');
        $needs_next = 1;
    }
    return ( $needs_next, $needs_closing_parragraph, $found_inline_element );
}

sub _parse_output_try_parse_bold_and_italic {
    if ( @_ < 7 ) {
        die 'Incorrect arguments _parse_output_try_parse_bold_and_italic';
    }
    my ( $self, $dom, $element, $found_inline_element,
        $needs_closing_parragraph, $template_callback, $options )
      = @_;
    my $needs_next;
    if ( $element->{type} eq 'bold_and_italic' ) {
        $found_inline_element = 1;
        ($needs_closing_parragraph) =
          $self->_open_parragraph( $dom, $needs_closing_parragraph,
            $found_inline_element, $options );
        push @$dom, $self->_open_html_element('b');
        push @$dom, $self->_open_html_element('i');
        push @$dom,
          @{
            $self->_parse_output(
                $element->{output}, $template_callback,
                { %$options, inside_inline_element => 1 }
            )
          };
        push @$dom, $self->_close_html_element('i');
        push @$dom, $self->_close_html_element('b');
        $needs_next = 1;
    }
    return ( $needs_next, $needs_closing_parragraph, $found_inline_element );
}

sub _parse_output_try_parse_bold {
    if ( @_ < 7 ) {
        die 'Incorrect arguments _parse_output_try_parse_bold';
    }
    my ( $self, $dom, $element, $found_inline_element,
        $needs_closing_parragraph, $template_callback, $options )
      = @_;
    my $needs_next;
    if ( $element->{type} eq 'bold' ) {
        $found_inline_element = 1;
        ($needs_closing_parragraph) =
          $self->_open_parragraph( $dom, $needs_closing_parragraph,
            $found_inline_element, $options );
        push @$dom, $self->_open_html_element('b');
        push @$dom,
          @{
            $self->_parse_output(
                $element->{output}, $template_callback,
                { %$options, inside_inline_element => 1 }
            )
          };
        push @$dom, $self->_close_html_element('b');
        $needs_next = 1;
    }

    return ( $needs_next, $needs_closing_parragraph, $found_inline_element );
}

sub _parse_output_try_parse_link {
    if ( @_ < 6 ) {
        die 'Incorrect arguments';
    }
    my ( $self, $dom, $element, $needs_closing_parragraph,
        $found_inline_element, $options )
      = @_;
    my $needs_next;
    if ( $element->{type} eq 'link' ) {
        $found_inline_element = 1;
        ($needs_closing_parragraph) =
          $self->_open_parragraph( $dom, $needs_closing_parragraph,
            $found_inline_element, $options );
        my $real_link = $element->{link};
        if ( $real_link !~ /^\w:/ && $real_link !~ m@^(?:/|\w+\.)@ ) {

            # TODO: Allow setting a base URL.
            $real_link = '/' . $real_link;
        }
        push @$dom,
          $self->_open_html_element( 'a', 0,
            { href => $real_link =~ s/ /%20/gr } );
        push @$dom,
          $self->_html_string_content_to_pushable( $element->{title} );
        push @$dom, $self->_close_html_element('a');
        $needs_next = 1;
    }
    return ( $needs_next, $needs_closing_parragraph, $found_inline_element );
}

sub _parse_output_try_parse_image {
    if ( @_ < 6 ) {
        die 'Incorrect arguments';
    }
    my ( $self, $dom, $element, $needs_closing_parragraph,
        $found_inline_element, $options )
      = @_;
    my $needs_next;
    if ( $element->{type} eq 'image' ) {
        {
            $needs_next = 1;
            my $format = $element->{options}{format};
            $format //= {};
            my $is_inline = !$format->{thumb} && !$format->{frame};
            if ($is_inline) {
                $found_inline_element = 1;
                ($needs_closing_parragraph) =
                  $self->_open_parragraph( $dom, $needs_closing_parragraph,
                    $found_inline_element, $options );
            }
            else {
                if ( $options->{inside_inline_element} ) {
                    say STDERR
'Image found when the content is expected to be inline WIKI_LINE: '
                      . $element->{start_line};
                }
                ($needs_closing_parragraph) =
                  $self->_close_parragraph( $dom, $needs_closing_parragraph,
                    $options );
            }
            my $link_url = Mojo::URL->new( $element->{link} );
            my $is_video = $link_url->path =~ /\.(?:mp4|webm|ogg|3gp|mpeg)/;
            my $is_pdf   = $link_url->path =~ /\.(?:pdf)/;

            my $pdf_element_attrs = sub {
                my $page;
                my $fragment;
                if ( defined $element->{options}{page} ) {
                    $page     = $element->{options}{page};
                    $fragment = "page=@{[0+$page]}";
                }
                if ( defined $fragment ) {
                    say $fragment;
                    $link_url->fragment($fragment);
                }
                return { src => "$link_url", };
            };
            if ($is_inline) {
                if ($is_pdf) {
                    push @$dom,
                      $self->_open_html_element( 'iframe', 0,
                        $pdf_element_attrs->(), );
                    push @$dom, $self->_close_html_element('iframe');
                    next;
                }
                if ($is_video) {
                    push @$dom,
                      $self->_open_html_element(
                        'video', 1,
                        {
                            src => "" . $link_url,
                        }
                      );
                    next;
                }
                my $alt = $element->{options}{alt} // $element->{caption};
                push @$dom,
                  $self->_open_html_element(
                    'img', 1,
                    {
                        src => "$link_url",
                        ( ( defined $alt ) ? ( alt => $alt ) : () )
                    }
                  );
                next;
            }
            my $typeof = 'mw:File/Frame';
            if ( $format->{thumb} ) {
                $typeof = 'mw:File/Thumb';
            }
            push @$dom,
              $self->_open_html_element( 'figure', 0, { typeof => $typeof } );

            my $alt = $element->{options}{alt};
            {
                if ($is_pdf) {
                    push @$dom,
                      $self->_open_html_element( 'iframe', 0,
                        $pdf_element_attrs->(), );
                    push @$dom, $self->_close_html_element('iframe');
                    next;
                }
                if ($is_video) {
                    push @$dom,
                      $self->_open_html_element(
                        'video', 1,
                        {
                            src => $link_url . ''
                        }
                      );
                    next;
                }
                push @$dom,
                  $self->_open_html_element(
                    'img', 1,
                    {
                        src => $link_url . '',
                        (
                              ( defined $alt )
                            ? ( alt => $alt )
                            : ()
                        )
                    }
                  );
            }
            if ( defined $element->{caption} ) {
                push @$dom, $self->_open_html_element('figcaption');
                push @$dom, $element->{caption};
                push @$dom, $self->_close_html_element('figcaption');
            }
            push @$dom, $self->_close_html_element('figure');
        }
    }
    return ( $needs_next, $needs_closing_parragraph, $found_inline_element );
}

sub _parse_output_try_parse_template {
    my ( $self, $dom, $element, $needs_closing_parragraph,
        $found_inline_element, $template_callbacks, $options )
      = @_;
    my $needs_next;
    if ( $element->{type} eq 'template' ) {
        my $template  = $element;
        my $is_inline = $template_callbacks->{is_inline}->($template);
        if ( $options->{inside_inline_element} && !$is_inline ) {
            say STDERR
'No-inline (block) template found inside inline element WIKI_LINE: '
              . $element->{start_line};
        }
        if ($is_inline) {
            $found_inline_element = 1;
            ($needs_closing_parragraph) =
              $self->_open_parragraph( $dom, $needs_closing_parragraph,
                $found_inline_element, $options );
        }
        else {
            ($needs_closing_parragraph) =
              $self->_close_parragraph( $dom, $needs_closing_parragraph,
                $options );
        }
        my $parse_sub = sub {
            my ( $wiki_text, $options ) = @_;
            return $self->pre_html_json( $wiki_text, $template_callbacks,
                $options );
        };
        my $open_html_element_sub = sub {
            my ( $tag, $self_closing, $attrs ) = @_;
            if ( !defined $tag ) {
                die 'Tag is not optional';
            }
            $self_closing //= 0;
            $attrs        //= {};
            return $self->_open_html_element( $tag, $self_closing, $attrs );
        };
        my $close_html_element_sub = sub {
            my ($tag) = @_;
            if ( !defined $tag ) {
                die 'Tag is not optional';
            }
            return $self->_close_html_element($tag);
        };
        my $new_elements = $template_callbacks->{generate_elements}->(
            $element, $options, $parse_sub, $open_html_element_sub,
            $close_html_element_sub
        );
        if ( defined $new_elements ) {
            {
                if ( 'ARRAY' ne ref $new_elements ) {
                    warn
'Return from generate_elements is not an ArrayRef, user error';
                    next;
                }
                push @$dom, @$new_elements;
            }
        }
    }
    return ( $needs_next, $needs_closing_parragraph, $found_inline_element );
}

sub _parse_output_try_parse_unordered_list {
    if ( @_ < 6 ) {
        die 'Incorrect number of parameters';
    }
    my ( $self, $dom, $element, $needs_closing_parragraph, $template_callback,
        $options )
      = @_;
    my $needs_next;
    if ( $element->{type} eq 'unordered_list' ) {
        if ( $options->{inside_inline_element} ) {
            say STDERR
'unordered list found when content is expected to be inline WIKI_LINE: ',
              $element->{start_line};
        }
        ($needs_closing_parragraph) =
          $self->_close_parragraph( $dom, $needs_closing_parragraph, $options );
        my $elements = $element->{output};
        push @$dom, $self->_open_html_element('ul');
        for my $element (@$elements) {
            if ( 'HASH' ne ref $element ) {
                die 'List element is text and not hash';
            }
            if ( $element->{type} ne 'list_element' ) {
                die 'List element is not a list_element';
            }
            push @$dom, $self->_open_html_element('li');
            push @$dom,
              @{
                $self->_parse_output(
                    $element->{output}, $template_callback,
                    { %$options, is_list_element => 1 }
                )
              };
            push @$dom, $self->_close_html_element('li');
        }
        push @$dom, $self->_close_html_element('ul');
        $needs_next = 1;
    }
    return ( $needs_next, $needs_closing_parragraph );
}

sub _parse_output_try_parse_hx {
    if ( @_ < 6 ) {
        die 'Incorrect arguments to _parse_output_try_parse_hx';
    }
    my ( $self, $dom, $element, $needs_closing_parragraph, $template_callback,
        $options )
      = @_;
    my $needs_next;
    if ( $element->{type} eq 'hx' ) {
        if ( $options->{inside_inline_element} ) {
            say STDERR
              'HX found when the content is expected to be inline WIKI_LINE: '
              . $element->{start_line};
        }
        ($needs_closing_parragraph) =
          $self->_close_parragraph( $dom, $needs_closing_parragraph, $options );
        my $hx_level = $element->{hx_level};

        push @$dom, $self->_open_html_element( xml_escape "h$hx_level" );
        push @$dom,
          @{
            $self->_parse_output(
                $element->{output}, $template_callback,
                { %$options, inside_inline_element => 1 }
            )
          };
        push @$dom, $self->_close_html_element( xml_escape "h$hx_level" );
        $needs_next = 1;
    }
    return ( $needs_next, $needs_closing_parragraph );
}

sub _parse_output {
    if ( @_ < 3 ) {
        die '_parse_output needs at least $self and $output';
    }
    my ( $self, $output, $template_callback, $options ) = @_;
    $options //= {};
    my @dom;
    my $needs_closing_parragraph = 0;
    my $first                    = 1;
    my $last_element_inline_element;
    my $last_element_text;

    for my $element (@$output) {
        my $found_inline_element;
        my $found_text;
        {
            my ($needs_next);
            $options->{first}             = $first;
            $options->{last_element_text} = $last_element_text;
            ( $needs_next, $needs_closing_parragraph, $found_text ) =
              $self->_parse_output_try_parse_plain_text( \@dom, $element,
                $last_element_inline_element, $needs_closing_parragraph,
                $options );
            next if $needs_next;

            ( $needs_next, $needs_closing_parragraph, $found_inline_element ) =
              $self->_parse_output_try_parse_bold( \@dom, $element,
                $found_inline_element, $needs_closing_parragraph,
                $template_callback,    $options );
            next if $needs_next;
            ( $needs_next, $needs_closing_parragraph, $found_inline_element ) =
              $self->_parse_output_try_parse_bold_and_italic( \@dom, $element,
                $found_inline_element, $needs_closing_parragraph,
                $template_callback,    $options );
            next if $needs_next;
            ( $needs_next, $needs_closing_parragraph, $found_inline_element ) =
              $self->_parse_output_try_parse_italic( \@dom, $element,
                $found_inline_element, $needs_closing_parragraph,
                $template_callback,    $options );
            next if $needs_next;
            ( $needs_next, $needs_closing_parragraph ) =
              $self->_parse_output_try_parse_hx( \@dom, $element,
                $needs_closing_parragraph, $template_callback, $options );
            next if $needs_next;

            ( $needs_next, $needs_closing_parragraph ) =
              $self->_parse_output_try_parse_unordered_list( \@dom, $element,
                $needs_closing_parragraph, $template_callback, $options );
            next if $needs_next;
            ( $needs_next, $needs_closing_parragraph, $found_inline_element ) =
              $self->_parse_output_try_parse_link( \@dom, $element,
                $needs_closing_parragraph, $found_inline_element, $options );
            next if $needs_next;
            ( $needs_next, $needs_closing_parragraph, $found_inline_element ) =
              $self->_parse_output_try_parse_image( \@dom, $element,
                $needs_closing_parragraph, $found_inline_element, $options );
            next if $needs_next;
            ( $needs_next, $needs_closing_parragraph, $found_inline_element ) =
              $self->_parse_output_try_parse_template( \@dom, $element,
                $needs_closing_parragraph, $found_inline_element,
                $template_callback,        $options );
            next if $needs_next;
        }
        $first                       = 0;
        $last_element_inline_element = !!$found_inline_element;
        $last_element_text           = !!$found_text;
    }
    ($needs_closing_parragraph) =
      $self->_close_parragraph( \@dom, $needs_closing_parragraph, $options );
    return \@dom;
}

sub _open_parragraph {
    if ( @_ < 5 ) {
        die 'Incorrect arguments';
    }
    my ( $self, $dom, $needs_closing_parragraph, $found_inline_element,
        $options )
      = @_;
    if ( $options->{is_list_element} || $options->{inside_inline_element} ) {
        if ( !$options->{first} && !$found_inline_element ) {
            push @$dom, $self->_open_html_element( 'br', 1 );
        }
        return ($needs_closing_parragraph);
    }
    if ( !$needs_closing_parragraph ) {
        push @$dom, $self->_open_html_element('p');
        $needs_closing_parragraph = 1;
    }
    return ($needs_closing_parragraph);
}

sub _close_parragraph {
    my ( $self, $dom, $needs_closing_parragraph, $options ) = @_;
    if ($needs_closing_parragraph) {
        push @$dom, $self->_close_html_element('p');
        $needs_closing_parragraph = 0;
    }
    return ($needs_closing_parragraph);
}
1


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.