Yet Another CPAN Grep

Text-GooglewikiFormat/lib/Text/GooglewikiFormat.pm

package Text::GooglewikiFormat;

use warnings;
use strict;
use URI;
use URI::Escape;
use Text::GooglewikiFormat::Blocks;
use Scalar::Util qw( blessed reftype );
use URI::Find;

use vars qw( $VERSION %tags $indent $code_delimiters);
$VERSION = '0.05';
$indent  = qr/^(?:\t+|\s{4,})/;
$code_delimiters = 0;
%tags    = (
	indent		=> qr/^(?:\t+|\s{1,})/,
	newline		=> '<br />',

	strong		=> sub { " <strong>$_[0]</strong> " },
	italic      => sub { " <i>$_[0]</i> " },
	strike   	=> sub { qq~ <span style="text-decoration: line-through">$_[0]</span> ~ },
	superscript => sub { "<sup>$_[0]</sup>" },
	subscript   => sub { "<sub>$_[0]</sub>" },
	inline      => sub { "<tt>$_[0]</tt>" },
	strong_tag  => qr/(^|\s+)\*(.+?)\*(\s+|$)/,
	italic_tag  => qr/(^|\s+)_(.+?)_(\s+|$)/,
	strike_tag  => qr/(^|\s+)\~\~(.+?)\~\~(\s+|$)/,
	superscript_tag => qr/\^(.+?)\^/,
	subscript_tag   => qr/\,\,(.+?)\,\,/,
	inline_tag  => qr/\`(.+?)\`/,

    header      => [ '', '', sub {
		my $level = length $_[2];
		return "<h$level>", format_line($_[3], @_[-2, -1]), "</h$level>" }
	],
	unordered   => ["<ul>", "</ul>", '<li>', " </li>"],
    ordered		=> ["<ol>", "</ol>", '<li>', " </li>"],

	code		=> [ '<pre class="prettyprint">', "</pre>", sub {
	    my ($line, $level, $args, $tags, $opts) = @_;
	    $line =~ s/(^\{\{\{|\}\}\}$)//isg;
	    return (length($line)) ? $line . "\n" : '';
	} ],
	paragraph	=> [ '<p>', "</p>", '', "<br />", 1 ],
	quote       => [ '<blockquote>', "</blockquote>", '', "\n"],
	table       => [ '<table>', '</table>', sub {
	    my ($line, $level, $args, $tags, $opts) = @_;
	    $line =~ s/(^\|\||\|\|$)//isg;
	    $line =~ s/\|\|/\<\/td\>\<td style\=\"border\: 1px solid \#aaa\; padding\: 5px\;\"\>/isg;
	    $line = qq~<tr><td style="border: 1px solid #aaa; padding: 5px;">$line</td></tr> ~;
	    return $line,
	} ],
	

	blocks		=> {
	    header      => qr/^(=+)(.+)\1/,
		ordered		=> qr/^\#\s*/,
		unordered	=> qr/^\*\s*/,
	    quote       => qr/^ /,
		paragraph   => qr/^/,
		table       => qr/^\|\|/,
	},

	indented    => { map { $_ => 1 } qw( ordered unordered )},
	nests       => { map { $_ => 1 } qw( ordered unordered code table ) },

	blockorder               =>
		[qw( header ordered unordered table quote paragraph code )],
	
	link		=> \&make_html_link,
	extended_link_delimiters => [qw( [ ] )],
	schemas => [ qw( http https ftp mailto gopher ) ],
);

sub merge_hash {
	my ($from, $to) = @_;

	while (my ($key, $value) = each %$from)
	{
		if ((reftype( $value ) || '' ) eq 'HASH' )
		{
			$to->{$key} = {} unless defined $to->{$key};
			merge_hash( $value, $to->{$key} );
			next;
		}

		$to->{$key} = $value;
	}

	return $to;
}

sub format {
	my ($text, $newtags, $opts) = @_;

	$opts    ||=
	{
		prefix => '', extended => 1, implicit_links => 1, absolute_links => 1
	};

	my %tags   = %tags;

	merge_hash( $newtags, \%tags )
		if defined $newtags and ( reftype( $newtags ) || '' ) eq 'HASH';
	check_blocks( \%tags )
		if exists $newtags->{blockorder} or exists $newtags->{blocks};

    # find URIs
    my $finder = URI::Find->new( sub {
        my($uri, $orig_uri) = @_;
        # If your link points to an image (that is, if it ends in .png, .gif, .jpg or .jpeg), it will get inserted as an image into the page:
        if ($uri =~ /\.(jpe?g|png|gif)$/) {
            return qq|<img src="$uri" /> |;
        } else {
            return qq|[$uri]|;
        }
    } );
    $finder->find(\$text);
    $text =~ s/\[\[(.+?)\]/\[$1/isg; # dirty hack

	my @blocks =  find_blocks( $text,     \%tags, $opts );
	@blocks    = merge_blocks( \@blocks                 );
	@blocks    =  nest_blocks( \@blocks                 );

	return process_blocks( \@blocks,  \%tags, $opts );
}

sub check_blocks
{
	my $tags   = shift;
	my %blocks = %{ $tags->{blocks} };
	delete @blocks{ @{ $tags->{blockorder} } };

	if (keys %blocks)
	{
		require Carp;
		Carp::carp(
			"No order specified for blocks '" . join(', ', keys %blocks )
			. "'\n"
		)
	}
}

sub find_blocks
{
	my ($text, $tags, $opts) = @_;

	my @blocks;
	for my $line ( split(/\r?\n/, $text) )
	{
		my $block = start_block( $line, $tags, $opts );
		push @blocks, $block if $block;
	}

	return @blocks;
}

sub start_block
{
	my ($text, $tags, $opts) = @_;
	return new_block( 'end', level => 0 ) unless $text;

    # for {{{ }}}
    if ($text =~ /^\}\}\}$/) {
        $code_delimiters = 0;
        return new_block( 'end', level => 1 );
    } elsif ($code_delimiters or $text =~ /^\{\{\{$/) {
        $code_delimiters = 1;
        return new_block( 'code', level => 1, text => $text, opts => $opts, tags => $tags );
    }

	for my $block (@{ $tags->{blockorder} })
	{
		my ($line, $level, $indentation)  = ( $text, 0, '' );

		if ($tags->{indented}{$block})
		{
			($level, $line, $indentation) = get_indentation( $tags, $line );
			next unless $level;
		}

		my $marker_removed = length ($line =~ s/$tags->{blocks}{$block}//);

		next unless $marker_removed;

		return new_block( $block,
			args  => [ grep { defined } $1, $2, $3, $4, $5, $6, $7, $8, $9 ],
			level => $level || 0,
			opts  => $opts,
			text  => $line,
			tags  => $tags,
		);
	}
}

# merge_blocks() and nest_blocks()
BEGIN
{
	for my $op (qw( merge nest ))
	{
		no strict 'refs';
		*{ $op . '_blocks' } = sub
		{
			my $blocks    = shift;
			return unless @$blocks;

			my @processed = shift @$blocks;

			for my $block (@$blocks)
			{
				push @processed, $processed[-1]->$op( $block );
			}
	
			return @processed;
		};
	}
}

sub process_blocks
{
	my ($blocks, $tags, $opts) = @_;

	my @open;
	for my $block (@$blocks)
	{
		push @open, process_block( $block, $tags, $opts )
			unless $block->type() eq 'end';
	}

	return join('', @open);
}

sub process_block {
	my ($block, $tags, $opts) = @_;

	my ($start, $end, $start_line, $end_line, $between)
		= @{ $tags->{ $block->type() } };

	my @text;
	for my $line ( $block->formatted_text() )
	{
		if (blessed( $line ))
		{
			my $prev_end = pop @text || ();
			push @text, process_block( $line, $tags, $opts ), $prev_end;
			next;
		}

		if ((reftype( $start_line ) || '' ) eq 'CODE' )
		{
			(my $start_line, $line, $end_line) = 
				$start_line->(
					$line, $block->level(), $block->shift_args(), $tags, $opts
				);
			push @text, $start_line;
		}
		else
		{
			push @text, $start_line;
		}
		push @text, $line, $end_line;
	}

	pop @text if $between;
	
	@text = grep { defined $_ } @text; # remove warnings
	return join('', $start, @text, $end);
}

sub get_indentation
{
	my ($tags, $text) = @_;

	return 0, $text unless $text =~ s/($tags->{indent})//;
	return( length( $1 ) + 1, $text, $1 );
}

sub format_line {
	my ($text, $tags, $opts) = @_;
	$opts ||= {};

	$text =~ s!$tags->{strong_tag}!$tags->{strong}->($2, $opts)!eg;
	$text =~ s!$tags->{italic_tag}!$tags->{italic}->($2, $opts)!eg;
	$text =~ s!$tags->{strike_tag}!$tags->{strike}->($2, $opts)!eg;
	$text =~ s!$tags->{superscript_tag}!$tags->{superscript}->($1, $opts)!eg;
	$text =~ s!$tags->{subscript_tag}!$tags->{subscript}->($1, $opts)!eg;
	$text =~ s!$tags->{inline_tag}!$tags->{inline}->($1, $opts)!eg;

	$text = find_extended_links( $text, $tags, $opts );

	$text =~ s|(?<!["/>=])\b((?:[A-Z][a-z0-9]\w*){2,})|
			  $tags->{link}->($1, $opts)|egx;

	return $text;
}

sub find_innermost_balanced_pair
{
	my ($text, $open, $close) = @_;

	my $start_pos             = rindex( $text, $open              );
	return if $start_pos == -1;

	my $end_pos               =  index( $text, $close, $start_pos );
	return if $end_pos   == -1;

	my $open_length           = length( $open );
	my $close_length          = length( $close );
	my $close_pos             = $end_pos + $close_length;
	my $enclosed_length       = $close_pos - $start_pos;

	my $enclosed_atom        = substr( $text, $start_pos, $enclosed_length );
	return substr( $enclosed_atom, $open_length, 0 - $close_length ),
	       substr( $text, 0, $start_pos ),
		   substr( $text, $close_pos );
}

sub find_extended_links
{
	my ($text, $tags, $opts) = @_;

    my $schemas = join('|', @{$tags->{schemas}});
    $text =~ s!(\s+)(($schemas):\S+)!$1 . $tags->{link}->($2, $opts)!egi;

	my ($start, $end) = @{ $tags->{extended_link_delimiters} };

	while (my @pieces = find_innermost_balanced_pair( $text, $start, $end ) )
	{
		my ($tag, $before, $after) = map { defined $_ ? $_ : '' } @pieces;
		my $extended               = $tags->{link}->( $tag, $opts ) || '';
		$text                      = $before . $extended . $after;
	};

	return $text;
}

sub make_html_link {
	my ($link, $opts)        = @_;
	$opts                  ||= {};

	($link, my $title)       = find_link_title( $link, $opts );
	($link, my $is_relative) = escape_link( $link, $opts );

	my $prefix               = ( defined $opts->{prefix} && $is_relative )
		? $opts->{prefix} : '';

    unless ($is_relative) {
        return qq|<a href="$link" rel="nofollow">$title</a>|;
    } else {
    	return qq|<a href="$prefix$link">$title</a>|;
    }
}

sub escape_link {
	my ($link, $opts) = @_;

	my $u = URI->new( $link );
	return $link if $u->scheme();

	# it's a relative link
	return( uri_escape( $link ), 1 );
}

sub find_link_title {
	my ($link, $opts)  = @_;
	my $title;

	($link, $title)    = split(/\s+/, $link, 2);
	$title             = $link unless $title;

	return $link, $title;
}

'shamelessly adapted from the Jellybean project, directly from Text::WikiFormat';

__END__

=head1 NAME

Text::GooglewikiFormat - Translate Google Code Wiki markup into HTML

=head1 SYNOPSIS

    use Text::GooglewikiFormat;
    my $raw  = '*bold* _italic_ ~~strike~~';
    my $html = Text::GooglewikiFormat::format($raw);
    # $html is qq~<p><strong>bold</strong> <i>italic</i> <span style="text-decoration: line-through">strike</span></p>~ now

=head1 DESCRIPTION

Google Code L<http://code.google.com/> is a great code hosting place.

This module is aim to convert L<http://code.google.com/p/support/wiki/WikiSyntax> to HTML.

=head1 ADV. Example

    my $raw  = 'WikiWordLink';
    my %tags = %Text::GooglewikiFormat::tags;
    my $html = Text::GooglewikiFormat::format($raw, \%tags, { prefix => 'http://code.google.com/p/fayland/wiki/' } );
    # $html is qq~<p><a href="http://code.google.com/p/fayland/wiki/WikiWordLink">WikiWordLink</a></p>~ now

=head1 BUGS

It's not excatly the same as what google outputs. for the linebreak generally.

please report bugs to L<http://code.google.com/p/fayland/issues/list>

=head1 SEE ALSO

L<Text::WikiFormat>, L<Text::MediawikiFormat>

=head1 AUTHOR

Fayland Lam, C<< <fayland at gmail.com> >>

=head1 COPYRIGHT & LICENSE

Copyright 2007 Fayland Lam, all rights reserved.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.