Perl-Tokenizer/lib/Perl/Tokenizer.pm
package Perl::Tokenizer;
use utf8;
use 5.018;
use strict;
use warnings;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT = qw(perl_tokens);
our $VERSION = '0.11';
=encoding utf8
=head1 NAME
Perl::Tokenizer - A tiny Perl code tokenizer.
=head1 VERSION
Version 0.11
=cut
my $make_esc_delim = sub {
if ($_[0] eq '\\') {
return qr{\\(.*?)\\}s;
}
my $delim = quotemeta shift;
qr{$delim([^$delim\\]*+(?>\\.|[^$delim\\]+)*+)$delim}s;
};
my $make_end_delim = sub {
if ($_[0] eq '\\') {
return qr{.*?\\}s;
}
my $delim = quotemeta shift;
qr{[^$delim\\]*+(?>\\.|[^$delim\\]+)*+$delim}s;
};
my %bdelims;
foreach my $d ([qw~< >~], [qw~( )~], [qw~{ }~], [qw~[ ]~]) {
my @ed = map { quotemeta } @{$d};
$bdelims{$d->[0]} = qr{
$ed[0]
(?>
[^$ed[0]$ed[1]\\]+
|
\\.
|
(??{$bdelims{$d->[0]}})
)*
$ed[1]
}xs;
}
# string - single quote
my $str_sq = $make_esc_delim->(q{'});
# string - double quote
my $str_dq = $make_esc_delim->(q{"});
# backtick - backquote
my $str_bq = $make_esc_delim->(q{`});
# regex - //
my $match_re = $make_esc_delim->(q{/});
# glob/readline
my $glob = $bdelims{'<'};
# Cache regular expressions that are generated dynamically
my %cache_esc;
my %cache_end;
# Double pairs
my $dpairs = qr{
(?=
(?(?<=\s)
(.)
|
(\W)
)
)
(??{$bdelims{$+} // ($cache_esc{$+} //= $make_esc_delim->($+))})
}x;
# Double pairs -- comments
my $dcomm = qr{
\s* (?>(?<=\s)\# (?-s:.*) \s*)*
}x;
# Quote-like balanced (q{}, m//)
my $make_single_q_balanced = sub {
my $name = shift;
qr{
$name
$dcomm
$dpairs
}x;
};
# Quote-like balanced (q{}, m//)
my %single_q;
foreach my $name (qw(q qq qr qw qx m)) {
$single_q{$name} = $make_single_q_balanced->($name);
}
# First of balanced pairs
my $bbpair = qr~[<\[\{\(]~;
my $make_double_q_balanced = sub {
my $name = shift;
qr{
$name
$dcomm
(?(?=$bbpair) # balanced pairs (e.g.: s{}//)
$dpairs
$dcomm
$dpairs
| # or: single delims (e.g.: s///)
$dpairs
(??{$cache_end{$+} //= $make_end_delim->($+)})
)
}x;
};
# Double quote-like balanced (s{}{}, s///)
my %double_q;
foreach my $name (qw(tr s y)) {
$double_q{$name} = $make_double_q_balanced->($name);
}
my $number = qr{(?=\.?[0-9])[0-9_]*(?:\.(?!\.)[0-9_]*)?(?:[Ee](?:[+-]?[0-9_]+))?};
my $hex_num = qr{0x[_0-9A-Fa-f]*};
my $binary_num = qr{0b[_01]*};
my $var_name = qr{(?>\w+|(?>::)+|'(?=\w))++};
my $vstring = qr{\b(?:v[0-9]+(?>\.[0-9][0-9_]*+)*+ | [0-9][0-9_]*(?>\.[0-9][0-9_]*){2,})\b}x;
# HERE-DOC beginning
my $bhdoc = qr{
<<(?>\h*(?>$str_sq|$str_dq)|\\?+(\w+))
}x;
my $tr_flags = qr{[rcds]*};
my $match_flags = qr{[mnsixpogcdual]*};
my $substitution_flags = qr{[mnsixpogcerdual]*};
my $compiled_regex_flags = qr{[mnsixpodual]*};
my @postfix_operators = qw( ++ -- );
my @prec_operators = qw ( ... .. -> ++ -- =~ <=> \\ ? ~~ ~. ~ : >> >= > << <= < == != ! );
my @assignment_operators = qw( && || // ** % ^. ^ &. & |. | * + - = / . << >> );
my $operators = do {
local $" = '|';
qr{@{[map{quotemeta} @prec_operators, @assignment_operators]}};
};
my $postfix_operators = do {
local $" = '|';
qr{@{[map{quotemeta} @postfix_operators]}};
};
my $assignment_operators = do {
local $" = '|';
qr{@{[map{($_ eq '=') ? '=(?!=)' : "\Q$_=\E"} @assignment_operators]}};
};
my @special_var_names = (qw( \\ | + / ~ ! @ $ % ^ & * ( ) } < > : ; " ` ' ? = - [ ] . ), '#', ',');
my $special_var_names = do {
local $" = '|';
qr{@{[map {quotemeta} @special_var_names]}};
};
my $bracket_var = qr~(?=\s*\{)(?!\s*\{\s*(?:\^?$var_name|$special_var_names|\{)\s*\})~;
#<<<
my $perl_keywords =
qr/(?:CORE::)?(?>(a(?:(?:ccept|larm|tan2|bs|nd))|b(?:in(?:(?:mode|d))|less|reak)|c(?:aller|h(?:dir|mod
|o(?:(?:m?p|wn))|r(?:oot)?)|lose(?:dir)?|mp|o(?:n(?:(?:tinue|nect))|s)|rypt)|d(?:bm(?:(?:close|open))|
e(?:f(?:(?:ault|ined))|lete)|ie|ump|o)|e(?:ach|ls(?:(?:if|e))|nd(?:grent|hostent|netent|p(?:(?:roto|w)
ent)|servent)|of|val|x(?:ec|i(?:(?:sts|t))|p)|q)|f(?:c(?:ntl)?|ileno|lock|or(?:(?:each|m(?:(?:line|at)
)|k))?)|g(?:e(?:t(?:gr(?:(?:ent|gid|nam))|host(?:by(?:(?:addr|name))|ent)|login|net(?:by(?:(?:addr|nam
e))|ent)|p(?:eername|grp|pid|r(?:iority|oto(?:byn(?:(?:umber|ame))|ent))|w(?:(?:ent|nam|uid)))|s(?:erv
(?:by(?:(?:name|port))|ent)|ock(?:(?:name|opt)))|c))?|iven|lob|mtime|oto|rep|t)|hex|i(?:mport|n(?:(?:d
ex|t))|octl|sa|f)|join|k(?:(?:eys|ill))|l(?:ast|c(?:first)?|e(?:ngth)?|i(?:(?:sten|nk))|o(?:c(?:al(?:t
ime)?|k)|g)|stat|t)|m(?:ap|kdir|sg(?:(?:ctl|get|rcv|snd))|y)|n(?:e(?:xt)?|ot?)|o(?:ct|pen(?:dir)?|rd?|
ur)|p(?:ack(?:age)?|ipe|o[ps]|r(?:(?:ototype|intf?))|ush)|quotemeta|r(?:and|e(?:ad(?:(?:(?:lin[ek]|pip
e|dir)))?|cv|do|name|quire|set|turn|verse|winddir|f)|index|mdir)|s(?:ay|calar|e(?:ek(?:dir)?|lect|m(?:
(?:ctl|get|op))|nd|t(?:grent|hostent|netent|p(?:grp|r(?:(?:iority|otoent))|went)|s(?:(?:erven|ockop)t)
))|h(?:ift|m(?:(?:write|read|ctl|get))|utdown)|in|leep|o(?:cket(?:pair)?|rt)|p(?:li(?:(?:ce|t))|rintf)
|qrt|rand|t(?:(?:ate?|udy))|ub(?:str)?|y(?:mlink|s(?:(?:write|call|open|read|seek|tem))))|t(?:ell(?:di
r)?|i(?:(?:mes?|ed?))|runcate)|u(?:c(?:first)?|mask|n(?:def|l(?:(?:ess|ink))|pack|shift|ti[el])|se|tim
e)|v(?:(?:alues|ec))|w(?:a(?:it(?:pid)?|ntarray|rn)|h(?:(?:ile|en))|rite)|xor|BEGIN|END|INIT|CHECK))\b
/x;
#>>>
my $perl_filetests = qr/\-[ABCMORSTWXbcdefgkloprstuwxz]/;
sub perl_tokens(&$) {
my ($callback, $code) = @_;
ref($callback) eq 'CODE'
or die "usage: perl_tokens {...} \$code;";
my $variable = 0;
my $flat = 0;
my $regex = 1;
my $canpod = 1;
my $proto = 0;
my $format = 0;
my $expect_format = 0;
my $postfix_op = 0;
my @heredoc_eofs;
$code = "$code";
{
if ($expect_format == 1 and $code =~ /\G(?=\R)/) {
if ($code =~ /.*?\R\.\h*(?=\R|\z)/cgs) {
$callback->('vertical_space', $-[0], $-[0] + 1);
$callback->('format', $-[0] + 1, $+[0]);
$expect_format = 0;
$canpod = 1;
$regex = 1;
$postfix_op = 0;
}
else {
if ($code =~ /\G(.)/cgs) {
$callback->('unknown_char', $-[0], $+[0]);
redo;
}
}
redo;
}
if ($#heredoc_eofs >= 0 and $code =~ /\G(?=\R)/) {
my $token = shift @heredoc_eofs;
if ($code =~ m{\G.*?\R\Q$token\E(?=\R|\z)}sgc) {
$callback->('vertical_space', $-[0], $-[0] + 1);
$callback->('heredoc', $-[0] + 1, $+[0]);
}
redo;
}
if (($regex == 1 or $code =~ /\G(?!<<[0-9])/) and $code =~ m{\G$bhdoc}gc) {
$callback->('heredoc_beg', $-[0], $+[0]);
push @heredoc_eofs, $+;
$regex = 0;
$canpod = 0;
redo;
}
if ($canpod == 1 and $code =~ /\G^=[a-zA-Z]/cgm) {
$code =~ /\G.*?\R=cut\h*(?=\R|\z)/cgs
or $code =~ /\G.*\z/cgs;
$callback->('pod', $-[0] - 2, $+[0]);
redo;
}
if ($code =~ /\G(?=\s)/) {
if ($code =~ /\G\h+/cg) {
$callback->('horizontal_space', $-[0], $+[0]);
redo;
}
if ($code =~ /\G\v+/cg) {
$callback->('vertical_space', $-[0], $+[0]);
redo;
}
if ($code =~ /\G\s+/cg) {
$callback->('other_space', $-[0], $+[0]);
redo;
}
}
if ($variable > 0) {
if ($code =~ m{\G$var_name}gco or $code =~ m{\G(?<=\$)\#$var_name}gco) {
$callback->('var_name', $-[0], $+[0]);
$regex = 0;
$variable = 0;
$canpod = 0;
$flat = ($code =~ /\G(?=\s*\{)/) ? 1 : 0;
redo;
}
if (
$code =~ m{\G(?!\$+$var_name)}o
and ( $code =~ m~\G(?:\s+|#?)\{\s*(?:$var_name|$special_var_names|[#{])\s*\}~goc
or $code =~ m{\G(?:\^\w+|#(?!\{)|$special_var_names)}gco
or $code =~ /\G#/cg)
) {
$callback->('special_var_name', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
$variable = 0;
$flat = ($code =~ /\G(?<!\})(?=\s*\{)/) ? 1 : 0;
redo;
}
# continue
}
if ($code =~ /\G#.*/cg) {
$callback->('comment', $-[0], $+[0]);
redo;
}
if (($regex == 1 and not($postfix_op)) or $code =~ /\G(?=[\@\$])/) {
if ($code =~ /\G\$/cg) {
$callback->('scalar_sigil', $-[0], $+[0]);
$code =~ /\G$bracket_var/o or ++$variable;
$regex = 0;
$canpod = 0;
$flat = 1;
redo;
}
if ($code =~ /\G\@/cg) {
$callback->('array_sigil', $-[0], $+[0]);
$code =~ /\G$bracket_var/o or ++$variable;
$regex = 0;
$canpod = 0;
$flat = 1;
redo;
}
if ($code =~ /\G\%/cg) {
$callback->('hash_sigil', $-[0], $+[0]);
$code =~ /\G$bracket_var/o or ++$variable;
$regex = 0;
$canpod = 0;
$flat = 1;
redo;
}
if ($code =~ /\G\*/cg) {
$callback->('glob_sigil', $-[0], $+[0]);
$code =~ /\G$bracket_var/o or ++$variable;
$regex = 0;
$canpod = 0;
$flat = 1;
redo;
}
if ($code =~ /\G&/cg) {
$callback->('ampersand_sigil', $-[0], $+[0]);
$code =~ /\G$bracket_var/o or ++$variable;
$regex = 0;
$canpod = 0;
$flat = 1;
redo;
}
# continue
}
if ($proto == 1 and $code =~ /\G\(.*?\)/cgs) {
$callback->('sub_proto', $-[0], $+[0]);
$proto = 0;
$canpod = 0;
$regex = 0;
redo;
}
if ($code =~ /\G\(/cg) {
$callback->('parenthesis_open', $-[0], $+[0]);
$regex = 1;
$flat = 0;
$canpod = 0;
redo;
}
if ($code =~ /\G\)/cg) {
$callback->('parenthesis_close', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
$flat = 0;
redo;
}
if ($code =~ /\G\{/cg) {
$callback->('curly_bracket_open', $-[0], $+[0]);
$regex = 1;
$proto = 0;
redo;
}
if ($code =~ /\G\}/cg) {
$callback->('curly_bracket_close', $-[0], $+[0]);
$flat = 0;
$canpod = 1;
redo;
}
if ($code =~ /\G\[/cg) {
$callback->('right_bracket_open', $-[0], $+[0]);
$regex = 1;
$postfix_op = 0;
$flat = 0;
$canpod = 0;
redo;
}
if ($code =~ /\G\]/cg) {
$callback->('right_bracket_close', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
$flat = 0;
redo;
}
if ($proto == 0) {
if ($canpod == 1 and $code =~ /\Gformat\b/cg) {
$callback->('keyword', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
$format = 1;
redo;
}
if (
(
$flat == 0 or ( $flat == 1
and $code =~ /\G(?!\w+\h*\})/)
)
and $code =~ m{\G(?<!->)$perl_keywords}gco
) {
my $name = $1;
my @pos = ($-[0], $+[0]);
my $is_bare_word = ($code =~ /\G(?=\h*=>)/);
$callback->(($is_bare_word ? 'bare_word' : 'keyword'), @pos);
if ($name eq 'sub' and not $is_bare_word) {
$proto = 1;
$regex = 0;
}
else {
$regex = 1;
$postfix_op = 0;
}
$canpod = 0;
redo;
}
# continue
}
if ($code =~ /\G(?!(?>tr|[ysm]|q[rwxq]?)\h*=>)/ and $code =~ /\G(?<!->)/) {
if (($flat == 1 and $code =~ /\G(?=[a-z]+\h*\})/) or $code =~ /\G((?<=\{)|(?<=\{\h))(?=[a-z]+\h*\})/) {
## ok
}
else {
if ($code =~ m{\G $double_q{s} $substitution_flags }gcxo) {
$callback->('substitution', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\G (?> $double_q{tr} | $double_q{y} ) $tr_flags }gxco) {
$callback->('transliteration', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\G $single_q{m} $match_flags }gcxo
or ($regex == 1 and $code =~ m{\G $match_re $match_flags }gcxo)) {
$callback->('match_regex', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\G $single_q{qr} $compiled_regex_flags }gcxo) {
$callback->('compiled_regex', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\G$single_q{q}}gco) {
$callback->('q_string', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\G$single_q{qq}}gco) {
$callback->('qq_string', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\G$single_q{qw}}gco) {
$callback->('qw_string', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\G$single_q{qx}}gco) {
$callback->('qx_string', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
}
# continue
}
if ($code =~ m{\G$str_dq}gco) {
$callback->('double_quoted_string', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
$flat = 0;
redo;
}
if ($code =~ m{\G$str_sq}gco) {
$callback->('single_quoted_string', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
$flat = 0;
redo;
}
if ($code =~ m{\G$str_bq}gco) {
$callback->('backtick', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
$flat = 0;
redo;
}
if ($code =~ /\G;/cg) {
$callback->('semicolon', $-[0], $+[0]);
$canpod = 1;
$regex = 1;
$postfix_op = 0;
$proto = 0;
$flat = 0;
redo;
}
if ($code =~ /\G=>/cg) {
$callback->('fat_comma', $-[0], $+[0]);
$regex = 1;
$postfix_op = 0;
$canpod = 0;
$flat = 0;
redo;
}
if ($code =~ /\G,/cg) {
$callback->('comma', $-[0], $+[0]);
$regex = 1;
$postfix_op = 0;
$canpod = 0;
$flat = 0;
redo;
}
if ($code =~ m{\G$vstring}gco) {
$callback->('v_string', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\G$perl_filetests\b}gco) {
my @pos = ($-[0], $+[0]);
my $is_bare_word = ($code =~ /\G(?=\h*=>)/);
$callback->(($is_bare_word ? 'bare_word' : 'file_test'), @pos);
if ($is_bare_word) {
$canpod = 0;
$regex = 0;
}
else {
$regex = 1; # ambiguous, but possible
$postfix_op = 0;
$canpod = 0;
}
redo;
}
if ($code =~ /\G(?=__)/) {
if ($code =~ m{\G__(?>DATA|END)__\b\h*+(?!=>).*\z}gcs) {
$callback->('data', $-[0], $+[0]);
redo;
}
if ($code =~ m{\G__(?>SUB|FILE|PACKAGE|LINE)__\b(?!\h*+=>)}gc) {
$callback->('special_keyword', $-[0], $+[0]);
$canpod = 0;
$regex = 0;
redo;
}
# continue
}
if ($regex == 1 and $code =~ /\G(?<!(?:\+\+|--)\h)/ and $code =~ m{\G$glob}gco) {
$callback->('glob_readline', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\G$assignment_operators}gco) {
$callback->('assignment_operator', $-[0], $+[0]);
if ($format) {
if (substr($code, $-[0], $+[0] - $-[0]) eq '=') {
$format = 0;
$expect_format = 1;
}
}
$regex = 1;
$canpod = 0;
$flat = 0;
redo;
}
if ($code =~ /\G->/cg) {
$callback->('dereference_operator', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
$flat = 1;
redo;
}
if ($code =~ m{\G$operators}gco or $code =~ /\Gx(?=[0-9\W])/cg) {
$callback->('operator', $-[0], $+[0]);
if (substr($code, $-[0], ($+[0] - $-[0])) =~ /^$postfix_operators\z/o) {
$postfix_op = 1;
}
else {
$postfix_op = 0;
}
$canpod = 0;
$regex = 1;
$flat = 0;
redo;
}
if ($code =~ m{\G$hex_num}gco) {
$callback->('hex_number', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\G$binary_num}gco) {
$callback->('binary_number', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\G$number}gco) {
$callback->('number', $-[0], $+[0]);
$regex = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\GSTD(?>OUT|ERR|IN)\b}gc) {
$callback->('special_fh', $-[0], $+[0]);
$regex = 1;
$postfix_op = 0;
$canpod = 0;
redo;
}
if ($code =~ m{\G$var_name}gco) {
$callback->(($proto == 1 ? 'sub_name' : 'bare_word'), $-[0], $+[0]);
$regex = 0;
$canpod = 0;
$flat = 0;
redo;
}
if ($code =~ /\G(.)/cgs) {
$callback->('unknown_char', $-[0], $+[0]);
redo;
}
# all done
}
return pos($code);
}
1;
=head1 SYNOPSIS
use Perl::Tokenizer;
my $code = 'my $num = 42;';
perl_tokens { print "@_\n" } $code;
=head1 DESCRIPTION
Perl::Tokenizer is a tiny tokenizer which splits a given Perl code into a list of tokens, using the power of regular expressions.
=head1 SUBROUTINES
=over 4
=item perl_tokens(&$)
This function takes a callback subroutine and a string. The subroutine is called for each token in real-time.
perl_tokens {
my ($token, $pos_beg, $pos_end) = @_;
...
} $code;
The positions are absolute to the string.
=back
=head2 EXPORT
The function B<perl_tokens> is exported by default. This is the only function provided by this module.
=head1 TOKENS
The standard token names that are available are:
format .................. Format text
heredoc_beg ............. The beginning of a here-document ('<<"EOT"')
heredoc ................. The content of a here-document
pod ..................... An inline POD document, until '=cut' or end of the file
horizontal_space ........ Horizontal whitespace (matched by /\h/)
vertical_space .......... Vertical whitespace (matched by /\v/)
other_space ............. Whitespace that is neither vertical nor horizontal (matched by /\s/)
var_name ................ Alphanumeric name of a variable (excluding the sigil)
special_var_name ........ Non-alphanumeric name of a variable, such as $/ or $^H (excluding the sigil)
sub_name ................ Subroutine name
sub_proto ............... Subroutine prototype
comment ................. A #-to-newline comment (excluding the newline)
scalar_sigil ............ The sigil of a scalar variable: '$'
array_sigil ............. The sigil of an array variable: '@'
hash_sigil .............. The sigil of a hash variable: '%'
glob_sigil .............. The sigil of a glob symbol: '*'
ampersand_sigil ......... The sigil of a subroutine call: '&'
parenthesis_open ........ Open parenthesis: '('
parenthesis_close ....... Closed parenthesis: ')'
right_bracket_open ...... Open right bracket: '['
right_bracket_close ..... Closed right bracket: ']'
curly_bracket_open ...... Open curly bracket: '{'
curly_bracket_close ..... Closed curly bracket: '}'
substitution ............ Regex substitution: s/.../.../
transliteration.......... Transliteration: tr/.../.../ or y/.../.../
match_regex ............. Regex in matching context: m/.../
compiled_regex .......... Quoted compiled regex: qr/.../
q_string ................ Single quoted string: q/.../
qq_string ............... Double quoted string: qq/.../
qw_string ............... List of quoted words: qw/.../
qx_string ............... System command quoted string: qx/.../
backtick ................ Backtick system command quoted string: `...`
single_quoted_string .... Single quoted string, as: '...'
double_quoted_string .... Double quoted string, as: "..."
bare_word ............... Unquoted string
glob_readline ........... <readline> or <shell glob>
v_string ................ Version string: "vX" or "X.X.X"
file_test ............... File test operator (-X), such as: "-d", "-e", etc...
data .................... The content of `__DATA__` or `__END__` sections
keyword ................. Regular Perl keyword, such as: `if`, `else`, etc...
special_keyword ......... Special Perl keyword, such as: `__PACKAGE__`, `__FILE__`, etc...
comma ................... Comma: ','
fat_comma ............... Fat comma: '=>'
operator ................ Primitive operator, such as: '+', '||', etc...
assignment_operator ..... '=' or any assignment operator: '+=', '||=', etc...
dereference_operator .... Arrow dereference operator: '->'
hex_number .............. Hexadecimal literal number: 0x...
binary_number ........... Binary literal number: 0b...
number .................. Decimal literal number, such as 42, 3.1e4, etc...
special_fh .............. Special file-handle name, such as 'STDIN', 'STDOUT', etc...
unknown_char ............ Unknown or unexpected character
=head1 EXAMPLE
For this code:
my $num = 42;
it generates the following tokens:
# TOKEN POS
( keyword => ( 0, 2) )
( horizontal_space => ( 2, 3) )
( scalar_sigil => ( 3, 4) )
( var_name => ( 4, 7) )
( horizontal_space => ( 7, 8) )
( assignment_operator => ( 8, 9) )
( horizontal_space => ( 9, 10) )
( number => (10, 12) )
( semicolon => (12, 13) )
=head1 REPOSITORY
L<https://github.com/trizen/Perl-Tokenizer>
=head1 AUTHOR
Daniel Șuteu, C<< <trizen at cpan.org> >>
=head1 COPYRIGHT AND LICENSE
Copyright (C) 2013-2017 Daniel Șuteu
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.22.0 or,
at your option, any later version of Perl 5 you may have available.
=cut