Benchmark-Perl-Formance-Cargo/share/P6STD/STD_P5.pm6
# STD_P5.pm
#
# Copyright 2009-2010, Larry Wall
#
# You may copy this software under the terms of the Artistic License,
# version 2.0 or later.
grammar STD::P5 is STD;
use DEBUG;
method TOP ($STOP?) {
if defined $STOP {
my $*GOAL ::= $STOP;
self.unitstop($STOP).comp_unit;
}
else {
self.comp_unit;
}
}
##############
# Precedence #
##############
# The internal precedence levels are *not* part of the public interface.
# The current values are mere implementation; they may change at any time.
# Users should specify precedence only in relation to existing levels.
constant %term = (:dba('term') , :prec<z=>);
constant %methodcall = (:dba('methodcall') , :prec<y=>, :assoc<unary>, :uassoc<left>, :fiddly);
constant %autoincrement = (:dba('autoincrement') , :prec<x=>, :assoc<unary>, :uassoc<non>);
constant %exponentiation = (:dba('exponentiation') , :prec<w=>, :assoc<right>);
constant %symbolic_unary = (:dba('symbolic unary') , :prec<v=>, :assoc<unary>, :uassoc<left>);
constant %binding = (:dba('binding') , :prec<u=>, :assoc<unary>, :uassoc<left>);
constant %multiplicative = (:dba('multiplicative') , :prec<t=>, :assoc<left>);
constant %additive = (:dba('additive') , :prec<s=>, :assoc<left>);
constant %shift = (:dba('shift') , :prec<r=>, :assoc<left>);
constant %named_unary = (:dba('named unary') , :prec<q=>, :assoc<unary>, :uassoc<left>);
constant %comparison = (:dba('comparison') , :prec<p=>, :assoc<non>, :diffy);
constant %equality = (:dba('equality') , :prec<o=>, :assoc<chain>, :diffy, :iffy);
constant %bitwise_and = (:dba('bitwise and') , :prec<n=>, :assoc<left>);
constant %bitwise_or = (:dba('bitwise or') , :prec<m=>, :assoc<left>);
constant %tight_and = (:dba('tight and') , :prec<l=>, :assoc<left>);
constant %tight_or = (:dba('tight or') , :prec<k=>, :assoc<left>);
constant %range = (:dba('range') , :prec<j=>, :assoc<right>, :fiddly);
constant %conditional = (:dba('conditional') , :prec<i=>, :assoc<right>, :fiddly);
constant %assignment = (:dba('assignment') , :prec<h=>, :assoc<right>);
constant %comma = (:dba('comma operator') , :prec<g=>, :assoc<left>, :nextterm<nulltermish>, :fiddly);
constant %listop = (:dba('list operator') , :prec<f=>, :assoc<unary>, :uassoc<left>);
constant %loose_not = (:dba('not operator') , :prec<e=>, :assoc<unary>, :uassoc<left>);
constant %loose_and = (:dba('loose and') , :prec<d=>, :assoc<left>);
constant %loose_or = (:dba('loose or') , :prec<c=>, :assoc<left>);
constant %LOOSEST = (:dba('LOOSEST') , :prec<a=!>);
constant %terminator = (:dba('terminator') , :prec<a=>, :assoc<list>);
# "epsilon" tighter than terminator
#constant $LOOSEST = %LOOSEST<prec>;
constant $LOOSEST = "a=!"; # XXX preceding line is busted
##############
# Categories #
##############
# Categories are designed to be easily extensible in derived grammars
# by merely adding more rules in the same category. The rules within
# a given category start with the category name followed by a differentiating
# adverbial qualifier to serve (along with the category) as the longer name.
# The endsym context, if specified, says what to implicitly check for in each
# rule right after the initial <sym>. Normally this is used to make sure
# there's appropriate whitespace. # Note that endsym isn't called if <sym>
# isn't called.
my $*endsym = "null";
my $*endargs = -1;
proto token category { <...> }
token category:category { <sym> }
token category:p5sigil { <sym> }
proto token p5sigil { <...> }
token category:p5special_variable { <sym> }
proto token p5special_variable { <...> }
token category:p5comment { <sym> }
proto token p5comment { <...> }
token category:p5version { <sym> }
proto token p5version { <...> }
token category:p5module_name { <sym> }
proto token p5module_name { <...> }
token category:p5value { <sym> }
proto token p5value { <...> }
token category:p5term { <sym> }
proto token p5term { <...> }
token category:p5number { <sym> }
proto token p5number { <...> }
token category:p5quote { <sym> }
proto token p5quote () { <...> }
token category:p5prefix { <sym> }
proto token p5prefix is unary is defequiv(%symbolic_unary) { <...> }
token category:p5infix { <sym> }
proto token p5infix is binary is defequiv(%additive) { <...> }
token category:p5postfix { <sym> }
proto token p5postfix is unary is defequiv(%autoincrement) { <...> }
token category:p5dotty { <sym> }
proto token p5dotty (:$*endsym = 'unspacey') { <...> }
token category:p5circumfix { <sym> }
proto token p5circumfix { <...> }
token category:p5postcircumfix { <sym> }
proto token p5postcircumfix is unary { <...> } # unary as far as EXPR knows...
token category:p5type_declarator { <sym> }
proto token p5type_declarator (:$*endsym = 'spacey') { <...> }
token category:p5scope_declarator { <sym> }
proto token p5scope_declarator (:$*endsym = 'nofun') { <...> }
token category:p5package_declarator { <sym> }
proto token p5package_declarator (:$*endsym = 'spacey') { <...> }
token category:p5routine_declarator { <sym> }
proto token p5routine_declarator (:$*endsym = 'nofun') { <...> }
token category:p5regex_declarator { <sym> }
proto token p5regex_declarator (:$*endsym = 'spacey') { <...> }
token category:p5statement_prefix { <sym> }
proto rule p5statement_prefix () { <...> }
token category:p5statement_control { <sym> }
proto rule p5statement_control (:$*endsym = 'spacey') { <...> }
token category:p5statement_mod_cond { <sym> }
proto rule p5statement_mod_cond (:$*endsym = 'nofun') { <...> }
token category:p5statement_mod_loop { <sym> }
proto rule p5statement_mod_loop (:$*endsym = 'nofun') { <...> }
token category:p5terminator { <sym> }
proto token p5terminator { <...> }
token unspacey { <.unsp>? }
token endid { <?before <-[ \- \' \w ]> > }
token spacey { <?before <[ \s \# ]> > }
token nofun { <!before '(' | '.(' | '\\' | '\'' | '-' | "'" | \w > }
##################
# Lexer routines #
##################
token ws {
:temp @*STUB = return self if @*MEMOS[self.pos]<ws> :exists;
:my $startpos = self.pos;
:dba('whitespace')
[
| \h+ <![\#\s\\]> { @*MEMOS[$¢.pos]<ws> = $startpos; } # common case
| <?before \w> <?after \w> :::
{ @*MEMOS[$startpos]<ws> :delete; }
<.panic: "Whitespace is required between alphanumeric tokens"> # must \s+ between words
]
||
[
| <.unsp>
| <.vws> <.heredoc>
| <.unv>
| $ { $¢.moreinput }
]*
{{
if ($¢.pos == $startpos) {
@*MEMOS[$¢.pos]<ws> :delete;
}
else {
@*MEMOS[$¢.pos]<ws> :delete;
@*MEMOS[$¢.pos]<endstmt> = @*MEMOS[$startpos]<endstmt>
if @*MEMOS[$startpos]<endstmt> :exists;
}
}}
}
token unsp {
<!>
}
token vws {
:dba('vertical whitespace')
\v
[ '#DEBUG -1' { say "DEBUG"; $STD::DEBUG = $*DEBUG = -1; } ]?
}
# We provide two mechanisms here:
# 1) define $*moreinput, or
# 2) override moreinput method
method moreinput () {
$*moreinput.() if $*moreinput;
}
token unv {
:dba('horizontal whitespace')
[
| \h+
| <?before \h* '=' [ \w | '\\'] > ^^ <.pod_comment>
| \h* <comment=p5comment>
]
}
token p5comment:sym<#> {
'#' {} \N*
}
token ident {
<.alpha> \w*
}
token identifier {
<.alpha> \w*
}
# XXX We need to parse the pod eventually to support $= variables.
token pod_comment {
^^ \h* '=' <.unsp>?
[
| 'begin' \h+ <identifier> ::
[
|| .*? "\n" \h* '=' <.unsp>? 'end' \h+ $<identifier> » \N*
|| <?{ $<identifier>.Str eq 'END'}> .*
|| { my $id = $<identifier>.Str; self.panic("=begin $id without matching =end $id"); }
]
| 'begin' » :: \h* [ $$ || '#' || <.panic: "Unrecognized token after =begin"> ]
[ .*? "\n" \h* '=' <.unsp>? 'end' » \N* || { self.panic("=begin without matching =end"); } ]
| 'for' » :: \h* [ <identifier> || $$ || '#' || <.panic: "Unrecognized token after =for"> ]
[.*? ^^ \h* $$ || .*]
| ::
[ <?before .*? ^^ '=cut' » > <.panic: "Obsolete pod format, please use =begin/=end instead"> ]?
[<alpha>||\s||<.panic: "Illegal pod directive">]
\N*
]
}
###################
# Top-level rules #
###################
# Note: we only check for the stopper. We don't check for ^ because
# we might be embedded in something else.
rule comp_unit {
:my $*begin_compunit = 1;
:my $*endargs = -1;
:my %*LANG;
:my $*PKGDECL ::= "";
:my $*IN_DECL;
:my $*DECLARAND;
:my $*NEWPKG;
:my $*NEWLEX;
:my $*QSIGIL ::= '';
:my $*IN_META = 0;
:my $*QUASIMODO;
:my $*SCOPE = "";
:my $*LEFTSIGIL;
:my %*MYSTERY = ();
:my $*INVOCANT_OK;
:my $*INVOCANT_IS;
:my $*CURLEX;
:my $*MULTINESS = '';
:my $*CURPKG;
{{
%*LANG<MAIN> = ::STD ;
%*LANG<Q> = ::STD::Q ;
%*LANG<Regex> = ::STD::Regex ;
%*LANG<Trans> = ::STD::Trans ;
%*LANG<P5> = ::STD::P5 ;
%*LANG<P5Regex> = ::STD::P5::Regex ;
@*WORRIES = ();
self.load_setting($*SETTINGNAME);
my $oid = $*SETTING.id;
my $id = 'MY:file<' ~ $*FILE<name> ~ '>';
$*CURLEX = Stash.new(
'OUTER::' => [$oid],
'!file' => $*FILE, '!line' => 0,
'!id' => [$id],
);
$STD::ALL.{$id} = $*CURLEX;
$*UNIT = $*CURLEX;
$STD::ALL.<UNIT> = $*UNIT;
self.finishlex;
}}
<statementlist>
[ <?unitstopper> || <.panic: "Confused"> ]
# "CHECK" time...
{{
if @*WORRIES {
warn "Potential difficulties:\n " ~ join( "\n ", @*WORRIES) ~ "\n";
}
my $m = $¢.explain_mystery();
warn $m if $m;
}}
}
method explain_mystery() {
my %post_types;
my %unk_types;
my %unk_routines;
my $m = '';
for keys(%*MYSTERY) {
my $p = %*MYSTERY{$_}.<lex>;
if self.is_name($_, $p) {
# types may not be post-declared
%post_types{$_} = %*MYSTERY{$_};
next;
}
next if self.is_known($_, $p) or self.is_known('&' ~ $_, $p);
# just a guess, but good enough to improve error reporting
if $_ lt 'a' {
%unk_types{$_} = %*MYSTERY{$_};
}
else {
%unk_routines{$_} = %*MYSTERY{$_};
}
}
if %post_types {
my @tmp = sort keys(%post_types);
$m ~= "Illegally post-declared type" ~ ('s' x (@tmp != 1)) ~ ":\n";
for @tmp {
$m ~= "\t$_ used at line " ~ %post_types{$_}.<line> ~ "\n";
}
}
if %unk_types {
my @tmp = sort keys(%unk_types);
$m ~= "Undeclared name" ~ ('s' x (@tmp != 1)) ~ ":\n";
for @tmp {
$m ~= "\t$_ used at line " ~ %unk_types{$_}.<line> ~ "\n";
}
}
if %unk_routines {
my @tmp = sort keys(%unk_routines);
$m ~= "Undeclared routine" ~ ('s' x (@tmp != 1)) ~ ":\n";
for @tmp {
$m ~= "\t$_ used at line " ~ %unk_routines{$_}.<line> ~ "\n";
}
}
$m;
}
# Look for an expression followed by a required lambda.
token xblock {
:my $*GOAL ::= '{';
:dba('block expression') '(' ~ ')' <EXPR>
<.ws>
<block>
}
token block {
:temp $*CURLEX;
:dba('scoped block')
[ <?before '{' > || <.panic: "Missing block"> ]
<.newlex>
<blockoid>
}
token blockoid {
# encapsulate braided languages
:temp %*LANG;
<.finishlex>
[
| :dba('block') '{' ~ '}' <statementlist>
| <?terminator> <.panic: 'Missing block'>
| <?> <.panic: "Malformed block">
]
[
| <?before \h* $$> # (usual case without comments)
{ @*MEMOS[$¢.pos]<endstmt> = 2; }
| \h* <?before <[\\,:]>>
| <.unv>? $$
{ @*MEMOS[$¢.pos]<endstmt> = 2; }
| {} <.unsp>? { @*MEMOS[$¢.pos]<endargs> = 1; }
]
}
token regex_block {
# encapsulate braided languages
:temp %*LANG;
:my $lang = %*LANG<Regex>;
:my $*GOAL ::= '}';
[ <quotepair> <.ws>
{
my $kv = $<quotepair>[*-1];
$lang = $lang.tweak($kv.<k>, $kv.<v>)
or self.sorry("Unrecognized adverb :" ~ $kv.<k> ~ '(' ~ $kv.<v> ~ ')');
}
]*
'{'
<nibble( $¢.cursor_fresh($lang).unbalanced('}') )>
[ '}' || <.panic: "Unable to parse regex; couldn't find right brace"> ]
[
| <?before \h* $$> # (usual case without comments)
{ @*MEMOS[$¢.pos]<endstmt> = 2; }
| \h* <?before <[\\,:]>>
| <.unv>? $$
{ @*MEMOS[$¢.pos]<endstmt> = 2; }
| {} <.unsp>? { @*MEMOS[$¢.pos]<endargs> = 1; }
]
}
# statement semantics
rule statementlist {
:my $*INVOCANT_OK = 0;
:dba('statement list')
[
| $
| <?before <[\)\]\}]> >
| [<statement><eat_terminator> ]*
]
}
# embedded semis, context-dependent semantics
rule semilist {
:my $*INVOCANT_OK = 0;
:dba('semicolon list')
[
| <?before <[\)\]\}]> >
| [<statement><eat_terminator> ]*
]
}
token label {
:my $label;
<identifier> ':' <?before \s> <.ws>
[ <?{ $¢.is_name($label = $<identifier>.Str) }>
<.sorry("Illegal redeclaration of '$label'")>
]?
# add label as a pseudo type
{{ $¢.add_my_name($label); }}
}
token statement {
:my $*endargs = -1;
:my $*QSIGIL ::= 0;
<!before <[\)\]\}]> >
# this could either be a statement that follows a declaration
# or a statement that is within the block of a code declaration
<!!{ $¢ = %*LANG<MAIN>.bless($¢); }>
[
| <label> <statement>
| <statement_control=p5statement_control>
| <EXPR>
:dba('statement end')
<.ws>
:dba('statement modifier')
[
| <statement_mod_loop=p5statement_mod_loop>
| <statement_mod_cond=p5statement_mod_cond>
]?
| <?before ';'>
]
}
token eat_terminator {
[
|| ';' [ <?before $> { $*ORIG ~~ s/\;$/ /; } ]?
|| <?{ @*MEMOS[$¢.pos]<endstmt> }> <.ws>
|| <?terminator>
|| $
|| {{ if @*MEMOS[$¢.pos]<ws> { $¢.pos = @*MEMOS[$¢.pos]<ws>; } }} # undo any line transition
<.panic: "Confused">
]
}
#####################
# statement control #
#####################
token p5statement_control:use {
:my $longname;
:my $*SCOPE = 'use';
<sym> <.ws>
[
| <version=p5version>
| <module_name=p5module_name>
{{
$longname = $<module_name><longname>;
}}
[
|| <.spacey> <arglist>
{{
$¢.do_use($longname, $<arglist>);
}}
|| {{ $¢.do_use($longname, ''); }}
]
]
<.ws>
}
token p5statement_control:no {
<sym> <.ws>
<module_name=p5module_name>[<.spacey><arglist>]?
<.ws>
}
token p5statement_control:if {
$<sym>=['if'|'unless']:s
<xblock>
[
[ <!before 'else'\s*'if'> || <.panic: "Please use 'elsif'"> ]
'elsif'<?spacey> <elsif=xblock>
]*
[
'else'<?spacey> <else=pblock>
]?
}
token p5statement_control:while {
<sym> :s
<xblock>
}
token p5statement_control:until {
<sym> :s
<xblock>
}
token p5statement_control:for {
['for'|'foreach'] <.ws>
$<eee> = (
'(' [ :s
<e1=EXPR>? ';'
<e2=EXPR>? ';'
<e3=EXPR>?
')'||<.panic: "Malformed loop spec">]
)? <.ws>
<block>
}
token p5statement_control:given {
<sym> :s
<xblock>
}
token p5statement_control:when {
<sym> :s
<xblock>
}
rule p5statement_control:default {<sym> <block> }
rule p5statement_prefix:BEGIN {<sym> <block> }
rule p5statement_prefix:CHECK {<sym> <block> }
rule p5statement_prefix:INIT {<sym> <block> }
rule p5statement_control:END {<sym> <block> }
#######################
# statement modifiers #
#######################
rule modifier_expr { <EXPR> }
rule p5statement_mod_cond:if {<sym> <modifier_expr> }
rule p5statement_mod_cond:unless {<sym> <modifier_expr> }
rule p5statement_mod_cond:when {<sym> <modifier_expr> }
rule p5statement_mod_loop:while {<sym> <modifier_expr> }
rule p5statement_mod_loop:until {<sym> <modifier_expr> }
rule p5statement_mod_loop:for {<sym> <modifier_expr> }
rule p5statement_mod_loop:given {<sym> <modifier_expr> }
################
# module names #
################
token def_module_name {
<longname>
[ :dba('generic role')
<?before '['>
<?{ ($*PKGDECL//'') eq 'role' }>
<.newlex>
'[' ~ ']' <signature>
{ $*IN_DECL = 0; }
<.finishlex>
]?
}
token p5module_name:normal {
<longname>
[ <?before '['> :dba('generic role') '[' ~ ']' <arglist> ]?
}
token vnum {
\d+ | '*'
}
token p5version:sym<v> {
'v' <?before \d+> :: <vnum> +% '.' '+'?
}
###############
# Declarators #
###############
token variable_declarator {
:my $*IN_DECL = 1;
:my $*DECLARAND;
<variable>
{ $*IN_DECL = 0; $¢.add_variable($<variable>.Str) }
[ # Is it a shaped array or hash declaration?
# <?{ $<sigil> eq '@' | '%' }>
<.unsp>?
$<shape> = [
| '(' ~ ')' <signature>
| :dba('shape definition') '[' ~ ']' <semilist>
| :dba('shape definition') '{' ~ '}' <semilist>
| <?before '<'> <postcircumfix=p5postcircumfix>
]*
]?
<.ws>
<trait>*
}
rule scoped($*SCOPE) {
:dba('scoped declarator')
[
| <declarator>
| <regex_declarator=p5regex_declarator>
| <package_declarator=p5package_declarator>
]
|| <?before <[A..Z]>><longname>{{
my $t = $<longname>.Str;
if not $¢.is_known($t) {
$¢.sorry("In \"$*SCOPE\" declaration, typename $t must be predeclared (or marked as declarative with :: prefix)");
}
}}
<!> # drop through
|| <.panic: "Malformed $*SCOPE">
}
token p5scope_declarator:my { <sym> <scoped('my')> }
token p5scope_declarator:our { <sym> <scoped('our')> }
token p5scope_declarator:state { <sym> <scoped('state')> }
token p5package_declarator:package {
:my $*PKGDECL ::= 'package';
<sym> <package_def>
}
token p5package_declarator:require { # here because of declarational aspects
<sym> <.ws>
[
|| <module_name=p5module_name> <EXPR>?
|| <EXPR>
]
}
rule package_def {
:my $longname;
:my $*IN_DECL = 1;
:my $*DECLARAND;
:my $*NEWPKG;
:my $*NEWLEX;
{ $*SCOPE ||= 'our'; }
[
[
<def_module_name>{
$longname = $<def_module_name>[0]<longname>;
$¢.add_name($longname.Str);
}
]?
[
|| <?before ';'>
{{
$longname orelse $¢.panic("Package cannot be anonymous");
my $shortname = $longname.<name>.Str;
$*CURPKG = $*NEWPKG // $*CURPKG.{$shortname ~ '::'};
}}
|| <.panic: "Unable to parse " ~ $*PKGDECL ~ " definition">
]
] || <.panic: "Malformed $*PKGDECL">
}
token declarator {
[
| <constant_declarator>
| <variable_declarator>
| '(' ~ ')' <signature> <trait>*
| <routine_declarator=p5routine_declarator>
| <regex_declarator=p5regex_declarator>
| <type_declarator=p5type_declarator>
]
}
token p5multi_declarator:null {
:my $*MULTINESS = '';
<declarator>
}
token p5routine_declarator:sub { <sym> <routine_def> }
rule parensig {
:dba('signature')
'(' ~ ')' <signature(1)>
}
method checkyada {
try {
my $startsym = self.<blockoid><statementlist><statement>[0]<EXPR><term><sym> // '';
if $startsym eq '...' or $startsym eq '!!!' or $startsym eq '???' {
$*DECLARAND<stub> = 1;
}
};
return self;
}
rule routine_def () {
:temp $*CURLEX;
:my $*IN_DECL = 1;
:my $*DECLARAND;
[
[ '&'<deflongname>? | <deflongname> ]?
<.newlex(1)>
<parensig>?
<trait>*
<!{
$*IN_DECL = 0;
}>
<blockoid>:!s
<.checkyada>
<.getsig>
] || <.panic: "Malformed routine">
}
rule trait {
:my $*IN_DECL = 0;
':' <EXPR(item %comma)>
}
#########
# Nouns #
#########
# (for when you want to tell EXPR that infix already parsed the term)
token nullterm {
<?>
}
token nulltermish {
:dba('null term')
[
| <?stdstopper>
| <term=termish>
{
$¢.<PRE> = $<term><PRE>:delete;
$¢.<POST> = $<term><POST>:delete;
$¢.<~CAPS> = $<term><~CAPS>;
}
| <?>
]
}
token termish {
:my $*SCOPE = "";
:my $*VAR;
:dba('prefix or term')
[
| <PRE> [ <!{ my $p = $<PRE>; my @p = @$p; @p[*-1]<O><term> and $<term> = pop @$p }> <PRE> ]*
[ <?{ $<term> }> || <term> ]
| <term=p5term>
]
# also queue up any postfixes
:dba('postfix')
[
|| <?{ $*QSIGIL }>
[
|| <?{ $*QSIGIL eq '$' }> [ <POST>+! <?after <[ \] } > ) ]> > ]?
|| <POST>+! <?after <[ \] } > ) ]> >
|| { $*VAR = 0; }
]
|| <!{ $*QSIGIL }>
<POST>*
]
{
self.check_variable($*VAR) if $*VAR;
$¢.<~CAPS> = $<term><~CAPS>;
}
}
token p5term:fatarrow { <fatarrow> }
token p5term:variable { <variable> { $*VAR = $<variable> } }
token p5term:package_declarator { <package_declarator=p5package_declarator> }
token p5term:scope_declarator { <scope_declarator=p5scope_declarator> }
token p5term:routine_declarator { <routine_declarator=p5routine_declarator> }
token p5term:circumfix { <circumfix=p5circumfix> }
token p5term:dotty { <dotty=p5dotty> }
token p5term:value { <value=p5value> }
token p5term:capterm { <capterm> }
token p5term:statement_prefix { <statement_prefix=p5statement_prefix> }
token fatarrow {
<key=identifier> \h* '=>' <.ws> <val=EXPR(item %assignment)>
}
# Most of these special variable rules are there simply to catch old p5 brainos
token p5special_variable:sym<$!> { <sym> <!before \w> }
token p5special_variable:sym<$!{ }> {
'$!{' ~ '}' <EXPR>
}
token p5special_variable:sym<$/> {
<sym>
}
token p5special_variable:sym<$~> {
<sym>
}
token p5special_variable:sym<$`> {
<sym>
}
token p5special_variable:sym<$@> {
<sym>
}
token p5special_variable:sym<$#> {
<sym>
}
token p5special_variable:sym<$$> {
<sym> <!alpha>
}
token p5special_variable:sym<$%> {
<sym>
}
token p5special_variable:sym<$^X> {
<sigil=p5sigil> '^' $<letter> = [<[A..Z]>] <?before \W >
}
token p5special_variable:sym<$^> {
<sym>
}
token p5special_variable:sym<$&> {
<sym>
}
token p5special_variable:sym<$*> {
<sym>
}
token p5special_variable:sym<$)> {
<sym>
}
token p5special_variable:sym<$-> {
<sym>
}
token p5special_variable:sym<$=> {
<sym>
}
token p5special_variable:sym<@+> {
<sym>
}
token p5special_variable:sym<%+> {
<sym>
}
token p5special_variable:sym<$+[ ]> {
'$+['
}
token p5special_variable:sym<@+[ ]> {
'@+['
}
token p5special_variable:sym<@+{ }> {
'@+{'
}
token p5special_variable:sym<@-> {
<sym>
}
token p5special_variable:sym<%-> {
<sym>
}
token p5special_variable:sym<$-[ ]> {
'$-['
}
token p5special_variable:sym<@-[ ]> {
'@-['
}
token p5special_variable:sym<%-{ }> {
'@-{'
}
token p5special_variable:sym<$+> {
<sym>
}
token p5special_variable:sym<${^ }> {
<sigil=p5sigil> '{^' :: $<text>=[.*?] '}'
}
token p5special_variable:sym<::{ }> {
'::' <?before '{'>
}
regex p5special_variable:sym<${ }> {
<sigil=p5sigil> '{' {} $<text>=[.*?] '}'
}
token p5special_variable:sym<$[> {
<sym>
}
token p5special_variable:sym<$]> {
<sym>
}
token p5special_variable:sym<$\\> {
<sym>
}
token p5special_variable:sym<$|> {
<sym>
}
token p5special_variable:sym<$:> {
<sym>
}
token p5special_variable:sym<$;> {
<sym>
}
token p5special_variable:sym<$'> { #'
<sym>
}
token p5special_variable:sym<$"> {
<sym> <!{ $*QSIGIL }>
}
token p5special_variable:sym<$,> {
<sym>
}
token p5special_variable:sym['$<'] {
<sym>
}
token p5special_variable:sym«\$>» {
<sym>
}
token p5special_variable:sym<$.> {
<sym>
}
token p5special_variable:sym<$?> {
<sym>
}
# desigilname should only follow a sigil
token desigilname {
[
| <?before '$' > <variable> { $*VAR = $<variable> }
| <longname>
]
}
token variable {
:my $*IN_META = 0;
:my $sigil = '';
:my $name;
<?before <sigil=p5sigil> {
$sigil = $<sigil>.Str;
}> {}
[
|| '&'
[
| <sublongname> { $name = $<sublongname>.Str }
| :dba('infix noun') '[' ~ ']' <infixish(1)>
]
|| [
| <sigil=p5sigil> <desigilname> { $name = $<desigilname>.Str }
| <special_variable=p5special_variable>
| <sigil=p5sigil> $<index>=[\d+]
# Note: $() can also parse as contextualizer in an expression; should have same effect
| <sigil=p5sigil> <?before '<' | '('> <postcircumfix=p5postcircumfix>
| <sigil=p5sigil> <?{ $*IN_DECL }>
| <?> {{
if $*QSIGIL {
return ();
}
else {
$¢.panic("Anonymous variable requires declarator");
}
}}
]
]
}
# Note, don't reduce on a bare sigil unless you don't care what the longest token is.
token p5sigil:sym<$> { <sym> }
token p5sigil:sym<@> { <sym> }
token p5sigil:sym<%> { <sym> }
token p5sigil:sym<&> { <sym> }
token deflongname {
:dba('new name to be defined')
<name>
[
| <colonpair>+ { $¢.add_macro($<name>) if $*IN_DECL; }
| { $¢.add_routine($<name>.Str) if $*IN_DECL; }
]
}
token longname {
<name> <colonpair>*
}
token name {
[
| <identifier> <morename>*
| <morename>+
]
}
token morename {
:my $*QSIGIL ::= '';
'::'
[
<?before '(' | <alpha> >
[
| <identifier>
| :dba('indirect name') '(' ~ ')' <EXPR>
]
]?
}
token subshortname {
[
| <category>
[ <colonpair>+ { $¢.add_macro($<category>) if $*IN_DECL; } ]?
| <desigilname>
]
}
token sublongname {
<subshortname> <sigterm>?
}
token p5value:quote { <quote=p5quote> }
token p5value:number { <number=p5number> }
token p5value:version { <version=p5version> }
# Note: call this only to use existing type, not to declare type
token typename {
[
| '::?'<identifier> # parse ::?CLASS as special case
| <longname>
<?{{
my $longname = $<longname>.Str;
if substr($longname, 0, 2) eq '::' {
$¢.add_my_name(substr($longname, 2));
}
else {
$¢.is_name($longname)
}
}}>
]
# parametric type?
<.unsp>? [ <?before '['> <postcircumfix=p5postcircumfix> ]?
<.ws> [ 'of' <.ws> <typename> ]?
}
token numish {
[
| <integer>
| <dec_number>
| <rad_number>
| 'NaN' »
| 'Inf' »
| '+Inf' »
| '-Inf' »
]
}
token number:numish { <numish> }
token integer {
[
| 0 [ b <[01]>+ [ _ <[01]>+ ]*
| o <[0..7]>+ [ _ <[0..7]>+ ]*
| x <[0..9a..fA..F]>+ [ _ <[0..9a..fA..F]>+ ]*
| d \d+ [ _ \d+]*
| \d+[_\d+]*
<!!{ $¢.worry("Leading 0 does not indicate octal in Perl 6") }>
]
| \d+[_\d+]*
]
}
token radint {
[
| <integer>
| <?before ':'> <rad_number> <?{
defined $<rad_number><intpart>
and
not defined $<rad_number><fracpart>
}>
]
}
token escale {
<[Ee]> <[+\-]>? \d+[_\d+]*
}
# careful to distinguish from both integer and 42.method
token dec_number {
:dba('decimal number')
[
| $<coeff> = [ '.' \d+[_\d+]* ] <escale>?
| $<coeff> = [\d+[_\d+]* '.' \d+[_\d+]* ] <escale>?
| $<coeff> = [\d+[_\d+]* ] <escale>
]
<!!before [ '.' <?before \d> <.panic: "Number contains two decimal points (missing 'v' for version number?)">]? >
}
token octints { [<.ws><octint><.ws>] +% ',' }
token octint {
<[ 0..7 ]>+ [ _ <[ 0..7 ]>+ ]*
}
token hexints { [<.ws><hexint><.ws>] +% ',' }
token hexint {
<[ 0..9 a..f A..F ]>+ [ _ <[ 0..9 a..f A..F ]>+ ]*
}
##########
# Quotes #
##########
our @herestub_queue;
class Herestub {
has Str $.delim;
has $.orignode;
has $.lang;
} # end class
role herestop {
token stopper { ^^ {} $<ws>=(\h*?) $*DELIM \h* <.unv>?? $$ \v? }
} # end role
# XXX be sure to temporize @herestub_queue on reentry to new line of heredocs
method heredoc () {
my $*CTX ::= self.callm if $*DEBUG +& DEBUG::trace_call;
return if self.peek;
my $here = self;
while my $herestub = shift @herestub_queue {
my $*DELIM = $herestub.delim;
my $lang = $herestub.lang.mixin( ::herestop );
my $doc;
if ($doc) = $here.nibble($lang) {
$here = $doc.trim_heredoc();
$herestub.orignode<doc> = $doc;
}
else {
self.panic("Ending delimiter $*DELIM not found");
}
}
return self.cursor($here.pos); # return to initial type
}
proto token backslash { <...> }
proto token escape { <...> }
token starter { <!> }
token escape:none { <!> }
token babble ($l) {
:my $lang = $l;
:my $start;
:my $stop;
<.ws>
[ <quotepair> <.ws>
{
my $kv = $<quotepair>[*-1];
$lang = $lang.tweak($kv.<k>, $kv.<v>)
or self.sorry("Unrecognized adverb :" ~ $kv.<k> ~ '(' ~ $kv.<v> ~ ')');
}
]*
{
($start,$stop) = $¢.peek_delimiters();
$lang = $start ne $stop ?? $lang.balanced($start,$stop)
!! $lang.unbalanced($stop);
$<B> = [$lang,$start,$stop];
}
}
token quibble ($l) {
:my ($lang, $start, $stop);
<babble($l)>
{ my $B = $<babble><B>; ($lang,$start,$stop) = @$B; }
$start <nibble($lang)> [ $stop || <.panic: "Couldn't find terminator $stop"> ]
{{
if $lang<_herelang> {
push @herestub_queue,
::Herestub.new(
delim => $<nibble><nibbles>[0]<TEXT>,
orignode => $¢,
lang => $lang<_herelang>,
);
}
}}
}
token sibble ($l, $lang2) {
:my ($lang, $start, $stop);
<babble($l)>
{ my $B = $<babble><B>; ($lang,$start,$stop) = @$B; }
$start <left=nibble($lang)> [ $stop || <.panic: "Couldn't find terminator $stop"> ]
[ <?{ $start ne $stop }>
<.ws> <quibble($lang2)>
||
{ $lang = $lang2.unbalanced($stop); }
<right=nibble($lang)> $stop
]
}
token tribble ($l, $lang2 = $l) {
:my ($lang, $start, $stop);
<babble($l)>
{ my $B = $<babble><B>; ($lang,$start,$stop) = @$B; }
$start <left=nibble($lang)> [ $stop || <.panic: "Couldn't find terminator $stop"> ]
[ <?{ $start ne $stop }>
<.ws> <quibble($lang2)>
||
{ $lang = $lang2.unbalanced($stop); }
<right=nibble($lang)> $stop
]
}
token quasiquibble ($l) {
:my ($lang, $start, $stop);
:my $*QUASIMODO = 0; # :COMPILING sets true
<babble($l)>
{ my $B = $<babble><B>; ($lang,$start,$stop) = @$B; }
[
|| <?{ $start eq '{' }> [ :lang($lang) <block> ]
|| $start [ :lang($lang) <statementlist> ] [$stop || <.panic: "Couldn't find terminator $stop"> ]
]
}
# note: polymorphic over many quote languages, we hope
token nibbler {
:my $text = '';
:my $from = self.pos;
:my $to = $from;
:my @nibbles = ();
:my $multiline = 0;
{ $<_from> = self.pos; }
[ <!before <stopper> >
[
|| <starter> <nibbler> <stopper>
{{
push @nibbles, $¢.makestr(TEXT => $text, _from => $from, _pos => $to ) if $from != $to;
my $n = $<nibbler>[*-1]<nibbles>;
my @n = @$n;
push @nibbles, $<starter>;
push @nibbles, @n;
push @nibbles, $<stopper>;
$text = '';
$to = $from = $¢.pos;
}}
|| <escape> {{
push @nibbles, $¢.makestr(TEXT => $text, _from => $from, _pos => $to ) if $from != $to;
push @nibbles, $<escape>[*-1];
$text = '';
$to = $from = $¢.pos;
}}
|| .
{{
my $ch = substr($*ORIG, $¢.pos-1, 1);
$text ~= $ch;
$to = $¢.pos;
if $ch ~~ "\n" {
$multiline++;
}
}}
]
]*
{{
push @nibbles, $¢.makestr(TEXT => $text, _from => $from, _pos => $to ) if $from != $to or !@nibbles;
$<nibbles> = \@nibbles;
$<_pos> = $¢.pos;
$<nibbler> :delete;
$<escape> :delete;
$<starter> :delete;
$<stopper> :delete;
$*LAST_NIBBLE = $¢;
$*LAST_NIBBLE_MULTILINE = $¢ if $multiline;
}}
}
# and this is what makes nibbler polymorphic...
method nibble ($lang) {
self.cursor_fresh($lang).nibbler;
}
token p5quote:sym<' '> { "'" <nibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:q).unbalanced("'"))> "'" }
token p5quote:sym<" "> { '"' <nibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:qq).unbalanced('"'))> '"' }
token p5circumfix:sym«< >» { '<'
<nibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:qq).tweak(:w).balanced('<','>'))> '>' }
token p5quote:sym</ /> {
'/' <nibble( $¢.cursor_fresh( %*LANG<Regex> ).unbalanced("/") )> [ '/' || <.panic: "Unable to parse regex; couldn't find final '/'"> ]
<p5rx_mods>?
}
# handle composite forms like qww
token quote:qq {
'qq'
[
| » <.ws> <quibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:qq))>
]
}
token quote:q {
'q'
[
| » <.ws> <quibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:q))>
]
}
token quote:qr {
<sym> » <!before '('>
<quibble( $¢.cursor_fresh( %*LANG<Regex> ) )>
<p5rx_mods>
}
token quote:m {
<sym> » <!before '('>
<quibble( $¢.cursor_fresh( %*LANG<Regex> ) )>
<p5rx_mods>
}
token quote:s {
<sym> » <!before '('>
<pat=sibble( $¢.cursor_fresh( %*LANG<Regex> ), $¢.cursor_fresh( %*LANG<Q> ).tweak(:qq))>
<p5rx_mods>
}
token quote:tr {
<sym> » <!before '('> <pat=tribble( $¢.cursor_fresh( %*LANG<Q> ).tweak(:q))>
<p5tr_mods>
}
token p5rx_mods {
<!after \s>
(< i g s m x c e >+)
}
token p5tr_mods {
(< c d s ] >+)
}
# assumes whitespace is eaten already
method peek_delimiters {
my $pos = self.pos;
my $startpos = $pos;
my $char = substr($*ORIG,$pos++,1);
if $char ~~ /^\s$/ {
self.panic("Whitespace character is not allowed as delimiter"); # "can't happen"
}
elsif $char ~~ /^\w$/ {
self.panic("Alphanumeric character is not allowed as delimiter");
}
elsif %STD::close2open{$char} {
self.panic("Use of a closing delimiter for an opener is reserved");
}
my $rightbrack = %STD::open2close{$char};
if not defined $rightbrack {
return $char, $char;
}
while substr($*ORIG,$pos,1) eq $char {
$pos++;
}
my $len = $pos - $startpos;
my $start = $char x $len;
my $stop = $rightbrack x $len;
return $start, $stop;
}
role startstop[$start,$stop] {
token starter { $start }
token stopper { $stop }
} # end role
role stop[$stop] {
token starter { <!> }
token stopper { $stop }
} # end role
role unitstop[$stop] {
token unitstopper { $stop }
} # end role
token unitstopper { $ }
method balanced ($start,$stop) { self.mixin( ::startstop[$start,$stop] ); }
method unbalanced ($stop) { self.mixin( ::stop[$stop] ); }
method unitstop ($stop) { self.mixin( ::unitstop[$stop] ); }
token charname {
[
| <radint>
| <[a..z A..Z]><-[ \] , \# ]>*?<[a..z A..Z ) ]> <?before \s*<[ \] , \# ]>>
] || <.panic: "Unrecognized character name">
}
token charnames { [<.ws><charname><.ws>] +% ',' }
token charspec {
[
| :dba('character name') '[' ~ ']' <charnames>
| \d+
| <[ ?..Z \\.._ ]>
| <?> <.panic: "Unrecognized \\c character">
]
}
method truly ($bool,$opt) {
return self if $bool;
self.panic("Cannot negate $opt adverb");
}
grammar Q is STD {
role b1 {
token p5escape:sym<\\> { <sym> <item=p5backslash> }
token p5backslash:qq { <?before 'q'> { $<quote> = $¢.cursor_fresh(%*LANG<MAIN>).quote(); } }
token p5backslash:sym<\\> { <text=sym> }
token p5backslash:stopper { <text=stopper> }
token p5backslash:a { <sym> }
token p5backslash:b { <sym> }
token p5backslash:c { <sym> <charspec> }
token p5backslash:e { <sym> }
token p5backslash:f { <sym> }
token p5backslash:n { <sym> }
token p5backslash:o { :dba('octal character') <sym> [ <octint> | '[' ~ ']' <octints> ] }
token p5backslash:r { <sym> }
token p5backslash:t { <sym> }
token p5backslash:x { :dba('hex character') <sym> [ <hexint> | '[' ~ ']' <hexints> ] }
token p5backslash:sym<0> { <sym> }
} # end role
role b0 {
token p5escape:sym<\\> { <!> }
} # end role
role c1 {
token p5escape:sym<{ }> { <?before '{'> [ :lang(%*LANG<MAIN>) <block> ] }
} # end role
role c0 {
token p5escape:sym<{ }> { <!> }
} # end role
role s1 {
token p5escape:sym<$> {
:my $*QSIGIL ::= '$';
<?before '$'>
[ :lang(%*LANG<MAIN>) <EXPR(item %methodcall)> ] || <.panic: "Non-variable \$ must be backslashed">
}
} # end role
role s0 {
token p5escape:sym<$> { <!> }
} # end role
role a1 {
token p5escape:sym<@> {
:my $*QSIGIL ::= '@';
<?before '@'>
[ :lang(%*LANG<MAIN>) <EXPR(item %methodcall)> | <!> ] # trap ABORTBRANCH from variable's ::
}
} # end role
role a0 {
token p5escape:sym<@> { <!> }
} # end role
role h1 {
token p5escape:sym<%> {
:my $*QSIGIL ::= '%';
<?before '%'>
[ :lang(%*LANG<MAIN>) <EXPR(item %methodcall)> | <!> ]
}
} # end role
role h0 {
token p5escape:sym<%> { <!> }
} # end role
role f1 {
token p5escape:sym<&> {
:my $*QSIGIL ::= '&';
<?before '&'>
[ :lang(%*LANG<MAIN>) <EXPR(item %methodcall)> | <!> ]
}
} # end role
role f0 {
token p5escape:sym<&> { <!> }
} # end role
role w1 {
method postprocess ($s) { $s.words }
} # end role
role w0 {
method postprocess ($s) { $s }
} # end role
role ww1 {
method postprocess ($s) { $s.words }
} # end role
role ww0 {
method postprocess ($s) { $s }
} # end role
role x1 {
method postprocess ($s) { $s.run }
} # end role
role x0 {
method postprocess ($s) { $s }
} # end role
role q {
token stopper { \' }
token p5escape:sym<\\> { <sym> <item=p5backslash> }
token p5backslash:qq { <?before 'q'> { $<quote> = $¢.cursor_fresh(%*LANG<MAIN>).quote(); } }
token p5backslash:sym<\\> { <text=sym> }
token p5backslash:stopper { <text=stopper> }
# in single quotes, keep backslash on random character by default
token p5backslash:misc { {} (.) { $<text> = "\\" ~ $0.Str; } }
# begin tweaks (DO NOT ERASE)
multi method tweak (:single(:$q)!) { self.panic("Too late for :q") }
multi method tweak (:double(:$qq)!) { self.panic("Too late for :qq") }
# end tweaks (DO NOT ERASE)
} # end role
role qq does b1 does c1 does s1 does a1 does h1 does f1 {
token stopper { \" }
# in double quotes, omit backslash on random \W backslash by default
token p5backslash:misc { {} [ (\W) { $<text> = $0.Str; } | $<x>=(\w) <.panic("Unrecognized backslash sequence: '\\" ~ $<x>.Str ~ "'")> ] }
# begin tweaks (DO NOT ERASE)
multi method tweak (:single(:$q)!) { self.panic("Too late for :q") }
multi method tweak (:double(:$qq)!) { self.panic("Too late for :qq") }
# end tweaks (DO NOT ERASE)
} # end role
role p5 {
# begin tweaks (DO NOT ERASE)
multi method tweak (:$g!) { self }
multi method tweak (:$i!) { self }
multi method tweak (:$m!) { self }
multi method tweak (:$s!) { self }
multi method tweak (:$x!) { self }
multi method tweak (:$p!) { self }
multi method tweak (:$c!) { self }
# end tweaks (DO NOT ERASE)
} # end role
# begin tweaks (DO NOT ERASE)
multi method tweak (:single(:$q)!) { self.truly($q,':q'); self.mixin( ::q ); }
multi method tweak (:double(:$qq)!) { self.truly($qq, ':qq'); self.mixin( ::qq ); }
multi method tweak (:backslash(:$b)!) { self.mixin($b ?? ::b1 !! ::b0) }
multi method tweak (:scalar(:$s)!) { self.mixin($s ?? ::s1 !! ::s0) }
multi method tweak (:array(:$a)!) { self.mixin($a ?? ::a1 !! ::a0) }
multi method tweak (:hash(:$h)!) { self.mixin($h ?? ::h1 !! ::h0) }
multi method tweak (:function(:$f)!) { self.mixin($f ?? ::f1 !! ::f0) }
multi method tweak (:closure(:$c)!) { self.mixin($c ?? ::c1 !! ::c0) }
multi method tweak (:exec(:$x)!) { self.mixin($x ?? ::x1 !! ::x0) }
multi method tweak (:words(:$w)!) { self.mixin($w ?? ::w1 !! ::w0) }
multi method tweak (:quotewords(:$ww)!) { self.mixin($ww ?? ::ww1 !! ::ww0) }
multi method tweak (:heredoc(:$to)!) { self.truly($to, ':to'); self.cursor_herelang; }
multi method tweak (:$regex!) {
return %*LANG<Regex>;
}
multi method tweak (:$trans!) {
return %*LANG<Trans>;
}
multi method tweak (*%x) {
my @k = keys(%x);
self.panic("Unrecognized quote modifier: " ~ join('',@k));
}
# end tweaks (DO NOT ERASE)
} # end grammar
###########################
# Captures and Signatures #
###########################
token capterm {
'\\'
[
| '(' <capture>? ')'
| <?before \S> <termish>
]
}
rule capture {
:my $*INVOCANT_OK = 1;
<EXPR>
}
rule param_sep { [','|':'|';'|';;'] }
token signature ($lexsig = 0) {
# XXX incorrectly scopes &infix:<x> parameters to outside following block
:my $*IN_DECL = 1;
:my $*zone = 'posreq';
:my $startpos = self.pos;
<.ws>
[
| <?before '-->' | ')' | ']' | '{' | ':'\s >
| [ <parameter> || <.panic: "Malformed parameter"> ]
] +% <param_sep>
<.ws>
{ $*IN_DECL = 0; }
[ '-->' <.ws> <typename> ]?
{{
$*LEFTSIGIL = '@';
if $lexsig {
$*CURLEX.<$?SIGNATURE> ~= '(' ~ substr($*ORIG, $startpos, $¢.pos - $startpos) ~ ')';
$*CURLEX.<!NEEDSIG>:delete;
}
}}
}
token type_constraint {
<typename>
<.ws>
}
rule p5statement_prefix:do {<sym> <block> }
rule p5statement_prefix:eval {<sym> <block> }
#########
# Terms #
#########
# start playing with the setting stubber
token p5term:sym<undef> {
<sym> »
<O(|%term)>
}
token p5term:sym<continue>
{ <sym> » <O(|%term)> }
token p5circumfix:sigil
{ :dba('contextualizer') <sigil=p5sigil> '(' ~ ')' <semilist> { $*LEFTSIGIL ||= $<sigil>.Str } <O(|%term)> }
token p5circumfix:sym<( )>
{ :dba('parenthesized expression') '(' ~ ')' <semilist> <O(|%term)> }
token p5circumfix:sym<[ ]>
{ :dba('array composer') '[' ~ ']' <semilist> <O(|%term)> }
#############
# Operators #
#############
token PRE {
:dba('prefix or meta-prefix')
[
| <prefix=p5prefix>
{ $<O> = $<prefix><O>; $<sym> = $<prefix><sym> }
]
# XXX assuming no precedence change
<.ws>
}
token infixish ($in_meta = $*IN_META) {
:my $*IN_META = $in_meta;
<!stdstopper>
<!infixstopper>
:dba('infix or meta-infix')
<infix=p5infix>
{ $<O> = $<infix>.<O>; $<sym> = $<infix>.<sym>; }
}
token p5dotty:sym«->» {
<sym> <dottyop>
<O(|%methodcall)> }
token dottyopish {
<term=dottyop>
}
token dottyop {
:dba('dotty method or postfix')
[
| <methodop>
| <!alpha> <postcircumfix=p5postcircumfix> { $<O> = $<postcircumfix><O>; $<sym> = $<postcircumfix><sym>; }
]
}
# Note, this rule mustn't do anything irreversible because it's used
# as a lookahead by the quote interpolator.
token POST {
<!stdstopper>
# last whitespace didn't end here
<!{ @*MEMOS[$¢.pos]<ws> }>
:dba('postfix')
[
| <dotty=p5dotty> { $<O> = $<dotty><O>; $<sym> = $<dotty><sym>; $<~CAPS> = $<dotty><~CAPS>; }
| <postop> { $<O> = $<postop><O>; $<sym> = $<postop><sym>; $<~CAPS> = $<postop><~CAPS>; }
]
}
token p5postcircumfix:sym<( )>
{ :dba('argument list') '(' ~ ')' <semiarglist> <O(|%methodcall)> }
token p5postcircumfix:sym<[ ]>
{ :dba('subscript') '[' ~ ']' <semilist> <O(|%methodcall)> }
token p5postcircumfix:sym<{ }>
{ :dba('subscript') '{' ~ '}' <semilist> <O(|%methodcall)> }
token postop {
| <postfix=p5postfix> { $<O> := $<postfix><O>; $<sym> := $<postfix><sym>; }
| <postcircumfix=p5postcircumfix> { $<O> := $<postcircumfix><O>; $<sym> := $<postcircumfix><sym>; }
}
token methodop {
[
| <longname>
| <?before '$' | '@' | '&' > <variable> { $*VAR = $<variable> }
]
:dba('method arguments')
[
| <?[\\(]> <args>
]?
}
token semiarglist {
<arglist> +% ';'
<.ws>
}
token arglist {
:my $inv_ok = $*INVOCANT_OK;
:my StrPos $*endargs = 0;
:my $*GOAL ::= 'endargs';
:my $*QSIGIL ::= '';
<.ws>
:dba('argument list')
[
| <?stdstopper>
| <EXPR(item %listop)> {{
my $delims = $<EXPR><delims>;
for @$delims {
if ($_.<sym> // '') eq ':' {
if $inv_ok {
$*INVOCANT_IS = $<EXPR><list>[0];
}
}
}
}}
]
}
token p5circumfix:sym<{ }> {
<?before '{' >
<pblock>
<O(|%term)> }
## methodcall
token p5postfix:sym['->'] ()
{ '->' }
## autoincrement
token p5postfix:sym<++>
{ <sym> <O(|%autoincrement)> }
token p5postfix:sym«--»
{ <sym> <O(|%autoincrement)> }
token p5prefix:sym<++>
{ <sym> <O(|%autoincrement)> }
token p5prefix:sym«--»
{ <sym> <O(|%autoincrement)> }
## exponentiation
token p5infix:sym<**>
{ <sym> <O(|%exponentiation)> }
## symbolic unary
token p5prefix:sym<!>
{ <sym> <O(|%symbolic_unary)> }
token p5prefix:sym<+>
{ <sym> <O(|%symbolic_unary)> }
token p5prefix:sym<->
{ <sym> <O(|%symbolic_unary)> }
token p5prefix:sym<~>
{ <sym> <O(|%symbolic_unary)> }
## binding
token p5infix:sym<!~>
{ <sym> <O(|%binding)> }
token p5infix:sym<=~>
{ <sym> <O(|%binding)> }
## multiplicative
token p5infix:sym<*>
{ <sym> <O(|%multiplicative)> }
token p5infix:sym</>
{ <sym> <O(|%multiplicative)> }
token p5infix:sym<%>
{ <sym> <O(|%multiplicative)> }
token p5infix:sym« << »
{ <sym> <O(|%multiplicative)> }
token p5infix:sym« >> »
{ <sym> <O(|%multiplicative)> }
token p5infix:sym<x>
{ <sym> <O(|%multiplicative)> }
## additive
token p5infix:sym<.> ()
{ <sym> <O(|%additive)> }
token p5infix:sym<+>
{ <sym> <O(|%additive)> }
token p5infix:sym<->
{ <sym> <O(|%additive)> }
## bitwise and (all)
token p5infix:sym<&>
{ <sym> <O(|%bitwise_and)> }
token p5infix:sym<also>
{ <sym> <O(|%bitwise_and)> }
## bitwise or (any)
token p5infix:sym<|>
{ <sym> <O(|%bitwise_or)> }
token p5infix:sym<^>
{ <sym> <O(|%bitwise_or)> }
## named unary examples
# (need \s* to win LTM battle with listops)
token p5prefix:sleep
{ <sym> » <?before \s*> <O(|%named_unary)> }
token p5prefix:abs
{ <sym> » <?before \s*> <O(|%named_unary)> }
token p5prefix:let
{ <sym> » <?before \s*> <O(|%named_unary)> }
token p5prefix:temp
{ <sym> » <?before \s*> <O(|%named_unary)> }
## comparisons
token p5infix:sym« <=> »
{ <sym> <?{ $<O><returns> = "Order"; }> <O(|%comparison)> }
token p5infix:cmp
{ <sym> <?{ $<O><returns> = "Order"; }> <O(|%comparison)> }
token p5infix:sym« < »
{ <sym> <O(|%comparison)> }
token p5infix:sym« <= »
{ <sym> <O(|%comparison)> }
token p5infix:sym« > »
{ <sym> <O(|%comparison)> }
token p5infix:sym« >= »
{ <sym> <O(|%comparison)> }
token p5infix:sym<eq>
{ <sym> <O(|%equality)> }
token p5infix:sym<ne>
{ <sym> <O(|%equality)> }
token p5infix:sym<lt>
{ <sym> <O(|%comparison)> }
token p5infix:sym<le>
{ <sym> <O(|%comparison)> }
token p5infix:sym<gt>
{ <sym> <O(|%comparison)> }
token p5infix:sym<ge>
{ <sym> <O(|%comparison)> }
## equality
token p5infix:sym<==>
{ <sym> <!before '=' > <O(|%equality)> }
token p5infix:sym<!=>
{ <sym> <?before \s> <O(|%equality)> }
## tight and
token p5infix:sym<&&>
{ <sym> <O(|%tight_and)> }
## tight or
token p5infix:sym<||>
{ <sym> <O(|%tight_or)> }
token p5infix:sym<^^>
{ <sym> <O(|%tight_or)> }
token p5infix:sym<//>
{ <sym> <O(|%tight_or)> }
## range
token p5infix:sym<..>
{ <sym> <O(|%range)> }
token p5infix:sym<...>
{ <sym> <O(|%range)> }
## conditional
token p5infix:sym<? :> {
:my $*GOAL ::= ':';
'?'
<.ws>
<EXPR(item %assignment)>
[ ':' ||
[
|| <?before '='> <.panic: "Assignment not allowed within ?:">
|| <?before '!!'> <.panic: "Please use : rather than !!">
|| <?before <infixish>> # Note: a tight infix would have parsed right
<.panic: "Precedence too loose within ?:; use ?(): instead ">
|| <.panic: "Found ? but no :; possible precedence problem">
]
]
{ $<O><_reducecheck> = 'raise_middle'; }
<O(|%conditional)> }
method raise_middle {
self.<middle> = self.<infix><EXPR>;
self;
}
token p5infix:sym<=> ()
{ <sym> <O(|%assignment)> }
## list item separator
token p5infix:sym<,>
{ <sym> { $<O><fiddly> = 0; } <O(|%comma)> }
token p5infix:sym« => »
{ <sym> { $<O><fiddly> = 0; } <O(|%comma)> }
# force identifier(), identifier.(), etc. to be a function call always
token p5term:identifier
{
:my $name;
:my $pos;
<identifier> <?before [<unsp>|'(']? >
{ $name = $<identifier>.Str; $pos = $¢.pos; }
<args( $¢.is_name($name) )>
{ self.add_mystery($name,$pos,substr($*ORIG,$pos,1)) unless $<args><invocant>; }
<O(|%term)> }
token p5term:opfunc
{
<category> <colonpair>+ <args>
<O(|%term)> }
token args ($istype = 0) {
:my $listopish = 0;
:my $*GOAL ::= '';
:my $*INVOCANT_OK = 1;
:my $*INVOCANT_IS;
[
# | :dba('argument list') '.(' ~ ')' <semiarglist>
| :dba('argument list') '(' ~ ')' <semiarglist>
| :dba('argument list') <.unsp> '(' ~ ')' <semiarglist>
| { $listopish = 1 } [<?before \s> <!{ $istype }> <.ws> <!infixstopper> <arglist>]?
]
{ $<invocant> = $*INVOCANT_IS; }
:dba('extra arglist after (...):')
[
|| <?{ $listopish }>
|| ':' <?before \s> <moreargs=arglist> # either switch to listopiness
|| {{ $<O> = {}; }} # or allow adverbs (XXX needs hoisting?)
]
}
# names containing :: may or may not be function calls
# bare identifier without parens also handled here if no other rule parses it
token p5term:name
{
:my $name;
:my $pos;
<longname>
{
$name = $<longname>.Str;
$pos = $¢.pos;
}
[
|| <?{
$¢.is_name($<longname>.Str) or substr($<longname>.Str,0,2) eq '::'
}>
# parametric type?
<.unsp>? [ <?before '['> <postcircumfix=p5postcircumfix> ]?
:dba('type parameter')
[
'::'
<?before [ '«' | '<' | '{' | '<<' ] > <postcircumfix=p5postcircumfix>
]?
# unrecognized names are assumed to be post-declared listops.
|| <args> { self.add_mystery($name,$pos,'termish') unless $<args><invocant>; }
]
<O(|%term)> }
## loose and
token p5infix:sym<and>
{ <sym> <O(|%loose_and)> }
token p5infix:sym<andthen>
{ <sym> <O(|%loose_and)> }
## loose or
token p5infix:sym<or>
{ <sym> <O(|%loose_or)> }
token p5infix:sym<orelse>
{ <sym> <O(|%loose_or)> }
token p5infix:sym<xor>
{ <sym> <O(|%loose_or)> }
## expression terminator
# Note: must always be called as <?terminator> or <?before ...<p5terminator>...>
token p5terminator:sym<;>
{ ';' <O(|%terminator)> }
token p5terminator:sym<if>
{ 'if' » <.nofun> <O(|%terminator)> }
token p5terminator:sym<unless>
{ 'unless' » <.nofun> <O(|%terminator)> }
token p5terminator:sym<while>
{ 'while' » <.nofun> <O(|%terminator)> }
token p5terminator:sym<until>
{ 'until' » <.nofun> <O(|%terminator)> }
token p5terminator:sym<for>
{ 'for' » <.nofun> <O(|%terminator)> }
token p5terminator:sym<given>
{ 'given' » <.nofun> <O(|%terminator)> }
token p5terminator:sym<when>
{ 'when' » <.nofun> <O(|%terminator)> }
token p5terminator:sym<)>
{ <sym> <O(|%terminator)> }
token p5terminator:sym<]>
{ ']' <O(|%terminator)> }
token p5terminator:sym<}>
{ '}' <O(|%terminator)> }
token p5terminator:sym<:>
{ ':' <?{ $*GOAL eq ':' }> <O(|%terminator)> }
regex infixstopper {
:dba('infix stopper')
[
| <?before <stopper> >
| <?before ':' > <?{ $*GOAL eq ':' }>
| <?{ $*GOAL eq 'endargs' and @*MEMOS[$¢.pos]<endargs> }>
]
}
# overridden in subgrammars
token stopper { <!> }
# hopefully we can include these tokens in any outer LTM matcher
regex stdstopper {
:temp @*STUB = return self if @*MEMOS[self.pos]<endstmt> :exists;
:dba('standard stopper')
[
| <?terminator>
| <?unitstopper>
| $ # unlikely, check last (normal LTM behavior)
]
{ @*MEMOS[$¢.pos]<endstmt> ||= 1; }
}
## vim: expandtab sw=4 ft=perl6
grammar Regex is STD {
# begin tweaks (DO NOT ERASE)
multi method tweak (:global(:$g)!) { self }
multi method tweak (:ignorecase(:$i)!) { self }
# end tweaks (DO NOT ERASE)
token category:p5metachar { <sym> }
proto token p5metachar { <...> }
token category:p5backslash { <sym> }
proto token p5backslash { <...> }
token category:p5assertion { <sym> }
proto token p5assertion { <...> }
token category:p5quantifier { <sym> }
proto token p5quantifier { <...> }
token category:p5mod_internal { <sym> }
proto token p5mod_internal { <...> }
proto token p5regex_infix { <...> }
# suppress fancy end-of-line checking
token codeblock {
:my $*GOAL ::= '}';
'{' :: [ :lang($¢.cursor_fresh(%*LANG<MAIN>)) <statementlist> ]
[ '}' || <.panic: "Unable to parse statement list; couldn't find right brace"> ]
}
token ws {
<?{ $*RX<s> }>
|| [ <?before \s | '#'> <.nextsame> ]? # still get all the pod goodness, hopefully
}
rule nibbler {
:temp $*ignorecase;
<EXPR>
}
token termish {
<.ws> # XXX assuming old /x here?
<term=quant_atom_list>
}
token quant_atom_list {
<quantified_atom>+
}
token infixish {
<!infixstopper>
<!stdstopper>
<regex_infix=p5regex_infix>
{
$<O> = $<regex_infix><O>;
$<sym> = $<regex_infix><sym>;
}
}
regex infixstopper {
:dba('infix stopper')
<?before <stopper> >
}
token p5regex_infix:sym<|> { <sym> <O(|%tight_or)> }
token quantified_atom {
<!stopper>
<!p5regex_infix>
<atom>
[ <.ws> <quantifier=p5quantifier>
# <?{ $<atom>.max_width }>
# || <.panic: "Cannot quantify zero-width atom">
]?
<.ws>
}
token atom {
[
| \w
| <metachar=p5metachar>
| '\\' :: .
]
}
# sequence stoppers
token p5metachar:sym<|> { '|' :: <fail> }
token p5metachar:sym<)> { ')' :: <fail> }
token p5metachar:quant { <quantifier=p5quantifier> <.panic: "quantifier quantifies nothing"> }
# "normal" metachars
token p5metachar:sym<[ ]> {
<before '['> <quibble($¢.cursor_fresh( %*LANG<Q> ).tweak(:q))> # XXX parse as q[] for now
}
token p5metachar:sym«(? )» {
'(?' {} <assertion=p5assertion>
[ ')' || <.panic: "Perl 5 regex assertion not terminated by parenthesis"> ]
}
token p5metachar:sym<( )> {
'(' {} [:lang(self.unbalanced(')')) <nibbler>]?
[ ')' || <.panic: "Unable to parse Perl 5 regex; couldn't find right parenthesis"> ]
{ $/<sym> := <( )> }
}
token p5metachar:sym<\\> { <sym> <backslash=p5backslash> }
token p5metachar:sym<.> { <sym> }
token p5metachar:sym<^> { <sym> }
token p5metachar:sym<$> {
'$' <?before \W | $>
}
token p5metachar:var {
<?before <sigil=p5sigil>\w>
<.panic: "Cannot interpolate variable in Perl 5 regex">
}
token p5backslash:A { <sym> }
token p5backslash:a { <sym> }
token p5backslash:b { :i <sym> }
token p5backslash:c { :i <sym>
<[ ?.._ ]> || <.panic: "Unrecognized \\c character">
}
token p5backslash:d { :i <sym> }
token p5backslash:e { :i <sym> }
token p5backslash:f { :i <sym> }
token p5backslash:h { :i <sym> }
token p5backslash:l { :i <sym> }
token p5backslash:n { :i <sym> }
token p5backslash:o { :dba('octal character') '0' [ <octint> | '{' ~ '}' <octints> ] }
token p5backslash:p { :i <sym> '{' <[\w:]>+ '}' }
token p5backslash:Q { <sym> }
token p5backslash:r { :i <sym> }
token p5backslash:s { :i <sym> }
token p5backslash:t { :i <sym> }
token p5backslash:u { :i <sym> }
token p5backslash:v { :i <sym> }
token p5backslash:w { :i <sym> }
token p5backslash:x { :i :dba('hex character') <sym> [ <hexint> | '{' ~ '}' <hexints> ] }
token p5backslash:z { :i <sym> }
token p5backslash:misc { $<litchar>=(\W) | $<number>=(\d+) }
token p5backslash:oops { <.panic: "Unrecognized Perl 5 regex backslash sequence"> }
token p5assertion:sym<?> { <sym> <codeblock> }
token p5assertion:sym<{ }> { <codeblock> }
token p5assertion:sym«<» { <sym> <?before '=' | '!'> <assertion=p5assertion> }
token p5assertion:sym<=> { <sym> [ <?before ')'> | <rx> ] }
token p5assertion:sym<!> { <sym> [ <?before ')'> | <rx> ] }
token p5assertion:sym«>» { <sym> <rx> }
token rx {
# [:lang(self.unbalanced(')')) <nibbler>]
<nibbler>
[ <?before ')'> || <.panic: "Unable to parse Perl 5 regex; couldn't find right parenthesis"> ]
}
#token p5assertion:identifier { <longname> [ # is qq right here?
# | <?before ')' >
# | <.ws> <nibbler>
# ]
# [ ':' <rx> ]?
#}
token p5mod { <[imox]>* }
token p5mods { <on=p5mod> [ '-' <off=p5mod> ]? }
token p5assertion:mod { <mods=p5mods> [ # is qq right here?
| ':' <rx>?
| <?before ')' >
]
}
token p5assertion:bogus { <.panic: "Unrecognized Perl 5 regex assertion"> }
token p5quantifier:sym<*> { <sym> <quantmod> }
token p5quantifier:sym<+> { <sym> <quantmod> }
token p5quantifier:sym<?> { <sym> <quantmod> }
token p5quantifier:sym<{ }> { '{' \d+ [','\d*]? '}' <quantmod> }
token quantmod { [ '?' | '+' ]? }
} # end grammar