Grammar specification

This document contains the most up-to-date grammar specification for the Bondrewd programming language. The grammar is written in a modified version of Pegen’s format, and the parser generated from it is a PEG parser with packrat caching.

The lexical specification is still to be documented.

Note

The grammar is still a work in progress. Use for general reference only.

Grammar

  1# PEG grammar for the Bondrewd language
  2
  3# TODO: add lookaheads and cuts where applicable; add `(memo)` to the most common rules
  4# TODO: Sequence helpers!
  5# TODO: Helpers to change expr_context. Then also add expr_context to the AST
  6# TODO: Empty rules cause issues with how left-recursion is handled. Fix it!
  7
  8@subheader '''\
  9#include <string>
 10'''
 11
 12@extras '''\
 13std::string _concat_strings(const std::vector<lex::Token> &strings) const {
 14    // Concatenates the values of the given string-literal tokens into one string.
 15    // Every literal must use the same quotes as the first one; otherwise a
 16    // std::runtime_error is thrown. An empty token list yields an empty string.
 17    std::string joined{};
 18
 19    if (strings.empty()) {
 20        return joined;
 21    }
 22
 23    // The first literal's quotes are the reference for every other literal.
 24    const std::string_view expected_quotes = strings.front().get_string().quotes;
 25
 26    for (const auto &token: strings) {
 27        joined += token.get_string().value;
 28
 29        if (token.get_string().quotes != expected_quotes) {
 30            // TODO: Custom error type!
 31            throw std::runtime_error("String literals must have the same quotes");
 32        }
 33    }
 34
 35    return joined;
 36}
 37
 38template <typename T>
 39ast::maybe<T> _opt2maybe(std::optional<ast::field<T>> opt) {
 40    // Converts an optional field into a maybe-field; an empty optional becomes nullptr.
 41    if (!opt) {
 42        return nullptr;
 43    }
 44    return std::move(*opt);
 45}
 46
 47template <typename T>
 48ast::sequence<T> _prepend1(ast::field<T> item, ast::sequence<T> seq) {  // Returns seq with the contents of item inserted at the front
 49    assert(item);  // item must test true, since it is dereferenced on the next line
 50    seq.insert(seq.begin(), std::move(*item));  // moves the dereferenced item into the first position
 51    return seq;
 52}
 53
 54'''
 55
 56# This means all ast::* types are automatically wrapped into ast::field<>
 57@wrap_ast_types
 58
 59start: file
 60
 61#region file
 62file[ast::file]:  # Whole input: zero or more statements followed by the end-of-input marker
 63    | b=stmt* $  { ast::File(std::move(b)) }
 64#endregion file
 65
 66#region stmt
 67stmt[ast::stmt] (memo):  # A single statement; alternatives are tried in the order listed
 68    | cartridge_header_stmt
 69    | assign_stmt
 70    | expr_stmt
 71    | pass_stmt
 72
 73cartridge_header_stmt[ast::stmt]:  # Cartridge header: the 'cartridge' keyword, a name, then ';'
 74    | 'cartridge' n=name ';'  { ast::CartridgeHeader(std::move(n)) }
 75
 76assign_stmt[ast::stmt]:  # Assignment statement; the action passes the left expr, the right expr, then the operator
 77    | a=expr op=assign_op b=expr ';'  { ast::Assign(std::move(a), std::move(b), std::move(op)) }
 78
 79assign_op[ast::assign_op]:  # Assignment operator tokens and the ast node each one produces ('=' gives AsgnNone)
 80    | '='    { ast::AsgnNone() }
 81    | '+='   { ast::AsgnAdd() }
 82    | '-='   { ast::AsgnSub() }
 83    | '*='   { ast::AsgnMul() }
 84    | '/='   { ast::AsgnDiv() }
 85    | '%='   { ast::AsgnMod() }
 86    | '<<='  { ast::AsgnLShift() }
 87    | '>>='  { ast::AsgnRShift() }
 88    | '&='   { ast::AsgnBitAnd() }
 89    | '|='   { ast::AsgnBitOr() }
 90    | '^='   { ast::AsgnBitXor() }
 91
 92expr_stmt[ast::stmt]:  # An expression followed by ';', wrapped in ast::Expr
 93    | a=expr ';'  { ast::Expr(std::move(a)) }
 94
 95pass_stmt[ast::stmt]:  # Empty statement: a lone ';'
 96    | ';'  { ast::Pass() }
 97#endregion stmt
 98
 99#region defn
100defn[ast::defn] (memo):  # Definition with a time flag; the flag is stored into the node produced by raw_defn
101    | f=xtime_flag a=raw_defn  { ({ a->flag = std::move(f); a; }) }
102
103raw_defn[ast::defn]:  # The definition kinds, without the leading time flag
104    | var_def
105    | func_def
106    | struct_def
107    | impl_def
108    | ns_def
109
110# TODO: Allow 'let' too
111var_def[ast::defn]:  # 'var' name, optional type annotation, optional '=' initializer, ';'; the last VarDef argument is always true
112    | 'var' n=name t=type_annotation? v=['=' expr] ';'  { ast::VarDef(std::move(n), _opt2maybe(std::move(t)), _opt2maybe(std::move(v)), true) }
113
114func_def[ast::defn]:  # 'func', optional name, parenthesized args_spec, optional type annotation after ')', then a body
115    | 'func' n=name? '(' a=args_spec ')' t=type_annotation? b=func_body  { ast::FuncDef(std::move(n), std::move(a), _opt2maybe(std::move(t)), std::move(b)) }
116
117func_body[ast::expr]:  # Either '=>' followed by an expression, or a block expression
118    | '=>' expr
119    | block_expr
120
121impl_def[ast::defn]:  # 'impl' with or without a 'for' clause; without it std::nullopt is passed as the second ImplDef argument
122    | 'impl' c=expr b=defn_block  { ast::ImplDef(std::move(c), std::nullopt, std::move(b)) }
123    | 'impl' t=expr 'for' c=expr b=defn_block  { ast::ImplDef(std::move(c), std::move(t), std::move(b)) }
124
125defn_block[ast::sequence<ast::stmt>]:  # Brace-delimited list of zero or more statements
126    | '{' stmt* '}'
127
128# TODO: Forbid 'class' here?
129struct_def[ast::defn]:  # 'class' or 'struct', optional name, then an args_spec (this rule puts no delimiters around it)
130    | ('class' | 'struct') n=name? a=args_spec  { ast::StructDef(std::move(n), std::move(a)) }
131
132# TODO: Allow actual names!
133ns_def[ast::defn]:  # The 'ns' keyword followed by a namespace spec
134    | 'ns' ns_spec
135
136#region ns_spec
137# TODO: Represent "cartridge::" somehow other than a string?
138ns_spec[ast::defn]:  # Namespace path, optionally prefixed by 'cartridge' '::'. NOTE(review): the first action hands a moved string literal to _prepend1 - confirm it matches the expected field type
139    | 'cartridge' '::' a=ns_spec_raw  { ast::NsDef(_prepend1(std::move("cartridge"), std::move(a))) }
140    | a=ns_spec_raw  { ast::NsDef(std::move(a)) }
141
142ns_spec_raw[ast::sequence<ast::identifier>]:  # One or more names separated by '::'
143    | a='::'.name+  { std::move(a) }
144#endregion ns_spec
145
146#region args_spec
147# TODO: *args, **kwargs - or templated that, perhaps?
148# TODO: support for explicit argspec objects, if necessary
149args_spec[ast::args_spec]:  # Argument list with optional trailing ','; the empty alternative builds an empty sequence with a false flag. NOTE(review): it names ast::arg_def while arg_spec builds ast::arg_spec - confirm
150    | a=args_spec_nonempty ','?  { std::move(a) }
151    | { ast::args_spec(ast::make_sequence<ast::arg_def>(), false) }
152
153args_spec_nonempty[ast::args_spec]:  # A leading "self" passes true as the second argument; a plain ','-separated list passes false
154    | "self" a=(',' arg_spec)*  { ast::args_spec(std::move(a), true) }
155    | a=','.arg_spec+  { ast::args_spec(std::move(a), false) }
156
157# TODO: unused and fixed args?
158arg_spec[ast::arg_spec]:  # One argument: name, mandatory type annotation, optional '=' default expression
159    | n=name t=type_annotation d=('=' expr)?  { ast::arg_spec(std::move(n), std::move(t), _opt2maybe(std::move(d))) }
160#endregion args_spec
161#endregion defn
162
163#region flow
164flow[ast::flow] (memo):  # Control-flow construct; an 'unwrap' prefix sets the node's unwrap member to true
165    | 'unwrap' a=raw_flow  { ({ a->unwrap = true; a; }) }
166    | raw_flow
167
168raw_flow[ast::flow]:  # The flow kinds, without the 'unwrap' prefix
169    | if_flow
170    | for_flow
171    | while_flow
172    | loop_flow
173
174if_flow[ast::flow]:  # 'if' condition, body, optional 'else' body
175    | 'if' c=expr t=flow_block e=('else' flow_block)?  { ast::If(std::move(c), std::move(t), _opt2maybe(std::move(e))) }
176
177for_flow[ast::flow]:  # 'for' name 'in' expression, body, optional 'else' body
178    | 'for' v=name 'in' s=expr b=flow_block e=('else' flow_block)?  { ast::For(std::move(v), std::move(s), std::move(b), _opt2maybe(std::move(e))) }
179
180while_flow[ast::flow]:  # 'while' condition, body, optional 'else' body
181    | 'while' c=expr b=flow_block e=('else' flow_block)?  { ast::While(std::move(c), std::move(b), _opt2maybe(std::move(e))) }
182
183loop_flow[ast::flow]:  # 'loop' with a body and no condition
184    | 'loop' b=flow_block  { ast::Loop(std::move(b)) }
185
186flow_block[ast::expr]:  # Body of a flow construct: a block, a nested flow, or a return/break/continue expression
187    | block_expr
188    | flow_expr
189    | flow_control_expr
190#endregion flow
191
192#region expr
193expr_or_unit[ast::expr]:  # An expression, or nothing; the empty alternative yields a Constant holding std::monostate
194    | expr
195    | { ast::Constant(std::monostate()) }  # TODO: Implement Unit!
196
197# TODO: Support constants!
198expr[ast::expr] (memo):  # Top-level expression: a definition, a flow construct, or an operator expression
199    | defn_expr
200    | flow_expr
201    | expr_0
202
203#region wrappers
204defn_expr[ast::expr]:  # Wraps a definition node so it can appear as an expression
205    | a=defn  { ast::Defn(std::move(a)) }
206
207flow_expr[ast::expr]:  # Wraps a flow node so it can appear as an expression
208    | a=flow  { ast::Flow(std::move(a)) }
209#endregion wrappers
210
211#region operators
212#region expr_0
213expr_0[ast::expr] (memo):  # Boolean level: an 'and' chain, an 'or' chain, or a plain expr_1; neither chain's operands include the other chain
214    | and_expr
215    | or_expr
216    | expr_1
217
218and_expr[ast::expr]:  # First operand plus one or more 'and' operands, collected into a single BoolOp; captures are moved into _prepend1, which takes them by value
219    | a=expr_2 b=('and' expr_1)+  { ast::BoolOp(ast::And(), _prepend1(std::move(a), std::move(b))) }
220
221or_expr[ast::expr]:  # First operand plus one or more 'or' operands, collected into a single BoolOp; captures are moved into _prepend1, which takes them by value
222    | a=expr_2 b=('or' expr_1)+  { ast::BoolOp(ast::Or(), _prepend1(std::move(a), std::move(b))) }
223#endregion expr_0
224
225#region expr_1
226expr_1[ast::expr]:  # Keyword-prefixed forms ('not', 'expand', passing modes, return/break/continue), else expr_2
227    | not_expr
228    | expand_expr
229    | pass_spec_expr
230    | flow_control_expr
231    | expr_2
232
233not_expr[ast::expr]:  # Prefix 'not' applied to an expr_1, so prefixes can be stacked
234    | 'not' a=expr_1  { ast::UnOp(ast::Not(), std::move(a)) }
235
236# TODO: Maybe add other expand rules?
237#       For statements, at least?
238expand_expr[ast::expr]:  # Prefix 'expand' applied to an expr_1
239    | 'expand' a=expr_1  { ast::Expand(std::move(a)) }
240
241pass_spec_expr[ast::expr]:  # Prefixes 'ref' / 'move' / 'copy', each wrapping an expr_1 in a PassSpec with the matching mode node
242    | 'ref' a=expr_1  { ast::PassSpec(ast::ByRef(), std::move(a)) }
243    | 'move' a=expr_1  { ast::PassSpec(ast::ByMove(), std::move(a)) }
244    | 'copy' a=expr_1  { ast::PassSpec(ast::ByCopy(), std::move(a)) }
245
246flow_control_expr[ast::expr]:  # return / break / continue, usable in expression position
247    | return_expr
248    | break_expr
249    | continue_expr
250
251return_expr[ast::expr]:  # 'return' with an optional value (expr_or_unit supplies the value when none is written)
252    | 'return' a=expr_or_unit  { ast::Return(std::move(a)) }
253
254break_expr[ast::expr]:  # 'break' with an optional value (expr_or_unit supplies the value when none is written)
255    | 'break' a=expr_or_unit  { ast::Break(std::move(a)) }
256
257continue_expr[ast::expr]:  # 'continue' takes no value
258    | 'continue'  { ast::Continue() }
259#endregion expr_1
260
261#region expr_2
262expr_2[ast::expr]:  # Comparison level: a comparison chain, a '<=>' comparison, or a plain expr_3
263    | comparison_expr
264    | bidir_cmp_expr
265    | expr_3
266
267comparison_expr[ast::expr]:  # TODO: Extract from sequence somehow (without a 1000-char rule, preferably)
268    | f=expr_3 n=comparison_followup_pair+  { ast::Compare(
269        std::move(f),
270        ast::make_sequence<ast::cmp_op>(),
271        ast::make_sequence<ast::expr>()) 
272    }  # NOTE(review): n is bound but never used; the operator and operand sequences passed to Compare are created empty
273
274comparison_followup_pair[std::pair<ast::field<ast::cmp_op>, ast::field<ast::expr>>]:  # One (operator, right-hand operand) pair of a comparison chain
275    | o=comparison_op a=expr_3  { std::make_pair(std::move(o), std::move(a)) }
276
277comparison_op[ast::cmp_op]:  # Comparison operator tokens and the ast node each one produces; 'not' 'in' is two tokens
278    | '=='  { ast::Eq() }
279    | '!='  { ast::NotEq() }
280    | '<'   { ast::Lt() }
281    | '<='  { ast::LtE() }
282    | '>'   { ast::Gt() }
283    | '>='  { ast::GtE() }
284    | 'in'  { ast::In() }
285    | 'not' 'in'  { ast::NotIn() }
286
287bidir_cmp_expr[ast::expr]:  # '<=>' between exactly two expr_3 operands, built as a BinOp
288    | a=expr_3 '<=>' b=expr_3  { ast::BinOp(ast::BidirCmp(), std::move(a), std::move(b)) }
289#endregion expr_2
290
291#region expr_3
292expr_3[ast::expr]:  # Binary-operator level: an arithmetic expression, a bitwise expression, or a plain expr_4
293    | arithm_expr
294    | bitwise_expr
295    | expr_4
296
297arithm_expr[ast::expr]:  # Arithmetic family: sums, products, and modulo
298    | sum_expr
299    | product_expr
300    | modulo_expr
301
302sum_expr[ast::expr]:  # Left-recursive '+' / '-' chain; each operand is a product or a bare expr_4 (product_expr alone always needs a '*' or '/', so without the expr_4 fallback "a + b" cannot match)
303    | a=(sum_expr | product_expr | expr_4) o=sum_bin_op b=(product_expr | expr_4)  { ast::BinOp(std::move(o), std::move(a), std::move(b)) }
304
305sum_bin_op[ast::binary_op]:  # '+' and '-' as binary operators
306    | '+'  { ast::Add() }
307    | '-'  { ast::Sub() }
308
309product_expr[ast::expr]:  # Left-recursive '*' / '/' chain over expr_4 operands; at least one operator is required
310    | a=(product_expr | expr_4) o=product_bin_op b=expr_4  { ast::BinOp(std::move(o), std::move(a), std::move(b)) }
311
312product_bin_op[ast::binary_op]:  # '*' and '/' as binary operators
313    | '*'  { ast::Mul() }
314    | '/'  { ast::Div() }
315
316modulo_expr[ast::expr]:  # '%' between exactly two expr_4 operands (this rule does not recurse, so it does not chain)
317    | a=expr_4 '%' b=expr_4  { ast::BinOp(ast::Mod(), std::move(a), std::move(b)) }
318
319bitwise_expr[ast::expr]:  # Bitwise family; each rule below chains only its own operator(s) over expr_4 operands
320    | bitor_expr
321    | bitand_expr
322    | bitxor_expr
323    | shift_expr
324
325bitor_expr[ast::expr]:  # Left-recursive chain of '|'
326    | a=(bitor_expr | expr_4) '|' b=expr_4  { ast::BinOp(ast::BitOr(), std::move(a), std::move(b)) }
327
328bitand_expr[ast::expr]:  # Left-recursive chain of '&'
329    | a=(bitand_expr | expr_4) '&' b=expr_4  { ast::BinOp(ast::BitAnd(), std::move(a), std::move(b)) }
330
331bitxor_expr[ast::expr]:  # Left-recursive chain of '^'
332    | a=(bitxor_expr | expr_4) '^' b=expr_4  { ast::BinOp(ast::BitXor(), std::move(a), std::move(b)) }
333
334shift_expr[ast::expr]:  # Left-recursive chain of '<<' / '>>'
335    | a=(shift_expr | expr_4) o=shift_bin_op b=expr_4  { ast::BinOp(std::move(o), std::move(a), std::move(b)) }
336
337shift_bin_op[ast::binary_op]:  # '<<' and '>>' as binary operators
338    | '<<'  { ast::LShift() }
339    | '>>'  { ast::RShift() }
340#endregion expr_3
341
342#region expr_4
343expr_4[ast::expr] (memo):  # Unary / power level: a prefixed operand, a '**' expression, or a plain expr_5
344    | unary_expr
345    | power_expr
346    | expr_5
347
348unary_expr[ast::expr]:  # Prefix operator applied to another unary_expr or an expr_5, so prefixes can be stacked
349    | o=unary_op a=(unary_expr | expr_5)  { ast::UnOp(std::move(o), std::move(a)) }
350
351unary_op[ast::unary_op]:  # Prefix operator tokens and the ast node each one produces
352    | '+'  { ast::UAdd() }
353    | '-'  { ast::USub() }
354    | '~'  { ast::BitInv() }
355    | '&'  { ast::URef() }
356    | '*'  { ast::UStar() }
357
358power_expr[ast::expr]:  # '**' between exactly two expr_5 operands (this rule does not recurse, so it does not chain)
359    | a=expr_5 '**' b=expr_5  { ast::BinOp(ast::Pow(), std::move(a), std::move(b)) }
360#endregion expr_4
361
362#region expr_5
363expr_5[ast::expr]:  # Postfix level; the rules below are left-recursive on expr_5, so postfix forms can follow one another
364    | dot_attr_expr
365    | colon_attr_expr
366    | call_expr
367    | macro_call_expr
368    | subscript_expr
369    | expr_6
370
371dot_attr_expr[ast::expr]:  # Attribute access written with '.'
372    | a=expr_5 '.' b=name  { ast::DotAttribute(std::move(a), std::move(b)) }
373
374colon_attr_expr[ast::expr]:  # Attribute access written with '::'
375    | a=expr_5 '::' b=name  { ast::ColonAttribute(std::move(a), std::move(b)) }
376
377call_expr[ast::expr]:  # Call: callee followed by call_args in parentheses
378    | a=expr_5 '(' b=call_args ')'  { ast::Call(std::move(a), std::move(b)) }
379
380macro_call_expr[ast::expr]:  # Macro call: callee, '!', then a delimited token stream
381    | a=expr_5 '!' b=token_stream_delim  { ast::MacroCall(std::move(a), std::move(b)) }
382
383subscript_expr[ast::expr]:  # Subscript: value followed by call_args in square brackets
384    | a=expr_5 '[' b=call_args ']'  { ast::Subscript(std::move(a), std::move(b)) }
385
386# TODO: Actually implement
387call_args[ast::call_args] (memo):  # Placeholder: consumes no input and always builds an empty argument sequence with two nullptr arguments
388    | { ast::call_args(ast::make_sequence<ast::call_arg>(), nullptr, nullptr) }
389
390token_stream[ast::expr]:  # A bracket-delimited stream or a run of non-bracket tokens
391    | token_stream_delim
392    | token_stream_no_parens
393
394token_stream_delim[ast::expr]:  # Nested token streams inside matching brackets; '~' is a cut placed right after the opening bracket. The action arguments are still placeholders
395    | '(' ~ a=token_stream* ')'  { ast::TokenStream(/* ??? */) }
396    | '[' ~ a=token_stream* ']'  { ast::TokenStream(/* ??? */) }
397    | '{' ~ a=token_stream* '}'  { ast::TokenStream(/* ??? */) }
398
399token_stream_no_parens[ast::expr]:  # One or more tokens, each checked by negative lookahead not to be a bracket
400    | (!any_paren any_token)+  { ast::TokenStream(/* ??? */) }
401
402any_paren:  # Any opening or closing round, square, or curly bracket
403    | '(' | ')'
404    | '[' | ']'
405    | '{' | '}'
406
407any_token[lex::Token]:  # Any single token of the listed lexer categories
408    | NAME
409    | NUMBER
410    | STRING
411    | KEYWORD
412    | PUNCT
413#endregion expr_5
414
415#region expr_6
416expr_6[ast::expr]:  # Innermost operator level; currently just forwards to primary_expr
417    | primary_expr
418#endregion expr_6
419#endregion operators
420
421#region primary
422primary_expr[ast::expr]:  # Atoms: number and string constants, '...', variable references, groups, tuples, arrays, and blocks; &STRING is a lookahead checked before strings is tried
423    | a=NUMBER  { ast::Constant(util::variant_cast(a.get_number().value)) }
424    | &STRING a=strings  { ast::Constant(std::move(a)) }
425    | '...'  { ast::Constant(/* Ellipsis, somehow... */) }
426    | var_ref_expr
427    | group_expr
428    | tuple_expr
429    | array_expr
430    | ctime_block_expr
431    | block_expr
432
433var_ref_expr[ast::expr]:  # A bare name used as a variable reference
434    | a=name  { ast::VarRef(std::move(a)) }
435
436# TODO: _concat_strings!
437strings[std::string] (memo):  # One or more consecutive STRING tokens merged by _concat_strings
438    | a=STRING+  { _concat_strings(a) }
439
440group_expr[ast::expr]:  # A parenthesized weak_expr
441    | '(' weak_expr ')'
442
443tuple_expr[ast::expr]:  # '()' or a parenthesized ','-separated list with optional trailing ','
444    | '(' ')'  { ast::Tuple(ast::make_sequence<ast::expr>()) }
445    | '(' a=','.expr+ ','? ')'  { ast::Tuple(std::move(a)) }
446
447array_expr[ast::expr]:  # '[]' or a bracketed ','-separated list with optional trailing ','
448    | '[' ']'  { ast::Array(ast::make_sequence<ast::expr>()) }
449    | '[' a=','.expr+ ','? ']'  { ast::Array(std::move(a)) }
450
451# TODO: Maybe allow runtime blocks too?
452ctime_block_expr[ast::expr]:  # The 'ctime' keyword followed by a block
453    | 'ctime' b=block_expr  { ast::CtimeBlock(std::move(b)) }
454
455block_expr[ast::expr] (memo):  # '{', zero or more statements, an optional trailing expression (expr_or_unit), '}'
456    | '{' b=stmt* v=expr_or_unit '}'  { ast::Block(std::move(b), std::move(v)) }
457
458# To allow for both a::b::c and a::(123)::("abra" concat "cadabra")
459attr_name[ast::expr]:  # A plain name wrapped in a Constant, or a parenthesized group
460    | n=name  { ast::Constant(std::move(n)) }
461    | group_expr
462#endregion primary
463
464#region weak
465weak_expr[ast::expr]:  # Contents of a parenthesized group: an infix call, or any expr
466    | infix_call_expr
467    | expr
468
469infix_call_expr[ast::expr]:  # Two expr_4 operands with a name between them; the action passes the name first, then the operands
470    | a=expr_4 o=name b=expr_4  { ast::InfixCall(std::move(o), std::move(a), std::move(b)) }
471#endregion weak
472#endregion expr
473
474#region utils
475name[std::string]:  # A NAME token reduced to its string value
476    | a=NAME  { a.get_name().value }
477
478xtime_flag[ast::xtime_flag]:  # 'ctime', 'rtime', or nothing; the empty alternative yields DefaultTime
479    | 'ctime'  { ast::CTime() }
480    | 'rtime'  { ast::RTime() }
481    | { ast::DefaultTime() }
482
483type_annotation[ast::expr]:  # ':' followed by an expression, which is returned as the annotation
484    | ':' a=expr  { std::move(a) }
485#endregion utils