Grammar specification
This document contains the most up-to-date grammar specification for the Bondrewd programming language. The grammar is written in a modified version of Pegen’s format. The generated parser is a PEG parser with packrat caching.
The lexical specification is still to be documented.
Note
The grammar is still a work in progress. Use for general reference only.
Grammar
1# PEG grammar for the Bondrewd language
2
3# TODO: add lookaheads and cuts where applicable; add `(memo)` to the most common rules
4# TODO: Sequence helpers!
5# TODO: Helpers to change expr_context. Then also add expr_context to the AST
6# TODO: Empty rules cause issues with how left-recursion is handled. Fix it!
7
8@subheader '''\
9#include <string>
10'''
11
12@extras '''\
13std::string _concat_strings(const std::vector<lex::Token> &strings) const {
14 if (strings.empty()) {
15 return "";
16 }
17
18 std::string result{};
19 std::string_view quotes = "";
20 bool first = true;
21
22 for (auto &s: strings) {
23 if (first) {
24 quotes = s.get_string().quotes;
25 first = false;
26 }
27
28 result += s.get_string().value;
29 if (s.get_string().quotes != quotes) {
30 // TODO: Custom error type!
31 throw std::runtime_error("String literals must have the same quotes");
32 }
33 }
34
35 return result;
36}
37
38template <typename T>
39ast::maybe<T> _opt2maybe(std::optional<ast::field<T>> opt) {
40 if (opt) {
41 return std::move(*opt);
42 } else {
43 return nullptr;
44 }
45}
46
47template <typename T>
48ast::sequence<T> _prepend1(ast::field<T> item, ast::sequence<T> seq) {
49 assert(item);
50 seq.insert(seq.begin(), std::move(*item));
51 return seq;
52}
53
54'''
55
56# This means all ast::* types are automatically wrapped into ast::field<>
57@wrap_ast_types
58
# Entry rule: parsing begins at `file`.
start: file

#region file
# Zero or more statements, then `$`; the statements become the ast::File body.
file[ast::file]:
    | body=stmt* $ { ast::File(std::move(body)) }
64#endregion file
65
66#region stmt
# A single statement. Alternatives are tried in the order listed.
stmt[ast::stmt] (memo):
    | cartridge_header_stmt
    | assign_stmt
    | expr_stmt
    | pass_stmt

# `cartridge <name> ;`
cartridge_header_stmt[ast::stmt]:
    | 'cartridge' ident=name ';' { ast::CartridgeHeader(std::move(ident)) }

# `<expr> <assign_op> <expr> ;`
# The action receives (left expr, right expr, operator), in that order.
assign_stmt[ast::stmt]:
    | target=expr oper=assign_op value=expr ';' { ast::Assign(std::move(target), std::move(value), std::move(oper)) }

# Plain assignment and the augmented assignment operators.
assign_op[ast::assign_op]:
    | '='   { ast::AsgnNone() }
    | '+='  { ast::AsgnAdd() }
    | '-='  { ast::AsgnSub() }
    | '*='  { ast::AsgnMul() }
    | '/='  { ast::AsgnDiv() }
    | '%='  { ast::AsgnMod() }
    | '<<=' { ast::AsgnLShift() }
    | '>>=' { ast::AsgnRShift() }
    | '&='  { ast::AsgnBitAnd() }
    | '|='  { ast::AsgnBitOr() }
    | '^='  { ast::AsgnBitXor() }

# An expression terminated by ';'.
expr_stmt[ast::stmt]:
    | value=expr ';' { ast::Expr(std::move(value)) }

# A lone ';'.
pass_stmt[ast::stmt]:
    | ';' { ast::Pass() }
97#endregion stmt
98
99#region defn
# A definition: an xtime flag followed by the raw definition. The flag is
# stored into the `flag` member of the node produced by raw_defn, and that
# node is the result.
defn[ast::defn] (memo):
    | flag=xtime_flag decl=raw_defn { ({ decl->flag = std::move(flag); decl; }) }

# The definition kinds, tried in the order listed.
raw_defn[ast::defn]:
    | var_def
    | func_def
    | struct_def
    | impl_def
    | ns_def

# TODO: Allow 'let' too
# `var <name> [: <expr>] [= <expr>] ;`
# The annotation and the initializer are both optional; the last VarDef
# argument is always `true` here.
var_def[ast::defn]:
    | 'var' ident=name annot=type_annotation? init=['=' expr] ';' { ast::VarDef(std::move(ident), _opt2maybe(std::move(annot)), _opt2maybe(std::move(init)), true) }

# `func [<name>] ( <args_spec> ) [: <expr>] <func_body>`
func_def[ast::defn]:
    | 'func' ident=name? '(' params=args_spec ')' ret=type_annotation? body=func_body { ast::FuncDef(std::move(ident), std::move(params), _opt2maybe(std::move(ret)), std::move(body)) }

# Either `=> <expr>` or a block expression.
func_body[ast::expr]:
    | '=>' expr
    | block_expr

# `impl <expr> <defn_block>` or `impl <expr> for <expr> <defn_block>`.
# In the second form the expression after 'for' is passed first and the one
# before 'for' second.
# NOTE(review): the first alternative passes std::nullopt for the missing
# middle argument, while empty optional values elsewhere in this grammar are
# spelled nullptr -- confirm which one ImplDef expects.
impl_def[ast::defn]:
    | 'impl' cls=expr body=defn_block { ast::ImplDef(std::move(cls), std::nullopt, std::move(body)) }
    | 'impl' trait=expr 'for' cls=expr body=defn_block { ast::ImplDef(std::move(cls), std::move(trait), std::move(body)) }

# Zero or more statements wrapped in braces.
defn_block[ast::sequence<ast::stmt>]:
    | '{' stmt* '}'

# TODO: Forbid 'class' here?
# `class` / `struct`, an optional name, then an args_spec.
struct_def[ast::defn]:
    | ('class' | 'struct') ident=name? params=args_spec { ast::StructDef(std::move(ident), std::move(params)) }

# TODO: Allow actual names!
ns_def[ast::defn]:
    | 'ns' ns_spec
135
136#region ns_spec
# TODO: Represent "cartridge::" somehow other than a string?
# A namespace path, optionally prefixed with `cartridge ::`. With the prefix,
# "cartridge" is prepended to the parsed path before NsDef is built.
# NOTE(review): _prepend1 is handed a moved string literal as its first
# argument -- confirm this matches the parameter type _prepend1 expects.
ns_spec[ast::defn]:
    | 'cartridge' '::' path=ns_spec_raw { ast::NsDef(_prepend1(std::move("cartridge"), std::move(path))) }
    | path=ns_spec_raw { ast::NsDef(std::move(path)) }

# One or more names separated by '::'.
ns_spec_raw[ast::sequence<ast::identifier>]:
    | path='::'.name+ { std::move(path) }
144#endregion ns_spec
145
146#region args_spec
# TODO: *args, **kwargs - or templated that, perhaps?
# TODO: support for explicit argspec objects, if necessary
# A possibly empty argument specification. A non-empty one may end with a
# trailing comma; the empty alternative yields an empty sequence with the
# boolean set to false.
# NOTE(review): the empty sequence is built with element type ast::arg_def,
# whereas arg_spec below yields ast::arg_spec -- confirm this is intended.
args_spec[ast::args_spec]:
    | spec=args_spec_nonempty ','? { std::move(spec) }
    | { ast::args_spec(ast::make_sequence<ast::arg_def>(), false) }

# Either "self" followed by zero or more `, <arg_spec>` (boolean = true), or
# one or more comma-separated arg_spec items (boolean = false).
args_spec_nonempty[ast::args_spec]:
    | "self" rest=(',' arg_spec)* { ast::args_spec(std::move(rest), true) }
    | items=','.arg_spec+ { ast::args_spec(std::move(items), false) }

# TODO: unused and fixed args?
# `<name> : <expr> [= <expr>]` -- the annotation is mandatory, the default
# value is optional.
arg_spec[ast::arg_spec]:
    | ident=name annot=type_annotation dflt=('=' expr)? { ast::arg_spec(std::move(ident), std::move(annot), _opt2maybe(std::move(dflt))) }
160#endregion args_spec
161#endregion defn
162
163#region flow
# A control-flow construct, optionally prefixed with 'unwrap'. With the
# prefix, the `unwrap` member of the node from raw_flow is set to true and
# that node is the result.
flow[ast::flow] (memo):
    | 'unwrap' inner=raw_flow { ({ inner->unwrap = true; inner; }) }
    | raw_flow

# The control-flow kinds, tried in the order listed.
raw_flow[ast::flow]:
    | if_flow
    | for_flow
    | while_flow
    | loop_flow

# `if <expr> <flow_block> [else <flow_block>]`
if_flow[ast::flow]:
    | 'if' cond=expr then_blk=flow_block else_blk=('else' flow_block)? { ast::If(std::move(cond), std::move(then_blk), _opt2maybe(std::move(else_blk))) }

# `for <name> in <expr> <flow_block> [else <flow_block>]`
for_flow[ast::flow]:
    | 'for' var=name 'in' src=expr body=flow_block else_blk=('else' flow_block)? { ast::For(std::move(var), std::move(src), std::move(body), _opt2maybe(std::move(else_blk))) }

# `while <expr> <flow_block> [else <flow_block>]`
while_flow[ast::flow]:
    | 'while' cond=expr body=flow_block else_blk=('else' flow_block)? { ast::While(std::move(cond), std::move(body), _opt2maybe(std::move(else_blk))) }

# `loop <flow_block>`
loop_flow[ast::flow]:
    | 'loop' body=flow_block { ast::Loop(std::move(body)) }

# The body of a control-flow construct: a block, another control-flow
# expression, or a return/break/continue expression.
flow_block[ast::expr]:
    | block_expr
    | flow_expr
    | flow_control_expr
190#endregion flow
191
192#region expr
# An expression, or nothing at all. The empty alternative produces a
# Constant holding std::monostate.
expr_or_unit[ast::expr]:
    | expr
    | { ast::Constant(std::monostate()) }  # TODO: Implement Unit!

# TODO: Support constants!
# Top-level expression: a definition, a control-flow construct, or an
# operator expression, tried in that order.
expr[ast::expr] (memo):
    | defn_expr
    | flow_expr
    | expr_0
202
203#region wrappers
# Wraps a definition so that it can appear where an expression is expected.
defn_expr[ast::expr]:
    | inner=defn { ast::Defn(std::move(inner)) }

# Wraps a control-flow construct so that it can appear where an expression
# is expected.
flow_expr[ast::expr]:
    | inner=flow { ast::Flow(std::move(inner)) }
209#endregion wrappers
210
211#region operators
212#region expr_0
# Lowest-precedence operator level: an 'and' chain, an 'or' chain, or a
# single expr_1, tried in that order.
expr_0[ast::expr] (memo):
    | and_expr
    | or_expr
    | expr_1

# `<expr_2> and <expr_1> [and <expr_1> ...]`
# The first operand is prepended to the remaining ones to form the operand
# list of the BoolOp. Both captures are moved into _prepend1, which takes its
# arguments by value; they are not used again afterwards, and moving matches
# how every other action in this grammar hands captures on.
and_expr[ast::expr]:
    | first=expr_2 rest=('and' expr_1)+ { ast::BoolOp(ast::And(), _prepend1(std::move(first), std::move(rest))) }

# `<expr_2> or <expr_1> [or <expr_1> ...]`
# Built the same way as and_expr, with ast::Or() as the operator.
or_expr[ast::expr]:
    | first=expr_2 rest=('or' expr_1)+ { ast::BoolOp(ast::Or(), _prepend1(std::move(first), std::move(rest))) }
223#endregion expr_0
224
225#region expr_1
# Keyword-prefixed expressions, falling back to expr_2.
expr_1[ast::expr]:
    | not_expr
    | expand_expr
    | pass_spec_expr
    | flow_control_expr
    | expr_2

# `not <expr_1>`
not_expr[ast::expr]:
    | 'not' operand=expr_1 { ast::UnOp(ast::Not(), std::move(operand)) }

# TODO: Maybe add other expand rules?
# For statements, at least?
# `expand <expr_1>`
expand_expr[ast::expr]:
    | 'expand' operand=expr_1 { ast::Expand(std::move(operand)) }

# `ref` / `move` / `copy` followed by an expr_1.
pass_spec_expr[ast::expr]:
    | 'ref'  operand=expr_1 { ast::PassSpec(ast::ByRef(), std::move(operand)) }
    | 'move' operand=expr_1 { ast::PassSpec(ast::ByMove(), std::move(operand)) }
    | 'copy' operand=expr_1 { ast::PassSpec(ast::ByCopy(), std::move(operand)) }

# return / break / continue.
flow_control_expr[ast::expr]:
    | return_expr
    | break_expr
    | continue_expr

# `return [<expr>]`
return_expr[ast::expr]:
    | 'return' value=expr_or_unit { ast::Return(std::move(value)) }

# `break [<expr>]`
break_expr[ast::expr]:
    | 'break' value=expr_or_unit { ast::Break(std::move(value)) }

# `continue`
continue_expr[ast::expr]:
    | 'continue' { ast::Continue() }
259#endregion expr_1
260
261#region expr_2
# Comparison level: a comparison chain, a '<=>' comparison, or a plain
# expr_3, tried in that order.
expr_2[ast::expr]:
    | comparison_expr
    | bidir_cmp_expr
    | expr_3

# TODO: Extract from sequence somehow (without a 1000-char rule, preferably)
# An expr_3 followed by one or more (operator, operand) pairs.
# NOTE(review): the captured pairs are not passed to the action; Compare is
# built from the first operand and two empty sequences.
comparison_expr[ast::expr]:
    | first=expr_3 tail=comparison_followup_pair+ { ast::Compare(std::move(first), ast::make_sequence<ast::cmp_op>(), ast::make_sequence<ast::expr>()) }

# One comparison operator together with the operand that follows it.
comparison_followup_pair[std::pair<ast::field<ast::cmp_op>, ast::field<ast::expr>>]:
    | oper=comparison_op operand=expr_3 { std::make_pair(std::move(oper), std::move(operand)) }

# The comparison operators.
comparison_op[ast::cmp_op]:
    | '=='       { ast::Eq() }
    | '!='       { ast::NotEq() }
    | '<'        { ast::Lt() }
    | '<='       { ast::LtE() }
    | '>'        { ast::Gt() }
    | '>='       { ast::GtE() }
    | 'in'       { ast::In() }
    | 'not' 'in' { ast::NotIn() }

# `<expr_3> <=> <expr_3>`
bidir_cmp_expr[ast::expr]:
    | lhs=expr_3 '<=>' rhs=expr_3 { ast::BinOp(ast::BidirCmp(), std::move(lhs), std::move(rhs)) }
289#endregion expr_2
290
291#region expr_3
# Arithmetic / bitwise level: an arithmetic expression, a bitwise
# expression, or a plain expr_4, tried in that order.
expr_3[ast::expr]:
    | arithm_expr
    | bitwise_expr
    | expr_4

# Sum, product or modulo expression.
arithm_expr[ast::expr]:
    | sum_expr
    | product_expr
    | modulo_expr

# Left-recursive chain of '+' / '-'.
# Either operand may be a bare expr_4. sum_expr and product_expr each require
# at least one operator of their own, so without the expr_4 fallback an input
# such as `a + b` (no '*' or '/' on either side) could not match this rule.
# The fallback mirrors the `(... | expr_4)` pattern used by product_expr and
# the bitwise rules below.
sum_expr[ast::expr]:
    | lhs=(sum_expr | product_expr | expr_4) oper=sum_bin_op rhs=(product_expr | expr_4) { ast::BinOp(std::move(oper), std::move(lhs), std::move(rhs)) }

sum_bin_op[ast::binary_op]:
    | '+' { ast::Add() }
    | '-' { ast::Sub() }

# Left-recursive chain of '*' / '/'.
product_expr[ast::expr]:
    | lhs=(product_expr | expr_4) oper=product_bin_op rhs=expr_4 { ast::BinOp(std::move(oper), std::move(lhs), std::move(rhs)) }

product_bin_op[ast::binary_op]:
    | '*' { ast::Mul() }
    | '/' { ast::Div() }

# `<expr_4> % <expr_4>` -- a single, non-chaining modulo.
modulo_expr[ast::expr]:
    | lhs=expr_4 '%' rhs=expr_4 { ast::BinOp(ast::Mod(), std::move(lhs), std::move(rhs)) }

# One of the bitwise operator chains. Each chain rule refers only to itself
# and to expr_4.
bitwise_expr[ast::expr]:
    | bitor_expr
    | bitand_expr
    | bitxor_expr
    | shift_expr

# Left-recursive chain of '|'.
bitor_expr[ast::expr]:
    | lhs=(bitor_expr | expr_4) '|' rhs=expr_4 { ast::BinOp(ast::BitOr(), std::move(lhs), std::move(rhs)) }

# Left-recursive chain of '&'.
bitand_expr[ast::expr]:
    | lhs=(bitand_expr | expr_4) '&' rhs=expr_4 { ast::BinOp(ast::BitAnd(), std::move(lhs), std::move(rhs)) }

# Left-recursive chain of '^'.
bitxor_expr[ast::expr]:
    | lhs=(bitxor_expr | expr_4) '^' rhs=expr_4 { ast::BinOp(ast::BitXor(), std::move(lhs), std::move(rhs)) }

# Left-recursive chain of '<<' / '>>'.
shift_expr[ast::expr]:
    | lhs=(shift_expr | expr_4) oper=shift_bin_op rhs=expr_4 { ast::BinOp(std::move(oper), std::move(lhs), std::move(rhs)) }

shift_bin_op[ast::binary_op]:
    | '<<' { ast::LShift() }
    | '>>' { ast::RShift() }
340#endregion expr_3
341
342#region expr_4
# Unary / power level: a prefix-operator expression, a '**' expression, or a
# plain expr_5, tried in that order.
expr_4[ast::expr] (memo):
    | unary_expr
    | power_expr
    | expr_5

# A prefix operator applied to another unary expression or to an expr_5.
unary_expr[ast::expr]:
    | oper=unary_op operand=(unary_expr | expr_5) { ast::UnOp(std::move(oper), std::move(operand)) }

# The prefix operators.
unary_op[ast::unary_op]:
    | '+' { ast::UAdd() }
    | '-' { ast::USub() }
    | '~' { ast::BitInv() }
    | '&' { ast::URef() }
    | '*' { ast::UStar() }

# `<expr_5> ** <expr_5>` -- a single, non-chaining power.
power_expr[ast::expr]:
    | lhs=expr_5 '**' rhs=expr_5 { ast::BinOp(ast::Pow(), std::move(lhs), std::move(rhs)) }
360#endregion expr_4
361
362#region expr_5
# Postfix level: attribute access, calls, macro calls and subscripts, all
# left-recursive through expr_5, falling back to expr_6.
expr_5[ast::expr]:
    | dot_attr_expr
    | colon_attr_expr
    | call_expr
    | macro_call_expr
    | subscript_expr
    | expr_6

# `<expr_5> . <name>`
dot_attr_expr[ast::expr]:
    | base=expr_5 '.' attr=name { ast::DotAttribute(std::move(base), std::move(attr)) }

# `<expr_5> :: <name>`
colon_attr_expr[ast::expr]:
    | base=expr_5 '::' attr=name { ast::ColonAttribute(std::move(base), std::move(attr)) }

# `<expr_5> ( <call_args> )`
call_expr[ast::expr]:
    | base=expr_5 '(' args=call_args ')' { ast::Call(std::move(base), std::move(args)) }

# `<expr_5> ! <delimited token stream>`
macro_call_expr[ast::expr]:
    | base=expr_5 '!' stream=token_stream_delim { ast::MacroCall(std::move(base), std::move(stream)) }

# `<expr_5> [ <call_args> ]`
subscript_expr[ast::expr]:
    | base=expr_5 '[' args=call_args ']' { ast::Subscript(std::move(base), std::move(args)) }

# TODO: Actually implement
# Currently matches nothing and always yields an empty argument list.
call_args[ast::call_args] (memo):
    | { ast::call_args(ast::make_sequence<ast::call_arg>(), nullptr, nullptr) }

# A delimited group, or a run of tokens containing no parens/brackets/braces.
token_stream[ast::expr]:
    | token_stream_delim
    | token_stream_no_parens

# Token streams wrapped in (), [] or {}. A cut (`~`) follows each opening
# delimiter. The captured inner streams are not yet passed to the action.
token_stream_delim[ast::expr]:
    | '(' ~ inner=token_stream* ')' { ast::TokenStream(/* ??? */) }
    | '[' ~ inner=token_stream* ']' { ast::TokenStream(/* ??? */) }
    | '{' ~ inner=token_stream* '}' { ast::TokenStream(/* ??? */) }

# One or more tokens, none of which is matched by any_paren.
token_stream_no_parens[ast::expr]:
    | (!any_paren any_token)+ { ast::TokenStream(/* ??? */) }

# Any opening or closing round, square or curly delimiter.
any_paren:
    | '(' | ')'
    | '[' | ']'
    | '{' | '}'

# Any single token of the listed kinds.
any_token[lex::Token]:
    | NAME
    | NUMBER
    | STRING
    | KEYWORD
    | PUNCT
413#endregion expr_5
414
415#region expr_6
# Highest-precedence operator level; currently just a primary expression.
expr_6[ast::expr]:
    | primary_expr
418#endregion expr_6
419#endregion operators
420
421#region primary
# Primary expressions: literals, variable references, and the bracketed
# forms, tried in the order listed.
primary_expr[ast::expr]:
    | num=NUMBER { ast::Constant(util::variant_cast(num.get_number().value)) }
    | &STRING text=strings { ast::Constant(std::move(text)) }
    | '...' { ast::Constant(/* Ellipsis, somehow... */) }
    | var_ref_expr
    | group_expr
    | tuple_expr
    | array_expr
    | ctime_block_expr
    | block_expr

# A bare name used as a variable reference.
var_ref_expr[ast::expr]:
    | ident=name { ast::VarRef(std::move(ident)) }

# TODO: _concat_strings!
# One or more STRING tokens, merged by _concat_strings.
strings[std::string] (memo):
    | parts=STRING+ { _concat_strings(parts) }

# A parenthesised weak_expr.
group_expr[ast::expr]:
    | '(' weak_expr ')'

# `()` or a parenthesised, comma-separated list with an optional trailing
# comma.
tuple_expr[ast::expr]:
    | '(' ')' { ast::Tuple(ast::make_sequence<ast::expr>()) }
    | '(' items=','.expr+ ','? ')' { ast::Tuple(std::move(items)) }

# `[]` or a bracketed, comma-separated list with an optional trailing comma.
array_expr[ast::expr]:
    | '[' ']' { ast::Array(ast::make_sequence<ast::expr>()) }
    | '[' items=','.expr+ ','? ']' { ast::Array(std::move(items)) }

# TODO: Maybe allow runtime blocks too?
# `ctime <block_expr>`
ctime_block_expr[ast::expr]:
    | 'ctime' body=block_expr { ast::CtimeBlock(std::move(body)) }

# `{ <stmt>* [<expr>] }` -- zero or more statements followed by an optional
# final expression.
block_expr[ast::expr] (memo):
    | '{' stmts=stmt* value=expr_or_unit '}' { ast::Block(std::move(stmts), std::move(value)) }

# To allow for both a::b::c and a::(123)::("abra" concat "cadabra")
# A name (turned into a Constant) or a group expression.
attr_name[ast::expr]:
    | ident=name { ast::Constant(std::move(ident)) }
    | group_expr
462#endregion primary
463
464#region weak
# An infix call if one matches, otherwise an ordinary expression.
weak_expr[ast::expr]:
    | infix_call_expr
    | expr

# `<expr_4> <name> <expr_4>` -- the name in the middle is passed to InfixCall
# first, followed by the left and right operands.
infix_call_expr[ast::expr]:
    | lhs=expr_4 callee=name rhs=expr_4 { ast::InfixCall(std::move(callee), std::move(lhs), std::move(rhs)) }
471#endregion weak
472#endregion expr
473
474#region utils
# The value carried by a NAME token.
name[std::string]:
    | tok=NAME { tok.get_name().value }

# 'ctime', 'rtime', or nothing; the empty alternative yields DefaultTime.
xtime_flag[ast::xtime_flag]:
    | 'ctime' { ast::CTime() }
    | 'rtime' { ast::RTime() }
    | { ast::DefaultTime() }

# `: <expr>`
type_annotation[ast::expr]:
    | ':' annot=expr { std::move(annot) }
485#endregion utils