https://github.com/akkartik/mu1/blob/master/011load.cc
  1 //: Phase 1 of running Mu code: load it from a textual representation.
  2 //:
  3 //: The process of running Mu code:
  4 //:   load -> transform -> run
  5 
  6 :(scenarios load)  // use 'load' instead of 'run' in all scenarios in this layer
  7 :(scenario first_recipe)
  8 def main [
  9   1:number <- copy 23
 10 ]
 11 +parse: instruction: copy
 12 +parse:   ingredient: {23: "literal"}
 13 +parse:   product: {1: "number"}
 14 
 15 :(code)
 16 vector<recipe_ordinal> load(string form) {
 17   istringstream in(form);
 18   in >> std::noskipws;
 19   return load(in);
 20 }
 21 
 22 vector<recipe_ordinal> load(istream& in) {
 23   in >> std::noskipws;
 24   vector<recipe_ordinal> result;
 25   while (has_data(in)) {
 26     skip_whitespace_and_comments(in);
 27     if (!has_data(in)) break;
 28     string command = next_word(in);
 29     if (command.empty()) {
 30       assert(!has_data(in));
 31       break;
 32     }
 33     // Command Handlers
 34     if (command == "recipe" || command == "def") {
 35       recipe_ordinal r = slurp_recipe(in);
 36       if (r > 0) result.push_back(r);
 37     }
 38     else if (command == "recipe!" || command == "def!") {
 39       Disable_redefine_checks = true;
 40       recipe_ordinal r = slurp_recipe(in);
 41       if (r > 0) result.push_back(r);
 42       Disable_redefine_checks = false;
 43     }
 44     // End Command Handlers
 45     else {
 46       raise << "unknown top-level command: " << command << '\n' << end();
 47     }
 48   }
 49   return result;
 50 }
 51 
 52 // return the recipe ordinal slurped, or -1 if it failed
 53 int slurp_recipe(istream& in) {
 54   recipe result;
 55   result.name = next_word(in);
 56   if (result.name.empty()) {
 57     assert(!has_data(in));
 58     raise << "file ended with 'recipe'\n" << end();
 59     return -1;
 60   }
 61   // End Load Recipe Name
 62   skip_whitespace_but_not_newline(in);
 63   // End Recipe Refinements
 64   if (result.name.empty())
 65     raise << "empty result.name\n" << end();
 66   trace(9991, "parse") << "--- defining " << result.name << end();
 67   if (!contains_key(Recipe_ordinal, result.name))
 68     put(Recipe_ordinal, result.name, Next_recipe_ordinal);
 69   result.ordinal = get(Recipe_ordinal, result.name);
 70   ++Next_recipe_ordinal;
 71   if (Recipe.find(get(Recipe_ordinal, result.name)) != Recipe.end()) {
 72     trace(9991, "parse") << "already exists" << end();
 73     if (should_check_for_redefine(result.name))
 74       raise << "redefining recipe " << result.name << "\n" << end();
 75     Recipe.erase(get(Recipe_ordinal, result.name));
 76   }
 77   slurp_body(in, result);
 78   // End Recipe Body(result)
 79   put(Recipe, get(Recipe_ordinal, result.name), result);
 80   return get(Recipe_ordinal, result.name);
 81 }
 82 
 83 void slurp_body(istream& in, recipe& result) {
 84   in >> std::noskipws;
 85   skip_whitespace_but_not_newline(in);
 86   if (in.get() != '[')
 87     raise << result.name << ": recipe body must begin with '['\n" << end();
 88   skip_whitespace_and_comments(in);  // permit trailing comment after '['
 89   instruction curr;
 90   while (next_instruction(in, &curr)) {
 91     curr.original_string = to_original_string(curr);
 92     // End Rewrite Instruction(curr, recipe result)
 93     trace(9992, "load") << "after rewriting: " << to_string(curr) << end();
 94     if (!curr.is_empty()) result.steps.push_back(curr);
 95   }
 96 }
 97 
 98 bool next_instruction(istream& in, instruction* curr) {
 99   curr->clear();
100   skip_whitespace_and_comments(in);
101   if (!has_data(in)) {
102     raise << "incomplete recipe at end of file (0)\n" << end();
103     return false;
104   }
105 
106   vector<string> words;
107   while (has_data(in) && in.peek() != '\n') {
108     skip_whitespace_but_not_newline(in);
109     if (!has_data(in)) {
110       raise << "incomplete recipe at end of file (1)\n" << end();
111       return false;
112     }
113     string word = next_word(in);
114     if (word.empty()) {
115       assert(!has_data(in));
116       raise << "incomplete recipe at end of file (2)\n" << end();
117       return false;
118     }
119     words.push_back(word);
120     skip_whitespace_but_not_newline(in);
121   }
122   skip_whitespace_and_comments(in);
123   if (SIZE(words) == 1 && words.at(0) == "]")
124     return false;  // end of recipe
125 
126   if (SIZE(words) == 1 && is_label_word(words.at(0))) {
127     curr->is_label = true;
128     curr->label = words.at(0);
129     trace(9993, "parse") << "label: " << curr->label << end();
130     if (!has_data(in)) {
131       raise << "incomplete recipe at end of file (3)\n" << end();
132       return false;
133     }
134     return true;
135   }
136 
137   vector<string>::iterator p = words.begin();
138   if (find(words.begin(), words.end(), "<-") != words.end()) {
139     for (;  *p != "<-";  ++p)
140       curr->products.push_back(reagent(*p));
141     ++p;  // skip <-
142   }
143 
144   if (p == words.end()) {
145     raise << "instruction prematurely ended with '<-'\n" << end();
146     return false;
147   }
148   curr->name = *p;  ++p;
149   // curr->operation will be set at transform time
150 
151   for (;  p != words.end();  ++p)
152     curr->ingredients.push_back(reagent(*p));
153 
154   trace(9993, "parse") << "instruction: " << curr->name << end();
155   trace(9993, "parse") << "  number of ingredients: " << SIZE(curr->ingredients) << end();
156   for (vector<reagent>::iterator p = curr->ingredients.begin();  p != curr->ingredients.end();  ++p)
157     trace(9993, "parse") << "  ingredient: " << to_string(*p) << end();
158   for (vector<reagent>::iterator p = curr->products.begin();  p != curr->products.end();  ++p)
159     trace(9993, "parse") << "  product: " << to_string(*p) << end();
160   if (!has_data(in)) {
161     raise << "9: unbalanced '[' for recipe\n" << end();
162     return false;
163   }
164   // End next_instruction(curr)
165   return true;
166 }
167 
168 // can return empty string -- only if 'in' has no more data
169 string next_word(istream& in) {
170   skip_whitespace_but_not_newline(in);
171   // End next_word Special-cases
172   ostringstream out;
173   slurp_word(in, out);
174   skip_whitespace_and_comments_but_not_newline(in);
175   string result = out.str();
176   if (result != "[" && ends_with(result, '['))
177     raise << "insert a space before '[' in '" << result << "'\n" << end();
178   return result;
179 }
180 
// Report whether 'word' is a label: a non-empty word whose first character is
// neither alphanumeric nor one of  $ _ * @ & , = - [ ] ( )
bool is_label_word(const string& word) {
  if (word.empty()) return false;  // error raised elsewhere
  const char first = word.at(0);
  // <cctype> classifiers are only defined for EOF and values representable as
  // unsigned char; a plain char holding a non-ASCII byte may be negative.
  if (isalnum(static_cast<unsigned char>(first))) return false;
  return string("$_*@&,=-[]()").find(first) == string::npos;
}
185 
// True iff 's' is non-empty and its final character is 'c'.
bool ends_with(const string& s, const char c) {
  return !s.empty() && s.at(s.size()-1) == c;
}
190 
191 :(before "End Globals")
// word boundaries: each of these characters ends the word before it and forms a one-character word of its own
193 extern const string Terminators("(){}");
194 :(code)
195 void slurp_word(istream& in, ostream& out) {
196   char c;
197   if (has_data(in) && Terminators.find(in.peek()) != string::npos) {
198     in >> c;
199     out << c;
200     return;
201   }
202   while (in >> c) {
203     if (isspace(c) || Terminators.find(c) != string::npos || Ignore.find(c) != string::npos) {
204       in.putback(c);
205       break;
206     }
207     out << c;
208   }
209 }
210 
211 void skip_whitespace_and_comments(istream& in) {
212   while (true) {
213     if (!has_data(in)) break;
214     if (isspace(in.peek())) in.get();
215     else if (Ignore.find(in.peek()) != string::npos) in.get();
216     else if (in.peek() == '#') skip_comment(in);
217     else break;
218   }
219 }
220 
221 // confusing; move to the next line only to skip a comment, but never otherwise
222 void skip_whitespace_and_comments_but_not_newline(istream& in) {
223   while (true) {
224     if (!has_data(in)) break;
225     if (in.peek() == '\n') break;
226     if (isspace(in.peek())) in.get();
227     else if (Ignore.find(in.peek()) != string::npos) in.get();
228     else if (in.peek() == '#') skip_comment(in);
229     else break;
230   }
231 }
232 
233 void skip_comment(istream& in) {
234   if (has_data(in) && in.peek() == '#') {
235     in.get();
236     while (has_data(in) && in.peek() != '\n') in.get();
237   }
238 }
239 
240 :(scenario recipe_instead_of_def)
241 recipe main [
242   1:number <- copy 23
243 ]
244 +parse: instruction: copy
245 +parse:   ingredient: {23: "literal"}
246 +parse:   product: {1: "number"}
247 
248 :(scenario parse_comment_outside_recipe)
249 # this comment will be dropped by the tangler, so we need a dummy recipe to stop that
250 def f1 [
251 ]
252 # this comment will go through to 'load'
253 def main [
254   1:number <- copy 23
255 ]
256 +parse: instruction: copy
257 +parse:   ingredient: {23: "literal"}
258 +parse:   product: {1: "number"}
259 
260 :(scenario parse_comment_amongst_instruction)
261 def main [
262   # comment
263   1:number <- copy 23
264 ]
265 +parse: instruction: copy
266 +parse:   ingredient: {23: "literal"}
267 +parse:   product: {1: "number"}
268 
269 :(scenario parse_comment_amongst_instruction_2)
270 def main [
271   # comment
272   1:number <- copy 23
273   # comment
274 ]
275 +parse: instruction: copy
276 +parse:   ingredient: {23: "literal"}
277 +parse:   product: {1: "number"}
278 
279 :(scenario parse_comment_amongst_instruction_3)
280 def main [
281   1:number <- copy 23
282   # comment
283   2:number <- copy 23
284 ]
285 +parse: instruction: copy
286 +parse:   ingredient: {23: "literal"}
287 +parse:   product: {1: "number"}
288 +parse: instruction: copy
289 +parse:   ingredient: {23: "literal"}
290 +parse:   product: {2: "number"}
291 
292 :(scenario parse_comment_after_instruction)
293 def main [
294   1:number <- copy 23  # comment
295 ]
296 +parse: instruction: copy
297 +parse:   ingredient: {23: "literal"}
298 +parse:   product: {1: "number"}
299 
300 :(scenario parse_label)
301 def main [
302   +foo
303 ]
304 +parse: label: +foo
305 
306 :(scenario parse_dollar_as_recipe_name)
307 def main [
308   $foo
309 ]
310 +parse: instruction: $foo
311 
312 :(scenario parse_multiple_properties)
313 def main [
314   1:number <- copy 23/foo:bar:baz
315 ]
316 +parse: instruction: copy
317 +parse:   ingredient: {23: "literal", "foo": ("bar" "baz")}
318 +parse:   product: {1: "number"}
319 
320 :(scenario parse_multiple_products)
321 def main [
322   1:number, 2:number <- copy 23
323 ]
324 +parse: instruction: copy
325 +parse:   ingredient: {23: "literal"}
326 +parse:   product: {1: "number"}
327 +parse:   product: {2: "number"}
328 
329 :(scenario parse_multiple_ingredients)
330 def main [
331   1:number, 2:number <- copy 23, 4:number
332 ]
333 +parse: instruction: copy
334 +parse:   ingredient: {23: "literal"}
335 +parse:   ingredient: {4: "number"}
336 +parse:   product: {1: "number"}
337 +parse:   product: {2: "number"}
338 
339 :(scenario parse_multiple_types)
340 def main [
341   1:number, 2:address:number <- copy 23, 4:number
342 ]
343 +parse: instruction: copy
344 +parse:   ingredient: {23: "literal"}
345 +parse:   ingredient: {4: "number"}
346 +parse:   product: {1: "number"}
347 +parse:   product: {2: ("address" "number")}
348 
349 :(scenario parse_properties)
350 def main [
351   1:address:number/lookup <- copy 23
352 ]
353 +parse:   product: {1: ("address" "number"), "lookup": ()}
354 
//: this test can't be represented as a scenario
356 :(code)
357 void test_parse_comment_terminated_by_eof() {
358   load("recipe main [\n"
359        "  a:number <- copy 34\n"
360        "]\n"
361        "# abc");  // no newline after comment
362   cerr << ".";  // termination = success
363 }
364 
365 :(scenario warn_on_missing_space_before_bracket)
366 % Hide_errors = true;
367 def main[
368   1:number <- copy 23
369 ]
370 +error: insert a space before '[' in 'main['
371 
372 //: Warn if a recipe gets redefined, because large codebases can accidentally
373 //: step on their own toes. But there'll be many occasions later where
374 //: we'll want to disable the errors.
375 :(before "End Globals")
376 bool Disable_redefine_checks = false;
377 :(before "End Reset")
378 Disable_redefine_checks = false;
379 :(code)
380 bool should_check_for_redefine(const string& recipe_name) {
381   if (Disable_redefine_checks) return false;
382   return true;
383 }
384 
385 :(scenario forbid_redefining_recipes)
386 % Hide_errors = true;
387 def main [
388   1:number <- copy 23
389 ]
390 def main [
391   1:number <- copy 24
392 ]
393 +error: redefining recipe main
394 
395 :(scenario permit_forcibly_redefining_recipes)
396 def main [
397   1:number <- copy 23
398 ]
399 def! main [
400   1:number <- copy 24
401 ]
402 -error: redefining recipe main
403 $error: 0
404 
405 :(code)
406 // for debugging
407 void show_rest_of_stream(istream& in) {
408   cerr << '^';
409   char c;
410   while (in >> c)
411     cerr << c;
412   cerr << "$\n";
413   exit(0);
414 }