-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscheme.c
3848 lines (3331 loc) · 107 KB
/
scheme.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// ## Chapter 1, where the story begins
//
// Every story has to start somewhere. This one is a tour through a
// simplistic Scheme interpreter written in C, and every C program begins
// with the `main()` function.
//
void setup_runtime(void);
void do_useful_stuff(int argc, const char** argv);
void teardown_runtime(void);
int main(int argc, const char** argv)
{
setup_runtime();
do_useful_stuff(argc, argv);
teardown_runtime();
}
//
// Here I can imagine people asking, "Wait a minute, is this some kind of
// pseudo-C? Ain't you supposed to have `#include <stdio.h>` at the
// beginning and `main()` function at the very end?"
//
// Well. There is a whole universe of possible implications behind "Ain't
// you supposed to do X?" questions, and I'll use them as rant fuel
// throughout this story. But considering the most straightforward one,
// which is "is C compiler okay with such way of structuring code?" the
// answer is "yes, it compiles just fine". It will keep failing to *link*
// until those functions will be implemented, but, as far as the compiler
// is concerned, forward declarations are good enough.
//
// Moreover, this is the general pattern in this story where I follow the
// example of Quintus Fabius Maximus and keep postponing writing
// lower-level implementation of a feature until I get a handle on how it's
// going to be used.
//
// This approach is called top-down, and it's not the only way to write a
// program. A program can also be written bottom-up by starting with
// individual nuts and bolts and then assembling them into a big piece of
// software.
//
// There's a potential problem with the latter, though.
//
// Let's say I start with writing a piece of the standard library. I'm
// certainly going to need it at some point, so it isn't an obviously wrong
// place to start. Oh, and I'll also need a garbage collector, so I can
// write one too.
//
// But then I'm running a risk of ending up with an implementation of a
// chunk of Scheme standard library that is neat, and cute, and pretty, and
// a garbage collector that is as terrific as a garbage collector in a pet
// project may possibly be, and **they don't quite fit!**
//
// And then I'll have a dilemma. Either I'll have to redo one or both of
// those pieces. Which probably won't be 100% waste, as I hope to have
// learned a few things on the way, but it's still double work. Or else I
// can refuse to accept sunk costs and then stubbornly work around the
// incompatibilities between my own standard library implementation and my
// own garbage collector. And that's just dumb.
//
// But as long as something *isn't done at all*, I can be totally sure that
// it *isn't done wrong*. There's a certain Zen feeling to it.
//
// Another thing is more subtle but will get many programmers, especially
// more junior ones, nervous once they figure it out. It's
// `setup_runtime()` call. It's pretty clear **what** it will do, which is
// initialize garbage collector and such, but it also implies I'm going to
// have **the** runtime, probably scattered around in a bunch of global
// variables.
//
// I can almost hear voices asking, "But what if you need to have multiple
// runtimes? What if a customer comes and asks to make your interpreter
// embeddable as a scripting engine? What about multithreading? Why are you
// not worried?!"
//
// The answer is, "I consciously don't care." This is just a pet project
// that started with me willing to tinker with Scheme. And then realizing
// that just writing in Scheme is too easy, so I wrote my own interpreter.
// And then figuring out that **even that is not fun enough**, so I wrapped
// it into a sort of "literary programming" exercise. In a (highly
// improbable) situation when I'll have to write my own multithreaded
// embeddable Scheme interpreter, I'll just start from scratch, and that's
// about it.
//
// Anyway, I'll write functions to set up and tear down runtime once said
// runtime will take a more concrete shape. And for now, I'll focus on
// doing the useful stuff.
//
#include <stdio.h>
#include <unistd.h>
void execute(FILE*);
void execute_file(const char* filename);
void repl(void);
void do_useful_stuff(int argc, const char** argv)
{
if (argc >= 2) {
for (int i = 1; i < argc; i++)
execute_file(argv[i]);
} else if (isatty(fileno(stdin))) {
repl();
} else {
execute(stdin);
}
}
//
// This one is pretty straightforward: when a program is launched as
// `./scheme foo.scm`, then execute a file; when it's started as `cat
// foo.scm | ./scheme` do precisely the same, and otherwise fire up a REPL.
//
// Now that I know that I'm going to have a function that reads code from a
// stream and executes it, writing a function that does the same with a
// file is trivial, so let's just make one.
//
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#define DIE(fmt, ...) \
do { \
fprintf(stderr, "[%s:%d] ", __FILE__, __LINE__); \
fprintf(stderr, fmt, ##__VA_ARGS__); \
fprintf(stderr, "\n"); \
abort(); \
} while (0)
FILE* fopen_or_die(const char* pathname, const char* mode)
{
FILE* f = fopen(pathname, mode);
if (f)
return f;
const char* err = strerror(errno);
DIE("Error opening file %s: %s", pathname, err);
}
void execute_file(const char* filename)
{
FILE* f = fopen_or_die(filename, "r");
execute(f);
fclose(f);
}
//
// Some might find error handling in `fopen_or_die()` a bit naive. If you
// aren't among those, you can skip the following rant, and if you are,
// it's okay, there's nothing to be ashamed of, it's just natural cognitive
// inertia.
//
// See, in general, when something goes wrong, you have three options:
//
// 1. You can handle the problem and continue.
//
// 2. You can abort.
//
// 3. You can notify the invoker about the problem and let them make their
// own choice from these same three options.
//
// In this case, option #1 is unavailable. Because, well, failing to open a
// file that the interpreter is told to execute is clearly a fatal error,
// and there's no sane way to recover from it.
//
// Oh, of course, there are insane ways to do it. For instance, I can just
// quietly skip the problematic file and later collapse in an obscure way
// because I can't find functions that were supposed to be defined in that
// file, or take your guess, but I'm not even going to spend time
// explaining why I'm not doing that.
//
// Option #3 is an interesting one to reflect on as this is what many
// programmers would consider a natural and only alternative when #1 is not
// available. In fact, if you're coding on top of a rich fat framework
// (think Spring or Django), this indeed is the natural and only way to do
// it. But there's no framework here, and the operating system is
// effectively **the** invoker ("*there was nothing between us... not even
// a condom...*" yeah, horrible joke), and `abort()`ing is a proper way to
// notify the operating system about the problem. So #2 is pretty much #3,
// just without boilerplate code to pull error status to `main()` and then
// abort there.
//
// Anyway, let's implement `execute()`
//
#include <stdbool.h>
struct object;
typedef struct object* object_t;
void decref(object_t);
object_t eval_repl(object_t);
object_t read_object(FILE*);
void execute(FILE* in)
{
object_t expr;
while ((expr = read_object(in))) {
object_t result = eval_repl(expr);
decref(expr);
decref(result);
}
}
//
// Several things are introduced here.
//
// The first one is `struct object`, which is going to be the
// representation of a Scheme object. It's clearly going to be some sort of
// a `struct` (I mean, we're doing C here, what else can it be); internal
// details of that struct I'll figure out later.
//
// The second and the third are `read_object()` and `eval_repl()` functions
// that, respectively, read an object from an input stream and evaluate it
// in REPL context.
//
// The last one is the `decref()` function that is needed because I'm going
// to have automatic memory management. For this, I have three options:
//
// 1. I can do reference counting. Very simple to do for as long as objects
// don't form reference cycles, then it gets quirky.
//
// 2. I can make a tracing garbage collector that traverses the process'
// memory to figure out which objects are still needed.
//
// 3. I can apply a sort of a hybrid approach where I do tracing for the
// sections of process' memory that are convenient to traverse and fall
// back to reference counting for those which aren't.
//
// From this simple function, it's already clear that whichever method I
// choose must be able to deal with references from the C call stack.
// Analyzing them in a pure tracing manner is pretty cumbersome, so I have
// to count them anyway, and that's what `decref()` will do.
//
// Now comes the REPL...
//
void write_object(FILE*, object_t obj);
void repl()
{
object_t expr;
printf("> ");
while ((expr = read_object(stdin))) {
object_t result = eval_repl(expr);
decref(expr);
write_object(stdout, result);
decref(result);
printf("\n> ");
fflush(stdout);
}
printf("bye\n");
}
//
// ...which isn't particularly interesting, and we proceed to
// ## Chapter 2, where I parse Lisp code
//
// What's neat about implementing a Lisp dialect is that you can be done
// with parsing in about three pints of Guinness and then move on to
// funnier stuff.
//
// Of course, "funnier" is relative here, and not just grammatically, but
// also in a Theory of Relativity kind of sense. I mean, the Theory of
// Relativity describes extreme conditions where gravity is so high that
// common Newtonian laws don't work any more.
//
// Likewise, here we're venturing deep into the dark swampy forests of
// Nerdyland, where the common understanding of "fun" doesn't apply. By the
// standards of ordinary folks whose idea of having fun involves such
// activities as mountain skiing and dance festivals, spending evenings
// tinkering with the implementation of infinite recursion is hopelessly
// weird either way. So I mean absolutely no judgement towards those
// fantastic guys and gals who enjoy messing with lexers, parsers, and all
// that shebang. Whatever floats your boat, really!
//
// This had to be said. Anyway, back to parsing.
//
int fgetc_skip(FILE*);
object_t read_atom(FILE* in);
object_t read_list(FILE* in);
object_t read_quote(FILE* in);
object_t read_string(FILE* in);
object_t read_object(FILE* in)
{
int ch = fgetc_skip(in);
switch (ch) {
case EOF:
return NULL;
case '(':
return read_list(in);
case ')':
DIE("Unmatched ')'");
case '\'':
return read_quote(in);
case '"':
return read_string(in);
default:
ungetc(ch, in);
return read_atom(in);
}
}
//
// Lisp syntax is famously Spartan. Basically, all you get is:
//
// * lists (those thingies with (the (astonishingly) copious) amount of
// parentheses),
//
// * strings (delimited by "double quotes" or however you call that
// character),
//
// * quotations (if you don't know who these are, you better look it up in
// Scheme spec, but basically it's a way to specify that ```'(+ 1 2)``` is
// *literally* a list with three elements and not an expression that adds
// two numbers),
//
// * and atoms, which are pretty much everything else, including numbers,
// characters, and symbols.
//
// So what I'm doing here is I'm looking at the first non-trivial character
// in the input stream, and if it's an opening parenthesis, I interpret it
// as a beginning of a list etc.
//
#include <ctype.h>
int fgetc_or_die(FILE* in)
{
int ch = fgetc(in);
if ((ch == EOF) && (! feof(in))) {
const char* err = strerror(errno);
DIE("IO error: %s", err);
}
return ch;
}
int fgetc_skip(FILE* in)
{
bool comment = false, skip;
int ch;
do {
ch = fgetc_or_die(in);
if (ch == ';')
comment = true;
skip = comment || isspace(ch);
if (ch == '\n')
comment = false;
} while ((ch != EOF) && skip);
return ch;
}
//
// Oh, and "the first non-trivial character" means I fast-forward through
// the input stream ignoring comments and whitespace until I encounter a
// character that's neither or reach an EOF.
//
// There are four `read_something()` functions that I promised to
// implement, let's start with `read_string()`
//
int fgetc_read_string(FILE* in)
{
int ch = fgetc_or_die(in);
switch (ch) {
case EOF:
DIE("Premature end of input");
case '\"':
return EOF;
case '\\':
ch = fgetc_or_die(in);
switch (ch) {
case EOF:
DIE("Premature end of input");
case 'n':
return '\n';
}
}
return ch;
}
object_t wrap_string(const char*);
object_t read_string(FILE* in)
{
char buffer[10240];
int fill = 0, ch;
while ((ch = fgetc_read_string(in)) != EOF) {
buffer[fill++] = ch;
if (fill >= 10240)
DIE("Buffer overflow");
}
buffer[fill] = '\0';
return wrap_string(buffer);
}
//
// Nothing particularly surprising here. Just read characters into the
// buffer until you reach the closing double quote, then wrap the contents
// of the buffer into an `object_t` and call it a day.
//
// Yes, this simplistic implementation will miserably fail to parse a
// source file with a string constant that is longer than 10K characters.
//
// And if you take some time to think about it, hard-coded 10K bytes for
// buffer size is kind of interesting here. It's an arbitrary number that,
// on the one hand, is safely above any practical limit in terms of
// usefulness. I mean, of course, you can hard-code the entirety of "Crime
// and Punishment" as a single string constant just to humiliate a dimwit
// interpreter author. But within any remotely sane coding style, such blob
// must be offloaded to an external text file, and even a buffer that is an
// order of magnitude smaller should still be good enough for all
// reasonable intents and purposes.
//
// On the other hand, it's also safely below any practical limit in terms
// of conserving memory. It can easily be an order of magnitude larger
// without causing any issues whatsoever.
//
// At least on a modern general-purpose machine with a couple of gigs of
// memory. If you've got a PDP-7 like one that Ken Thompson used for his
// early development of Unix, then a hundred kilobytes might be your
// **entire** RAM, and then you have to be more thoughtful with your
// throwaway buffers.
//
// By the way, it's awe-inspiring how those people in the 1960s could
// develop an entire real operating system on a computer like that. Well,
// not precisely mind-boggling, like, I myself started coding on a
// Soviet-made ZX Spectrum clone with 48 kilobytes of RAM, and you can't
// write a super-duper-sophisticated OS on such machine because **it just
// won't fit**, but still, it's so cool.
//
// Okay, back to business. Let's `parse_atom()`.
//
bool isspecial(char ch)
{
switch (ch) {
case '(':
case ')':
case ';':
case '\"':
case '\'':
return true;
default:
return false;
}
}
int fgetc_read_atom(FILE* in)
{
int ch = fgetc_or_die(in);
if (ch == EOF)
return EOF;
if (isspace(ch) || isspecial(ch)) {
ungetc(ch, in);
return EOF;
}
return ch;
}
object_t parse_atom(const char*);
object_t wrap_char(char ch);
object_t read_character(FILE* in)
{
int ch = fgetc_or_die(in);
if ((ch == EOF) || isspace(ch))
return wrap_char(' ');
else
return wrap_char(ch);
}
object_t read_atom(FILE* in)
{
char buffer[10240];
int fill = 0, ch;
while ((ch = fgetc_read_atom(in)) != EOF) {
buffer[fill++] = ch;
if (fill >= 10240)
DIE("Buffer overflow");
}
if ((fill == 2) && (buffer[0] == '#') && (buffer[1] == '\\'))
return read_character(in);
buffer[fill] = '\0';
return parse_atom(buffer);
}
//
// Here I use the same approach as in `read_string()`: collect characters
// for as long as it looks like an atom, then convert it to an `object_t`,
// and that's pretty much it.
//
// Well, the syntax for characters in Scheme is a bit wonky: you have
// `#!\x` for the letter 'x' and `#\!` for an exclamation mark, and,
// surprisingly, `#!\newline` and `#!\space` for a newline and space
// respectively. Oh, and `#\` is also a space. Go figure.
//
// Most of that wonkiness can be dealt with by simply reading everything up
// until a special character and then figuring out what I've got in
// `parse_atom()`. Unless **it is** a special character, e.g. `#\)` or
// `#\;`, those need a bit of special handling.
//
// And now I'm looking at another buffer, and do you know what actually
// boggles my mind?
//
// Remember, at the very beginning of this story, I mentioned that a C
// program is typically supposed to have its `main()` function at the end?
// So, what boggles my mind is why are we still doing it?
//
// Well, I don't mean we all do. In some programming languages, it is more
// common, and in some, it is less, but really, why would you do it in any
// language? It's such a weird way to layout your code where you have to
// scroll all the way down to the bottom of the source file and then work
// your way up in a Benjamin Button kind of way.
//
// I mean, I know it's the legacy of Pascal where you were required to have
// the equivalent of `main()` at the bottom (and finish it with an `end.`
// with a period instead of a semicolon). I also understand that, back in
// those days, it made sense in order to simplify the compiler that had to
// run on limited hardware. But why we still sometimes do it in the 2020s
// is a mystery to me.
//
// Okay, enough of ranting, let's `parse_atom()`
//
object_t wrap_bool(bool v);
object_t parse_bool(const char* text)
{
if (strcmp(text, "#f") == 0)
return wrap_bool(false);
if (strcmp(text, "#t") == 0)
return wrap_bool(true);
return NULL;
}
object_t parse_char(const char* text)
{
if (strcmp(text, "#\\newline") == 0)
return wrap_char('\n');
if (strcmp(text, "#\\space") == 0)
return wrap_char(' ');
if (strcmp(text, "#\\") == 0)
return wrap_char(' ');
if ((strlen(text) == 3) && (text[0] == '#') && (text[1] == '\\'))
return wrap_char(text[2]);
return NULL;
}
object_t wrap_int(int value);
object_t parse_int(const char* text)
{
int index = 0, digits = 0, accum = 0, sign = 1;
if (text[0] == '-') {
index = 1;
sign = -1;
}
for (; text[index]; index++) {
if (! isdigit(text[index]))
return NULL;
accum = accum * 10 + (text[index] - '0');
digits++;
}
return (digits == 0) ? NULL : wrap_int(sign * accum);
}
object_t wrap_symbol(const char*);
object_t parse_atom(const char* buffer)
{
object_t result;
if ((result = parse_bool(buffer)))
return result;
if ((result = parse_int(buffer)))
return result;
if ((result = parse_char(buffer)))
return result;
return wrap_symbol(buffer);
}
//
// Pretty straightforward, really. If it looks like a boolean, convert it
// to a boolean. If it appears to be an integer number, convert it to an
// integer. If it seems to be a symbol, convert it to a symbol. If it looks
// like a floating-point number... Convert it to a symbol because screw
// you!
//
// To give a bit of a background here, this pet project of mine was never
// intended to be a feature-complete standards-compliant Scheme
// implementation. It started with solving some of "99 Lisp problems" and
// then escalated into writing a sufficient Scheme interpreter to run
// those. None of those problems relied on floating-point arithmetics, and
// so I didn't implement it.
//
// Not that it's particularly hard, just tedious (if done Python-style with
// proper type coercions), and JavaScript solution with simply using floats
// for everything I find aesthetically unappealing.
//
// What I couldn't possibly skip is lists (they didn't call it LISt
// Processing for nothing), so let's `read_list()`
//
// Oh, and a quick remark on the naming convention. Functions like
// `wrap_symbol()` are named this way intentionally. They could easily be
// called, say, `make_symbol()`, but that would imply that it's some sort
// of a constructor that really **makes** a *new* object. But by the time I
// get to actually implement those, I might not want to be bound by this
// implication (because I might find out that a proper constructor doesn't
// conform to the language standard and/or isn't practical, and I need
// and/or want some sort of a cache or a pool or whatever).
//
// So, instead, it's a vague "wrap", which stands for "get me an object
// that represents this value, and how you make it under the hood is none
// of my business."
//
object_t read_next_object(FILE* in)
{
int ch = fgetc_skip(in);
if (ch == EOF)
DIE("Premature end of input");
if (ch == ')')
return NULL;
ungetc(ch, in);
return read_object(in);
}
void push_to_list(object_t* ptr, object_t item);
object_t reverse_read_list(object_t list);
object_t wrap_nil(void);
object_t read_list(FILE* in)
{
object_t accum = wrap_nil(), obj;
while ((obj = read_next_object(in))) {
push_to_list(&accum, obj);
decref(obj);
}
object_t result = reverse_read_list(accum);
decref(accum);
return result;
}
//
// This one is simple but might need a bit of refresher on how lists work
// in Lisp (and other FP(-esque) languages).
//
// So, your basic building block is a two-element tuple (also known as a
// pair). If you make a tuple with some value in the first cell and in the
// second cell a reference to another tuple with another value in the first
// cell et cetera et cetera... And then you put a special null value to the
// second cell of the last tuple, then you what get is a singly-linked
// list. Oh, and the representation of the empty list is simply the null
// value.
//
// So what I do here is I read objects from the input stream and push them
// one by one to the front of the list until I see a closing parenthesis.
// But then the list ends up reversed, so I need to reverse it back. Easy.
//
object_t wrap_pair(object_t, object_t);
void push_to_list(object_t* ptr, object_t head)
{
object_t tail = *ptr;
*ptr = wrap_pair(head, tail);
decref(tail);
}
//
// This is such a pretty little function that utilizes call by pointer
// (very much a C idiom) to construct a very Lispy list. Tell me about
// multiparadigm programming.
//
// Oh, and while we're at it, let's also implement `reverse_read_list()`
//
struct pair;
typedef struct pair* pair_t;
pair_t assert_pair(object_t obj, const char* context);
object_t car(pair_t);
bool is_symbol(const char* text, object_t obj);
object_t pop_from_list(object_t*);
void incref(object_t);
object_t reverse_read_list(object_t list)
{
object_t result = wrap_nil(), obj;
while ((obj = pop_from_list(&list))) {
if (! is_symbol(".", obj)) {
push_to_list(&result, obj);
continue;
}
pair_t pair = assert_pair(result, "when parsing a list");
obj = car(pair);
incref(obj);
decref(result);
result = obj;
}
return result;
}
//
// that simply pops things from one list and pushes them to another.
//
// Well, except for one gotcha: `.` has special meaning in list notation,
// so that `'(a . b)` is not a list but is equivalent to `(cons 'a 'b)`,
// and so I cater for it here.
//
object_t cdr(pair_t);
bool is_nil(object_t);
object_t pop_from_list(object_t* ptr)
{
object_t obj = *ptr;
if (is_nil(obj))
return NULL;
pair_t pair = assert_pair(obj, "when traversing a list");
*ptr = cdr(pair);
return car(pair);
}
//
// `pop_from_list()` is pretty much the opposite of `push_to_list()` with a
// bit of type checking to make sure I'm dealing with a list and not
// something dodgy.
//
#define DEBUG(key, obj) \
do { \
fprintf(stderr, "[%s:%d] %s = ", __FILE__, __LINE__, key); \
write_object(stderr, obj); \
fprintf(stderr, "\n"); \
} while (0)
const char* typename(object_t);
pair_t to_pair(object_t);
pair_t assert_pair(object_t obj, const char* context)
{
pair_t pair = to_pair(obj);
if (pair)
return pair;
DEBUG("obj", obj);
DIE("Expected a pair %s, got %s instead", context, typename(obj));
}
//
// I still haven't decided what exactly I will put into either `struct
// object` or `struct pair`, but I already need to be able to convert one
// to another. So, I promise to write a `to_pair()` function that would do
// just that (or return `NULL` if the value that this object holds is not a
// pair), and here's write a neat little helper around it to abort with a
// human-readable message when the conversion fails.
//
object_t read_quote(FILE* in)
{
object_t obj = read_object(in);
if (! obj)
DIE("Premature end of input");
object_t result = wrap_nil();
push_to_list(&result, obj);
decref(obj);
object_t keyword = wrap_symbol("quote");
push_to_list(&result, keyword);
decref(keyword);
return result;
}
//
// Since `'bla` is merely a shorter version of `(quote bla)` parsing it is
// trivial, and with that in place, we're finally done with parsing and can
// move on to
// ## Chapter 3, where I evaluate
//
// By the way, I don't know if you noticed or not, but I try to use the
// word "we" as sparingly as I can. Perhaps it has something to do with me
// coming from a culture where "We" is commonly associated with the
// dystopian novel by Yevgeny Zamyatin.
//
// Of course, there are legit usages for "we," such as academic writing
// where all of "us" are listed on the paper's first page, and the reader
// is interested in overall results rather than the internal dynamics of
// the research team.
//
// But using "we did it" when it's actually "I did it" (and it's
// stylistically appropriate to say "I did it") feels to me like the
// speaker is a wimp who wants to avoid the responsibility.
//
// Likewise, using "we" when it's actually "I and Joe, mostly Joe" feels
// like reluctance to give a fair share of the credit.
//
// Okay, enough of that, let's implement `eval_repl()`
//
struct scope;
typedef struct scope* scope_t;
object_t eval_eager(scope_t scope, object_t expr);
scope_t get_repl_scope(void);
object_t eval_repl(object_t expr)
{
return eval_eager(get_repl_scope(), expr);
}
//
// That's a one-liner function that relies on two crucial concepts.
//
// The first one is the scope. The scope is pretty much just a binding
// between variables' names and their values. For now, just think of it as
// a sort of a dictionary (it's not exactly that, but we'll get there when
// we get there).
//
// Another one is the differentiation between eager and lazy evaluation.
// Before I go into explaining *what exactly do I mean by eager and lazy
// evaluation* in the context of this story, I first have to elaborate on
// the pragmatics for having all that in the first place.
//
// So. Scheme is a functional programming language, and in functional
// programming, people don't do loops, but instead, they do recursion. And
// for infinite loops, they do, well, infinite recursion. And "infinite"
// here doesn't mean "enormously big," but properly infinite.
//
// Consider this example:
//
// embed snippets/infinite-loop.scm : infinite loop
//
// Obviously, a direct equivalent of this code in vanilla
// C/C++/Ruby/Python/Java will run for some time and eventually blow up
// with a stack overflow. But code in Scheme, well, better shouldn't.
//
// I have three ways to deal with it:
//
// 1. Just do nothing and hope that the C stack will not overflow.
//
// 2. Do code rewriting so that, under the hood, the snippet above is
// automagically converted into a loop, e.g.
//
// embed snippets/infinite-loop.scm : infinite loop rewritten
//
// 3. Apply the technique called trampolining. Semantically it means
//
// embed snippets/infinite-loop.scm : infinite loop trampoline
//
// that instead of calling itself, function... Well, to generalize and to
// simplify, let's say it informs the evaluator that computation is
// incomplete and also tells what to do next in order to complete it.
//
// #1 looks like a joke, but actually, it's a pretty good solution. It's
// also a pretty bad solution, but let's get through the upsides first.
//
// First of all, it's trivial to implement (because it doesn't require
// writing any specific code). It clearly won't introduce any quirky bugs
// (because there's no specific code!). And it won't have any performance
// impact (because it does nothing!!)
//
// You see, "*just do nothing*" half of it is all good; it's the "*and hope
// that*" part that isn't. Although for simple examples, it doesn't really
// matter: provided there are a couple of thousands of stack levels
// available, it's gonna be okay with or without optimizations. But a more
// complex program may eventually hit that boundary, and then I'll have to
// get around deficiencies of my code in, well, my other code, and that's
// not a place I'd like to get myself into.
//
// This also sets a constraint on what a "proper" solution should be: it
// must be provably reliable for a complex piece of code, or else it's back
// to square one.
//
// #2 looks like a super fun thing to play with, and it seems deceptively
// simple for toy snippets. But thinking just a tiny bit about pesky stuff
// like mutually recursive functions, and self-modifying-ish code (think
// `(set! infinite-loop something-else)` *from within* `infinite-loop`),
// and escape procedures and whatnot... And all this starts to feel like a
// breeding ground for wacky corner cases, and I don't want to commit to
// being able to weed them all out.
//
// #3, on the contrary, is pretty straightforward, both conceptually and
// implementation-wise, so that's what I'll do (although I might do #2 on
// top of it later; because it looks like a super fun thing to play with).
//
// Now let's get back to lazy vs eager. "Lazy" in this context means that
// the evaluation function may return either a result (if computation is
// finished) or a thunk (a special object that describes what to do next).
// Whereas "eager" means that the evaluation function will always return
// the final result.
//
// "Eager" evaluation can be easily arranged by getting a "lazy" result
// first...
//
object_t eval_lazy(scope_t scope, object_t expr);
object_t force(object_t value);
object_t eval_eager(scope_t scope, object_t expr)
{
object_t result_or_thunk = eval_lazy(scope, expr);
return force(result_or_thunk);
}
//
// ...and then reevaluating it until the computation is complete.
//
struct thunk;
typedef struct thunk* thunk_t;
object_t eval_thunk(thunk_t thunk);
thunk_t to_thunk(object_t obj);
object_t force(object_t value)
{
thunk_t thunk;
while ((thunk = to_thunk(value))) {
object_t new_value = eval_thunk(thunk);
decref(value);
value = new_value;
}
return value;
}
//
// You know, I've just realized it's the third time in this story when I
// say, "I have three ways to deal with it," the previous two being
// considerations about memory management and error handling in Chapter 1.
//
// Moreover, I noticed a pattern. In a generalized form, those three
// options to choose from are:
//
// 1. Consider yourself lucky. Assume things won't go wrong. Don't worry
// that your solution is too optimistic.
//
// 2. Consider yourself clever. Assume you'll be able to fix every bug.
// Don't worry that your solution is too complex.
//
// 3. Just bloody admit that you're a dumb loser. Design a balanced
// solution that is resilient while still reasonable.
//
// This is such a deep topic that I'm not even going to try to cover it in
// one take, but I'm damn sure I'll be getting back to it repeatedly.
//
// For now, let's continue with `eval_lazy()`
//
struct symbol;
typedef struct symbol* symbol_t;
symbol_t to_symbol(object_t);
object_t eval_sexpr(scope_t, object_t head, object_t body);
object_t eval_var(scope_t scope, symbol_t key);
object_t eval_lazy(scope_t scope, object_t expr)
{
symbol_t varname = to_symbol(expr);
if (varname)
return eval_var(scope, varname);
pair_t sexpr = to_pair(expr);
if (sexpr)
return eval_sexpr(scope, car(sexpr), cdr(sexpr));
incref(expr);