From f49cb8abc271be168f6e08a45f81e392184b7a38 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Thu, 15 Jun 2023 08:21:47 +0100 Subject: [PATCH] Properly Cook Strings and Characters Re-enable the remaining new parser tests and properly handle the differnt escape sequences. --- .vscode/tasks.json | 21 -- src/Feersum.CompilerServices/Syntax/Parse.fs | 25 +- .../Syntax/SyntaxShim.fs | 2 +- src/Feersum.CompilerServices/Syntax/Tree.fs | 71 ++++-- test/Feersum.Tests/LexTests.fs | 12 +- test/Feersum.Tests/SyntaxTestsNew.fs | 237 ++++++++++-------- 6 files changed, 202 insertions(+), 166 deletions(-) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 036f6d9..fc75005 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -37,26 +37,6 @@ "presentation": { "reveal": "silent" }, -<<<<<<< HEAD - { - "label": "runcompiler", - "command": "dotnet", - "type": "shell", - "dependsOn": "build", - "args": [ - "${workspaceFolder}/src/Feersum/bin/Debug/net7.0/Feersum.dll", - "--configuration", - "Debug", - "-o", - "${workspaceFolder}/spec/bin/", - "${input:scmToCompile}" - ], - "group": "build", - "presentation": { - "reveal": "silent" - }, - "problemMatcher": "$msCompile" -======= "problemMatcher": "$msCompile" }, { @@ -76,7 +56,6 @@ "group": { "kind": "test", "isDefault": true ->>>>>>> 0f9f9a2 (Reformat JSON Config) }, "presentation": { "reveal": "silent" diff --git a/src/Feersum.CompilerServices/Syntax/Parse.fs b/src/Feersum.CompilerServices/Syntax/Parse.fs index f3419be..7cfb6d3 100644 --- a/src/Feersum.CompilerServices/Syntax/Parse.fs +++ b/src/Feersum.CompilerServices/Syntax/Parse.fs @@ -165,21 +165,28 @@ let private parseConstant (builder: GreenNodeBuilder) state = builder.FinishNode() state -let private skipAtmosphere (builder: GreenNodeBuilder) state = - let mutable state = state - - while lookingAtAny [ TokenKind.Whitespace; TokenKind.Comment ] state do - state <- eat builder AstKind.ATMOSPHERE state - - state - let private parseIdentifier (builder: GreenNodeBuilder) state = builder.StartNode(AstKind.SYMBOL |> SyntaxUtils.astToGreen) let state = expect builder TokenKind.Identifier AstKind.IDENTIFIER state builder.FinishNode() state -let rec private parseQuote (builder: GreenNodeBuilder) state = +let rec private skipAtmosphere (builder: GreenNodeBuilder) state = + let mutable state = state + + while lookingAtAny [ TokenKind.Whitespace; TokenKind.Comment; TokenKind.DatumCommentMarker ] state do + match currentKind state with + | TokenKind.DatumCommentMarker -> + builder.StartNode(AstKind.ATMOSPHERE |> SyntaxUtils.astToGreen) + + state <- state |> eat builder AstKind.ATMOSPHERE |> parseExpr builder + + builder.FinishNode() + | _ -> state <- eat builder AstKind.ATMOSPHERE state + + state + +and private parseQuote (builder: GreenNodeBuilder) state = builder.StartNode(AstKind.QUOTED_DATUM |> SyntaxUtils.astToGreen) let state = diff --git a/src/Feersum.CompilerServices/Syntax/SyntaxShim.fs b/src/Feersum.CompilerServices/Syntax/SyntaxShim.fs index f977e17..c4f1765 100644 --- a/src/Feersum.CompilerServices/Syntax/SyntaxShim.fs +++ b/src/Feersum.CompilerServices/Syntax/SyntaxShim.fs @@ -10,7 +10,7 @@ type LegacyNodeKind<'a> = Feersum.CompilerServices.Syntax.AstNodeKind<'a> let rec transformExpr (doc: TextDocument) (expr: Expression) : LegacyNode = let kind = match expr with - | Form f -> f.Body |> Seq.map (transformExpr doc) |> List.ofSeq |> LegacyNodeKind.Form + | FormNode f -> f.Body |> Seq.map (transformExpr doc) |> List.ofSeq |> LegacyNodeKind.Form | _ -> // TODO: All the other node kinds LegacyNodeKind.Error diff --git a/src/Feersum.CompilerServices/Syntax/Tree.fs b/src/Feersum.CompilerServices/Syntax/Tree.fs index bed4fe6..0d4fbbb 100644 --- a/src/Feersum.CompilerServices/Syntax/Tree.fs +++ b/src/Feersum.CompilerServices/Syntax/Tree.fs @@ -4,7 +4,6 @@ open Firethorn open Firethorn.Green open Firethorn.Red - /// Node kind for each element in the raw tree. type AstKind = | ERROR = -1 @@ -103,7 +102,7 @@ module private Utils = | 'f' -> (Plain, sb.Append('\f')) | 'r' -> (Plain, sb.Append('\r')) | 'x' -> (InHex "0x", sb) - | _ -> (Plain, sb.AppendFormat("\\{0}", ch)) + | _ -> (Plain, sb.Append(ch)) s |> Seq.fold (cookChar) (CookingState.Plain, StringBuilder()) @@ -186,9 +185,8 @@ and StrVal internal (red: SyntaxToken) = inherit ConstantValue(red) member public x.Value = - // FIXME: Cook the string here. - red.Green.Text - + let text = red.Green.Text + text[1 .. text.Length - 2] |> cookString /// Boolean node in the syntax tree. and BoolVal internal (red: SyntaxToken) = @@ -203,9 +201,28 @@ and CharVal internal (red: SyntaxToken) = inherit ConstantValue(red) - member public x.Value = - // TODO: Cook this character - x.Text[1] + member public _.Value = + let charText = red.Green.Text + + if charText.Length = 3 then + Some(charText[2]) + else if charText.StartsWith("#\\x") then + match System.Int32.TryParse(charText[3..], System.Globalization.NumberStyles.HexNumber, null) with + | true, hex -> Some((char) hex) + | _ -> None + else + match charText[2..] with + | "alarm" -> Some('\u0007') + | "backspace" -> Some('\u0008') + | "delete" -> Some('\u007F') + | "escape" -> Some('\u001B') + | "newline" -> Some('\u000A') + | "null" -> Some('\u0000') + | "return" -> Some('\u000D') + | "space" -> Some(' ') + | "tab" -> Some('\u0009') + | _ -> None + // *********** NODES @@ -322,11 +339,37 @@ module Patterns = open Feersum.CompilerServices.Ice /// Pattern to match on known expression types - let (|ByteVec|Vec|Form|Constant|Symbol|) (expr: Expression) = + let (|ByteVecNode|VecNode|FormNode|ConstantNode|SymbolNode|) (expr: Expression) = match expr with - | :? ByteVec as b -> ByteVec b - | :? Vec as v -> Vec v - | :? Form as f -> Form f - | :? Constant as c -> Constant c - | :? Symbol as s -> Symbol s + | :? ByteVec as b -> ByteVecNode b + | :? Vec as v -> VecNode v + | :? Form as f -> FormNode f + | :? Constant as c -> ConstantNode c + | :? Symbol as s -> SymbolNode s | _ -> icef "Unexpected expression type: %A" (expr.GetType()) + + /// Ergonomic pattern to match the useful inner parts of an expression + let (|ByteVec|Vec|Form|Constant|Symbol|) (expr: Expression) = + match expr with + | ByteVecNode b -> ByteVec + | VecNode v -> Vec + | FormNode f -> Form(f.Body |> List.ofSeq) + | ConstantNode c -> Constant c.Value + | SymbolNode s -> Symbol s.CookedValue + + /// Pattern to match on known constant types + let (|NumValNode|StrValNode|BoolValNode|CharValNode|) (cnst: ConstantValue) = + match cnst with + | :? NumVal as n -> NumValNode n + | :? StrVal as s -> StrValNode s + | :? BoolVal as b -> BoolValNode b + | :? CharVal as c -> CharValNode c + | _ -> icef "Unexpected constant type: %A" (cnst.GetType()) + + /// Ergonomic pattern to match the inner parts of a constant value + let (|NumVal|StrVal|BoolVal|CharVal|) (cnst: ConstantValue) = + match cnst with + | NumValNode n -> NumVal n.Value + | StrValNode s -> StrVal s.Value + | BoolValNode b -> BoolVal b.Value + | CharValNode c -> CharVal c.Value diff --git a/test/Feersum.Tests/LexTests.fs b/test/Feersum.Tests/LexTests.fs index a9e161c..3da18e4 100644 --- a/test/Feersum.Tests/LexTests.fs +++ b/test/Feersum.Tests/LexTests.fs @@ -9,18 +9,8 @@ open Feersum.CompilerServices.Text let private p name line col = TextPoint.FromParts(name, line, col) |> TextLocation.Point -/// Grab the kind from a syntax token pair. -let private getKind token = - let (kind, _) = token - kind - -/// Grab the value from a syntax token pair. -let private getValue token = - let (_, value) = token - value - [] -let ``Empty input text always returns end of file`` () = +let ``Empty input text contains no tokens`` () = let tokens = tokenise "" "test.scm" Assert.Empty(tokens) diff --git a/test/Feersum.Tests/SyntaxTestsNew.fs b/test/Feersum.Tests/SyntaxTestsNew.fs index 212ac8d..e4b1f65 100644 --- a/test/Feersum.Tests/SyntaxTestsNew.fs +++ b/test/Feersum.Tests/SyntaxTestsNew.fs @@ -8,57 +8,62 @@ open Feersum.CompilerServices.Text open Feersum.CompilerServices.Syntax.Tree open Feersum.CompilerServices.Syntax.Parse -let readScriptExpr line = - let result = Parse.readExpr1 "repl" line +// TODO: negative cases for a lot of these parsers. e.g. unterminated strings, +// invalid hex escapes, bad identifiers and so on. - if result.Diagnostics |> List.isEmpty then - result.Root - else - failwithf "Expected single expression but got errors: %A" result.Diagnostics +[] +module private Utils = -let readSingle line = - let result = Parse.readRaw Parse.ReadMode.Script "repl" line + let readScript line = + let result = Parse.readExpr1 "repl" line - if result.Diagnostics |> List.isEmpty then - result.Root.Children() |> Seq.exactlyOne - else - failwithf "Expected single expression but got: %A" result.Diagnostics + if result.Diagnostics |> List.isEmpty then + result.Root + else + failwithf "Expected single expression but got errors: %A in source: %s" result.Diagnostics line -let getKind (node: SyntaxNode) = node.Kind |> SyntaxUtils.greenToAst + let readScriptExpr line = + match (readScript line).Body with + | Some expr -> expr + | None -> failwithf "Expected single expression in %s" line -let getTokenKind (token: SyntaxToken) = token.Kind |> SyntaxUtils.greenToAst + let readProgExprs line = + let result = Parse.readProgram "repl" line -// open SyntaxUtils -// open SyntaxFactory + if result.Diagnostics |> List.isEmpty then + result.Root.Body + else + failwithf "Expected program but got errors: %A in source %s" result.Diagnostics line -// TODO: negative cases for a lot of these parsers. e.g. unterminated strings, -// invalid hex escapes, bad identifiers and so on. + let readSingle line = + let result = Parse.readRaw Parse.ReadMode.Script "repl" line + + if result.Diagnostics |> List.isEmpty then + result.Root.Children() |> Seq.exactlyOne + else + failwithf "Expected single expression but got: %A" result.Diagnostics + + let getKind (node: SyntaxNode) = node.Kind |> SyntaxUtils.greenToAst + + let getTokenKind (token: SyntaxToken) = token.Kind |> SyntaxUtils.greenToAst + +[] +let ``parse happy path`` () = + + match readProgExprs "1 23" |> List.ofSeq with + | [ Constant(Some(NumVal 1.0)); Constant(Some(NumVal 23.0)) ] -> () + | x -> failwithf "Parse test failure, got %A" x + + match readProgExprs "#t" |> List.ofSeq with + | [ Constant(Some(BoolVal true)) ] -> () + | x -> failwithf "Parse test failure, got %A" x + + Assert.Empty(readProgExprs "") -// let sanitise = -// sanitiseNodeWith (function -// | _ -> dummyLocation) - -// [] -// let ``parse seqs`` () = -// Assert.Equal( -// Seq [ Number 1.0 |> Constant |> node -// Number 23.0 |> Constant |> node ] -// |> node, -// readMany "1 23" |> sanitise -// ) - -// Assert.Equal(Seq [ Boolean true |> Constant |> node ] |> node, readMany "#t" |> sanitise) -// Assert.Equal(Seq [] |> node, readMany "" |> sanitise) - -// Assert.Equal( -// Seq [ Form [ Ident "+" |> node -// Number 12.0 |> Constant |> node -// Number 34.0 |> Constant |> node ] -// |> node -// Boolean false |> Constant |> node ] -// |> node, -// readMany "(+ 12 34) #f" |> sanitise -// ) + match readProgExprs "(+ 12 34) #f" |> List.ofSeq with + | [ Form [ Symbol "+"; Constant(Some(NumVal 12.0)); Constant(Some(NumVal 34.0)) ]; Constant(Some(BoolVal false)) ] -> + () + | x -> failwithf "Parse test failure, got %A" x [] let ``parse atoms`` () = @@ -150,7 +155,7 @@ let ``extended identifier characters`` ident = [] [] let ``identifier literals`` raw (cooked: string) = - let script = readScriptExpr raw + let script = readScript raw let tree = script.RawNode.Children() |> Seq.exactlyOne Assert.Equal(AstKind.SYMBOL, tree |> getKind) @@ -163,36 +168,43 @@ let ``identifier literals`` raw (cooked: string) = Assert.Equal(AstKind.IDENTIFIER, identTok |> getTokenKind) match script.Body with - | Some(Symbol s) -> Assert.Equal(cooked, s.CookedValue) + | Some(SymbolNode s) -> Assert.Equal(cooked, s.CookedValue) | _ -> failwithf "Expected identifier but got %A" script.Body -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// let ``parse escaped characters`` escaped char = -// Assert.Equal(Str(char |> string) |> Constant, readSingle (sprintf "\"%s\"" escaped)) - -// [] -// let ``parse datum comment`` () = -// Assert.Equal( -// Number 1.0 |> Constant, -// readSingle -// "#;(= n 1) -// 1 ;Base case: return 1" -// ) - -// Assert.Equal(Number 123.0 |> Constant, readSingle "#;(= n 1)123") -// Assert.Equal(Number 456.0 |> Constant, readSingle "#;123 456") +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +let ``parse escaped characters`` escaped char = + match readScriptExpr (sprintf "\"%s\"" escaped) with + | Constant(Some(StrVal s)) -> + Assert.Equal(1, s.Length) + Assert.Equal(char, s[0]) + | _ -> failwith "Expected string" + +[] +let ``parse datum comment`` () = + let checkFor num (parsed: Expression) = + match parsed with + | Constant(Some(NumValNode n)) -> Assert.Equal(num, n.Value) + | _ -> failwith "Expected constant value" + + "#;(= n 1) + 1 ;Base case: return 1" + |> readScriptExpr + |> checkFor 1.0 + + "#;(= n 1)123" |> readScriptExpr |> checkFor 123.0 + "#;123 456" |> readScriptExpr |> checkFor 456.0 [] let ``parse block comments`` () = @@ -201,42 +213,49 @@ let ``parse block comments`` () = Assert.Equal(AstKind.CONSTANT, readSingle "#| this #| is a |# comment |#1" |> getKind) -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// let ``parse simple character literals`` char = -// Assert.Equal(Character char |> Constant, readSingle (@"#\" + string char)) - -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// [] -// let ``parse named characters`` name char = -// Assert.Equal(Character char |> Constant, readSingle (@"#\" + name)) - -// [] -// [] -// [] -// [] -// let ``parse hex characters`` hex char = -// Assert.Equal(Character char |> Constant, readSingle hex) +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +let ``parse simple character literals`` char = + match readScriptExpr (@"#\" + string char) with + | Constant(Some(CharVal(Some c))) -> Assert.Equal(char, c) + | _ -> failwith "Expected character value" + +[] +[] +[] +[] +[] +[] +[] +[] +[] +[] +let ``parse named characters`` name char = + match readScriptExpr (@"#\" + name) with + | Constant(Some(CharVal(Some c))) -> Assert.Equal(char, c) + | _ -> failwith "Expected character value" + +[] +[] +[] +[] +[] +let ``parse hex characters`` hex char = + match readScriptExpr hex with + | Constant(Some(CharVal(Some c))) -> Assert.Equal(char, c) + | _ -> failwith "Expected character value" [] let ``multiple diagnostics on error`` () = @@ -255,8 +274,6 @@ let ``syntax shim test`` () = |> Result.map (fun x -> x.Body |> Seq.map (SyntaxShim.transformExpr doc) |> Seq.exactlyOne) |> Result.unwrap - printfn "@%A" tree.Location - Assert.Equal(1L, tree.Location.Start.Line) Assert.Equal(0L, tree.Location.Start.Col) Assert.Equal(1L, tree.Location.End.Line)