Skip to content

Commit

Permalink
Cooking of Identifier Strings
Browse files Browse the repository at this point in the history
Fixup some of the syntax tests by implementing cooking for identifier
string values. This involves walking through the characters in the
identifier's token and replacing any escape sequences with the
appropriate values.
  • Loading branch information
iwillspeak committed Jun 14, 2023
1 parent 037dbb8 commit aba5169
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 4 deletions.
82 changes: 82 additions & 0 deletions src/Feersum.CompilerServices/Syntax/Tree.fs
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,88 @@ module SyntaxUtils =
let dump = Debug.debugDump (Debug.mappedFormatter greenToAst)

open SyntaxUtils
open System.Text

[<AutoOpen>]
module private Utils =

/// Predicate function to filter tokens by AST kind. Holds when the token's
/// kind equals the green kind that `astToGreen` produces for `kind`.
let tokenOfKind (kind: AstKind) (token: SyntaxToken) =
    let expected = astToGreen kind
    token.Kind = expected

/// Scanner state used by `cookString` while walking the raw text.
type CookingState =
    /// Ordinary text; characters are copied through unchanged.
    | Plain
    /// A backslash has just been read and the escape kind is not yet known.
    | InEscape
    /// Inside a `\x...;` escape. Carries the hex digits read so far.
    | InHex of string

/// Cook a string value, replacing escapes with values.
///
/// Recognised escapes are `\a`, `\b`, `\t`, `\n`, `\v`, `\f`, `\r` and the
/// inline hex escape `\x<hex digits>;`. Anything that is not a well-formed
/// escape is copied to the output as written rather than raising or being
/// dropped. That includes unknown escapes, hex escapes with no digits or
/// with a value above U+10FFFF, and escapes cut short by the end of input.
let cookString (s: string) =

    let isHexDigit (ch: char) =
        (ch >= '0' && ch <= '9')
        || (ch >= 'a' && ch <= 'f')
        || (ch >= 'A' && ch <= 'F')

    /// Append the character named by `digits`, or the raw escape text when
    /// the digits do not name a usable code point.
    let appendScalar (sb: StringBuilder) (digits: string) =
        // TryParse rejects the empty string and values that overflow, so
        // neither `\x;` nor an over-long digit run can throw here.
        let ok, codePoint =
            System.Int32.TryParse(
                digits,
                System.Globalization.NumberStyles.AllowHexSpecifier,
                System.Globalization.CultureInfo.InvariantCulture
            )

        if ok && codePoint >= 0 && codePoint <= 0xFFFF then
            sb.Append(char codePoint)
        elif ok && codePoint > 0xFFFF && codePoint <= 0x10FFFF then
            // Outside the BMP a single UTF-16 unit is not enough; emit the
            // surrogate pair instead of truncating the value.
            sb.Append(System.Char.ConvertFromUtf32(codePoint))
        else
            sb.Append("\\x").Append(digits).Append(';')

    let cookChar ((state: CookingState), (sb: StringBuilder)) (ch: char) =
        match state with
        | Plain ->
            match ch with
            | '\\' -> (InEscape, sb)
            | _ -> (Plain, sb.Append(ch))
        | InHex digits ->
            if ch = ';' then
                (Plain, appendScalar sb digits)
            elif isHexDigit ch then
                (InHex(digits + string ch), sb)
            else
                // Not a hex escape after all: put back what was consumed.
                (Plain, sb.Append("\\x").Append(digits).Append(ch))
        | InEscape ->
            match ch with
            | 'a' -> (Plain, sb.Append('\a'))
            | 'b' -> (Plain, sb.Append('\b'))
            | 't' -> (Plain, sb.Append('\t'))
            | 'n' -> (Plain, sb.Append('\n'))
            | 'v' -> (Plain, sb.Append('\v'))
            | 'f' -> (Plain, sb.Append('\f'))
            | 'r' -> (Plain, sb.Append('\r'))
            | 'x' -> (InHex "", sb)
            | _ -> (Plain, sb.Append('\\').Append(ch))

    let (finalState, cooked) =
        s |> Seq.fold cookChar (CookingState.Plain, StringBuilder())

    // Input that stops part-way through an escape is kept verbatim so that
    // no source text is silently lost.
    match finalState with
    | Plain -> ()
    | InEscape -> cooked.Append('\\') |> ignore
    | InHex digits -> cooked.Append("\\x").Append(digits) |> ignore

    cooked.ToString()


/// Cook the value of an identifier. This takes the raw identifier and
/// converts it into a 'cooked' form, expanding out escaped values and
/// replacing them with the true characters.
///
/// Only identifiers written between `|` delimiters are cooked: the
/// delimiters are stripped and the remaining text is passed through
/// `cookString`. Any other token text is returned unchanged.
let cookIdentifier (token: SyntaxToken) =
    let tokenText = token.Green.Text

    // A delimited identifier needs separate opening and closing bars. A lone
    // `|` satisfies both StartsWith and EndsWith, so the length is checked
    // to keep it from being treated as delimited.
    let isDelimited =
        tokenText.Length >= 2
        && tokenText.StartsWith('|')
        && tokenText.EndsWith('|')

    if isDelimited then
        tokenText[1 .. (tokenText.Length - 2)] |> cookString
    else
        tokenText


/// ------------------------ Syntax Tree Types --------------------------------
///
/// The syntax tree is made up of three layers. The red and green layers come
Expand Down Expand Up @@ -171,6 +246,13 @@ and Symbol internal (red: SyntaxNode) =

inherit Expression(red)

/// The cooked text of this symbol's identifier token. Yields the empty
/// string when the node does not have exactly one token child.
member public _.CookedValue =
    let onlyToken =
        red.ChildrenWithTokens()
        |> Seq.choose NodeOrToken.asToken
        |> Seq.tryExactlyOne

    match onlyToken with
    | Some token -> cookIdentifier token
    | None -> ""

/// This is a self-evaluating expression that contains a single constant datum.
and Constant internal (red: SyntaxNode) =

Expand Down
17 changes: 13 additions & 4 deletions test/Feersum.Tests/SyntaxTestsNew.fs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ open Feersum.CompilerServices.Text
open Feersum.CompilerServices.Syntax.Tree
open Feersum.CompilerServices.Syntax.Parse

/// Parse `line` with `Parse.readExpr1` (source name "repl") and return the
/// root of the result. Fails the test if the parse reported any diagnostics.
let readScriptExpr line =
    let parsed = Parse.readExpr1 "repl" line

    match parsed.Diagnostics with
    | [] -> parsed.Root
    | errors -> failwithf "Expected single expression but got errors: %A" errors

let readSingle line =
let result = Parse.readRaw Parse.ReadMode.Script "repl" line

Expand Down Expand Up @@ -142,7 +150,8 @@ let ``extended identifier characters`` ident =
[<InlineData(@"|H\x65;llo|", "Hello")>]
[<InlineData(@"|\x3BB;|", "λ")>]
let ``identifier literals`` raw (cooked: string) =
let tree = readSingle raw
let script = readScriptExpr raw
let tree = script.RawNode.Children() |> Seq.exactlyOne

Assert.Equal(AstKind.SYMBOL, tree |> getKind)

Expand All @@ -153,9 +162,9 @@ let ``identifier literals`` raw (cooked: string) =

Assert.Equal(AstKind.IDENTIFIER, identTok |> getTokenKind)

// FIXME: Assert on the value of cooked. Probably parse as typed tree
// instead here and access the value through that.
cooked |> ignore
match script.Body with
| Some (Symbol s) -> Assert.Equal(cooked, s.CookedValue)
| _ -> failwithf "Expected identifier but got %A" script.Body

// [<Theory>]
// [<InlineData("\\a", '\a')>]
Expand Down

0 comments on commit aba5169

Please sign in to comment.