From 3a11107b59b0f1d51250a3693fffe4d68ad3f505 Mon Sep 17 00:00:00 2001
From: Matthew Pope <81593196+popematt@users.noreply.github.com>
Date: Tue, 19 Nov 2024 12:12:20 -0800
Subject: [PATCH] Adds conformance tests for floats (#134)

---
 conformance/README.md            |   8 +-
 conformance/data_model/float.ion | 340 +++++++++++++++++++++++++++++++
 conformance/grammar.isl          |  10 +-
 3 files changed, 355 insertions(+), 3 deletions(-)
 create mode 100644 conformance/data_model/float.ion

diff --git a/conformance/README.md b/conformance/README.md
index c0da477..1751023 100644
--- a/conformance/README.md
+++ b/conformance/README.md
@@ -753,8 +753,9 @@ model-symtok    ::=  string
 
 model-field     ::=  "("  model-symtok  model-value  ")"
 
-// TODO: Determine whether we can come up with anything better for model-float
-model-float     ::= string                                 // See https://amazon-ion.github.io/ion-docs/docs/float.html
+// All "denotes" values must be given with 64-bit precision, regardless of the value being tested because the Ion data
+// model uses 64 bit floats. See https://amazon-ion.github.io/ion-docs/docs/float.html
+model-float     ::= string
 
 model-decimal   ::= int int                                // coefficient + exponent
                   | "negative_0" int                       // negative zero coefficient + exponent
@@ -783,6 +784,9 @@ The `model-content` forms `(string ...)` and `(symbol (text ...))` express text
 in terms of Unicode code points, which is needed to test parsing of escape
 sequences.
 
+Floating point number libraries can be inconsistent regarding the serialization of non-numeric float values.
+For the purpose of this conformance DSL, use `nan`, `+inf`, and `-inf` for the non-numeric values.
+
 
 # WIP TODOs
 
diff --git a/conformance/data_model/float.ion b/conformance/data_model/float.ion
new file mode 100644
index 0000000..5e0023f
--- /dev/null
+++ b/conformance/data_model/float.ion
@@ -0,0 +1,340 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+/*
+TESTING NOTES:
+
+It is assumed that floats are largely handled by a standard library with reliable test covereage.
+Do not use this test suite as the only standard for any home-spun floating point number implementation.
+
+In IEEE 754 interchange formats, binary format NaNs are represented with the exponential field filled with ones (like
+infinity values), and ANY non-zero number in the significand field. These test cases use the quiet and signalling NaN
+encodings as specified in IEEE-754-2019, although there are other legal representations of NaN.
+ */
+
+(ion_1_0 "Ion 1.0 binary"
+         (then "zero encoded as"
+               (each "f0"  (binary "40")
+                     "f32" (binary "44 00 00 00 00")
+                     "f64" (binary "48 00 00 00 00 00 00 00 00")
+                     (denotes (Float "0e0"))))
+         (then "negative zero encoded as"
+               (each "f32" (binary "44 80 00 00 00")
+                     "f64" (binary "48 80 00 00 00 00 00 00 00")
+                     (denotes (Float "-0e0"))))
+         (then "1.0 encoded as"
+               (each "f32" (binary "44 3F 80 00 00")
+                     "f64" (binary "48 3F F0 00 00 00 00 00 00")
+                     (denotes (Float "1e0"))))
+         (then "-1.0 encoded as"
+               (each "f32" (binary "44 BF 80 00 00")
+                     "f64" (binary "48 BF F0 00 00 00 00 00 00")
+                     (denotes (Float "-1e0"))))
+         (then "an ordinary non-integral number encoded as"
+               (each "f32" (binary "44 40 C4 00 00")
+                     "f64" (binary "48 40 18 80 00 00 00 00 00")
+                     (denotes (Float "6.125e0"))))
+         (then "a subnormal"
+               (then "f32 value"
+                     (binary "44 00 00 00 01")
+                     (denotes (Float "1.401298464324817e-45")))
+               (then "f64 value"
+                     (binary "48 00 00 00 00 00 00 00 01")
+                     (denotes (Float "5e-324"))))
+         (then "a negative subnormal"
+               (then "f32 value"
+                     (binary "44 80 00 00 01")
+                     (denotes (Float "-1.401298464324817e-45")))
+               (then "f64 value"
+                     (binary "48 80 00 00 00 00 00 00 01")
+                     (denotes (Float "-5e-324"))))
+         (then "NaN encoded as"
+               (each "f32 qNaN" (binary "44 FF C0 00 01")
+                     "f32 sNaN" (binary "44 FF 80 00 01")
+                     "f64 qNaN" (binary "48 7F FC 00 00 00 00 00 00")
+                     "f64 sNaN" (binary "48 7F F8 00 00 00 00 00 00")
+                     (denotes (Float "nan"))))
+         (then "infinity encoded as"
+               (each "f32" (binary "44 7F 80 00 00")
+                     "f64" (binary "48 7F F0 00 00 00 00 00 00")
+                     (denotes (Float "+inf"))))
+         (then "negative infinity encoded as"
+               (each "f32" (binary "44 FF 80 00 00")
+                     "f64" (binary "48 FF F0 00 00 00 00 00 00")
+                     (denotes (Float "-inf")))))
+
+(ion_1_0 "Illegal Ion 1.0 type ids:"
+         (each "0x41" (binary "41 00")
+               "0x42" (binary "42 00 00")
+               "0x43" (binary "42 00 00 00")
+               "0x45" (binary "42 00 00 00 00 00")
+               "0x46" (binary "42 00 00 00 00 00 00")
+               "0x47" (binary "42 00 00 00 00 00 00 00")
+               "0x49" (binary "42 00 00 00 00 00 00 00 00 00")
+               "0x4A" (binary "42 00 00 00 00 00 00 00 00 00 00")
+               "0x4B" (binary "42 00 00 00 00 00 00 00 00 00 00 00")
+               "0x4C" (binary "42 00 00 00 00 00 00 00 00 00 00 00 00")
+               "0x4D" (binary "42 00 00 00 00 00 00 00 00 00 00 00 00 00")
+               "0x4E" (binary "42 00 00 00 00 00 00 00 00 00 00 00 00 00 00")
+               (signals "invalid typeid")))
+
+(ion_1_0 "Incomplete floats signal an error for unexpected EOF"
+         (each "type id 0x44"
+               (binary "44")
+               (binary "44 00")
+               (binary "44 00 00")
+               (binary "44 00 00 00")
+               "type id 0x48"
+               (binary "48")
+               (binary "48 00")
+               (binary "48 00 00")
+               (binary "48 00 00 00")
+               (binary "48 00 00 00 00")
+               (binary "48 00 00 00 00 00")
+               (binary "48 00 00 00 00 00 00")
+               (binary "48 00 00 00 00 00 00 00")
+               (signals "unexpected EOF")))
+
+(ion_1_1 "Ion 1.1 binary"
+         (then "zero encoded as"
+               (each "f0"  (binary "6A")
+                     "f16" (binary "6B 00 00")
+                     "f32" (binary "6C 00 00 00 00")
+                     "f64" (binary "6D 00 00 00 00 00 00 00 00")
+                     (denotes (Float "0e0"))))
+         (then "negative zero encoded as"
+               (each "f16" (binary "6B 00 80")
+                     "f32" (binary "6C 00 00 00 80")
+                     "f64" (binary "6D 00 00 00 00 00 00 00 80")
+                     (denotes (Float "-0e0"))))
+         (then "1.0 encoded as"
+               (each "f16" (binary "6B 00 3C")
+                     "f32" (binary "6C 00 00 80 3F")
+                     "f64" (binary "6D 00 00 00 00 00 00 F0 3F")
+                     (denotes (Float "1e0"))))
+         (then "-1.0 encoded as"
+               (each "f16" (binary "6B 00 BC")
+                     "f32" (binary "6C 00 00 80 BF")
+                     "f64" (binary "6D 00 00 00 00 00 00 F0 BF")
+                     (denotes (Float "-1e0"))))
+         (then "an ordinary non-integral number encoded as"
+               (each "f16" (binary "6B 20 46")
+                     "f32" (binary "6C 00 00 C4 40")
+                     "f64" (binary "6D 00 00 00 00 00 80 18 40")
+                     (denotes (Float "6.125e0"))))
+         (then "a subnormal"
+               (then "f16 value"
+                     (binary "6B 01 00")
+                     (denotes (Float "5.9604645e-8")))
+               (then "f32 value"
+                     (binary "6C 01 00 00 00")
+                     (denotes (Float "1.401298464324817e-45")))
+               (then "f64 value"
+                     (binary "6D 01 00 00 00 00 00 00 00")
+                     (denotes (Float "5e-324"))))
+         (then "a negative subnormal"
+               (then "f16 value"
+                     (binary "6B 01 80")
+                     (denotes (Float "-5.9604645e-8")))
+               (then "f32 value"
+                     (binary "6C 01 00 00 80")
+                     (denotes (Float "-1.401298464324817e-45")))
+               (then "f64 value"
+                     (binary "6D 01 00 00 00 00 00 00 80")
+                     (denotes (Float "-5e-324"))))
+         (then "NaN encoded as"
+               (each "f16 qNaN" (binary "6B 01 7E")
+                     "f16 sNaN" (binary "6B 01 7C")
+                     "f32 qNaN" (binary "6C 01 00 C0 FF")
+                     "f32 sNaN" (binary "6C 01 00 80 FF")
+                     "f64 qNaN" (binary "6D 00 00 00 00 00 00 FC 7F")
+                     "f64 sNaN" (binary "6D 00 00 00 00 00 00 F8 7F")
+                     (denotes (Float "nan"))))
+         (then "infinity encoded as"
+               (each "f16" (binary "6B 00 7C")
+                     "f32" (binary "6C 00 00 80 7F")
+                     "f64" (binary "6D 00 00 00 00 00 00 F0 7F")
+                     (denotes (Float "+inf"))))
+         (then "negative infinity encoded as"
+               (each "f16" (binary "6B 00 FC")
+                     "f32" (binary "6C 00 00 80 FF")
+                     "f64" (binary "6D 00 00 00 00 00 00 F0 FF")
+                     (denotes (Float "-inf")))))
+
+(ion_1_1 "Incomplete floats signal an error for unexpected EOF"
+         (each "opcode 6B"
+               (binary "6B 00")
+               "opcode 6C"
+               (binary "6C 00")
+               (binary "6C 00 00")
+               (binary "6C 00 00 00")
+               "opcode 6D"
+               (binary "6D")
+               (binary "6D 00")
+               (binary "6D 00 00")
+               (binary "6D 00 00 00")
+               (binary "6D 00 00 00 00")
+               (binary "6D 00 00 00 00 00")
+               (binary "6D 00 00 00 00 00 00")
+               (binary "6D 00 00 00 00 00 00 00")
+               (signals "unexpected EOF")))
+
+(ion_1_x "in text, floats"
+         (each "must have an e"
+               (text "0e0")
+               "the e can be upper case"
+               (text "0E0")
+               "can have a negative exponent"
+               (text "0e-0")
+               "can have a . (dot) before the e"
+               (text "0.e0")
+               "can have a fractional number before the e"
+               (text "0.0e0")
+               "can have any number of trailing zeros before the e"
+               (text "0.00000e0")
+               "can have leading zeros in the exponent"
+               (text "0e00001")
+               "can have underscores to group digits"
+               (text "0.000_000_000_000_000_000_000_000e000_000_000_000_000_000_000_000_999_999_999_999_999_999_999_999")
+               (denotes (Float "0e0")))
+         (then "can start with a - (minus sign)"
+               (text "-1.0e0")
+               (denotes (Float "-1e0"))))
+
+(ion_1_x "in text, zero can have any exponent"
+         (each (text "0e1")
+               (text "0e2")
+               (text "0e3")
+               (text "0e4")
+               (text "0e-1")
+               (text "0e-2")
+               (text "0e-3")
+               (text "0e-4")
+               (text "0e9999999999999999999999999")
+               (text "0e-9999999999999999999999999")
+               (denotes (Float "0e0")))
+         (each (text "-0e1")
+               (text "-0e2")
+               (text "-0e3")
+               (text "-0e4")
+               (text "-0e-1")
+               (text "-0e-2")
+               (text "-0e-3")
+               (text "-0e-4")
+               (text "-0e9999999999999999999999999")
+               (text "-0e-9999999999999999999999999")
+               (denotes (Float "-0e0"))))
+
+(ion_1_x "in text, the float '1.0'"
+         (each (text "1e0")
+               (text "1e-0")
+               (text "1.e0")
+               (text "1.0e0")
+               (text "1.00000e0")
+               (text "1e00")
+               (text "1e0000000")
+               (text "0.1e1")
+               (text "0.01e2")
+               (text "0.001e3")
+               (text "0.0001e4")
+               (text "10e-1")
+               (text "100e-2")
+               (text "1000e-3")
+               (text "10000e-4")
+               (denotes (Float "1e0"))))
+
+(ion_1_x "in text, float parsing has precedence over sexp operator rules"
+         (then "+inf"                              (text  "(+inf)") (denotes (Sexp (Float "+inf"))))
+         (then "-inf"                              (text  "(-inf)") (denotes (Sexp (Float "-inf"))))
+         (then "a negative number"                 (text  "(-1e0)") (denotes (Sexp (Float "-1e0"))))
+         (then "a fractional coefficient"          (text "(1.0e0)") (denotes (Sexp (Float  "1e0"))))
+         (then "a number with . (dot) preceding e" (text  "(1.e0)") (denotes (Sexp (Float  "1e0")))))
+
+
+// the exact binary64 value is determined using IEEE-754 round-to-nearest mode with round-half-to-even as the tie-break
+(ion_1_x "in text, reading"
+         (then "a positive number"
+               (each "that should round to nearest (down)"
+                     (text "100000000000000016e0")
+                     (text "100000000000000023e0")
+                     (denotes (Float "100000000000000016e0")))
+               (each "that rounds up because of the half-even tie-breaker"
+                     (text "100000000000000024e0")
+                     "that rounds to nearest (up)"
+                     (text "100000000000000025e0")
+                     "that rounds to nearest (down)"
+                     (text "100000000000000039e0")
+                     "that rounds down because of the half-even tie-breaker"
+                     (text "100000000000000040e0")
+                     (denotes (Float "100000000000000032e0")))
+               (each "that rounds to nearest (up)"
+                     (text "100000000000000041e0")
+                     (text "100000000000000047e0")
+                     (denotes (Float "100000000000000048e0")))
+               (each "that rounds to infinity"
+                     // f64 max value is 1.7976931348623158E+308
+                     (text "1.7976931348623159e+308")
+                     (text "999999999999999999e9999999")
+                     (denotes (Float "+inf"))))
+         (then "a negative number"
+               (each "that should round to nearest (up)"
+                     (text "-100000000000000016e0")
+                     (text "-100000000000000023e0")
+                     (denotes (Float "-100000000000000016e0")))
+               (each "that rounds down because of the half-even tie-breaker"
+                     (text "-100000000000000024e0")
+                     "that rounds to nearest (down)"
+                     (text "-100000000000000025e0")
+                     "that rounds to nearest (up)"
+                     (text "-100000000000000039e0")
+                     "that rounds up because of the half-even tie-breaker"
+                     (text "-100000000000000040e0")
+                     (denotes (Float "-100000000000000032e0")))
+               (each "that rounds to nearest (down)"
+                     (text "-100000000000000041e0")
+                     (text "-100000000000000047e0")
+                     (denotes (Float "-100000000000000048e0")))
+               (each "that rounds to negative infinity"
+                     // f64 max negative value is -1.7976931348623158E+308
+                     (text "-1.7976931348623159e+308")
+                     (text "-999999999999999999e9999999")
+                     (denotes (Float "-inf")))))
+
+(ion_1_x "in text"
+         (then "the coefficient must not have leading zeros"
+               (each (text "00.0e0")
+                     (text "01.0e0")
+                     (text "00e0")
+                     (signals "invalid leading zero")))
+
+         (then "the exponent must be an integer"
+               (each (text "1.2e3.4")
+                     (text "1.2e3e4")
+                     (text "1.2e+inf")
+                     (text "1.2e-inf")
+                     (text "1.2e0x1")
+                     (signals "syntax error")))
+
+         (then "an unexpected non-numeric character should raise an error"
+               (each "unexpected non-numeric character"
+                     (text "1.1e1a")
+                     "spacing underscores in the wrong location"
+                     (text "1_.2e3")
+                     (text "1._2e3")
+                     (text "1.2_e3")
+                     (text "1.2e_3")
+                     (text "1.2e3_")
+                     "consecutive underscores"
+                     (text "-6.0__1e1")
+                     "illegal leading plus"
+                     (text "+1.2e3")
+                     "extra . (dot) character"
+                     (text "1.2.3e4")
+                     (text "1..3e4")
+                     "there must be a number following the e"
+                     (text "1.0e")
+                     (text "1.e")
+                     (text "1e")
+                     "cannot start with a . (dot)"
+                     (text ".1e2")
+                     (signals "numeric value followed by invalid character"))))
diff --git a/conformance/grammar.isl b/conformance/grammar.isl
index 4843f49..2c0faaa 100644
--- a/conformance/grammar.isl
+++ b/conformance/grammar.isl
@@ -246,7 +246,7 @@ type::{
     {ordered_elements:[{valid_values:[Null,"Null"]},     { type: model_type, occurs: optional }         ]},
     {ordered_elements:[{valid_values:[Bool,"Bool"]},     bool                                           ]},
     {ordered_elements:[{valid_values:[Int,"Int"]},       int                                            ]},
-    {ordered_elements:[{valid_values:[Float,"Float"]},   string                                         ]},
+    {ordered_elements:[{valid_values:[Float,"Float"]},   model_float                                    ]},
     model_decimal,
     model_timestamp,
     {ordered_elements:[{valid_values:[Symbol,"Symbol"]}, model_symtok                                   ]},
@@ -259,6 +259,14 @@ type::{
   ]
 }
 
+type::{
+  name: model_float,
+  type: string,
+  // optional '-', one or more digits, optional '.', zero or more digits, 'e', optional '-', one or more digits
+  //     OR one of the non numeric float values
+  regex: "^(-?\\d+[.]?\\d*[eE]-?\\d+|[+-]inf|nan)$"
+}
+
 type::{
   name: model_symtok,
   one_of:[