Move angelic execution to HerbInterpret

* Also moves the trie datastructure, and adds the package dep
Herb-AI · Jul 8, 2024 · ac34ef2 · ac34ef2
1 parent 510f35d
commit ac34ef2
Show file tree

Hide file tree

Showing 9 changed files with 500 additions and 1 deletion.
diff --git a/Project.toml b/Project.toml
@@ -4,11 +4,13 @@ authors = ["Tilman Hinnerichs <t.r.hinnerichs@tudelft.nl>", "Jaap de Jong <jaapd
 version = "0.1.3"
 
 [deps]
+DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 HerbCore = "2b23ba43-8213-43cb-b5ea-38c12b45bd45"
 HerbGrammar = "4ef9e186-2fe5-4b24-8de7-9f7291f24af7"
 HerbSpecification = "6d54aada-062f-46d8-85cf-a1ceaf058a06"
 
 [compat]
+DataStructures = "0.17,0.18"
 HerbCore = "^0.3.0"
 HerbGrammar = "^0.3.0"
 HerbSpecification = "^0.1.0"

diff --git a/src/HerbInterpret.jl b/src/HerbInterpret.jl
@@ -1,17 +1,38 @@
 module HerbInterpret
 
+using DataStructures # Queue
+
 using HerbCore
 using HerbGrammar
 using HerbSpecification
 
 include("interpreter.jl")
 
+include("angelic_conditions/bit_trie.jl")
+include("angelic_conditions/angelic_config.jl")
+include("angelic_conditions/execute_angelic.jl")
+
+include("running_test_utils.jl")
+
 export 
     SymbolTable,
     interpret,
 
     execute_on_input,
     update_✝γ_path,
-    CodePath
+    CodePath,
+
+    create_angelic_expression,
+    ConfigAngelic,
+    execute_angelic_on_input,
+    get_code_paths!,
+
+    BitTrie,
+    BitTrieNode,
+    trie_add!,
+    trie_contains,
+
+    passes_the_same_tests_or_more,
+    update_passed_tests!
 
 end # module HerbInterpret
diff --git a/src/angelic_conditions/angelic_config.jl b/src/angelic_conditions/angelic_config.jl
@@ -0,0 +1,20 @@
+"""
+    struct ConfigAngelic
+
+A configuration struct for angelic conditions. Includes both generation- and evaluation-specific parameters.
+
+# Fields
+- `max_time::Float16`: The maximum time allowed for resolving a single angelic expression.
+- `boolean_expr_max_depth::Int`: The maximum depth of boolean expressions when resolving angelic conditions.
+- `max_execute_attempts::Int`: The maximal attempts of executing the program with angelic evaluation.
+- `max_allowed_fails::Float16`: The maximum allowed fraction of failed tests during evaluation before short-circuit failure.
+- `angelic_rulenode::Union{Nothing,RuleNode}`: The angelic rulenode. Used to replace angelic conditions/holes right before evaluation.
+
+"""
+@kwdef mutable struct ConfigAngelic
+    max_time::Float16 = 0.1
+    boolean_expr_max_depth::Int64 = 3
+    max_execute_attempts::Int = 55
+    max_allowed_fails::Float16 = 0.75
+    angelic_rulenode::Union{Nothing,RuleNode} = nothing
+end
diff --git a/src/angelic_conditions/bit_trie.jl b/src/angelic_conditions/bit_trie.jl
@@ -0,0 +1,62 @@
+"""
+An implementation of a bare-bones trie data structure
+https://en.wikipedia.org/wiki/Trie
+
+This trie does not store values associated to the nodes, and the paths are built using bitstrings instead of regular strings.
+
+The trie is used to store code paths that have been traversed, and check if a given path is a prefix of any of the stored paths.
+
+"""
+@kwdef mutable struct BitTrieNode
+    left::Union{BitTrieNode,Nothing} = nothing
+    right::Union{BitTrieNode,Nothing} = nothing
+    is_leaf::Bool = false
+end
+
+@kwdef mutable struct BitTrie
+    root::Union{BitTrieNode,Nothing} = BitTrieNode()
+    size::Int = 0
+end
+
+# Adds nodes to the trie to represent the given path
+function trie_add!(trie::BitTrie, path::BitVector)
+    curr = trie.root
+    for i in eachindex(path)
+        if path[i]
+            if curr.right === nothing
+                curr.right = BitTrieNode()
+            end
+            curr = curr.right
+        else
+            if curr.left === nothing
+                curr.left = BitTrieNode()
+            end
+            curr = curr.left
+        end
+        if curr.is_leaf
+            return
+        end
+    end
+    if !curr.is_leaf
+        trie.size += 1
+        curr.is_leaf = true
+    end
+end
+
+# Checks if the trie contains the given path - used for
+function trie_contains(trie::BitTrie, path::BitVector)
+    curr = trie.root
+    if curr.is_leaf
+        return true
+    end
+    for i in eachindex(path)
+        curr = path[i] ? curr.right : curr.left
+        if curr === nothing
+            return false
+        end
+        if curr.is_leaf
+            return true
+        end
+    end
+    false
+end
diff --git a/src/angelic_conditions/execute_angelic.jl b/src/angelic_conditions/execute_angelic.jl
@@ -0,0 +1,141 @@
+"""
+    execute_angelic_on_input(
+        symboltable::SymbolTable, program::RuleNode, grammar::AbstractGrammar, input::Dict{Symbol,Any}, 
+        expected_output::Any, angelic_rulenode::RuleNode, max_attempts::Int, angelic_conditions::Dict{UInt16,UInt8})::Bool
+
+Run test case `input` on `program` containing angelic conditions. This is done by optimistically evaluating the program, by generating different code paths
+    and checking if any of them make the program pass the test case.
+
+# Arguments
+- `symboltable`: The symbol table containing the program's symbols.
+- `program`: The program to be executed.
+- `grammar`: The grammar rules of the program.
+- `input`: A dictionary where each key is a symbol used in the expression, and the value is the corresponding value to be used in the expression's evaluation.
+- `expected_output`: The expected output of the program.
+- `angelic_rulenode`: The angelic rulenode. It is used to replace angelified conditions.
+- `max_attempts`: The maximum number of attempts before assuming the angelic program cannot be resolved.
+- `angelic_conditions`: A dictionary mapping indices of angelic condition candidates, to the child index that may be changed.
+
+# Returns
+Whether the output of running `program` on `input` matches `output` within `max_attempts` attempts.
+
+"""
+function execute_angelic_on_input(
+    symboltable::SymbolTable,
+    program::RuleNode,
+    grammar::AbstractGrammar,
+    input::Dict{Symbol,Any},
+    expected_output::Any,
+    angelic_rulenode::RuleNode,
+    max_attempts::Int,
+    angelic_conditions::Dict{UInt16,UInt8}
+)::Bool
+    num_true, attempts = 0, 0
+    # We check traversed code paths by prefix -> trie is efficient for this
+    visited = BitTrie()
+    expr = create_angelic_expression(program, grammar, angelic_rulenode, angelic_conditions)
+    while num_true < max_attempts
+        code_paths = Vector{BitVector}()
+        get_code_paths!(num_true, BitVector(), visited, code_paths, max_attempts - attempts)
+        # Terminate if we generated max_attempts, or impossible to generate further paths
+        if isempty(code_paths)
+            return false
+        end
+        for code_path in code_paths
+            # println("Attempt: ", code_path)
+            actual_code_path = BitVector()
+            try
+                output = execute_on_input(symboltable, expr, input, CodePath(code_path, 0), actual_code_path)
+                # println("Actual path: ", actual_code_path)
+                if output == expected_output
+                    return true
+                end
+            catch
+            # Mark as visited and count attempt
+            finally
+                trie_add!(visited, actual_code_path)
+                attempts += 1
+            end
+        end
+        num_true += 1
+    end
+    false
+end
+
+"""
+    get_code_paths!(num_true::Int, curr::BitVector, visited::BitTrie, code_paths::Vector{BitVector}, max_length::Int)
+
+Generate code paths to be used for angelic evaluation, and stores them in `code_paths`. The function recursively explores different sequences of `true` and `false` 
+    values, which represent whether the next control statement will be skipped or not. Makes sure that the returned paths do not share prefix with visited paths.
+
+# Arguments
+- `num_true`: The number of `true` values in the code path.
+- `curr`: The current code path being generated.
+- `visited`: The visited code paths.
+- `code_paths`: The vector to store the generated code paths.
+- `max_length`: The maximum length of a code path allowed.
+
+"""
+function get_code_paths!(
+    num_true::Int,
+    curr::BitVector,
+    visited,
+    code_paths::Vector{BitVector},
+    max_length::Int
+)
+    # If enough code paths, or visited a prefix-path, return
+    if (length(code_paths) >= max_length || trie_contains(visited, curr))
+        return
+    end
+    # Add current one if enough 'true' values
+    if num_true == 0
+        push!(code_paths, deepcopy(curr))
+        return
+    end
+    # Continue with 'true' and build all paths
+    push!(curr, true)
+    get_code_paths!(num_true - 1, curr, visited, code_paths, max_length)
+    # Continue with 'false' and build all paths
+    curr[end] = false
+    get_code_paths!(num_true, curr, visited, code_paths, max_length)
+    pop!(curr)
+end
+
+"""
+    create_angelic_expression(program::RuleNode, grammar::AbstractGrammar, angelic_rulenode::RuleNode, angelic_conditions::Dict{UInt16,UInt8})::Expr
+
+Create an angelic expression, i.e. replace all remaining holes with angelic rulenode trees so that the tree can be parsed and executed.
+
+# Arguments
+- `program`: The program to turn into an angelic expression.
+- `grammar`: The grammar rules of the program.
+- `angelic_rulenode`: The angelic rulenode. It is used to replace angelified conditions.
+- `angelic_conditions`: A dictionary mapping indices of angelic condition candidates, to the child index that may be changed.
+
+# Returns
+The generated angelic expression.
+
+"""
+function create_angelic_expression(
+    program::RuleNode,
+    grammar::AbstractGrammar,
+    angelic_rulenode::RuleNode,
+    angelic_conditions::Dict{UInt16,UInt8}
+)::Expr
+    new_program = deepcopy(program)
+    # BFS traversal
+    queue = DataStructures.Queue{AbstractRuleNode}()
+    enqueue!(queue, new_program)
+    while !isempty(queue)
+        node = dequeue!(queue)
+        angelic_idx = get(angelic_conditions, node.ind, -1)
+        for (child_index, child) in enumerate(node.children)
+            if angelic_idx == child_index && child isa AbstractHole
+                node.children[child_index] = angelic_rulenode
+            else
+                enqueue!(queue, child)
+            end
+        end
+    end
+    rulenode2expr(new_program, grammar)
+end
diff --git a/src/running_test_utils.jl b/src/running_test_utils.jl
@@ -0,0 +1,95 @@
+"""
+    passes_the_same_tests_or_more(program::RuleNode, grammar::AbstractGrammar, tests::AbstractVector{<:IOExample}, passed_tests::BitVector)::Bool
+
+Checks if the provided program passes all the tests (or more) that have been provided. The function breaks early if a test fails.
+
+# Arguments
+- `program`: The program to test.
+- `grammar`: The grammar rules of the program.
+- `tests`: A vector of `IOExample` objects representing the input-output test cases.
+- `passed_tests`: A BitVector representing the tests that the program has already passed.
+
+# Returns
+Returns true if the program passes all the tests in marked in `passed_Tests`, false otherwise.
+
+"""
+function passes_the_same_tests_or_more(program::RuleNode, grammar::AbstractGrammar, tests::AbstractVector{<:IOExample}, passed_tests::BitVector)::Bool
+    symboltable = SymbolTable(grammar)
+    expr = rulenode2expr(program, grammar)
+    for (index, test) in enumerate(tests)
+        # If original does not pass, then skip
+        if !passed_tests[index]
+            continue
+        end
+        # Else check that new program also passes the test
+        try
+            output = execute_on_input(symboltable, expr, test.in)
+            if (output != test.out)
+                return false
+            end
+        catch _
+            return false
+        end
+    end
+    true
+end
+
+"""
+    update_passed_tests!(
+        program::RuleNode, grammar::AbstractGrammar, symboltable::SymbolTable, tests::AbstractVector{<:IOExample},
+        prev_passed_tests::BitVector, angelic_conditions::Dict{UInt16, UInt8}, config::ConfigAngelic)
+
+Updates the tests that the program passes. This is done by running `program` for all `tests`, and updates the `prev_passed_tests` vector with the results.
+May run the program optimistically ("angelically") if the syntax tree contains holes.
+
+# Arguments
+- `program`: The program to be tested.
+- `grammar`: The grammar rules of the program.
+- `symboltable`: A symbol table for the grammar.
+- `tests`: A vector of `IOExample` objects representing the input-output test cases.
+- `prev_passed_tests`: A `BitVector` representing the tests that the program has previously passed.
+- `angelic_conditions`: A dictionary mapping indices of angelic condition candidates, to the child index that may be changed.
+- `config`: The configuration for angelic conditions of FrAngel.
+
+"""
+function update_passed_tests!(
+    program::RuleNode,
+    grammar::AbstractGrammar,
+    symboltable::SymbolTable,
+    tests::AbstractVector{<:IOExample},
+    prev_passed_tests::BitVector,
+    angelic_conditions::Dict{UInt16,UInt8},
+    angelic_config::ConfigAngelic
+)
+    # If angelic -> evaluate optimistically
+    if contains_hole(program)
+        @assert !isa(angelic_config.angelic_rulenode, Nothing)
+        angelic_rulenode = angelic_config.angelic_rulenode::RuleNode
+        fails = 0
+        for (index, test) in enumerate(tests)
+            # Angelically evaluate the program for this test
+            prev_passed_tests[index] = execute_angelic_on_input(symboltable, program, grammar, test.in, test.out,
+                angelic_rulenode, angelic_config.max_execute_attempts, angelic_conditions)
+            if !prev_passed_tests[index]
+                fails += 1
+                # If it fails too many tests, preemtively end evaluation
+                if angelic_config.max_allowed_fails < fails / length(tests)
+                    return nothing
+                end
+            end
+        end
+        nothing
+        # Otherwise, evaluate regularly
+    else
+        expr = rulenode2expr(program, grammar)
+        for (index, test) in enumerate(tests)
+            try
+                output = execute_on_input(symboltable, expr, test.in)
+                prev_passed_tests[index] = output == test.out
+            catch _
+                prev_passed_tests[index] = false
+            end
+        end
+        expr
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -5,4 +5,7 @@ using Test
 
 @testset verbose=true "HerbInterpret.jl" begin
     include("test_execute_on_input.jl")
+
+    include("test_angelic_conditions/test_bit_trie.jl")
+    include("test_angelic_conditions/test_execute_angelic.jl")
 end