From ec4d00ae69253fc75fe954d4ef71c72bfd936365 Mon Sep 17 00:00:00 2001
From: N3N <narenarya@live.com>
Date: Sat, 4 May 2024 21:42:06 -0700
Subject: [PATCH] feat: add support for metadata and variables

---
 prompt.pml                |  27 ++++++---
 src/promptml/grammar.lark |  28 ++++++----
 src/promptml/parser.py    | 112 ++++++++++++++++++--------------------
 3 files changed, 90 insertions(+), 77 deletions(-)

diff --git a/prompt.pml b/prompt.pml
index d84c416..8631961 100644
--- a/prompt.pml
+++ b/prompt.pml
@@ -1,8 +1,8 @@
-# Prompt Description for a task
+# Define prompt
 @prompt
   # Context is used to provide background information or context for the task
   @context
-    You are a highly skilled and experienced software developer with expertise in various programming languages and frameworks. You have been tasked with creating a new web application for a social media platform.
+    You are a $role with expertise in various programming languages and frameworks. You have been tasked with creating a new web application for a social media platform.
   @end
 
   # Objective is used to define the main goal or objective of the task
@@ -36,7 +36,7 @@
   # Examples are used to provide sample inputs and outputs for the task
   @examples
     @example
-      @input 
+      @input
         Design the core architecture and components for a large-scale e-commerce web application.
       @end
       @output
@@ -44,10 +44,10 @@
       @end
     @end
     @example
-      @input  
+      @input
         Outline main components for a large-scale e-commerce web application.
       @end
-      @output 
+      @output
         Product Catalog, User Management, Order Processing, Payment Gateway, Search Engine, Recommendation Engine are the main components of a large-scale e-commerce web application...
       @end
     @end
@@ -60,13 +60,22 @@
       max: 3000
     @end
     @tone
-        Professional and technical 
+        Professional and technical
     @end
   @end
 
-  # Metadata includes information such as domain, difficulty, skills, and tags
+  # Metadata includes information such as domain, difficulty, custom props, etc.
   @metadata
-    @domain Software Engineering, Web Development @end
-    @difficulty Advanced @end
+    domain: 'Software Engineering'
+    difficulty: 'Advanced'
+    top_p: 0.6
+    temperature: 0.5
+    n: 1
+    method: 'greedy'
   @end
 @end
+
+# Define prompt variables
+@vars
+    role = 'highly skilled and experienced software developer'
+@end
diff --git a/src/promptml/grammar.lark b/src/promptml/grammar.lark
index 6911aa6..692f518 100644
--- a/src/promptml/grammar.lark
+++ b/src/promptml/grammar.lark
@@ -1,6 +1,11 @@
-prompt: "@prompt" sections "@end"
+?start: block+
+block:  prompt | var_block
 
-sections: section+
+var_block: "@vars" assignment* "@end"
+assignment: VAR_NAME "=" (NUMBER | STRING | FLOAT)
+VAR_NAME: /[a-zA-Z_][a-zA-Z0-9_]*/
+
+prompt: "@prompt" section* "@end"
 
 section: context
         | objective
@@ -12,27 +17,30 @@ section: context
 context: "@context" text "@end"
 objective: "@objective" text "@end"
 
-instructions: "@instructions" instruction+ "@end"
+instructions: "@instructions" instruction* "@end"
 instruction: "@step" text "@end"
 
-examples: "@examples" example+ "@end"
+examples: "@examples" example* "@end"
 example: "@example" input output "@end"
 input: "@input" text "@end"
 output: "@output" text "@end"
 
-constraints: "@constraints" constraint+ "@end"
+constraints: "@constraints" constraint* "@end"
 constraint: length | tone
 length: "@length" "min:" INT "max:" INT "@end"
 tone: "@tone" text "@end"
 
-metadata: "@metadata" meta+ "@end"
-meta: domain | difficulty
-domain: "@domain" text "@end"
-difficulty: "@difficulty" text "@end"
+metadata: "@metadata" prop* "@end"
+prop: PROP_NAME ":" (NUMBER | STRING )
+PROP_NAME: /[a-zA-Z_][a-zA-Z0-9_]*/
 
-text: /[^@]+/ 
+STRING: /'[^']*'/ | /"[^"]*"/
+text: /[^@]+/
 
 %import common.WS
+%import common.NUMBER
+%import common.STRING
+%import common.FLOAT
 %ignore /\#.*/  // Ignore comments
 %ignore WS
 %import common.INT
diff --git a/src/promptml/parser.py b/src/promptml/parser.py
index 680e710..5b2af56 100644
--- a/src/promptml/parser.py
+++ b/src/promptml/parser.py
@@ -1,70 +1,23 @@
 """
 This module provides a PromptParser class for parsing DSL code and extracting prompt information.
 
-The PromptParser class can parse DSL code and extract sections such as context, 
+The PromptParser class can parse DSL code and extract sections such as context,
 objective, instructions, examples, constraints, and metadata from the code.
 It uses regular expressions to search for specific
 patterns in the DSL code and extract the corresponding content.
 
 Example usage:
     dsl_code = '''
-        @prompt
-        @context
-        This is the context section.
-        @end
-
-        @objective
-        This is the objective section.
-        @end
-
-        @instructions
-        These are the instructions.
-        @end
-
-        @examples
-        @example
-        @input
-        Input example 1
-        @end
-        @output
-        Output example 1
-        @end
-        @end
-        @end
-
-        @constraints
-        @length min: 1 max: 10
-        @end
-
-        @metadata
-        @domain
-        Domain example
-        @end
-        @difficulty
-        Difficulty example
-        @end
-        @end
+        code...
     '''
 
     parser = PromptParser(dsl_code)
     prompt = parser.parse()
-
-    print(prompt)
-    # Output: {
-    #     'context': 'This is the context section.',
-    #     'objective': 'This is the objective section.',
-    #     'instructions': 'These are the instructions.',
-    #     'examples': [
-    #         {'input': 'Input example 1', 'output': 'Output example 1'}
-    #     ],
-    #     'constraints': {'length': {'min': 1, 'max': 10}},
-    #     'metadata': {'domain': 'Domain example', 'difficulty': 'Difficulty example'}
-    # }
 """
 
 import json
 import os
-
+import re
 
 from lark import Lark, Transformer
 
@@ -72,14 +25,37 @@ class PromptMLTransformer(Transformer):
     """
     A class for transforming the parsed PromptML code into a structured format.
     """
+
+    def start(self, items):
+        """ Expand $name variables in the prompt's context and objective."""
+
+        # Child 0 is expected to be the prompt dict, child 1 the variable map from @vars.
+        prompt = items[0]
+        context = prompt["context"]
+        objective = prompt["objective"]
+        vars_ = items[1]
+
+        for k, v in vars_.items():
+            # Literal replace: re.sub would interpret backslashes in v as escapes.
+            context = context.replace("$" + k, v)
+            objective = objective.replace("$" + k, v)
+        prompt["context"] = context
+        prompt["objective"] = objective
+        return prompt
+
+    def block(self, items):
+        """ Unwrap a block node by returning its first child unchanged."""
+        return items[0]
+
     def prompt(self, items):
         """ Extract the prompt content."""
         sections = {}
-        tree = items[0]
-        for child in tree.children:
-            if child.data == "section":
+        for child in items:
+            if hasattr(child, "data") and child.data == "section":
                 data = child.children[0]
                 sections.update(data)
+            else:
+                sections.update(child)
 
         return sections
 
@@ -129,14 +105,34 @@ def tone(self, items):
         """ Extract the tone constraint content."""
         return {"tone": items[0].strip()}
 
+    def var_block(self, items):
+        """ Map variable names to values; STRING values lose their enclosing quotes."""
+        var_map = {}
+
+        for item in items:
+            var_symbol = item.children[0].strip()
+            var_value = item.children[1].strip()
+            var_map[var_symbol] = var_value[1:-1] if var_value[:1] in ("'", '"') else var_value
+
+        return var_map
+
     def metadata(self, items):
         """ Extract the metadata section content."""
         metadata = {}
-        for item in items:
-            child = item.children[0]
 
-            for k,v in child.items():
-                metadata[k] = v.strip()
+        for item in items:
+            key = item.children[0].strip()
+            if key:
+                prop_type = item.children[1].type
+                if prop_type == "NUMBER":
+                    try:
+                        metadata[key] = int(item.children[1].strip())
+                    except ValueError:
+                        metadata[key] = float(item.children[1].strip())
+                elif prop_type == "STRING":
+                    metadata[key] = item.children[1].strip().strip("\"").strip("\'")
+                else:
+                    metadata[key] = item.children[1].strip()
 
         return {"metadata": metadata}
 
@@ -167,7 +163,7 @@ def __init__(self, code: str):
 
         self.code = code
         self.prompt = {}
-        self.parser = Lark(promptml_grammar, start="prompt")
+        self.parser = Lark(promptml_grammar)
 
     def parse(self):
         """