feat: first (incomplete) draft of PromptTemplateDictionary for agents
MoritzLaurer committed Jan 23, 2025
1 parent 8ce0f14 commit a75e64d
Showing 7 changed files with 566 additions and 1 deletion.
129 changes: 129 additions & 0 deletions docs/template_dictionaries.md
@@ -0,0 +1,129 @@
# PromptTemplateDictionaries

!!! note
This feature is highly experimental and will change in the coming days.

Complex LLM systems often depend on multiple interdependent prompt templates instead of a single template. Agents are a good example: the general system logic, the planning steps, and the different tasks are defined in separate templates. It can be easier to define, read, and change these interdependent templates in a single file rather than across separate files.

The `PromptTemplateDictionary` is designed for these use cases. A `PromptTemplateDictionary` is simply a dictionary of `ChatPromptTemplate`s or `TextPromptTemplate`s that is stored in a single YAML file and loaded as a single Python object. You can load and use it like this:

```py
>>> from prompt_templates import PromptTemplateDictionary

>>> template_dictionary = PromptTemplateDictionary.load_from_local(
... file_path="./tests/test_data/example_prompts/agent_example_1.yaml"
... )

>>> print(template_dictionary.template_dictionary) # TODO: rename attribute
# {'agent_system_prompt': ChatPromptTemplate(template=[{'role': 'system', 'content': 'You are a code age...'}, ...], template_variables=['tool_descriptions', 'task'], metadata={}, client_parameters={}, custom_data={}, populator='jinja2', jinja2_security_level='standard'),
#  'agent_planning_prompt': TextPromptTemplate(template='Here is your task:\n\nTask:\n```\n{{task}}\n```\n...', template_variables=['task', 'tool_descriptions', 'managed_agents_descriptions', ...], metadata={}, client_parameters={}, custom_data={}, populator='jinja2', jinja2_security_level='standard')}
```
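
Each entry in the dictionary is a regular `ChatPromptTemplate` or `TextPromptTemplate`, so you can inspect a sub-template before populating it, for example to check its `template_variables`:

```py
system_prompt_template = template_dictionary["agent_system_prompt"]
print(type(system_prompt_template).__name__)
# ChatPromptTemplate
print(system_prompt_template.template_variables)
# ['tool_descriptions', 'task']
```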

When integrating the `PromptTemplateDictionary` into your agent code, you can access and populate the respective template as follows. Once populated, the template becomes a list of message dicts (for a `ChatPromptTemplate`) or a single string (for a `TextPromptTemplate`), which can be passed directly to an LLM client.

```py
agent_system_prompt = template_dictionary["agent_system_prompt"].populate(
tool_descriptions="... some tool descriptions ...",
task="... some task ...",
)
print(agent_system_prompt)
# [{'role': 'system',
# 'content': 'You are a code agent and you have the following tools at your disposal:\n<tools>\n... some tool descriptions ...\n</tools>'},
# {'role': 'user',
# 'content': 'Here is the task:\n<task>\n... some task ...\n</task>\nNow begin!'}]
```
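
The dictionary also provides a `populate()` shortcut that looks up a sub-template by name and forwards the variables to it, so the call above can equivalently be written as:

```py
agent_system_prompt = template_dictionary.populate(
    "agent_system_prompt",
    tool_descriptions="... some tool descriptions ...",
    task="... some task ...",
)
```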


A `PromptTemplateDictionary` is defined like this in a YAML file:

```yaml
prompt:
  template_dictionary:
    agent_system_prompt:
      template:
        - role: "system"
          content: |-
            You are a code agent and you have the following tools at your disposal:
            <tools>
            {{tool_descriptions}}
            </tools>
        - role: "user"
          content: |-
            Here is the task:
            <task>
            {{task}}
            </task>
            Now begin!
      template_variables:
        - tool_descriptions
        - task
    agent_planning_prompt:
      template: |-
        Here is your task:

        Task:
        <task>
        {{task}}
        </task>

        Your plan can leverage any of these tools:
        {{tool_descriptions}}

        {{managed_agents_descriptions}}

        List of facts that you know:
        <facts>
        {{answer_facts}}
        </facts>

        Now begin! Write your plan below.
      template_variables:
        - task
        - tool_descriptions
        - managed_agents_descriptions
        - answer_facts
  metadata:
    name: "Example Code Agent"
    description: "A simple code agent example"
    tags:
      - agent
    version: "0.0.1"
    author: "Guido van Bossum"
  client_parameters: {}
  custom_data: {}
```
You can create and edit these templates directly in YAML.
Alternatively, you can create a `PromptTemplateDictionary` programmatically like this:

```py
from prompt_templates import PromptTemplateDictionary, ChatPromptTemplate, TextPromptTemplate
agent_system_prompt_template = ChatPromptTemplate(
template=[
{'role': 'system', 'content': 'You are a code agent and you have the following tools at your disposal:\n<tools>\n{{tool_descriptions}}\n</tools>'},
{'role': 'user', 'content': 'Here is the task:\n<task>\n{{task}}\n</task>\nNow begin!'},
],
template_variables=['tool_descriptions', 'task'],
)
agent_planning_prompt_template = TextPromptTemplate(
template='Here is your task:\n\nTask:\n```\n{{task}}\n```\n\nYour plan can leverage any of these tools:\n{{tool_descriptions}}\n\n{{managed_agents_descriptions}}\n\nList of facts that you know:\n```\n{{answer_facts}}\n```\n\nNow begin! Write your plan below.',
template_variables=['task', 'tool_descriptions', 'managed_agents_descriptions', 'answer_facts'],
)

template_dictionary = PromptTemplateDictionary(
template_dictionary={
"agent_system_prompt": agent_system_prompt_template,
"agent_planning_prompt": agent_planning_prompt_template,
}
)

# not implemented yet
template_dictionary.save_to_local(file_path="./tests/test_data/example_prompts/agent_example_test.yaml")
template_dictionary.save_to_hub(repo_id="moritzlaurer/agent_example_test", filename="agent_example_test.yaml", create_repo=True)
```
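
If you already have the YAML content parsed into a Python dict, you can also build the object with the `from_dict` classmethod. A minimal sketch, assuming PyYAML is available for parsing (the library's own loader uses a ruamel-based YAML handler):

```py
import yaml  # assumption: PyYAML is installed; the library itself parses YAML with ruamel

from prompt_templates import PromptTemplateDictionary

with open("./tests/test_data/example_prompts/agent_example_1.yaml", "r", encoding="utf-8") as f:
    data = yaml.safe_load(f)  # the parsed dict must have "prompt" as its top-level key

template_dictionary = PromptTemplateDictionary.from_dict(data)
print(list(template_dictionary.template_dictionary.keys()))
# ['agent_system_prompt', 'agent_planning_prompt']
```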


2 changes: 2 additions & 0 deletions mkdocs.yml
@@ -10,12 +10,14 @@ nav:
- Create a Prompt Template: create_template.md
- Standard Prompt Format: standard_prompt_format.md
- HF Hub Repo Types: repo_types_examples.md
- PromptTemplateDictionaries: template_dictionaries.md
#- Tools: standard_tool_format.md
#- Agents: agents.md
- API Reference:
- Prompt templates: reference/prompt_templates.md
- Utility functions: reference/utils.md
- Populating templates: reference/populators.md
- PromptTemplateDictionaries: reference/template_dictionaries.md
#- Tools: reference/tools.md

theme:
3 changes: 2 additions & 1 deletion prompt_templates/__init__.py
@@ -1,5 +1,5 @@
from .constants import Jinja2SecurityLevel, PopulatorType
from .prompt_templates import BasePromptTemplate, ChatPromptTemplate, TextPromptTemplate
from .prompt_templates import BasePromptTemplate, ChatPromptTemplate, PromptTemplateDictionary, TextPromptTemplate
from .utils import format_for_client, list_prompt_templates


@@ -8,6 +8,7 @@
"BasePromptTemplate",
"TextPromptTemplate",
"ChatPromptTemplate",
"PromptTemplateDictionary",
"PopulatorType",
"Jinja2SecurityLevel",
"format_for_client",
192 changes: 192 additions & 0 deletions prompt_templates/prompt_templates.py
@@ -1262,3 +1262,195 @@ def to_langchain_template(self) -> "LC_ChatPromptTemplate":
input_variables=self.template_variables,
metadata=self.metadata,
)


class PromptTemplateDictionary:
"""
A container class that holds multiple prompt templates (TextPromptTemplate or ChatPromptTemplate),
as defined under the "template_dictionary" key in a YAML file. This allows users to store and manage
multiple interdependent templates in one place (e.g., for an agent that needs a system prompt,
a planning prompt, etc.).
Attributes:
template_dictionary (Dict[str, BasePromptTemplate]):
A dictionary of sub-prompt name -> BasePromptTemplate objects.
metadata (Dict[str, Any]):
Optional top-level metadata about this multi-prompt configuration.
client_parameters (Dict[str, Any]):
Optional top-level inference parameters (e.g., temperature).
custom_data (Dict[str, Any]):
Arbitrary additional data relevant to the multi-template.
"""

def __init__(
self,
template_dictionary: Dict[str, "BasePromptTemplate"],
metadata: Optional[Dict[str, Any]] = None,
client_parameters: Optional[Dict[str, Any]] = None,
custom_data: Optional[Dict[str, Any]] = None,
):
self.template_dictionary = template_dictionary
self.metadata = metadata or {}
self.client_parameters = client_parameters or {}
self.custom_data = custom_data or {}

@classmethod
def from_dict(
cls,
prompt_file_dic: Dict[str, Any],
populator: PopulatorType = "jinja2",
jinja2_security_level: Jinja2SecurityLevel = "standard",
) -> "PromptTemplateDictionary":
"""
Parse the multi-template structure from a Python dict (typically loaded from a YAML file).
Each key under "template_dictionary" is treated as a separate prompt definition.
We detect whether to instantiate a ChatPromptTemplate or TextPromptTemplate based
on the "template" field (list vs. string).
Args:
prompt_file_dic: The parsed YAML as a Python dictionary.
populator: Which templating approach to use (e.g., jinja2).
jinja2_security_level: Jinja2 sandbox security level.
Returns:
PromptTemplateDictionary: An instance containing all sub-prompts.
"""
        # TODO: double-check alignment with _load_template_from_dict (in terms of validation and naming)

# Validate YAML structure
if "prompt" not in prompt_file_dic:
raise ValueError(
f"Invalid YAML structure: The top-level keys are {list(prompt_file_dic.keys())}. "
"The YAML file must contain the key 'prompt' as the top-level key."
)

prompt_data = prompt_file_dic["prompt"]

# Extract fields
metadata = prompt_data.get("metadata")
client_parameters = prompt_data.get("client_parameters")
custom_data = {
k: v
for k, v in prompt_data.items()
if k not in ["template_dictionary", "metadata", "client_parameters", "custom_data"]
}
custom_data = {**prompt_data.get("custom_data", {}), **custom_data}

template_dictionary_raw = prompt_data.get("template_dictionary")
if template_dictionary_raw is None:
raise ValueError("The 'template_dictionary' key is missing from the input data.")
if not isinstance(template_dictionary_raw, dict):
raise ValueError("The 'template_dictionary' must be a dictionary.")

template_dictionary: Dict[str, BasePromptTemplate] = {}
for sub_template_name, sub_template in template_dictionary_raw.items():
# Each sub_template is itself a dict that must have "template" and optionally "template_variables", etc.
if "template" not in sub_template:
raise ValueError(
f"Entry '{sub_template_name}' must contain a 'template' key. "
f"Found keys: {list(sub_template.keys())}"
)

template_field = sub_template["template"]
template_variables = sub_template.get("template_variables")
sub_metadata = sub_template.get("metadata")
sub_client_parameters = sub_template.get("client_parameters")
sub_custom_data = sub_template.get("custom_data")

# Decide whether it's a ChatPromptTemplate or TextPromptTemplate
if isinstance(template_field, list) and any(isinstance(item, dict) for item in template_field):
# Likely ChatPromptTemplate
template_dictionary[sub_template_name] = ChatPromptTemplate(
template=template_field,
template_variables=template_variables,
metadata=sub_metadata,
client_parameters=sub_client_parameters,
custom_data=sub_custom_data,
populator=populator,
jinja2_security_level=jinja2_security_level,
)
elif isinstance(template_field, str):
# TextPromptTemplate
template_dictionary[sub_template_name] = TextPromptTemplate(
template=template_field,
template_variables=template_variables,
metadata=sub_metadata,
client_parameters=sub_client_parameters,
custom_data=sub_custom_data,
populator=populator,
jinja2_security_level=jinja2_security_level,
)
else:
raise ValueError(
f"Invalid template type under '{sub_template_name}'. "
"Template must be either a string for text prompts "
"or a list of dicts for chat prompts."
)

return cls(
template_dictionary=template_dictionary,
metadata=metadata,
client_parameters=client_parameters,
custom_data=custom_data,
)

@classmethod
def load_from_local(
cls,
file_path: Union[str, Path],
populator: PopulatorType = "jinja2",
jinja2_security_level: Jinja2SecurityLevel = "standard",
) -> "PromptTemplateDictionary":
"""
Load a multi-prompt YAML file from the local filesystem, parse it,
and create a PromptTemplateDictionary.
Args:
file_path: Path to the YAML file.
populator: Templating approach (jinja2, double brace, etc.).
jinja2_security_level: Security level for Jinja2 sandbox.
Returns:
PromptTemplateDictionary with all sub-prompts.
"""
file_path = Path(file_path)
if not file_path.exists():
raise FileNotFoundError(f"File not found: {file_path}")

yaml_handler = create_yaml_handler("ruamel")
with file_path.open("r", encoding="utf-8") as f:
data = yaml_handler.load(f)

return cls.from_dict(data, populator, jinja2_security_level)

def __getitem__(self, sub_template_name: str) -> "BasePromptTemplate":
"""
Retrieve a sub-prompt by name.
Example:
>>> multi_template = PromptTemplateDictionary.load_from_local("agent_example_1.yaml")
>>> system_prompt = multi_template["agent_system_prompt"]
>>> populated = system_prompt.populate(tool_descriptions="...", task="...")
"""
return self.template_dictionary[sub_template_name]

def populate(
self,
sub_template_name: str,
**user_provided_variables: Any,
) -> Union[str, List[Dict[str, Any]]]:
"""
Shortcut method to populate a single sub-prompt from this dictionary.
Args:
sub_template_name (str): The name of the sub-prompt to populate.
**user_provided_variables: Values for placeholders in the template.
Returns:
The populated prompt, either a list of message dicts (for chat)
or a single string (for text).
"""
if sub_template_name not in self.template_dictionary:
raise KeyError(f"No sub-prompt named '{sub_template_name}' found.")
return self.template_dictionary[sub_template_name].populate(**user_provided_variables)
