From ec4d00ae69253fc75fe954d4ef71c72bfd936365 Mon Sep 17 00:00:00 2001 From: N3N Date: Sat, 4 May 2024 21:42:06 -0700 Subject: [PATCH 1/4] feat: add support for metadata and variables --- prompt.pml | 27 ++++++--- src/promptml/grammar.lark | 28 ++++++---- src/promptml/parser.py | 112 ++++++++++++++++++-------------------- 3 files changed, 90 insertions(+), 77 deletions(-) diff --git a/prompt.pml b/prompt.pml index d84c416..8631961 100644 --- a/prompt.pml +++ b/prompt.pml @@ -1,8 +1,8 @@ -# Prompt Description for a task +# Define prompt @prompt # Context is used to provide background information or context for the task @context - You are a highly skilled and experienced software developer with expertise in various programming languages and frameworks. You have been tasked with creating a new web application for a social media platform. + You are a $role with expertise in various programming languages and frameworks. You have been tasked with creating a new web application for a social media platform. @end # Objective is used to define the main goal or objective of the task @@ -36,7 +36,7 @@ # Examples are used to provide sample inputs and outputs for the task @examples @example - @input + @input Design the core architecture and components for a large-scale e-commerce web application. @end @output @@ -44,10 +44,10 @@ @end @end @example - @input + @input Outline main components for a large-scale e-commerce web application. @end - @output + @output Product Catalog, User Management, Order Processing, Payment Gateway, Search Engine, Recommendation Engine are the main components of a large-scale e-commerce web application... @end @end @@ -60,13 +60,22 @@ max: 3000 @end @tone - Professional and technical + Professional and technical @end @end - # Metadata includes information such as domain, difficulty, skills, and tags + # Metadata includes information such as domain, difficulty, custom props, etc. 
@metadata - @domain Software Engineering, Web Development @end - @difficulty Advanced @end + domain: 'Software Engineering' + difficulty: 'Advanced' + top_p: 0.6 + temperature: 0.5 + n: 1 + method: 'greedy' @end @end + +# Define prompt variables +@vars + role = 'highly skilled and experienced software developer' +@end diff --git a/src/promptml/grammar.lark b/src/promptml/grammar.lark index 6911aa6..692f518 100644 --- a/src/promptml/grammar.lark +++ b/src/promptml/grammar.lark @@ -1,6 +1,11 @@ -prompt: "@prompt" sections "@end" +?start: block+ +block: prompt | var_block -sections: section+ +var_block: "@vars" assignment* "@end" +assignment: VAR_NAME "=" (NUMBER | STRING | FLOAT) +VAR_NAME: /[a-zA-Z_][a-zA-Z0-9_]*/ + +prompt: "@prompt" section* "@end" section: context | objective @@ -12,27 +17,30 @@ section: context context: "@context" text "@end" objective: "@objective" text "@end" -instructions: "@instructions" instruction+ "@end" +instructions: "@instructions" instruction* "@end" instruction: "@step" text "@end" -examples: "@examples" example+ "@end" +examples: "@examples" example* "@end" example: "@example" input output "@end" input: "@input" text "@end" output: "@output" text "@end" -constraints: "@constraints" constraint+ "@end" +constraints: "@constraints" constraint* "@end" constraint: length | tone length: "@length" "min:" INT "max:" INT "@end" tone: "@tone" text "@end" -metadata: "@metadata" meta+ "@end" -meta: domain | difficulty -domain: "@domain" text "@end" -difficulty: "@difficulty" text "@end" +metadata: "@metadata" prop* "@end" +prop: PROP_NAME ":" (NUMBER | STRING ) +PROP_NAME: /[a-zA-Z_][a-zA-Z0-9_]*/ -text: /[^@]+/ +STRING: /'[^']*'/ | /"[^"]*"/ +text: /[^@]+/ %import common.WS +%import common.NUMBER +%import common.STRING +%import common.FLOAT %ignore /\#.*/ // Ignore comments %ignore WS %import common.INT diff --git a/src/promptml/parser.py b/src/promptml/parser.py index 680e710..5b2af56 100644 --- a/src/promptml/parser.py +++ 
b/src/promptml/parser.py @@ -1,70 +1,23 @@ """ This module provides a PromptParser class for parsing DSL code and extracting prompt information. -The PromptParser class can parse DSL code and extract sections such as context, +The PromptParser class can parse DSL code and extract sections such as context, objective, instructions, examples, constraints, and metadata from the code. It uses regular expressions to search for specific patterns in the DSL code and extract the corresponding content. Example usage: dsl_code = ''' - @prompt - @context - This is the context section. - @end - - @objective - This is the objective section. - @end - - @instructions - These are the instructions. - @end - - @examples - @example - @input - Input example 1 - @end - @output - Output example 1 - @end - @end - @end - - @constraints - @length min: 1 max: 10 - @end - - @metadata - @domain - Domain example - @end - @difficulty - Difficulty example - @end - @end + code... ''' parser = PromptParser(dsl_code) prompt = parser.parse() - - print(prompt) - # Output: { - # 'context': 'This is the context section.', - # 'objective': 'This is the objective section.', - # 'instructions': 'These are the instructions.', - # 'examples': [ - # {'input': 'Input example 1', 'output': 'Output example 1'} - # ], - # 'constraints': {'length': {'min': 1, 'max': 10}}, - # 'metadata': {'domain': 'Domain example', 'difficulty': 'Difficulty example'} - # } """ import json import os - +import re from lark import Lark, Transformer @@ -72,14 +25,37 @@ class PromptMLTransformer(Transformer): """ A class for transforming the parsed PromptML code into a structured format. 
""" + + def start(self, items): + """ Extract the start section content.""" + + # Variables are in child 1, replace context with variables $x to x -> value using regex + prompt = items[0] + context = prompt["context"] + objective = prompt["objective"] + vars_ = items[1] + + for k,v in vars_.items(): + context = re.sub(r'\$' + k, v, context) + objective = re.sub(r'\$' + k, v, objective) + + prompt["context"] = context + prompt["objective"] = objective + return prompt + + def block(self, items): + """ Extract the block content.""" + return items[0] + def prompt(self, items): """ Extract the prompt content.""" sections = {} - tree = items[0] - for child in tree.children: - if child.data == "section": + for child in items: + if hasattr(child, "data") and child.data == "section": data = child.children[0] sections.update(data) + else: + sections.update(child) return sections @@ -129,14 +105,34 @@ def tone(self, items): """ Extract the tone constraint content.""" return {"tone": items[0].strip()} + def var_block(self, items): + """ Extract the variable block content.""" + var_map = {} + + for item in items: + var_symbol = item.children[0].strip() + var_value = item.children[1].strip() + var_map[var_symbol] = var_value + + return var_map + def metadata(self, items): """ Extract the metadata section content.""" metadata = {} - for item in items: - child = item.children[0] - for k,v in child.items(): - metadata[k] = v.strip() + for item in items: + key = item.children[0].strip() + if key: + prop_type = item.children[1].type + if prop_type == "NUMBER": + try: + metadata[key] = int(item.children[1].strip()) + except ValueError: + metadata[key] = float(item.children[1].strip()) + elif prop_type == "STRING": + metadata[key] = item.children[1].strip().strip("\"").strip("\'") + else: + metadata[key] = item.children[1].strip() return {"metadata": metadata} @@ -167,7 +163,7 @@ def __init__(self, code: str): self.code = code self.prompt = {} - self.parser = Lark(promptml_grammar, 
start="prompt") + self.parser = Lark(promptml_grammar) def parse(self): """ From 9bf332c95688b5b88f93fc97e9259258bff6cfb3 Mon Sep 17 00:00:00 2001 From: N3N Date: Sat, 4 May 2024 21:58:14 -0700 Subject: [PATCH 2/4] docs: add more detail --- src/promptml/grammar.lark | 6 +++++- src/promptml/parser.py | 31 ++++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/src/promptml/grammar.lark b/src/promptml/grammar.lark index 692f518..69d640b 100644 --- a/src/promptml/grammar.lark +++ b/src/promptml/grammar.lark @@ -1,4 +1,6 @@ -?start: block+ +# PromptML Grammar # + +start: block+ block: prompt | var_block var_block: "@vars" assignment* "@end" @@ -34,9 +36,11 @@ metadata: "@metadata" prop* "@end" prop: PROP_NAME ":" (NUMBER | STRING ) PROP_NAME: /[a-zA-Z_][a-zA-Z0-9_]*/ +# Token Definitions # STRING: /'[^']*'/ | /"[^"]*"/ text: /[^@]+/ +# Ignored Tokens # %import common.WS %import common.NUMBER %import common.STRING diff --git a/src/promptml/parser.py b/src/promptml/parser.py index 5b2af56..e7f29a3 100644 --- a/src/promptml/parser.py +++ b/src/promptml/parser.py @@ -117,22 +117,32 @@ def var_block(self, items): return var_map def metadata(self, items): - """ Extract the metadata section content.""" + """ + Extracts the metadata section content. + + Args: + items (list): A list of items representing the metadata section content. + + Returns: + dict: A dictionary containing the extracted metadata section content. 
+ """ metadata = {} for item in items: key = item.children[0].strip() if key: prop_type = item.children[1].type + value = item.children[1].strip() + if prop_type == "NUMBER": try: - metadata[key] = int(item.children[1].strip()) + value = int(value) except ValueError: - metadata[key] = float(item.children[1].strip()) + value = float(value) elif prop_type == "STRING": - metadata[key] = item.children[1].strip().strip("\"").strip("\'") - else: - metadata[key] = item.children[1].strip() + value = value.strip("\"").strip("\'") + + metadata[key] = value return {"metadata": metadata} @@ -197,7 +207,14 @@ class PromptParserFromFile(PromptParser): """ A subclass of PromptParser that reads DSL code from a file. """ - def __init__(self, file_path): + def __init__(self, file_path: str): + """ + Initializes the PromptParserFromFile object by reading the DSL code from the specified file path + and passing it to the parent class constructor. + + Args: + file_path (str): The path to the DSL code file. + """ with open(file_path, 'r', encoding='utf-8') as f: dsl_code = f.read() super().__init__(dsl_code) From b002f065d23112fdc9a9c88580299a4cb6c38b3f Mon Sep 17 00:00:00 2001 From: N3N Date: Sat, 4 May 2024 22:08:24 -0700 Subject: [PATCH 3/4] docs: add logo for promptml --- README.md | 4 +++- promptml.jpeg | Bin 0 -> 14740 bytes 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 promptml.jpeg diff --git a/README.md b/README.md index 8e73217..22154a9 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # PromptML (Prompt Markup Language) -A simple, yet elegant markup language for defining AI Prompts as Code (APaC). Built to be used by AI agents to automatically prompt for other AI systems +![](./promptml.jpeg) + +A simple, yet elegant markup language for defining AI Prompts as Code (APaC). Built to be used by AI agents to automatically prompt for other AI systems. The architecture is shown as below. 
A `PromptML` prompt can be version controlled like any other code file. Using promptml parser package, one can easily generate a natural language prompt, and execute it against a LLM. See examples for using promptml library package: [open examples](./examples/) diff --git a/promptml.jpeg b/promptml.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..21d1a8d77e20285af334a8b28771c7c2f17b8fc4 GIT binary patch literal 14740 zcmdsebyQr>^5@`$B)Gf7KpCqFmxt+xWdN>{oT3~62?+o|Li_*^a{y@o zIw~4E8Y((E8af6BIwlq_7S^LjScKR(PjE>HNl8fviHXUnm}tl;7$}K}X`j(DFtY&J zfMhfrFF06VFtGwze@}vhfq{XAiA8{gMZii_?O}T zlTDoLod1L!rn;r}x9Lyi7d#_kVs|tmLjS={c<__WV)^u+YPCa4bA9K)e|HPgw=9OY zcPP8O1;f&5zX>pU7Im6N-EVJTm2XI=|GEVw+1PL&^VFO0C+Dg3uZ(>)-knmpBHn?@ zLMj0Je6{=)6R>THlhX5bevh|~@}V$8_cKG`BlZEb=Zj5_YjdI)3REq9tm+xfS~D%% zCS86>Y93~SxnzB;1Zo9394_0)@82;{9_xE`t-UGgU0ic#joYfhQ7xeR>awk6bvS|& zx}!(65wn>pYNh5{vS9kg;QI%PKe`B>K{H?*o7WNmWP28meEHa)S1XJ19S6>n(eK5K z4hbV8p6!ZO^u9UdS1*(bJoaw9**+C%PR^7a=_<~o)2U<^a-ZabC5>;2d1gKU#IvhJ zk3ml#V@26}?+^E)0FG`MjUk^eEoO{L22} zqJwX4uPL2Xt4tB)9{(EItIYReIQ`qw^F3&**WyIlV3&Ba*c-m-*HO(&!*wYO^=f+Y z!?tg9jV4cN<-Yt@1f$Ds?;>A_Q*hX}dhIZqeH+~kfKp<|GVCobAly6cFho9*T;FfU zos$XTeRD<~EjIG4)h_%~c~fg0!{ATTHv(7#yB?wQNNiiK*X0!}C3WzY#227d-9=%% z&9g7v4#&iiN#EE@yA$~)W+sJYGuvC;JgD5|AZ1hQ{GP*53LW!?6k)j~ov7(G^^XRI z+NW=dzS|r(WL-rr3qzB4P95Zv#|ZFRzs_>kr&77QRn!DVu2vO09zGRAsFKL`ud1L7 zrwFZAwO6tvjG0@VT7FCYe#d@SelxBdx^aGIBw1IK!Ar|v+8|&jH)Y^I|Jn$L<>dn5 zv~jjrOM|g`J?CcH=E!)oc>Z_n2LOShm!pr^{4ZLi+8MhvCmEh(t#iCe^zP>Fqv>e= zLS_c1kXlDxJtJ+t%8!-9(5W+$i>PRZ%HIA&&vMRej~2XIFU}$k>lu4)A$vz+cBha< z&a*2OW4_ZRu2$5>xH~b2;?s9;moHL;mBx%>r_75rO<%t)YgTUgrWQYGv7wYXNdjx_ z#jF0kLr65DB`5QJwMI_;)bFC1`)YkW)o-__Et|GQH@A5#P6&SDRnKMc<|QAtbVaZo z8lChkXv1co{QOu+y_t7@G!iYItLT0xE*cM$erwcIbfyqnv}NISb`{tz{xR5cJE+{F z4UdbWRW#qOba0!i4u-H>A|GjZtCNe+UPOb=uTv)*Z*NVn({>+GH+V@=)&oF5W#92M zwkDEzqjJ>(zJ45hZ_$3_^F(Cf%k4Dw{MplR6)Iq*F5Q&z1LH3%8{E+z;`>@UJnA)aZ;_ZBQn$ekjj@&m#=gG+WPb}r# 
zjW-jK{yB*Ipf@)H*uIDc;7L>qL({~;QF_}}6&dE#u)M_Y-hUPvaVZ={73k^Nu&KAQ zU9{-y>M@F~tdySsmtF_*As-26${+tE1`Jyp4RM={{CF+#FP;CdH);OiV*n@!Rp|aN zse}0+@%8ZkkH{z_{v8?dlfQaj{?{N98j9L80EP&%46UC$F7A^jzC8f-FHcM)o^|w6 zI-|h^}nRv%5Y&)i+#-PNy&3gdbt&1%3I9naV@e>H|!IgS)|;x)>v2289k zM=!*&WB!^B6EmsU#TOu28AiB=DpcY13I!K6*K=!66h-cf5GJ-%$RDOhH6!%T065a$ ze_!JL=`$H${?quoM&aR>AO)acO8nh}ApX~jf9d~!vx)QH88GhYu*m<7Khgh$>5&}Y ze?qpl_-|EsHujflS3ZA6*)E39ZiYcO3%6T?CtD8y-ZARWrw@R(xi-(!Y}jdq_{E=< z7-+v(av1Bno6~LDk8TYQAFpe(t-AB}U(%h5c3oV$`y&yLO2ftC)bX3v%QUa!CH~+; zs{5$jtsef1YXHv{9R>7mGpZ!3aGhs zmvsHK#qK2!fMF7N3NU#{+_dB9bm2Ph^XJf>{U1(ZW0|+Re+hToZk1(ue%dv~jkp{T z|07Ce@*iG*)cjpH9!3fGh@2kY{vr6Q7We$4-v1{xA_)-010W-zAfaNSpke;TVuVXa zMggGWq2c1wX%NtJfw(21AJAWz5)v`Qek7)qkW$b6dgSc8g0O!~BxEEcaliv0_%wS* z2mF$>>>NTsn(`t?`bqQ-H$dILro(7sUd*I>cJ|R%GpG*k8Kd|dvNgK3>(E9?oW1yu=!xSLfo|KdUq`3>VjORc1)|HnKSnI(M^ z8L1^C+m;QoJSsQ*RnB_$rFc>Zo3lG<#?#dz!lcp0{t6K_nzv?ge6E3Hh4qPYN||8= zQG`}${F*_exprJ@%{C)3D?vcW$Bu}}tcVclC(yEK3mZi(=2SgwQnLtE^kdC1We-D3 z?JSn)&s+)-TUgvGc5D%e!aRo*vvp!Sw}oV+gFN-{YjeKlKQt{tJHbhIE598 zeZ^v#9!gmwgV6Zh4fV_L9adktr@VV?(6m>a2SRNLFWvik!8=?8ewZc8$^q9`Dq#@D#kHIbNYdeCmt3oj}<~d!>NVpVygtVz=>ULs;o|qwN@CDRdKb>mBDCvnlURpD0#`Sm) z-bc!Mo1&xM>&ZMlG!?DI^@%t!jG&viM9;*Tig0hLEd)i z$B|eus~~xfG}E0I6$ukA_B3_)#6w&Dp&Zoo4gMRc~C$7ZCO z@e;>ts=E$f#jzLZ4sJ@)o`QJi!=}g6(Oy7ZcysqG`zZ_}7N& z#`MR1UQ1^>Gm7dF-NIF8+k1hLpye*5ff0f%(wRcMAfjSi%a#5-0T@fB@r$4OwI`_a z9@>R+JJIp^+x1h$3+%Ag;4Ap$D~#Q~iL*5mm+iw#+XW|M@x-xhnl;J;O^Dk7hJ=CHS*P26`%S^rzp3i+<{ip_5Bn~l(AwZ5@dli$`Xnc|Fb zX=zqOdtBJw1Y5Ad7PZF5mZg&VS-BmvHa@B9YVy{|@!%|&x-fK@pnh?Cxz|J}%__Aa z{gTwm+Q7qhb-U<`)AMU41P=25g@XVLbQBEK{{aUH=(xB+k{YJa4}|nDXc-U?7`uWF z{`l3|H}@|TL>#dEg@U~d**p56#lcvM5aflsb@J9)<>IgxYx7=Dg%ZpvzQ516-irrn zMlon-)-Cih8>=sLG*AkV#nt<6SbERa* zmiQEaH+HEW>;c9G>rRiWWG#kg{E1&O393Ou8EM~v!p zL9*PXQ1dVnE}B6JKM?OD^$cWg6%ku!{W#h8eS_SkPd;{WbiQ<^(0rj0XJe#mscJpn zDbCAY7EvV?)#mmI8>)`a78Cb>o_L%sy5mwPtngB&c{; zs3D_cQS153YjsA-FayPu*M#uducxj#jG4D8yaB!zcyS(|w)j 
zb=3ZWu!L!h06g1=+qa?39e?4luXFeOol*WtmA4ma<1m)?TbeIni7MY|3zQ$rI8RWO z$8nBMlyl(YPGnGqDFY21T=J+%j_CdR+^$L-EJ7Tpwaf)s+!y~LeLlmB4TCjptTG2u zV_iZ_k-C<0TcapI$;M*u=-W7$4fc;RG-MFWuvkwQ3xn_K^-J4PEEIoj3@-Db z+F<*4TG0x&lF5L}ol%rc^m2@a_$|#R-QW5v-VOZLZse};n#p&|5QF^3wVtVPs9L`V z4bEz9)~l9yi!~PUcJj-@`_r4W5$kIG!wp9-MV6N4+N31@*KbHr9{@sU!PeT*&gw<) z%Yt#}Obvuzm=D}(HE6+X(5@hp(a%|7&tE2uud#$nyP(8gb_(u21-jYI6Ey1Yc1A@e%Y}@8JvVJ*n<}mQFB`}^ z-iKE7RrKalaP{#v5kX<1oi>kY5+z8_4tYXIj1g7k{xn-K>L{TT|z@m(eKa)K#lD( zjT5i~qMiefOGI&&{D?s=I;f`XrOBpBLP?ustkIn42uh^Zbm+;yV;H&;vS<$fMgB@T zcK~R=*pK=cUl4oubj4cJ4ydmL;rDwcS6tOZtPCdA9;jgDU`dJ4u;rMrfo^i28L9N% zlx|Q3Jpi_;-@ql3Wz#t>dqJs|%F_9vIw)c5PE;7i;q26M2DJ@|o1)jJ zEs5zh^YW9O}KjluCEJRb*pp+ONJaWn)zbD=K|x zq@DJUh-aGdkcy<2>R)tWu+@(`ahIo=;Qk}fVQPLvZ`cxE z*UGSy`e`IBTgT+hpw#$mM@(Mg(@qM!5W%cEOh#^XFSw&}aQ|ahkEu=$1_*g|GO_To zOW{wJRxd6L1-GJdCVqNQlcMUZy!G0pS+-HZI417ai`BvM$TIfmoa5UKRyrmnTE@q5 zZ=SpyLzfQ)_8w^aCC%1D45D_%-Up&ipPRE;?NHmBPFeG!Dm|qT$V{ZOIWLf)a9iVh zUEY+4rD;&YH?VX4jFyB_R2bNV0-rY1!4#>U3sWY%{?v#5)mWw0f3Nv4M1&Spc9`3Q z$2Jm|Z?-xs6_@SU-*6EUx~gLQ&N=V3O|)@VxL zNMDqMm3`2OQRX!y-qDP}Oh^v&`E~wgic+#v#(X{4UF#JXPHmP5khkvg z6xf(W32g2%qtuaTkJI$6T-tp^_8n40k}E*kZYid)s4um8imD zQEG9K+gA?%++VLvZrX#7!#b;^AR0`jCbV%ovk{1%7s~#6yI#F;5<{%tm(lH@+j-Pc zr$4G9EZ~rXi#DE|%`4wO=Y;V_q2p58@=FycQ3tpMw5OqF6>{$TB_B6-s9abfZ$TCb zh`+B<@}-_LV@zU}ca)J&_YOuMrF4)~RNq2<6%(UKi`1>yZ8dtYo+Gk`tp=IlI0hCg zoFs|N87j8R9=(;e9Q@3tW%4@PvgM}!ILYm60{bCf2MgzuWP!NxaS!1vha4rRIbSJX zT0b0824nl7CLbWWj~YaS%*iL*o=%lRI@^*q-R+=@UpbXIUoa(hmsVt_USmTSS{pxD z3-=XPWf=-r3tDd))r%(kSleKvt0Zxrq0O@!pYtpav<1QBW9BASYqJzF9#|2ft69;> zRw@fP4XF2NkEkjr&rg8_r54|YDea+f%g1T3ga91DY7zpdxS0GH3ATf&bI&b7y;TTd zdOrJ`VnHoj_u%*#irsN;WeMzh)K#j5Lhy; zhND@kc@QIivFcc0{%ia(P62$6P$h+S+2cg+O@l$F#xAVFP@HuM#f$@W_bl;MbnH@oakJ}^X#m=Y$$7heCOLe9})>`Q5d`XQ5ObkU9b8C0R>O(FzhYiGqJJ|tw z1obsaY1{3rKQCNyB?11lad%H6gN6Fi%57v|AywA}?2q~a;Q6rmDC>hKjC(l%l+tYKP$n72sc>omu6!>uv zn>KXT_3;H=+INfSXR~kPmT$IyZc(Py4%PAJlV3@NFhwFjLPDtK-l}Y4vxr|VZCx#? 
zp2EZ7m=#Vf-Ac{fEpW6tz7}Q>2A<4=6tK?6y@`W4>+1(OmcR*Wijx_#`bjIba(`@t zY9iWY};k{<7a@>c(xL)9`E8!Na;Xk7<-U9clD>Z^@YtN)#+fnBnY7 z7-^=ok!AF`{MjAkRv57uJp%I$vQwd7kq!j`7A4ko1TJhVi}OMe?gR^Oo;FI2XE{!h zaXxc>HC{VgK4$H#$+63$d%Qox#(-@``}qA!psW&`29wNueCzIrcI<15(lTpk4|Tp! znK)^Bz|wwxN^}%YLqTX$sdViQd!=k{H2+|;4nu)q-~Dj=rnQNZ%gOEK*}J*j_Yo)l z)&>Fbq*-+47w-gLB%(0_83|*Z7-vSM&#Im{s)klRDj) zo`TYY#!T(z63(*3Q*#H|l?Gf=;8WC^XRgvHlMu&eGhhHgP}oGg)-ElL`kDbL%2`q%r4tjkiI-vSAVgv zG4~`T;Hr#rE!9Y1bTN5-lZh=oY2h6c^S6E>&R`mmoRfevmm4vw5Vvf8;5gX_u!jq) zYB7_7UQ@JBZItPd_}#PS;52DOv{MHMvQdNxM^)l4*7(HRwdvP-{}g>(Qnx|_p~aOX z!FVh1y>V?etoo2=U_^vRM)IO9@zYDJGAvq9HI>4R zN){82JH6O^afO1Vq)M}IZEQbgV|Ut}u1-7Z+aNdsjaj9XlJZ~0a4K$gLw*n$mmoE1 zm4-vwS&`$uST`@z%&q~bqeQT=Yg-;q6~V_$=XtX=ZTs*Gr2MBj)F7Alplr)e=40z^&6S*hnn zBX=|@S1i;bKLO{LSxQ}Cr1&tyeUmyZSQ8 zumAl{lf3C?Zj1xAXdTxM7(fy5IKJ_T^^&CXVxfGo)vwluECqd2p0%b{ zDK0OAQcTf$GIgbJ`AcM`ao&UEK9t}s^W57Sj9FFCRth^3&pPF8*Mx32Qjr#0%QGrQ z)K3@n2CWcde*$&^MZ?(;45aCmd+R#8VBhEomMeL}4Xj<@QL@+^`sm-4zmp-qh^c8( zhnO`gHqm@K4%jRZc-CVqGGY1t&*T3mF-aYEO4DOyRCsS*8PFeEuLWfe=S8 z4_=Zlrwg!Od0RQO;ZGp7G;sy;2GCr^fE8-H@;?1APhtocyfsvuL7RXa?0>X~rm+s! zedgjPNGMq6>@xVcVgpUn5|J>bBWK%>6DZlkcg-8og8Fo+qiAFF1R)0H~+ zi2RCCYc6iB!)dvf4JqwN*D(w!PsUBow9moyblac=>SyY|a5> zT%7k*6w!SEv@AjpuXuIDuLgb2KgOPEB!j#a#%aB3UVgz+-)q3HV#dCUNkF(FFXg+$ z!(|K~P|(7XV!p^c1z z1O6hT+i`Jp5Kmj)MGa^+_0a{>D2;Z9b~WFu*5|N_PNyDo(-KV0a3$Qhg5qgJ0POKG z{|;3%P&75Q$w?t$$NjV1Xg}_>NEpKq%e2J#s#U;=@bUN@q$mGbIEH@0Hu#;~Xvn3x zGPy`>ESm$WNC{6L{|OGOoql+Ae~DOtNZYh9|V~-RiPq2MaT00S)EZ z>$8X26*TW~q;?$M%8%RrB0Wp94;>?_`Sxg5gW}G=0r#v0tj4ljs+U9`vaQ62{~U(r z@^<)4Uo67hQo+-#-_>TQL*+B^Rhq&XR{MD7QKmjuP7&yJn3mxBzPv$;6Sf;mMAV!? 
z!_!<2zN9$?C46LXY$H-tfXY+CNLoR<090;uYjM+y3-(6p1%0C8w4_MZ$`V;+hrF+h z^H%{yCw~J?C61fB?W|l>6Gvdf(}G^W!7TBk!*;70`$wDIUd5axne3XBi|Y*>d+6Vm z(OF|uls=+kIe9$Kt>jcGK)PB@e_HA-#-s2P(=upu7M~x0R(DjKR~tCWu9Cwvg_YNw zBbFF6>a=%S=lrUeO;tYmsmTqz0QH6txuviB)m@jy&cR#N3c}I3avQ8*y!Cw^3k+f* ze5`gl-)GPGZO1aXnv){g?lovjL8t-LX7TTLcb0EnIu^hiW!Lwa+3jMG7ND=h*rq!4 z)8^Y~9tHBdQw9K8Qdsx0?hFaxab52@ZhsUmG}Pi<*{)FR@~P?AH6kWRPRqBztU1T% zFEzQA4&MNZ%Xo4uRjCp3?FtDMKg%Imj!xt$gGOeapNH=(4{AURZ}*h%D-E@fg}{vHZmt zzVyn_%b_ZOh>Pecxf)avtv5{iC*Oj(`4Ni_nj)KuRSWsO*|dZEHWtI_#Q&Uu3RXt3 zk3ULS-HLzCXkV2o;rDz58u2w@myn1iSw<(WouJ~XS2X|h3poBvk@{N@nY(muJ%vFO z>ogPn`ieT!n1PkZSZ6*i?A_wODJE#O~{5Xo@%#@~$O|piJU@t&RA;2!?T@ zx51`r-eIr~`g65)7}I5ZmD+t-X?EvQgza+v%`n&d$D+Ic#OpYUrxw@jHpGuYemnpY z?1ngkT?$OGNU%bFiXUX^qdMm+@q~L-m>aAD8(J7t|RSi%1)`&lumM zy)pCI9~H+ly8Llb{=I$Drx~{S0Fbr4(^z^m@pX98-m&t~d8*JvTkF2kv%B&!b)r^t z&av$Z$1ckiU&}<9{G$Jl=5%AcUo5!ExazB5cosb&CpO~-IchT2BD@P6&DquRQ8gbC zb5u*2`VR#Fyf24Rp+rAv>uN#=AJ=~BsukN7S?Iy3UBlrhiYZ)>1cD*YBsaUGf1agv zVU~KKQ+p>TMAorHgd+>wI1d@-)uhuWV=v8Gv$xaal+bJz(;WSL02GQ#g2e~b#aBii z054yY){={IfQ<@dH4(=%beNdU6$5rqY(owMJ8(JL;2xT97YH>hVRH4GedEb6?m9}w z=@U!)HfsOJrY(X@=D(^8BIQ$Danj+@5@mOazn7tIy$)VKnup}hUQERja&N*NXc$o0 zB=w3$u%1$bdgAdLDnrT{4(o3E1k97@=G?tmhJV4V$BP*%J4(ka2HUfyou}~VAh!J^ z+c5*-AJ?OvgwXupF$%FCMtVr}=4M;)xzno*Ryb2@JW`z3mepf-CV3q+z7V*M@%r;z z>h-15f5H&b^$7d{P&jBZ7hvzX%KV}aMY=AZ zT~z5{F6q_0YccB4mWT<}V-7mCAgrXYTDz2My#eB$7&%G@^~uOTdH}fXEZ=_hR3aS3 zxU3cUbie(dKuB<#mF4}XyRj<%Yj&NTs7uvK?DwC{VQ)Nx+CbMeRli&pd7X97Dk~;1 z!gcOoF zf7d)~QiXjZv8-gA9d~d zB;8aAUMXZMZqEi2ODUK|K%~>v_RkTO z>9Ctvi%)d%HM`2#w5Pa^T(Pc%_G5|56ZGXYY}44xx7NDXilWVzN>)j>S8=@b1i9Td zU#`IXBwDTR*ms6h);k41w}Tht2I4az(Y9|GZ-V}gjjeGeBU$c>e0N8y%DnoZED$7EL!v9YFz^DYWsD)U>QZBN;OY31z6y#gh= zI6RxbMZ8=|(uG7)S`p8Y==2dwP}Y-7d@anI7rAd7DrVV$CIlEBs?&Zr_}t`E({_t> z(D?wEgTJS5InF)QOZ6#7JO{gj_hA#)`JV~O{Ikoa7mOBHs$PCBFS|4R?Mp@3%V3hL z|IU@IhHdz8DEW4DP-`X3Xs_ltneTU51%mU1*LTzxUY)Rp!*8(n_)aUr`Bq16*%$yE zQvf7o{YKwKRay(mK0)7I$OaRtp@&uh>N!E!q+ywc+Wp6XU7&EX9sE4%8MAaBgXI=l 
zU|@oC0Y{fdm$Jsznn1z2*EdZIVe*XXnKdk!B+0o6QF)1eK8s^6(m4Oc*cQto7rv{# z=4gEPsv?DGW6Vt)<*f7k3ASWJZXDZ0N$fn=6gBoVc*)uRs$p$rOI-Nx?2>V-|6Ikx zHeZQa(rP#2XwK$!)_gD=*Le!F+1G|{zMV+bAW|^~?&qB(dO0kl_dT_qJ7y7D4Ec4; zCZaAky$KFwyb%bNtH83@ay52}YS7mxlu~~uwcn}0uOte;Mf!sHkffS)%%lwKVmTUj z;;ZLaXq%!jzqVBkfMGX1y2)^nOIbZs(1#ngbSXrJ;$|bS^awOVs*LZ_Jzd z(p+c7Ur2tVWXM!j#MubonDgkDS&dPX;7Ip8&fwWMF7Gnbm(ecXE4Qz(cI|I*48#TO zk!*L27gQ+ghb85^St^PwuYM@H{e}+GEA0pd0|6M0`g+@1l{l|R z%bf!%`=Zyk6B=J+h{$%8sNwBVO3*kQ>y!+ZL#Q(~ZP#CP(YcVq)PMK95h*l5_c3oSz47UTm1EZax@7f}Zuw5vSo3%yXzH&EY&@Zm^(x zq7$6!41G>-*{4cWnKlY}JMu^s+6RMnL8=>XV0x(ECtF{i)=OOcp1+&}b+ z=DDhK#g3h#-lD;&6XI~0Ssgfp!7!%B`LthmmSM>kVX^=U->ccbQyunx=xL#q5%bGn zbAPfP^)+`s^}0&z4u7E%dnid{%L(io;>00_-Yd*zaM79 z7thud%T9QSIhSyX(U2%t;^L!B!PlX-e`EgH-;!?!3nGjyz_l@(PLw*4szGgr zm4C%N4?`1EkHbm#Uj6p|Fa;jSSRg?dYlg$glbSQaby2D(s8YMrr@t)V4pzG-ZxRg- zhnv+&mZ>jtXah%AirFnZtiq(CCFebP`2q!Mb88bhWKav}*RxC`lJi~;rWz|{hBO;e zT#Yj2z~^pQ`PeS8Gb}`smHOPOVn^rLpzhBVOvcQZV}FJ1}1=ds~lQ*Mk+#V#09-z1xZF+d>r7ptKBe9 z&#s`*C)H4=1jVp5l_mEPO5GC8P4FfslN|SmMb3;$vtJ^P&wIoY?8L*iVMVLWgW~I5 zg$qz7x{ka*OVIX0qfn>T8{)5G-vU%r`Z2xOmWX0H))lI;7;-(GFc8s-Sjbf+Nc<=c zX%G6dCU7B_cF8z>3t6aX{R}p|KzU}CNrGBc)R4j3s&8li4VBjZ4{ImbH#GgfseOD7N|`_!{A|@2J3q4c)@}qR#tycw()33nULKMvh1JSbfHISo7zUx@ zQ1|ssk(BS`VuL(nShg@?v$*H4b?uvHgT+Q2Mb8Cbbv3K$Ff6Gt#Is-~%$0xJ2Cmw) z^@Q|RHYX@ZWjHhL@x@3o$vdx-LTie5?5RP1*+JZa=mp8UajO$%A|3vv)8J7lLy}dX za7bN)xul+Zk>+qh)A4G{31$?DQ4ClL?E^6elwY`RDvuL}?~e$b?dLX;+R z&3HjKVJ1o}^$3lVF?=q;H7YjhX_I^5rZZ8ZEUek}8Hbpj4IMkU9H6+dHw6!aq%F#o zk1fMFHIK(B6!~U__d|aaXl`)w{5iUR(^as2_92|F^Q=!UZ`9| zWg0Sr_SJ4#ZR^K%jEXox2Aai+>JEy`H`)vB?}2VJb$TJ;Dq&|OkNK1xXp3(Jg`f$` zEiVUn8o?YJ`P~k6a~X^#J0LhOK6edcW73QRld^sTfQvxU!(FE1tM_E!9wn4mvGYw@ zOuvz7r@(07Xf1{RT-d_;e&yIs`P)B^G+4ahXl zWmytC7sQT3;!;vNj6+VHcq#^@DzVvqg?Up&b7&-tVXn`Lyq@OIcsbfdl#FHm5qKo_ z14;C?)Pvkg}5 zQ8s>nd$sfBg;;xx6O~zV3WU>UJFyIn%I9d^!w&;?mM3;uVf>zBrSEleAjT&XSX7*v zbM;FhvlE95g!Aok1;wSQibdZv_?bm4_!J)i5S(q|fC?sd1+eOoY&VnTJJh@sQ?aaO6#=;z$18mqqlN^Q~8XjQRP}0!!?yG;uuPfWo!katI3NQ 
zy8+5I3!YUPm9LKAjR&=;GIiH15It6GhNN&H<4DeYQ{fvheUi(OF;qJzGM_cu}|(W@Yuqi!r+!F6<&Pd?mA&aUH5hbAb#i!He$F zfliGL#BWMu;-ISib^YOz(+M-|_6OQsA1+o)wqTiSeP zvhLv&ThZ6`C_AAWvj>BiO$r*5N&S+k)JQcuD;IBn`zF9VI#ZXKIhGZ|S&lXo*fe%7 zP*^r`5`~mmWXWaFdVYL4a&J`BNa=_PGmjui&^Z#2!k>m~t<+477buXufh{Ne?Vh_565jqP1|qMb=A729CKA)Cp#xkx?pHNkNqDtVc8#c9j0Gvrb03PlU|2gt7^WOk# CNV0|i literal 0 HcmV?d00001 From 0d3a6902bd6de9575e3f7f9445d1cce61ea1b326 Mon Sep 17 00:00:00 2001 From: N3N Date: Sun, 5 May 2024 19:26:01 -0700 Subject: [PATCH 4/4] add category and move difficulty --- .github/workflows/pylint.yml | 26 ++++---- README.md | 71 +++++++++++++++++----- prompt.pml | 112 ++++++++++++++++++----------------- src/promptml/grammar.lark | 6 +- src/promptml/parser.py | 47 +++++++++------ tests/test_parser.py | 13 ++-- 6 files changed, 169 insertions(+), 106 deletions(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 4e16568..974260b 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -7,16 +7,16 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pylint - pip install -r requirements.txt - - name: Analysing the code with pylint - run: | - pylint $(git ls-files 'src/*.py') + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + pip install -r requirements.txt + - name: Analysing the code with pylint + run: | + pylint $(git ls-files 'src/*.py') --fail-under=9 diff --git a/README.md b/README.md index 22154a9..34bd06e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # PromptML (Prompt Markup Language) + ![](./promptml.jpeg) A simple, yet elegant markup language for defining AI Prompts as Code 
(APaC). Built to be used by AI agents to automatically prompt for other AI systems. @@ -8,28 +9,34 @@ The architecture is shown as below. A `PromptML` prompt can be version controlle ![prompt-ml architecture](./prompt-github.png) ## Why PromptML ? + PromptML is built to provide a way for prompt engineers to define the AI prompts in a deterministic way. This is a Domain Specific Language (DSL) which defines characteristics of a prompt including context, objective, instructions and it's metadata. A regular prompt is an amalgamation of all these aspects into one entity. PromptML splits it into multiple sections and makes the information explicit. The language grammar can be found here: [grammar.lark](./src/promptml/grammar.lark) - ## How PromptML looks ? + The language is simple. You start blocks with `@` section annotation. A section ends with `@end` marker. Comments are started with `#` key. The prompt files ends with `.pml` extension. ```pml @prompt + # Add task context @context - # Add prompt context @end + + # Add task objective @objective # This is the final question or ask @end + + # Add one or more instructions to execute the prompt @instructions @step - # Add one or more instructions to execute the prompt @end @end + + # Add one or more examples @examples @example @input @@ -40,22 +47,31 @@ The language is simple. You start blocks with `@` section annotation. A section @end @end @end + + # Add task constraints @constraints - # Add prompt constraints + @length min: 1 max: 10 @end + @end + + # Add prompt category + @category @end + + # Add custom metadata @metadata - # Add prompt metadata here @end @end ``` -See [prompt.pml](./prompt.pml) to see an example. +See [prompt.pml](./prompt.pml) for the complete syntax. ## Design + Regular text prompts are very abstract in nature. Natural languages are very flexible but provides least reliability. How to provide context for an AI system and ask something ? Shouldn't we specify that explicitly. 
PromptML is an attempt to make contents of a prompt explicit with a simple language. ## Core tenets of PromptML + Below are the qualities PromptML brings to prompt engineering domain: 1. Standardization instead of fragmentation @@ -63,8 +79,8 @@ Below are the qualities PromptML brings to prompt engineering domain: 3. Enabling version control-ability 4. Promoting verbosity for better results - ## Why not use XML, YAML, or JSON for PromptML ? + First, XML, JSON, and YAML are not DSL languages. They are data formats that can represent any form of data. Second, generative AI needs a strict, yet flexible data language with fixed constraints which evolve along with the domain. PromptML is built exactly to solve those two issues. @@ -74,10 +90,13 @@ Language grammar is influenced by XML & Ruby, so if you know any one of them, yo ## Usage 1. Install Python requirements + ```bash pip install -r requirements.txt ``` + 2. import the parser and parse a promptML file + ```py from promptml.parser import PromptParser @@ -108,17 +127,18 @@ promptml_code = ''' @end @end + @category + Prompt Management + @end + @constraints - @length min: 1 max: 10 + @length min: 1 max: 10 @end @end @metadata - @domain - Web Development - @end - @difficulty - Advaned - @end + top_p: 0.9 + n: 1 + team: 'promptml' @end @end ''' @@ -130,17 +150,38 @@ print(prompt) # Output: { # 'context': 'This is the context section.', # 'objective': 'This is the objective section.', +# 'category': 'Prompt Management', # 'instructions': ['Step 1'], # 'examples': [ # {'input': 'Input example 1', 'output': 'Output example 1'} # ], # 'constraints': {'length': {'min': 1, 'max': 10}}, -# 'metadata': {'top_p': 0.9, 'n': 1, 'team': 'promptml'} # } +``` + +## Defining variables + +You can define variables in the promptML file and use them in the prompt `context` and `objective`. 
The variables are defined in the `@vars` section and referenced using `$var` syntax in either `context` or `objective` sections. + +```pml +@vars + name = "John Doe" +@end + +@prompt + @context + You are a name changing expert. + @end + @objective + You have to change the name: $name to an ancient name. + @end +@end ``` ## TODO + We are currently working on: 1. Supporting more annotations (Ex: temperature, top_p) diff --git a/prompt.pml b/prompt.pml index 8631961..770dc28 100644 --- a/prompt.pml +++ b/prompt.pml @@ -1,81 +1,87 @@ +# Define prompt variables +@vars + role = 'highly skilled and experienced software developer' +@end + # Define prompt @prompt # Context is used to provide background information or context for the task @context - You are a $role with expertise in various programming languages and frameworks. You have been tasked with creating a new web application for a social media platform. + You are a $role with expertise in various programming languages and frameworks. You have been tasked with creating a new web application for a social media platform. @end # Objective is used to define the main goal or objective of the task @objective - Design and implement the core architecture and components for a scalable and efficient web application that can handle a large number of concurrent users while providing a seamless and responsive user experience. + Design and implement the core architecture and components for a scalable and efficient web application that can handle a large number of concurrent users while providing a seamless and responsive user experience. @end # Instructions are used to provide detailed steps or guidelines for completing the task @instructions - # steps can be used to break down the task into smaller parts - @step - Identify the key features and requirements of the web application based on the provided context. - @end - @step - Propose a suitable architecture (e.g., monolithic, microservices, etc.) and justify your choice. 
- @end - @step - Outline the essential components or modules of the application, such as user authentication, data storage, real-time communication, and so on. - @end - @step - Discuss the potential technologies, frameworks, and tools you would use to implement each component, highlighting their strengths and trade-offs. - @end - @step - Address scalability and performance concerns, including techniques for load balancing, caching, and database optimization. - @end - @step - Describe how you would ensure the security and privacy of user data, including authentication, authorization, and data encryption. - @end + # steps can be used to break down the task into smaller parts + @step + Identify the key features and requirements of the web application based on the provided context. + @end + @step + Propose a suitable architecture (e.g., monolithic, microservices, etc.) and justify your choice. + @end + @step + Outline the essential components or modules of the application, such as user authentication, data storage, real-time communication, and so on. + @end + @step + Discuss the potential technologies, frameworks, and tools you would use to implement each component, highlighting their strengths and trade-offs. + @end + @step + Address scalability and performance concerns, including techniques for load balancing, caching, and database optimization. + @end + @step + Describe how you would ensure the security and privacy of user data, including authentication, authorization, and data encryption. + @end @end # Examples are used to provide sample inputs and outputs for the task @examples - @example - @input - Design the core architecture and components for a large-scale e-commerce web application. - @end - @output - For a large-scale e-commerce web application, a microservices architecture would be suitable due to its inherent scalability and flexibility... + @example + @input + Design the core architecture and components for a large-scale e-commerce web application. 
+ @end + @output + For a large-scale e-commerce web application, a microservices architecture would be suitable due to its inherent scalability and flexibility... + @end @end - @end - @example - @input - Outline main components for a large-scale e-commerce web application. + @example + @input + Outline main components for a large-scale e-commerce web application. + @end + @output + Product Catalog, User Management, Order Processing, Payment Gateway, Search Engine, Recommendation Engine are the main components of a large-scale e-commerce web application... + @end @end - @output - Product Catalog, User Management, Order Processing, Payment Gateway, Search Engine, Recommendation Engine are the main components of a large-scale e-commerce web application... - @end - @end @end # Constraints are used to specify any limitations or restrictions for the task @constraints - @length - min: 1000 - max: 3000 - @end - @tone - Professional and technical - @end + @length + min: 1000 + max: 3000 + @end + @tone + Professional and technical + @end + @difficulty + Advanced + @end + @end + + # categories are used to classify the task into different categories + @category + Software Engineering @end # Metadata includes information such as domain, difficulty, custom props, etc. 
@metadata - domain: 'Software Engineering' - difficulty: 'Advanced' - top_p: 0.6 - temperature: 0.5 - n: 1 - method: 'greedy' + top_p: 0.6 + temperature: 0.5 + n: 1 + internal: 'true' @end @end - -# Define prompt variables -@vars - role = 'highly skilled and experienced software developer' -@end diff --git a/src/promptml/grammar.lark b/src/promptml/grammar.lark index 69d640b..b222340 100644 --- a/src/promptml/grammar.lark +++ b/src/promptml/grammar.lark @@ -14,6 +14,7 @@ section: context | instructions | examples | constraints + | category | metadata context: "@context" text "@end" @@ -28,10 +29,13 @@ input: "@input" text "@end" output: "@output" text "@end" constraints: "@constraints" constraint* "@end" -constraint: length | tone +constraint: length | tone | difficulty +difficulty: "@difficulty" text "@end" length: "@length" "min:" INT "max:" INT "@end" tone: "@tone" text "@end" +category: "@category" text "@end" + metadata: "@metadata" prop* "@end" prop: PROP_NAME ":" (NUMBER | STRING ) PROP_NAME: /[a-zA-Z_][a-zA-Z0-9_]*/ diff --git a/src/promptml/parser.py b/src/promptml/parser.py index e7f29a3..0ba1d29 100644 --- a/src/promptml/parser.py +++ b/src/promptml/parser.py @@ -8,7 +8,7 @@ Example usage: dsl_code = ''' - code... + ... ''' parser = PromptParser(dsl_code) @@ -23,30 +23,41 @@ class PromptMLTransformer(Transformer): """ - A class for transforming the parsed PromptML code into a structured format. + A class for transforming the parsed PromptML tree into a Python dictionary. 
""" def start(self, items): """ Extract the start section content.""" + prompt = {} + vars_ = {} + for item in items: + if item["type"] == "vars": + vars_ = item["data"] + elif item["type"] == "prompt": + prompt = item["data"] - # Variables are in child 1, replace context with variables $x to x -> value using regex - prompt = items[0] - context = prompt["context"] + # context seems to be a keyword in Python, so we'll use context_ instead + context_ = prompt["context"] objective = prompt["objective"] - vars_ = items[1] + # Replace variables in context and objective with values for k,v in vars_.items(): - context = re.sub(r'\$' + k, v, context) - objective = re.sub(r'\$' + k, v, objective) + context_ = context_.replace(r'$' + k, v.replace("'", '').replace('"', '')) + objective = objective.replace(r'$' + k, v.replace("'", '').replace('"', '')) - prompt["context"] = context + prompt["context"] = context_ prompt["objective"] = objective + return prompt def block(self, items): """ Extract the block content.""" return items[0] + def category(self, items): + """ Extract the category content.""" + return {"category": items[0].strip()} + def prompt(self, items): """ Extract the prompt content.""" sections = {} @@ -57,7 +68,7 @@ def prompt(self, items): else: sections.update(child) - return sections + return {"type": "prompt", "data": sections} def context(self, items): """ Extract the context section content.""" @@ -105,6 +116,10 @@ def tone(self, items): """ Extract the tone constraint content.""" return {"tone": items[0].strip()} + def difficulty(self, items): + """ Extract the difficulty constraint content.""" + return {"difficulty": items[0].strip()} + def var_block(self, items): """ Extract the variable block content.""" var_map = {} @@ -114,7 +129,7 @@ def var_block(self, items): var_value = item.children[1].strip() var_map[var_symbol] = var_value - return var_map + return {"type": "vars", "data": var_map} def metadata(self, items): """ @@ -146,18 +161,11 @@ def 
metadata(self, items): return {"metadata": metadata} - def domain(self, items): - """ Extract the domain metadata content.""" - return {"domain": items[0]} - - def difficulty(self, items): - """ Extract the difficulty metadata content.""" - return {"difficulty": items[0]} - def text(self, items): """ Extract the text content.""" return items[0] + class PromptParser: """A class for parsing prompt markup language code and extract information. """ @@ -203,6 +211,7 @@ def deserialize_json(self, serialized_data): """ self.prompt = json.loads(serialized_data) + class PromptParserFromFile(PromptParser): """ A subclass of PromptParser that reads DSL code from a file. diff --git a/tests/test_parser.py b/tests/test_parser.py index 72dcc88..a913f77 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,11 +1,11 @@ -""" Test cases for prompt parser +""" Test cases for prompt parser """ from src.promptml.parser import PromptParserFromFile from unittest import TestCase class TestPromptParser(TestCase): - # read prompt from prompt.aiml file + # read prompt from prompt.pml file def setUp(self): self.prompt_parser = PromptParserFromFile('prompt.pml') @@ -57,14 +57,17 @@ def test_parse(self): "min": 1000, "max": 3000 }, - "tone": "Professional and technical" + "tone": "Professional and technical", + "difficulty": "Advanced" } ) self.assertEqual( res["metadata"], { - "domain": "Software Engineering, Web Development", - "difficulty": "Advanced" + "top_p": 0.6, + "temperature": 0.5, + "n": 1, + "internal": "true" } )