From 4c33890cf3675d517be08a68bf3d3ffef71d0005 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Blanqui?= Date: Wed, 31 Jul 2024 22:26:47 +0200 Subject: [PATCH 1/7] new parser --- src/parsing/ll1.ml | 1316 ++++++++++++++++++++++++++++++++++++++++ src/parsing/lpLexer.ml | 95 +++ src/parsing/parser.ml | 17 +- 3 files changed, 1427 insertions(+), 1 deletion(-) create mode 100644 src/parsing/ll1.ml diff --git a/src/parsing/ll1.ml b/src/parsing/ll1.ml new file mode 100644 index 000000000..57c8b41d9 --- /dev/null +++ b/src/parsing/ll1.ml @@ -0,0 +1,1316 @@ +open Lplib +open Common open Pos open Logger +open Syntax +open Core +open LpLexer +open Lexing +open Sedlexing + +let log = Logger.make 'n' "pars" "parsing" +let log = log.pp + +let the_current_token : (token * position * position) Stdlib.ref = + Stdlib.ref (EOF, dummy_pos, dummy_pos) + +let current_token() : token = + let (t,_,_) = !the_current_token in + (*if log_enabled() then log "current_token: %a" pp_token t;*) + t + +let current_pos() : position * position = + let (_,p1,p2) = !the_current_token in (p1,p2) + +let consume_token (lb:lexbuf) : unit = + the_current_token := LpLexer.token lb ()(*; + if log_enabled() then log "read new token"*) + +let make_pos (lps:position * position): 'a -> 'a loc = + Pos.make_pos (fst lps, snd (current_pos())) + +let qid_of_path (lps: position * position): + string list -> (string list * string) loc = function + | [] -> assert false + | id::mp -> make_pos lps (List.rev mp, id) + +let make_abst (pos1:position) (ps:p_params list) (t:p_term) (pos2:position) + :p_term = + if ps = [] then t else make_pos (pos1,pos2) (P_Abst(ps,t)) + +let make_prod (pos1:position) (ps:p_params list) (t:p_term) (pos2:position) + :p_term = + if ps = [] then t else make_pos (pos1,pos2) (P_Prod(ps,t)) + +let ident_of_term pos1 {elt; _} = + match elt with + | P_Iden({elt=([], x); pos}, _) -> Pos.make pos x + | _ -> LpLexer.syntax_error pos1 "not an identifier" + +let expected (msg:string) (tokens:token list): 'a = + if msg <> "" then syntax_error (current_pos()) ("expected: "^msg) + else + match tokens with + | [] -> assert false + | t::ts -> + let soft = string_of_token in + syntax_error (current_pos()) + (List.fold_left (fun s t -> s^", "^soft t) ("expected: "^soft t) ts) + +let list (elt:lexbuf -> 'a) (lb:lexbuf): 'a list = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + let acc = ref [] in + (try while true do acc := elt lb :: !acc done with SyntaxError _ -> ()); + List.rev !acc + +let nelist (elt:lexbuf -> 'a) (lb:lexbuf): 'a list = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + let x = elt lb in + x :: list elt lb + +let consume (token:token) (lb:lexbuf): unit = + if current_token() = token then consume_token lb + else expected "" [token] + +let prefix (token:token) (elt:lexbuf -> 'a) (lb:lexbuf): 'a = + consume token lb; elt lb + +let consume_STRINGLIT (lb:lexbuf): string = + match current_token() with + | STRINGLIT s -> + consume_token lb; + s + | _ -> + expected "" [STRINGLIT""] + +let consume_SWITCH (lb:lexbuf): bool = + match current_token() with + | SWITCH b -> + consume_token lb; + b + | _ -> + expected "" [SWITCH true] + +let consume_NAT (lb:lexbuf): string = + match current_token() with + | NAT s -> + consume_token lb; + s + | _ -> + expected "" [NAT""] + +let qid (lb:lexbuf): (string list * string) loc = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | UID s -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 ([], s) + | QID p -> + let pos1 = current_pos() in + consume_token lb; + qid_of_path pos1 p + | _ -> + expected "" [UID"";QID[]] + +let qid_expl (lb:lexbuf): (string list * string) loc = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | UID_EXPL s -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 ([], s) + | QID_EXPL p -> + let pos1 = current_pos() in + consume_token lb; + qid_of_path pos1 p + | _ -> + expected "" [UID_EXPL"";QID_EXPL[]] + +let uid (lb:lexbuf): string loc = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | UID s -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 s + | _ -> + expected "" [UID""] + +let param (lb:lexbuf): string loc option = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | UID s + | NAT s -> + let pos1 = current_pos() in + consume_token lb; + Some (make_pos pos1 s) + | UNDERSCORE -> + consume_token lb; + None + | _ -> + expected "non-qualified identifier or \"_\"" [UID"";NAT"";UNDERSCORE] + +let int (lb:lexbuf): string = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | NAT s + | NEG_NAT s -> + consume_token lb; + s + | _ -> + expected "integer" [NAT"";NEG_NAT""] + +let float_or_int (lb:lexbuf): string = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | NAT s + | NEG_NAT s + | FLOAT s -> + consume_token lb; + s + | _ -> + expected "integer or float" [NAT"";NEG_NAT"";FLOAT""] + +let uid_or_int (lb:lexbuf): string loc = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | UID s + | NAT s + | NEG_NAT s -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 s + | _ -> + expected "non-qualified identifier" [UID"";NAT"";NEG_NAT""] + +let qid_or_int (lb:lexbuf): (string list * string) loc = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | QID p -> + let pos1 = current_pos() in + consume_token lb; + qid_of_path pos1 p + | UID s + | NAT s + | NEG_NAT s -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 ([],s) + | _ -> + expected "possibly qualified identifier" [UID"";QID[];NAT"";NEG_NAT""] + +let path (lb:lexbuf): string list loc = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + (*| UID s -> + let pos1 = current_pos() in + LpLexer.syntax_error pos1 "Unqualified identifier"*) + | QID p -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (List.rev p) + | _ -> + expected "" [QID[]] + +let qid_or_rule (lb:lexbuf): (string list * string) loc = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | UID s -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 ([], s) + | QID p -> + let pos1 = current_pos() in + consume_token lb; + qid_of_path pos1 p + | UNIF_RULE -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (Ghost.sign.sign_path, Unif_rule.equiv.sym_name) + | COERCE_RULE -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (Ghost.sign.sign_path, Coercion.coerce.sym_name) + | _ -> + expected "" [UID"";QID[];UNIF_RULE;COERCE_RULE] + +let term_id (lb:lexbuf): p_term = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | UID _ + | QID _ -> + let pos1 = current_pos() in + let i = qid lb in + make_pos pos1 (P_Iden(i, false)) + | UID_EXPL _ + | QID_EXPL _ -> + let pos1 = current_pos() in + let i = qid_expl lb in + make_pos pos1 (P_Iden(i, true)) + | _ -> + expected "" [UID"";QID[];UID_EXPL"";QID_EXPL[]] + +let rec command pos1 p_sym_mod (lb:lexbuf): p_command = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | SIDE _ + | ASSOCIATIVE + | COMMUTATIVE + | CONSTANT + | INJECTIVE + | SEQUENTIAL + | PRIVATE + | OPAQUE + | PROTECTED -> + assert (p_sym_mod = []); + let pos1 = current_pos() in + command pos1 (nelist modifier lb) lb + (* qid_or_int *) + | UID _ + | QID _ + | NAT _ + | NEG_NAT _ -> + begin + match p_sym_mod with + | [{elt=P_opaq;_}] -> + let i = qid_or_int lb in + make_pos pos1 (P_opaque i) + | [] -> + expected "command keyword missing" [] + | {elt=P_opaq;_}::{pos;_}::_ -> + expected "an opaque command must be followed by an identifier" [] + | _ -> + expected "" [SYMBOL] + end + | REQUIRE -> + if p_sym_mod <> [] then expected "" [SYMBOL]; (*or modifiers*) + let pos1 = current_pos() in + consume_token lb; + begin + match current_token() with + | OPEN -> + consume_token lb; + let ps = nelist path lb in + make_pos pos1 (P_require(true,ps)) + | _ -> + let ps = nelist path lb in + begin + match current_token() with + | AS -> + let p = + match ps with + | [p] -> p + | _ -> expected "a single module before \"as\"" [] + in + consume_token lb; + let i = uid lb in + make_pos pos1 (P_require_as(p,i)) + | _ -> + make_pos pos1 (P_require(false,ps)) + end + end + | OPEN -> + if p_sym_mod <> [] then expected "" [SYMBOL]; (*or modifiers*) + let pos1 = current_pos() in + consume_token lb; + let l = list path lb in + make_pos pos1 (P_open l) + | SYMBOL -> + let pos1 = current_pos() in + consume_token lb; + let p_sym_nam = uid_or_int lb in + let p_sym_arg = list params lb in + begin + match current_token() with + | COLON -> + consume_token lb; + let p_sym_typ = Some(term lb) in + begin + match current_token() with + | BEGIN -> + let p_sym_prf = Some (proof lb) in + let p_sym_def = false in + let sym = + {p_sym_mod; p_sym_nam; p_sym_arg; p_sym_typ; + p_sym_trm=None; p_sym_def; p_sym_prf} + in make_pos pos1 (P_symbol(sym)) + | ASSIGN -> + consume_token lb; + let p_sym_trm, p_sym_prf = term_proof lb in + let p_sym_def = true in + let sym = + {p_sym_mod; p_sym_nam; p_sym_arg; p_sym_typ; + p_sym_trm; p_sym_def; p_sym_prf} + in make_pos pos1 (P_symbol(sym)) + | SEMICOLON -> + let p_sym_trm = None in + let p_sym_def = false in + let p_sym_prf = None in + let sym = + {p_sym_mod; p_sym_nam; p_sym_arg; p_sym_typ; + p_sym_trm; p_sym_def; p_sym_prf} + in make_pos pos1 (P_symbol(sym)) + | _ -> + expected "" [BEGIN;ASSIGN] + end + | ASSIGN -> + consume_token lb; + let p_sym_trm, p_sym_prf = term_proof lb in + let p_sym_def = true in + let p_sym_typ = None in + let sym = + {p_sym_mod; p_sym_nam; p_sym_arg; p_sym_typ; + p_sym_trm; p_sym_def; p_sym_prf} + in make_pos pos1 (P_symbol(sym)) + | _ -> + expected "" [COLON;ASSIGN] + end + | L_PAREN + | L_SQ_BRACKET -> + let pos1 = current_pos() in + let xs = nelist params lb in + consume INDUCTIVE lb; + let i = inductive lb in + let is = list (prefix WITH inductive) lb in + make_pos pos1 (P_inductive(p_sym_mod,xs,i::is)) + | INDUCTIVE -> + let pos1 = current_pos() in + consume_token lb; + let i = inductive lb in + let is = list (prefix WITH inductive) lb in + make_pos pos1 (P_inductive(p_sym_mod,[],i::is)) + | RULE -> + if p_sym_mod <> [] then expected "" [SYMBOL]; (*or modifiers*) + let pos1 = current_pos() in + consume_token lb; + let r = rule lb in + let rs = list (prefix WITH rule) lb in + make_pos pos1 (P_rules(r::rs)) + | UNIF_RULE -> + if p_sym_mod <> [] then expected "" [SYMBOL]; (*or modifiers*) + let pos1 = current_pos() in + consume_token lb; + let e = equation lb in + consume HOOK_ARROW lb; + consume L_SQ_BRACKET lb; + let eq1 = equation lb in + let eqs = list (prefix SEMICOLON equation) lb in + let es = eq1::eqs in + consume R_SQ_BRACKET lb; + (* FIXME: give sensible positions instead of Pos.none and P.appl. *) + let equiv = P.qiden Ghost.sign.sign_path Unif_rule.equiv.sym_name in + let cons = P.qiden Ghost.sign.sign_path Unif_rule.cons.sym_name in + let mk_equiv (t, u) = P.appl (P.appl equiv t) u in + let lhs = mk_equiv e in + let es = List.rev_map mk_equiv es in + let (en, es) = List.(hd es, tl es) in + let cat e es = P.appl (P.appl cons e) es in + let rhs = List.fold_right cat es en in + let r = make_pos pos1 (lhs, rhs) in + make_pos pos1 (P_unif_rule(r)) + | COERCE_RULE -> + if p_sym_mod <> [] then expected "" [SYMBOL]; (*or modifiers*) + let pos1 = current_pos() in + consume_token lb; + let r = rule lb in + make_pos pos1 (P_coercion r) + | BUILTIN -> + if p_sym_mod <> [] then expected "" [SYMBOL]; (*or modifiers*) + let pos1 = current_pos() in + consume_token lb; + begin + match current_token() with + | STRINGLIT s -> + consume_token lb; + consume ASSIGN lb; + let i = qid_or_int lb in + make_pos pos1 (P_builtin(s,i)) + | _ -> + expected "" [STRINGLIT""] + end + | NOTATION -> + if p_sym_mod <> [] then expected "" [SYMBOL]; (*or modifiers*) + let pos1 = current_pos() in + consume_token lb; + let i = qid_or_int lb in + let n = notation lb in + make_pos pos1 (P_notation(i,n)) + | _ -> + if p_sym_mod <> [] then expected "" [SYMBOL]; (*or modifiers*) + let pos1 = current_pos() in + let q = query lb in + make_pos pos1 (P_query(q)) + +and inductive (lb:lexbuf): p_inductive = + let pos0 = current_pos() in + let i = uid lb in + let pos1 = current_pos() in + let ps = list params lb in + consume COLON lb; + let t = term lb in + let pos2 = current_pos() in + let t = make_prod (fst pos1) ps t (snd pos2) in + consume ASSIGN lb; + begin + match current_token() with + | UID _ + | NAT _ + | NEG_NAT _ -> + let c = constructor lb in + let cs = list (prefix VBAR constructor) lb in + let l = c::cs in + make_pos pos0 (i,t,l) + | VBAR -> + let l = list (prefix VBAR constructor) lb in + make_pos pos0 (i,t,l) + | SEMICOLON -> + let l = [] in + make_pos pos0 (i,t,l) + | _ -> + expected "identifier" [] + end + +and constructor (lb:lexbuf): p_ident * p_term = + let i = uid_or_int lb in + let pos1 = current_pos() in + let ps = list params lb in + consume COLON lb; + let t = term lb in + i, make_prod (fst pos1) ps t (snd (current_pos())) + +and modifier (lb:lexbuf): p_modifier = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | SIDE d -> + let pos1 = current_pos() in + consume_token lb; + begin + match current_token() with + | ASSOCIATIVE -> + consume_token lb; + make_pos pos1 (P_prop (Term.Assoc((d = Pratter.Left)))) + | _ -> + expected "" [ASSOCIATIVE] + end + | ASSOCIATIVE -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (P_prop (Term.Assoc false)) + | COMMUTATIVE -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (P_prop Term.Commu) + | CONSTANT -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (P_prop Term.Const) + | INJECTIVE -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (P_prop Term.Injec) + | OPAQUE -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 P_opaq + | SEQUENTIAL -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (P_mstrat Term.Sequen) + | _ -> + exposition lb + +and exposition (lb:lexbuf): p_modifier = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | PRIVATE -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (P_expo Term.Privat) + | PROTECTED -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (P_expo Term.Protec) + | _ -> + expected "" [PRIVATE;PROTECTED] + +and notation (lb:lexbuf): string Sign.notation = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | INFIX -> + consume_token lb; + begin + match current_token() with + | SIDE d -> + consume_token lb; + let p = float_or_int lb in + Sign.Infix(d, p) + | _ -> + let p = float_or_int lb in + Sign.Infix(Pratter.Neither, p) + end + | POSTFIX -> + consume_token lb; + let p = float_or_int lb in + Sign.Postfix p + | PREFIX -> + consume_token lb; + let p = float_or_int lb in + Sign.Prefix p + | QUANTIFIER -> + consume_token lb; + Sign.Quant + | _ -> + expected "" [INFIX;POSTFIX;PREFIX;QUANTIFIER] + +and rule (lb:lexbuf): (p_term * p_term) loc = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + let pos1 = current_pos() in + let l = term lb in + consume HOOK_ARROW lb; + let r = term lb in + make_pos pos1 (l, r) + +and equation (lb:lexbuf): p_term * p_term = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + let l = term lb in + consume EQUIV lb; + let r = term lb in + (l, r) + +and query (lb:lexbuf): p_query = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | ASSERT b -> + let pos1 = current_pos() in + consume_token lb; + let ps = list params lb in + consume TURNSTILE lb; + let t = term lb in + begin + match current_token() with + | COLON -> + consume_token lb; + let a = term lb in + let pos2 = current_pos() in + let t = make_abst (fst pos1) ps t (snd pos2) in + let a = make_prod (fst pos1) ps a (snd pos2) in + make_pos pos1 (P_query_assert(b, P_assert_typing(t,a))) + | EQUIV -> + consume_token lb; + let u = term lb in + let pos2 = current_pos() in + let t = make_abst (fst pos1) ps t (snd pos2) in + let u = make_abst (fst pos1) ps u (snd pos2) in + make_pos pos1 (P_query_assert(b, P_assert_conv(t, u))) + | _ -> + expected "" [COLON;EQUIV] + end + | COMPUTE -> + let pos1 = current_pos() in + consume_token lb; + let t = term lb in + make_pos pos1 (P_query_normalize(t, {strategy=SNF; steps=None})) + | PRINT -> + let pos1 = current_pos() in + consume_token lb; + begin + match current_token() with + | SEMICOLON -> + make_pos pos1 (P_query_print None) + | _ -> + let i = qid_or_rule lb in + make_pos pos1 (P_query_print (Some i)) + end + | PROOFTERM -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 P_query_proofterm + | DEBUG_FLAGS fl -> + let pos1 = current_pos() in + consume_token lb; + let (b, s) = fl in + make_pos pos1 (P_query_debug(b, s)) + | FLAG -> + let pos1 = current_pos() in + consume_token lb; + let s = consume_STRINGLIT lb in + let b = consume_SWITCH lb in + make_pos pos1 (P_query_flag(s,b)) + | PROVER -> + let pos1 = current_pos() in + consume_token lb; + let s = consume_STRINGLIT lb in + make_pos pos1 (P_query_prover(s)) + | PROVER_TIMEOUT -> + let pos1 = current_pos() in + consume_token lb; + let n = consume_NAT lb in + make_pos pos1 (P_query_prover_timeout n) + | VERBOSE -> + let pos1 = current_pos() in + consume_token lb; + let n = consume_NAT lb in + make_pos pos1 (P_query_verbose n) + | TYPE_QUERY -> + let pos1 = current_pos() in + consume_token lb; + let t = term lb in + make_pos pos1 (P_query_infer(t, {strategy=NONE; steps=None})) + (*| SEARCH s -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (P_query_search s)*) + | _ -> + expected "query" [] + +and term_proof (lb:lexbuf): p_term option * (p_proof * p_proof_end) option = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | BEGIN -> + let p = proof lb in + None, Some p + | _ -> + let t = term lb in + begin + match current_token() with + | BEGIN -> + let p = proof lb in + Some t, Some p + | _ -> + Some t, None + end + +and proof (lb:lexbuf): p_proof * p_proof_end = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + consume BEGIN lb; + match current_token() with + | L_CU_BRACKET -> + let l = nelist subproof lb in + if current_token() = SEMICOLON then consume_token lb; + let pe = proof_end lb in + l, pe + (*queries*) + | ASSERT _ + | COMPUTE + | PRINT + | PROOFTERM + | DEBUG + | FLAG + | PROVER + | PROVER_TIMEOUT + | VERBOSE + | SEARCH + (*tactics*) + | ADMIT + | APPLY + | ASSUME + | FAIL + | GENERALIZE + | HAVE + | INDUCTION + | REFINE + | REFLEXIVITY + | REMOVE + | REWRITE + | SIMPLIFY + | SOLVE + | SYMMETRY + | TRY + | WHY3 -> + let l = steps lb in + let pe = proof_end lb in + [l], pe + | END + | ABORT + | ADMITTED -> + let pe = proof_end lb in + [], pe + | _ -> + expected "subproof, tactic or query" [] + +and subproof (lb:lexbuf): p_proofstep list = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | L_CU_BRACKET -> + consume_token lb; + let l = steps lb in + consume R_CU_BRACKET lb; + l + | _ -> + expected "" [L_CU_BRACKET] + +and steps (lb:lexbuf): p_proofstep list = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + (*queries*) + | ASSERT _ + | COMPUTE + | PRINT + | PROOFTERM + | DEBUG + | FLAG + | PROVER + | PROVER_TIMEOUT + | VERBOSE + | SEARCH + (*tactics*) + | ADMIT + | APPLY + | ASSUME + | FAIL + | GENERALIZE + | HAVE + | INDUCTION + | REFINE + | REFLEXIVITY + | REMOVE + | REWRITE + | SIMPLIFY + | SOLVE + | SYMMETRY + | TRY + | WHY3 -> + let a = step lb in + let acc = list (prefix SEMICOLON step) lb in + if current_token() = SEMICOLON then consume_token lb; + a::acc + | END + | ABORT + | ADMITTED -> + [] + | _ -> + expected "tactic or query" [] + +and step (lb:lexbuf): p_proofstep = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + let t = tactic lb in + let l = list subproof lb in + Tactic(t, l) + +and proof_end (lb:lexbuf): p_proof_end = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | ABORT -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 Syntax.P_proof_abort + | ADMITTED -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 Syntax.P_proof_admitted + | END -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 Syntax.P_proof_end + | _ -> + expected "" [ABORT;ADMITTED;END] + +and tactic (lb:lexbuf): p_tactic = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + (*queries*) + | ASSERT _ + | COMPUTE + | PRINT + | PROOFTERM + | DEBUG + | FLAG + | PROVER + | PROVER_TIMEOUT + | VERBOSE + | SEARCH -> + let pos1 = current_pos() in + make_pos pos1 (P_tac_query (query lb)) + | ADMIT -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 P_tac_admit + | APPLY -> + let pos1 = current_pos() in + consume_token lb; + let t = term lb in + make_pos pos1 (P_tac_apply t) + | ASSUME -> + let pos1 = current_pos() in + consume_token lb; + let xs = nelist param lb in + make_pos pos1 (P_tac_assume xs) + | FAIL -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 P_tac_fail + | GENERALIZE -> + let pos1 = current_pos() in + consume_token lb; + let i = uid lb in + make_pos pos1 (P_tac_generalize i) + | HAVE -> + let pos1 = current_pos() in + consume_token lb; + let i = uid lb in + consume COLON lb; + let t = term lb in + make_pos pos1 (P_tac_have(i,t)) + | INDUCTION -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 P_tac_induction + | REFINE -> + let pos1 = current_pos() in + consume_token lb; + let t = term lb in + make_pos pos1 (P_tac_refine t) + | REFLEXIVITY -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 P_tac_refl + | REMOVE -> + let pos1 = current_pos() in + consume_token lb; + let xs = nelist uid lb in + make_pos pos1 (P_tac_remove xs) + | REWRITE -> + let pos1 = current_pos() in + consume_token lb; + begin + match current_token() with + | SIDE d -> + consume_token lb; + begin + match current_token() with + | DOT -> + consume_token lb; + let p = rw_patt_spec lb in + let t = term lb in + let b = d <> Pratter.Left in + make_pos pos1 (P_tac_rewrite(b,Some p,t)) + | _ -> + let t = term lb in + let b = d <> Pratter.Left in + make_pos pos1 (P_tac_rewrite(b,None,t)) + end + | DOT -> + consume_token lb; + let p = rw_patt_spec lb in + let t = term lb in + make_pos pos1 (P_tac_rewrite(true,Some p,t)) + | _ -> + let t = term lb in + make_pos pos1 (P_tac_rewrite(true,None,t)) + end + | SIMPLIFY -> + let pos1 = current_pos() in + consume_token lb; + begin + match current_token() with + | UID _ + | QID _ + | NAT _ + | NEG_NAT _ -> + let i = Some (qid_or_int lb) in + make_pos pos1 (P_tac_simpl i) + | _ -> + let i = None in + make_pos pos1 (P_tac_simpl i) + end + | SOLVE -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 P_tac_solve + | SYMMETRY -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 P_tac_sym + | TRY -> + let pos1 = current_pos() in + consume_token lb; + let t = tactic lb in + make_pos pos1 (P_tac_try t) + | WHY3 -> + let pos1 = current_pos() in + consume_token lb; + begin + match current_token() with + | STRINGLIT s -> + make_pos pos1 (P_tac_why3 (Some s)) + | _ -> + make_pos pos1 (P_tac_why3 None) + end + | _ -> + expected "tactic" [] + +and rw_patt (lb:lexbuf): p_rw_patt = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + (* bterm *) + | BACKQUOTE + | PI + | LAMBDA + | LET + (* aterm *) + | UID _ + | QID _ + | UID_EXPL _ + | QID_EXPL _ + | UNDERSCORE + | TYPE_TERM + | UID_META _ + | UID_PATT _ + | L_PAREN + | L_SQ_BRACKET + | NAT _ + | NEG_NAT _ -> + let pos1 = current_pos() in + let t1 = term lb in + begin + match current_token() with + | IN -> + consume_token lb; + let t2 = term lb in + begin + match current_token() with + | IN -> + consume_token lb; + let t3 = term lb in + let x = ident_of_term pos1 t2 in + make_pos pos1 (Rw_TermInIdInTerm(t1,(x,t3))) + | _ -> + let x = ident_of_term pos1 t1 in + make_pos pos1 (Rw_IdInTerm(x,t2)) + end + | AS -> + consume_token lb; + let t2 = term lb in + begin + match current_token() with + | IN -> + consume_token lb; + let t3 = term lb in + let x = ident_of_term pos1 t2 in + make_pos pos1 (Rw_TermAsIdInTerm(t1,(x,t3))) + | _ -> + expected "" [IN] + end + | _ -> + make_pos pos1 (Rw_Term(t1)) + end + | IN -> + let pos1 = current_pos() in + consume_token lb; + let t1 = term lb in + begin + match current_token() with + | IN -> + consume_token lb; + let t2 = term lb in + let x = ident_of_term pos1 t1 in + make_pos pos1 (Rw_InIdInTerm(x,t2)) + | _ -> + make_pos pos1 (Rw_InTerm(t1)) + end + | _ -> + expected "term or keyword \"in\"" [] + +and rw_patt_spec (lb:lexbuf): p_rw_patt = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | L_SQ_BRACKET -> + consume_token lb; + let p = rw_patt lb in + consume R_SQ_BRACKET lb; (*add info on opening bracket*) + p + | _ -> + expected "" [L_SQ_BRACKET] + +and params (lb:lexbuf): p_params = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | L_PAREN -> + consume_token lb; + let ps = nelist param lb in + begin + match current_token() with + | COLON -> + consume_token lb; + let typ = term lb in + consume R_PAREN lb; + ps, Some typ, false + | R_PAREN -> + consume_token lb; + ps, None, false + | _ -> + expected "" [COLON;R_PAREN] + end + | L_SQ_BRACKET -> + consume_token lb; + let ps = nelist param lb in + begin + match current_token() with + | COLON -> + consume_token lb; + let typ = term lb in + consume R_SQ_BRACKET lb; + ps, Some typ, true + | R_SQ_BRACKET -> + consume_token lb; + ps, None, true + | _ -> + expected "" [COLON;R_SQ_BRACKET] + end + | _ -> + let x = param lb in + [x], None, false + +and term (lb:lexbuf): p_term = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + (* bterm *) + | BACKQUOTE + | PI + | LAMBDA + | LET -> + bterm lb + (* aterm *) + | UID _ + | QID _ + | UID_EXPL _ + | QID_EXPL _ + | UNDERSCORE + | TYPE_TERM + | UID_META _ + | UID_PATT _ + | L_PAREN + | L_SQ_BRACKET + | NAT _ + | NEG_NAT _ -> + let pos1 = current_pos() in + let h = aterm lb in + app pos1 h lb + | _ -> + expected "term" [] + +and app (pos1:position * position) (t: p_term) (lb:lexbuf): p_term = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + (* aterm *) + | UID _ + | QID _ + | UID_EXPL _ + | QID_EXPL _ + | UNDERSCORE + | TYPE_TERM + | UID_META _ + | UID_PATT _ + | L_PAREN + | L_SQ_BRACKET + | NAT _ + | NEG_NAT _ -> + let u = aterm lb in + app pos1 (make_pos pos1 (P_Appl(t,u))) lb + (* bterm *) + | BACKQUOTE + | PI + | LAMBDA + | LET -> + let u = bterm lb in + make_pos pos1 (P_Appl(t,u)) + (* other cases *) + | ARROW -> + consume_token lb; + let u = term lb in + make_pos pos1 (P_Arro(t,u)) + | _ -> + t + +and bterm (lb:lexbuf): p_term = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | BACKQUOTE -> + let pos1 = current_pos() in + consume_token lb; + let q = term_id lb in + let b = binder lb in + let b = make_pos pos1 (P_Abst(fst b, snd b)) in + make_pos pos1 (P_Appl(q, b)) + | PI -> + let pos1 = current_pos() in + consume_token lb; + let b = binder lb in + make_pos pos1 (P_Prod(fst b, snd b)) + | LAMBDA -> + let pos1 = current_pos() in + consume_token lb; + let b = binder lb in + make_pos pos1 (P_Abst(fst b, snd b)) + | LET -> + let pos1 = current_pos() in + consume_token lb; + let x = uid lb in + let a = list params lb in + begin + match current_token() with + | COLON -> + consume_token lb; + let b = Some (term lb) in + consume ASSIGN lb; + let t = term lb in + consume IN lb; + let u = term lb in + make_pos pos1 (P_LLet(x, a, b, t, u)) + | _ -> + let b = None in + consume ASSIGN lb; + let t = term lb in + consume IN lb; + let u = term lb in + make_pos pos1 (P_LLet(x, a, b, t, u)) + end + | _ -> + expected "" [BACKQUOTE;PI;LAMBDA;LET] + +and aterm (lb:lexbuf): p_term = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | UID _ + | QID _ + | UID_EXPL _ + | QID_EXPL _ -> + term_id lb + | UNDERSCORE -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 P_Wild + | TYPE_TERM -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 P_Type + | UID_META s -> + let pos1 = current_pos() in + consume_token lb; + let i = make_pos pos1 s in + begin + match current_token() with + | DOT -> + consume_token lb; + make_pos pos1 (P_Meta(i,Array.of_list (env lb))) + | _ -> + make_pos pos1 (P_Meta(i,[||])) + end + | UID_PATT s -> + let pos1 = current_pos() in + consume_token lb; + let i = if s = "_" then None else Some(make_pos pos1 s) in + begin + match current_token() with + | DOT -> + consume_token lb; + make_pos pos1 (P_Patt(i, Some(Array.of_list (env lb)))) + | _ -> + make_pos pos1 (P_Patt(i, None)) + end + | L_PAREN -> + let pos1 = current_pos() in + consume_token lb; + let t = term lb in + consume R_PAREN lb; + make_pos pos1 (P_Wrap(t)) + | L_SQ_BRACKET -> + let pos1 = current_pos() in + consume_token lb; + let t = term lb in + consume R_SQ_BRACKET lb; + make_pos pos1 (P_Expl(t)) + | NAT n -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (P_NLit n) + | NEG_NAT n -> + let pos1 = current_pos() in + consume_token lb; + make_pos pos1 (P_Iden(make_pos pos1 ([],n), false)) + | _ -> + expected "identifier, \"_\", or term between parentheses or square \ + brackets" [] + +and env (lb:lexbuf): p_term list = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | L_SQ_BRACKET -> + consume_token lb; + begin + match current_token() with + | R_SQ_BRACKET -> + consume_token lb; + [] + | _ -> + let t = term lb in + let ts = list (prefix SEMICOLON term) lb in + consume R_SQ_BRACKET lb; + t::ts + end + | _ -> + expected "" [L_SQ_BRACKET] + +and binder (lb:lexbuf): p_params list * p_term = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | UID _ + | NAT _ + | UNDERSCORE -> + let s = param lb in + begin + match current_token() with + | UID _ + | NAT _ + | UNDERSCORE + | L_PAREN + | L_SQ_BRACKET -> + let ps = list params lb in + consume COMMA lb; + let p = [s], None, false in + p::ps, term lb + | COMMA -> + consume_token lb; + let p = [s], None, false in + [p], term lb + | COLON -> + consume_token lb; + let a = term lb in + consume COMMA lb; + let p = [s], Some a, false in + [p], term lb + | _ -> + expected "parameter list" + [UID"";NAT"";UNDERSCORE;L_PAREN;L_SQ_BRACKET;COMMA] + end + | L_PAREN -> + let ps = nelist params lb in + begin + match current_token() with + | COMMA -> + consume_token lb; + ps, term lb + | _ -> + expected "" [COMMA] + end + | L_SQ_BRACKET -> + let ps = nelist params lb in + begin + match current_token() with + | COMMA -> + consume_token lb; + ps, term lb + | _ -> + expected "" [COMMA] + end + | _ -> + expected "" [UID"";NAT"";UNDERSCORE;L_PAREN;L_SQ_BRACKET] + +let command (lb:lexbuf): p_command = + (*if log_enabled() then log "------------------- start reading command";*) + consume_token lb; + if current_token() = EOF then raise End_of_file + else + let c = command (dummy_pos,dummy_pos) [] lb in + match current_token() with + | SEMICOLON -> c + | _ -> expected "" [SEMICOLON] diff --git a/src/parsing/lpLexer.ml b/src/parsing/lpLexer.ml index 6c23af3c9..fc7decbaf 100644 --- a/src/parsing/lpLexer.ml +++ b/src/parsing/lpLexer.ml @@ -344,3 +344,98 @@ let token : lexbuf -> unit -> token * Lexing.position * Lexing.position = let token = let r = ref (EOF, Lexing.dummy_pos, Lexing.dummy_pos) in fun lb () -> Debug.(record_time Lexing (fun () -> r := token lb ())); !r + +let string_of_token = function + | EOF -> "end of file" + | ABORT -> "abort" + | ADMIT -> "admit" + | ADMITTED -> "admitted" + | APPLY -> "apply" + | AS -> "as" + | ASSERT _ -> "assert or assertnot" + | ASSOCIATIVE -> "associative" + | ASSUME -> "assume" + | BEGIN -> "begin" + | BUILTIN -> "builtin" + | COERCE_RULE -> "coerce_rule" + | COMMUTATIVE -> "commutative" + | COMPUTE -> "compute" + | CONSTANT -> "constant" + | DEBUG -> "debug" + | END -> "end" + | FAIL -> "fail" + | FLAG -> "flag" + | GENERALIZE -> "generalize" + | HAVE -> "have" + | IN -> "in" + | INDUCTION -> "induction" + | INDUCTIVE -> "inductive" + | INFIX -> "infix" + | INJECTIVE -> "injective" + | LET -> "let" + | NOTATION -> "notation" + | OPAQUE -> "opaque" + | OPEN -> "open" + | POSTFIX -> "postfix" + | PREFIX -> "prefix" + | PRINT -> "print" + | PRIVATE -> "private" + | PROOFTERM -> "proofterm" + | PROTECTED -> "protected" + | PROVER -> "prover" + | PROVER_TIMEOUT -> "prover_timeout" + | QUANTIFIER -> "quantifier" + | REFINE -> "refine" + | REFLEXIVITY -> "reflexivity" + | REMOVE -> "remove" + | REQUIRE -> "require" + | REWRITE -> "rewrite" + | RULE -> "rule" + | SEARCH -> "search" + | SEQUENTIAL -> "sequential" + | SIMPLIFY -> "simplify" + | SOLVE -> "solve" + | SYMBOL -> "symbol" + | SYMMETRY -> "symmetry" + | TRY -> "try" + | TYPE_QUERY -> "type" + | TYPE_TERM -> "TYPE" + | UNIF_RULE -> "unif_rule" + | VERBOSE -> "verbose" + | WHY3 -> "why3" + | WITH -> "with" + | DEBUG_FLAGS _ -> "debug flags" + | NAT _ -> "natural number" + | NEG_NAT _ -> "negative integer" + | FLOAT _ -> "float" + | SIDE _ -> "left or right" + | STRINGLIT _ -> "string literal" + | SWITCH _ -> "on or off" + | ASSIGN -> "≔" + | ARROW -> "→" + | BACKQUOTE -> "`" + | COMMA -> "," + | COLON -> ":" + | DOT -> "." + | EQUIV -> "≡" + | HOOK_ARROW -> "↪" + | LAMBDA -> "λ" + | L_CU_BRACKET -> "{" + | L_PAREN -> "(" + | L_SQ_BRACKET -> "[" + | PI -> "Π" + | R_CU_BRACKET -> "}" + | R_PAREN -> ")" + | R_SQ_BRACKET -> "]" + | SEMICOLON -> ";" + | TURNSTILE -> "⊢" + | VBAR -> "|" + | UNDERSCORE -> "_" + | UID _ -> "non-qualified identifier" + | UID_EXPL _ -> "@-prefixed non-qualified identifier" + | UID_META _ -> "?-prefixed metavariable number" + | UID_PATT _ -> "$-prefixed non-qualified identifier" + | QID _ -> "qualified identifier" + | QID_EXPL _ -> "@-prefixed qualified identifier" + +let pp_token ppf t = Base.string ppf (string_of_token t) diff --git a/src/parsing/parser.ml b/src/parsing/parser.ml index 5495f6741..2ea099a95 100644 --- a/src/parsing/parser.ml +++ b/src/parsing/parser.ml @@ -104,7 +104,22 @@ sig let parse = parse ~grammar_entry:LpParser.command let parse_string = parse_string ~grammar_entry:LpParser.command - let parse_file = parse_file ~grammar_entry:LpParser.command + let parse_file fname = (*parse_file ~grammar_entry:LpParser.command fname*) + let inchan = open_in fname in + let lb = Sedlexing.Utf8.from_channel inchan in + Sedlexing.set_filename lb fname; + let generator _ = + try Some(Ll1.command lb) + with + | End_of_file -> close_in inchan; None + | LpLexer.SyntaxError {pos=None; _} -> assert false + | LpLexer.SyntaxError {pos=Some pos; elt} -> parser_fatal pos "%s" elt + | LpParser.Error -> + let pos = Pos.locate (Sedlexing.lexing_positions lb) in + parser_fatal pos "Unexpected token: \"%s\"." + (Sedlexing.Utf8.lexeme lb) + in + Stream.from generator end From f414bc860ecac7460e38cd33731e5157b26fe4d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Blanqui?= Date: Wed, 31 Jul 2024 22:50:07 +0200 Subject: [PATCH 2/7] wip --- src/parsing/ll1.ml | 109 ++++++++++++++++++++++++++++++++++++++++- src/parsing/lpLexer.ml | 95 ----------------------------------- 2 files changed, 107 insertions(+), 97 deletions(-) diff --git a/src/parsing/ll1.ml b/src/parsing/ll1.ml index 57c8b41d9..d36f16015 100644 --- a/src/parsing/ll1.ml +++ b/src/parsing/ll1.ml @@ -6,8 +6,10 @@ open LpLexer open Lexing open Sedlexing -let log = Logger.make 'n' "pars" "parsing" -let log = log.pp +(*let log = Logger.make 'n' "pars" "parsing" +let log = log.pp*) + +(* token management *) let the_current_token : (token * position * position) Stdlib.ref = Stdlib.ref (EOF, dummy_pos, dummy_pos) @@ -24,6 +26,8 @@ let consume_token (lb:lexbuf) : unit = the_current_token := LpLexer.token lb ()(*; if log_enabled() then log "read new token"*) +(* building positions and terms *) + let make_pos (lps:position * position): 'a -> 'a loc = Pos.make_pos (fst lps, snd (current_pos())) @@ -45,6 +49,103 @@ let ident_of_term pos1 {elt; _} = | P_Iden({elt=([], x); pos}, _) -> Pos.make pos x | _ -> LpLexer.syntax_error pos1 "not an identifier" +(* error messages *) + +let string_of_token = function + | EOF -> "end of file" + | ABORT -> "abort" + | ADMIT -> "admit" + | ADMITTED -> "admitted" + | APPLY -> "apply" + | AS -> "as" + | ASSERT _ -> "assert or assertnot" + | ASSOCIATIVE -> "associative" + | ASSUME -> "assume" + | BEGIN -> "begin" + | BUILTIN -> "builtin" + | COERCE_RULE -> "coerce_rule" + | COMMUTATIVE -> "commutative" + | COMPUTE -> "compute" + | CONSTANT -> "constant" + | DEBUG -> "debug" + | END -> "end" + | FAIL -> "fail" + | FLAG -> "flag" + | GENERALIZE -> "generalize" + | HAVE -> "have" + | IN -> "in" + | INDUCTION -> "induction" + | INDUCTIVE -> "inductive" + | INFIX -> "infix" + | INJECTIVE -> "injective" + | LET -> "let" + | NOTATION -> "notation" + | OPAQUE -> "opaque" + | OPEN -> "open" + | POSTFIX -> "postfix" + | PREFIX -> "prefix" + | PRINT -> "print" + | PRIVATE -> "private" + | PROOFTERM -> "proofterm" + | PROTECTED -> "protected" + | PROVER -> "prover" + | PROVER_TIMEOUT -> "prover_timeout" + | QUANTIFIER -> "quantifier" + | REFINE -> "refine" + | REFLEXIVITY -> "reflexivity" + | REMOVE -> "remove" + | REQUIRE -> "require" + | REWRITE -> "rewrite" + | RULE -> "rule" + | SEARCH -> "search" + | SEQUENTIAL -> "sequential" + | SIMPLIFY -> "simplify" + | SOLVE -> "solve" + | SYMBOL -> "symbol" + | SYMMETRY -> "symmetry" + | TRY -> "try" + | TYPE_QUERY -> "type" + | TYPE_TERM -> "TYPE" + | UNIF_RULE -> "unif_rule" + | VERBOSE -> "verbose" + | WHY3 -> "why3" + | WITH -> "with" + | DEBUG_FLAGS _ -> "debug flags" + | NAT _ -> "natural number" + | NEG_NAT _ -> "negative integer" + | FLOAT _ -> "float" + | SIDE _ -> "left or right" + | STRINGLIT _ -> "string literal" + | SWITCH _ -> "on or off" + | ASSIGN -> "≔" + | ARROW -> "→" + | BACKQUOTE -> "`" + | COMMA -> "," + | COLON -> ":" + | DOT -> "." + | EQUIV -> "≡" + | HOOK_ARROW -> "↪" + | LAMBDA -> "λ" + | L_CU_BRACKET -> "{" + | L_PAREN -> "(" + | L_SQ_BRACKET -> "[" + | PI -> "Π" + | R_CU_BRACKET -> "}" + | R_PAREN -> ")" + | R_SQ_BRACKET -> "]" + | SEMICOLON -> ";" + | TURNSTILE -> "⊢" + | VBAR -> "|" + | UNDERSCORE -> "_" + | UID _ -> "non-qualified identifier" + | UID_EXPL _ -> "@-prefixed non-qualified identifier" + | UID_META _ -> "?-prefixed metavariable number" + | UID_PATT _ -> "$-prefixed non-qualified identifier" + | QID _ -> "qualified identifier" + | QID_EXPL _ -> "@-prefixed qualified identifier" + +let pp_token ppf t = Base.string ppf (string_of_token t) + let expected (msg:string) (tokens:token list): 'a = if msg <> "" then syntax_error (current_pos()) ("expected: "^msg) else @@ -55,6 +156,8 @@ let expected (msg:string) (tokens:token list): 'a = syntax_error (current_pos()) (List.fold_left (fun s t -> s^", "^soft t) ("expected: "^soft t) ts) +(* generic parsing functions *) + let list (elt:lexbuf -> 'a) (lb:lexbuf): 'a list = (*if log_enabled() then log "expected: %s" __FUNCTION__;*) let acc = ref [] in @@ -73,6 +176,8 @@ let consume (token:token) (lb:lexbuf): unit = let prefix (token:token) (elt:lexbuf -> 'a) (lb:lexbuf): 'a = consume token lb; elt lb +(* parsing functions *) + let consume_STRINGLIT (lb:lexbuf): string = match current_token() with | STRINGLIT s -> diff --git a/src/parsing/lpLexer.ml b/src/parsing/lpLexer.ml index fc7decbaf..6c23af3c9 100644 --- a/src/parsing/lpLexer.ml +++ b/src/parsing/lpLexer.ml @@ -344,98 +344,3 @@ let token : lexbuf -> unit -> token * Lexing.position * Lexing.position = let token = let r = ref (EOF, Lexing.dummy_pos, Lexing.dummy_pos) in fun lb () -> Debug.(record_time Lexing (fun () -> r := token lb ())); !r - -let string_of_token = function - | EOF -> "end of file" - | ABORT -> "abort" - | ADMIT -> "admit" - | ADMITTED -> "admitted" - | APPLY -> "apply" - | AS -> "as" - | ASSERT _ -> "assert or assertnot" - | ASSOCIATIVE -> "associative" - | ASSUME -> "assume" - | BEGIN -> "begin" - | BUILTIN -> "builtin" - | COERCE_RULE -> "coerce_rule" - | COMMUTATIVE -> "commutative" - | COMPUTE -> "compute" - | CONSTANT -> "constant" - | DEBUG -> "debug" - | END -> "end" - | FAIL -> "fail" - | FLAG -> "flag" - | GENERALIZE -> "generalize" - | HAVE -> "have" - | IN -> "in" - | INDUCTION -> "induction" - | INDUCTIVE -> "inductive" - | INFIX -> "infix" - | INJECTIVE -> "injective" - | LET -> "let" - | NOTATION -> "notation" - | OPAQUE -> "opaque" - | OPEN -> "open" - | POSTFIX -> "postfix" - | PREFIX -> "prefix" - | PRINT -> "print" - | PRIVATE -> "private" - | PROOFTERM -> "proofterm" - | PROTECTED -> "protected" - | PROVER -> "prover" - | PROVER_TIMEOUT -> "prover_timeout" - | QUANTIFIER -> "quantifier" - | REFINE -> "refine" - | REFLEXIVITY -> "reflexivity" - | REMOVE -> "remove" - | REQUIRE -> "require" - | REWRITE -> "rewrite" - | RULE -> "rule" - | SEARCH -> "search" - | SEQUENTIAL -> "sequential" - | SIMPLIFY -> "simplify" - | SOLVE -> "solve" - | SYMBOL -> "symbol" - | SYMMETRY -> "symmetry" - | TRY -> "try" - | TYPE_QUERY -> "type" - | TYPE_TERM -> "TYPE" - | UNIF_RULE -> "unif_rule" - | VERBOSE -> "verbose" - | WHY3 -> "why3" - | WITH -> "with" - | DEBUG_FLAGS _ -> "debug flags" - | NAT _ -> "natural number" - | NEG_NAT _ -> "negative integer" - | FLOAT _ -> "float" - | SIDE _ -> "left or right" - | STRINGLIT _ -> "string literal" - | SWITCH _ -> "on or off" - | ASSIGN -> "≔" - | ARROW -> "→" - | BACKQUOTE -> "`" - | COMMA -> "," - | COLON -> ":" - | DOT -> "." - | EQUIV -> "≡" - | HOOK_ARROW -> "↪" - | LAMBDA -> "λ" - | L_CU_BRACKET -> "{" - | L_PAREN -> "(" - | L_SQ_BRACKET -> "[" - | PI -> "Π" - | R_CU_BRACKET -> "}" - | R_PAREN -> ")" - | R_SQ_BRACKET -> "]" - | SEMICOLON -> ";" - | TURNSTILE -> "⊢" - | VBAR -> "|" - | UNDERSCORE -> "_" - | UID _ -> "non-qualified identifier" - | UID_EXPL _ -> "@-prefixed non-qualified identifier" - | UID_META _ -> "?-prefixed metavariable number" - | UID_PATT _ -> "$-prefixed non-qualified identifier" - | QID _ -> "qualified identifier" - | QID_EXPL _ -> "@-prefixed qualified identifier" - -let pp_token ppf t = Base.string ppf (string_of_token t) From de901bd2a8dca3eb93832c6cd7e303bc54731ed8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Blanqui?= Date: Thu, 1 Aug 2024 19:43:04 +0200 Subject: [PATCH 3/7] rewrite parser.ml --- src/parsing/ll1.ml | 3 + src/parsing/lpLexer.ml | 16 +-- src/parsing/parser.ml | 247 +++++++++++++++++++++++------------------ 3 files changed, 151 insertions(+), 115 deletions(-) diff --git a/src/parsing/ll1.ml b/src/parsing/ll1.ml index d36f16015..337638677 100644 --- a/src/parsing/ll1.ml +++ b/src/parsing/ll1.ml @@ -176,6 +176,9 @@ let consume (token:token) (lb:lexbuf): unit = let prefix (token:token) (elt:lexbuf -> 'a) (lb:lexbuf): 'a = consume token lb; elt lb +let alone (entry:lexbuf -> 'a) (lb:lexbuf): 'a = + let x = entry lb in if current_token() != EOF then expected "" [EOF] else x + (* parsing functions *) let consume_STRINGLIT (lb:lexbuf): string = diff --git a/src/parsing/lpLexer.ml b/src/parsing/lpLexer.ml index 6c23af3c9..e492af8ad 100644 --- a/src/parsing/lpLexer.ml +++ b/src/parsing/lpLexer.ml @@ -4,13 +4,13 @@ open Lplib open Sedlexing open Common open Pos -let remove_first : Sedlexing.lexbuf -> string = fun lb -> +let remove_first : lexbuf -> string = fun lb -> Utf8.sub_lexeme lb 1 (lexeme_length lb - 1) -let remove_last : Sedlexing.lexbuf -> string = fun lb -> +let remove_last : lexbuf -> string = fun lb -> Utf8.sub_lexeme lb 0 (lexeme_length lb - 1) -let remove_ends : Sedlexing.lexbuf -> string = fun lb -> +let remove_ends : lexbuf -> string = fun lb -> Utf8.sub_lexeme lb 1 (lexeme_length lb - 2) exception SyntaxError of strloc @@ -18,10 +18,10 @@ exception SyntaxError of strloc let syntax_error : Lexing.position * Lexing.position -> string -> 'a = fun pos msg -> raise (SyntaxError (Pos.make_pos pos msg)) -let fail : Sedlexing.lexbuf -> string -> 'a = fun lb msg -> - syntax_error (Sedlexing.lexing_positions lb) msg +let fail : lexbuf -> string -> 'a = fun lb msg -> + syntax_error (lexing_positions lb) msg -let invalid_character : Sedlexing.lexbuf -> 'a = fun lb -> +let invalid_character : lexbuf -> 'a = fun lb -> fail lb "Invalid character" (** Tokens. *) @@ -337,8 +337,8 @@ and comment next i lb = a parser. *) let token : lexbuf -> unit -> token * Lexing.position * Lexing.position = fun lb () -> try with_tokenizer token lb () with - | Sedlexing.MalFormed -> fail lb "Not Utf8 encoded file" - | Sedlexing.InvalidCodepoint k -> + | MalFormed -> fail lb "Not Utf8 encoded file" + | InvalidCodepoint k -> fail lb ("Invalid Utf8 code point " ^ string_of_int k) let token = diff --git a/src/parsing/parser.ml b/src/parsing/parser.ml index 2ea099a95..07b977eac 100644 --- a/src/parsing/parser.ml +++ b/src/parsing/parser.ml @@ -7,6 +7,10 @@ open Lplib open Base open Common +open Syntax +open Lexing + +type lexpos = Lexing.position (** [parser_fatal pos fmt] is a wrapper for [Error.fatal] that enforces that the error has an attached source code position. *) @@ -15,158 +19,187 @@ let parser_fatal : Pos.pos -> ('a,'b) koutfmt -> 'a = fun pos fmt -> (** Module type of a parser. *) module type PARSER = sig - val parse : in_channel -> Syntax.ast - (** [parse inchan] returns a stream of commands parsed from - channel [inchan]. Commands are parsed lazily and the channel is + val parse_in_channel : in_channel -> ast + (** [parse ic] returns a stream of commands parsed from + channel [ic]. Commands are parsed lazily and the channel is closed once all entries are parsed. *) - val parse_file : string -> Syntax.ast + val parse_file : string -> ast (** [parse_file fname] returns a stream of parsed commands of file [fname]. Commands are parsed lazily. *) - val parse_string : string -> string -> Syntax.ast + val parse_string : string -> string -> ast (** [parse_string f s] returns a stream of parsed commands from string [s] which comes from file [f] ([f] can be anything). *) end -module Lp : -sig - include PARSER +(** Parsing dk syntax. *) +module Dk : PARSER = struct - val parse_term : in_channel -> Syntax.p_term Stream.t - (** [parse inchan] returns a stream of terms parsed from - channel [inchan]. Terms are parsed lazily and the channel is - closed once all entries are parsed. *) + open Lexing - val parse_term_file : string -> Syntax.p_term Stream.t - (** [parse_file fname] returns a stream of parsed terms of file - [fname]. Terms are parsed lazily. *) + (* defined in OCaml >= 4.11 only *) + let set_filename (lb:lexbuf) (fname:string): unit = + lb.lex_curr_p <- {lb.lex_curr_p with pos_fname = fname} - val parse_term_string : string -> string -> Syntax.p_term Stream.t - (** [parse_string f s] returns a stream of parsed terms from string [s] - which comes from file [f] ([f] can be anything). *) + (* old code: + let parse_lexbuf : + ?ic:in_channel -> ?fname:string -> lexbuf -> p_command Stream.t = + fun ?ic ?fname lb -> + Option.iter (set_filename lb) fname; + let generator _ = + try Some (command lb) + with + | End_of_file -> Option.iter close_in ic; None + | DkParser.Error -> + let pos = Pos.locate (lb.lex_start_p, lb.lex_curr_p) in + parser_fatal pos "Unexpected token \"%s\"." (lexeme lb) + in + Stream.from generator + + let parse_string fname s = parse_lexbuf ~fname (from_string s) + + let parse_in_channel ic = + try parse_lexbuf ~ic (from_channel ic) + with e -> close_in ic; raise e + + let parse_file fname = + let ic = open_in fname in + parse_lexbuf ~ic ~fname (from_channel ic)*) + + let parse_lexbuf (icopt:in_channel option) (entry:lexbuf -> 'a) (lb:lexbuf) + : 'a Stream.t = + let generator _ = + try Some(entry lb) + with + | End_of_file -> Option.iter close_in icopt; None + | DkParser.Error -> + let pos = Pos.locate (lb.lex_start_p, lb.lex_curr_p) in + parser_fatal pos "Unexpected token \"%s\"." (lexeme lb) + in + Stream.from generator + + let parse_in_channel (entry:lexbuf -> 'a) (ic:in_channel): 'a Stream.t = + parse_lexbuf (Some ic) entry (from_channel ic) + + let parse_file entry fname = parse_in_channel entry (open_in fname) + + let parse_string (entry: lexbuf -> 'a) (fname:string) (s:string) + : 'a Stream.t = + let lb = from_string s in + set_filename lb fname; + parse_lexbuf None entry lb + + let command = + let r = ref (Pos.none (P_open [])) in + fun (lb:lexbuf): p_command -> + Debug.(record_time Parsing + (fun () -> r := DkParser.line DkLexer.token lb)); !r + + (* exported functions *) + let parse_string = parse_string command + let parse_in_channel = parse_in_channel command + let parse_file = parse_file command + +end + +(** Parsing lp syntax. *) +module Lp : +sig + include PARSER val parse_search_query_string : string -> string -> SearchQuerySyntax.query Stream.t (** [parse_search_query_string f s] returns a stream of parsed terms from string [s] which comes from file [f] ([f] can be anything). *) - val parse_qid : string -> Core.Term.qident end = struct - let stream_of_lexbuf : - grammar_entry:(LpLexer.token,'b) MenhirLib.Convert.traditional -> - ?inchan:in_channel -> ?fname:string -> Sedlexing.lexbuf -> + open LpLexer + open Sedlexing + + (* old Menhir parser *) + + type tokenizer = unit -> token * lexpos * lexpos + type 'a parser = tokenizer -> 'a + + let parse_lexbuf : + grammar_entry:(token,'b) MenhirLib.Convert.traditional -> + ?ic:in_channel -> ?fname:string -> lexbuf -> (* Input channel passed as parameter to be closed at the end of stream. *) 'a Stream.t = - fun ~grammar_entry ?inchan ?fname lb -> - Option.iter (Sedlexing.set_filename lb) fname; - let parse = - MenhirLib.Convert.Simplified.traditional2revised - grammar_entry - in + fun ~grammar_entry ?ic ?fname lb -> + Option.iter (set_filename lb) fname; + let parse: 'a parser = + MenhirLib.Convert.Simplified.traditional2revised grammar_entry in let token = LpLexer.token lb in let generator _ = - try Some(parse token) + try Some (parse token) with - | End_of_file -> Option.iter close_in inchan; None - | LpLexer.SyntaxError {pos=None; _} -> assert false - | LpLexer.SyntaxError {pos=Some pos; elt} -> parser_fatal pos "%s" elt + | End_of_file -> Option.iter close_in ic; None + | SyntaxError {pos=None; _} -> assert false + | SyntaxError {pos=Some pos; elt} -> parser_fatal pos "%s" elt | LpParser.Error -> - let pos = Pos.locate (Sedlexing.lexing_positions lb) in - parser_fatal pos "Unexpected token: \"%s\"." - (Sedlexing.Utf8.lexeme lb) + let pos = Pos.locate (lexing_positions lb) in + parser_fatal pos "Unexpected token: \"%s\"." (Utf8.lexeme lb) in Stream.from generator - let parse ~grammar_entry inchan = - stream_of_lexbuf ~grammar_entry ~inchan - (Sedlexing.Utf8.from_channel inchan) - - let parse_file ~grammar_entry fname = - let inchan = open_in fname in - stream_of_lexbuf ~grammar_entry ~inchan ~fname - (Sedlexing.Utf8.from_channel inchan) - let parse_string ~grammar_entry fname s = - stream_of_lexbuf ~grammar_entry ~fname (Sedlexing.Utf8.from_string s) + parse_lexbuf ~grammar_entry ~fname (Utf8.from_string s) - let parse_term = parse ~grammar_entry:LpParser.term_alone - let parse_term_string = parse_string ~grammar_entry:LpParser.term_alone let parse_search_query_string = parse_string ~grammar_entry:LpParser.search_query_alone - let parse_term_file = parse_file ~grammar_entry:LpParser.term_alone - - let parse_qid s = - let stream = parse_string ~grammar_entry:LpParser.qid_alone "LPSearch" s in - (Stream.next stream).elt - - let parse = parse ~grammar_entry:LpParser.command - let parse_string = parse_string ~grammar_entry:LpParser.command - let parse_file fname = (*parse_file ~grammar_entry:LpParser.command fname*) - let inchan = open_in fname in - let lb = Sedlexing.Utf8.from_channel inchan in - Sedlexing.set_filename lb fname; + + (*let parse_in_channel ~grammar_entry ic = + parse_lexbuf ~grammar_entry ~ic (Utf8.from_channel ic) + + let parse_file ~grammar_entry fname = + let ic = open_in fname in + parse_lexbuf ~grammar_entry ~ic ~fname (Utf8.from_channel ic) + + let parse_in_channel = parse_in_channel ~grammar_entry:LpParser.command + let parse_file fname = parse_file ~grammar_entry:LpParser.command fname + let parse_string = parse_string ~grammar_entry:LpParser.command*) + + (* new parser *) + + let parse_lexbuf icopt entry lb = let generator _ = - try Some(Ll1.command lb) + try Some(entry lb) with - | End_of_file -> close_in inchan; None - | LpLexer.SyntaxError {pos=None; _} -> assert false - | LpLexer.SyntaxError {pos=Some pos; elt} -> parser_fatal pos "%s" elt - | LpParser.Error -> - let pos = Pos.locate (Sedlexing.lexing_positions lb) in - parser_fatal pos "Unexpected token: \"%s\"." - (Sedlexing.Utf8.lexeme lb) + | End_of_file -> Option.iter close_in icopt; None + | SyntaxError{pos=None; _} -> assert false + | SyntaxError{pos=Some pos; elt} -> parser_fatal pos "%s" elt in Stream.from generator -end - -(** Parsing dk syntax. *) -module Dk : PARSER = struct - - let token : Lexing.lexbuf -> DkTokens.token = - let r = ref DkTokens.EOF in fun lb -> - Debug.(record_time Lexing (fun () -> r := DkLexer.token lb)); !r + let parse_string entry fname s = + let lb = Utf8.from_string s in + set_filename lb fname; + parse_lexbuf None entry lb - let command : - (Lexing.lexbuf -> DkTokens.token) -> Lexing.lexbuf -> Syntax.p_command = - let r = ref (Pos.none (Syntax.P_open [])) in fun token lb -> - Debug.(record_time Parsing (fun () -> r := DkParser.line token lb)); !r + let parse_in_channel entry ic = + parse_lexbuf (Some ic) entry (Utf8.from_channel ic) - let stream_of_lexbuf : - ?inchan:in_channel -> ?fname:string -> Lexing.lexbuf -> - (* Input channel passed as parameter to be closed at the end of stream. *) - Syntax.p_command Stream.t = - fun ?inchan ?fname lb -> - let fn n = - lb.lex_curr_p <- {lb.lex_curr_p with pos_fname = n} - in - Option.iter fn fname; - (*In OCaml >= 4.11: Lexing.set_filename lb fname;*) - let generator _ = - try Some (command token lb) - with - | End_of_file -> Option.iter close_in inchan; None - | DkParser.Error -> - let pos = Pos.locate (Lexing.(lb.lex_start_p, lb.lex_curr_p)) in - parser_fatal pos "Unexpected token \"%s\"." (Lexing.lexeme lb) - in - Stream.from generator + (*let parse_file entry fname = + let ic = open_in fname in + let lb = Utf8.from_channel ic in + set_filename lb fname; (* useful? *) + let x = parse_lexbuf entry lb in + close_in ic; + x*) - let parse inchan = - try stream_of_lexbuf ~inchan (Lexing.from_channel inchan) - with e -> close_in inchan; raise e + let parse_file entry fname = parse_in_channel entry (open_in fname) - let parse_file fname = - let inchan = open_in fname in - stream_of_lexbuf ~inchan ~fname (Lexing.from_channel inchan) + (* exported functions *) + let parse_string = parse_string Ll1.command + let parse_in_channel = parse_in_channel Ll1.command + let parse_file = parse_file Ll1.command - let parse_string fname s = stream_of_lexbuf ~fname (Lexing.from_string s) end -(* Include parser of new syntax so that functions are directly available.*) include Lp (** [path_of_string s] converts the string [s] into a path. *) @@ -196,7 +229,7 @@ let qident_of_string : string -> Core.Term.qident = fun s -> (** [parse_file fname] selects and runs the correct parser on file [fname], by looking at its extension. *) -let parse_file : string -> Syntax.ast = fun fname -> +let parse_file : string -> ast = fun fname -> match Filename.check_suffix fname Library.lp_src_extension with | true -> Lp.parse_file fname | false -> Dk.parse_file fname From c13d4ec87886e7b60f9ab18a16dcea54a16c2ba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Blanqui?= Date: Thu, 1 Aug 2024 20:50:56 +0200 Subject: [PATCH 4/7] extend new parser to search command --- src/parsing/ll1.ml | 116 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/src/parsing/ll1.ml b/src/parsing/ll1.ml index 337638677..0d00636c7 100644 --- a/src/parsing/ll1.ml +++ b/src/parsing/ll1.ml @@ -5,6 +5,7 @@ open Core open LpLexer open Lexing open Sedlexing +open SearchQuerySyntax (*let log = Logger.make 'n' "pars" "parsing" let log = log.pp*) @@ -357,6 +358,8 @@ let term_id (lb:lexbuf): p_term = | _ -> expected "" [UID"";QID[];UID_EXPL"";QID_EXPL[]] +(* commands *) + let rec command pos1 p_sym_mod (lb:lexbuf): p_command = (*if log_enabled() then log "expected: %s" __FUNCTION__;*) match current_token() with @@ -684,6 +687,8 @@ and equation (lb:lexbuf): p_term * p_term = let r = term lb in (l, r) +(* queries *) + and query (lb:lexbuf): p_query = (*if log_enabled() then log "expected: %s" __FUNCTION__;*) match current_token() with @@ -787,6 +792,8 @@ and term_proof (lb:lexbuf): p_term option * (p_proof * p_proof_end) option = Some t, None end +(* proofs *) + and proof (lb:lexbuf): p_proof * p_proof_end = (*if log_enabled() then log "expected: %s" __FUNCTION__;*) consume BEGIN lb; @@ -1129,6 +1136,8 @@ and rw_patt_spec (lb:lexbuf): p_rw_patt = | _ -> expected "" [L_SQ_BRACKET] +(* terms *) + and params (lb:lexbuf): p_params = (*if log_enabled() then log "expected: %s" __FUNCTION__;*) match current_token() with @@ -1413,6 +1422,113 @@ and binder (lb:lexbuf): p_params list * p_term = | _ -> expected "" [UID"";NAT"";UNDERSCORE;L_PAREN;L_SQ_BRACKET] +(* search *) + +and where (lb:lexbuf): bool * inside option = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | UID u -> + let r = + match u with + | "=" -> Some Exact + | ">" -> Some Inside + | "≥" + | ">=" -> None + | _ -> expected "\">\", \"=\", \"≥\",\">=\"" [] + in + consume_token lb; + let g = + match current_token() with + | GENERALIZE -> consume_token lb; true + | _ -> false + in + g,r + | _ -> + expected "\">\", \"=\", \"≥\",\">=\"" [] + +and asearch_query(lb:lexbuf): query = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + match current_token() with + | TYPE_QUERY -> + consume_token lb; + let g, w = where lb in + let t = aterm lb in + if w <> None then expected "\"≥\", \">=\"" [] + else QBase(QSearch(t,g,Some(QType None))) + | RULE -> + consume_token lb; + let g, w = where lb in + let t = aterm lb in + QBase(QSearch(t,g,Some (QXhs(w,None)))) + | UID k -> + consume_token lb; + let g, w = where lb in + let t = aterm lb in + begin + match k, t.elt with + | "name", P_Iden(id,false) -> + assert (fst id.elt = []); + if w <> Some Exact then expected "\"=\"" [] + else if g then + expected "\"generalize\" cannot be used with \"name\"" [] + else QBase(QName(snd id.elt)) + | "name", _ -> + expected "path prefix" [] + | "anywhere", _ -> + if w <> None then expected "\"≥\", \">=\"" [] + else QBase(QSearch(t,g,None)) + | "spine",_ -> + QBase(QSearch(t,g,Some(QType(Some(Spine w))))) + | "concl",_ -> + QBase(QSearch(t,g,Some(QType(Some(Conclusion w))))) + | "hyp",_ -> + QBase(QSearch(t,g,Some(QType(Some(Hypothesis w))))) + | "lhs",_ -> + QBase(QSearch(t,g,Some(QXhs(w,Some Lhs)))) + | "rhs",_ -> + QBase(QSearch(t,g,Some(QXhs(w,Some Rhs)))) + | _ -> + expected "Unknown keyword" [] + end + | L_PAREN -> + consume_token lb; + let q = search_query lb in + consume R_PAREN lb; + q + | _ -> + expected "" [TYPE_QUERY;RULE;UID"";L_PAREN] + +and csearch_query (lb:lexbuf): query = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + let aq = asearch_query lb in + match current_token() with + | COMMA -> + let aqs = list (prefix COMMA asearch_query) lb in + List.fold_left (fun x aq -> QOpp(x,Intersect,aq)) aq aqs + | _ -> + aq + +and ssearch_query (lb:lexbuf): query = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + let cq = csearch_query lb in + match current_token() with + | SEMICOLON -> + let cqs = list (prefix SEMICOLON csearch_query) lb in + List.fold_left (fun x cq -> QOpp(x,Union,cq)) cq cqs + | _ -> + cq + +and search_query (lb:lexbuf): query = + (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + let q = ssearch_query lb in + let qids = list (prefix VBAR qid) lb in + let path_of_qid qid = + let p,n = qid.elt in + if p = [] then n + else Format.asprintf "%a.%a" Print.path p Print.uid n + in + List.fold_left (fun x qid -> QFilter(x,Path(path_of_qid qid))) q qids + let command (lb:lexbuf): p_command = (*if log_enabled() then log "------------------- start reading command";*) consume_token lb; From 4c8c8ee18b1584614fdece17b874e243c5f2308b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Blanqui?= Date: Fri, 2 Aug 2024 19:48:42 +0200 Subject: [PATCH 5/7] wip --- src/common/error.ml | 3 +- src/common/pos.ml | 4 +- src/handle/command.ml | 4 +- src/handle/query.ml | 2 +- src/parsing/ll1.ml | 199 ++++++++++++++++++++------------------- src/parsing/lpParser.mly | 2 +- src/parsing/parser.ml | 57 ++++++----- src/parsing/pretty.ml | 26 ++++- src/parsing/syntax.ml | 27 +++++- src/tool/indexing.ml | 43 ++++----- src/tool/indexing.mli | 2 + 11 files changed, 209 insertions(+), 160 deletions(-) diff --git a/src/common/error.ml b/src/common/error.ml index 975284cc7..49dd4eba8 100644 --- a/src/common/error.ml +++ b/src/common/error.ml @@ -73,4 +73,5 @@ let handle_exceptions : (unit -> unit) -> unit = fun f -> try f () with | Fatal(None, msg) -> exit_with "%s" msg | Fatal(Some(p), msg) -> exit_with "[%a] %s" Pos.pp p msg - | e -> exit_with "Uncaught [%s]." (Printexc.to_string e) + | e -> + exit_with "Uncaught exception: %s" (Printexc.to_string e) diff --git a/src/common/pos.ml b/src/common/pos.ml index 4587ab0f7..4ed68be8a 100644 --- a/src/common/pos.ml +++ b/src/common/pos.ml @@ -102,8 +102,8 @@ let to_string : ?print_fname:bool -> pos -> string = let popt_to_string : ?print_fname:bool -> popt -> string = fun ?(print_fname=true) pop -> match pop with - | None -> "Unknown location " - | Some (p) -> to_string ~print_fname p ^ " " + | None -> "Unknown location" + | Some (p) -> to_string ~print_fname p (** [pp ppf pos] prints the optional position [pos] on [ppf]. *) let pp : popt Lplib.Base.pp = fun ppf p -> diff --git a/src/handle/command.ml b/src/handle/command.ml index a9c820deb..32c1929f4 100644 --- a/src/handle/command.ml +++ b/src/handle/command.ml @@ -579,8 +579,8 @@ let get_proof_data : compiler -> sig_state -> p_command -> cmd_output = with | Timeout as e -> raise e | Fatal(Some(Some(_)),_) as e -> raise e - | Fatal(None ,m) -> fatal pos "Error on command.@.%s" m - | Fatal(Some(None) ,m) -> fatal pos "Error on command.@.%s" m + | Fatal(None ,m) -> fatal pos "%s" m + | Fatal(Some(None) ,m) -> fatal pos "%s" m | e -> fatal pos "Uncaught exception: %s." (Printexc.to_string e) diff --git a/src/handle/query.ml b/src/handle/query.ml index 7c84b3692..886289194 100644 --- a/src/handle/query.ml +++ b/src/handle/query.ml @@ -156,7 +156,7 @@ let handle : Sig_state.t -> proof_state option -> p_query -> result = let ctxt = Env.to_ctxt env in let p = new_problem() in match elt with - | P_query_search s -> return string (Tool.Indexing.search_cmd_txt s) + | P_query_search q -> return string (Tool.Indexing.query_results q) | P_query_debug(_,_) | P_query_verbose(_) | P_query_flag(_,_) diff --git a/src/parsing/ll1.ml b/src/parsing/ll1.ml index 0d00636c7..2943795d7 100644 --- a/src/parsing/ll1.ml +++ b/src/parsing/ll1.ml @@ -5,51 +5,12 @@ open Core open LpLexer open Lexing open Sedlexing -open SearchQuerySyntax -(*let log = Logger.make 'n' "pars" "parsing" -let log = log.pp*) +let log = Logger.make 'n' "pars" "parsing" +let log = log.pp (* token management *) -let the_current_token : (token * position * position) Stdlib.ref = - Stdlib.ref (EOF, dummy_pos, dummy_pos) - -let current_token() : token = - let (t,_,_) = !the_current_token in - (*if log_enabled() then log "current_token: %a" pp_token t;*) - t - -let current_pos() : position * position = - let (_,p1,p2) = !the_current_token in (p1,p2) - -let consume_token (lb:lexbuf) : unit = - the_current_token := LpLexer.token lb ()(*; - if log_enabled() then log "read new token"*) - -(* building positions and terms *) - -let make_pos (lps:position * position): 'a -> 'a loc = - Pos.make_pos (fst lps, snd (current_pos())) - -let qid_of_path (lps: position * position): - string list -> (string list * string) loc = function - | [] -> assert false - | id::mp -> make_pos lps (List.rev mp, id) - -let make_abst (pos1:position) (ps:p_params list) (t:p_term) (pos2:position) - :p_term = - if ps = [] then t else make_pos (pos1,pos2) (P_Abst(ps,t)) - -let make_prod (pos1:position) (ps:p_params list) (t:p_term) (pos2:position) - :p_term = - if ps = [] then t else make_pos (pos1,pos2) (P_Prod(ps,t)) - -let ident_of_term pos1 {elt; _} = - match elt with - | P_Iden({elt=([], x); pos}, _) -> Pos.make pos x - | _ -> LpLexer.syntax_error pos1 "not an identifier" - (* error messages *) let string_of_token = function @@ -147,26 +108,65 @@ let string_of_token = function let pp_token ppf t = Base.string ppf (string_of_token t) +let the_current_token : (token * position * position) Stdlib.ref = + Stdlib.ref (EOF, dummy_pos, dummy_pos) + +let current_token() : token = + let (t,_,_) = !the_current_token in + if log_enabled() then log "current_token: %a" pp_token t; + t + +let current_pos() : position * position = + let (_,p1,p2) = !the_current_token in (p1,p2) + let expected (msg:string) (tokens:token list): 'a = - if msg <> "" then syntax_error (current_pos()) ("expected: "^msg) + if msg <> "" then syntax_error (current_pos()) ("Expected: "^msg^".") else match tokens with | [] -> assert false | t::ts -> let soft = string_of_token in syntax_error (current_pos()) - (List.fold_left (fun s t -> s^", "^soft t) ("expected: "^soft t) ts) + (List.fold_left (fun s t -> s^", "^soft t) ("Expected: "^soft t) ts + ^".") + +let consume_token (lb:lexbuf) : unit = + the_current_token := token lb (); + if log_enabled() then log "read new token" + +(* building positions and terms *) + +let make_pos (lps:position * position): 'a -> 'a loc = + Pos.make_pos (fst lps, snd (current_pos())) + +let qid_of_path (lps: position * position): + string list -> (string list * string) loc = function + | [] -> assert false + | id::mp -> make_pos lps (List.rev mp, id) + +let make_abst (pos1:position) (ps:p_params list) (t:p_term) (pos2:position) + :p_term = + if ps = [] then t else make_pos (pos1,pos2) (P_Abst(ps,t)) + +let make_prod (pos1:position) (ps:p_params list) (t:p_term) (pos2:position) + :p_term = + if ps = [] then t else make_pos (pos1,pos2) (P_Prod(ps,t)) + +let ident_of_term pos1 {elt; _} = + match elt with + | P_Iden({elt=([], x); pos}, _) -> Pos.make pos x + | _ -> LpLexer.syntax_error pos1 "not an identifier." (* generic parsing functions *) let list (elt:lexbuf -> 'a) (lb:lexbuf): 'a list = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) let acc = ref [] in (try while true do acc := elt lb :: !acc done with SyntaxError _ -> ()); List.rev !acc let nelist (elt:lexbuf -> 'a) (lb:lexbuf): 'a list = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) let x = elt lb in x :: list elt lb @@ -207,7 +207,7 @@ let consume_NAT (lb:lexbuf): string = expected "" [NAT""] let qid (lb:lexbuf): (string list * string) loc = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | UID s -> let pos1 = current_pos() in @@ -221,7 +221,7 @@ let qid (lb:lexbuf): (string list * string) loc = expected "" [UID"";QID[]] let qid_expl (lb:lexbuf): (string list * string) loc = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | UID_EXPL s -> let pos1 = current_pos() in @@ -235,7 +235,7 @@ let qid_expl (lb:lexbuf): (string list * string) loc = expected "" [UID_EXPL"";QID_EXPL[]] let uid (lb:lexbuf): string loc = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | UID s -> let pos1 = current_pos() in @@ -245,7 +245,7 @@ let uid (lb:lexbuf): string loc = expected "" [UID""] let param (lb:lexbuf): string loc option = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | UID s | NAT s -> @@ -259,7 +259,7 @@ let param (lb:lexbuf): string loc option = expected "non-qualified identifier or \"_\"" [UID"";NAT"";UNDERSCORE] let int (lb:lexbuf): string = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | NAT s | NEG_NAT s -> @@ -269,7 +269,7 @@ let int (lb:lexbuf): string = expected "integer" [NAT"";NEG_NAT""] let float_or_int (lb:lexbuf): string = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | NAT s | NEG_NAT s @@ -280,7 +280,7 @@ let float_or_int (lb:lexbuf): string = expected "integer or float" [NAT"";NEG_NAT"";FLOAT""] let uid_or_int (lb:lexbuf): string loc = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | UID s | NAT s @@ -292,7 +292,7 @@ let uid_or_int (lb:lexbuf): string loc = expected "non-qualified identifier" [UID"";NAT"";NEG_NAT""] let qid_or_int (lb:lexbuf): (string list * string) loc = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | QID p -> let pos1 = current_pos() in @@ -308,7 +308,7 @@ let qid_or_int (lb:lexbuf): (string list * string) loc = expected "possibly qualified identifier" [UID"";QID[];NAT"";NEG_NAT""] let path (lb:lexbuf): string list loc = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with (*| UID s -> let pos1 = current_pos() in @@ -321,7 +321,7 @@ let path (lb:lexbuf): string list loc = expected "" [QID[]] let qid_or_rule (lb:lexbuf): (string list * string) loc = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | UID s -> let pos1 = current_pos() in @@ -343,7 +343,7 @@ let qid_or_rule (lb:lexbuf): (string list * string) loc = expected "" [UID"";QID[];UNIF_RULE;COERCE_RULE] let term_id (lb:lexbuf): p_term = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | UID _ | QID _ -> @@ -361,7 +361,7 @@ let term_id (lb:lexbuf): p_term = (* commands *) let rec command pos1 p_sym_mod (lb:lexbuf): p_command = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | SIDE _ | ASSOCIATIVE @@ -589,7 +589,7 @@ and constructor (lb:lexbuf): p_ident * p_term = i, make_prod (fst pos1) ps t (snd (current_pos())) and modifier (lb:lexbuf): p_modifier = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | SIDE d -> let pos1 = current_pos() in @@ -630,7 +630,7 @@ and modifier (lb:lexbuf): p_modifier = exposition lb and exposition (lb:lexbuf): p_modifier = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | PRIVATE -> let pos1 = current_pos() in @@ -644,7 +644,7 @@ and exposition (lb:lexbuf): p_modifier = expected "" [PRIVATE;PROTECTED] and notation (lb:lexbuf): string Sign.notation = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | INFIX -> consume_token lb; @@ -673,7 +673,7 @@ and notation (lb:lexbuf): string Sign.notation = expected "" [INFIX;POSTFIX;PREFIX;QUANTIFIER] and rule (lb:lexbuf): (p_term * p_term) loc = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) let pos1 = current_pos() in let l = term lb in consume HOOK_ARROW lb; @@ -681,7 +681,7 @@ and rule (lb:lexbuf): (p_term * p_term) loc = make_pos pos1 (l, r) and equation (lb:lexbuf): p_term * p_term = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) let l = term lb in consume EQUIV lb; let r = term lb in @@ -690,7 +690,7 @@ and equation (lb:lexbuf): p_term * p_term = (* queries *) and query (lb:lexbuf): p_query = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | ASSERT b -> let pos1 = current_pos() in @@ -768,15 +768,16 @@ and query (lb:lexbuf): p_query = consume_token lb; let t = term lb in make_pos pos1 (P_query_infer(t, {strategy=NONE; steps=None})) - (*| SEARCH s -> + | SEARCH -> let pos1 = current_pos() in consume_token lb; - make_pos pos1 (P_query_search s)*) + let q = search lb in + make_pos pos1 (P_query_search q) | _ -> expected "query" [] and term_proof (lb:lexbuf): p_term option * (p_proof * p_proof_end) option = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | BEGIN -> let p = proof lb in @@ -795,7 +796,7 @@ and term_proof (lb:lexbuf): p_term option * (p_proof * p_proof_end) option = (* proofs *) and proof (lb:lexbuf): p_proof * p_proof_end = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) consume BEGIN lb; match current_token() with | L_CU_BRACKET -> @@ -843,7 +844,7 @@ and proof (lb:lexbuf): p_proof * p_proof_end = expected "subproof, tactic or query" [] and subproof (lb:lexbuf): p_proofstep list = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | L_CU_BRACKET -> consume_token lb; @@ -854,7 +855,7 @@ and subproof (lb:lexbuf): p_proofstep list = expected "" [L_CU_BRACKET] and steps (lb:lexbuf): p_proofstep list = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with (*queries*) | ASSERT _ @@ -896,13 +897,13 @@ and steps (lb:lexbuf): p_proofstep list = expected "tactic or query" [] and step (lb:lexbuf): p_proofstep = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) let t = tactic lb in let l = list subproof lb in Tactic(t, l) and proof_end (lb:lexbuf): p_proof_end = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | ABORT -> let pos1 = current_pos() in @@ -920,7 +921,7 @@ and proof_end (lb:lexbuf): p_proof_end = expected "" [ABORT;ADMITTED;END] and tactic (lb:lexbuf): p_tactic = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with (*queries*) | ASSERT _ @@ -1054,7 +1055,7 @@ and tactic (lb:lexbuf): p_tactic = expected "tactic" [] and rw_patt (lb:lexbuf): p_rw_patt = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with (* bterm *) | BACKQUOTE @@ -1126,7 +1127,7 @@ and rw_patt (lb:lexbuf): p_rw_patt = expected "term or keyword \"in\"" [] and rw_patt_spec (lb:lexbuf): p_rw_patt = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | L_SQ_BRACKET -> consume_token lb; @@ -1139,7 +1140,7 @@ and rw_patt_spec (lb:lexbuf): p_rw_patt = (* terms *) and params (lb:lexbuf): p_params = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | L_PAREN -> consume_token lb; @@ -1178,7 +1179,7 @@ and params (lb:lexbuf): p_params = [x], None, false and term (lb:lexbuf): p_term = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with (* bterm *) | BACKQUOTE @@ -1206,7 +1207,7 @@ and term (lb:lexbuf): p_term = expected "term" [] and app (pos1:position * position) (t: p_term) (lb:lexbuf): p_term = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with (* aterm *) | UID _ @@ -1239,7 +1240,7 @@ and app (pos1:position * position) (t: p_term) (lb:lexbuf): p_term = t and bterm (lb:lexbuf): p_term = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | BACKQUOTE -> let pos1 = current_pos() in @@ -1285,7 +1286,7 @@ and bterm (lb:lexbuf): p_term = expected "" [BACKQUOTE;PI;LAMBDA;LET] and aterm (lb:lexbuf): p_term = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | UID _ | QID _ @@ -1349,7 +1350,7 @@ and aterm (lb:lexbuf): p_term = brackets" [] and env (lb:lexbuf): p_term list = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | L_SQ_BRACKET -> consume_token lb; @@ -1368,7 +1369,7 @@ and env (lb:lexbuf): p_term list = expected "" [L_SQ_BRACKET] and binder (lb:lexbuf): p_params list * p_term = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | UID _ | NAT _ @@ -1425,7 +1426,7 @@ and binder (lb:lexbuf): p_params list * p_term = (* search *) and where (lb:lexbuf): bool * inside option = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) + if log_enabled() then log "Expected: %s" __FUNCTION__; match current_token() with | UID u -> let r = @@ -1446,8 +1447,8 @@ and where (lb:lexbuf): bool * inside option = | _ -> expected "\">\", \"=\", \"≥\",\">=\"" [] -and asearch_query(lb:lexbuf): query = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) +and asearch (lb:lexbuf): query = + if log_enabled() then log "Expected: %s" __FUNCTION__; match current_token() with | TYPE_QUERY -> consume_token lb; @@ -1459,7 +1460,7 @@ and asearch_query(lb:lexbuf): query = consume_token lb; let g, w = where lb in let t = aterm lb in - QBase(QSearch(t,g,Some (QXhs(w,None)))) + QBase(QSearch(t,g,Some(QXhs(w,None)))) | UID k -> consume_token lb; let g, w = where lb in @@ -1492,35 +1493,35 @@ and asearch_query(lb:lexbuf): query = end | L_PAREN -> consume_token lb; - let q = search_query lb in + let q = search lb in consume R_PAREN lb; q | _ -> - expected "" [TYPE_QUERY;RULE;UID"";L_PAREN] + expected "name, anywhere, rule, lhs, rhs, type, concl, hyp, spine" [] -and csearch_query (lb:lexbuf): query = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) - let aq = asearch_query lb in +and csearch (lb:lexbuf): query = + if log_enabled() then log "Expected: %s" __FUNCTION__; + let aq = asearch lb in match current_token() with | COMMA -> - let aqs = list (prefix COMMA asearch_query) lb in + let aqs = list (prefix COMMA asearch) lb in List.fold_left (fun x aq -> QOpp(x,Intersect,aq)) aq aqs | _ -> aq -and ssearch_query (lb:lexbuf): query = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) - let cq = csearch_query lb in +and ssearch (lb:lexbuf): query = + if log_enabled() then log "Expected: %s" __FUNCTION__; + let cq = csearch lb in match current_token() with | SEMICOLON -> - let cqs = list (prefix SEMICOLON csearch_query) lb in + let cqs = list (prefix SEMICOLON csearch) lb in List.fold_left (fun x cq -> QOpp(x,Union,cq)) cq cqs | _ -> cq -and search_query (lb:lexbuf): query = - (*if log_enabled() then log "expected: %s" __FUNCTION__;*) - let q = ssearch_query lb in +and search (lb:lexbuf): query = + if log_enabled() then log "Expected: %s" __FUNCTION__; + let q = ssearch lb in let qids = list (prefix VBAR qid) lb in let path_of_qid qid = let p,n = qid.elt in @@ -1530,7 +1531,7 @@ and search_query (lb:lexbuf): query = List.fold_left (fun x qid -> QFilter(x,Path(path_of_qid qid))) q qids let command (lb:lexbuf): p_command = - (*if log_enabled() then log "------------------- start reading command";*) + if log_enabled() then log "------------------- start reading command"; consume_token lb; if current_token() = EOF then raise End_of_file else diff --git a/src/parsing/lpParser.mly b/src/parsing/lpParser.mly index c25a151fc..d7b7e3c62 100644 --- a/src/parsing/lpParser.mly +++ b/src/parsing/lpParser.mly @@ -207,7 +207,7 @@ query: | TYPE_QUERY t=term { make_pos $sloc (P_query_infer(t, {strategy=NONE; steps=None}))} | SEARCH s=STRINGLIT - { make_pos $sloc (P_query_search s) } + { make_pos $sloc (P_query_search(QBase(QName s))) } qid_or_rule: | i=qid { i } diff --git a/src/parsing/parser.ml b/src/parsing/parser.ml index 07b977eac..c65120415 100644 --- a/src/parsing/parser.ml +++ b/src/parsing/parser.ml @@ -10,8 +10,6 @@ open Common open Syntax open Lexing -type lexpos = Lexing.position - (** [parser_fatal pos fmt] is a wrapper for [Error.fatal] that enforces that the error has an attached source code position. *) let parser_fatal : Pos.pos -> ('a,'b) koutfmt -> 'a = fun pos fmt -> @@ -103,25 +101,22 @@ module Dk : PARSER = struct end +open LpLexer +open Sedlexing + (** Parsing lp syntax. *) module Lp : sig include PARSER - val parse_search_query_string : - string -> string -> SearchQuerySyntax.query Stream.t - (** [parse_search_query_string f s] returns a stream of parsed terms from - string [s] which comes from file [f] ([f] can be anything). *) + val parse_search_query_string : (*fname*)string -> (*query*)string -> query end = struct - open LpLexer - open Sedlexing - (* old Menhir parser *) - type tokenizer = unit -> token * lexpos * lexpos + type tokenizer = unit -> token * position * position type 'a parser = tokenizer -> 'a let parse_lexbuf : @@ -139,7 +134,8 @@ sig with | End_of_file -> Option.iter close_in ic; None | SyntaxError {pos=None; _} -> assert false - | SyntaxError {pos=Some pos; elt} -> parser_fatal pos "%s" elt + | SyntaxError {pos=Some pos; elt} -> + parser_fatal pos "Syntax error. %s" elt | LpParser.Error -> let pos = Pos.locate (lexing_positions lb) in parser_fatal pos "Unexpected token: \"%s\"." (Utf8.lexeme lb) @@ -149,8 +145,8 @@ sig let parse_string ~grammar_entry fname s = parse_lexbuf ~grammar_entry ~fname (Utf8.from_string s) - let parse_search_query_string = - parse_string ~grammar_entry:LpParser.search_query_alone + (*let parse_search_query_string = + parse_string ~grammar_entry:LpParser.search_query_alone*) (*let parse_in_channel ~grammar_entry ic = parse_lexbuf ~grammar_entry ~ic (Utf8.from_channel ic) @@ -165,22 +161,25 @@ sig (* new parser *) - let parse_lexbuf icopt entry lb = + let parse_lexbuf (icopt: in_channel option) (entry: lexbuf -> 'a) + (lb: lexbuf): 'a Stream.t = let generator _ = try Some(entry lb) with | End_of_file -> Option.iter close_in icopt; None | SyntaxError{pos=None; _} -> assert false - | SyntaxError{pos=Some pos; elt} -> parser_fatal pos "%s" elt + | SyntaxError{pos=Some pos; elt} -> + parser_fatal pos "Syntax error. %s" elt in Stream.from generator - let parse_string entry fname s = + let parse_string (entry: lexbuf -> 'a) (fname: string) (s: string) + : 'a Stream.t = let lb = Utf8.from_string s in set_filename lb fname; parse_lexbuf None entry lb - let parse_in_channel entry ic = + let parse_in_channel (entry: lexbuf -> 'a) (ic: in_channel): 'a Stream.t = parse_lexbuf (Some ic) entry (Utf8.from_channel ic) (*let parse_file entry fname = @@ -191,9 +190,13 @@ sig close_in ic; x*) - let parse_file entry fname = parse_in_channel entry (open_in fname) + let parse_file (entry: lexbuf -> 'a) (fname: string): 'a Stream.t = + parse_in_channel entry (open_in fname) (* exported functions *) + let parse_search_query_string (fname: string) (s: string): query = + Stream.next (parse_string (Ll1.alone Ll1.search) fname s) + let parse_string = parse_string Ll1.command let parse_in_channel = parse_in_channel Ll1.command let parse_file = parse_file Ll1.command @@ -202,30 +205,32 @@ end include Lp +open Error + (** [path_of_string s] converts the string [s] into a path. *) let path_of_string : string -> Path.t = fun s -> - let open LpLexer in - let lb = Sedlexing.Utf8.from_string s in + let lb = Utf8.from_string s in try begin match token lb () with | UID s, _, _ -> [s] | QID p, _, _ -> List.rev p - | _ -> Error.fatal_no_pos "\"%s\" is not a path." s + | _ -> fatal_no_pos "Syntax error: \"%s\" is not a path." s end - with SyntaxError _ -> Error.fatal_no_pos "\"%s\" is not a path." s + with SyntaxError _ -> + fatal_no_pos "Syntax error: \"%s\" is not a path." s (** [qident_of_string s] converts the string [s] into a qident. *) let qident_of_string : string -> Core.Term.qident = fun s -> - let open LpLexer in - let lb = Sedlexing.Utf8.from_string s in + let lb = Utf8.from_string s in try begin match token lb () with | QID [], _, _ -> assert false | QID (s::p), _, _ -> (List.rev p, s) - | _ -> Error.fatal_no_pos "\"%s\" is not a qualified identifier." s + | _ -> + fatal_no_pos "Syntax error: \"%s\" is not a qualified identifier." s end with SyntaxError _ -> - Error.fatal_no_pos "\"%s\" is not a qualified identifier." s + fatal_no_pos "Syntax error: \"%s\" is not a qualified identifier." s (** [parse_file fname] selects and runs the correct parser on file [fname], by looking at its extension. *) diff --git a/src/parsing/pretty.ml b/src/parsing/pretty.ml index f41b97ba2..08e054b54 100644 --- a/src/parsing/pretty.ml +++ b/src/parsing/pretty.ml @@ -239,6 +239,30 @@ let assertion : p_assertion pp = fun ppf a -> | P_assert_typing (t, a) -> out ppf "@[%a@ : %a@]" term t term a | P_assert_conv (t, u) -> out ppf "@[%a@ ≡ %a@]" term t term u +module Search = struct + let inside ppf s = string ppf (match s with Exact -> " =" | Inside -> " >") + let where elt ppf = function + | Spine x -> out ppf "spine%a" elt x + | Conclusion x -> out ppf "concl%a" elt x + | Hypothesis x -> out ppf "hyp%a" elt x + let constr ppf = function + | QType x -> out ppf "type%a" (Option.pp (where (Option.pp inside))) x + | QXhs (i,None) -> out ppf "rule%a" (Option.pp inside) i + | QXhs(i,Some Lhs) -> out ppf "lhs%a" (Option.pp inside) i + | QXhs(i,Some Rhs) -> out ppf "rhs%a" (Option.pp inside) i + let generalize ppf b = if b then string ppf " generalize" + let base_query ppf = function + | QName s -> out ppf "name %s" s + | QSearch(t,g,None) -> out ppf "anywhere%a%a" generalize g term t + | QSearch(t,g,Some c) -> out ppf "%a%a%a" constr c generalize g term t + let op ppf o = string ppf (match o with Union -> "; " | Intersect -> ", ") + let filter ppf (Path s) = out ppf " | %s" s + let rec query ppf = function + | QBase b -> base_query ppf b + | QOpp(q1,o,q2) -> out ppf "%a%a%a" query q1 op o query q2 + | QFilter(q,f) -> out ppf "%a%a" query q filter f +end + let query : p_query pp = fun ppf { elt; _ } -> match elt with | P_query_assert(true, a) -> out ppf "assertnot ⊢ %a" assertion a @@ -255,7 +279,7 @@ let query : p_query pp = fun ppf { elt; _ } -> | P_query_print(Some qid) -> out ppf "print %a" qident qid | P_query_proofterm -> out ppf "proofterm" | P_query_verbose i -> out ppf "verbose %s" i - | P_query_search s -> out ppf "search \"%s\"" s + | P_query_search q -> out ppf "search \"%a\"" Search.query q let rec tactic : p_tactic pp = fun ppf { elt; _ } -> begin match elt with diff --git a/src/parsing/syntax.ml b/src/parsing/syntax.ml index 3da081297..34f9c6c54 100644 --- a/src/parsing/syntax.ml +++ b/src/parsing/syntax.ml @@ -204,6 +204,29 @@ type p_assertion = | P_assert_conv of p_term * p_term (** The two given terms should be convertible. *) +(** Search queries. *) +type side = Lhs | Rhs +type inside = Exact | Inside +type 'a where = + | Spine of 'a + | Conclusion of 'a + | Hypothesis of 'a +type constr = + | QType of (inside option) where option + | QXhs of inside option * side option +type base_query = + | QName of string + | QSearch of p_term * (*generalize:*)bool * constr option +type op = + | Intersect + | Union +type filter = + | Path of string +type query = + | QBase of base_query + | QOpp of query * op * query + | QFilter of query * filter + (** Parser-level representation of a query command. *) type p_query_aux = | P_query_verbose of string @@ -226,7 +249,7 @@ type p_query_aux = (** Print information about a symbol or the current goals. *) | P_query_proofterm (** Print the current proof term (possibly containing open goals). *) - | P_query_search of string + | P_query_search of query (** Runs a search query *) (* I use a string here to be parsed later to avoid polluting LambdaPi code with index and retrieval code *) @@ -252,6 +275,7 @@ type p_tactic_aux = | P_tac_sym | P_tac_why3 of string option | P_tac_try of p_tactic + and p_tactic = p_tactic_aux loc (** [is_destructive t] says whether tactic [t] changes the current goal. *) @@ -398,6 +422,7 @@ let eq_p_query : p_query eq = fun {elt=q1;_} {elt=q2;_} -> | P_query_verbose n1, P_query_verbose n2 -> n1 = n2 | P_query_debug (b1,s1), P_query_debug (b2,s2) -> b1 = b2 && s1 = s2 | P_query_proofterm, P_query_proofterm -> true + | P_query_search q1, P_query_search q2 -> q1 = q2 | _, _ -> false let eq_p_tactic : p_tactic eq = fun {elt=t1;_} {elt=t2;_} -> diff --git a/src/tool/indexing.ml b/src/tool/indexing.ml index a21a507af..46a74304f 100644 --- a/src/tool/indexing.ml +++ b/src/tool/indexing.ml @@ -195,11 +195,11 @@ end module DB = struct (* fix codomain type *) - type side = Parsing.SearchQuerySyntax.side = Lhs | Rhs + type side = Parsing.Syntax.side = Lhs | Rhs - type inside = Parsing.SearchQuerySyntax.inside = Exact | Inside + type inside = Parsing.Syntax.inside = Exact | Inside - type 'inside where = 'inside Parsing.SearchQuerySyntax.where = + type 'inside where = 'inside Parsing.Syntax.where = | Spine of 'inside | Conclusion of 'inside | Hypothesis of 'inside @@ -562,7 +562,7 @@ include DB module QueryLanguage = struct - open Parsing.SearchQuerySyntax + open Parsing.Syntax let match_opt p x = match p,x with @@ -637,28 +637,19 @@ include QueryLanguage module UserLevelQueries = struct - let search_cmd_gen ~fail ~pp_results s = - try - let pstream = Parsing.Parser.Lp.parse_search_query_string "LPSearch" s in - let pq = Stream.next pstream in - let mok _ = None in - let items = answer_query ~mok [] pq in - Format.asprintf "%a@." pp_results items - with - | Stream.Failure -> - fail (Format.asprintf "Syntax error: a query was expected") - | Common.Error.Fatal(_,msg) -> - fail (Format.asprintf "Error: %s@." msg) - | exn -> - fail (Format.asprintf "Error: %s@." (Printexc.to_string exn)) - - let search_cmd_html s = - search_cmd_gen ~fail:(fun x -> "" ^ x ^ "") - ~pp_results:html_of_item_set s - - let search_cmd_txt s = - search_cmd_gen ~fail:(fun x -> Common.Error.fatal_no_pos "%s" x) - ~pp_results:pp_item_set s + let query_results_gen pp_results q = + let mok _ = None in + let items = answer_query ~mok [] q in + Format.asprintf "%a@." pp_results items + + let search_cmd_html s = + query_results_gen html_of_item_set + (Parsing.Parser.Lp.parse_search_query_string "" s) + + let query_results = query_results_gen pp_item_set + + let search_cmd_txt s = + query_results (Parsing.Parser.Lp.parse_search_query_string "" s) end diff --git a/src/tool/indexing.mli b/src/tool/indexing.mli index cf41cb0c4..76218706d 100644 --- a/src/tool/indexing.mli +++ b/src/tool/indexing.mli @@ -3,6 +3,8 @@ val empty : unit -> unit val index_sign : rules:string list -> Core.Sign.t -> unit val dump : unit -> unit +val query_results: Parsing.Syntax.query -> string + (* search command used by cli *) val search_cmd_txt: string -> string From 0fefcf590f688d87b4bcbf0380cfa144a5c58531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Blanqui?= Date: Thu, 2 Jan 2025 19:39:44 +0100 Subject: [PATCH 6/7] add parsing of set tactic --- src/parsing/ll1.ml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/parsing/ll1.ml b/src/parsing/ll1.ml index 2943795d7..f429450eb 100644 --- a/src/parsing/ll1.ml +++ b/src/parsing/ll1.ml @@ -61,6 +61,7 @@ let string_of_token = function | RULE -> "rule" | SEARCH -> "search" | SEQUENTIAL -> "sequential" + | SET -> "set" | SIMPLIFY -> "simplify" | SOLVE -> "solve" | SYMBOL -> "symbol" @@ -827,6 +828,7 @@ and proof (lb:lexbuf): p_proof * p_proof_end = | REFLEXIVITY | REMOVE | REWRITE + | SET | SIMPLIFY | SOLVE | SYMMETRY @@ -880,6 +882,7 @@ and steps (lb:lexbuf): p_proofstep list = | REFLEXIVITY | REMOVE | REWRITE + | SET | SIMPLIFY | SOLVE | SYMMETRY @@ -1013,6 +1016,13 @@ and tactic (lb:lexbuf): p_tactic = let t = term lb in make_pos pos1 (P_tac_rewrite(true,None,t)) end + | SET -> + let pos1 = current_pos() in + consume_token lb; + let i = uid lb in + consume ASSIGN lb; + let t = term lb in + make_pos pos1 (P_tac_set(i,t)) | SIMPLIFY -> let pos1 = current_pos() in consume_token lb; From e1c530454bfe9c7f871623e7f8dc71b7011f6771 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Blanqui?= Date: Sat, 25 Jan 2025 18:03:55 +0100 Subject: [PATCH 7/7] wip --- src/parsing/ll1.ml | 103 ++++++++++++--------------------------------- 1 file changed, 28 insertions(+), 75 deletions(-) diff --git a/src/parsing/ll1.ml b/src/parsing/ll1.ml index f429450eb..42aeea374 100644 --- a/src/parsing/ll1.ml +++ b/src/parsing/ll1.ml @@ -40,6 +40,7 @@ let string_of_token = function | INDUCTIVE -> "inductive" | INFIX -> "infix" | INJECTIVE -> "injective" + | INT _ -> "integer" | LET -> "let" | NOTATION -> "notation" | OPAQUE -> "opaque" @@ -74,8 +75,6 @@ let string_of_token = function | WHY3 -> "why3" | WITH -> "with" | DEBUG_FLAGS _ -> "debug flags" - | NAT _ -> "natural number" - | NEG_NAT _ -> "negative integer" | FLOAT _ -> "float" | SIDE _ -> "left or right" | STRINGLIT _ -> "string literal" @@ -199,13 +198,13 @@ let consume_SWITCH (lb:lexbuf): bool = | _ -> expected "" [SWITCH true] -let consume_NAT (lb:lexbuf): string = +let consume_INT (lb:lexbuf): string = match current_token() with - | NAT s -> + | INT s -> consume_token lb; s | _ -> - expected "" [NAT""] + expected "" [INT""] let qid (lb:lexbuf): (string list * string) loc = (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) @@ -248,8 +247,7 @@ let uid (lb:lexbuf): string loc = let param (lb:lexbuf): string loc option = (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with - | UID s - | NAT s -> + | UID s -> let pos1 = current_pos() in consume_token lb; Some (make_pos pos1 s) @@ -257,56 +255,26 @@ let param (lb:lexbuf): string loc option = consume_token lb; None | _ -> - expected "non-qualified identifier or \"_\"" [UID"";NAT"";UNDERSCORE] + expected "non-qualified identifier or \"_\"" [UID"";UNDERSCORE] let int (lb:lexbuf): string = (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with - | NAT s - | NEG_NAT s -> + | INT s -> consume_token lb; s | _ -> - expected "integer" [NAT"";NEG_NAT""] + expected "integer" [INT""] let float_or_int (lb:lexbuf): string = (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with - | NAT s - | NEG_NAT s + | INT s | FLOAT s -> consume_token lb; s | _ -> - expected "integer or float" [NAT"";NEG_NAT"";FLOAT""] - -let uid_or_int (lb:lexbuf): string loc = - (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) - match current_token() with - | UID s - | NAT s - | NEG_NAT s -> - let pos1 = current_pos() in - consume_token lb; - make_pos pos1 s - | _ -> - expected "non-qualified identifier" [UID"";NAT"";NEG_NAT""] - -let qid_or_int (lb:lexbuf): (string list * string) loc = - (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) - match current_token() with - | QID p -> - let pos1 = current_pos() in - consume_token lb; - qid_of_path pos1 p - | UID s - | NAT s - | NEG_NAT s -> - let pos1 = current_pos() in - consume_token lb; - make_pos pos1 ([],s) - | _ -> - expected "possibly qualified identifier" [UID"";QID[];NAT"";NEG_NAT""] + expected "integer or float" [INT"";FLOAT""] let path (lb:lexbuf): string list loc = (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) @@ -376,15 +344,13 @@ let rec command pos1 p_sym_mod (lb:lexbuf): p_command = assert (p_sym_mod = []); let pos1 = current_pos() in command pos1 (nelist modifier lb) lb - (* qid_or_int *) + (* qid *) | UID _ - | QID _ - | NAT _ - | NEG_NAT _ -> + | QID _ -> begin match p_sym_mod with | [{elt=P_opaq;_}] -> - let i = qid_or_int lb in + let i = qid lb in make_pos pos1 (P_opaque i) | [] -> expected "command keyword missing" [] @@ -429,7 +395,7 @@ let rec command pos1 p_sym_mod (lb:lexbuf): p_command = | SYMBOL -> let pos1 = current_pos() in consume_token lb; - let p_sym_nam = uid_or_int lb in + let p_sym_nam = uid lb in let p_sym_arg = list params lb in begin match current_token() with @@ -534,7 +500,7 @@ let rec command pos1 p_sym_mod (lb:lexbuf): p_command = | STRINGLIT s -> consume_token lb; consume ASSIGN lb; - let i = qid_or_int lb in + let i = qid lb in make_pos pos1 (P_builtin(s,i)) | _ -> expected "" [STRINGLIT""] @@ -543,7 +509,7 @@ let rec command pos1 p_sym_mod (lb:lexbuf): p_command = if p_sym_mod <> [] then expected "" [SYMBOL]; (*or modifiers*) let pos1 = current_pos() in consume_token lb; - let i = qid_or_int lb in + let i = qid lb in let n = notation lb in make_pos pos1 (P_notation(i,n)) | _ -> @@ -564,9 +530,7 @@ and inductive (lb:lexbuf): p_inductive = consume ASSIGN lb; begin match current_token() with - | UID _ - | NAT _ - | NEG_NAT _ -> + | UID _ -> let c = constructor lb in let cs = list (prefix VBAR constructor) lb in let l = c::cs in @@ -582,7 +546,7 @@ and inductive (lb:lexbuf): p_inductive = end and constructor (lb:lexbuf): p_ident * p_term = - let i = uid_or_int lb in + let i = uid lb in let pos1 = current_pos() in let ps = list params lb in consume COLON lb; @@ -757,12 +721,12 @@ and query (lb:lexbuf): p_query = | PROVER_TIMEOUT -> let pos1 = current_pos() in consume_token lb; - let n = consume_NAT lb in + let n = consume_INT lb in make_pos pos1 (P_query_prover_timeout n) | VERBOSE -> let pos1 = current_pos() in consume_token lb; - let n = consume_NAT lb in + let n = consume_INT lb in make_pos pos1 (P_query_verbose n) | TYPE_QUERY -> let pos1 = current_pos() in @@ -1029,10 +993,8 @@ and tactic (lb:lexbuf): p_tactic = begin match current_token() with | UID _ - | QID _ - | NAT _ - | NEG_NAT _ -> - let i = Some (qid_or_int lb) in + | QID _ -> + let i = Some (qid lb) in make_pos pos1 (P_tac_simpl i) | _ -> let i = None in @@ -1083,8 +1045,7 @@ and rw_patt (lb:lexbuf): p_rw_patt = | UID_PATT _ | L_PAREN | L_SQ_BRACKET - | NAT _ - | NEG_NAT _ -> + | INT _ -> let pos1 = current_pos() in let t1 = term lb in begin @@ -1208,8 +1169,7 @@ and term (lb:lexbuf): p_term = | UID_PATT _ | L_PAREN | L_SQ_BRACKET - | NAT _ - | NEG_NAT _ -> + | INT _ -> let pos1 = current_pos() in let h = aterm lb in app pos1 h lb @@ -1230,8 +1190,7 @@ and app (pos1:position * position) (t: p_term) (lb:lexbuf): p_term = | UID_PATT _ | L_PAREN | L_SQ_BRACKET - | NAT _ - | NEG_NAT _ -> + | INT _ -> let u = aterm lb in app pos1 (make_pos pos1 (P_Appl(t,u))) lb (* bterm *) @@ -1347,14 +1306,10 @@ and aterm (lb:lexbuf): p_term = let t = term lb in consume R_SQ_BRACKET lb; make_pos pos1 (P_Expl(t)) - | NAT n -> + | INT n -> let pos1 = current_pos() in consume_token lb; make_pos pos1 (P_NLit n) - | NEG_NAT n -> - let pos1 = current_pos() in - consume_token lb; - make_pos pos1 (P_Iden(make_pos pos1 ([],n), false)) | _ -> expected "identifier, \"_\", or term between parentheses or square \ brackets" [] @@ -1382,13 +1337,11 @@ and binder (lb:lexbuf): p_params list * p_term = (*if log_enabled() then log "Expected: %s" __FUNCTION__;*) match current_token() with | UID _ - | NAT _ | UNDERSCORE -> let s = param lb in begin match current_token() with | UID _ - | NAT _ | UNDERSCORE | L_PAREN | L_SQ_BRACKET -> @@ -1408,7 +1361,7 @@ and binder (lb:lexbuf): p_params list * p_term = [p], term lb | _ -> expected "parameter list" - [UID"";NAT"";UNDERSCORE;L_PAREN;L_SQ_BRACKET;COMMA] + [UID"";UNDERSCORE;L_PAREN;L_SQ_BRACKET;COMMA] end | L_PAREN -> let ps = nelist params lb in @@ -1431,7 +1384,7 @@ and binder (lb:lexbuf): p_params list * p_term = expected "" [COMMA] end | _ -> - expected "" [UID"";NAT"";UNDERSCORE;L_PAREN;L_SQ_BRACKET] + expected "" [UID"";UNDERSCORE;L_PAREN;L_SQ_BRACKET] (* search *)