From 6f19f2d0b397689a828524bbd4f53247275c0d93 Mon Sep 17 00:00:00 2001 From: Kunlin Yu Date: Tue, 31 Dec 2024 10:16:18 +0800 Subject: [PATCH] Use bison instead of bison++ Signed-off-by: Kunlin Yu --- .gitignore | 1 + CMakeLists.txt | 14 ++- dockerfile | 2 +- include/cql2cpp/cql2_lexer-internal.h | 11 +- include/cql2cpp/cql2_parser.h | 16 +-- include/cql2cpp/cql2cpp.h | 9 +- include/cql2cpp/global_yylex.h | 26 +++++ src/cql2_lexer.l | 153 +++++++++++++------------- src/cql2_parser.y | 149 ++++++++++++------------- src/global_yylex.cc | 31 ++++++ src/main.cc | 1 + 11 files changed, 233 insertions(+), 180 deletions(-) create mode 100644 include/cql2cpp/global_yylex.h create mode 100644 src/global_yylex.cc diff --git a/.gitignore b/.gitignore index c5f842f..f08a307 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ # Build directory build +build_dbg # Prerequisites *.d diff --git a/CMakeLists.txt b/CMakeLists.txt index cd9c324..5f69ba8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,15 +37,23 @@ set(BISON_FILE ${CMAKE_SOURCE_DIR}/src/cql2_parser.y) set(BISON_OUTPUT ${CMAKE_BINARY_DIR}/cql2_parser_base.cc) add_custom_command( OUTPUT ${BISON_OUTPUT} - COMMAND bison++ -v -d -o ${BISON_OUTPUT} ${BISON_FILE} + COMMAND bison -v -d -o ${BISON_OUTPUT} ${BISON_FILE} DEPENDS ${BISON_FILE} - COMMENT "Generating ${BISON_OUTPUT} from ${BISON_FILE} using bison++" + COMMENT "Generating ${BISON_OUTPUT} from ${BISON_FILE} using bison" ) add_definitions(-DUSE_UNSTABLE_GEOS_CPP_API) +set(CQL2CPP_SRC + src/id_generator.cc + src/ast_node.cc + src/global_yylex.cc + ${FLEX_OUTPUT} + ${BISON_OUTPUT} +) + # library -add_library(cql2cpp src/id_generator.cc src/ast_node.cc ${FLEX_OUTPUT} ${BISON_OUTPUT}) +add_library(cql2cpp ${CQL2CPP_SRC}) target_compile_options(cql2cpp PRIVATE -Wno-register -Wno-write-strings) target_link_libraries(cql2cpp GEOS::geos glog::glog) diff --git a/dockerfile b/dockerfile index 6abc95d..22cde72 100644 --- a/dockerfile +++ b/dockerfile @@ -17,7 +17,7 @@ RUN apt-get install -y gcc-aarch64-linux-gnu RUN apt-get install -y g++-aarch64-linux-gnu # install dependencies -RUN apt-get install -y flex bison++ +RUN apt-get install -y flex bison RUN apt-get install -y libgflags-dev libgoogle-glog-dev libgtest-dev RUN apt-get install -y libgeos++-dev diff --git a/include/cql2cpp/cql2_lexer-internal.h b/include/cql2cpp/cql2_lexer-internal.h index 4e2a654..fefb5cf 100644 --- a/include/cql2cpp/cql2_lexer-internal.h +++ b/include/cql2cpp/cql2_lexer-internal.h @@ -10,20 +10,16 @@ #pragma once -#include - +#include #include #include -#include -#include // This file will be part of cc file generated by flex++. // And the #include will appear above this file // https://stackoverflow.com/questions/71300080/change-yylex-in-c-flex class Cql2Lexer : public yyFlexLexer { private: - yy_Cql2ParserBase_stype* lval_; - std::list token_texts_; + cql2cpp::Cql2ParserBase::value_type* yylval_; public: Cql2Lexer() : yyFlexLexer() {} @@ -32,5 +28,6 @@ class Cql2Lexer : public yyFlexLexer { int yylex(); - void RegisterLval(yy_Cql2ParserBase_stype* lval) { lval_ = lval; } + void set_yylval(cql2cpp::Cql2ParserBase::value_type* yylval) { yylval_ = yylval; } + }; diff --git a/include/cql2cpp/cql2_parser.h b/include/cql2cpp/cql2_parser.h index 9633e4b..96098c7 100644 --- a/include/cql2cpp/cql2_parser.h +++ b/include/cql2cpp/cql2_parser.h @@ -13,23 +13,19 @@ #include #include -#include +#include -class Cql2Parser : public Cql2ParserBase { +class Cql2Parser : public cql2cpp::Cql2ParserBase { private: - FlexLexer* lexer_; + cql2cpp::AstNode* root_; public: - Cql2Parser(FlexLexer* lexer) : lexer_(lexer) { - yydebug = false; - } + Cql2Parser() : cql2cpp::Cql2ParserBase(&root_) {} - void yyerror(char* s) override { - LOG(ERROR) << "Cql2Parser Error: " << s << std::endl; + void error(const std::string& msg) override { + LOG(ERROR) << "Cql2Parser Error: " << msg << std::endl; } - int yylex() override { return lexer_->yylex(); } - cql2cpp::AstNode* root() const { return root_; } void DeConstructRoot() { diff --git a/include/cql2cpp/cql2cpp.h b/include/cql2cpp/cql2cpp.h index 3f39bf9..8b94074 100644 --- a/include/cql2cpp/cql2cpp.h +++ b/include/cql2cpp/cql2cpp.h @@ -22,6 +22,7 @@ #include "feature_source.h" #include "node_evaluator.h" #include "tree_dot.h" +#include "global_yylex.h" namespace cql2cpp { @@ -121,13 +122,13 @@ class Cql2Cpp { std::string* error_msg) { std::istringstream iss(cql2_query); std::ostringstream oss; - Cql2Lexer* lexer = new Cql2Lexer(iss, oss); + + set_text_lexer(new Cql2Lexer(iss, oss)); cql2cpp::AstNode::set_ostream(&oss); - Cql2Parser parser(lexer); - lexer->RegisterLval(&parser.yylval); - int ret = parser.yyparse(); + Cql2Parser parser; + int ret = parser.parse(); if (error_msg != nullptr) *error_msg = oss.str(); if (ret == 0) { *root = parser.root(); diff --git a/include/cql2cpp/global_yylex.h b/include/cql2cpp/global_yylex.h new file mode 100644 index 0000000..15639b3 --- /dev/null +++ b/include/cql2cpp/global_yylex.h @@ -0,0 +1,26 @@ +/* + * File Name: global_yylex.h + * + * Copyright (c) 2024 IndoorSpatial + * + * Author: Kunlin Yu + * Create Date: 2024/12/31 + * + */ + +#pragma once + +#include + +enum LexerInstance { + CQL2_TEXT, +}; + +void set_current_lexer(LexerInstance lexer); + +class Cql2Lexer; +void set_text_lexer(Cql2Lexer* lexer); + +// bison expect a global yylex +// https://www.gnu.org/software/bison/manual/bison.html#C_002b_002b-Scanner-Interface +int yylex(cql2cpp::Cql2ParserBase::value_type* yylval); diff --git a/src/cql2_lexer.l b/src/cql2_lexer.l index 2a588cb..c97129d 100644 --- a/src/cql2_lexer.l +++ b/src/cql2_lexer.l @@ -1,6 +1,13 @@ %{ +#include #include -#include + +using cql2cpp::Cql2ParserBase; + +void print(const std::string& prefix, const char* yytext) { + LOG(INFO) << "TOKEN " << prefix << " \"" << yytext << "\""; +} + %} %option noyywrap @@ -12,156 +19,144 @@ CHAR_LIT \'[^\']*\' %% {CHAR_LIT} { - yyout << "TOKEN CHAR_LIT " << yytext << std::endl; - token_texts_.emplace_back(yytext); - lval_->str = token_texts_.back().c_str(); - return Cql2ParserBase::CHAR_LIT; + print("CHAR_LIT", yytext); + yylval_->emplace() = yytext; + return Cql2ParserBase::token::CHAR_LIT; } [+-]?{DIGIT}+ { - yyout << "TOKEN NUMBER_INT " << yytext << std::endl; - lval_->num_int = atoi(yytext); - return Cql2ParserBase::NUMBER_INT; + print("DIGIT_INT", yytext); + yylval_->emplace() = atoi(yytext); + return Cql2ParserBase::token::NUMBER_INT; } [+-]?{DIGIT}+"."{DIGIT}* { - yyout << "TOKEN NUMBER_FLOAT " << yytext << std::endl; - lval_->num_float = atof(yytext); - return Cql2ParserBase::NUMBER_FLOAT; + print("DIGIT_FLOAT", yytext); + yylval_->emplace() = atof(yytext); + return Cql2ParserBase::token::NUMBER_FLOAT; } [+-]?"."{DIGIT}* { - yyout << "TOKEN NUMBER_FLOAT " << yytext << std::endl; - lval_->num_float = atof(yytext); - return Cql2ParserBase::NUMBER_FLOAT; + print("DIGIT_FLOAT", yytext); + yylval_->emplace() = atof(yytext); + return Cql2ParserBase::token::NUMBER_FLOAT; } "TRUE" { - yyout << "TOKEN " << yytext << std::endl; - lval_->boolean = true; - return Cql2ParserBase::TRUE; + print("BOOL_LIT", yytext); + yylval_->emplace() = true; + return Cql2ParserBase::token::TRUE; } "FALSE" { - yyout << "TOKEN " << yytext << std::endl; - lval_->boolean = false; - return Cql2ParserBase::FALSE; + print("BOOL_LIT", yytext); + yylval_->emplace() = false; + return Cql2ParserBase::token::FALSE; } "AND" { - yyout << "TOKEN " << yytext << std::endl; - return Cql2ParserBase::AND; + print("BOOL_EXPR", yytext); + return Cql2ParserBase::token::AND; } "OR" { - yyout << "TOKEN " << yytext << std::endl; - return Cql2ParserBase::OR; + print("BOOL_EXPR", yytext); + return Cql2ParserBase::token::OR; } "NOT" { - yyout << "TOKEN " << yytext << std::endl; - return Cql2ParserBase::NOT; + print("BOOL_EXPR", yytext); + return Cql2ParserBase::token::NOT; } "CASEI" { - yyout << "TOKEN " << yytext << std::endl; - return Cql2ParserBase::CASEI; + print("", yytext); + return Cql2ParserBase::token::CASEI; } "ACCENTI" { - yyout << "TOKEN " << yytext << std::endl; - return Cql2ParserBase::ACCENTI; + print("", yytext); + return Cql2ParserBase::token::ACCENTI; } "(" { - yyout << "TOKEN LPT " << yytext << std::endl; - lval_->c = '('; - return Cql2ParserBase::LPT; + print("LPT", yytext); + return Cql2ParserBase::token::LPT; } ")" { - yyout << "TOKEN RPT " << yytext << std::endl; - lval_->c = ')'; - return Cql2ParserBase::RPT; + print("RPT", yytext); + return Cql2ParserBase::token::RPT; } "\"" { - yyout << "TOKEN DQUOTE " << yytext << std::endl; - return Cql2ParserBase::DQUOTE; + print("DQUOTE", yytext); + return Cql2ParserBase::token::DQUOTE; } "'" { - yyout << "TOKEN SQUOTE " << yytext << std::endl; - return Cql2ParserBase::SQUOTE; + print("SQUOTE", yytext); + return Cql2ParserBase::token::SQUOTE; } "," { - yyout << "TOKEN COMMA " << yytext << std::endl; - lval_->c = ','; - return Cql2ParserBase::COMMA; + print("COMMA", yytext); + return Cql2ParserBase::token::COMMA; } S_INTERSECTS|S_EQUALS|S_DISJOINT|S_TOUCHES|S_WITHIN|S_OVERLAPS|S_CROSSES|S_CONTAINS { - yyout << "TOKEN " << yytext << std::endl; - token_texts_.emplace_back(yytext); - lval_->str = token_texts_.back().c_str(); - return Cql2ParserBase::SPT_FUNC; + print("SPATIAL", yytext); + yylval_->emplace(yytext); + return Cql2ParserBase::token::SPT_FUNC; } POINT[Z]?"("[0-9. +\-\n]+")" { - yyout << "TOKEN " << yytext << std::endl; - token_texts_.emplace_back(yytext); - lval_->str = token_texts_.back().c_str(); - return Cql2ParserBase::POINT_WKT; + print("GEOMETRY", yytext); + yylval_->emplace() = yytext; + return Cql2ParserBase::token::POINT_WKT; } LINESTRING[Z]?"("[0-9. +\-,\n]+")" { - yyout << "TOKEN " << yytext << std::endl; - token_texts_.emplace_back(yytext); - lval_->str = token_texts_.back().c_str(); - return Cql2ParserBase::LINESTRING_WKT; + print("GEOMETRY", yytext); + yylval_->emplace() = yytext; + return Cql2ParserBase::token::LINESTRING_WKT; } POLYGON[Z]?"(("[0-9. +\-,\n]+"))" { - yyout << "TOKEN " << yytext << std::endl; - token_texts_.emplace_back(yytext); - lval_->str = token_texts_.back().c_str(); - return Cql2ParserBase::POLYGON_WKT; + print("GEOMETRY", yytext); + yylval_->emplace() = yytext; + return Cql2ParserBase::token::POLYGON_WKT; } BBOX"("[0-9. +\-,\n]+")" { - yyout << "TOKEN " << yytext << std::endl; - token_texts_.emplace_back(yytext); - lval_->str = token_texts_.back().c_str(); - return Cql2ParserBase::BBOX_TEXT; + print("BBOX", yytext); + yylval_->emplace() = yytext; + return Cql2ParserBase::token::BBOX_TEXT; } IN { - yyout << "TOKEN " << yytext << std::endl; - token_texts_.emplace_back(yytext); - lval_->str = token_texts_.back().c_str(); - return Cql2ParserBase::IN; + print("IN", yytext); + return Cql2ParserBase::token::IN; } {ID} { - yyout << "TOKEN ID " << yytext << std::endl; - token_texts_.emplace_back(yytext); - lval_->str = token_texts_.back().c_str(); - return Cql2ParserBase::ID; + print("ID", yytext); + yylval_->emplace(yytext); + return Cql2ParserBase::token::ID; } -"+" { yyout << "TOKEN " << yytext << std::endl; return Cql2ParserBase::PLUS; } -"-" { yyout << "TOKEN " << yytext << std::endl; return Cql2ParserBase::MINUS;} -"*" { yyout << "TOKEN " << yytext << std::endl; return Cql2ParserBase::MULT; } -"/" { yyout << "TOKEN " << yytext << std::endl; return Cql2ParserBase::DIV; } -"=" { yyout << "TOKEN " << yytext << std::endl; return Cql2ParserBase::EQ; } -"<" { yyout << "TOKEN " << yytext << std::endl; return Cql2ParserBase::LT; } -">" { yyout << "TOKEN " << yytext << std::endl; return Cql2ParserBase::GT; } +"+" { print("OP", yytext); return Cql2ParserBase::token::PLUS; } +"-" { print("OP", yytext); return Cql2ParserBase::token::MINUS;} +"*" { print("OP", yytext); return Cql2ParserBase::token::MULT; } +"/" { print("OP", yytext); return Cql2ParserBase::token::DIV; } +"=" { print("OP", yytext); return Cql2ParserBase::token::EQ; } +"<" { print("OP", yytext); return Cql2ParserBase::token::LT; } +">" { print("OP", yytext); return Cql2ParserBase::token::GT; } "{"[^{}\n]*"}" {/* eat up one-line comments */} [ \t\n]+ {/* eat up whitespace */} -. { yyout << "Unrecognized character: " << yytext << std::endl; } +. { LOG(INFO) << "Unrecognized character: " << yytext << std::endl; } %% diff --git a/src/cql2_parser.y b/src/cql2_parser.y index b8e82c2..56f6167 100644 --- a/src/cql2_parser.y +++ b/src/cql2_parser.y @@ -1,16 +1,10 @@ -%{ +%code { #include #include #include #include #include -extern int yylex(); // Declare the lexer function -extern void yyerror(const char *s); // Declare the error handler - -#define YY_Cql2ParserBase_ERROR_BODY {} -#define YY_Cql2ParserBase_LEX_BODY { return 0; } - using cql2cpp::AstNode; using cql2cpp::BoolExpression; @@ -32,82 +26,85 @@ using cql2cpp::In; using cql2cpp::NotIn; using cql2cpp::NameOp; -%} -%header{ +void cql2cpp::Cql2ParserBase::error(const std::string& msg) { + LOG(ERROR) << msg; +} + +} + +%code requires { #include #include -#define YY_Cql2ParserBase_MEMBERS cql2cpp::AstNode* root_; -#define YY_Cql2ParserBase_DEBUG 1 -#define YYDEBUG - -%} - -%name Cql2ParserBase - -%union { - bool boolean; - int num_int; - double num_float; - const char* str; - char c; - cql2cpp::AstNode* node; - geos::geom::Geometry* geom; - geos::geom::Envelope* env; + +} + +%code provides { + +#include + } -%token NUMBER_INT -%token NUMBER_FLOAT -%token BOOLEAN -%token TRUE FALSE -%token ID -%token CHAR_LIT -%token SPT_FUNC -%token arrayFunction +%language "C++" +%define api.namespace {cql2cpp} +%define api.parser.class {Cql2ParserBase} +%define api.value.type variant +%define parse.error verbose +%parse-param {cql2cpp::AstNode **root_} + +%token NUMBER_INT +%token NUMBER_FLOAT +%token BOOLEAN +%token TRUE FALSE +%token ID +%token CHAR_LIT +%token SPT_FUNC +%token arrayFunction %token PLUS MINUS MULT DIV %token EQ GT LT // = > < %token AND OR NOT %token IN -%token LPT RPT COMMA // ( ) , +%token LPT RPT COMMA // ( ) , %token CASEI ACCENTI %token SQUOTE DQUOTE -%token POINT_WKT -%token LINESTRING_WKT -%token POLYGON_WKT -%token BBOX_TEXT - -%type booleanExpression -%type booleanTerm -%type booleanFactor -%type booleanPrimary -%type booleanLiteral -%type characterExpression -%type characterClause -%type propertyName -%type predicate -%type comparisonPredicate -%type binaryComparisonPredicate -%type scalarExpression -%type numericLiteral -%type spatialPredicate -%type geomExpression -%type spatialInstance -%type isInListPredicate -%type inList -%type arrayPredicate -%type arrayExpression -%type array -%type arrayElement -%type function -%type argumentList -%type argument -%type geometryLiteral -%type pointTaggedText -%type linestringTaggedText -%type polygonTaggedText -%type bboxTaggedText +%token POINT_WKT +%token LINESTRING_WKT +%token POLYGON_WKT +%token BBOX_TEXT + +%type booleanExpression +%type booleanTerm +%type booleanFactor +%type booleanPrimary +%type booleanLiteral +%type characterExpression +%type characterClause +%type propertyName +%type predicate +%type comparisonPredicate +%type binaryComparisonPredicate +%type scalarExpression +%type numericLiteral +%type spatialPredicate +%type geomExpression +%type spatialInstance +%type isInListPredicate +%type inList +%type arrayPredicate +%type arrayExpression +%type array +%type arrayElement +%type function +%type argumentList +%type argument + +%type geometryLiteral +%type pointTaggedText +%type linestringTaggedText +%type polygonTaggedText +%type bboxTaggedText %left PLUS MINUS %left MULT DIV @@ -119,7 +116,7 @@ using cql2cpp::NameOp; // Grammar rules: program: - booleanExpression { root_ = $1 } + booleanExpression { *root_ = $1; } booleanExpression: booleanTerm @@ -137,7 +134,7 @@ booleanPrimary: function | predicate | booleanLiteral - | LPT booleanExpression RPT { $$ = $2 } + | LPT booleanExpression RPT { $$ = $2; } booleanLiteral: TRUE { $$ = new AstNode($1); } @@ -166,7 +163,7 @@ binaryComparisonPredicate: | scalarExpression LT scalarExpression { $$ = new AstNode(BinCompPred, cql2cpp::Lesser, {$1, $3}); } | scalarExpression GT scalarExpression { $$ = new AstNode(BinCompPred, cql2cpp::Greater, {$1, $3}); } | scalarExpression LT EQ scalarExpression { $$ = new AstNode(BinCompPred, cql2cpp::LesserEqual, {$1, $4}); } - | scalarExpression GT EQ scalarExpression { $$ = new AstNode(BinCompPred, cql2cpp::GreaterEqual, {$1, $4}) } + | scalarExpression GT EQ scalarExpression { $$ = new AstNode(BinCompPred, cql2cpp::GreaterEqual, {$1, $4}); } scalarExpression: numericLiteral @@ -208,7 +205,7 @@ spatialInstance: if (p) { $$ = new AstNode(p.release()); } else { - yyerror("Can not parse WKT"); + error("Can not parse WKT"); YYERROR; } } @@ -217,7 +214,7 @@ spatialInstance: if (p) { $$ = new AstNode(p.release()); } else { - yyerror("Can not parse BBOX"); + error("Can not parse BBOX"); YYERROR; } } @@ -229,7 +226,7 @@ arrayExpression: LPT RPT { $$ = new AstNode(Array, NullOp, {}); } | propertyName | function - | LPT array RPT { $$ = $2 } + | LPT array RPT { $$ = $2; } array: arrayElement { $$ = new AstNode(Array, NullOp, {$1}); } diff --git a/src/global_yylex.cc b/src/global_yylex.cc new file mode 100644 index 0000000..0751794 --- /dev/null +++ b/src/global_yylex.cc @@ -0,0 +1,31 @@ +/* + * File Name: global_yylex.cc + * + * Copyright (c) 2024 IndoorSpatial + * + * Author: Kunlin Yu + * Create Date: 2024/12/31 + * + */ +#include +#include + +LexerInstance global_lexer_instance = CQL2_TEXT; + +void set_current_lexer(LexerInstance lexer) { global_lexer_instance = lexer; } + +static Cql2Lexer* lexer_text = nullptr; +void set_text_lexer(Cql2Lexer* lexer) { + if (lexer_text) delete lexer_text; + lexer_text = lexer; +} + +int yylex(cql2cpp::Cql2ParserBase::value_type* yylval) { + if (global_lexer_instance == CQL2_TEXT) { + lexer_text->set_yylval(yylval); + return lexer_text->yylex(); + } + + return -1; +} + diff --git a/src/main.cc b/src/main.cc index 45fe867..69a3678 100644 --- a/src/main.cc +++ b/src/main.cc @@ -9,6 +9,7 @@ */ #include +#include #include #include #include