From bf5bcce99beb78450a9d5e735e1e8d64e13f2e6d Mon Sep 17 00:00:00 2001 From: afzalabbasi Date: Fri, 30 Aug 2024 17:09:12 +0500 Subject: [PATCH 01/12] Parquet file task --- go.mod | 42 +++- go.sum | 93 +++++--- main.go | 14 ++ model/table.go | 8 + repository/tables_repository.go | 108 ++++++++++ service/db/db.go | 63 ++++++ service/merge_tree_service.go | 364 ++++++++++++++++++++++++++++++++ tests/merge_service_test.go | 93 ++++++++ tests/tables_test.go | 75 +++++++ 9 files changed, 822 insertions(+), 38 deletions(-) create mode 100644 model/table.go create mode 100644 repository/tables_repository.go create mode 100644 service/merge_tree_service.go create mode 100644 tests/merge_service_test.go create mode 100644 tests/tables_test.go diff --git a/go.mod b/go.mod index 38fe3be..eda4c33 100644 --- a/go.mod +++ b/go.mod @@ -1,23 +1,45 @@ module quackpipe -go 1.20 +go 1.21 + +toolchain go1.21.3 require ( + github.com/apache/arrow/go/v18 v18.0.0-20240829005432-58415d1fac50 + github.com/google/uuid v1.6.0 github.com/gorilla/mux v1.8.1 github.com/marcboeker/go-duckdb v1.7.0 + github.com/stretchr/testify v1.9.0 ) require ( + github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect + github.com/andybalholm/brotli v1.1.0 // indirect github.com/apache/arrow/go/v14 v14.0.2 // indirect - github.com/goccy/go-json v0.10.2 // indirect - github.com/google/flatbuffers v23.5.26+incompatible // indirect - github.com/klauspost/compress v1.16.7 // indirect - github.com/klauspost/cpuid/v2 v2.2.5 // indirect + github.com/apache/thrift v0.20.0 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/goccy/go-json v0.10.3 // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/google/flatbuffers v24.3.25+incompatible // indirect + github.com/klauspost/asmfmt v1.3.2 // indirect + github.com/klauspost/compress v1.17.9 // indirect + github.com/klauspost/cpuid/v2 v2.2.8 // indirect + github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect + github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect - github.com/pierrec/lz4/v4 v4.1.18 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect - golang.org/x/mod v0.13.0 // indirect - golang.org/x/sys v0.13.0 // indirect - golang.org/x/tools v0.14.0 // indirect - golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect + golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect + golang.org/x/mod v0.20.0 // indirect + golang.org/x/net v0.28.0 // indirect + golang.org/x/sync v0.8.0 // indirect + golang.org/x/sys v0.23.0 // indirect + golang.org/x/text v0.17.0 // indirect + golang.org/x/tools v0.24.0 // indirect + golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect + google.golang.org/grpc v1.63.2 // indirect + google.golang.org/protobuf v1.34.2 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 3b1fb07..f54b565 100644 --- a/go.sum +++ b/go.sum @@ -1,47 +1,84 @@ +github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= +github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= +github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= 
+github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= github.com/apache/arrow/go/v14 v14.0.2 h1:N8OkaJEOfI3mEZt07BIkvo4sC6XDbL+48MBPWO5IONw= github.com/apache/arrow/go/v14 v14.0.2/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybFg8QBQ5LU+eBY= +github.com/apache/arrow/go/v18 v18.0.0-20240829005432-58415d1fac50 h1:3vA3hoM7fM4pJHG1dt0CEMzTwitFvutUrAiIwY+Bp+A= +github.com/apache/arrow/go/v18 v18.0.0-20240829005432-58415d1fac50/go.mod h1:pAdO1xbg0WTJ++tq74I5xKX+yUD7MG0cEI24P+jko10= +github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI= +github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= -github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/google/flatbuffers v23.5.26+incompatible h1:M9dgRyhJemaM4Sw8+66GHBu8ioaQmyPLg1b8VwK5WJg= -github.com/google/flatbuffers v23.5.26+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= -github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= -github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= +github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= +github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= -github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= -github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= -github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= -github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= +github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= +github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= +github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= +github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/text v0.2.0 
h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/marcboeker/go-duckdb v1.7.0 h1:c9DrS13ta+gqVgg9DiEW8I+PZBE85nBMLL/YMooYoUY= github.com/marcboeker/go-duckdb v1.7.0/go.mod h1:WtWeqqhZoTke/Nbd7V9lnBx7I2/A/q0SAq/urGzPCMs= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= +github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= +github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= -github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= -golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= -golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/sync v0.4.0 h1:zxkM55ReGkDlKSM+Fu41A+zmbZuaPVbGMzvvdUPznYQ= -golang.org/x/sync v0.4.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= +golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= +golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= +golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= +golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= +golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.5.0/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= -golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/tools v0.14.0 h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= -golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= -gonum.org/v1/gonum v0.12.0 h1:xKuo6hzt+gMav00meVPUlXwSdoEJP46BR+wdxQEFK2o= -gonum.org/v1/gonum v0.12.0/go.mod h1:73TDxJfAAHeA8Mk9mf8NlIppyhQNo5GLTcYeqgo2lvY= +golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= +golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= +golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= +golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= +golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 h1:LLhsEBxRTBLuKlQxFBYUOU8xyFgXv6cOTp2HASDlsDk= +golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= +gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1:cZGRis4/ot9uVm639a+rHCUaG0JJHEsdyzSQTMX+suY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY= +google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= +google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= +google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= +google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go index c5cadaa..a5ab7b3 100644 --- a/main.go +++ b/main.go @@ -3,10 +3,13 @@ package main import ( "flag" "fmt" + "log" "net/http" "os" "quackpipe/model" + "quackpipe/repository" "quackpipe/router" + "quackpipe/service/db" "quackpipe/utils" ) @@ -29,6 +32,17 @@ func initFlags() *model.CommandLineFlags { var appFlags *model.CommandLineFlags func main() { + + dbConn, err := db.ConnectDuckDB("test.db") + if err != nil { + log.Fatalf("failed to connect to DuckDB: %v", err) + } + defer dbConn.Close() + err = repository.CreateDuckDBTablesTable(dbConn) + if err != nil { + log.Fatalf("failed to create metadata table: %v", err) + } + appFlags = initFlags() if *appFlags.Stdin { rows, duration, format, err := utils.ReadFromScanner(*appFlags) diff --git a/model/table.go b/model/table.go new file mode 100644 index 0000000..6f8efa1 --- /dev/null +++ b/model/table.go @@ 
-0,0 +1,8 @@ +package model + +type Table struct { + Name string + Path string + Fields [][2]string + OrderBy []string +} diff --git a/repository/tables_repository.go b/repository/tables_repository.go new file mode 100644 index 0000000..7134017 --- /dev/null +++ b/repository/tables_repository.go @@ -0,0 +1,108 @@ +package repository + +import ( + "database/sql" + "fmt" + "strings" +) + +func CreateDuckDBTablesTable(db *sql.DB) error { + // Adjusted schema using DuckDB's ARRAY type + query := ` + CREATE TABLE IF NOT EXISTS tables ( + name VARCHAR PRIMARY KEY, + path VARCHAR, + field_names VARCHAR[], + field_types VARCHAR[], + order_by VARCHAR[], + engine VARCHAR, + timestamp_field VARCHAR, + timestamp_precision VARCHAR, + partition_by VARCHAR[] + ); + ` + + // Execute the query to create the table if it doesn't exist + _, err := db.Exec(query) + if err != nil { + return fmt.Errorf("failed to create 'tables' table in DuckDB: %v", err) + } + + return nil +} + +func InsertTableMetadata(db *sql.DB, name, path string, fieldNames []string, fieldTypes []string, orderBy []string, engine string, timestampField, timestampPrecision, partitionBy []string) error { + + fieldNamesStr := fmt.Sprintf("[%s]", strings.Join(fieldNames, ", ")) + fieldTypesStr := fmt.Sprintf("[%s]", strings.Join(fieldTypes, ", ")) + orderByStr := fmt.Sprintf("[%s]", strings.Join(orderBy, ", ")) + partitionByStr := fmt.Sprintf("[%s]", strings.Join(partitionBy, ", ")) + timestampPrecisionStr := fmt.Sprintf("[%s]", strings.Join(timestampPrecision, ", ")) + timestampFieldStr := fmt.Sprintf("[%s]", strings.Join(timestampField, ", ")) + query := `INSERT INTO tables ( + name, path, field_names, field_types, order_by, engine, timestamp_field, timestamp_precision, partition_by + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT (name) DO UPDATE SET + path = excluded.path, + order_by = excluded.order_by, + engine = excluded.engine, + timestamp_field = excluded.timestamp_field, + timestamp_precision = excluded.timestamp_precision, + partition_by = excluded.partition_by;` + // Prepare the SQL statement + stmt, err := db.Prepare(query) + if err != nil { + return fmt.Errorf("failed to prepare SQL statement: %w", err) + } + defer stmt.Close() + + // Execute the SQL statement with the converted array literals + _, err = stmt.Exec(name, path, fieldNamesStr, fieldTypesStr, orderByStr, engine, timestampFieldStr, timestampPrecisionStr, partitionByStr) + if err != nil { + return fmt.Errorf("failed to insert table metadata: %w", err) + } + return nil +} + +func DisplayAllData(db *sql.DB, tableName string) error { + query := fmt.Sprintf("SELECT * FROM %s", tableName) + + rows, err := db.Query(query) + if err != nil { + return fmt.Errorf("failed to query table data: %w", err) + } + defer rows.Close() + + columns, err := rows.Columns() + if err != nil { + return fmt.Errorf("failed to get columns: %w", err) + } + + values := make([]interface{}, len(columns)) + valuePtrs := make([]interface{}, len(columns)) + for i := range columns { + valuePtrs[i] = &values[i] + } + + fmt.Println("Table Data:") + for rows.Next() { + err := rows.Scan(valuePtrs...) 
+ if err != nil { + return fmt.Errorf("failed to scan row: %w", err) + } + + for i, col := range values { + if col != nil { + fmt.Printf("%s: %v\t", columns[i], col) + } + } + fmt.Println() + } + + if err := rows.Err(); err != nil { + return fmt.Errorf("error occurred during row iteration: %w", err) + } + + return nil +} diff --git a/service/db/db.go b/service/db/db.go index c497b29..3f1a5cd 100644 --- a/service/db/db.go +++ b/service/db/db.go @@ -3,6 +3,7 @@ package db import ( "context" "database/sql" + "fmt" _ "github.com/marcboeker/go-duckdb" // load duckdb driver "os" "quackpipe/model" @@ -52,3 +53,65 @@ func check(args ...interface{}) { panic(err) } } + +// ConnectDuckDB opens and returns a connection to DuckDB. +func ConnectDuckDB(filePath string) (*sql.DB, error) { + // Open DuckDB connection (this will create a DuckDB instance in the specified file) + db, err := sql.Open("duckdb", filePath) + if err != nil { + return nil, fmt.Errorf("failed to open DuckDB: %w", err) + } + + // Test the connection + if err = db.Ping(); err != nil { + db.Close() + return nil, fmt.Errorf("failed to connect to DuckDB: %w", err) + } + + fmt.Println("Connected to DuckDB successfully.") + return db, nil +} + +// CreateTablesTable creates the metadata table if it doesn't already exist +func CreateTablesTable(db *sql.DB) error { + query := ` + CREATE TABLE IF NOT EXISTS tables ( + name VARCHAR PRIMARY KEY, + path VARCHAR, + field_names VARCHAR[], + field_types VARCHAR[], + order_by VARCHAR[], + engine VARCHAR[], + timestamp_field VARCHAR[], + timestamp_precision VARCHAR[], + partition_by VARCHAR[] + ); + ` + _, err := db.Exec(query) + if err != nil { + return fmt.Errorf("failed to create tables metadata: %w", err) + } + return nil +} + +func InsertTableMetadata(db *sql.DB, name, path string, fieldNames, fieldTypes, orderBy, engine, timestampField, timestampPrecision, partitionBy []string) error { + query := ` + INSERT INTO tables (name, path, field_names, field_types, order_by, engine, timestamp_field, timestamp_precision, partition_by) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) 
+ ON CONFLICT(name) DO UPDATE SET + path = excluded.path, + field_names = excluded.field_names, + field_types = excluded.field_types, + order_by = excluded.order_by, + engine = excluded.engine, + timestamp_field = excluded.timestamp_field, + timestamp_precision = excluded.timestamp_precision, + partition_by = excluded.partition_by; + ` + + _, err := db.Exec(query, name, path, fieldNames, fieldTypes, orderBy, engine, timestampField, timestampPrecision, partitionBy) + if err != nil { + return fmt.Errorf("failed to insert table metadata: %w", err) + } + return nil +} diff --git a/service/merge_tree_service.go b/service/merge_tree_service.go new file mode 100644 index 0000000..e49210f --- /dev/null +++ b/service/merge_tree_service.go @@ -0,0 +1,364 @@ +package service + +import ( + "database/sql" + "errors" + "fmt" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/pqarrow" + "github.com/google/uuid" + _ "github.com/marcboeker/go-duckdb" + "os" + "path/filepath" + "quackpipe/model" + "strings" + "time" +) + +type IMergeTree interface { + Store(table *model.Table, columns map[string][]any) error + Merge(table *model.Table) error +} + +type MergeTreeService struct { + db *sql.DB +} + +func NewMergeTreeService(dbPath string) (*MergeTreeService, error) { + conn, err := sql.Open("duckdb", dbPath) + if err != nil { + return nil, fmt.Errorf("failed to open DuckDB connection: %v", err) + } + + return &MergeTreeService{db: conn}, nil +} + +func (s *MergeTreeService) Close() error { + return s.db.Close() +} + +func validateData(table *model.Table, columns map[string][]any) error { + + fieldMap := make(map[string]string) + for _, field := range table.Fields { + fieldMap[field[0]] = field[1] + } + + // Check if columns map size matches the table.Fields size + if len(columns) != len(table.Fields) { + return errors.New("columns size does not match table fields size") + } + + var dataLength int + for _, data := range columns { + if dataLength == 0 { + dataLength = len(data) // Initialize dataLength with the length of the first column + } else if len(data) != dataLength { + return errors.New("columns length and data length mismatch") + } + } + for column, data := range columns { + + // Validate if the column exists in the table definition + columnType, ok := fieldMap[column] + if !ok { + return fmt.Errorf("invalid column: %s", column) + } + // Validate data types for each column + switch columnType { + case "UInt64": + for _, val := range data { + if _, ok := val.(uint64); !ok { + return fmt.Errorf("invalid data type for column %s: expected uint64", column) + } + } + case "Int64": + for _, val := range data { + if _, ok := val.(int64); !ok { + return fmt.Errorf("invalid data type for column %s: expected int64", column) + } + } + case "String": + for _, val := range data { + if _, ok := val.(string); !ok { + return fmt.Errorf("invalid data type for column %s: expected string", column) + } + } + case "Float64": + for _, val := range data { + if _, ok := val.(float64); !ok { + return fmt.Errorf("invalid data type for column %s: expected float64", column) + } + } + default: + return fmt.Errorf("unsupported column type: %s", columnType) + } + } + + return nil +} + +func (s *MergeTreeService) createParquetSchema(table *model.Table) *arrow.Schema { + fields := make([]arrow.Field, len(table.Fields)) + for i, field := range table.Fields { + var 
fieldType arrow.DataType + switch field[1] { + case "UInt64": + fieldType = arrow.PrimitiveTypes.Uint64 + case "Int64": + fieldType = arrow.PrimitiveTypes.Int64 + case "String": + fieldType = arrow.BinaryTypes.String + case "Float64": + fieldType = arrow.PrimitiveTypes.Float64 + default: + panic(fmt.Sprintf("unsupported field type: %s", field[1])) + } + fields[i] = arrow.Field{Name: field[0], Type: fieldType} + } + return arrow.NewSchema(fields, nil) +} + +func (s *MergeTreeService) writeParquetFile(table *model.Table, columns map[string][]any) error { + schema := s.createParquetSchema(table) + outputFile := filepath.Join(table.Path, "data", table.Name+uuid.New().String()+".parquet") + file, err := os.Create(outputFile) + if err != nil { + return fmt.Errorf("failed to create parquet file: %v", err) + } + defer file.Close() + + // Create a new Arrow memory pool + pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) + + // Create Arrow RecordBatch + recordBatch := array.NewRecordBuilder(pool, schema) + defer recordBatch.Release() + + // Create a field map for easier access to column types + fieldMap := make(map[string]string) + for _, field := range table.Fields { + fieldMap[field[0]] = field[1] + } + for columnName, dataSlice := range columns { + columnType, ok := fieldMap[columnName] + if !ok { + return fmt.Errorf("unknown column: %s", columnName) + } + + // Get the index of the column from the schema + columnIndex := -1 + for i, field := range schema.Fields() { + if field.Name == columnName { + columnIndex = i + break + } + } + if columnIndex == -1 { + return fmt.Errorf("column %s not found in schema", columnName) + } + + builder := recordBatch.Field(columnIndex).(array.Builder) + + // Handle data slice based on its type + switch columnType { + case "UInt64": + if b, ok := builder.(*array.Uint64Builder); ok { + for _, value := range dataSlice { + if v, ok := value.(uint64); ok { + b.Append(v) + } else { + return fmt.Errorf("invalid data type for column %s, expected uint64", columnName) + } + } + } else { + return fmt.Errorf("type mismatch for column %s", columnName) + } + case "Int64": + if b, ok := builder.(*array.Int64Builder); ok { + for _, value := range dataSlice { + if v, ok := value.(int64); ok { + b.Append(v) + } else { + return fmt.Errorf("invalid data type for column %s, expected int64", columnName) + } + } + } else { + return fmt.Errorf("type mismatch for column %s", columnName) + } + case "String": + if b, ok := builder.(*array.StringBuilder); ok { + for _, value := range dataSlice { + if v, ok := value.(string); ok { + b.Append(v) + } else { + return fmt.Errorf("invalid data type for column %s, expected string", columnName) + } + } + } else { + return fmt.Errorf("type mismatch for column %s", columnName) + } + case "Float64": + if b, ok := builder.(*array.Float64Builder); ok { + for _, value := range dataSlice { + if v, ok := value.(float64); ok { + b.Append(v) + } else { + return fmt.Errorf("invalid data type for column %s, expected float64", columnName) + } + } + } else { + return fmt.Errorf("type mismatch for column %s", columnName) + } + default: + return fmt.Errorf("unsupported column type for column %s: %s", columnName, columnType) + } + } + + // Finalize the record batch + batch := recordBatch.NewRecord() + defer batch.Release() + + // Set up Parquet writer properties + writerProps := parquet.NewWriterProperties( + parquet.WithMaxRowGroupLength(100), + ) + arrprops := pqarrow.NewArrowWriterProperties() + + // Create Parquet file writer + writer, err := 
pqarrow.NewFileWriter(schema, file, writerProps, arrprops) + if err != nil { + return fmt.Errorf("failed to create Parquet file writer: %v", err) + } + defer writer.Close() + + // Write the record batch to the Parquet file + if err := writer.Write(batch); err != nil { + return fmt.Errorf("failed to write record batch to parquet file: %v", err) + } + + return nil +} + +func (s *MergeTreeService) Store(table *model.Table, columns map[string][]any) error { + if err := validateData(table, columns); err != nil { + return err + } + + if err := s.writeParquetFile(table, columns); err != nil { + return err + } + + return nil +} + +// Merge method implementation +func (s *MergeTreeService) Merge(table *model.Table) error { + dataDir := filepath.Join(table.Path, "data") + tmpDir := filepath.Join(table.Path, "tmp") + + if err := os.MkdirAll(tmpDir, 0755); err != nil { + return err + } + + files, err := os.ReadDir(dataDir) + if err != nil { + return err + } + + var parquetFiles []string + for _, file := range files { + if strings.HasSuffix(file.Name(), ".parquet") { + parquetFiles = append(parquetFiles, filepath.Join(dataDir, file.Name())) + } + } + + if len(parquetFiles) == 0 { + return errors.New("no parquet files to merge") + } + + // Plan the merge to keep the size under 4GB + const maxFileSize = 4 * 1024 * 1024 * 1024 + var filesToMerge []string + var currentSize int64 + + for _, file := range parquetFiles { + fileInfo, err := os.Stat(file) + if err != nil { + return err + } + + if currentSize+fileInfo.Size() > maxFileSize { + if err := mergeFiles(filesToMerge, table, tmpDir); err != nil { + return err + } + filesToMerge = nil + currentSize = 0 + } + + filesToMerge = append(filesToMerge, file) + currentSize += fileInfo.Size() + } + + if len(filesToMerge) > 0 { + if err := mergeFiles(filesToMerge, table, tmpDir); err != nil { + return err + } + } + + return nil +} + +func mergeFiles(files []string, table *model.Table, tmpDir string) error { + // Create a temporary merged file + tmpFilePath := filepath.Join(tmpDir, fmt.Sprintf("%s_%d.parquet", table.Name, time.Now().UnixNano())) + + // Prepare DuckDB connection + conn, err := sql.Open("duckdb", "test") + if err != nil { + return err + } + defer conn.Close() + + //// Drop the table if it exists + //dropTableSQL := `DROP TABLE IF EXISTS temp_table` + //_, err = conn.Exec(dropTableSQL) + //if err != nil { + // return err + //} + + // Create a temporary table in DuckDB using parquet_scan with an array of files + createTableSQL := fmt.Sprintf(`COPY(SELECT * FROM read_parquet (ARRAY['%s']) order by %s)TO '%s' (FORMAT 'parquet')`, strings.Join(files, "','"), strings.Join(table.OrderBy, ","), tmpFilePath) + _, err = conn.Exec(createTableSQL) + if err != nil { + return err + } + + //// Perform the merge + //mergeSQL := fmt.Sprintf( + // `COPY (SELECT * FROM temp_table ORDER BY %s) TO '%s' (FORMAT 'parquet')`, + // strings.Join(table.OrderBy, ","), + // tmpFilePath, + //) + //_, err = conn.Exec(mergeSQL) + //if err != nil { + // return err + //} + + // Cleanup old files + for _, file := range files { + if err := os.Remove(file); err != nil { + return err + } + } + + finalFilePath := filepath.Join(filepath.Dir(files[0]), filepath.Base(tmpFilePath)) + if err := os.Rename(tmpFilePath, finalFilePath); err != nil { + return err + } + + return nil +} diff --git a/tests/merge_service_test.go b/tests/merge_service_test.go new file mode 100644 index 0000000..15a5f93 --- /dev/null +++ b/tests/merge_service_test.go @@ -0,0 +1,93 @@ +package tests + +import ( + "fmt" + 
"github.com/stretchr/testify/assert" + "quackpipe/model" + "quackpipe/service" + "testing" +) + +func TestStore_Success(t *testing.T) { + var table = &model.Table{ + Name: "experimental", + Path: "/tmp/example", + Fields: [][2]string{{"timestamp", "UInt64"}, {"str2", "String"}, {"value", "Float64"}}, + OrderBy: []string{"timestamp"}, + } + + // Initialize the MergeTreeService + mt, err := service.NewMergeTreeService("test") + assert.NoError(t, err, "Failed to create MergeTreeService") + + // Call Store method + err = mt.Store(table, map[string][]any{ + "str2": []any{"a", "b", "c"}, + "timestamp": []any{uint64(1628596100), uint64(1628596001), uint64(1628596002)}, + "value": []any{float64(1.1), float64(2.2), float64(3.3)}, + }) + + err = mt.Merge(table) + // Assert no error occurred + assert.NoError(t, err, "Store method returned an error") +} + +func TestStore(t *testing.T) { + table := &model.Table{ + Fields: [][2]string{ + {"str", "String"}, + {"timestamp", "UInt64"}, + {"value", "Float64"}, + }, + } + mt, err := service.NewMergeTreeService("test") + if err != nil { + fmt.Println(err.Error()) + } + // Test: data entries have the invalid type + t.Run("InvalidDataType", func(t *testing.T) { + err := mt.Store(table, map[string][]any{ + "str": []any{123, "b", "c"}, // invalid: int instead of string + "timestamp": []any{uint64(1628596000), uint64(1628596001), uint64(1628596002)}, + "value": []any{float64(1.1), float64(2.2), float64(3.3)}, + }) + + assert.Error(t, err) + assert.EqualError(t, err, "invalid data type for column str: expected string") + }) + + // Test: data entries are not of the same size + t.Run("UnequalDataSizes", func(t *testing.T) { + err := mt.Store(table, map[string][]any{ + "str": []any{"a", "b"}, // size 2 + "timestamp": []any{uint64(1628596000), uint64(1628596001), uint64(1628596002)}, // size 3 + "value": []any{float64(1.1), float64(2.2), float64(3.3)}, // size 3 + }) + + assert.Error(t, err) + assert.EqualError(t, err, "columns length and data length mismatch") + }) + + // Test: data size is less than columns size + t.Run("DataSizeLessThanColumns", func(t *testing.T) { + err := mt.Store(table, map[string][]any{ + "str": []any{"a"}, // size 1 + "timestamp": []any{uint64(1628596000), uint64(1628596001), uint64(1628596002)}, // size 3 + "value": []any{float64(1.1), float64(2.2), float64(3.3)}, // size 3 + }) + + assert.Error(t, err) + assert.EqualError(t, err, "columns length and data length mismatch") + }) + + // Test: columns size is not equal to the table.Fields size + t.Run("ColumnsSizeMismatch", func(t *testing.T) { + err := mt.Store(table, map[string][]any{ + "str": []any{"a", "b", "c"}, + "timestamp": []any{uint64(1628596000), uint64(1628596001), uint64(1628596002)}, + }) // Missing "value" column + + assert.Error(t, err) + assert.EqualError(t, err, "columns size does not match table fields size") + }) +} diff --git a/tests/tables_test.go b/tests/tables_test.go new file mode 100644 index 0000000..52262f8 --- /dev/null +++ b/tests/tables_test.go @@ -0,0 +1,75 @@ +package tests + +import ( + "database/sql" + "github.com/google/uuid" + "log" + "os" + "quackpipe/repository" + db2 "quackpipe/service/db" + "testing" +) + +var dbFilePath string +var db *sql.DB + +// TestMain is called by the testing framework before any tests are run. 
+func TestMain(m *testing.M) { + var err error + dbFilePath = "test2" + ".db" + // Initialize the database + db, err = db2.ConnectDuckDB(dbFilePath) + if err != nil { + log.Fatalf("failed to open DuckDB database: %v", err) + } + + // Create necessary tables + if err := repository.CreateDuckDBTablesTable(db); err != nil { + db.Close() // Ensure DB is closed on error + log.Fatalf("failed to create DuckDB tables table: %v", err) + } + + // Run the tests + code := m.Run() + + // Teardown - close the database + db.Close() + + // Exit with the test result code + os.Exit(code) +} + +func TestPersistentStorage(t *testing.T) { + // Insert some metadata + if err := repository.InsertTableMetadata(db, + "test_table"+uuid.New().String(), + "/path/to/table_!", []string{"field3"}, + []string{"VARCHAR 2"}, + []string{"field1 ASC 2"}, "some_engine_1", + []string{"created_at_2"}, + []string{"SECOND_2"}, + []string{"partition_field_2"}); err != nil { + t.Fatalf("failed to insert table metadata: %v", err) + } + + db.Close() + + // Reconnect to the database + var err error + db, err = db2.ConnectDuckDB(dbFilePath) + if err != nil { + t.Fatalf("failed to reopen DuckDB database: %v", err) + } + defer db.Close() + + // Display all data from the 'tables' table + if err := repository.DisplayAllData(db, "tables"); err != nil { + t.Fatalf("failed to display table data: %v", err) + } + +} + +func generateUniqueID() string { + id := uuid.New() + return id.String() +} From a5437eeb7e7d59492452f0d8c594b65c56b3b20f Mon Sep 17 00:00:00 2001 From: akvlad Date: Sat, 19 Oct 2024 17:42:52 +0300 Subject: [PATCH 02/12] parquet writer initial --- benchmark/main.go | 131 +++++ config/app_flags.go | 5 + config/config_test.yaml | 4 + config/configuration.go | 33 ++ config/configuration_test.go | 11 + e2e_test.go | 104 ++++ go.mod | 40 +- go.sum | 93 ++- handler/api_handler.go | 17 +- handler/quackpipe/create_table.go | 1 + main.go | 42 +- merge/handlers/create_table.go | 72 +++ merge/handlers/insert_into.go | 55 ++ merge/merge.go | 46 ++ merge/merge_test.go | 39 ++ merge/parsers/ndjson_insert_parser.go | 133 +++++ merge/parsers/parsers.go | 34 ++ merge/repository/registry.go | 147 +++++ .../repository}/tables_repository.go | 55 +- merge/service/merge_tree_service.go | 546 ++++++++++++++++++ merge/shared/consts.go | 6 + model/flags.go | 1 + model/table.go | 12 +- router/apiRouter.go | 18 +- router/route.go | 29 +- service/db/db.go | 44 -- service/merge_tree_service.go | 364 ------------ utils/promise/promise.go | 45 ++ 28 files changed, 1620 insertions(+), 507 deletions(-) create mode 100644 benchmark/main.go create mode 100644 config/app_flags.go create mode 100644 config/config_test.yaml create mode 100644 config/configuration.go create mode 100644 config/configuration_test.go create mode 100644 e2e_test.go create mode 100644 handler/quackpipe/create_table.go create mode 100644 merge/handlers/create_table.go create mode 100644 merge/handlers/insert_into.go create mode 100644 merge/merge.go create mode 100644 merge/merge_test.go create mode 100644 merge/parsers/ndjson_insert_parser.go create mode 100644 merge/parsers/parsers.go create mode 100644 merge/repository/registry.go rename {repository => merge/repository}/tables_repository.go (56%) create mode 100644 merge/service/merge_tree_service.go create mode 100644 merge/shared/consts.go delete mode 100644 service/merge_tree_service.go create mode 100644 utils/promise/promise.go diff --git a/benchmark/main.go b/benchmark/main.go new file mode 100644 index 0000000..059b99f --- 
/dev/null +++ b/benchmark/main.go @@ -0,0 +1,131 @@ +package main + +import ( + "bytes" + "fmt" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "github.com/prometheus/client_golang/prometheus/promhttp" + "io" + "net/http" + "strings" + "sync" + "sync/atomic" + "time" +) + +const CLIENTS = 1 + +var requestDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "insert_request_duration_seconds", + Help: "Duration of HTTP requests in seconds", + ConstLabels: prometheus.Labels{ + "job": "quackdb_benchmark", + "clients": fmt.Sprintf("%d", CLIENTS), + "mbps": "30", + }, + Buckets: []float64{0.1, 0.5, 1, 5, 10, 20, 30}, +}) + +var totalRequests = promauto.NewCounter(prometheus.CounterOpts{ + Name: "total_insert_requests", + Help: "Duration of HTTP requests in seconds", + ConstLabels: prometheus.Labels{ + "job": "quackdb_benchmark", + "clients": fmt.Sprintf("%d", CLIENTS), + "mbps": "30", + }, +}) + +var totalBytes = promauto.NewCounter(prometheus.CounterOpts{ + Name: "total_insert_bytes", + Help: "Duration of HTTP requests in seconds", + ConstLabels: prometheus.Labels{ + "job": "quackdb_benchmark", + "clients": fmt.Sprintf("%d", CLIENTS), + "mbps": "30", + }, +}) + +func main() { + go func() { + http.Handle("/metrics", promhttp.Handler()) + if err := http.ListenAndServe(":9090", nil); err != nil { + panic(err) + } + }() + time.Sleep(time.Minute) + runBenchmark(30, CLIENTS, time.Minute*5) +} + +func runBenchmark(mbps int, clients int, timeout time.Duration) { + resp, err := http.Post("http://localhost:8333/quackdb/create", "application/x-yaml", + strings.NewReader(`create_table: test +fields: + timestamp_ns: Int64 + fingerprint: Int64 + str: String + value: Float64 +engine: Merge +order_by: + - timestamp_ns +timestamp: + field: timestamp_ns + precision: ns +partition_by: "" +`)) + if err != nil { + panic(err) + } + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + if resp.StatusCode != 200 { + panic(fmt.Errorf("[%d]: %s", resp.StatusCode, string(body))) + } + + bPClient := mbps * 1024 * 1024 / clients + wg := &sync.WaitGroup{} + var working int32 = 1 + t := time.NewTicker(time.Second) + for i := 0; i < clients; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + var _wg sync.WaitGroup + for range t.C { + if atomic.LoadInt32(&working) != 1 { + return + } + for i := 0; i < clients; i++ { + _wg.Add(1) + go func() { + defer _wg.Done() + bodyBuilder := strings.Builder{} + for bodyBuilder.Len() < bPClient { + s := fmt.Sprintf( + "{\"timestamp_ns\": %d, \"fingerprint\": 1234567890, \"str\": \"hello %[1]d\", \"value\": 123.456}\n", + time.Now().UnixNano()) + bodyBuilder.WriteString(s) + } + body = []byte(bodyBuilder.String()) + start := time.Now() + res, err := http.Post("http://localhost:8333/quackdb/test/insert", "application/x-ndjson", + bytes.NewReader(body), + ) + if err != nil { + panic(err) + } + defer res.Body.Close() + requestDuration.Observe(time.Since(start).Seconds()) + totalRequests.Inc() + totalBytes.Add(float64(len(body))) + }() + } + _wg.Wait() + } + }(i) + } + time.Sleep(timeout) + atomic.StoreInt32(&working, 0) + wg.Wait() +} diff --git a/config/app_flags.go b/config/app_flags.go new file mode 100644 index 0000000..a317335 --- /dev/null +++ b/config/app_flags.go @@ -0,0 +1,5 @@ +package config + +import "quackpipe/model" + +var AppFlags *model.CommandLineFlags diff --git a/config/config_test.yaml b/config/config_test.yaml new file mode 100644 index 0000000..039e161 --- /dev/null +++ 
b/config/config_test.yaml @@ -0,0 +1,4 @@ +quack_pipe: + root: /tmp/data + merge_timeout_s: 10 + secret: q1w2e3r4t5 \ No newline at end of file diff --git a/config/configuration.go b/config/configuration.go new file mode 100644 index 0000000..757db78 --- /dev/null +++ b/config/configuration.go @@ -0,0 +1,33 @@ +package config + +import ( + "github.com/spf13/viper" +) + +type QuackPipeConfiguration struct { + Enabled bool `json:"enabled" mapstructure:"enabled" default:"false"` + Root string `json:"root" mapstructure:"root" default:""` + MergeTimeoutS int `json:"merge_timeout_s" mapstructure:"merge_timeout_s" default:"60"` + Secret string `json:"secret" mapstructure:"secret" default:""` +} + +type Configuration struct { + QuackPipe QuackPipeConfiguration `json:"quack_pipe" mapstructure:"quack_pipe" default:""` + DBPath string `json:"db_path" mapstructure:"db_path" default:"/tmp/db"` +} + +var Config *Configuration + +func InitConfig(file string) { + viper.SetConfigFile(file) + viper.AutomaticEnv() + err := viper.ReadInConfig() + if err != nil { + panic(err) + } + Config = &Configuration{} + err = viper.Unmarshal(Config) + if err != nil { + panic(err) + } +} diff --git a/config/configuration_test.go b/config/configuration_test.go new file mode 100644 index 0000000..c867db7 --- /dev/null +++ b/config/configuration_test.go @@ -0,0 +1,11 @@ +package config + +import ( + "fmt" + "testing" +) + +func TestInitConfig(t *testing.T) { + InitConfig("config_test.yaml") + fmt.Println(Config) +} diff --git a/e2e_test.go b/e2e_test.go new file mode 100644 index 0000000..6ce2b94 --- /dev/null +++ b/e2e_test.go @@ -0,0 +1,104 @@ +package main + +import ( + "fmt" + "io" + "net/http" + "quackpipe/config" + "quackpipe/merge" + "quackpipe/model" + "quackpipe/router" + "strings" + "testing" + "time" +) + +func TestE2E(t *testing.T) { + config.Config = &config.Configuration{ + QuackPipe: config.QuackPipeConfiguration{ + Enabled: true, + Root: "_testdata", + MergeTimeoutS: 10, + Secret: "q1w2e3r4t5", + }, + DBPath: "_testdata", + } + config.AppFlags = &model.CommandLineFlags{ + Host: toPtr("localhost"), + Port: toPtr("8123"), + Stdin: toPtr(false), + Alias: toPtr(true), + Format: toPtr(""), + Params: toPtr(""), + DBPath: toPtr("_testdata"), + Config: toPtr(""), + } + go runServer() + time.Sleep(1 * time.Second) + resp, err := http.Post("http://localhost:8123/quackdb/create", "application/x-yaml", + strings.NewReader(`create_table: test +fields: + timestamp_ns: Int64 + fingerprint: Int64 + str: String + value: Float64 +engine: Merge +order_by: + - timestamp_ns +timestamp: + field: timestamp_ns + precision: ns +partition_by: "" +`)) + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + if resp.StatusCode != 200 { + t.Fatalf("[%d]: %s", resp.StatusCode, string(body)) + } + fmt.Println(string(body)) + + resp, err = http.Post("http://localhost:8123/quackdb/test/insert", "application/x-ndjson", + strings.NewReader( + `{"timestamp_ns": 1668326823000000000, "fingerprint": 1234567890, "str": "hello", "value": 123.456} +{"timestamp_ns": 1668326823000000000, "fingerprint": 1234567890, "str": "hello", "value": 123.456} +{"timestamp_ns": 1668326823000000000, "fingerprint": 1234567890, "str": "hello", "value": 123.456} +{"timestamp_ns": 1668326823000000000, "fingerprint": 1234567890, "str": "hello", "value": 123.456} +{"timestamp_ns": 1668326823000000000, "fingerprint": 1234567890, "str": "hello", "value": 123.456} +{"timestamp_ns": 1668326823000000000, "fingerprint": 1234567890, 
"str": "hello", "value": 123.456} +{"timestamp_ns": 1668326823000000000, "fingerprint": 1234567890, "str": "hello", "value": 123.456} +{"timestamp_ns": 1668326823000000000, "fingerprint": 1234567890, "str": "hello", "value": 123.456}`, + ), + ) + if err != nil { + t.Fatal(err) + } + defer resp.Body.Close() + body, err = io.ReadAll(resp.Body) + if err != nil { + t.Fatal(err) + } + if resp.StatusCode != 200 { + t.Fatalf("[%d]: %s", resp.StatusCode, string(body)) + } + fmt.Println(string(body)) + + return +} + +func toPtr[X any](val X) *X { + return &val +} + +func runServer() { + if config.Config.QuackPipe.Enabled { + merge.Init() + } + r := router.NewRouter(config.AppFlags) + fmt.Printf("QuackPipe API Running: %s:%s\n", *config.AppFlags.Host, *config.AppFlags.Port) + if err := http.ListenAndServe(*config.AppFlags.Host+":"+*config.AppFlags.Port, r); err != nil { + panic(err) + } +} diff --git a/go.mod b/go.mod index eda4c33..6d51130 100644 --- a/go.mod +++ b/go.mod @@ -6,40 +6,66 @@ toolchain go1.21.3 require ( github.com/apache/arrow/go/v18 v18.0.0-20240829005432-58415d1fac50 + github.com/go-faster/jx v1.1.0 github.com/google/uuid v1.6.0 github.com/gorilla/mux v1.8.1 - github.com/marcboeker/go-duckdb v1.7.0 + github.com/marcboeker/go-duckdb v1.8.0 + github.com/prometheus/client_golang v1.20.4 + github.com/spf13/viper v1.19.0 github.com/stretchr/testify v1.9.0 + github.com/tidwall/btree v1.7.0 + golang.org/x/sync v0.8.0 + gopkg.in/yaml.v3 v3.0.1 ) require ( github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect github.com/andybalholm/brotli v1.1.0 // indirect - github.com/apache/arrow/go/v14 v14.0.2 // indirect + github.com/apache/arrow/go/v17 v17.0.0 // indirect github.com/apache/thrift v0.20.0 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/go-faster/errors v0.6.1 // indirect github.com/goccy/go-json v0.10.3 // indirect github.com/golang/snappy v0.0.4 // indirect github.com/google/flatbuffers v24.3.25+incompatible // indirect + github.com/hashicorp/hcl v1.0.0 // indirect github.com/klauspost/asmfmt v1.3.2 // indirect github.com/klauspost/compress v1.17.9 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect + github.com/magiconair/properties v1.8.7 // indirect github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pelletier/go-toml/v2 v2.2.2 // indirect github.com/pierrec/lz4/v4 v4.1.21 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect + github.com/prometheus/client_model v0.6.1 // indirect + github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/procfs v0.15.1 // indirect + github.com/sagikazarmark/locafero v0.4.0 // indirect + github.com/sagikazarmark/slog-shim v0.1.0 // indirect + github.com/segmentio/asm v1.2.0 // indirect + github.com/sourcegraph/conc v0.3.0 // indirect + github.com/spf13/afero v1.11.0 // indirect + github.com/spf13/cast v1.6.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/subosito/gotenv v1.6.0 // indirect github.com/zeebo/xxh3 
v1.0.2 // indirect + go.uber.org/atomic v1.9.0 // indirect + go.uber.org/multierr v1.9.0 // indirect golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect golang.org/x/mod v0.20.0 // indirect golang.org/x/net v0.28.0 // indirect - golang.org/x/sync v0.8.0 // indirect golang.org/x/sys v0.23.0 // indirect golang.org/x/text v0.17.0 // indirect golang.org/x/tools v0.24.0 // indirect golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240314234333-6e1732d8331c // indirect google.golang.org/grpc v1.63.2 // indirect google.golang.org/protobuf v1.34.2 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect + gopkg.in/ini.v1 v1.67.0 // indirect ) diff --git a/go.sum b/go.sum index f54b565..85443df 100644 --- a/go.sum +++ b/go.sum @@ -2,14 +2,28 @@ github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvK github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= -github.com/apache/arrow/go/v14 v14.0.2 h1:N8OkaJEOfI3mEZt07BIkvo4sC6XDbL+48MBPWO5IONw= -github.com/apache/arrow/go/v14 v14.0.2/go.mod h1:u3fgh3EdgN/YQ8cVQRguVW3R+seMybFg8QBQ5LU+eBY= +github.com/apache/arrow/go/v17 v17.0.0 h1:RRR2bdqKcdbss9Gxy2NS/hK8i4LDMh23L6BbkN5+F54= +github.com/apache/arrow/go/v17 v17.0.0/go.mod h1:jR7QHkODl15PfYyjM2nU+yTLScZ/qfj7OSUZmJ8putc= github.com/apache/arrow/go/v18 v18.0.0-20240829005432-58415d1fac50 h1:3vA3hoM7fM4pJHG1dt0CEMzTwitFvutUrAiIwY+Bp+A= github.com/apache/arrow/go/v18 v18.0.0-20240829005432-58415d1fac50/go.mod h1:pAdO1xbg0WTJ++tq74I5xKX+yUD7MG0cEI24P+jko10= github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI= github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= +github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/go-faster/errors v0.6.1 h1:nNIPOBkprlKzkThvS/0YaX8Zs9KewLCOSFQS5BU06FI= +github.com/go-faster/errors v0.6.1/go.mod h1:5MGV2/2T9yvlrbhe9pD9LO5Z/2zCSq2T8j+Jpi2LAyY= +github.com/go-faster/jx v1.1.0 h1:ZsW3wD+snOdmTDy9eIVgQdjUpXRRV4rqW8NS3t+20bg= +github.com/go-faster/jx 
v1.1.0/go.mod h1:vKDNikrKoyUmpzaJ0OkIkRQClNHFX/nF3dnTJZb3skg= github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= @@ -22,36 +36,88 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= +github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= -github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/marcboeker/go-duckdb v1.7.0 h1:c9DrS13ta+gqVgg9DiEW8I+PZBE85nBMLL/YMooYoUY= -github.com/marcboeker/go-duckdb v1.7.0/go.mod h1:WtWeqqhZoTke/Nbd7V9lnBx7I2/A/q0SAq/urGzPCMs= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= +github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= +github.com/marcboeker/go-duckdb v1.8.0 h1:iOWv1wTL0JIMqpyns6hCf5XJJI4fY6lmJNk+itx5RRo= +github.com/marcboeker/go-duckdb v1.8.0/go.mod h1:2oV8BZv88S16TKGKM+Lwd0g7DX84x0jMxjTInThC8Is= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= +github.com/pelletier/go-toml/v2 
v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= +github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= +github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= +github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= +github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= +github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= +github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= +github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= +github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= +github.com/segmentio/asm v1.2.0 h1:9BQrFxC+YOHJlTlHGkTrFWf59nbL3XnCoFLTwDCI7ys= +github.com/segmentio/asm v1.2.0/go.mod h1:BqMnlJP91P8d+4ibuonYZw9mfnzI9HfxselHZr5aAcs= +github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= +github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= +github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= +github.com/spf13/afero v1.11.0/go.mod h1:GH9Y3pIexgf1MTIWtNGyogA5MwRIDXGUr+hbWNoBjkY= +github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= +github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI= +github.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= +github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/tidwall/btree v1.7.0 h1:L1fkJH/AuEh5zBnnBbmTwQ5Lt+bRJ5A8EWecslvo9iI= +github.com/tidwall/btree v1.7.0/go.mod h1:twD9XRA5jj9VUQGELzDO4HPQTNJsoWWfYEL+EUQ2cKY= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= +go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= +go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= +go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= @@ -71,14 +137,17 @@ golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 h1:LLhsEBxRTBLuKlQxFBYUO golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1:cZGRis4/ot9uVm639a+rHCUaG0JJHEsdyzSQTMX+suY= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240314234333-6e1732d8331c h1:lfpJ/2rWPa/kJgxyyXM8PrNnfCzcmxJ265mADgwmvLI= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240314234333-6e1732d8331c/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= +gopkg.in/ini.v1 v1.67.0/go.mod 
h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/handler/api_handler.go b/handler/api_handler.go index 9e614b2..dca2679 100644 --- a/handler/api_handler.go +++ b/handler/api_handler.go @@ -5,8 +5,8 @@ import ( "fmt" "io" "net/http" + "quackpipe/config" "quackpipe/controller/root" - "quackpipe/model" "quackpipe/utils" ) @@ -14,16 +14,15 @@ import ( var staticPlay string type Handler struct { - FlagInformation *model.CommandLineFlags } -func (u *Handler) Handlers(w http.ResponseWriter, r *http.Request) { +func (u *Handler) Handlers(w http.ResponseWriter, r *http.Request) error { var bodyBytes []byte var query string var err error - defaultFormat := *u.FlagInformation.Format - defaultParams := *u.FlagInformation.Params - defaultPath := *u.FlagInformation.DBPath + defaultFormat := *config.AppFlags.Format + defaultParams := *config.AppFlags.Params + defaultPath := *config.AppFlags.DBPath // handle query parameter if r.URL.Query().Get("query") != "" { query = r.URL.Query().Get("query") @@ -31,7 +30,7 @@ func (u *Handler) Handlers(w http.ResponseWriter, r *http.Request) { bodyBytes, err = io.ReadAll(r.Body) if err != nil { fmt.Printf("Body reading error: %v", err) - return + return nil } defer r.Body.Close() query = string(bodyBytes) @@ -66,12 +65,12 @@ func (u *Handler) Handlers(w http.ResponseWriter, r *http.Request) { _, _ = w.Write([]byte(staticPlay)) } else { - result, err := root.QueryOperation(u.FlagInformation, query, r, defaultPath, defaultFormat, defaultParams) + result, err := root.QueryOperation(config.AppFlags, query, r, defaultPath, defaultFormat, defaultParams) if err != nil { _, _ = w.Write([]byte(err.Error())) } else { _, _ = w.Write([]byte(result)) } } - + return nil } diff --git a/handler/quackpipe/create_table.go b/handler/quackpipe/create_table.go new file mode 100644 index 0000000..023912b --- /dev/null +++ b/handler/quackpipe/create_table.go @@ -0,0 +1 @@ +package quackpipe diff --git a/main.go b/main.go index a5ab7b3..3f40fe7 100644 --- a/main.go +++ b/main.go @@ -3,13 +3,12 @@ package main import ( "flag" "fmt" - "log" "net/http" "os" + "quackpipe/config" + "quackpipe/merge" "quackpipe/model" - "quackpipe/repository" "quackpipe/router" - "quackpipe/service/db" "quackpipe/utils" ) @@ -20,8 +19,8 @@ func initFlags() *model.CommandLineFlags { appFlags.Host = flag.String("host", "0.0.0.0", "API host. Default 0.0.0.0") appFlags.Port = flag.String("port", "8123", "API port. Default 8123") appFlags.Format = flag.String("format", "JSONCompact", "API port. Default JSONCompact") + appFlags.Config = flag.String("config", "config.yaml", "path to the configuration file") appFlags.Params = flag.String("params", "", "DuckDB optional parameters. Default to none.") - appFlags.DBPath = flag.String("dbpath", "/tmp/", "DuckDB DB storage path. Default to /tmp/") appFlags.Stdin = flag.Bool("stdin", false, "STDIN query. Default false") appFlags.Alias = flag.Bool("alias", false, "Built-in CH Aliases. 
Default true") flag.Parse() @@ -29,23 +28,10 @@ func initFlags() *model.CommandLineFlags { return appFlags } -var appFlags *model.CommandLineFlags - func main() { - - dbConn, err := db.ConnectDuckDB("test.db") - if err != nil { - log.Fatalf("failed to connect to DuckDB: %v", err) - } - defer dbConn.Close() - err = repository.CreateDuckDBTablesTable(dbConn) - if err != nil { - log.Fatalf("failed to create metadata table: %v", err) - } - - appFlags = initFlags() - if *appFlags.Stdin { - rows, duration, format, err := utils.ReadFromScanner(*appFlags) + config.AppFlags = initFlags() + if *config.AppFlags.Stdin { + rows, duration, format, err := utils.ReadFromScanner(*config.AppFlags) if err != nil { fmt.Println(err) os.Exit(1) @@ -58,13 +44,15 @@ func main() { fmt.Println(results) } - } else { - r := router.NewRouter(appFlags) - fmt.Printf("QuackPipe API Running: %s:%s\n", *appFlags.Host, *appFlags.Port) - if err := http.ListenAndServe(*appFlags.Host+":"+*appFlags.Port, r); err != nil { - panic(err) - } - + } + config.InitConfig(*config.AppFlags.Config) + if config.Config.QuackPipe.Enabled { + merge.Init() + } + r := router.NewRouter(config.AppFlags) + fmt.Printf("QuackPipe API Running: %s:%s\n", *config.AppFlags.Host, *config.AppFlags.Port) + if err := http.ListenAndServe(*config.AppFlags.Host+":"+*config.AppFlags.Port, r); err != nil { + panic(err) } } diff --git a/merge/handlers/create_table.go b/merge/handlers/create_table.go new file mode 100644 index 0000000..120f762 --- /dev/null +++ b/merge/handlers/create_table.go @@ -0,0 +1,72 @@ +package handlers + +import ( + "fmt" + "gopkg.in/yaml.v3" + "io" + "net/http" + "path/filepath" + "quackpipe/config" + "quackpipe/merge/repository" + "quackpipe/model" +) + +type TimestampField struct { + Field string `json:"field" yaml:"field"` + Precision string `json:"precision" yaml:"precision"` +} + +type CreateTableRequest struct { + CreateTable string `json:"create_table" yaml:"create_table"` + Fields map[string]string `json:"fields" yaml:"fields"` + Engine string `json:"engine" yaml:"engine"` + OrderBy []string `json:"order_by" yaml:"order_by"` + Timestamp TimestampField `json:"timestamp" yaml:"timestamp"` + PartitionBy string `json:"partition_by" yaml:"partition_by"` +} + +func CreateTableHandler(w http.ResponseWriter, r *http.Request) error { + defer r.Body.Close() + body, err := io.ReadAll(r.Body) + if err != nil { + return err + } + var req CreateTableRequest + err = yaml.Unmarshal(body, &req) + if err != nil { + return err + } + + var fields [][2]string + for field, fieldType := range req.Fields { + fields = append(fields, [2]string{field, fieldType}) + } + + for _, field := range req.OrderBy { + if _, ok := req.Fields[field]; !ok { + return fmt.Errorf("field %s does not exist", field) + } + } + + if _, ok := req.Fields[req.Timestamp.Field]; !ok { + return fmt.Errorf("field %s does not exist", req.Timestamp.Field) + } + + table := model.Table{ + Name: req.CreateTable, + Path: filepath.Join(config.Config.QuackPipe.Root, req.CreateTable), + Fields: fields, + Engine: req.Engine, + OrderBy: req.OrderBy, + TimestampField: req.Timestamp.Field, + TimestampPrecision: req.Timestamp.Precision, + PartitionBy: req.PartitionBy, + } + err = repository.RegisterNewTable(&table) + if err != nil { + return err + } + w.WriteHeader(http.StatusOK) + w.Write([]byte("Ok")) + return nil +} diff --git a/merge/handlers/insert_into.go b/merge/handlers/insert_into.go new file mode 100644 index 0000000..b227b62 --- /dev/null +++ b/merge/handlers/insert_into.go @@ -0,0 
+1,55 @@ +package handlers + +import ( + "github.com/gorilla/mux" + "net/http" + "quackpipe/merge/parsers" + "quackpipe/merge/repository" + "quackpipe/utils/promise" +) + +func InsertIntoHandler(w http.ResponseWriter, r *http.Request) error { + contentType := r.Header.Get("Content-Type") + parameters := mux.Vars(r) + tableName := parameters["table"] + table, err := repository.GetTable(tableName) + if err != nil { + return err + } + + var fieldNames []string + var fieldTypes []string + for _, field := range table.Table.Fields { + fieldNames = append(fieldNames, field[0]) + fieldTypes = append(fieldTypes, field[1]) + } + + parser, err := parsers.GetParser(contentType, fieldNames, fieldTypes) + if err != nil { + return err + } + res, err := parser.ParseReader(r.Body) + if err != nil { + return err + } + var promises []*promise.Promise[int32] + for _res := range res { + if _res.Error != nil { + go func() { + for range res { + } + }() + return _res.Error + } + promises = append(promises, table.Store(_res.Data)) + } + for _, p := range promises { + _, err = p.Get() + if err != nil { + return err + } + } + w.WriteHeader(http.StatusOK) + w.Write([]byte("Ok")) + return nil +} diff --git a/merge/merge.go b/merge/merge.go new file mode 100644 index 0000000..6775a0e --- /dev/null +++ b/merge/merge.go @@ -0,0 +1,46 @@ +package merge + +import ( + "quackpipe/config" + "quackpipe/merge/handlers" + "quackpipe/merge/repository" + "quackpipe/router" + "quackpipe/service/db" +) + +func Init() { + conn, err := db.ConnectDuckDB(config.Config.DBPath + "/ddb.db") + if err != nil { + panic(err) + } + + _, err = conn.Exec("INSTALL json; LOAD json;") + if err != nil { + panic(err) + } + + err = repository.CreateDuckDBTablesTable(conn) + if err != nil { + panic(err) + } + + err = repository.InitRegistry(conn) + if err != nil { + panic(err) + } + + InitHandlers() +} + +func InitHandlers() { + router.RegisterRoute(&router.Route{ + Path: "/quackdb/create", + Methods: []string{"POST"}, + Handler: handlers.CreateTableHandler, + }) + router.RegisterRoute(&router.Route{ + Path: "/quackdb/{table}/insert", + Methods: []string{"POST"}, + Handler: handlers.InsertIntoHandler, + }) +} diff --git a/merge/merge_test.go b/merge/merge_test.go new file mode 100644 index 0000000..6dafd65 --- /dev/null +++ b/merge/merge_test.go @@ -0,0 +1,39 @@ +package merge + +import ( + "quackpipe/config" + "quackpipe/merge/repository" + "quackpipe/model" + "testing" +) + +func TestMerge(t *testing.T) { + config.Config = &config.Configuration{ + QuackPipe: config.QuackPipeConfiguration{ + Enabled: true, + Root: ".", + MergeTimeoutS: 10, + Secret: "q1w2e3r4t5", + }, + DBPath: ".", + } + Init() + err := repository.RegisterNewTable(&model.Table{ + Name: "test", + Path: "/tmp/test", + Fields: [][2]string{ + {"timestamp", "UInt64"}, + {"value", "Float64"}, + }, + Engine: "Merge", + OrderBy: []string{ + "timestamp", + }, + TimestampField: "timestamp", + TimestampPrecision: "s", + PartitionBy: "timestamp / 3600 / 24", + }) + if err != nil { + t.Fatal(err) + } +} diff --git a/merge/parsers/ndjson_insert_parser.go b/merge/parsers/ndjson_insert_parser.go new file mode 100644 index 0000000..93d49eb --- /dev/null +++ b/merge/parsers/ndjson_insert_parser.go @@ -0,0 +1,133 @@ +package parsers + +import ( + "bufio" + "bytes" + "fmt" + "github.com/go-faster/jx" + "io" + "quackpipe/merge/shared" +) + +type NDJSONParser struct { + fields map[string]string + lines map[string]any +} + +func (N *NDJSONParser) Parse(data []byte) (chan *ParserResponse, error) { + return 
N.ParseReader(bytes.NewReader(data)) +} + +func (N *NDJSONParser) ParseReader(r io.Reader) (chan *ParserResponse, error) { + scanner := bufio.NewScanner(r) + scanner.Split(bufio.ScanLines) + N.resetLines() + res := make(chan *ParserResponse) + go func() { + defer close(res) + bytesParsed := 0 + linesLen := 0 + for scanner.Scan() { + err := N.parseLine(scanner.Bytes()) + if err != nil { + res <- &ParserResponse{Error: err} + return + } + bytesParsed += len(scanner.Bytes()) + linesLen++ + if bytesParsed >= 10*1024*1024 { + res <- &ParserResponse{Data: N.lines} + N.resetLines() + bytesParsed = 0 + linesLen = 0 + } + } + if linesLen > 0 { + res <- &ParserResponse{Data: N.lines} + N.resetLines() + } + }() + return res, nil +} + +func (N *NDJSONParser) resetLines() { + N.lines = make(map[string]any) + for k, v := range N.fields { + switch v { + case shared.TYPE_STRING: + N.lines[k] = make([]string, 0) + case shared.TYPE_INT64: + N.lines[k] = make([]int64, 0) + case shared.TYPE_UINT64: + N.lines[k] = make([]uint64, 0) + case shared.TYPE_FLOAT64: + N.lines[k] = make([]float64, 0) + } + } +} + +func (N *NDJSONParser) parseLine(line []byte) error { + d := jx.DecodeBytes(line) + return d.Obj(func(d *jx.Decoder, key string) error { + tp, ok := N.fields[key] + if !ok { + return fmt.Errorf("field %s not found", key) + } + switch tp { + case shared.TYPE_STRING: + str, err := d.Str() + if err != nil { + return err + } + if _, ok := N.lines[key].([]string); !ok { + return fmt.Errorf("field %s is not a string", key) + } + N.lines[key] = append(N.lines[key].([]string), str) + case shared.TYPE_INT64: + str, err := d.Int64() + if err != nil { + return err + } + field := N.lines[key] + if _, ok := field.([]int64); !ok { + return fmt.Errorf("field %s is not an int64", key) + } + field = append(field.([]int64), str) + N.lines[key] = field + case shared.TYPE_UINT64: + str, err := d.UInt64() + if err != nil { + return err + } + field := N.lines[key] + if _, ok := field.([]uint64); !ok { + return fmt.Errorf("field %s is not a uint64", key) + } + field = append(field.([]uint64), str) + N.lines[key] = field + case shared.TYPE_FLOAT64: + str, err := d.Float64() + if err != nil { + return err + } + field := N.lines[key] + if _, ok := field.([]float64); !ok { + return fmt.Errorf("field %s is not a float64", key) + } + field = append(field.([]float64), str) + N.lines[key] = field + } + return nil + }) +} + +var _ = func() int { + RegisterParser("application/x-ndjson", func(fieldNames []string, fieldTypes []string) IParser { + fields := make(map[string]string) + for i, name := range fieldNames { + fields[name] = fieldTypes[i] + } + return &NDJSONParser{fields: fields} + }) + return 0 +}() diff --git a/merge/parsers/parsers.go b/merge/parsers/parsers.go new file mode 100644 index 0000000..b3d86c4 --- /dev/null +++ b/merge/parsers/parsers.go @@ -0,0 +1,34 @@ +package parsers + +import ( + "fmt" + "io" + "strings" +) + +type ParserFactory func(fieldNames []string, fieldTypes []string) IParser + +var registry = make(map[string]ParserFactory) + +type IParser interface { + Parse(data []byte) (chan *ParserResponse, error) + ParseReader(r io.Reader) (chan *ParserResponse, error) +} + +type ParserResponse struct { + Data map[string]any + Error error +} + +func RegisterParser(name string, parser ParserFactory) { + registry[name] = parser +} + +func GetParser(name string, fieldNames []string, fieldTypes []string) (IParser, error) { + for _name, parser := range registry { + if strings.HasPrefix(name, _name) { + return 
parser(fieldNames, fieldTypes), nil + } + } + return nil, fmt.Errorf("parser %s not found", name) +} diff --git a/merge/repository/registry.go b/merge/repository/registry.go new file mode 100644 index 0000000..1e25cd2 --- /dev/null +++ b/merge/repository/registry.go @@ -0,0 +1,147 @@ +package repository + +import ( + "database/sql" + "fmt" + "os" + "path/filepath" + "quackpipe/config" + "quackpipe/merge/service" + "quackpipe/model" + "quackpipe/service/db" + "sync" + "time" +) + +var conn *sql.DB + +var registry = make(map[string]*service.MergeTreeService) +var mergeTicker *time.Ticker +var registryMtx sync.Mutex + +func InitRegistry(_conn *sql.DB) error { + var err error + if _conn == nil { + _conn, err = db.ConnectDuckDB(config.Config.DBPath + "/ddb.db") + if err != nil { + return err + } + } + conn = _conn + err = CreateDuckDBTablesTable(conn) + if err != nil { + return err + } + err = PopulateRegistry() + if err != nil { + return err + } + go RunMerge() + return nil +} + +func GetTable(name string) (*service.MergeTreeService, error) { + table, ok := registry[name] + if !ok { + return nil, fmt.Errorf("table %q not found", name) + } + return table, nil +} + +func RunMerge() { + mergeTicker = time.NewTicker(time.Second * 10) + for range mergeTicker.C { + _registry := make(map[string]*service.MergeTreeService, len(registry)) + func() { + registryMtx.Lock() + defer registryMtx.Unlock() + for k, v := range registry { + _registry[k] = v + } + }() + for _, table := range _registry { + err := table.Merge() + if err != nil { + fmt.Println(err) + } + } + } +} + +func RegisterNewTable(table *model.Table) error { + if _, ok := registry[table.Name]; ok { + return nil + } + fieldNames := make([]string, len(table.Fields)) + fieldTypes := make([]string, len(table.Fields)) + for i, field := range table.Fields { + fieldNames[i] = field[0] + fieldTypes[i] = field[1] + } + err := createTableFolders(table) + if err != nil { + return err + } + err = InsertTableMetadata( + conn, table.Name, table.Path, + fieldNames, fieldTypes, table.OrderBy, + table.Engine, table.TimestampField, table.TimestampPrecision, table.PartitionBy) + if err != nil { + return err + } + registryMtx.Lock() + registry[table.Name] = service.NewMergeTreeService(table) + registry[table.Name].Run() + registryMtx.Unlock() + return nil +} + +func PopulateRegistry() error { + res, err := conn.Query(` +SELECT name,path, field_names, field_types, order_by, engine, timestamp_field, timestamp_precision, partition_by +FROM tables +`) + if err != nil { + return err + } + defer res.Close() + + for res.Next() { + var table model.Table + var ( + fieldNames []any + fieldTypes []any + orderBy []any + ) + err = res.Scan( + &table.Name, &table.Path, + &fieldNames, &fieldTypes, &orderBy, + &table.Engine, &table.TimestampField, &table.TimestampPrecision, &table.PartitionBy, + ) + if err != nil { + return err + } + for i, n := range fieldNames { + table.Fields = append(table.Fields, [2]string{n.(string), fieldTypes[i].(string)}) + } + for _, n := range orderBy { + table.OrderBy = append(table.OrderBy, n.(string)) + } + func() { + registryMtx.Lock() + defer registryMtx.Unlock() + registry[table.Name] = service.NewMergeTreeService(&table) + registry[table.Name].Run() + }() + } + return nil + +} + +func createTableFolders(table *model.Table) error { + err := os.MkdirAll(filepath.Join(table.Path, "tmp"), 0755) + if err != nil { + return err + } + return os.MkdirAll(filepath.Join(table.Path, "data"), 0755) +} diff --git a/repository/tables_repository.go 
b/merge/repository/tables_repository.go similarity index 56% rename from repository/tables_repository.go rename to merge/repository/tables_repository.go index 7134017..0276e59 100644 --- a/repository/tables_repository.go +++ b/merge/repository/tables_repository.go @@ -2,10 +2,13 @@ package repository import ( "database/sql" + "encoding/json" "fmt" - "strings" + "sync" ) +var dbMtx sync.Mutex + func CreateDuckDBTablesTable(db *sql.DB) error { // Adjusted schema using DuckDB's ARRAY type query := ` @@ -18,7 +21,7 @@ func CreateDuckDBTablesTable(db *sql.DB) error { engine VARCHAR, timestamp_field VARCHAR, timestamp_precision VARCHAR, - partition_by VARCHAR[] + partition_by VARCHAR ); ` @@ -31,38 +34,30 @@ func CreateDuckDBTablesTable(db *sql.DB) error { return nil } -func InsertTableMetadata(db *sql.DB, name, path string, fieldNames []string, fieldTypes []string, orderBy []string, engine string, timestampField, timestampPrecision, partitionBy []string) error { - - fieldNamesStr := fmt.Sprintf("[%s]", strings.Join(fieldNames, ", ")) - fieldTypesStr := fmt.Sprintf("[%s]", strings.Join(fieldTypes, ", ")) - orderByStr := fmt.Sprintf("[%s]", strings.Join(orderBy, ", ")) - partitionByStr := fmt.Sprintf("[%s]", strings.Join(partitionBy, ", ")) - timestampPrecisionStr := fmt.Sprintf("[%s]", strings.Join(timestampPrecision, ", ")) - timestampFieldStr := fmt.Sprintf("[%s]", strings.Join(timestampField, ", ")) - query := `INSERT INTO tables ( - name, path, field_names, field_types, order_by, engine, timestamp_field, timestamp_precision, partition_by - ) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - ON CONFLICT (name) DO UPDATE SET - path = excluded.path, - order_by = excluded.order_by, - engine = excluded.engine, - timestamp_field = excluded.timestamp_field, - timestamp_precision = excluded.timestamp_precision, - partition_by = excluded.partition_by;` - // Prepare the SQL statement - stmt, err := db.Prepare(query) +func InsertTableMetadata(db *sql.DB, name, path string, fieldNames []string, fieldTypes []string, orderBy []string, + engine string, timestampField, timestampPrecision, partitionBy string) error { + fieldNamesJSON, err := json.Marshal(fieldNames) if err != nil { - return fmt.Errorf("failed to prepare SQL statement: %w", err) + return err } - defer stmt.Close() - - // Execute the SQL statement with the converted array literals - _, err = stmt.Exec(name, path, fieldNamesStr, fieldTypesStr, orderByStr, engine, timestampFieldStr, timestampPrecisionStr, partitionByStr) + fieldTypesJSON, err := json.Marshal(fieldTypes) if err != nil { - return fmt.Errorf("failed to insert table metadata: %w", err) + return err } - return nil + orderByJSON, err := json.Marshal(orderBy) + if err != nil { + return err + } + + query := `INSERT INTO tables ( + name, path, field_names, field_types, order_by, engine, timestamp_field, timestamp_precision, partition_by + ) SELECT ?, ?, ?::JSON::VARCHAR[], ?::JSON::VARCHAR[], ?::JSON::VARCHAR[], ?, ?, ?, ? 
ON CONFLICT DO NOTHING` + _, err = db.Exec(query, + name, path, + string(fieldNamesJSON), string(fieldTypesJSON), string(orderByJSON), + engine, timestampField, timestampPrecision, partitionBy) + + return err } func DisplayAllData(db *sql.DB, tableName string) error { diff --git a/merge/service/merge_tree_service.go b/merge/service/merge_tree_service.go new file mode 100644 index 0000000..b60ee87 --- /dev/null +++ b/merge/service/merge_tree_service.go @@ -0,0 +1,546 @@ +package service + +import ( + "context" + "errors" + "fmt" + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/apache/arrow/go/v18/arrow/memory" + "github.com/apache/arrow/go/v18/parquet" + "github.com/apache/arrow/go/v18/parquet/pqarrow" + "github.com/google/uuid" + _ "github.com/marcboeker/go-duckdb" + "github.com/tidwall/btree" + "golang.org/x/sync/errgroup" + "golang.org/x/sync/semaphore" + "os" + "path/filepath" + "quackpipe/model" + "quackpipe/service/db" + "quackpipe/utils/promise" + "sort" + "strings" + "sync" + "sync/atomic" + "time" +) + +type IMergeTree interface { + Store(columns map[string][]any) error + Merge() error + Run() + Stop() +} + +type MergeTreeService struct { + Table *model.Table + ticker *time.Ticker + working uint32 + promises []*promise.Promise[int32] + recordBatch *array.RecordBuilder + mtx sync.Mutex + schema *arrow.Schema + lastIterationTime [3]time.Time + dataIndexes *btree.BTreeG[int32] + dataStore map[string]any +} + +func NewMergeTreeService(t *model.Table) *MergeTreeService { + res := &MergeTreeService{ + Table: t, + working: 0, + promises: []*promise.Promise[int32]{}, + recordBatch: nil, + } + res.dataStore = res.createDataStore() + res.dataIndexes = btree.NewBTreeG(res.Less) + res.schema = res.createParquetSchema() + pool := memory.NewGoAllocator() + res.recordBatch = array.NewRecordBuilder(pool, res.schema) + + return res +} + +func (s *MergeTreeService) createDataStore() map[string]any { + res := make(map[string]any) + for _, f := range s.Table.Fields { + switch f[1] { + case "UInt64": + res[f[0]] = make([]uint64, 0, 1000000) + case "Int64": + res[f[0]] = make([]int64, 0, 1000000) + case "String": + res[f[0]] = make([]string, 0, 1000000) + case "Float64": + res[f[0]] = make([]float64, 0, 1000000) + } + } + return res +} + +func (s *MergeTreeService) size() int32 { + return int32(s.dataIndexes.Len()) +} + +func getFieldType(t *model.Table, fieldName string) string { + for _, field := range t.Fields { + if field[0] == fieldName { + return field[1] + } + } + return "" +} + +func (s *MergeTreeService) Less(a, b int32) bool { + for _, o := range s.Table.OrderBy { + t := getFieldType(s.Table, o) + switch t { + case "UInt64": + if s.dataStore[o].([]uint64)[a] > s.dataStore[o].([]uint64)[b] { + return false + } + case "Int64": + if s.dataStore[o].([]int64)[a] > s.dataStore[o].([]int64)[b] { + return false + } + case "String": + if s.dataStore[o].([]string)[a] > s.dataStore[o].([]string)[b] { + return false + } + case "Float64": + if s.dataStore[o].([]float64)[a] > s.dataStore[o].([]float64)[b] { + return false + } + } + } + return true +} + +func GetColumnLength(column any) int { + switch column := column.(type) { + case []string: + return len(column) + case []int64: + return len(column) + case []uint64: + return len(column) + case []float64: + return len(column) + default: + return 0 + } +} + +func validateData(table *model.Table, columns map[string]any) error { + + fieldMap := make(map[string]string) + for _, field := range table.Fields { + 
fieldMap[field[0]] = field[1] + } + + // Check if columns map size matches the table.Fields size + if len(columns) != len(table.Fields) { + return errors.New("columns size does not match table fields size") + } + + var ( + dataLength int + first = true + ) + for _, data := range columns { + if first { + dataLength = GetColumnLength(data) + first = false + continue + } + if GetColumnLength(data) != dataLength { + return errors.New("columns length mismatch") + } + } + for column, data := range columns { + + // Validate if the column exists in the table definition + columnType, ok := fieldMap[column] + if !ok { + return fmt.Errorf("invalid column: %s", column) + } + // Validate data types for each column + switch columnType { + case "UInt64": + if _, ok := data.([]uint64); !ok { + return fmt.Errorf("invalid data type for column %s: expected uint64", column) + } + case "Int64": + if _, ok := data.([]int64); !ok { + return fmt.Errorf("invalid data type for column %s: expected int64", column) + } + case "String": + if _, ok := data.([]string); !ok { + return fmt.Errorf("invalid data type for column %s: expected string", column) + } + case "Float64": + if _, ok := data.([]float64); !ok { + return fmt.Errorf("invalid data type for column %s: expected float64", column) + } + default: + return fmt.Errorf("unsupported column type: %s", columnType) + } + } + + return nil +} + +func (s *MergeTreeService) createParquetSchema() *arrow.Schema { + fields := make([]arrow.Field, len(s.Table.Fields)) + for i, field := range s.Table.Fields { + var fieldType arrow.DataType + switch field[1] { + case "UInt64": + fieldType = arrow.PrimitiveTypes.Uint64 + case "Int64": + fieldType = arrow.PrimitiveTypes.Int64 + case "String": + fieldType = arrow.BinaryTypes.String + case "Float64": + fieldType = arrow.PrimitiveTypes.Float64 + default: + panic(fmt.Sprintf("unsupported field type: %s", field[1])) + } + fields[i] = arrow.Field{Name: field[0], Type: fieldType} + } + return arrow.NewSchema(fields, nil) +} + +func (s *MergeTreeService) writeParquetFile(columns map[string]any) *promise.Promise[int32] { + s.mtx.Lock() + defer s.mtx.Unlock() + var oldSize, newSize int32 + for k, v := range columns { + tp := getFieldType(s.Table, k) + switch tp { + case "UInt64": + oldSize = int32(len(s.dataStore[k].([]uint64))) + s.dataStore[k] = append(s.dataStore[k].([]uint64), v.([]uint64)...) + newSize = int32(len(s.dataStore[k].([]uint64))) + case "Int64": + oldSize = int32(len(s.dataStore[k].([]int64))) + s.dataStore[k] = append(s.dataStore[k].([]int64), v.([]int64)...) + newSize = int32(len(s.dataStore[k].([]int64))) + case "String": + oldSize = int32(len(s.dataStore[k].([]string))) + s.dataStore[k] = append(s.dataStore[k].([]string), v.([]string)...) + newSize = int32(len(s.dataStore[k].([]string))) + case "Float64": + oldSize = int32(len(s.dataStore[k].([]float64))) + s.dataStore[k] = append(s.dataStore[k].([]float64), v.([]float64)...) 
+ newSize = int32(len(s.dataStore[k].([]float64))) + } + } + for i := oldSize; i < newSize; i++ { + s.dataIndexes.Set(i) + } + + p := promise.New[int32]() + s.promises = append(s.promises, p) + return p +} + +func (s *MergeTreeService) flush() { + s.mtx.Lock() + dataStore := s.dataStore + indexes := s.dataIndexes + s.dataStore = s.createDataStore() + s.dataIndexes = btree.NewBTreeG(s.Less) + promises := s.promises + s.promises = nil + s.mtx.Unlock() + onError := func(err error) { + for _, p := range promises { + p.Done(0, err) + } + } + if indexes.Len() == 0 { + onError(nil) + return + } + for i, f := range s.Table.Fields { + it := indexes.Iter() + switch f[1] { + case "UInt64": + _data := dataStore[f[0]].([]uint64) + for it.Next() { + s.recordBatch.Field(i).(*array.Uint64Builder).Append(_data[it.Item()]) + } + case "Int64": + _data := dataStore[f[0]].([]int64) + for it.Next() { + s.recordBatch.Field(i).(*array.Int64Builder).Append(_data[it.Item()]) + } + case "String": + _data := dataStore[f[0]].([]string) + for it.Next() { + s.recordBatch.Field(i).(*array.StringBuilder).Append(_data[it.Item()]) + } + case "Float64": + _data := dataStore[f[0]].([]float64) + for it.Next() { + s.recordBatch.Field(i).(*array.Float64Builder).Append(_data[it.Item()]) + } + } + } + record := s.recordBatch.NewRecord() + defer record.Release() + if record.Column(0).Data().Len() == 0 { + onError(nil) + return + } + fileName := s.Table.Name + uuid.New().String() + ".1.parquet" + outputTmpFile := filepath.Join(s.Table.Path, "data", fileName) + outputFile := filepath.Join(s.Table.Path, "data", fileName) + file, err := os.Create(outputTmpFile) + if err != nil { + onError(err) + return + } + defer file.Close() + // Set up Parquet writer properties + writerProps := parquet.NewWriterProperties( + parquet.WithMaxRowGroupLength(100), + ) + arrprops := pqarrow.NewArrowWriterProperties() + + // Create Parquet file writer + writer, err := pqarrow.NewFileWriter(s.schema, file, writerProps, arrprops) + if err != nil { + onError(err) + return + } + defer writer.Close() + err = writer.Write(record) + if err != nil { + onError(err) + return + } + onError(os.Rename(outputTmpFile, outputFile)) +} + +func (s *MergeTreeService) Run() { + s.mtx.Lock() + defer s.mtx.Unlock() + if !atomic.CompareAndSwapUint32(&s.working, 0, 1) { + return + } + go func() { + s.ticker = time.NewTicker(time.Millisecond * 100) + for range s.ticker.C { + s.flush() + } + }() +} + +func (s *MergeTreeService) Stop() { + s.mtx.Lock() + defer s.mtx.Unlock() + if s.ticker != nil { + s.ticker.Stop() + } + if s.recordBatch != nil { + s.recordBatch.Release() + } + atomic.StoreUint32(&s.working, 0) +} + +func (s *MergeTreeService) Store(columns map[string]any) *promise.Promise[int32] { + if err := validateData(s.Table, columns); err != nil { + return promise.Fulfilled(err, int32(0)) + } + + return s.writeParquetFile(columns) +} + +type PlanMerge struct { + From []string + To string + Iteration int +} + +type FileDesc struct { + name string + size int64 +} + +func (s *MergeTreeService) planMerge(dataDir string) ([]PlanMerge, error) { + files, err := os.ReadDir(dataDir) + if err != nil { + return nil, err + } + var parquetFiles []FileDesc + for _, file := range files { + if strings.HasSuffix(file.Name(), ".parquet") { + name := filepath.Join(dataDir, file.Name()) + stat, err := os.Stat(name) + if err != nil { + return nil, err + } + parquetFiles = append(parquetFiles, struct { + name string + size int64 + }{name, stat.Size()}) + } + } + sort.Slice(parquetFiles, func(a, b 
int) bool { + return parquetFiles[a].size > parquetFiles[b].size + }) + res := make([]PlanMerge, 0) + if time.Now().Sub(s.lastIterationTime[0]).Seconds() > 10 { + var _res []PlanMerge + parquetFiles, _res = s._planMerge(parquetFiles, 40*1024*1024, 40*1024*1024, 1) + res = append(res, _res...) + s.lastIterationTime[0] = time.Now() + } + if time.Now().Sub(s.lastIterationTime[1]).Seconds() > 100 { + var _res []PlanMerge + parquetFiles, _res = s._planMerge(parquetFiles, 400*1024*1024, 400*1024*1024, 2) + res = append(res, _res...) + s.lastIterationTime[1] = time.Now() + } + if time.Now().Sub(s.lastIterationTime[2]).Seconds() > 1000 { + var _res []PlanMerge + parquetFiles, _res = s._planMerge(parquetFiles, 4000*1024*1024, 4000*1024*1024, 3) + res = append(res, _res...) + s.lastIterationTime[2] = time.Now() + } + return res, nil +} + +func checkSuffix(name string, iteration int) bool { + for i := iteration + 1; i >= 1; i-- { + if strings.HasSuffix(name, fmt.Sprintf("%d.parquet", i)) { + return true + } + } + return false +} + +func (s *MergeTreeService) _planMerge(parquetFiles []FileDesc, maxFileSize int64, + maxResSize int64, iteration int) ([]FileDesc, []PlanMerge) { + res := make([]PlanMerge, 1) + res[0].To = fmt.Sprintf("%s_%d.%d.parquet", s.Table.Name, time.Now().UnixNano(), iteration+1) + res[0].Iteration = iteration + mergeSize := int64(0) + for i := len(parquetFiles) - 1; i >= 0; i-- { + if !checkSuffix(parquetFiles[i].name, iteration) { + continue + } + if parquetFiles[i].size > maxFileSize { + break + } + mergeSize += parquetFiles[i].size + res[len(res)-1].From = append(res[len(res)-1].From, parquetFiles[i].name) + if mergeSize > maxResSize { + res = append(res, PlanMerge{ + From: nil, + To: fmt.Sprintf("%s_%d.%d.parquet", s.Table.Name, time.Now().UnixNano(), iteration+1), + Iteration: iteration, + }) + mergeSize = 0 + } + parquetFiles = parquetFiles[:i] + } + for len(res) > 0 && len(res[len(res)-1].From) < 1 { + res = res[:len(res)-1] + } + return parquetFiles, res +} + +// Merge method implementation +func (s *MergeTreeService) Merge() error { + dataDir := filepath.Join(s.Table.Path, "data") + tmpDir := filepath.Join(s.Table.Path, "tmp") + + plan, err := s.planMerge(dataDir) + if err != nil { + return err + } + sem := semaphore.NewWeighted(10) + wg := errgroup.Group{} + for _, p := range plan { + _p := p + wg.Go(func() error { + sem.Acquire(context.Background(), 1) + defer sem.Release(1) + return mergeFiles(s.Table, &_p, tmpDir, dataDir) + }) + } + return wg.Wait() +} + +func mergeFiles(table *model.Table, p *PlanMerge, tmpDir, dataDir string) error { + // Create a temporary merged file + tmpFilePath := filepath.Join(tmpDir, p.To) + + // Prepare DuckDB connection + + conn, err := db.ConnectDuckDB("?allow_unsigned_extensions=1") + if err != nil { + return err + } + conn.Exec("LOAD '/home/hromozeka/QXIP/quackpipe/chsql.duckdb_extension'") + conn.Exec("INSTALL '/home/hromozeka/QXIP/quackpipe/chsql.duckdb_extension'") + defer conn.Close() + + //// Drop the table if it exists + //dropTableSQL := `DROP TABLE IF EXISTS temp_table` + //_, err = conn.Exec(dropTableSQL) + //if err != nil { + // return err + //} + + if p.Iteration == 1 { + createTableSQL := fmt.Sprintf( + `COPY(SELECT * FROM read_parquet_ordered (ARRAY['%s'], '%s'))TO '%s' (FORMAT 'parquet')`, + strings.Join(p.From, "','"), + strings.Join(table.OrderBy, ","), tmpFilePath) + _, err = conn.Exec(createTableSQL) + if err != nil { + return err + } + } else { + createTableSQL := fmt.Sprintf( + `COPY(SELECT * FROM read_parquet_ordered 
(ARRAY['%s'], '%s'))TO '%s' (FORMAT 'parquet')`, + strings.Join(p.From, "','"), + strings.Join(table.OrderBy, ","), tmpFilePath) + _, err = conn.Exec(createTableSQL) + if err != nil { + return err + } + } + + // Create a temporary table in DuckDB using parquet_scan with an array of files + + //// Perform the merge + //mergeSQL := fmt.Sprintf( + // `COPY (SELECT * FROM temp_table ORDER BY %s) TO '%s' (FORMAT 'parquet')`, + // strings.Join(table.OrderBy, ","), + // tmpFilePath, + //) + //_, err = conn.Exec(mergeSQL) + //if err != nil { + // return err + //} + + // Cleanup old files + for _, file := range p.From { + if err := os.Remove(file); err != nil { + return err + } + } + + finalFilePath := filepath.Join(dataDir, p.To) + if err := os.Rename(tmpFilePath, finalFilePath); err != nil { + return err + } + + return nil +} diff --git a/merge/shared/consts.go b/merge/shared/consts.go new file mode 100644 index 0000000..4cc2c96 --- /dev/null +++ b/merge/shared/consts.go @@ -0,0 +1,6 @@ +package shared + +const TYPE_STRING = "String" +const TYPE_INT64 = "Int64" +const TYPE_UINT64 = "UInt64" +const TYPE_FLOAT64 = "Float64" diff --git a/model/flags.go b/model/flags.go index c9f4adf..155ddae 100644 --- a/model/flags.go +++ b/model/flags.go @@ -9,4 +9,5 @@ type CommandLineFlags struct { Format *string `json:"format"` Params *string `json:"params"` DBPath *string `json:"dbpath"` + Config *string `json:"config"` } diff --git a/model/table.go b/model/table.go index 6f8efa1..8ff8c43 100644 --- a/model/table.go +++ b/model/table.go @@ -1,8 +1,12 @@ package model type Table struct { - Name string - Path string - Fields [][2]string - OrderBy []string + Name string + Path string + Fields [][2]string + Engine string + OrderBy []string + TimestampField string + TimestampPrecision string + PartitionBy string } diff --git a/router/apiRouter.go b/router/apiRouter.go index 6062f70..4ee3a79 100644 --- a/router/apiRouter.go +++ b/router/apiRouter.go @@ -1,14 +1,16 @@ package router import ( - "github.com/gorilla/mux" handlers "quackpipe/handler" - "quackpipe/model" ) -// APIHandler function for the root endpoint -func APIHandler(router *mux.Router, FlagInformation *model.CommandLineFlags) handlers.Handler { - HandlerInfo := handlers.Handler{FlagInformation: FlagInformation} - router.HandleFunc("/", HandlerInfo.Handlers).Methods("POST", "GET") - return HandlerInfo -} +var _ = func() int { + HandlerInfo := handlers.Handler{} + r := Route{ + Path: "/", + Methods: []string{"POST", "GET"}, + Handler: HandlerInfo.Handlers, + } + RegisterRoute(&r) + return 0 +}() diff --git a/router/route.go b/router/route.go index 94e88cf..acd122d 100644 --- a/router/route.go +++ b/router/route.go @@ -2,12 +2,37 @@ package router import ( "github.com/gorilla/mux" + "net/http" "quackpipe/model" ) +type Route struct { + Path string + Methods []string + Handler func(w http.ResponseWriter, r *http.Request) error +} + +func WithErrorHandle(hndl func(w http.ResponseWriter, r *http.Request) error, +) func(w http.ResponseWriter, r *http.Request) { + return func(w http.ResponseWriter, r *http.Request) { + err := hndl(w, r) + if err != nil { + w.WriteHeader(500) + w.Write([]byte(err.Error())) + } + } +} + +var handlerRegistry []*Route = nil + +func RegisterRoute(r *Route) { + handlerRegistry = append(handlerRegistry, r) +} + func NewRouter(flagInformation *model.CommandLineFlags) *mux.Router { router := mux.NewRouter() - // Register module routes - APIHandler(router, flagInformation) + for _, r := range handlerRegistry { + router.HandleFunc(r.Path, 
WithErrorHandle(r.Handler)).Methods(r.Methods...) + } return router } diff --git a/service/db/db.go b/service/db/db.go index 3f1a5cd..cf35395 100644 --- a/service/db/db.go +++ b/service/db/db.go @@ -71,47 +71,3 @@ func ConnectDuckDB(filePath string) (*sql.DB, error) { fmt.Println("Connected to DuckDB successfully.") return db, nil } - -// CreateTablesTable creates the metadata table if it doesn't already exist -func CreateTablesTable(db *sql.DB) error { - query := ` - CREATE TABLE IF NOT EXISTS tables ( - name VARCHAR PRIMARY KEY, - path VARCHAR, - field_names VARCHAR[], - field_types VARCHAR[], - order_by VARCHAR[], - engine VARCHAR[], - timestamp_field VARCHAR[], - timestamp_precision VARCHAR[], - partition_by VARCHAR[] - ); - ` - _, err := db.Exec(query) - if err != nil { - return fmt.Errorf("failed to create tables metadata: %w", err) - } - return nil -} - -func InsertTableMetadata(db *sql.DB, name, path string, fieldNames, fieldTypes, orderBy, engine, timestampField, timestampPrecision, partitionBy []string) error { - query := ` - INSERT INTO tables (name, path, field_names, field_types, order_by, engine, timestamp_field, timestamp_precision, partition_by) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) - ON CONFLICT(name) DO UPDATE SET - path = excluded.path, - field_names = excluded.field_names, - field_types = excluded.field_types, - order_by = excluded.order_by, - engine = excluded.engine, - timestamp_field = excluded.timestamp_field, - timestamp_precision = excluded.timestamp_precision, - partition_by = excluded.partition_by; - ` - - _, err := db.Exec(query, name, path, fieldNames, fieldTypes, orderBy, engine, timestampField, timestampPrecision, partitionBy) - if err != nil { - return fmt.Errorf("failed to insert table metadata: %w", err) - } - return nil -} diff --git a/service/merge_tree_service.go b/service/merge_tree_service.go deleted file mode 100644 index e49210f..0000000 --- a/service/merge_tree_service.go +++ /dev/null @@ -1,364 +0,0 @@ -package service - -import ( - "database/sql" - "errors" - "fmt" - "github.com/apache/arrow/go/v18/arrow" - "github.com/apache/arrow/go/v18/arrow/array" - "github.com/apache/arrow/go/v18/arrow/memory" - "github.com/apache/arrow/go/v18/parquet" - "github.com/apache/arrow/go/v18/parquet/pqarrow" - "github.com/google/uuid" - _ "github.com/marcboeker/go-duckdb" - "os" - "path/filepath" - "quackpipe/model" - "strings" - "time" -) - -type IMergeTree interface { - Store(table *model.Table, columns map[string][]any) error - Merge(table *model.Table) error -} - -type MergeTreeService struct { - db *sql.DB -} - -func NewMergeTreeService(dbPath string) (*MergeTreeService, error) { - conn, err := sql.Open("duckdb", dbPath) - if err != nil { - return nil, fmt.Errorf("failed to open DuckDB connection: %v", err) - } - - return &MergeTreeService{db: conn}, nil -} - -func (s *MergeTreeService) Close() error { - return s.db.Close() -} - -func validateData(table *model.Table, columns map[string][]any) error { - - fieldMap := make(map[string]string) - for _, field := range table.Fields { - fieldMap[field[0]] = field[1] - } - - // Check if columns map size matches the table.Fields size - if len(columns) != len(table.Fields) { - return errors.New("columns size does not match table fields size") - } - - var dataLength int - for _, data := range columns { - if dataLength == 0 { - dataLength = len(data) // Initialize dataLength with the length of the first column - } else if len(data) != dataLength { - return errors.New("columns length and data length mismatch") - } - } - 
for column, data := range columns { - - // Validate if the column exists in the table definition - columnType, ok := fieldMap[column] - if !ok { - return fmt.Errorf("invalid column: %s", column) - } - // Validate data types for each column - switch columnType { - case "UInt64": - for _, val := range data { - if _, ok := val.(uint64); !ok { - return fmt.Errorf("invalid data type for column %s: expected uint64", column) - } - } - case "Int64": - for _, val := range data { - if _, ok := val.(int64); !ok { - return fmt.Errorf("invalid data type for column %s: expected int64", column) - } - } - case "String": - for _, val := range data { - if _, ok := val.(string); !ok { - return fmt.Errorf("invalid data type for column %s: expected string", column) - } - } - case "Float64": - for _, val := range data { - if _, ok := val.(float64); !ok { - return fmt.Errorf("invalid data type for column %s: expected float64", column) - } - } - default: - return fmt.Errorf("unsupported column type: %s", columnType) - } - } - - return nil -} - -func (s *MergeTreeService) createParquetSchema(table *model.Table) *arrow.Schema { - fields := make([]arrow.Field, len(table.Fields)) - for i, field := range table.Fields { - var fieldType arrow.DataType - switch field[1] { - case "UInt64": - fieldType = arrow.PrimitiveTypes.Uint64 - case "Int64": - fieldType = arrow.PrimitiveTypes.Int64 - case "String": - fieldType = arrow.BinaryTypes.String - case "Float64": - fieldType = arrow.PrimitiveTypes.Float64 - default: - panic(fmt.Sprintf("unsupported field type: %s", field[1])) - } - fields[i] = arrow.Field{Name: field[0], Type: fieldType} - } - return arrow.NewSchema(fields, nil) -} - -func (s *MergeTreeService) writeParquetFile(table *model.Table, columns map[string][]any) error { - schema := s.createParquetSchema(table) - outputFile := filepath.Join(table.Path, "data", table.Name+uuid.New().String()+".parquet") - file, err := os.Create(outputFile) - if err != nil { - return fmt.Errorf("failed to create parquet file: %v", err) - } - defer file.Close() - - // Create a new Arrow memory pool - pool := memory.NewCheckedAllocator(memory.NewGoAllocator()) - - // Create Arrow RecordBatch - recordBatch := array.NewRecordBuilder(pool, schema) - defer recordBatch.Release() - - // Create a field map for easier access to column types - fieldMap := make(map[string]string) - for _, field := range table.Fields { - fieldMap[field[0]] = field[1] - } - for columnName, dataSlice := range columns { - columnType, ok := fieldMap[columnName] - if !ok { - return fmt.Errorf("unknown column: %s", columnName) - } - - // Get the index of the column from the schema - columnIndex := -1 - for i, field := range schema.Fields() { - if field.Name == columnName { - columnIndex = i - break - } - } - if columnIndex == -1 { - return fmt.Errorf("column %s not found in schema", columnName) - } - - builder := recordBatch.Field(columnIndex).(array.Builder) - - // Handle data slice based on its type - switch columnType { - case "UInt64": - if b, ok := builder.(*array.Uint64Builder); ok { - for _, value := range dataSlice { - if v, ok := value.(uint64); ok { - b.Append(v) - } else { - return fmt.Errorf("invalid data type for column %s, expected uint64", columnName) - } - } - } else { - return fmt.Errorf("type mismatch for column %s", columnName) - } - case "Int64": - if b, ok := builder.(*array.Int64Builder); ok { - for _, value := range dataSlice { - if v, ok := value.(int64); ok { - b.Append(v) - } else { - return fmt.Errorf("invalid data type for column %s, expected 
int64", columnName) - } - } - } else { - return fmt.Errorf("type mismatch for column %s", columnName) - } - case "String": - if b, ok := builder.(*array.StringBuilder); ok { - for _, value := range dataSlice { - if v, ok := value.(string); ok { - b.Append(v) - } else { - return fmt.Errorf("invalid data type for column %s, expected string", columnName) - } - } - } else { - return fmt.Errorf("type mismatch for column %s", columnName) - } - case "Float64": - if b, ok := builder.(*array.Float64Builder); ok { - for _, value := range dataSlice { - if v, ok := value.(float64); ok { - b.Append(v) - } else { - return fmt.Errorf("invalid data type for column %s, expected float64", columnName) - } - } - } else { - return fmt.Errorf("type mismatch for column %s", columnName) - } - default: - return fmt.Errorf("unsupported column type for column %s: %s", columnName, columnType) - } - } - - // Finalize the record batch - batch := recordBatch.NewRecord() - defer batch.Release() - - // Set up Parquet writer properties - writerProps := parquet.NewWriterProperties( - parquet.WithMaxRowGroupLength(100), - ) - arrprops := pqarrow.NewArrowWriterProperties() - - // Create Parquet file writer - writer, err := pqarrow.NewFileWriter(schema, file, writerProps, arrprops) - if err != nil { - return fmt.Errorf("failed to create Parquet file writer: %v", err) - } - defer writer.Close() - - // Write the record batch to the Parquet file - if err := writer.Write(batch); err != nil { - return fmt.Errorf("failed to write record batch to parquet file: %v", err) - } - - return nil -} - -func (s *MergeTreeService) Store(table *model.Table, columns map[string][]any) error { - if err := validateData(table, columns); err != nil { - return err - } - - if err := s.writeParquetFile(table, columns); err != nil { - return err - } - - return nil -} - -// Merge method implementation -func (s *MergeTreeService) Merge(table *model.Table) error { - dataDir := filepath.Join(table.Path, "data") - tmpDir := filepath.Join(table.Path, "tmp") - - if err := os.MkdirAll(tmpDir, 0755); err != nil { - return err - } - - files, err := os.ReadDir(dataDir) - if err != nil { - return err - } - - var parquetFiles []string - for _, file := range files { - if strings.HasSuffix(file.Name(), ".parquet") { - parquetFiles = append(parquetFiles, filepath.Join(dataDir, file.Name())) - } - } - - if len(parquetFiles) == 0 { - return errors.New("no parquet files to merge") - } - - // Plan the merge to keep the size under 4GB - const maxFileSize = 4 * 1024 * 1024 * 1024 - var filesToMerge []string - var currentSize int64 - - for _, file := range parquetFiles { - fileInfo, err := os.Stat(file) - if err != nil { - return err - } - - if currentSize+fileInfo.Size() > maxFileSize { - if err := mergeFiles(filesToMerge, table, tmpDir); err != nil { - return err - } - filesToMerge = nil - currentSize = 0 - } - - filesToMerge = append(filesToMerge, file) - currentSize += fileInfo.Size() - } - - if len(filesToMerge) > 0 { - if err := mergeFiles(filesToMerge, table, tmpDir); err != nil { - return err - } - } - - return nil -} - -func mergeFiles(files []string, table *model.Table, tmpDir string) error { - // Create a temporary merged file - tmpFilePath := filepath.Join(tmpDir, fmt.Sprintf("%s_%d.parquet", table.Name, time.Now().UnixNano())) - - // Prepare DuckDB connection - conn, err := sql.Open("duckdb", "test") - if err != nil { - return err - } - defer conn.Close() - - //// Drop the table if it exists - //dropTableSQL := `DROP TABLE IF EXISTS temp_table` - //_, err = 
conn.Exec(dropTableSQL) - //if err != nil { - // return err - //} - - // Create a temporary table in DuckDB using parquet_scan with an array of files - createTableSQL := fmt.Sprintf(`COPY(SELECT * FROM read_parquet (ARRAY['%s']) order by %s)TO '%s' (FORMAT 'parquet')`, strings.Join(files, "','"), strings.Join(table.OrderBy, ","), tmpFilePath) - _, err = conn.Exec(createTableSQL) - if err != nil { - return err - } - - //// Perform the merge - //mergeSQL := fmt.Sprintf( - // `COPY (SELECT * FROM temp_table ORDER BY %s) TO '%s' (FORMAT 'parquet')`, - // strings.Join(table.OrderBy, ","), - // tmpFilePath, - //) - //_, err = conn.Exec(mergeSQL) - //if err != nil { - // return err - //} - - // Cleanup old files - for _, file := range files { - if err := os.Remove(file); err != nil { - return err - } - } - - finalFilePath := filepath.Join(filepath.Dir(files[0]), filepath.Base(tmpFilePath)) - if err := os.Rename(tmpFilePath, finalFilePath); err != nil { - return err - } - - return nil -} diff --git a/utils/promise/promise.go b/utils/promise/promise.go new file mode 100644 index 0000000..dbef957 --- /dev/null +++ b/utils/promise/promise.go @@ -0,0 +1,45 @@ +package promise + +import ( + "sync" + "sync/atomic" +) + +type Promise[T any] struct { + lock sync.Mutex + err error + res T + pending int32 +} + +func New[T any]() *Promise[T] { + res := &Promise[T]{ + pending: 1, + } + res.lock.Lock() + return res +} + +func Fulfilled[T any](err error, res T) *Promise[T] { + return &Promise[T]{ + err: err, + res: res, + pending: 0, + } +} + +func (p *Promise[T]) Get() (T, error) { + p.lock.Lock() + defer p.lock.Unlock() + return p.res, p.err +} + +func (p *Promise[T]) Done(res T, err error) { + if atomic.LoadInt32(&p.pending) == 0 { + return + } + p.pending = 0 + p.res = res + p.err = err + p.lock.Unlock() +} From a20bb440ad3b328152b8a0af8d61781cd2fe771f Mon Sep 17 00:00:00 2001 From: akvlad Date: Sat, 19 Oct 2024 17:44:29 +0300 Subject: [PATCH 03/12] typo --- config/config_test.yaml | 2 +- e2e_test.go | 2 +- merge/merge_test.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config/config_test.yaml b/config/config_test.yaml index 039e161..fb8a903 100644 --- a/config/config_test.yaml +++ b/config/config_test.yaml @@ -1,4 +1,4 @@ quack_pipe: root: /tmp/data merge_timeout_s: 10 - secret: q1w2e3r4t5 \ No newline at end of file + secret: XXXXXX \ No newline at end of file diff --git a/e2e_test.go b/e2e_test.go index 6ce2b94..42850db 100644 --- a/e2e_test.go +++ b/e2e_test.go @@ -19,7 +19,7 @@ func TestE2E(t *testing.T) { Enabled: true, Root: "_testdata", MergeTimeoutS: 10, - Secret: "q1w2e3r4t5", + Secret: "XXXXXX", }, DBPath: "_testdata", } diff --git a/merge/merge_test.go b/merge/merge_test.go index 6dafd65..ce7d73d 100644 --- a/merge/merge_test.go +++ b/merge/merge_test.go @@ -13,7 +13,7 @@ func TestMerge(t *testing.T) { Enabled: true, Root: ".", MergeTimeoutS: 10, - Secret: "q1w2e3r4t5", + Secret: "XXXXXX", }, DBPath: ".", } From f3a2e818567f437d22367fb7492fa7491b78a12b Mon Sep 17 00:00:00 2001 From: akvlad Date: Mon, 27 Jan 2025 14:07:21 +0200 Subject: [PATCH 04/12] config update --- Dockerfile | 2 +- config/configuration.go | 26 +++++++++++++++++++------- go.mod | 6 ++---- go.sum | 4 ++-- main.go | 4 ++-- merge/merge.go | 7 ++++++- merge/repository/registry.go | 2 +- 7 files changed, 33 insertions(+), 18 deletions(-) diff --git a/Dockerfile b/Dockerfile index 53ea7f4..78a40e1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.20 AS builder +FROM golang:1.23 AS 
builder WORKDIR / COPY . . RUN CGO_ENABLED=1 go build -o quackpipe . diff --git a/config/configuration.go b/config/configuration.go index 757db78..83a0748 100644 --- a/config/configuration.go +++ b/config/configuration.go @@ -1,7 +1,9 @@ package config import ( + "fmt" "github.com/spf13/viper" + "strings" ) type QuackPipeConfiguration struct { @@ -13,21 +15,31 @@ type QuackPipeConfiguration struct { type Configuration struct { QuackPipe QuackPipeConfiguration `json:"quack_pipe" mapstructure:"quack_pipe" default:""` - DBPath string `json:"db_path" mapstructure:"db_path" default:"/tmp/db"` } var Config *Configuration func InitConfig(file string) { - viper.SetConfigFile(file) + + viper.SetEnvPrefix("") + viper.SetEnvKeyReplacer(strings.NewReplacer(".", "_")) viper.AutomaticEnv() - err := viper.ReadInConfig() - if err != nil { - panic(err) + // If a file is provided, use it as the config file + if file != "" { + viper.SetConfigFile(file) + err := viper.ReadInConfig() + if err != nil { + panic(fmt.Errorf("error reading config file: %s", err)) + } + fmt.Println("Using config file:", viper.ConfigFileUsed()) + } else { + fmt.Println("Using environment variables for configuration") } + Config = &Configuration{} - err = viper.Unmarshal(Config) + err := viper.Unmarshal(Config) if err != nil { - panic(err) + panic(fmt.Errorf("unable to decode into struct: %s", err)) } + fmt.Printf("Loaded configuration: %+v\n", Config) } diff --git a/go.mod b/go.mod index 6d51130..70bf87c 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,6 @@ module quackpipe -go 1.21 - -toolchain go1.21.3 +go 1.23 require ( github.com/apache/arrow/go/v18 v18.0.0-20240829005432-58415d1fac50 @@ -11,7 +9,7 @@ require ( github.com/gorilla/mux v1.8.1 github.com/marcboeker/go-duckdb v1.8.0 github.com/prometheus/client_golang v1.20.4 - github.com/spf13/viper v1.19.0 + github.com/spf13/viper v1.18.1 github.com/stretchr/testify v1.9.0 github.com/tidwall/btree v1.7.0 golang.org/x/sync v0.8.0 diff --git a/go.sum b/go.sum index 85443df..b605cab 100644 --- a/go.sum +++ b/go.sum @@ -93,8 +93,8 @@ github.com/spf13/cast v1.6.0 h1:GEiTHELF+vaR5dhz3VqZfFSzZjYbgeKDpBxQVS4GYJ0= github.com/spf13/cast v1.6.0/go.mod h1:ancEpBxwJDODSW/UG4rDrAqiKolqNNh2DX3mk86cAdo= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI= -github.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg= +github.com/spf13/viper v1.18.1 h1:rmuU42rScKWlhhJDyXZRKJQHXFX02chSVW1IvkPGiVM= +github.com/spf13/viper v1.18.1/go.mod h1:EKmWIqdnk5lOcmR72yw6hS+8OPYcwD0jteitLMVB+yk= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= diff --git a/main.go b/main.go index 3f40fe7..e9aa9b6 100644 --- a/main.go +++ b/main.go @@ -19,7 +19,7 @@ func initFlags() *model.CommandLineFlags { appFlags.Host = flag.String("host", "0.0.0.0", "API host. Default 0.0.0.0") appFlags.Port = flag.String("port", "8123", "API port. Default 8123") appFlags.Format = flag.String("format", "JSONCompact", "API port. 
Default JSONCompact") - appFlags.Config = flag.String("config", "config.yaml", "path to the configuration file") + appFlags.Config = flag.String("config", "", "path to the configuration file") appFlags.Params = flag.String("params", "", "DuckDB optional parameters. Default to none.") appFlags.Stdin = flag.Bool("stdin", false, "STDIN query. Default false") appFlags.Alias = flag.Bool("alias", false, "Built-in CH Aliases. Default true") @@ -43,7 +43,7 @@ func main() { } else { fmt.Println(results) } - + return } config.InitConfig(*config.AppFlags.Config) if config.Config.QuackPipe.Enabled { diff --git a/merge/merge.go b/merge/merge.go index 6775a0e..29eba3c 100644 --- a/merge/merge.go +++ b/merge/merge.go @@ -1,6 +1,7 @@ package merge import ( + "os" "quackpipe/config" "quackpipe/merge/handlers" "quackpipe/merge/repository" @@ -9,7 +10,11 @@ import ( ) func Init() { - conn, err := db.ConnectDuckDB(config.Config.DBPath + "/ddb.db") + err := os.MkdirAll(config.Config.QuackPipe.Root, 0750) + if err != nil { + panic(err) + } + conn, err := db.ConnectDuckDB(config.Config.QuackPipe.Root + "/ddb.db") if err != nil { panic(err) } diff --git a/merge/repository/registry.go b/merge/repository/registry.go index 1e25cd2..664324d 100644 --- a/merge/repository/registry.go +++ b/merge/repository/registry.go @@ -22,7 +22,7 @@ var registryMtx sync.Mutex func InitRegistry(_conn *sql.DB) error { var err error if _conn == nil { - _conn, err = db.ConnectDuckDB(config.Config.DBPath + "/ddb.db") + _conn, err = db.ConnectDuckDB(config.Config.QuackPipe.Root + "/ddb.db") if err != nil { return err } From d74f0e3d051208b2f3580997b1bf1f7122d80c1a Mon Sep 17 00:00:00 2001 From: akvlad Date: Mon, 27 Jan 2025 14:08:59 +0200 Subject: [PATCH 05/12] dockerignore --- .dockerignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..c419d97 --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +_data \ No newline at end of file From 3055f3cd5f50b20e6ad3e49cde7cd783bbecc49b Mon Sep 17 00:00:00 2001 From: akvlad Date: Mon, 27 Jan 2025 14:10:08 +0200 Subject: [PATCH 06/12] dockerignore --- go.mod | 2 ++ 1 file changed, 2 insertions(+) diff --git a/go.mod b/go.mod index 70bf87c..67afe6f 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,8 @@ module quackpipe go 1.23 +toolchain go1.23 + require ( github.com/apache/arrow/go/v18 v18.0.0-20240829005432-58415d1fac50 github.com/go-faster/jx v1.1.0 From f769bb11abd6891e1ec0fcef73745b008c7c54ae Mon Sep 17 00:00:00 2001 From: akvlad Date: Mon, 27 Jan 2025 14:11:30 +0200 Subject: [PATCH 07/12] dockerignore --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 67afe6f..5b0efe4 100644 --- a/go.mod +++ b/go.mod @@ -2,7 +2,7 @@ module quackpipe go 1.23 -toolchain go1.23 +toolchain go1.23.0 require ( github.com/apache/arrow/go/v18 v18.0.0-20240829005432-58415d1fac50 From 95b0d0180e9778dc9d53e81fc51daf31f6b78d0c Mon Sep 17 00:00:00 2001 From: akvlad Date: Mon, 27 Jan 2025 14:21:42 +0200 Subject: [PATCH 08/12] check table name before folders --- go.mod | 26 +++++++------- go.sum | 62 ++++++++++++++++++---------------- merge/handlers/create_table.go | 3 -- merge/repository/registry.go | 10 ++++++ 4 files changed, 55 insertions(+), 46 deletions(-) diff --git a/go.mod b/go.mod index 574e564..29032f8 100644 --- a/go.mod +++ b/go.mod @@ -20,9 +20,9 @@ require ( require ( github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c // indirect - 
github.com/andybalholm/brotli v1.1.0 // indirect - github.com/apache/arrow/go/v17 v17.0.0 // indirect - github.com/apache/thrift v0.20.0 // indirect + github.com/andybalholm/brotli v1.1.1 // indirect + github.com/apache/arrow-go/v18 v18.0.0 // indirect + github.com/apache/thrift v0.21.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect @@ -33,7 +33,7 @@ require ( github.com/google/flatbuffers v24.3.25+incompatible // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/klauspost/asmfmt v1.3.2 // indirect - github.com/klauspost/compress v1.17.9 // indirect + github.com/klauspost/compress v1.17.11 // indirect github.com/klauspost/cpuid/v2 v2.2.8 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect @@ -57,15 +57,15 @@ require ( github.com/zeebo/xxh3 v1.0.2 // indirect go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.9.0 // indirect - golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect - golang.org/x/mod v0.20.0 // indirect - golang.org/x/net v0.28.0 // indirect - golang.org/x/sys v0.23.0 // indirect - golang.org/x/text v0.17.0 // indirect - golang.org/x/tools v0.24.0 // indirect + golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect + golang.org/x/mod v0.21.0 // indirect + golang.org/x/net v0.30.0 // indirect + golang.org/x/sys v0.26.0 // indirect + golang.org/x/text v0.19.0 // indirect + golang.org/x/tools v0.26.0 // indirect golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240314234333-6e1732d8331c // indirect - google.golang.org/grpc v1.63.2 // indirect - google.golang.org/protobuf v1.34.2 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect + google.golang.org/grpc v1.67.1 // indirect + google.golang.org/protobuf v1.35.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect ) diff --git a/go.sum b/go.sum index b605cab..ec9eedc 100644 --- a/go.sum +++ b/go.sum @@ -1,13 +1,13 @@ github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c h1:RGWPOewvKIROun94nF7v2cua9qP+thov/7M50KEoeSU= github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c/go.mod h1:X0CRv0ky0k6m906ixxpzmDRLvX58TFUKS2eePweuyxk= -github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= -github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= -github.com/apache/arrow/go/v17 v17.0.0 h1:RRR2bdqKcdbss9Gxy2NS/hK8i4LDMh23L6BbkN5+F54= -github.com/apache/arrow/go/v17 v17.0.0/go.mod h1:jR7QHkODl15PfYyjM2nU+yTLScZ/qfj7OSUZmJ8putc= +github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= +github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= +github.com/apache/arrow-go/v18 v18.0.0 h1:1dBDaSbH3LtulTyOVYaBCHO3yVRwjV+TZaqn3g6V7ZM= +github.com/apache/arrow-go/v18 v18.0.0/go.mod h1:t6+cWRSmKgdQ6HsxisQjok+jBpKGhRDiqcf3p0p/F+A= github.com/apache/arrow/go/v18 v18.0.0-20240829005432-58415d1fac50 h1:3vA3hoM7fM4pJHG1dt0CEMzTwitFvutUrAiIwY+Bp+A= github.com/apache/arrow/go/v18 v18.0.0-20240829005432-58415d1fac50/go.mod h1:pAdO1xbg0WTJ++tq74I5xKX+yUD7MG0cEI24P+jko10= -github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI= -github.com/apache/thrift v0.20.0/go.mod 
h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8= +github.com/apache/thrift v0.21.0 h1:tdPmh/ptjE1IJnhbhrcl2++TauVjy242rkV/UzJChnE= +github.com/apache/thrift v0.21.0/go.mod h1:W1H8aR/QRtYNvrPeFXBtobyRkd0/YVhTc6i07XIAgDw= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= @@ -40,8 +40,8 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM= github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= @@ -52,8 +52,8 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0VQdvPDY= github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= -github.com/marcboeker/go-duckdb v1.8.0 h1:iOWv1wTL0JIMqpyns6hCf5XJJI4fY6lmJNk+itx5RRo= -github.com/marcboeker/go-duckdb v1.8.0/go.mod h1:2oV8BZv88S16TKGKM+Lwd0g7DX84x0jMxjTInThC8Is= +github.com/marcboeker/go-duckdb v1.8.3 h1:ZkYwiIZhbYsT6MmJsZ3UPTHrTZccDdM4ztoqSlEMXiQ= +github.com/marcboeker/go-duckdb v1.8.3/go.mod h1:C9bYRE1dPYb1hhfu/SSomm78B0FXmNgRvv6YBW/Hooc= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= @@ -110,6 +110,8 @@ github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8 github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/tidwall/btree v1.7.0 h1:L1fkJH/AuEh5zBnnBbmTwQ5Lt+bRJ5A8EWecslvo9iI= github.com/tidwall/btree v1.7.0/go.mod h1:twD9XRA5jj9VUQGELzDO4HPQTNJsoWWfYEL+EUQ2cKY= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= @@ -118,31 +120,31 @@ go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= 
go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= -golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= -golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= -golang.org/x/mod v0.20.0 h1:utOm6MM3R3dnawAiJgn0y+xvuYRsm1RKM/4giyfDgV0= -golang.org/x/mod v0.20.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= -golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= +golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk= +golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= +golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0= +golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= -golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= -golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -golang.org/x/tools v0.24.0 h1:J1shsA93PJUEVaUSaay7UXAyE8aimq3GW0pjlolpa24= -golang.org/x/tools v0.24.0/go.mod h1:YhNqVBIfWHdzvTLs0d8LCuMhkKUgSUKldakyV7W/WDQ= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9 h1:LLhsEBxRTBLuKlQxFBYUOU8xyFgXv6cOTp2HASDlsDk= golang.org/x/xerrors v0.0.0-20240716161551-93cc26a95ae9/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= -gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= -gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240314234333-6e1732d8331c h1:lfpJ/2rWPa/kJgxyyXM8PrNnfCzcmxJ265mADgwmvLI= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240314234333-6e1732d8331c/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= -google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= -google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0= +gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= 
+google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= diff --git a/merge/handlers/create_table.go b/merge/handlers/create_table.go index 120f762..1d7ce85 100644 --- a/merge/handlers/create_table.go +++ b/merge/handlers/create_table.go @@ -5,8 +5,6 @@ import ( "gopkg.in/yaml.v3" "io" "net/http" - "path/filepath" - "quackpipe/config" "quackpipe/merge/repository" "quackpipe/model" ) @@ -54,7 +52,6 @@ func CreateTableHandler(w http.ResponseWriter, r *http.Request) error { table := model.Table{ Name: req.CreateTable, - Path: filepath.Join(config.Config.QuackPipe.Root, req.CreateTable), Fields: fields, Engine: req.Engine, OrderBy: req.OrderBy, diff --git a/merge/repository/registry.go b/merge/repository/registry.go index 664324d..5b406d6 100644 --- a/merge/repository/registry.go +++ b/merge/repository/registry.go @@ -9,6 +9,7 @@ import ( "quackpipe/merge/service" "quackpipe/model" "quackpipe/service/db" + "regexp" "sync" "time" ) @@ -68,7 +69,13 @@ func RunMerge() { } } +var tableNameCheck = regexp.MustCompile(`^[a-zA-Z0-9_]+$`) + func RegisterNewTable(table *model.Table) error { + if !tableNameCheck.MatchString(table.Name) { + return fmt.Errorf("invalid table name, only letters and _ are accepted: %q", table.Name) + } + table.Path = filepath.Join(config.Config.QuackPipe.Root, table.Name) if _, ok := registry[table.Name]; ok { return nil } @@ -139,6 +146,9 @@ FROM tables } func createTableFolders(table *model.Table) error { + if !tableNameCheck.MatchString(table.Name) { + return fmt.Errorf("invalid table name, only letters and _ are accepted: %q", table.Name) + } err := os.MkdirAll(filepath.Join(table.Path, "tmp"), 0755) if err != nil { return err From 8530f127d2d4fae67b193597f5d0d8a2a6c1768d Mon Sep 17 00:00:00 2001 From: akvlad Date: Mon, 27 Jan 2025 14:38:12 +0200 Subject: [PATCH 09/12] fix codeQL warnings --- merge/service/merge_tree_service.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/merge/service/merge_tree_service.go b/merge/service/merge_tree_service.go index b60ee87..3702837 100644 --- a/merge/service/merge_tree_service.go +++ b/merge/service/merge_tree_service.go @@ -295,7 +295,7 @@ func (s *MergeTreeService) flush() { onError(nil) return } - fileName := s.Table.Name + uuid.New().String() + ".1.parquet" + fileName := uuid.New().String() + ".1.parquet" outputTmpFile := filepath.Join(s.Table.Path, "data", fileName) outputFile := filepath.Join(s.Table.Path, "data", fileName) file, err := os.Create(outputTmpFile) From 6eec5754f9458bc9e5a85406931fff551035abf6 Mon Sep 17 00:00:00 2001 From: akvlad Date: Mon, 27 Jan 2025 15:22:00 +0200 Subject: [PATCH 10/12] load chsql --- merge/service/merge_tree_service.go | 59 ++++++++++------------------- 1 file changed, 19 insertions(+), 40 deletions(-) diff --git a/merge/service/merge_tree_service.go 
b/merge/service/merge_tree_service.go index 3702837..72d59df 100644 --- a/merge/service/merge_tree_service.go +++ b/merge/service/merge_tree_service.go @@ -486,49 +486,28 @@ func mergeFiles(table *model.Table, p *PlanMerge, tmpDir, dataDir string) error if err != nil { return err } - conn.Exec("LOAD '/home/hromozeka/QXIP/quackpipe/chsql.duckdb_extension'") - conn.Exec("INSTALL '/home/hromozeka/QXIP/quackpipe/chsql.duckdb_extension'") - defer conn.Close() - - //// Drop the table if it exists - //dropTableSQL := `DROP TABLE IF EXISTS temp_table` - //_, err = conn.Exec(dropTableSQL) - //if err != nil { - // return err - //} - - if p.Iteration == 1 { - createTableSQL := fmt.Sprintf( - `COPY(SELECT * FROM read_parquet_ordered (ARRAY['%s'], '%s'))TO '%s' (FORMAT 'parquet')`, - strings.Join(p.From, "','"), - strings.Join(table.OrderBy, ","), tmpFilePath) - _, err = conn.Exec(createTableSQL) - if err != nil { - return err - } - } else { - createTableSQL := fmt.Sprintf( - `COPY(SELECT * FROM read_parquet_ordered (ARRAY['%s'], '%s'))TO '%s' (FORMAT 'parquet')`, - strings.Join(p.From, "','"), - strings.Join(table.OrderBy, ","), tmpFilePath) - _, err = conn.Exec(createTableSQL) - if err != nil { - return err - } + _, err = conn.Exec("INSTALL chsql FROM community") + if err != nil { + fmt.Println("Error loading chsql extension: ", err) + return err } + _, err = conn.Exec("LOAD chsql") + if err != nil { + fmt.Println("Error loading chsql extension: ", err) + return err + } + defer conn.Close() - // Create a temporary table in DuckDB using parquet_scan with an array of files + createTableSQL := fmt.Sprintf( + `COPY(SELECT * FROM read_parquet_mergetree(ARRAY['%s'], '%s'))TO '%s' (FORMAT 'parquet')`, + strings.Join(p.From, "','"), + strings.Join(table.OrderBy, ","), tmpFilePath) + _, err = conn.Exec(createTableSQL) - //// Perform the merge - //mergeSQL := fmt.Sprintf( - // `COPY (SELECT * FROM temp_table ORDER BY %s) TO '%s' (FORMAT 'parquet')`, - // strings.Join(table.OrderBy, ","), - // tmpFilePath, - //) - //_, err = conn.Exec(mergeSQL) - //if err != nil { - // return err - //} + if err != nil { + fmt.Println("Error read_parquet_mergetree: ", err) + return err + } // Cleanup old files for _, file := range p.From { From 98b295f54a27f42740131545ed44e28ec173cec7 Mon Sep 17 00:00:00 2001 From: akvlad Date: Mon, 27 Jan 2025 18:57:47 +0200 Subject: [PATCH 11/12] datatypes with aliases --- merge/data_types/data_types.go | 80 +++++++++++++++++ merge/data_types/float64.go | 29 +++++++ merge/data_types/generic.go | 49 +++++++++++ merge/data_types/int64.go | 29 +++++++ merge/data_types/string.go | 29 +++++++ merge/data_types/uint64.go | 29 +++++++ merge/merge_test.go | 1 - merge/parsers/ndjson_insert_parser.go | 60 ++----------- merge/service/merge_tree_service.go | 118 +++++--------------------- merge/shared/consts.go | 5 -- 10 files changed, 272 insertions(+), 157 deletions(-) create mode 100644 merge/data_types/data_types.go create mode 100644 merge/data_types/float64.go create mode 100644 merge/data_types/generic.go create mode 100644 merge/data_types/int64.go create mode 100644 merge/data_types/string.go create mode 100644 merge/data_types/uint64.go diff --git a/merge/data_types/data_types.go b/merge/data_types/data_types.go new file mode 100644 index 0000000..bc89881 --- /dev/null +++ b/merge/data_types/data_types.go @@ -0,0 +1,80 @@ +package data_types + +import ( + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/go-faster/jx" + "github.com/tidwall/btree" 
+) + +type DataType interface { + MakeStore() any + ParseJson(dec *jx.Decoder, store any) (any, error) + Less(store any, i int32, j int32) bool + ValidateData(data any) error + ArrowDataType() arrow.DataType + AppendStore(store any, data any) (any, error) + WriteToBatch(batch array.Builder, data any, indexes *btree.BTreeG[int32]) error +} + +var DataTypes = map[string]DataType{ + "Int64": Int64{}, + "BIGINT": Int64{}, + "INT8": Int64{}, + "LONG": Int64{}, + + "UInt64": UInt64{}, + "UBIGINT": UInt64{}, + + "Float64": Float64{}, + "DOUBLE": Float64{}, + "FLOAT8": Float64{}, + + "String": String{}, + "VARCHAR": String{}, + "CHAR": String{}, + "BPCHAR": String{}, + "TEXT": String{}, + + /*"UHUGEINT": UInt64{}, + "UINTEGER": UInt64{}, + "USMALLINT": UInt64{}, + "UTINYINT": UInt64{}, + + + "INTEGER": Int64{}, + "INT4": Int64{}, + "INT": Int64{}, + "SIGNED": Int64{}, + "SMALLINT": Int64{}, + "INT2": Int64{}, + "SHORT": Int64{}, + "TINYINT": Int64{}, + "INT1": Int64{}, + "HUGEINT": Int64{}, + + "FLOAT": Float64{}, + "FLOAT4": Float64{}, + "REAL": Float64{}, + + "BIT": Bit{}, + "BITSTRING": Bit{}, + "BLOB": Blob{}, + "BYTEA": Blob{}, + "BINARY": Blob{}, + "VARBINARY": Blob{}, + "BOOLEAN": Boolean{}, + "BOOL": Boolean{}, + "LOGICAL": Boolean{}, + "DATE": Date{}, + "DECIMAL": Decimal{}, + "NUMERIC": Decimal{}, + "INTERVAL": Interval{}, + "JSON": Json{}, + "TIME": Time{}, + "TIMESTAMP WITH TIME ZONE": TimestampWithTimeZone{}, + "TIMESTAMPTZ": TimestampWithTimeZone{}, + "TIMESTAMP": Timestamp{}, + "DATETIME": Timestamp{}, + "UUID": Uuid{},*/ +} diff --git a/merge/data_types/float64.go b/merge/data_types/float64.go new file mode 100644 index 0000000..c16a1fe --- /dev/null +++ b/merge/data_types/float64.go @@ -0,0 +1,29 @@ +package data_types + +import ( + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/go-faster/jx" + "github.com/tidwall/btree" +) + +type Float64 struct { + generic[float64] +} + +func (f Float64) ParseJson(dec *jx.Decoder, store any) (any, error) { + return f.generic.ParseJson(dec.Float64, store.([]float64)) +} + +func (f Float64) Less(a any, i int32, j int32) bool { + return a.([]float64)[i] < a.([]float64)[j] +} + +func (f Float64) ArrowDataType() arrow.DataType { + return arrow.PrimitiveTypes.Float64 +} + +func (f Float64) WriteToBatch(batch array.Builder, data any, index *btree.BTreeG[int32]) error { + _batch := batch.(*array.Float64Builder) + return f.generic.WriteToBatch(_batch.AppendValues, _batch.Append, data, index) +} diff --git a/merge/data_types/generic.go b/merge/data_types/generic.go new file mode 100644 index 0000000..56c3594 --- /dev/null +++ b/merge/data_types/generic.go @@ -0,0 +1,49 @@ +package data_types + +import ( + "fmt" + "github.com/tidwall/btree" +) + +type generic[T any] struct{} + +func (i generic[T]) MakeStore() any { + return make([]T, 1000) +} + +func (i generic[T]) ParseJson(dec func() (T, error), store []T) ([]T, error) { + _i, err := dec() + if err != nil { + return store, err + } + store = append(store, _i) + return store, nil +} + +func (i generic[T]) ValidateData(data any) error { + if _, ok := data.([]T); !ok { + return fmt.Errorf("invalid data type") + } + return nil +} + +func (i generic[T]) AppendStore(store any, data any) (any, error) { + _data := data.([]T) + _store := store.([]T) + _store = append(_store, _data...) 
+ return _store, nil +} + +func (i generic[T]) WriteToBatch(appendArray func([]T, []bool), append func(T), + data any, index *btree.BTreeG[int32]) error { + if index == nil { + appendArray(data.([]T), nil) + return nil + } + _data := data.([]T) + it := index.Iter() + for it.Next() { + append(_data[it.Item()]) + } + return nil +} diff --git a/merge/data_types/int64.go b/merge/data_types/int64.go new file mode 100644 index 0000000..e90a899 --- /dev/null +++ b/merge/data_types/int64.go @@ -0,0 +1,29 @@ +package data_types + +import ( + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/go-faster/jx" + "github.com/tidwall/btree" +) + +type Int64 struct { + generic[int64] +} + +func (i Int64) ParseJson(dec *jx.Decoder, store any) (any, error) { + return i.generic.ParseJson(dec.Int64, store.([]int64)) +} + +func (i Int64) Less(a any, k int32, j int32) bool { + return a.([]int64)[k] < a.([]int64)[j] +} + +func (i Int64) ArrowDataType() arrow.DataType { + return arrow.PrimitiveTypes.Int64 +} + +func (i Int64) WriteToBatch(batch array.Builder, data any, index *btree.BTreeG[int32]) error { + _batch := batch.(*array.Int64Builder) + return i.generic.WriteToBatch(_batch.AppendValues, _batch.Append, data, index) +} diff --git a/merge/data_types/string.go b/merge/data_types/string.go new file mode 100644 index 0000000..ecfa2cb --- /dev/null +++ b/merge/data_types/string.go @@ -0,0 +1,29 @@ +package data_types + +import ( + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/go-faster/jx" + "github.com/tidwall/btree" +) + +type String struct { + generic[string] +} + +func (i String) ParseJson(dec *jx.Decoder, store any) (any, error) { + return i.generic.ParseJson(dec.Str, store.([]string)) +} + +func (i String) Less(a any, k int32, j int32) bool { + return a.([]string)[k] < a.([]string)[j] +} + +func (i String) ArrowDataType() arrow.DataType { + return arrow.BinaryTypes.String +} + +func (i String) WriteToBatch(batch array.Builder, data any, index *btree.BTreeG[int32]) error { + _batch := batch.(*array.StringBuilder) + return i.generic.WriteToBatch(_batch.AppendValues, _batch.Append, data, index) +} diff --git a/merge/data_types/uint64.go b/merge/data_types/uint64.go new file mode 100644 index 0000000..03c6d13 --- /dev/null +++ b/merge/data_types/uint64.go @@ -0,0 +1,29 @@ +package data_types + +import ( + "github.com/apache/arrow/go/v18/arrow" + "github.com/apache/arrow/go/v18/arrow/array" + "github.com/go-faster/jx" + "github.com/tidwall/btree" +) + +type UInt64 struct { + generic[uint64] +} + +func (i UInt64) ParseJson(dec *jx.Decoder, store any) (any, error) { + return i.generic.ParseJson(dec.UInt64, store.([]uint64)) +} + +func (i UInt64) Less(a any, k int32, j int32) bool { + return a.([]uint64)[k] < a.([]uint64)[j] +} + +func (i UInt64) ArrowDataType() arrow.DataType { + return arrow.PrimitiveTypes.Uint64 +} + +func (i UInt64) WriteToBatch(batch array.Builder, data any, index *btree.BTreeG[int32]) error { + _batch := batch.(*array.Uint64Builder) + return i.generic.WriteToBatch(_batch.AppendValues, _batch.Append, data, index) +} diff --git a/merge/merge_test.go b/merge/merge_test.go index ce7d73d..5cf1242 100644 --- a/merge/merge_test.go +++ b/merge/merge_test.go @@ -15,7 +15,6 @@ func TestMerge(t *testing.T) { MergeTimeoutS: 10, Secret: "XXXXXX", }, - DBPath: ".", } Init() err := repository.RegisterNewTable(&model.Table{ diff --git a/merge/parsers/ndjson_insert_parser.go 
b/merge/parsers/ndjson_insert_parser.go index 93d49eb..081c37d 100644 --- a/merge/parsers/ndjson_insert_parser.go +++ b/merge/parsers/ndjson_insert_parser.go @@ -6,7 +6,7 @@ import ( "fmt" "github.com/go-faster/jx" "io" - "quackpipe/merge/shared" + "quackpipe/merge/data_types" ) type NDJSONParser struct { @@ -53,16 +53,7 @@ func (N *NDJSONParser) ParseReader(r io.Reader) (chan *ParserResponse, error) { func (N *NDJSONParser) resetLines() { N.lines = make(map[string]any) for k, v := range N.fields { - switch v { - case shared.TYPE_STRING: - N.lines[k] = make([]string, 0) - case shared.TYPE_INT64: - N.lines[k] = make([]int64, 0) - case shared.TYPE_UINT64: - N.lines[k] = make([]uint64, 0) - case shared.TYPE_FLOAT64: - N.lines[k] = make([]float64, 0) - } + N.lines[k] = data_types.DataTypes[v].MakeStore() } } @@ -73,49 +64,10 @@ func (N *NDJSONParser) parseLine(line []byte) error { if !ok { return fmt.Errorf("field %s not found", key) } - switch tp { - case shared.TYPE_STRING: - str, err := d.Str() - if err != nil { - return err - } - if _, ok := N.lines[key].([]string); !ok { - return fmt.Errorf("field %s is not a string", key) - } - N.lines[key] = append(N.lines[key].([]string), str) - case shared.TYPE_INT64: - str, err := d.Int64() - if err != nil { - return err - } - field := N.lines[key] - if _, ok := field.([]int64); !ok { - return fmt.Errorf("field %s is not a string", key) - } - field = append(field.([]int64), str) - N.lines[key] = field - case shared.TYPE_UINT64: - str, err := d.UInt64() - if err != nil { - return err - } - field := N.lines[key] - if _, ok := field.([]uint64); !ok { - return fmt.Errorf("field %s is not a string", key) - } - field = append(field.([]uint64), str) - N.lines[key] = field - case shared.TYPE_FLOAT64: - str, err := d.Float64() - if err != nil { - return err - } - field := N.lines[key] - if _, ok := field.([]float64); !ok { - return fmt.Errorf("field %s is not a string", key) - } - field = append(field.([]float64), str) - N.lines[key] = field + var err error + N.lines[key], err = data_types.DataTypes[tp].ParseJson(d, N.lines[key]) + if err != nil { + return fmt.Errorf("invalid data for field %s: %w", key, err) } return nil }) diff --git a/merge/service/merge_tree_service.go b/merge/service/merge_tree_service.go index 72d59df..f954e9e 100644 --- a/merge/service/merge_tree_service.go +++ b/merge/service/merge_tree_service.go @@ -16,6 +16,7 @@ import ( "golang.org/x/sync/semaphore" "os" "path/filepath" + "quackpipe/merge/data_types" "quackpipe/model" "quackpipe/service/db" "quackpipe/utils/promise" @@ -65,16 +66,7 @@ func NewMergeTreeService(t *model.Table) *MergeTreeService { func (s *MergeTreeService) createDataStore() map[string]any { res := make(map[string]any) for _, f := range s.Table.Fields { - switch f[1] { - case "UInt64": - res[f[0]] = make([]uint64, 0, 1000000) - case "Int64": - res[f[0]] = make([]int64, 0, 1000000) - case "String": - res[f[0]] = make([]string, 0, 1000000) - case "Float64": - res[f[0]] = make([]float64, 0, 1000000) - } + res[f[0]] = data_types.DataTypes[f[1]].MakeStore() } return res } @@ -95,23 +87,8 @@ func getFieldType(t *model.Table, fieldName string) string { func (s *MergeTreeService) Less(a, b int32) bool { for _, o := range s.Table.OrderBy { t := getFieldType(s.Table, o) - switch t { - case "UInt64": - if s.dataStore[o].([]uint64)[a] > s.dataStore[o].([]uint64)[b] { - return false - } - case "Int64": - if s.dataStore[o].([]int64)[a] > s.dataStore[o].([]int64)[b] { - return false - } - case "String": - if 
s.dataStore[o].([]string)[a] > s.dataStore[o].([]string)[b] { - return false - } - case "Float64": - if s.dataStore[o].([]float64)[a] > s.dataStore[o].([]float64)[b] { - return false - } + if !data_types.DataTypes[t].Less(s.dataStore[o], a, b) { + return false } } return true @@ -166,26 +143,14 @@ func validateData(table *model.Table, columns map[string]any) error { return fmt.Errorf("invalid column: %s", column) } // Validate data types for each column - switch columnType { - case "UInt64": - if _, ok := data.([]uint64); !ok { - return fmt.Errorf("invalid data type for column %s: expected uint64", column) - } - case "Int64": - if _, ok := data.([]int64); !ok { - return fmt.Errorf("invalid data type for column %s: expected int64", column) - } - case "String": - if _, ok := data.([]string); !ok { - return fmt.Errorf("invalid data type for column %s: expected string", column) - } - case "Float64": - if _, ok := data.([]float64); !ok { - return fmt.Errorf("invalid data type for column %s: expected float64", column) - } - default: + t, ok := data_types.DataTypes[columnType] + if !ok { return fmt.Errorf("unsupported column type: %s", columnType) } + err := t.ValidateData(data) + if err != nil { + return fmt.Errorf("invalid data for column %s: %w", column, err) + } } return nil @@ -194,19 +159,7 @@ func validateData(table *model.Table, columns map[string]any) error { func (s *MergeTreeService) createParquetSchema() *arrow.Schema { fields := make([]arrow.Field, len(s.Table.Fields)) for i, field := range s.Table.Fields { - var fieldType arrow.DataType - switch field[1] { - case "UInt64": - fieldType = arrow.PrimitiveTypes.Uint64 - case "Int64": - fieldType = arrow.PrimitiveTypes.Int64 - case "String": - fieldType = arrow.BinaryTypes.String - case "Float64": - fieldType = arrow.PrimitiveTypes.Float64 - default: - panic(fmt.Sprintf("unsupported field type: %s", field[1])) - } + var fieldType = data_types.DataTypes[field[1]].ArrowDataType() fields[i] = arrow.Field{Name: field[0], Type: fieldType} } return arrow.NewSchema(fields, nil) @@ -218,24 +171,13 @@ func (s *MergeTreeService) writeParquetFile(columns map[string]any) *promise.Pro var oldSize, newSize int32 for k, v := range columns { tp := getFieldType(s.Table, k) - switch tp { - case "UInt64": - oldSize = int32(len(s.dataStore[k].([]uint64))) - s.dataStore[k] = append(s.dataStore[k].([]uint64), v.([]uint64)...) - newSize = int32(len(s.dataStore[k].([]uint64))) - case "Int64": - oldSize = int32(len(s.dataStore[k].([]int64))) - s.dataStore[k] = append(s.dataStore[k].([]int64), v.([]int64)...) - newSize = int32(len(s.dataStore[k].([]int64))) - case "String": - oldSize = int32(len(s.dataStore[k].([]string))) - s.dataStore[k] = append(s.dataStore[k].([]string), v.([]string)...) - newSize = int32(len(s.dataStore[k].([]string))) - case "Float64": - oldSize = int32(len(s.dataStore[k].([]float64))) - s.dataStore[k] = append(s.dataStore[k].([]float64), v.([]float64)...) 
- newSize = int32(len(s.dataStore[k].([]float64))) + var err error + oldSize = int32(GetColumnLength(s.dataStore[k])) + s.dataStore[k], err = data_types.DataTypes[tp].AppendStore(s.dataStore[k], v) + if err != nil { + return promise.Fulfilled[int32](err, 0) } + newSize = int32(GetColumnLength(s.dataStore[k])) } for i := oldSize; i < newSize; i++ { s.dataIndexes.Set(i) @@ -265,28 +207,10 @@ func (s *MergeTreeService) flush() { return } for i, f := range s.Table.Fields { - it := indexes.Iter() - switch f[1] { - case "UInt64": - _data := dataStore[f[0]].([]uint64) - for it.Next() { - s.recordBatch.Field(i).(*array.Uint64Builder).Append(_data[it.Item()]) - } - case "Int64": - _data := dataStore[f[0]].([]int64) - for it.Next() { - s.recordBatch.Field(i).(*array.Int64Builder).Append(_data[it.Item()]) - } - case "String": - _data := dataStore[f[0]].([]string) - for it.Next() { - s.recordBatch.Field(i).(*array.StringBuilder).Append(_data[it.Item()]) - } - case "Float64": - _data := dataStore[f[0]].([]float64) - for it.Next() { - s.recordBatch.Field(i).(*array.Float64Builder).Append(_data[it.Item()]) - } + err := data_types.DataTypes[f[1]].WriteToBatch(s.recordBatch.Field(i), dataStore[f[0]], indexes) + if err != nil { + onError(err) + return } } record := s.recordBatch.NewRecord() diff --git a/merge/shared/consts.go b/merge/shared/consts.go index 4cc2c96..a29b5e4 100644 --- a/merge/shared/consts.go +++ b/merge/shared/consts.go @@ -1,6 +1 @@ package shared - -const TYPE_STRING = "String" -const TYPE_INT64 = "Int64" -const TYPE_UINT64 = "UInt64" -const TYPE_FLOAT64 = "Float64" From c0a26312408ca1615dc51bc4aed67cbf20681ad8 Mon Sep 17 00:00:00 2001 From: akvlad Date: Mon, 27 Jan 2025 19:04:06 +0200 Subject: [PATCH 12/12] debug --- merge/data_types/data_types.go | 1 + 1 file changed, 1 insertion(+) diff --git a/merge/data_types/data_types.go b/merge/data_types/data_types.go index bc89881..0865f6a 100644 --- a/merge/data_types/data_types.go +++ b/merge/data_types/data_types.go @@ -31,6 +31,7 @@ var DataTypes = map[string]DataType{ "FLOAT8": Float64{}, "String": String{}, + "STRING": String{}, "VARCHAR": String{}, "CHAR": String{}, "BPCHAR": String{},