diff --git a/0492aa7e.5a804266.js b/0492aa7e.5a804266.js deleted file mode 100644 index a03c19143..000000000 --- a/0492aa7e.5a804266.js +++ /dev/null @@ -1 +0,0 @@ -(window.webpackJsonp=window.webpackJsonp||[]).push([[6],{119:function(e,a,n){"use strict";n.d(a,"a",(function(){return b})),n.d(a,"b",(function(){return u}));var t=n(0),r=n.n(t);function s(e,a,n){return a in e?Object.defineProperty(e,a,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[a]=n,e}function o(e,a){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var t=Object.getOwnPropertySymbols(e);a&&(t=t.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),n.push.apply(n,t)}return n}function l(e){for(var a=1;a=0||(r[n]=e[n]);return r}(e,a);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(t=0;t=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var i=r.a.createContext({}),c=function(e){var a=r.a.useContext(i),n=a;return e&&(n="function"==typeof e?e(a):l(l({},a),e)),n},b=function(e){var a=c(e.components);return r.a.createElement(i.Provider,{value:a},e.children)},d={inlineCode:"code",wrapper:function(e){var a=e.children;return r.a.createElement(r.a.Fragment,{},a)}},m=r.a.forwardRef((function(e,a){var n=e.components,t=e.mdxType,s=e.originalType,o=e.parentName,i=p(e,["components","mdxType","originalType","parentName"]),b=c(n),m=t,u=b["".concat(o,".").concat(m)]||b[m]||d[m]||s;return n?r.a.createElement(u,l(l({ref:a},i),{},{components:n})):r.a.createElement(u,l({ref:a},i))}));function u(e,a){var n=arguments,t=a&&a.mdxType;if("string"==typeof e||t){var s=n.length,o=new Array(s);o[0]=m;var l={};for(var p in a)hasOwnProperty.call(a,p)&&(l[p]=a[p]);l.originalType=e,l.mdxType="string"==typeof e?e:t,o[1]=l;for(var i=2;i<none> is not a term",id:"datasets-and-none-is-not-a-term",children:[]},{value:"Example",id:"example",children:[]}],c={toc:i};function b(e){var a=e.components,n=Object(r.a)(e,o);return Object(s.b)("wrapper",Object(t.a)({},c,n,{components:a,mdxType:"MDXLayout"}),Object(s.b)("h2",{id:"introduction"},"Introduction"),Object(s.b)("p",null,"By default, Spark uses reflection to derive schemas and encoders from case\nclasses. This doesn't work well when there are messages that contain types that\nSpark does not understand such as enums, ",Object(s.b)("inlineCode",{parentName:"p"},"ByteString"),"s and ",Object(s.b)("inlineCode",{parentName:"p"},"oneof"),"s. To get around this, sparksql-scalapb provides its own ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder"),"s for protocol buffers."),Object(s.b)("p",null,"However, it turns out there is another obstacle. Spark does not provide any mechanism to compose user-provided encoders with its own reflection-derived Encoders. Therefore, merely providing an ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder")," for protocol buffers is insufficient to derive an encoder for regular case-classes that contain a protobuf as a field. To solve this problem, ScalaPB uses ",Object(s.b)("a",{parentName:"p",href:"https://github.com/typelevel/frameless"},"frameless")," which relies on implicit search to derive encoders. This approach enables combining ScalaPB's encoders with frameless encoders that takes care for all non-protobuf types."),Object(s.b)("h2",{id:"setting-up-your-project"},"Setting up your project"),Object(s.b)("p",null,"We are going to use sbt-assembly to deploy a fat JAR containing ScalaPB, and\nyour compiled protos. 
Make sure in project/plugins.sbt you have a line\nthat adds sbt-assembly:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10")\n')),Object(s.b)("p",null,"To add sparksql-scalapb to your project, add ",Object(s.b)("em",{parentName:"p"},"one")," of the following lines that\nmatches ",Object(s.b)("em",{parentName:"p"},"both the version of ScalaPB and Spark")," you use:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'// Spark 3.3 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_11" % "1.0.2"\n\n// Spark 3.2 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_11" % "1.0.2"\n\n// Spark 3.1 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_11" % "1.0.2"\n\n// Spark 3.0 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_11" % "1.0.1"\n\n// Spark 3.3 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_10" % "1.0.2"\n\n// Spark 3.2 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_10" % "1.0.2"\n\n// Spark 3.1 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_10" % "1.0.2"\n\n// Spark 3.0 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_10" % "1.0.1"\n\n// Spark 2.x and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.10.4"\n\n// Spark 2.x and ScalaPB 0.9\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.9.3"\n')),Object(s.b)("p",null,"Known issue: Spark 3.2.1 is binary incompatible with Spark 3.2.0 in some of its internal\nAPIs being used. If you use Spark 3.2.0, please stick to sparksql-scalapb 1.0.0-M1."),Object(s.b)("p",null,"Spark ships with an old version of Google's Protocol Buffers runtime that is not compatible with\nthe current version. In addition, it comes with incompatible versions of scala-collection-compat\nand shapeless. Therefore, we need to shade these libraries. Add the following to your build.sbt:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'assemblyShadeRules in assembly := Seq(\n ShadeRule.rename("com.google.protobuf.**" -> "shadeproto.@1").inAll,\n ShadeRule.rename("scala.collection.compat.**" -> "shadecompat.@1").inAll,\n ShadeRule.rename("shapeless.**" -> "shadeshapeless.@1").inAll\n)\n')),Object(s.b)("p",null,"See ",Object(s.b)("a",{parentName:"p",href:"https://github.com/thesamet/sparksql-scalapb-test/blob/master/build.sbt"},"complete example of build.sbt"),"."),Object(s.b)("h2",{id:"using-sparksql-scalapb"},"Using sparksql-scalapb"),Object(s.b)("p",null,"We assume you have a ",Object(s.b)("inlineCode",{parentName:"p"},"SparkSession")," assigned to the variable ",Object(s.b)("inlineCode",{parentName:"p"},"spark"),". 
In a standalone Scala program, this can be created with:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'import org.apache.spark.sql.SparkSession\n\nval spark: SparkSession = SparkSession\n .builder()\n .appName("ScalaPB Demo")\n .master("local[2]")\n .getOrCreate()\n// spark: SparkSession = org.apache.spark.sql.SparkSession@60e9e7ab\n')),Object(s.b)("p",null,Object(s.b)("em",{parentName:"p"},"IMPORTANT"),": Ensure you do not import ",Object(s.b)("inlineCode",{parentName:"p"},"spark.implicits._")," to avoid ambiguity between ScalaPB provided encoders and Spark's default encoders. You may want to import ",Object(s.b)("inlineCode",{parentName:"p"},"StringToColumn")," to convert ",Object(s.b)("inlineCode",{parentName:"p"},'$"col name"')," into a ",Object(s.b)("inlineCode",{parentName:"p"},"Column"),". Add an import ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.Implicits")," to add ScalaPB's encoders for protocol buffers into the implicit search scope:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"import org.apache.spark.sql.{Dataset, DataFrame, functions => F}\nimport spark.implicits.StringToColumn\nimport scalapb.spark.ProtoSQL\n\nimport scalapb.spark.Implicits._\n")),Object(s.b)("p",null,"The code snippets below use the ",Object(s.b)("a",{parentName:"p",href:"https://github.com/scalapb/ScalaPB/blob/master/docs/src/main/protobuf/person.proto"},Object(s.b)("inlineCode",{parentName:"a"},"Person")," message"),"."),Object(s.b)("p",null,"We start by creating some test data:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'import scalapb.docs.person.Person\nimport scalapb.docs.person.Person.{Address, AddressType}\n\nval testData = Seq(\n Person(name="John", age=32, addresses=Vector(\n Address(addressType=AddressType.HOME, street="Market", city="SF"))\n ),\n Person(name="Mike", age=29, addresses=Vector(\n Address(addressType=AddressType.WORK, street="Castro", city="MV"),\n Address(addressType=AddressType.HOME, street="Church", city="MV"))\n ),\n Person(name="Bart", age=27)\n)\n')),Object(s.b)("p",null,"We can create a ",Object(s.b)("inlineCode",{parentName:"p"},"DataFrame")," from the test data:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val df = ProtoSQL.createDataFrame(spark, testData)\n// df: DataFrame = [name: string, age: int ... 
1 more field]\ndf.printSchema()\n// root\n// |-- name: string (nullable = true)\n// |-- age: integer (nullable = true)\n// |-- addresses: array (nullable = false)\n// | |-- element: struct (containsNull = false)\n// | | |-- address_type: string (nullable = true)\n// | | |-- street: string (nullable = true)\n// | | |-- city: string (nullable = true)\n// \ndf.show()\n// +----+---+--------------------+\n// |name|age| addresses|\n// +----+---+--------------------+\n// |John| 32|[{HOME, Market, SF}]|\n// |Mike| 29|[{WORK, Castro, M...|\n// |Bart| 27| []|\n// +----+---+--------------------+\n//\n")),Object(s.b)("p",null,"and then process it as any other Dataframe in Spark:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'df.select($"name", F.size($"addresses").alias("address_count")).show()\n// +----+-------------+\n// |name|address_count|\n// +----+-------------+\n// |John| 1|\n// |Mike| 2|\n// |Bart| 0|\n// +----+-------------+\n// \n\nval nameAndAddress = df.select($"name", $"addresses".getItem(0).alias("firstAddress"))\n// nameAndAddress: DataFrame = [name: string, firstAddress: struct]\n\nnameAndAddress.show()\n// +----+------------------+\n// |name| firstAddress|\n// +----+------------------+\n// |John|{HOME, Market, SF}|\n// |Mike|{WORK, Castro, MV}|\n// |Bart| null|\n// +----+------------------+\n//\n')),Object(s.b)("p",null,"Using the datasets API it is possible to bring the data back to ScalaPB case classes:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"nameAndAddress.as[(String, Option[Address])].collect().foreach(println)\n// (John,Some(Address(HOME,Market,SF,UnknownFieldSet(Map()))))\n// (Mike,Some(Address(WORK,Castro,MV,UnknownFieldSet(Map()))))\n// (Bart,None)\n")),Object(s.b)("p",null,"You can create a Dataset directly using Spark APIs:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"spark.createDataset(testData)\n// res5: Dataset[Person] = [name: string, age: int ... 1 more field]\n")),Object(s.b)("h2",{id:"from-binary-to-protos-and-back"},"From Binary to protos and back"),Object(s.b)("p",null,"In some situations, you may need to deal with datasets that contain serialized protocol buffers. This can be handled by mapping the datasets through ScalaPB's ",Object(s.b)("inlineCode",{parentName:"p"},"parseFrom")," and ",Object(s.b)("inlineCode",{parentName:"p"},"toByteArray")," functions."),Object(s.b)("p",null,"Let's start by preparing a dataset with test binary data by mapping our ",Object(s.b)("inlineCode",{parentName:"p"},"testData"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val binaryDS: Dataset[Array[Byte]] = spark.createDataset(testData.map(_.toByteArray))\n// binaryDS: Dataset[Array[Byte]] = [value: binary]\n\nbinaryDS.show()\n// +--------------------+\n// | value|\n// +--------------------+\n// |[0A 04 4A 6F 68 6...|\n// |[0A 04 4D 69 6B 6...|\n// |[0A 04 42 61 72 7...|\n// +--------------------+\n//\n")),Object(s.b)("p",null,"To turn this dataset into a ",Object(s.b)("inlineCode",{parentName:"p"},"Dataset[Person]"),", we map it through ",Object(s.b)("inlineCode",{parentName:"p"},"parseFrom"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val protosDS: Dataset[Person] = binaryDS.map(Person.parseFrom(_))\n// protosDS: Dataset[Person] = [name: string, age: int ... 
1 more field]\n")),Object(s.b)("p",null,"to turn a dataset of protos into ",Object(s.b)("inlineCode",{parentName:"p"},"Dataset[Array[Byte]]"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val protosBinary: Dataset[Array[Byte]] = protosDS.map(_.toByteArray)\n// protosBinary: Dataset[Array[Byte]] = [value: binary]\n")),Object(s.b)("h2",{id:"on-enums"},"On enums"),Object(s.b)("p",null,"In SparkSQL-ScalaPB, enums are represented as strings. Unrecognized enum values are represented as strings containing the numeric value."),Object(s.b)("h2",{id:"dataframes-and-datasets-from-rdds"},"Dataframes and Datasets from RDDs"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"import org.apache.spark.rdd.RDD\n\nval protoRDD: RDD[Person] = spark.sparkContext.parallelize(testData)\n\nval protoDF: DataFrame = ProtoSQL.protoToDataFrame(spark, protoRDD)\n\nval protoDS: Dataset[Person] = spark.createDataset(protoRDD)\n")),Object(s.b)("h2",{id:"udfs"},"UDFs"),Object(s.b)("p",null,"If you need to write a UDF that returns a message, it would not pick up our encoder and you may get a runtime failure. To work around this, sparksql-scalapb provides ",Object(s.b)("inlineCode",{parentName:"p"},"ProtoSQL.udf")," to create UDFs. For example, if you need to parse a binary column into a proto:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'val binaryDF = protosBinary.toDF("value")\n// binaryDF: DataFrame = [value: binary]\n\nval parsePersons = ProtoSQL.udf { bytes: Array[Byte] => Person.parseFrom(bytes) }\n// parsePersons: org.apache.spark.sql.Column => org.apache.spark.sql.Column = scalapb.spark.Udfs$$Lambda$16311/0x00000001047c5840@3fd04c7e\n\nbinaryDF.withColumn("person", parsePersons($"value"))\n// res7: DataFrame = [value: binary, person: struct]\n')),Object(s.b)("h2",{id:"primitive-wrappers"},"Primitive wrappers"),Object(s.b)("p",null,"In ProtoSQL 0.9.x and 0.10.x, primitive wrappers are represented in Spark as structs\nwitha single field named ",Object(s.b)("inlineCode",{parentName:"p"},"value"),". A better representation in Spark would be a\nnullable field of the primitive type. The better representation will be the\ndefault in 0.11.x. To enable this representation today, replace the usages of\n",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL")," with ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL.withPrimitiveWrappers"),".\nInstead of importing ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.Implicits._"),", import\n",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL.implicits._")),Object(s.b)("p",null,"See example in ",Object(s.b)("a",{parentName:"p",href:"https://github.com/scalapb/sparksql-scalapb/blob/80f3162b69313d57f95d3dcbfee865809873567a/sparksql-scalapb/src/test/scala/WrappersSpec.scala#L42-L59"},"WrappersSpec"),"."),Object(s.b)("h2",{id:"datasets-and-none-is-not-a-term"},"Datasets and ",Object(s.b)("inlineCode",{parentName:"h2"}," is not a term")),Object(s.b)("p",null,"You will see this error if for some reason Spark's ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder"),"s are being picked up\ninstead of the ones provided by sparksql-scalapb. Please ensure you are not importing ",Object(s.b)("inlineCode",{parentName:"p"},"spark.implicits._"),". 
See instructions above for imports."),Object(s.b)("h2",{id:"example"},"Example"),Object(s.b)("p",null,"Check out a ",Object(s.b)("a",{parentName:"p",href:"https://github.com/thesamet/sparksql-scalapb-test"},"complete example")," here."))}b.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/0492aa7e.ee5cec50.js b/0492aa7e.ee5cec50.js new file mode 100644 index 000000000..575bcb197 --- /dev/null +++ b/0492aa7e.ee5cec50.js @@ -0,0 +1 @@ +(window.webpackJsonp=window.webpackJsonp||[]).push([[6],{119:function(e,a,n){"use strict";n.d(a,"a",(function(){return b})),n.d(a,"b",(function(){return u}));var t=n(0),r=n.n(t);function s(e,a,n){return a in e?Object.defineProperty(e,a,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[a]=n,e}function o(e,a){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var t=Object.getOwnPropertySymbols(e);a&&(t=t.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),n.push.apply(n,t)}return n}function l(e){for(var a=1;a=0||(r[n]=e[n]);return r}(e,a);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(t=0;t=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var i=r.a.createContext({}),c=function(e){var a=r.a.useContext(i),n=a;return e&&(n="function"==typeof e?e(a):l(l({},a),e)),n},b=function(e){var a=c(e.components);return r.a.createElement(i.Provider,{value:a},e.children)},d={inlineCode:"code",wrapper:function(e){var a=e.children;return r.a.createElement(r.a.Fragment,{},a)}},m=r.a.forwardRef((function(e,a){var n=e.components,t=e.mdxType,s=e.originalType,o=e.parentName,i=p(e,["components","mdxType","originalType","parentName"]),b=c(n),m=t,u=b["".concat(o,".").concat(m)]||b[m]||d[m]||s;return n?r.a.createElement(u,l(l({ref:a},i),{},{components:n})):r.a.createElement(u,l({ref:a},i))}));function u(e,a){var n=arguments,t=a&&a.mdxType;if("string"==typeof e||t){var s=n.length,o=new Array(s);o[0]=m;var l={};for(var p in a)hasOwnProperty.call(a,p)&&(l[p]=a[p]);l.originalType=e,l.mdxType="string"==typeof e?e:t,o[1]=l;for(var i=2;i<none> is not a term",id:"datasets-and-none-is-not-a-term",children:[]},{value:"Example",id:"example",children:[]}],c={toc:i};function b(e){var a=e.components,n=Object(r.a)(e,o);return Object(s.b)("wrapper",Object(t.a)({},c,n,{components:a,mdxType:"MDXLayout"}),Object(s.b)("h2",{id:"introduction"},"Introduction"),Object(s.b)("p",null,"By default, Spark uses reflection to derive schemas and encoders from case\nclasses. This doesn't work well when there are messages that contain types that\nSpark does not understand such as enums, ",Object(s.b)("inlineCode",{parentName:"p"},"ByteString"),"s and ",Object(s.b)("inlineCode",{parentName:"p"},"oneof"),"s. To get around this, sparksql-scalapb provides its own ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder"),"s for protocol buffers."),Object(s.b)("p",null,"However, it turns out there is another obstacle. Spark does not provide any mechanism to compose user-provided encoders with its own reflection-derived Encoders. Therefore, merely providing an ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder")," for protocol buffers is insufficient to derive an encoder for regular case-classes that contain a protobuf as a field. To solve this problem, ScalaPB uses ",Object(s.b)("a",{parentName:"p",href:"https://github.com/typelevel/frameless"},"frameless")," which relies on implicit search to derive encoders. 
This approach enables combining ScalaPB's encoders with frameless encoders that takes care for all non-protobuf types."),Object(s.b)("h2",{id:"setting-up-your-project"},"Setting up your project"),Object(s.b)("p",null,"We are going to use sbt-assembly to deploy a fat JAR containing ScalaPB, and\nyour compiled protos. Make sure in project/plugins.sbt you have a line\nthat adds sbt-assembly:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10")\n')),Object(s.b)("p",null,"To add sparksql-scalapb to your project, add ",Object(s.b)("em",{parentName:"p"},"one")," of the following lines that\nmatches ",Object(s.b)("em",{parentName:"p"},"both the version of ScalaPB and Spark")," you use:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'// Spark 3.5 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql35-scalapb0_11" % "1.0.4"\n\n// Spark 3.4 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql34-scalapb0_11" % "1.0.4"\n\n// Spark 3.3 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_11" % "1.0.4"\n\n// Spark 3.2 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_11" % "1.0.4"\n\n// Spark 3.1 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_11" % "1.0.4"\n\n// Spark 3.0 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_11" % "1.0.1"\n\n// Spark 3.3 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_10" % "1.0.4"\n\n// Spark 3.2 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_10" % "1.0.4"\n\n// Spark 3.1 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_10" % "1.0.4"\n\n// Spark 3.0 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_10" % "1.0.1"\n\n// Spark 2.x and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.10.4"\n\n// Spark 2.x and ScalaPB 0.9\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.9.3"\n')),Object(s.b)("p",null,"Known issue: Spark 3.2.1 is binary incompatible with Spark 3.2.0 in some of its internal\nAPIs being used. If you use Spark 3.2.0, please stick to sparksql-scalapb 1.0.0-M1."),Object(s.b)("p",null,"Spark ships with an old version of Google's Protocol Buffers runtime that is not compatible with\nthe current version. In addition, it comes with incompatible versions of scala-collection-compat\nand shapeless. Therefore, we need to shade these libraries. 
Add the following to your build.sbt:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'assemblyShadeRules in assembly := Seq(\n ShadeRule.rename("com.google.protobuf.**" -> "shadeproto.@1").inAll,\n ShadeRule.rename("scala.collection.compat.**" -> "shadecompat.@1").inAll,\n ShadeRule.rename("shapeless.**" -> "shadeshapeless.@1").inAll\n)\n')),Object(s.b)("p",null,"See ",Object(s.b)("a",{parentName:"p",href:"https://github.com/thesamet/sparksql-scalapb-test/blob/master/build.sbt"},"complete example of build.sbt"),"."),Object(s.b)("h2",{id:"using-sparksql-scalapb"},"Using sparksql-scalapb"),Object(s.b)("p",null,"We assume you have a ",Object(s.b)("inlineCode",{parentName:"p"},"SparkSession")," assigned to the variable ",Object(s.b)("inlineCode",{parentName:"p"},"spark"),". In a standalone Scala program, this can be created with:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'import org.apache.spark.sql.SparkSession\n\nval spark: SparkSession = SparkSession\n .builder()\n .appName("ScalaPB Demo")\n .master("local[2]")\n .getOrCreate()\n// spark: SparkSession = org.apache.spark.sql.SparkSession@68d6fe03\n')),Object(s.b)("p",null,Object(s.b)("em",{parentName:"p"},"IMPORTANT"),": Ensure you do not import ",Object(s.b)("inlineCode",{parentName:"p"},"spark.implicits._")," to avoid ambiguity between ScalaPB provided encoders and Spark's default encoders. You may want to import ",Object(s.b)("inlineCode",{parentName:"p"},"StringToColumn")," to convert ",Object(s.b)("inlineCode",{parentName:"p"},'$"col name"')," into a ",Object(s.b)("inlineCode",{parentName:"p"},"Column"),". Add an import ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.Implicits")," to add ScalaPB's encoders for protocol buffers into the implicit search scope:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"import org.apache.spark.sql.{Dataset, DataFrame, functions => F}\nimport spark.implicits.StringToColumn\nimport scalapb.spark.ProtoSQL\n\nimport scalapb.spark.Implicits._\n")),Object(s.b)("p",null,"The code snippets below use the ",Object(s.b)("a",{parentName:"p",href:"https://github.com/scalapb/ScalaPB/blob/master/docs/src/main/protobuf/person.proto"},Object(s.b)("inlineCode",{parentName:"a"},"Person")," message"),"."),Object(s.b)("p",null,"We start by creating some test data:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'import scalapb.docs.person.Person\nimport scalapb.docs.person.Person.{Address, AddressType}\n\nval testData = Seq(\n Person(name="John", age=32, addresses=Vector(\n Address(addressType=AddressType.HOME, street="Market", city="SF"))\n ),\n Person(name="Mike", age=29, addresses=Vector(\n Address(addressType=AddressType.WORK, street="Castro", city="MV"),\n Address(addressType=AddressType.HOME, street="Church", city="MV"))\n ),\n Person(name="Bart", age=27)\n)\n')),Object(s.b)("p",null,"We can create a ",Object(s.b)("inlineCode",{parentName:"p"},"DataFrame")," from the test data:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val df = ProtoSQL.createDataFrame(spark, testData)\n// df: DataFrame = [name: string, age: int ... 
1 more field]\ndf.printSchema()\n// root\n// |-- name: string (nullable = true)\n// |-- age: integer (nullable = true)\n// |-- addresses: array (nullable = false)\n// | |-- element: struct (containsNull = false)\n// | | |-- address_type: string (nullable = true)\n// | | |-- street: string (nullable = true)\n// | | |-- city: string (nullable = true)\n// \ndf.show()\n// +----+---+--------------------+\n// |name|age| addresses|\n// +----+---+--------------------+\n// |John| 32|[{HOME, Market, SF}]|\n// |Mike| 29|[{WORK, Castro, M...|\n// |Bart| 27| []|\n// +----+---+--------------------+\n//\n")),Object(s.b)("p",null,"and then process it as any other Dataframe in Spark:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'df.select($"name", F.size($"addresses").alias("address_count")).show()\n// +----+-------------+\n// |name|address_count|\n// +----+-------------+\n// |John| 1|\n// |Mike| 2|\n// |Bart| 0|\n// +----+-------------+\n// \n\nval nameAndAddress = df.select($"name", $"addresses".getItem(0).alias("firstAddress"))\n// nameAndAddress: DataFrame = [name: string, firstAddress: struct]\n\nnameAndAddress.show()\n// +----+------------------+\n// |name| firstAddress|\n// +----+------------------+\n// |John|{HOME, Market, SF}|\n// |Mike|{WORK, Castro, MV}|\n// |Bart| null|\n// +----+------------------+\n//\n')),Object(s.b)("p",null,"Using the datasets API it is possible to bring the data back to ScalaPB case classes:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"nameAndAddress.as[(String, Option[Address])].collect().foreach(println)\n// (John,Some(Address(HOME,Market,SF,UnknownFieldSet(Map()))))\n// (Mike,Some(Address(WORK,Castro,MV,UnknownFieldSet(Map()))))\n// (Bart,None)\n")),Object(s.b)("p",null,"You can create a Dataset directly using Spark APIs:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"spark.createDataset(testData)\n// res5: Dataset[Person] = [name: string, age: int ... 1 more field]\n")),Object(s.b)("h2",{id:"from-binary-to-protos-and-back"},"From Binary to protos and back"),Object(s.b)("p",null,"In some situations, you may need to deal with datasets that contain serialized protocol buffers. This can be handled by mapping the datasets through ScalaPB's ",Object(s.b)("inlineCode",{parentName:"p"},"parseFrom")," and ",Object(s.b)("inlineCode",{parentName:"p"},"toByteArray")," functions."),Object(s.b)("p",null,"Let's start by preparing a dataset with test binary data by mapping our ",Object(s.b)("inlineCode",{parentName:"p"},"testData"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val binaryDS: Dataset[Array[Byte]] = spark.createDataset(testData.map(_.toByteArray))\n// binaryDS: Dataset[Array[Byte]] = [value: binary]\n\nbinaryDS.show()\n// +--------------------+\n// | value|\n// +--------------------+\n// |[0A 04 4A 6F 68 6...|\n// |[0A 04 4D 69 6B 6...|\n// |[0A 04 42 61 72 7...|\n// +--------------------+\n//\n")),Object(s.b)("p",null,"To turn this dataset into a ",Object(s.b)("inlineCode",{parentName:"p"},"Dataset[Person]"),", we map it through ",Object(s.b)("inlineCode",{parentName:"p"},"parseFrom"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val protosDS: Dataset[Person] = binaryDS.map(Person.parseFrom(_))\n// protosDS: Dataset[Person] = [name: string, age: int ... 
1 more field]\n")),Object(s.b)("p",null,"to turn a dataset of protos into ",Object(s.b)("inlineCode",{parentName:"p"},"Dataset[Array[Byte]]"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val protosBinary: Dataset[Array[Byte]] = protosDS.map(_.toByteArray)\n// protosBinary: Dataset[Array[Byte]] = [value: binary]\n")),Object(s.b)("h2",{id:"on-enums"},"On enums"),Object(s.b)("p",null,"In SparkSQL-ScalaPB, enums are represented as strings. Unrecognized enum values are represented as strings containing the numeric value."),Object(s.b)("h2",{id:"dataframes-and-datasets-from-rdds"},"Dataframes and Datasets from RDDs"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"import org.apache.spark.rdd.RDD\n\nval protoRDD: RDD[Person] = spark.sparkContext.parallelize(testData)\n\nval protoDF: DataFrame = ProtoSQL.protoToDataFrame(spark, protoRDD)\n\nval protoDS: Dataset[Person] = spark.createDataset(protoRDD)\n")),Object(s.b)("h2",{id:"udfs"},"UDFs"),Object(s.b)("p",null,"If you need to write a UDF that returns a message, it would not pick up our encoder and you may get a runtime failure. To work around this, sparksql-scalapb provides ",Object(s.b)("inlineCode",{parentName:"p"},"ProtoSQL.udf")," to create UDFs. For example, if you need to parse a binary column into a proto:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'val binaryDF = protosBinary.toDF("value")\n// binaryDF: DataFrame = [value: binary]\n\nval parsePersons = ProtoSQL.udf { bytes: Array[Byte] => Person.parseFrom(bytes) }\n// parsePersons: org.apache.spark.sql.Column => org.apache.spark.sql.Column = scalapb.spark.Udfs$$Lambda$17600/0x0000000104b02040@7ff0e165\n\nbinaryDF.withColumn("person", parsePersons($"value"))\n// res7: DataFrame = [value: binary, person: struct]\n')),Object(s.b)("h2",{id:"primitive-wrappers"},"Primitive wrappers"),Object(s.b)("p",null,"In ProtoSQL 0.9.x and 0.10.x, primitive wrappers are represented in Spark as structs\nwitha single field named ",Object(s.b)("inlineCode",{parentName:"p"},"value"),". A better representation in Spark would be a\nnullable field of the primitive type. The better representation will be the\ndefault in 0.11.x. To enable this representation today, replace the usages of\n",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL")," with ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL.withPrimitiveWrappers"),".\nInstead of importing ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.Implicits._"),", import\n",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL.implicits._")),Object(s.b)("p",null,"See example in ",Object(s.b)("a",{parentName:"p",href:"https://github.com/scalapb/sparksql-scalapb/blob/80f3162b69313d57f95d3dcbfee865809873567a/sparksql-scalapb/src/test/scala/WrappersSpec.scala#L42-L59"},"WrappersSpec"),"."),Object(s.b)("h2",{id:"datasets-and-none-is-not-a-term"},"Datasets and ",Object(s.b)("inlineCode",{parentName:"h2"}," is not a term")),Object(s.b)("p",null,"You will see this error if for some reason Spark's ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder"),"s are being picked up\ninstead of the ones provided by sparksql-scalapb. Please ensure you are not importing ",Object(s.b)("inlineCode",{parentName:"p"},"spark.implicits._"),". 
See instructions above for imports."),Object(s.b)("h2",{id:"example"},"Example"),Object(s.b)("p",null,"Check out a ",Object(s.b)("a",{parentName:"p",href:"https://github.com/thesamet/sparksql-scalapb-test"},"complete example")," here."))}b.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/28fe488f.74ce6e07.js b/28fe488f.971c2fcf.js similarity index 97% rename from 28fe488f.74ce6e07.js rename to 28fe488f.971c2fcf.js index 92e2da911..e8cec49e6 100644 --- a/28fe488f.74ce6e07.js +++ b/28fe488f.971c2fcf.js @@ -1 +1 @@ -(window.webpackJsonp=window.webpackJsonp||[]).push([[14],{119:function(e,n,t){"use strict";t.d(n,"a",(function(){return b})),t.d(n,"b",(function(){return u}));var a=t(0),r=t.n(a);function o(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function s(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function i(e){for(var n=1;n=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var c=r.a.createContext({}),p=function(e){var n=r.a.useContext(c),t=n;return e&&(t="function"==typeof e?e(n):i(i({},n),e)),t},b=function(e){var n=p(e.components);return r.a.createElement(c.Provider,{value:n},e.children)},d={inlineCode:"code",wrapper:function(e){var n=e.children;return r.a.createElement(r.a.Fragment,{},n)}},m=r.a.forwardRef((function(e,n){var t=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),b=p(t),m=a,u=b["".concat(s,".").concat(m)]||b[m]||d[m]||o;return t?r.a.createElement(u,i(i({ref:n},c),{},{components:t})):r.a.createElement(u,i({ref:n},c))}));function u(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var o=t.length,s=new Array(o);s[0]=m;var i={};for(var l in n)hasOwnProperty.call(n,l)&&(i[l]=n[l]);i.originalType=e,i.mdxType="string"==typeof e?e:a,s[1]=i;for(var c=2;c,\n// UnknownFieldSet(Map())\n// )\n// ),\n// UnknownFieldSet(Map())\n// )\n\nval typeRegistry = TypeRegistry().addMessage[MyMessage]\n// typeRegistry: TypeRegistry = TypeRegistry(\n// Map(\n// "type.googleapis.com/com.thesamet.docs.MyMessage" -> com.thesamet.docs.json.MyMessage$@5e020ff8\n// ),\n// Set()\n// )\n\nval printer = new Printer().withTypeRegistry(typeRegistry)\n// printer: Printer = scalapb.json4s.Printer@1870ec39\n\nprinter.print(c)\n// res0: String = "{\\"myAny\\":{\\"@type\\":\\"type.googleapis.com/com.thesamet.docs.MyMessage\\",\\"x\\":17}}"\n')),Object(o.b)("p",null,"Conversely, you can start from a JSON and parse it back to a ",Object(o.b)("inlineCode",{parentName:"p"},"MyContainer")," that contains an ",Object(o.b)("inlineCode",{parentName:"p"},"Any")," field:"),Object(o.b)("pre",null,Object(o.b)("code",{parentName:"pre",className:"language-scala"},'val parser = new Parser().withTypeRegistry(typeRegistry)\n// parser: Parser = scalapb.json4s.Parser@39da1c92\n\nparser.fromJsonString[MyContainer]("""\n {\n "myAny": {\n "@type": "type.googleapis.com/com.thesamet.docs.MyMessage",\n "x": 17\n }\n }""")\n// res1: MyContainer = MyContainer(\n// Some(\n// Any(\n// "type.googleapis.com/com.thesamet.docs.MyMessage",\n// ,\n// UnknownFieldSet(Map())\n// )\n// ),\n// UnknownFieldSet(Map())\n// )\n')))}b.isMDXComponent=!0}}]); \ No newline at end of file 
+(window.webpackJsonp=window.webpackJsonp||[]).push([[14],{119:function(e,n,t){"use strict";t.d(n,"a",(function(){return b})),t.d(n,"b",(function(){return u}));var a=t(0),r=t.n(a);function o(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function s(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function i(e){for(var n=1;n=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var c=r.a.createContext({}),p=function(e){var n=r.a.useContext(c),t=n;return e&&(t="function"==typeof e?e(n):i(i({},n),e)),t},b=function(e){var n=p(e.components);return r.a.createElement(c.Provider,{value:n},e.children)},d={inlineCode:"code",wrapper:function(e){var n=e.children;return r.a.createElement(r.a.Fragment,{},n)}},m=r.a.forwardRef((function(e,n){var t=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),b=p(t),m=a,u=b["".concat(s,".").concat(m)]||b[m]||d[m]||o;return t?r.a.createElement(u,i(i({ref:n},c),{},{components:t})):r.a.createElement(u,i({ref:n},c))}));function u(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var o=t.length,s=new Array(o);s[0]=m;var i={};for(var l in n)hasOwnProperty.call(n,l)&&(i[l]=n[l]);i.originalType=e,i.mdxType="string"==typeof e?e:a,s[1]=i;for(var c=2;c,\n// UnknownFieldSet(Map())\n// )\n// ),\n// UnknownFieldSet(Map())\n// )\n\nval typeRegistry = TypeRegistry().addMessage[MyMessage]\n// typeRegistry: TypeRegistry = TypeRegistry(\n// Map(\n// "type.googleapis.com/com.thesamet.docs.MyMessage" -> com.thesamet.docs.json.MyMessage$@4aee7e67\n// ),\n// Set()\n// )\n\nval printer = new Printer().withTypeRegistry(typeRegistry)\n// printer: Printer = scalapb.json4s.Printer@3e89ec63\n\nprinter.print(c)\n// res0: String = "{\\"myAny\\":{\\"@type\\":\\"type.googleapis.com/com.thesamet.docs.MyMessage\\",\\"x\\":17}}"\n')),Object(o.b)("p",null,"Conversely, you can start from a JSON and parse it back to a ",Object(o.b)("inlineCode",{parentName:"p"},"MyContainer")," that contains an ",Object(o.b)("inlineCode",{parentName:"p"},"Any")," field:"),Object(o.b)("pre",null,Object(o.b)("code",{parentName:"pre",className:"language-scala"},'val parser = new Parser().withTypeRegistry(typeRegistry)\n// parser: Parser = scalapb.json4s.Parser@37452476\n\nparser.fromJsonString[MyContainer]("""\n {\n "myAny": {\n "@type": "type.googleapis.com/com.thesamet.docs.MyMessage",\n "x": 17\n }\n }""")\n// res1: MyContainer = MyContainer(\n// Some(\n// Any(\n// "type.googleapis.com/com.thesamet.docs.MyMessage",\n// ,\n// UnknownFieldSet(Map())\n// )\n// ),\n// UnknownFieldSet(Map())\n// )\n')))}b.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/404.html b/404.html index 062d03496..6b089e768 100644 --- a/404.html +++ b/404.html @@ -10,14 +10,14 @@ Page Not Found | ScalaPB - +

Page Not Found

We could not find what you were looking for.

Please contact the owner of the site that linked you to the original URL and let them know their link is broken.

- + \ No newline at end of file diff --git a/blog/2019/05/28/hola/index.html b/blog/2019/05/28/hola/index.html index 1a8a7fee7..9656d49d0 100644 --- a/blog/2019/05/28/hola/index.html +++ b/blog/2019/05/28/hola/index.html @@ -10,7 +10,7 @@ Hola | ScalaPB - + @@ -23,7 +23,7 @@

Hola

Gao Wei

Gao Wei

Docusaurus Core Team

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

- + diff --git a/blog/2019/05/29/hello-world/index.html b/blog/2019/05/29/hello-world/index.html index 5d7f7e125..52f5d73ce 100644 --- a/blog/2019/05/29/hello-world/index.html +++ b/blog/2019/05/29/hello-world/index.html @@ -10,7 +10,7 @@ Hello | ScalaPB - + @@ -23,7 +23,7 @@
- + diff --git a/blog/2019/05/30/welcome/index.html b/blog/2019/05/30/welcome/index.html index 42872c044..95dd634ae 100644 --- a/blog/2019/05/30/welcome/index.html +++ b/blog/2019/05/30/welcome/index.html @@ -10,7 +10,7 @@ Welcome | ScalaPB - + @@ -23,7 +23,7 @@

Welcome

Yangshun Tay

Yangshun Tay

Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

- + diff --git a/blog/index.html b/blog/index.html index 72c73520e..22337bc24 100644 --- a/blog/index.html +++ b/blog/index.html @@ -10,7 +10,7 @@ Blog | ScalaPB - + @@ -26,7 +26,7 @@

Welcome

Yangshun Tay

Yangshun Tay

Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

Hola

Gao Wei

Gao Wei

Docusaurus Core Team

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

- + diff --git a/blog/tags/docusaurus/index.html b/blog/tags/docusaurus/index.html index 66b67a5a7..cdcb0806b 100644 --- a/blog/tags/docusaurus/index.html +++ b/blog/tags/docusaurus/index.html @@ -10,7 +10,7 @@ Posts tagged "docusaurus" | ScalaPB - + @@ -26,7 +26,7 @@

3 posts tagged with "docusaurus"

View All Tags

Welcome

Yangshun Tay

Yangshun Tay

Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

Hola

Gao Wei

Gao Wei

Docusaurus Core Team

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

- + diff --git a/blog/tags/facebook/index.html b/blog/tags/facebook/index.html index 0a279edbd..9b0f5755a 100644 --- a/blog/tags/facebook/index.html +++ b/blog/tags/facebook/index.html @@ -10,7 +10,7 @@ Posts tagged "facebook" | ScalaPB - + @@ -24,7 +24,7 @@

1 post tagged with "facebook"

View All Tags

Welcome

Yangshun Tay

Yangshun Tay

Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

- + diff --git a/blog/tags/hello/index.html b/blog/tags/hello/index.html index 8761e663c..a4729f26b 100644 --- a/blog/tags/hello/index.html +++ b/blog/tags/hello/index.html @@ -10,7 +10,7 @@ Posts tagged "hello" | ScalaPB - + @@ -25,7 +25,7 @@

2 posts tagged with "hello"

View All Tags

Welcome

Yangshun Tay

Yangshun Tay

Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

- + diff --git a/blog/tags/hola/index.html b/blog/tags/hola/index.html index bb1b6a70d..f5e305c40 100644 --- a/blog/tags/hola/index.html +++ b/blog/tags/hola/index.html @@ -10,7 +10,7 @@ Posts tagged "hola" | ScalaPB - + @@ -24,7 +24,7 @@

1 post tagged with "hola"

View All Tags

Hola

Gao Wei

Gao Wei

Docusaurus Core Team

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

- + diff --git a/blog/tags/index.html b/blog/tags/index.html index b1be54d3a..ef3bbe430 100644 --- a/blog/tags/index.html +++ b/blog/tags/index.html @@ -10,7 +10,7 @@ Tags | ScalaPB - + @@ -22,7 +22,7 @@ - + diff --git a/docs/common-protos/index.html b/docs/common-protos/index.html index 974f37297..5233deac1 100644 --- a/docs/common-protos/index.html +++ b/docs/common-protos/index.html @@ -10,7 +10,7 @@ Common protos | ScalaPB - + @@ -29,7 +29,7 @@ the classpath. This is accomplished by adding the library as a normal dependency.

If you don't have any proto files that import the common protos, then you can omit the "protobuf" dependency.

Adding new packages#

If you don't see your favorite third-party proto package here, and there is already a Maven package for it that provides the proto files (possibly with Java generated classes), you can send a pull request to common-protos to have it added. See the instructions on the ScalaPB Common Protos project page on GitHub.

Available packages#

proto-google-common-protos#

ScalaPB 0.11.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.11" % "2.9.6-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.11" % "2.9.6-0"
)

ScalaPB 0.10.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.10" % "2.9.6-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.10" % "2.9.6-0"
)

ScalaPB 0.9.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.9" % "2.9.6-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.9" % "2.9.6-0"
)

proto-google-cloud-pubsub-v1#

ScalaPB 0.11.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.11" % "1.102.20-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.11" % "1.102.20-0"
)

ScalaPB 0.10.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.10" % "1.102.20-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.10" % "1.102.20-0"
)

ScalaPB 0.9.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.9" % "1.102.20-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.9" % "1.102.20-0"
)

pgv-proto#

ScalaPB 0.11.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.11" % "0.6.13-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.11" % "0.6.13-0"
)

ScalaPB 0.10.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.10" % "0.6.13-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.10" % "0.6.13-0"
)

ScalaPB 0.9.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.9" % "0.6.13-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.9" % "0.6.13-0"
)


- + diff --git a/docs/contact/index.html b/docs/contact/index.html index 0442c45f7..40fe2e644 100644 --- a/docs/contact/index.html +++ b/docs/contact/index.html @@ -10,7 +10,7 @@ Contacting us | ScalaPB - + @@ -46,7 +46,7 @@ would be a great way to support the time and effort put into the development of ScalaPB!

- + diff --git a/docs/customizations/index.html b/docs/customizations/index.html index 9d14516ba..66e0bfaf1 100644 --- a/docs/customizations/index.html +++ b/docs/customizations/index.html @@ -10,7 +10,7 @@ Customizations | ScalaPB - + @@ -225,7 +225,7 @@ sealed oneofs:

message Foo {
option (scalapb.message).derives = "yourpkg.Show";
...
}}}
message Expr {
option (scalapb.message).sealed_oneof_derives = "yourpkg.Show";
oneof sealed_value {
...
}
}
- + diff --git a/docs/dotty/index.html b/docs/dotty/index.html index 97d9f53f3..59b33eed1 100644 --- a/docs/dotty/index.html +++ b/docs/dotty/index.html @@ -10,7 +10,7 @@ Using with Dotty | ScalaPB - + @@ -29,7 +29,7 @@ the Scala compiler with the default compiler settings. It is known that currently the generator will provide an error if -language:strictEquality is set.

- + diff --git a/docs/faq/index.html b/docs/faq/index.html index 6882bbaf8..85bfb040f 100644 --- a/docs/faq/index.html +++ b/docs/faq/index.html @@ -10,7 +10,7 @@ Frequently Asked Questions | ScalaPB - + @@ -76,7 +76,7 @@
Use a recent version of sbt-protoc (at least 1.0.6), which defaults to a
compatible version of protoc (3.19.2).
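
For illustration only, pinning these versions in an sbt build could look like the following sketch (the plugin coordinates are the standard sbt-protoc ones; adjust the versions to your setup):

// project/plugins.sbt
addSbtPlugin("com.thesamet" % "sbt-protoc" % "1.0.6")

// build.sbt (optional): pin protoc itself; recent sbt-protoc releases accept a plain version string here
PB.protocVersion := "3.19.2"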
- + diff --git a/docs/generated-code/index.html b/docs/generated-code/index.html index af77a6f05..752253f6e 100644 --- a/docs/generated-code/index.html +++ b/docs/generated-code/index.html @@ -10,7 +10,7 @@ Generated Code | ScalaPB - + @@ -119,7 +119,7 @@ toJavaProto methods.
  • The companion object for enums will have fromJavaValue and toJavaValue methods.
  • - + diff --git a/docs/generic/index.html b/docs/generic/index.html index f7d126054..8dc92419e 100644 --- a/docs/generic/index.html +++ b/docs/generic/index.html @@ -10,7 +10,7 @@ Writing generic code | ScalaPB - + @@ -46,7 +46,7 @@ to return, and the filename. The Scala compiler will automatically find the appropriate message companion to pass as cmp via implicit search:

    readFromFile[Person]("/tmp/person.pb")
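
For reference, a minimal implementation of such a helper might look like the sketch below. The signature mirrors the call above; the use of FileInputStream and the try/finally resource handling are illustrative assumptions rather than code taken from this page.

import java.io.FileInputStream
import scalapb.{GeneratedMessage, GeneratedMessageCompanion}

// Parse a serialized message of type A from a file, delegating the actual
// decoding to the implicitly supplied companion object.
def readFromFile[A <: GeneratedMessage](path: String)(implicit
    cmp: GeneratedMessageCompanion[A]): A = {
  val input = new FileInputStream(path)
  try cmp.parseFrom(input)
  finally input.close()
}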
    - + diff --git a/docs/getting-started/index.html b/docs/getting-started/index.html index 31a61b4b3..97c26a2c0 100644 --- a/docs/getting-started/index.html +++ b/docs/getting-started/index.html @@ -10,7 +10,7 @@ Protocol Buffer Tutorial: Scala | ScalaPB - + @@ -46,7 +46,7 @@
    Person(
    id = id,
    name = name,
    email = if (email.nonEmpty) Some(email) else None,
    phones = phones
    )
    }
    def addPerson(): Unit = {
    val newPerson = personFromStdin()
    val addressBook = readFromFile()
    // Append the new person to the people list field
    val updated = addressBook.update(
    _.people :+= newPerson
    )
    Using(new FileOutputStream("addressbook.pb")) { output =>
    updated.writeTo(output)
    }
    }
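
The addPerson function above relies on a readFromFile() helper that is not shown in this excerpt. A plausible sketch, assuming the AddressBook message generated from the tutorial's addressbook.proto is in scope, is:

import java.io.FileInputStream
import scala.util.Using

// Read the existing address book, or fall back to an empty one if the
// file does not exist yet (for example, on the first run).
def readFromFile(): AddressBook =
  Using(new FileInputStream("addressbook.pb")) { input =>
    AddressBook.parseFrom(input)
  }.getOrElse(AddressBook())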

    Running the example#

    In sbt, type run

    This document, "Protocol Buffer Tutorial: Scala" is a modification of "Protocol Buffer Basics: Java", which is a work created and shared by Google and used according to terms described in the Creative Commons 4.0 Attribution License.

- + diff --git a/docs/grpc/index.html b/docs/grpc/index.html index 421ae402e..6b9c07f48 100644 --- a/docs/grpc/index.html +++ b/docs/grpc/index.html @@ -10,7 +10,7 @@ gRPC | ScalaPB - + @@ -44,7 +44,7 @@ closely the official grpc-java API. Example project coming soon.

    grpc-netty issues#

    In certain situations (for example when you have a fat jar), you may see the following exception:

    Exception in thread "main" io.grpc.ManagedChannelProvider$ProviderNotFoundException: No functional server found. Try adding a dependency on the grpc-netty artifact

    To work around this issue, try the following solutions:

1. Create a NettyServer explicitly using io.grpc.netty.NettyServerBuilder (see the fuller sketch after this list).

    Example:

    NettyServerBuilder
    .forPort(9000)
    .keepAliveTime(500, TimeUnit.SECONDS)
2. If using SBT, try the following merge conflict strategy:
    assemblyMergeStrategy in assembly := {
    case x if x.contains("io.netty.versions.properties") => MergeStrategy.discard
    case x =>
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
    }
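
As an illustration of the first workaround above, a more complete server bootstrap might look like this sketch. MyServiceGrpc and MyServiceImpl are placeholders for your own generated service and its implementation, and the port and keep-alive values are arbitrary:

import io.grpc.netty.NettyServerBuilder
import java.util.concurrent.TimeUnit
import scala.concurrent.ExecutionContext

// Building the server through NettyServerBuilder directly avoids the
// ServiceLoader lookup that can fail inside a fat JAR.
val server = NettyServerBuilder
  .forPort(9000)
  .keepAliveTime(500, TimeUnit.SECONDS)
  .addService(MyServiceGrpc.bindService(new MyServiceImpl, ExecutionContext.global)) // placeholders
  .build()
  .start()

server.awaitTermination()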
    - + diff --git a/docs/index.html b/docs/index.html index de19a9e8c..a2526e886 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ ScalaPB: Scala Protocol Buffer Compiler | ScalaPB - + @@ -33,7 +33,7 @@ Oneof's that were introduced in Protocol Buffers 2.6.0.

  • Newer: Supports Scala.js (in 0.5.x).

  • Newer: Supports gRPC (in 0.5.x).

  • Newest: Supports SparkSQL (in 0.5.23).

  • Newest: Supports converting to and from JSON (in 0.5.x).

  • Newest: Supports User-defined options (in 0.5.29).

  • - + diff --git a/docs/installation/index.html b/docs/installation/index.html index 3ae64c46d..d7e59dff3 100644 --- a/docs/installation/index.html +++ b/docs/installation/index.html @@ -10,7 +10,7 @@ Installing ScalaPB | ScalaPB - + @@ -31,7 +31,7 @@ use scalapbc (ScalaPB compiler).

    See ScalaPBC.

    Running from Maven#

    Using ScalaPBC, you can get maven to generate the code for you. Check out the ScalaPB Maven example.

    Next:#

    Read about the Generated Code.

    - + diff --git a/docs/json/index.html b/docs/json/index.html index f9c954902..e9568a8ec 100644 --- a/docs/json/index.html +++ b/docs/json/index.html @@ -10,7 +10,7 @@ ScalaPB and JSON | ScalaPB - + @@ -18,7 +18,7 @@ - +
@@ -45,13 +45,13 @@ strings. To use the numeric representation, set this option to true. Note that due to the way JavaScript represents numbers, precision may be lost (more details here).

The parser can be instantiated with new scalapb.json4s.Parser(), and its builder-style methods return new parser instances with customized configuration (a combined example follows below):

• ignoringUnknownFields: by default, the parser throws a JsonFormatException when it encounters unknown fields. When this option is enabled, unknown fields are silently ignored.
• ignoringOverlappingOneofFields: by default, the parser throws a JsonFormatException if values are provided for more than one field within the same oneof. When this option is enabled, one of the provided values is picked for the oneof.
• mapEntriesAsKeyValuePairs: by default, protobuf maps are modeled as JSON objects. When this setting is enabled, protobuf maps are expected to be read as arrays of objects with key and value keys.

See the list of constructor parameters here
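
As a quick, illustrative sketch (not taken from this page), a lenient parser combining the options above could be configured as follows; MyMessage is the message used in the Any example below:

import com.thesamet.docs.json.MyMessage
import scalapb.json4s.Parser

// Each builder method returns a new Parser with the extra behavior enabled.
val lenientParser: Parser = new Parser()
  .ignoringUnknownFields
  .ignoringOverlappingOneofFields

// The unrecognized "note" key is dropped instead of raising a JsonFormatException.
val msg: MyMessage = lenientParser.fromJsonString[MyMessage]("""{"x": 17, "note": "ignored"}""")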

    Printing and parsing Anys#

In Protocol Buffers, google.protobuf.Any is a type that embeds an arbitrary protobuf message. An Any is represented as a message that contains a typeUrl field that identifies the type, and a bytes field value which contains the serialized contents of a message. In JSON, the message embedded in the Any is serialized as usual, and a @type key is added to it to identify which message it is. The parser relies on this @type key to determine the message type. To accomplish this, all the expected embedded types need to be registered with a TypeRegistry so that the printer and parser know how to process the embedded message.

    The following example is based on this proto.

    import com.thesamet.docs.json._
    import scalapb.json4s.{Printer, Parser, TypeRegistry}

    val c = MyContainer(
      myAny=Some(
        com.google.protobuf.any.Any.pack(
          MyMessage(x=17)
        )
      )
    )
    // c: MyContainer = MyContainer(
    //   Some(
    //     Any(
    //       "type.googleapis.com/com.thesamet.docs.MyMessage",
    //       <ByteString@31a3727a size=2 contents="\b\021">,
    //       UnknownFieldSet(Map())
    //     )
    //   ),
    //   UnknownFieldSet(Map())
    // )

    val typeRegistry = TypeRegistry().addMessage[MyMessage]
    // typeRegistry: TypeRegistry = TypeRegistry(
    //   Map(
    //     "type.googleapis.com/com.thesamet.docs.MyMessage" -> com.thesamet.docs.json.MyMessage$@5e020ff8
    //   ),
    //   Set()
    // )

    val printer = new Printer().withTypeRegistry(typeRegistry)
    // printer: Printer = scalapb.json4s.Printer@1870ec39

    printer.print(c)
    // res0: String = "{\"myAny\":{\"@type\":\"type.googleapis.com/com.thesamet.docs.MyMessage\",\"x\":17}}"

    Conversely, you can start from a JSON and parse it back to a MyContainer that contains an Any field:

    val parser = new Parser().withTypeRegistry(typeRegistry)
    // parser: Parser = scalapb.json4s.Parser@39da1c92

    parser.fromJsonString[MyContainer]("""
    {
      "myAny": {
        "@type": "type.googleapis.com/com.thesamet.docs.MyMessage",
        "x": 17
      }
    }""")
    // res1: MyContainer = MyContainer(
    //   Some(
    //     Any(
    //       "type.googleapis.com/com.thesamet.docs.MyMessage",
    //       <ByteString@1bb57311 size=2 contents="\b\021">,
    //       UnknownFieldSet(Map())
    //     )
    //   ),
    //   UnknownFieldSet(Map())
    // )
    SBT Settings#

    The location of proto sources is configurable using the Compile / PB.protoSources setting.

    By default, sbt-protoc invokes protoc 3.x that is shipped with protoc-jar. If you would like to run a different version of protoc:

    PB.protocVersion := "-v3.11.4"

    See all available options in sbt-protoc documentation

    Java Conversions#

    To enable Java conversions add the following to your build.sbt:

    Compile / PB.targets := Seq(
    PB.gens.java -> (Compile / sourceManaged).value,
    scalapb.gen(javaConversions=true) -> (Compile / sourceManaged).value
    )
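
    With javaConversions enabled, each generated companion object also gets toJavaProto and fromJavaProto methods. A minimal sketch of a round trip, assuming a Person message compiled by both the Java and Scala generators (the package and class names below are illustrative, not from the original docs):

    import com.example.person.Person              // ScalaPB case class (hypothetical package)
    import com.example.protos.PersonOuterClass    // Java-generated classes (hypothetical package)

    // Convert the ScalaPB message to its Java counterpart and back.
    val scalaPerson: Person = Person(name = "Jane")
    val javaPerson: PersonOuterClass.Person = Person.toJavaProto(scalaPerson)
    val backToScala: Person = Person.fromJavaProto(javaPerson)
    assert(backToScala == scalaPerson)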

    gRPC#

    Generating gRPC stubs for services is enabled by default. To disable:

    Compile / PB.targets := Seq(
    scalapb.gen(grpc=false) -> (Compile / sourceManaged).value
    )

    Additional options to the generator#

    scalapb.gen(
    flatPackage: Boolean = false,
    javaConversions: Boolean = false,
    grpc: Boolean = true,
    singleLineToProtoString: Boolean = false,
    asciiFormatToString: Boolean = false,
    lenses: Boolean = true,
    retainSourceCodeInfo: Boolean = false
    )
    Each generator option below is listed together with its scalapbc equivalent:
    • flatPackage (scalapbc: flat_package): When set, ScalaPB will not append the proto file base name to the package name.
    • javaConversions (scalapbc: java_conversions): Generates two functions in the companion object, toJavaProto and fromJavaProto, that convert between the Scala case class and the Java protobufs. For the generated code to compile, the Java protobuf code needs to also be generated or available as a library dependency.
    • grpc (scalapbc: grpc): Generates gRPC code for services. Default is true in scalapb.gen, and needs to be explicitly specified in scalapbc.
    • singleLineToProtoString (scalapbc: single_line_to_proto_string): By default, ScalaPB generates a toProtoString() method that renders the message in a multi-line format (using TextFormat.printToUnicodeString). If set, ScalaPB generates toProtoString() methods that use the single-line format.
    • asciiFormatToString (scalapbc: ascii_format_to_string): Setting this to true overrides toString to return a standard ASCII representation of the message by calling toProtoString.
    • lenses (scalapbc: no_lenses): By default, ScalaPB generates lenses for each message for easy updating. If you are not using this feature and would like to reduce code size or compilation time, you can set this to false and lenses will not be generated.
    • retainSourceCodeInfo (scalapbc: retain_source_code_info): Retain source code information (locations, comments) provided by protoc in the descriptors. Use the location accessor to get that information from a descriptor.
    • scala3Sources (scalapbc: scala3_sources): If set, generates sources that are error-free under -source future with Scala 3, or -Xsource:3 with Scala 2.13.
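
    For example, a sketch that enables flat packages and skips lens generation from sbt (using only the options listed above):

    Compile / PB.targets := Seq(
      scalapb.gen(flatPackage = true, lenses = false) -> (Compile / sourceManaged).value
    )

    With scalapbc, the same names are passed as generator parameters, for example --scala_out=flat_package,no_lenses:target/generated, where the output directory is a placeholder.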
    Using ScalaPB with Scala.js#

    Getting Started#

    Add to your library dependencies:

    libraryDependencies ++= Seq(
    "com.thesamet.scalapb" %%% "scalapb-runtime" % scalapb.compiler.Version.scalapbVersion,
    // The following needed only if you include scalapb/scalapb.proto:
    "com.thesamet.scalapb" %%% "scalapb-runtime" % scalapb.compiler.Version.scalapbVersion % "protobuf"
    )

    Demo#

    Example project: https://github.com/thesamet/scalapbjs-test

    Example with multi-project build: https://github.com/thesamet/sbt-protoc/tree/master/examples/scalajs-multiproject

    Live demo: http://thesamet.github.io/scalapbjs-test/

    ScalaPBC: ScalaPB's standalone compiler#

    The following invocation generates ZIO gRPC code along with the GRPC descriptors that the generated ZIO code depends on:

    bin/scalapbc --plugin-artifact=com.thesamet.scalapb.zio-grpc:protoc-gen-zio:0.1.0:default,classifier=unix,ext=sh,type=jar -- e2e/src/main/protobuf/service.proto --zio_out=/tmp/out --scala_out=grpc:/tmp/out -Ie2e/src/main/protobuf -Ithird_party -Iprotobuf

    bin/scalapbc --plugin-artifact=io.grpc:grpc-java:

    Using ScalaPB as a proper protoc plugin#

    You may want to use ScalaPB code generator as a standard protoc plugin (rather than using scalapbc as a wrapper or through SBT).

    For Linux and Mac OS X, you can download a native executable version of the plugin for Scala from our release page:

    Those zip files contain native executables of the plugin for the respective operating system built using GraalVM. If you are using another operating system, or prefer to use a JVM based plugin implementation, you will find executable scripts for Windows and Unix-like operating systems in maven. These scripts require a JVM to run. The JVM needs to be available on the path, or through the JAVA_HOME environment variable.

    To generate code:

    protoc my.protos --plugin=protoc-gen-scala=/path/to/bin/protoc-gen-scala-0.11.11-unix.sh --scala_out=scala

    On Windows:

    protoc my.protos --plugin=protoc-gen-scala=/path/to/bin/protoc-gen-scala.bat --scala_out=scala

    For passing parameters to the plugin, see the section above.

    Note that the standalone plugin provided in scalapbc needs to be able to find a JVM in the path or through JAVA_HOME environment variable. If you encounter unexpected errors, try to execute the plugin directly from the command line, and the output printed may be useful for further debugging.

    The generated code depends on scalapb-runtime to compile. To get the code to work, add a dependency on scalapb-runtime to your project. The version of scalapb-runtime needs to match or be newer than the version of the plugin.
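
    For example, in sbt syntax (pick the version that matches your plugin, such as the 0.11.11 build referenced above):

    libraryDependencies += "com.thesamet.scalapb" %% "scalapb-runtime" % "0.11.11"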

    Sealed oneofs#
    case class Mul(left: Option[Expr], right: Option[Expr]) extends Expr with GeneratedMessage
    case class Programs(exprs: Seq[Option[Expr]]) extends GeneratedMessage
    Using ScalaPB with Spark#
    Protobuf messages may contain types that Spark does not understand, such as enums, ByteStrings and oneofs. To get around this, sparksql-scalapb provides its own Encoders for protocol buffers.

    However, it turns out there is another obstacle. Spark does not provide any mechanism to compose user-provided encoders with its own reflection-derived Encoders. Therefore, merely providing an Encoder for protocol buffers is insufficient to derive an encoder for regular case-classes that contain a protobuf as a field. To solve this problem, ScalaPB uses frameless which relies on implicit search to derive encoders. This approach enables combining ScalaPB's encoders with frameless encoders that takes care for all non-protobuf types.

    Setting up your project#

    We are going to use sbt-assembly to deploy a fat JAR containing ScalaPB, and your compiled protos. Make sure in project/plugins.sbt you have a line that adds sbt-assembly:

    addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10")

    To add sparksql-scalapb to your project, add one of the following lines that matches both the version of ScalaPB and Spark you use:

    // Spark 3.5 and ScalaPB 0.11
    libraryDependencies += "com.thesamet.scalapb" %% "sparksql35-scalapb0_11" % "1.0.4"

    // Spark 3.4 and ScalaPB 0.11
    libraryDependencies += "com.thesamet.scalapb" %% "sparksql34-scalapb0_11" % "1.0.4"

    // Spark 3.3 and ScalaPB 0.11
    libraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_11" % "1.0.4"

    // Spark 3.2 and ScalaPB 0.11
    libraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_11" % "1.0.4"

    // Spark 3.1 and ScalaPB 0.11
    libraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_11" % "1.0.4"

    // Spark 3.0 and ScalaPB 0.11
    libraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_11" % "1.0.1"

    // Spark 3.3 and ScalaPB 0.10
    libraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_10" % "1.0.4"

    // Spark 3.2 and ScalaPB 0.10
    libraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_10" % "1.0.4"

    // Spark 3.1 and ScalaPB 0.10
    libraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_10" % "1.0.4"

    // Spark 3.0 and ScalaPB 0.10
    libraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_10" % "1.0.1"

    // Spark 2.x and ScalaPB 0.10
    libraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.10.4"

    // Spark 2.x and ScalaPB 0.9
    libraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.9.3"

    Known issue: Spark 3.2.1 is binary incompatible with Spark 3.2.0 in some of the internal APIs sparksql-scalapb uses. If you use Spark 3.2.0, please stick to sparksql-scalapb 1.0.0-M1.

    Spark ships with an old version of Google's Protocol Buffers runtime that is not compatible with the current version. In addition, it comes with incompatible versions of scala-collection-compat and shapeless. Therefore, we need to shade these libraries. Add the following to your build.sbt:

    assemblyShadeRules in assembly := Seq(
    ShadeRule.rename("com.google.protobuf.**" -> "shadeproto.@1").inAll,
    ShadeRule.rename("scala.collection.compat.**" -> "shadecompat.@1").inAll,
    ShadeRule.rename("shapeless.**" -> "shadeshapeless.@1").inAll
    )

    See complete example of build.sbt.

    Using sparksql-scalapb#

    We assume you have a SparkSession assigned to the variable spark. In a standalone Scala program, this can be created with:

    import org.apache.spark.sql.SparkSession

    val spark: SparkSession = SparkSession
      .builder()
      .appName("ScalaPB Demo")
      .master("local[2]")
      .getOrCreate()
    // spark: SparkSession = org.apache.spark.sql.SparkSession@68d6fe03

    IMPORTANT: Ensure you do not import spark.implicits._ to avoid ambiguity between the encoders provided by ScalaPB and Spark's default encoders. You may want to import StringToColumn to convert $"col name" into a Column. Add an import of scalapb.spark.Implicits._ to bring ScalaPB's encoders for protocol buffers into the implicit search scope:

    import org.apache.spark.sql.{Dataset, DataFrame, functions => F}
    import spark.implicits.StringToColumn
    import scalapb.spark.ProtoSQL
    import scalapb.spark.Implicits._

    The code snippets below use the Person message.

    We start by creating some test data:

    import scalapb.docs.person.Person
    import scalapb.docs.person.Person.{Address, AddressType}
    val testData = Seq(
    Person(name="John", age=32, addresses=Vector(
    Address(addressType=AddressType.HOME, street="Market", city="SF"))
    ),
    Person(name="Mike", age=29, addresses=Vector(
    Address(addressType=AddressType.WORK, street="Castro", city="MV"),
    Address(addressType=AddressType.HOME, street="Church", city="MV"))
    ),
    Person(name="Bart", age=27)
    )

    We can create a DataFrame from the test data:

    val df = ProtoSQL.createDataFrame(spark, testData)
    // df: DataFrame = [name: string, age: int ... 1 more field]
    df.printSchema()
    // root
    // |-- name: string (nullable = true)
    // |-- age: integer (nullable = true)
    // |-- addresses: array (nullable = false)
    // | |-- element: struct (containsNull = false)
    // | | |-- address_type: string (nullable = true)
    // | | |-- street: string (nullable = true)
    // | | |-- city: string (nullable = true)
    //
    df.show()
    // +----+---+--------------------+
    // |name|age| addresses|
    // +----+---+--------------------+
    // |John| 32|[{HOME, Market, SF}]|
    // |Mike| 29|[{WORK, Castro, M...|
    // |Bart| 27| []|
    // +----+---+--------------------+
    //

    and then process it as any other DataFrame in Spark:

    df.select($"name", F.size($"addresses").alias("address_count")).show()
    // +----+-------------+
    // |name|address_count|
    // +----+-------------+
    // |John| 1|
    // |Mike| 2|
    // |Bart| 0|
    // +----+-------------+
    //
    val nameAndAddress = df.select($"name", $"addresses".getItem(0).alias("firstAddress"))
    // nameAndAddress: DataFrame = [name: string, firstAddress: struct<address_type: string, street: string ... 1 more field>]
    val protoRDD: RDD[Person] = spark.sparkContext.parallelize(testData)
    val protoDF: DataFrame = ProtoSQL.protoToDataFrame(spark, protoRDD)
    val protoDS: Dataset[Person] = spark.createDataset(protoRDD)

    UDFs#

    If you need to write a UDF that returns a message, a UDF created with Spark's standard udf function would not pick up our encoders and you may get a runtime failure. To work around this, sparksql-scalapb provides ProtoSQL.udf to create UDFs. For example, if you need to parse a binary column into a proto:

    val binaryDF = protosBinary.toDF("value")
    // binaryDF: DataFrame = [value: binary]
    val parsePersons = ProtoSQL.udf { bytes: Array[Byte] => Person.parseFrom(bytes) }
    // parsePersons: org.apache.spark.sql.Column => org.apache.spark.sql.Column = scalapb.spark.Udfs$$Lambda$17600/0x0000000104b02040@7ff0e165
    binaryDF.withColumn("person", parsePersons($"value"))
    // res7: DataFrame = [value: binary, person: struct<name: string, age: int ... 1 more field>]

    Primitive wrappers#

    In ProtoSQL 0.9.x and 0.10.x, primitive wrappers are represented in Spark as structs with a single field named value. A better representation in Spark would be a nullable field of the primitive type, and it will become the default in a future version of sparksql-scalapb. To opt in to the nullable representation now, use the encoders from scalapb.spark.ProtoSQL.implicits._ instead.

    See example in WrappersSpec.

    Datasets and <none> is not a term#

    You will see this error if for some reason Spark's Encoders are being picked up instead of the ones provided by sparksql-scalapb. Please ensure you are not importing spark.implicits._. See instructions above for imports.

    Example#

    Check out a complete example here.

    Using third-party protos#
    Compile / PB.targets := Seq(
    scalapb.gen() -> (Compile / sourceManaged).value
    )
    )
    // myProject contains its own protos which rely on protos from externalProtos
    lazy val myProject = (project in file("my-project"))
    .dependsOn(externalProtos)
    .settings(
    Compile / PB.targets := Seq(
    scalapb.gen() -> (Compile / sourceManaged).value
    )
    )

    See full example here.

    Transformations#
    import "scalapb/scalapb.proto";
    option (scalapb.options) = {
    scope: PACKAGE
    field_transformations : [
    {
    when : {
    type: TYPE_MESSAGE
    type_name: ".google.protobuf.Timestamp"
    }
    set : {[scalapb.field] {type : 'com.myexample.MyType' }}
    }
    ]
    };
    note

    Note the . (dot) prefix in the type_name field above. It is needed as explained here. In this example we assume the user's package is not named google or google.protobuf, since in that case type_name could be interpreted as a relative name and would not match.

    Now, we need to make sure there is an implicit typemapper converting between google.protobuf.timestamp.Timestamp and com.myexample.MyType. The typemapper can be defined in the companion object of MyType as shown in custom types.
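
    A minimal sketch of such a typemapper, assuming com.myexample.MyType is a simple wrapper class (its fields below are illustrative):

    package com.myexample

    import com.google.protobuf.timestamp.Timestamp
    import scalapb.TypeMapper

    // Hypothetical custom type; only the implicit TypeMapper wiring matters here.
    case class MyType(epochSeconds: Long, nanos: Int)

    object MyType {
      // An implicit TypeMapper in the companion object is picked up by the generated code.
      implicit val timestampMapper: TypeMapper[Timestamp, MyType] =
        TypeMapper[Timestamp, MyType](ts => MyType(ts.seconds, ts.nanos))(mt => Timestamp(mt.epochSeconds, mt.nanos))
    }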

    Upgrade guide#

    If you are using files like scalapb.proto and Google's well-known proto change the library dependency from:

    "com.trueaccord.scalapb" %% "scalapb-runtime" % "0.11.11" % PB.protobufConfig

    to:

    "com.thesamet.scalapb" %% "scalapb-runtime" % scalapb.compiler.Version.scalapbVersion % "protobuf"
    Defining custom options#
    assert(use_opts.OneMessage.scalaDescriptor.getOptions.extension(
    my_opts.CustomOptionsMyOptsProto.myMessageOption).get ==
    my_opts.MyMessageOption().update(_.priority := 17))
    assert(numberField.getOptions.extension(
    my_opts.Wrapper.tags) == Seq(
    my_opts.Tag(name = Some("tag1")),
    my_opts.Tag(name = Some("tag2"))))

    Example code#

    The full source code of this example is available below:

    Validating Protobufs#
    field_transformations : [ {
    when : {options: {[validate.rules] {int32 : {gt : 1}}}} // <-- 1 can be replaced with any number
    set : {type : "Int Refined Greater[$(options.[validate.rules].int32.gt)]"}
    match_type : PRESENCE
    } ]
    };
    message Test {
    int32 gt_test = 1 [ (validate.rules).int32 = {gt : 5} ]; // transformed to: Int Refined Greater[5]
    }

    For this to work, a typemapper for the refined types needs to either be placed in a package object in the same package where the code is generated, or be manually imported through import options.

    The typemapper used in scalapb-validate tests is here.
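
    A rough sketch of what such a package object can look like, loosely modeled on the approach used in the scalapb-validate tests (the package name is illustrative, and error handling here simply throws on invalid values):

    package com.myexample

    import eu.timepit.refined.api.{Refined, Validate}
    import eu.timepit.refined.refineV
    import scalapb.TypeMapper

    package object protos {
      // Maps the base type to its refined counterpart, failing loudly if the predicate does not hold.
      implicit def refinedTypeMapper[T, P](implicit v: Validate[T, P]): TypeMapper[T, T Refined P] =
        TypeMapper[T, T Refined P](value =>
          refineV[P](value).fold(err => throw new IllegalArgumentException(err), identity)
        )(_.value)
    }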

    Additional resources:

    Writing protoc plugins in Scala#

    Your users will need to add the plugin's options proto library with a % "protobuf" scope. To use:

    import "myplugin.proto";
    message MyMessage {
    option (myplugin.myopts).my_option = false;
    }

    Publishing the plugin#

    The project can be published to Maven using the publish command. We recommend using the excellent sbt-ci-release plugin to automatically build a snapshot on each commit, and a full release when pushing a git tag.

    SBT users of your code generator will add your plugin as a dependency in their project/plugins.sbt, and then reference it from PB.targets in build.sbt like this:

    Compile / PB.targets := Seq(
    scalapb.gen() -> (Compile / sourceManaged).value / "scalapb",
    com.myplugin.gen() -> (Compile / sourceManaged).value / "scalapb"
    )

    The template also publishes artifacts with names ending with unix.sh and windows.bat. These are executable jars for Unix and Windows systems that contain all the classes needed to run your code generator (except for a JVM, which is expected to be found via JAVA_HOME or the PATH). This is useful if your users need to use your plugin directly with protoc, or with a build tool such as Maven.

    Secondary outputs#

    note

    Secondary outputs were introduced in protoc-bridge 0.9.0 and are supported by sbt-protoc 1.0.0 and onwards.

    Secondary outputs provide a simple way for protoc plugins to pass information for other protoc plugins running after them in the same protoc invocation. The information is passed through files that are created in a temporary directory. The absolute path of that temporary directory is provided to all protoc plugins. Plugins may create new files in that directory for subsequent plugins to consume.

    Conventions:

    • Names of secondary output files should be in kebab-case, and should clearly identify the plugin producing them. For example scalapb-validate-preprocessor.
    • The content of the file should be a serialized google.protobuf.Any message that packs the arbitrary payload the plugin wants to publish.
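
    A minimal sketch of the producing side, assuming the secondary output directory has already been determined (the file name and payload here are hypothetical):

    import java.nio.file.{Files, Paths}
    import com.google.protobuf.any.{Any => ProtoAny}

    // Writes a packed Any payload into the shared secondary output directory so that
    // plugins running later in the same protoc invocation can pick it up.
    def writeSecondaryOutput(secondaryOutputDir: String, payload: ProtoAny): Unit = {
      val out = Paths.get(secondaryOutputDir, "my-plugin-preprocessor")
      Files.write(out, payload.toByteArray)
    }

    // Usage: writeSecondaryOutput(dir, ProtoAny.pack(myOutputMessage))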

    Determining the secondary output directory location#

    JVM-based plugins that are executed in the same JVM that spawns protoc (like the ones described on this page), receive the location of the secondary output directory via the CodeGeneratorRequest. protoc-bridge appends to the request an unknown field carrying a message called ExtraEnv which contains the path to the secondary output directory.

    Other plugins that are invoked directly by protoc can find the secondary output directory by inspecting the SCALAPB_SECONDARY_OUTPUT_DIR environment variable.

    protoc-bridge takes care of creating the temporary directory and setting up the environment variable before invoking protoc. If protoc is run manually (for example, through the CLI), it is the user's responsibility to create a directory for secondary outputs and pass it as an environment variable to protoc. It's worth noting that ScalaPB only looks for the secondary output directory if a preprocessor is requested, and therefore for the most part users do not need to worry about secondary output directories.

    In ScalaPB's code base, SecondaryOutputProvider provides a method to find the secondary output directory as described above.

    Preprocessors#

    Preprocessors are protoc plugins that provide secondary outputs that are consumed by ScalaPB. ScalaPB expects the secondary output to be a google.protobuf.Any that encodes a PreprocessorOutput. The message contains a map from proto file names (as given by FileDescriptor#getFullName()) to additional ScalaPbOptions that are merged with the file's options. By appending to aux_field_options, a preprocessor can, for example, impact the generated types of ScalaPB fields.

    • ScalaPB applies the provided options to a proto file only if the original file lists the preprocessor secondary output filename in a preprocessors file-level option. That option can be inherited from a package-scoped option.
    • To exclude a specific file from being preprocessed (if it would be otherwise impacted by a package-scoped option), add a -NAME entry to the list of preprocessors where NAME is the name of the preprocessor's secondary output.
    • In case of multiple preprocessors, options of later preprocessors override those of earlier preprocessors. Options in the file are merged over the preprocessor's options. When merging, repeated fields get concatenated.
    • Preprocessor plugins need to be invoked (in PB.targets or protoc's command line) before ScalaPB, so when ScalaPB runs their output is available.
    • Plugins that depend on ScalaPB (such as scalapb-validate) rely on DescriptorImplicits which consume the preprocessor output and therefore also see the updated options.

    Summary#

    If you followed this guide all the way to here, then congratulations for creating your first protoc plugin in Scala!

    If you have any questions, feel free to reach out to us on Gitter or Github.

    Did you write an interesting protoc plugin? Let us know on our gitter channel or our Google group and we'd love to mention it here!

    ScalaPB: Protocol Buffer Compiler for Scala#


    Protocol Buffer Compiler for Scala

    Easy to Use

    ScalaPB translates Protocol Buffers to Scala case classes. The generated API is easy to use!

    Supports proto2 and proto3

    ScalaPB is built as a protoc plugin and has perfect compatibility with the protobuf language specification.

    Nested updates

    Updating immutable nested structures is made easy by optional lenses support. Learn more.

    Interoperate with Java

    Scala Protocol Buffers can be converted to Java and vice versa. Scala and Java protobufs can co-exist in the same project to make it easier to gradually migrate, or interact with legacy Java APIs.

    Scala.js support

    ScalaPB fully supports Scala.js so you can write Scala programs that use your domain-specific Protocol Buffers in the browser! Learn more.

    gRPC

    Build gRPC servers and clients with ScalaPB. ScalaPB ships with its own wrapper around the official gRPC Java implementation. There are gRPC libraries for ZIO, Cats Effect and Akka.
