From c7aebf51eac6694bba820be49ebec3f5c262d441 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 24 Feb 2021 22:28:06 +0000 Subject: [PATCH 01/47] chore(master): release 0.1.1-SNAPSHOT (#91) :robot: I have created a release \*beep\* \*boop\* --- ### Updating meta-information for bleeding-edge SNAPSHOT release. --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- pom.xml | 2 +- versions.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index af8ff185..e399314a 100644 --- a/pom.xml +++ b/pom.xml @@ -8,7 +8,7 @@ 4.0.0 com.google.cloud pubsublite-spark-sql-streaming - 0.1.0 + 0.1.1-SNAPSHOT jar Pub/Sub Lite Spark SQL Streaming https://github.com/googleapis/java-pubsublite-spark diff --git a/versions.txt b/versions.txt index 86dfe658..b62cf13b 100644 --- a/versions.txt +++ b/versions.txt @@ -1,5 +1,5 @@ # Format: # module:released-version:current-version -pubsublite-spark-sql-streaming:0.1.0:0.1.0 +pubsublite-spark-sql-streaming:0.1.0:0.1.1-SNAPSHOT com.google.cloud.samples.shared-configuration:1.0.21:1.0.21 From c44c05e21cbbcaf898ac5976de9f171b9faf900e Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 25 Feb 2021 01:47:30 +0100 Subject: [PATCH 02/47] chore(deps): update dependency com.google.cloud:libraries-bom to v18 (#92) --- samples/snapshot/pom.xml | 2 +- samples/snippets/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snapshot/pom.xml b/samples/snapshot/pom.xml index de5d8a34..89d04ba3 100644 --- a/samples/snapshot/pom.xml +++ b/samples/snapshot/pom.xml @@ -28,7 +28,7 @@ com.google.cloud libraries-bom - 17.0.0 + 18.0.0 pom import diff --git a/samples/snippets/pom.xml b/samples/snippets/pom.xml index eb5ab8b6..8f3e748f 100644 --- a/samples/snippets/pom.xml +++ 
b/samples/snippets/pom.xml @@ -28,7 +28,7 @@ com.google.cloud libraries-bom - 17.0.0 + 18.0.0 pom import From 8b8886add0e086c3c31390b7c2e5bc478f065390 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 24 Feb 2021 16:56:05 -0800 Subject: [PATCH 03/47] chore: regenerate README (#93) This PR was generated using Autosynth. :rainbow:
Log from Synthtool
```
2021-02-25 00:50:33,400 synthtool [DEBUG] > Executing /root/.cache/synthtool/java-pubsublite-spark/.github/readme/synth.py.
On branch autosynth-readme
nothing to commit, working tree clean
2021-02-25 00:50:34,949 synthtool [DEBUG] > Wrote metadata to .github/readme/synth.metadata/synth.metadata.
```
Full log will be available here: https://source.cloud.google.com/results/invocations/cb1a92e3-4929-40fe-b7d2-5404d7321c41/targets - [ ] To automatically regenerate this PR, check this box. --- .github/readme/synth.metadata/synth.metadata | 4 ++-- README.md | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/readme/synth.metadata/synth.metadata b/.github/readme/synth.metadata/synth.metadata index 1014012f..58cce6f4 100644 --- a/.github/readme/synth.metadata/synth.metadata +++ b/.github/readme/synth.metadata/synth.metadata @@ -4,14 +4,14 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "5726724b43c44c269478c37a389ef825da8083ac" + "sha": "c44c05e21cbbcaf898ac5976de9f171b9faf900e" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "79ab0b44a2cc7d803d07c107f9faf07729fc4012" + "sha": "0199c79b8324fba66476300824aa931788c47e2d" } } ] diff --git a/README.md b/README.md index aebb07ec..e1fdabb5 100644 --- a/README.md +++ b/README.md @@ -20,18 +20,18 @@ If you are using Maven, add this to your pom.xml file: com.google.cloud pubsublite-spark-sql-streaming - 0.0.0 + 0.1.0 ``` If you are using Gradle without BOM, add this to your dependencies ```Groovy -compile 'com.google.cloud:pubsublite-spark-sql-streaming:0.0.0' +compile 'com.google.cloud:pubsublite-spark-sql-streaming:0.1.0' ``` If you are using SBT, add this to your dependencies ```Scala -libraryDependencies += "com.google.cloud" % "pubsublite-spark-sql-streaming" % "0.0.0" +libraryDependencies += "com.google.cloud" % "pubsublite-spark-sql-streaming" % "0.1.0" ``` ## Authentication From 508b90c3fd0a6045d548c1c6336fe4b542a27d07 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 25 Feb 2021 22:41:48 +0100 Subject: [PATCH 04/47] deps: update dependency com.google.cloud:google-cloud-pubsublite to v0.11.0 (#95) --- pom.xml | 2 +- samples/snapshot/pom.xml | 2 +- 
samples/snippets/pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index e399314a..ffcaa4c6 100644 --- a/pom.xml +++ b/pom.xml @@ -43,7 +43,7 @@ com.google.cloud google-cloud-pubsublite - 0.10.0 + 0.11.0 com.google.api.grpc diff --git a/samples/snapshot/pom.xml b/samples/snapshot/pom.xml index 89d04ba3..3172c288 100644 --- a/samples/snapshot/pom.xml +++ b/samples/snapshot/pom.xml @@ -44,7 +44,7 @@ com.google.cloud google-cloud-pubsublite - 0.10.0 + 0.11.0 junit diff --git a/samples/snippets/pom.xml b/samples/snippets/pom.xml index 8f3e748f..97d5b00b 100644 --- a/samples/snippets/pom.xml +++ b/samples/snippets/pom.xml @@ -44,7 +44,7 @@ com.google.cloud google-cloud-pubsublite - 0.10.0 + 0.11.0 junit From d9b9289160d50ca4b44447287b887249190db9fd Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 25 Feb 2021 23:36:19 +0100 Subject: [PATCH 05/47] deps: update dependency com.google.api.grpc:proto-google-cloud-pubsublite-v1 to v0.11.0 (#94) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ffcaa4c6..dabf27ee 100644 --- a/pom.xml +++ b/pom.xml @@ -48,7 +48,7 @@ com.google.api.grpc proto-google-cloud-pubsublite-v1 - 0.10.0 + 0.11.0 com.google.guava From 435624723d5ffdbfc803ac19e034d65cea33986e Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 25 Feb 2021 23:50:52 +0100 Subject: [PATCH 06/47] deps: update dependency com.google.cloud:google-cloud-pubsublite-parent to v0.11.0 (#96) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index dabf27ee..4200f709 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ com.google.cloud google-cloud-pubsublite-parent - 0.10.0 + 0.11.0 4.0.0 com.google.cloud From 9187ff382714b810e94758f3ba1e89a75ae99caf Mon Sep 17 00:00:00 2001 From: Tianzi Cai Date: Fri, 26 Feb 2021 10:37:50 -0800 Subject: [PATCH 07/47] docs: update client lib documentation link (#98) --- .repo-metadata.json 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.repo-metadata.json b/.repo-metadata.json index 93ec87a2..4cdef959 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -3,7 +3,7 @@ "name_pretty": "Pub/Sub Lite Spark Connector", "product_documentation": "https://cloud.google.com/pubsub/lite/docs", "api_description": "Pub/Sub Lite is a zonal, real-time messaging service that lets you send and receive messages between independent applications. You can manually configure the throughput and storage capacity for Pub/Sub Lite systems.", - "client_documentation": "https://googleapis.dev/java/google-cloud-pubsublite/latest/index.html", + "client_documentation": "https://googleapis.dev/java/pubsublite-spark-sql-streaming/latest/index.html", "release_level": "alpha", "transport": "grpc", "requires_billing": true, @@ -14,4 +14,4 @@ "distribution_name": "com.google.cloud:pubsublite-spark-sql-streaming", "codeowner_team": "@googleapis/api-pubsub", "api_id": "pubsublite.googleapis.com" -} \ No newline at end of file +} From 91bdd037a9bc20cfff094fec0f31a1461b6f4978 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 26 Feb 2021 10:46:04 -0800 Subject: [PATCH 08/47] chore: regenerate README (#99) This PR was generated using Autosynth. :rainbow:
Log from Synthtool
```
2021-02-26 18:40:00,410 synthtool [DEBUG] > Executing /root/.cache/synthtool/java-pubsublite-spark/.github/readme/synth.py.
On branch autosynth-readme
nothing to commit, working tree clean
2021-02-26 18:40:02,026 synthtool [DEBUG] > Wrote metadata to .github/readme/synth.metadata/synth.metadata.
```
Full log will be available here: https://source.cloud.google.com/results/invocations/40128cfb-4872-47ca-8564-2ceca3934c06/targets - [ ] To automatically regenerate this PR, check this box. --- .github/readme/synth.metadata/synth.metadata | 4 ++-- README.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/readme/synth.metadata/synth.metadata b/.github/readme/synth.metadata/synth.metadata index 58cce6f4..9791a1fa 100644 --- a/.github/readme/synth.metadata/synth.metadata +++ b/.github/readme/synth.metadata/synth.metadata @@ -4,14 +4,14 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "c44c05e21cbbcaf898ac5976de9f171b9faf900e" + "sha": "9187ff382714b810e94758f3ba1e89a75ae99caf" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "0199c79b8324fba66476300824aa931788c47e2d" + "sha": "8c5628b86cfa8386de7b8fc1675e6b528b552d57" } } ] diff --git a/README.md b/README.md index e1fdabb5..2e549cbd 100644 --- a/README.md +++ b/README.md @@ -226,7 +226,7 @@ Java 11 | [![Kokoro CI][kokoro-badge-image-5]][kokoro-badge-link-5] Java is a registered trademark of Oracle and/or its affiliates. 
[product-docs]: https://cloud.google.com/pubsub/lite/docs -[javadocs]: https://googleapis.dev/java/google-cloud-pubsublite/latest/index.html +[javadocs]: https://googleapis.dev/java/pubsublite-spark-sql-streaming/latest/index.html [kokoro-badge-image-1]: http://storage.googleapis.com/cloud-devrel-public/java/badges/java-pubsublite-spark/java7.svg [kokoro-badge-link-1]: http://storage.googleapis.com/cloud-devrel-public/java/badges/java-pubsublite-spark/java7.html [kokoro-badge-image-2]: http://storage.googleapis.com/cloud-devrel-public/java/badges/java-pubsublite-spark/java8.svg From f82087b17a6c6e44af235a7e6e7a4632874aef42 Mon Sep 17 00:00:00 2001 From: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com> Date: Fri, 26 Feb 2021 17:50:00 -0500 Subject: [PATCH 09/47] docs: Add maven central link. (#100) * update * Update .readme-partials.yaml Co-authored-by: Tianzi Cai Co-authored-by: Tianzi Cai --- .readme-partials.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.readme-partials.yaml b/.readme-partials.yaml index b627f59c..c6a90ce8 100644 --- a/.readme-partials.yaml +++ b/.readme-partials.yaml @@ -16,8 +16,7 @@ custom_content: | ## Downloading and Using the Connector - - The connector will be available from the Maven Central repository. It can be used using the `--packages` option or the `spark.jars.packages` configuration property. + The connector is available from the [Maven Central repository](https://search.maven.org/artifact/com.google.cloud/pubsublite-spark-sql-streaming). You can download and pass it in the `--packages` option when using the `spark-submit` command or set it via the `spark.jars.packages` [configuration property](https://spark.apache.org/docs/latest/configuration.html#available-properties). 
## Compatibility | Connector version | Spark version | From 0d8461567b04249f2a7dad94ebc1b67dbcd66039 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 26 Feb 2021 14:58:04 -0800 Subject: [PATCH 10/47] chore: regenerate README (#101) This PR was generated using Autosynth. :rainbow:
Log from Synthtool
```
2021-02-26 22:52:17,406 synthtool [DEBUG] > Executing /root/.cache/synthtool/java-pubsublite-spark/.github/readme/synth.py.
On branch autosynth-readme
nothing to commit, working tree clean
2021-02-26 22:52:18,972 synthtool [DEBUG] > Wrote metadata to .github/readme/synth.metadata/synth.metadata.
```
Full log will be available here: https://source.cloud.google.com/results/invocations/5b054a0f-ff73-4e8e-88e0-8df8c4f356d9/targets - [ ] To automatically regenerate this PR, check this box. --- .github/readme/synth.metadata/synth.metadata | 2 +- README.md | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/readme/synth.metadata/synth.metadata b/.github/readme/synth.metadata/synth.metadata index 9791a1fa..b892c10e 100644 --- a/.github/readme/synth.metadata/synth.metadata +++ b/.github/readme/synth.metadata/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "9187ff382714b810e94758f3ba1e89a75ae99caf" + "sha": "f82087b17a6c6e44af235a7e6e7a4632874aef42" } }, { diff --git a/README.md b/README.md index 2e549cbd..02cdf64a 100644 --- a/README.md +++ b/README.md @@ -85,8 +85,7 @@ and manual Spark installations. ## Downloading and Using the Connector - - The connector will be available from the Maven Central repository. It can be used using the `--packages` option or the `spark.jars.packages` configuration property. + The connector is available from the [Maven Central repository](https://search.maven.org/artifact/com.google.cloud/pubsublite-spark-sql-streaming). You can download and pass it in the `--packages` option when using the `spark-submit` command or set it via the `spark.jars.packages` [configuration property](https://spark.apache.org/docs/latest/configuration.html#available-properties). 
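As a concrete illustration of the `--packages` route mentioned in the README text above, an invocation might look like the following. This is an editor's sketch rather than part of the generated README: the main class and application jar are placeholders, and the connector coordinates should track whichever release you actually use.

```shell
spark-submit \
  --packages com.google.cloud:pubsublite-spark-sql-streaming:0.1.0 \
  --class com.example.WordCountPipeline \
  path/to/your-application.jar
```

Setting `spark.jars.packages` in the Spark configuration achieves the same dependency resolution without a command-line flag.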
## Compatibility | Connector version | Spark version | From 86c8b7e9295e5abbb4f491ef1a5295a1ac9b498c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 2 Mar 2021 02:04:48 +0100 Subject: [PATCH 11/47] deps: update dependency com.google.cloud:google-cloud-pubsublite to v0.11.1 (#103) --- pom.xml | 2 +- samples/snapshot/pom.xml | 2 +- samples/snippets/pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 4200f709..7852a9d8 100644 --- a/pom.xml +++ b/pom.xml @@ -43,7 +43,7 @@ com.google.cloud google-cloud-pubsublite - 0.11.0 + 0.11.1 com.google.api.grpc diff --git a/samples/snapshot/pom.xml b/samples/snapshot/pom.xml index 3172c288..bf200a62 100644 --- a/samples/snapshot/pom.xml +++ b/samples/snapshot/pom.xml @@ -44,7 +44,7 @@ com.google.cloud google-cloud-pubsublite - 0.11.0 + 0.11.1 junit diff --git a/samples/snippets/pom.xml b/samples/snippets/pom.xml index 97d5b00b..d2a93c3d 100644 --- a/samples/snippets/pom.xml +++ b/samples/snippets/pom.xml @@ -44,7 +44,7 @@ com.google.cloud google-cloud-pubsublite - 0.11.0 + 0.11.1 junit From 4812cbc6710f2a894045b50b8f5f1245e3b80196 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 2 Mar 2021 02:05:07 +0100 Subject: [PATCH 12/47] deps: update dependency com.google.api.grpc:proto-google-cloud-pubsublite-v1 to v0.11.1 (#102) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7852a9d8..2b22dd0a 100644 --- a/pom.xml +++ b/pom.xml @@ -48,7 +48,7 @@ com.google.api.grpc proto-google-cloud-pubsublite-v1 - 0.11.0 + 0.11.1 com.google.guava From bb73ca2900d3cea9cd6d5c920c0f09871fff73aa Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 2 Mar 2021 02:17:02 +0100 Subject: [PATCH 13/47] deps: update dependency com.google.cloud:google-cloud-pubsublite-parent to v0.11.1 (#104) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 2b22dd0a..6cab0960 100644 --- 
a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ com.google.cloud google-cloud-pubsublite-parent - 0.11.0 + 0.11.1 4.0.0 com.google.cloud From 4de89f33e7717d2394b5ba63f6086673316818c2 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 2 Mar 2021 16:03:59 -0800 Subject: [PATCH 14/47] chore: remove docLava v2 doc generation (#105) * chore: remove docLava v2 doc generation Not using this anymore and it is causing some issues in pubsublite * chore: removing v2 doclava bucket config Source-Author: Emily Ball Source-Date: Tue Mar 2 10:13:11 2021 -0800 Source-Repo: googleapis/synthtool Source-Sha: 21da7d9fa02f6916d9f87cf4072b3547b5c72eb5 Source-Link: https://github.com/googleapis/synthtool/commit/21da7d9fa02f6916d9f87cf4072b3547b5c72eb5 --- .kokoro/release/publish_javadoc.cfg | 8 +------- .kokoro/release/publish_javadoc.sh | 19 ------------------- synth.metadata | 4 ++-- 3 files changed, 3 insertions(+), 28 deletions(-) diff --git a/.kokoro/release/publish_javadoc.cfg b/.kokoro/release/publish_javadoc.cfg index c0ea28d2..7a9aba0e 100644 --- a/.kokoro/release/publish_javadoc.cfg +++ b/.kokoro/release/publish_javadoc.cfg @@ -7,12 +7,6 @@ env_vars: { value: "docs-staging" } -# cloud-rad staging -env_vars: { - key: "STAGING_BUCKET_V2" - value: "docs-staging-v2-staging" -} - env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/java-pubsublite-spark/.kokoro/release/publish_javadoc.sh" @@ -26,4 +20,4 @@ before_action { keyname: "docuploader_service_account" } } -} \ No newline at end of file +} diff --git a/.kokoro/release/publish_javadoc.sh b/.kokoro/release/publish_javadoc.sh index c0ecd34e..efc49913 100755 --- a/.kokoro/release/publish_javadoc.sh +++ b/.kokoro/release/publish_javadoc.sh @@ -56,22 +56,3 @@ python3 -m docuploader create-metadata \ python3 -m docuploader upload . \ --credentials ${CREDENTIALS} \ --staging-bucket ${STAGING_BUCKET} - -popd - -# V2 due to problems w/ the released javadoc plugin doclava, Java 8 is required. Beware of accidental updates. 
- -mvn clean site -B -q -Ddevsite.template="${KOKORO_GFILE_DIR}/java/" - -pushd target/devsite/reference - -# create metadata -python3 -m docuploader create-metadata \ - --name ${NAME} \ - --version ${VERSION} \ - --language java - -# upload docs to staging bucket -python3 -m docuploader upload . \ - --credentials ${CREDENTIALS} \ - --staging-bucket ${STAGING_BUCKET_V2} diff --git a/synth.metadata b/synth.metadata index 276f2637..e51d110b 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,14 +4,14 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "c4944f2f96d134ae116791b091d7be77e8393aae" + "sha": "bb73ca2900d3cea9cd6d5c920c0f09871fff73aa" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "6946fd71ae9215b0e7ae188f5057df765ee6d7d2" + "sha": "21da7d9fa02f6916d9f87cf4072b3547b5c72eb5" } } ], From 6e5138546c9446db4d8133c91ec207da46702266 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 3 Mar 2021 20:36:52 +0100 Subject: [PATCH 15/47] chore(deps): update dependency com.google.cloud:libraries-bom to v18.1.0 (#106) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [com.google.cloud:libraries-bom](https://togithub.com/GoogleCloudPlatform/cloud-opensource-java) | `18.0.0` -> `18.1.0` | [![age](https://badges.renovateapi.com/packages/maven/com.google.cloud:libraries-bom/18.1.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/maven/com.google.cloud:libraries-bom/18.1.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/maven/com.google.cloud:libraries-bom/18.1.0/compatibility-slim/18.0.0)](https://docs.renovatebot.com/merge-confidence/) | 
[![confidence](https://badges.renovateapi.com/packages/maven/com.google.cloud:libraries-bom/18.1.0/confidence-slim/18.0.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/java-pubsublite-spark). --- samples/snapshot/pom.xml | 2 +- samples/snippets/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snapshot/pom.xml b/samples/snapshot/pom.xml index bf200a62..04c9212f 100644 --- a/samples/snapshot/pom.xml +++ b/samples/snapshot/pom.xml @@ -28,7 +28,7 @@ com.google.cloud libraries-bom - 18.0.0 + 18.1.0 pom import diff --git a/samples/snippets/pom.xml b/samples/snippets/pom.xml index d2a93c3d..5dd560ac 100644 --- a/samples/snippets/pom.xml +++ b/samples/snippets/pom.xml @@ -28,7 +28,7 @@ com.google.cloud libraries-bom - 18.0.0 + 18.1.0 pom import From 759c0da48867ea2b3978c700859cc3c4b8043bf5 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 4 Mar 2021 20:26:47 +0100 Subject: [PATCH 16/47] chore(deps): update dependency com.google.cloud:libraries-bom to v19 (#107) --- samples/snapshot/pom.xml | 2 +- samples/snippets/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snapshot/pom.xml b/samples/snapshot/pom.xml index 04c9212f..7792e9b7 100644 --- a/samples/snapshot/pom.xml +++ b/samples/snapshot/pom.xml @@ -28,7 +28,7 @@ com.google.cloud libraries-bom - 18.1.0 + 19.0.0 pom 
import diff --git a/samples/snippets/pom.xml b/samples/snippets/pom.xml index 5dd560ac..6a71d668 100644 --- a/samples/snippets/pom.xml +++ b/samples/snippets/pom.xml @@ -28,7 +28,7 @@ com.google.cloud libraries-bom - 18.1.0 + 19.0.0 pom import From d75274e1e1d8debd2c72eda92b2bce7f687b8bc2 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Thu, 4 Mar 2021 15:54:09 -0800 Subject: [PATCH 17/47] chore: copy README to docfx-yml dir (#108) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/ed7b8a82-0394-45bb-ab76-b43a96195edd/targets - [ ] To automatically regenerate this PR, check this box. Source-Link: https://github.com/googleapis/synthtool/commit/d0bdade9a962042dc0f770cf631086f3db59b5b0 --- .kokoro/release/publish_javadoc11.sh | 5 ++++- synth.metadata | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.kokoro/release/publish_javadoc11.sh b/.kokoro/release/publish_javadoc11.sh index cf735ff7..4101fd58 100755 --- a/.kokoro/release/publish_javadoc11.sh +++ b/.kokoro/release/publish_javadoc11.sh @@ -40,6 +40,9 @@ export VERSION=$(grep ${NAME}: versions.txt | cut -d: -f3) # generate yml mvn clean site -B -q -P docFX +# copy README to docfx-yml dir and rename index.md +cp README.md target/docfx-yml/index.md + pushd target/docfx-yml # create metadata @@ -52,4 +55,4 @@ python3 -m docuploader create-metadata \ python3 -m docuploader upload . 
\ --credentials ${CREDENTIALS} \ --staging-bucket ${STAGING_BUCKET_V2} \ - --destination-prefix docfx- + --destination-prefix docfx diff --git a/synth.metadata b/synth.metadata index e51d110b..93682520 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,14 +4,14 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "bb73ca2900d3cea9cd6d5c920c0f09871fff73aa" + "sha": "759c0da48867ea2b3978c700859cc3c4b8043bf5" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "21da7d9fa02f6916d9f87cf4072b3547b5c72eb5" + "sha": "d0bdade9a962042dc0f770cf631086f3db59b5b0" } } ], From 1bf772a275e76d6b7229d628b72d5dce4f5c8bc5 Mon Sep 17 00:00:00 2001 From: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com> Date: Tue, 9 Mar 2021 20:12:02 -0500 Subject: [PATCH 18/47] docs: Update gcs public available link (#109) --- .readme-partials.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.readme-partials.yaml b/.readme-partials.yaml index c6a90ce8..9c84e179 100644 --- a/.readme-partials.yaml +++ b/.readme-partials.yaml @@ -15,8 +15,11 @@ custom_content: | ``` ## Downloading and Using the Connector - - The connector is available from the [Maven Central repository](https://search.maven.org/artifact/com.google.cloud/pubsublite-spark-sql-streaming). You can download and pass it in the `--packages` option when using the `spark-submit` command or set it via the `spark.jars.packages` [configuration property](https://spark.apache.org/docs/latest/configuration.html#available-properties). 
+ The latest version of the connector is publicly available in the following link: + | Connector version | Link | + | --- | --- | + | 0.1.0 | `gs://spark-lib/pubsublite/pubsublite-spark-sql-streaming-0.1.0-with-dependencies.jar`([HTTP link](https://storage.googleapis.com/spark-lib/pubsublite/pubsublite-spark-sql-streaming-0.1.0-with-dependencies.jar)) | + The connector is also available from the [Maven Central repository](https://search.maven.org/artifact/com.google.cloud/pubsublite-spark-sql-streaming). You can download and pass it in the `--jars` option when using the `spark-submit` command. ## Compatibility | Connector version | Spark version | From 011e9d567cd0109790f916f545c8ad50138ddbe5 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 9 Mar 2021 17:22:02 -0800 Subject: [PATCH 19/47] chore: regenerate README (#112) This PR was generated using Autosynth. :rainbow:
Log from Synthtool
```
2021-03-10 01:16:35,757 synthtool [DEBUG] > Executing /root/.cache/synthtool/java-pubsublite-spark/.github/readme/synth.py.
On branch autosynth-readme
nothing to commit, working tree clean
2021-03-10 01:16:36,659 synthtool [DEBUG] > Wrote metadata to .github/readme/synth.metadata/synth.metadata.
```
Full log will be available here: https://source.cloud.google.com/results/invocations/dd59780c-4231-48a1-83dd-be9f1c53d16e/targets - [ ] To automatically regenerate this PR, check this box. --- .github/readme/synth.metadata/synth.metadata | 4 ++-- README.md | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/readme/synth.metadata/synth.metadata b/.github/readme/synth.metadata/synth.metadata index b892c10e..6d0f9a81 100644 --- a/.github/readme/synth.metadata/synth.metadata +++ b/.github/readme/synth.metadata/synth.metadata @@ -4,14 +4,14 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "f82087b17a6c6e44af235a7e6e7a4632874aef42" + "sha": "1bf772a275e76d6b7229d628b72d5dce4f5c8bc5" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "8c5628b86cfa8386de7b8fc1675e6b528b552d57" + "sha": "e5fa6d93e42918dd4a000a80b92be23f5f4c6ac7" } } ] diff --git a/README.md b/README.md index 02cdf64a..628f6445 100644 --- a/README.md +++ b/README.md @@ -84,8 +84,11 @@ and manual Spark installations. ``` ## Downloading and Using the Connector - - The connector is available from the [Maven Central repository](https://search.maven.org/artifact/com.google.cloud/pubsublite-spark-sql-streaming). You can download and pass it in the `--packages` option when using the `spark-submit` command or set it via the `spark.jars.packages` [configuration property](https://spark.apache.org/docs/latest/configuration.html#available-properties). 
+ The latest version of the connector is publicly available in the following link: + | Connector version | Link | + | --- | --- | + | 0.1.0 | `gs://spark-lib/pubsublite/pubsublite-spark-sql-streaming-0.1.0-with-dependencies.jar`([HTTP link](https://storage.googleapis.com/spark-lib/pubsublite/pubsublite-spark-sql-streaming-0.1.0-with-dependencies.jar)) | + The connector is also available from the [Maven Central repository](https://search.maven.org/artifact/com.google.cloud/pubsublite-spark-sql-streaming). You can download and pass it in the `--jars` option when using the `spark-submit` command. ## Compatibility | Connector version | Spark version | From 84d851233db07c5a2af9bbad644b98ee36f7d298 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 10 Mar 2021 14:34:35 -0800 Subject: [PATCH 20/47] build(java): update autorelease title check in response to the new multi release branch changes (#113) Source-Author: Stephanie Wang Source-Date: Wed Mar 10 14:40:03 2021 -0500 Source-Repo: googleapis/synthtool Source-Sha: 0b064d767537e0675fc053e53fca473c5c701fb8 Source-Link: https://github.com/googleapis/synthtool/commit/0b064d767537e0675fc053e53fca473c5c701fb8 --- .github/workflows/auto-release.yaml | 4 ++-- synth.metadata | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/auto-release.yaml b/.github/workflows/auto-release.yaml index 7c8816a7..9b4fd4d8 100644 --- a/.github/workflows/auto-release.yaml +++ b/.github/workflows/auto-release.yaml @@ -16,8 +16,8 @@ jobs: return; } - // only approve PRs like "chore(master): release " - if ( !context.payload.pull_request.title.startsWith("chore(master): release") ) { + // only approve PRs like "chore: release " + if ( !context.payload.pull_request.title.startsWith("chore: release") ) { return; } diff --git a/synth.metadata b/synth.metadata index 93682520..7a54af80 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,14 +4,14 @@ "git": { "name": ".", "remote": 
"https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "759c0da48867ea2b3978c700859cc3c4b8043bf5" + "sha": "011e9d567cd0109790f916f545c8ad50138ddbe5" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "d0bdade9a962042dc0f770cf631086f3db59b5b0" + "sha": "0b064d767537e0675fc053e53fca473c5c701fb8" } } ], From b1a9dcd030d55bf2d4d32e4818aa2ac64b5d8e46 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 17 Mar 2021 20:19:05 +0100 Subject: [PATCH 21/47] chore(deps): update dependency com.google.cloud:libraries-bom to v19.1.0 (#116) --- samples/snapshot/pom.xml | 2 +- samples/snippets/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snapshot/pom.xml b/samples/snapshot/pom.xml index 7792e9b7..93297c63 100644 --- a/samples/snapshot/pom.xml +++ b/samples/snapshot/pom.xml @@ -28,7 +28,7 @@ com.google.cloud libraries-bom - 19.0.0 + 19.1.0 pom import diff --git a/samples/snippets/pom.xml b/samples/snippets/pom.xml index 6a71d668..3ca85f45 100644 --- a/samples/snippets/pom.xml +++ b/samples/snippets/pom.xml @@ -28,7 +28,7 @@ com.google.cloud libraries-bom - 19.0.0 + 19.1.0 pom import From ea3596fad872e26b5b0157192762d037d163577f Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Mar 2021 22:30:14 +0100 Subject: [PATCH 22/47] deps: update dependency com.google.cloud:google-cloud-pubsublite-parent to v0.12.0 (#120) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6cab0960..b5a238dc 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ com.google.cloud google-cloud-pubsublite-parent - 0.11.1 + 0.12.0 4.0.0 com.google.cloud From 821449eca7259bc36c5ae8f0d12a3c54af1484e7 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Mar 2021 22:30:32 +0100 Subject: [PATCH 23/47] deps: update dependency com.google.cloud:google-cloud-pubsublite to v0.12.0 (#119) --- pom.xml | 2 +- samples/snapshot/pom.xml | 2 
+- samples/snippets/pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index b5a238dc..ff688c69 100644 --- a/pom.xml +++ b/pom.xml @@ -43,7 +43,7 @@ com.google.cloud google-cloud-pubsublite - 0.11.1 + 0.12.0 com.google.api.grpc diff --git a/samples/snapshot/pom.xml b/samples/snapshot/pom.xml index 93297c63..aeab8dfa 100644 --- a/samples/snapshot/pom.xml +++ b/samples/snapshot/pom.xml @@ -44,7 +44,7 @@ com.google.cloud google-cloud-pubsublite - 0.11.1 + 0.12.0 junit diff --git a/samples/snippets/pom.xml b/samples/snippets/pom.xml index 3ca85f45..fbae9274 100644 --- a/samples/snippets/pom.xml +++ b/samples/snippets/pom.xml @@ -44,7 +44,7 @@ com.google.cloud google-cloud-pubsublite - 0.11.1 + 0.12.0 junit From 880da1bf953526cd40e4b736a898185751c7bb27 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Mar 2021 22:30:49 +0100 Subject: [PATCH 24/47] deps: update dependency com.google.api.grpc:proto-google-cloud-pubsublite-v1 to v0.12.0 (#118) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ff688c69..c377b164 100644 --- a/pom.xml +++ b/pom.xml @@ -48,7 +48,7 @@ com.google.api.grpc proto-google-cloud-pubsublite-v1 - 0.11.1 + 0.12.0 com.google.guava From b7237f88d04b474c44b33d329234e6ce016ec134 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Mar 2021 22:31:07 +0100 Subject: [PATCH 25/47] chore(deps): update dependency com.google.cloud:libraries-bom to v19.2.1 (#117) --- samples/snapshot/pom.xml | 2 +- samples/snippets/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snapshot/pom.xml b/samples/snapshot/pom.xml index aeab8dfa..f08c3d43 100644 --- a/samples/snapshot/pom.xml +++ b/samples/snapshot/pom.xml @@ -28,7 +28,7 @@ com.google.cloud libraries-bom - 19.1.0 + 19.2.1 pom import diff --git a/samples/snippets/pom.xml b/samples/snippets/pom.xml index fbae9274..b301d2d0 100644 --- a/samples/snippets/pom.xml +++ 
b/samples/snippets/pom.xml @@ -28,7 +28,7 @@ com.google.cloud libraries-bom - 19.1.0 + 19.2.1 pom import From 20f336639c261ddb3b61d0bd14f02e6ea5146377 Mon Sep 17 00:00:00 2001 From: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com> Date: Fri, 19 Mar 2021 17:40:46 -0400 Subject: [PATCH 26/47] feat: Supports topic partition increase. (#115) --- clirr-ignored-differences.xml | 15 ++ .../spark/CachedPartitionCountReader.java | 47 ++++++ .../spark/LimitingHeadOffsetReader.java | 20 ++- .../spark/MultiPartitionCommitterImpl.java | 73 ++++++++- .../spark/PartitionCountReader.java | 26 ++++ .../pubsublite/spark/PslContinuousReader.java | 13 +- .../cloud/pubsublite/spark/PslDataSource.java | 24 +-- .../pubsublite/spark/PslMicroBatchReader.java | 58 +++---- .../spark/LimitingHeadOffsetReaderTest.java | 33 +++- .../MultiPartitionCommitterImplTest.java | 141 ++++++++++++------ .../spark/PslContinuousReaderTest.java | 16 +- .../spark/PslMicroBatchReaderTest.java | 99 +++++++++--- .../cloud/pubsublite/spark/TestingUtils.java | 43 ++++++ 13 files changed, 488 insertions(+), 120 deletions(-) create mode 100644 clirr-ignored-differences.xml create mode 100644 src/main/java/com/google/cloud/pubsublite/spark/CachedPartitionCountReader.java create mode 100644 src/main/java/com/google/cloud/pubsublite/spark/PartitionCountReader.java create mode 100644 src/test/java/com/google/cloud/pubsublite/spark/TestingUtils.java diff --git a/clirr-ignored-differences.xml b/clirr-ignored-differences.xml new file mode 100644 index 00000000..1aa41e4f --- /dev/null +++ b/clirr-ignored-differences.xml @@ -0,0 +1,15 @@ + + + + + 7004 + com/google/cloud/pubsublite/spark/*Reader + * + + + 7005 + com/google/cloud/pubsublite/spark/*Reader + * + * + + \ No newline at end of file diff --git a/src/main/java/com/google/cloud/pubsublite/spark/CachedPartitionCountReader.java b/src/main/java/com/google/cloud/pubsublite/spark/CachedPartitionCountReader.java new file mode 100644 index 
00000000..35555805 --- /dev/null +++ b/src/main/java/com/google/cloud/pubsublite/spark/CachedPartitionCountReader.java @@ -0,0 +1,47 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.pubsublite.spark; + +import com.google.cloud.pubsublite.AdminClient; +import com.google.cloud.pubsublite.PartitionLookupUtils; +import com.google.cloud.pubsublite.TopicPath; +import com.google.common.base.Supplier; +import com.google.common.base.Suppliers; +import java.util.concurrent.TimeUnit; +import javax.annotation.concurrent.ThreadSafe; + +@ThreadSafe +public class CachedPartitionCountReader implements PartitionCountReader { + private final AdminClient adminClient; + private final Supplier<Integer> supplier; + + public CachedPartitionCountReader(AdminClient adminClient, TopicPath topicPath) { + this.adminClient = adminClient; + this.supplier = + Suppliers.memoizeWithExpiration( + () -> PartitionLookupUtils.numPartitions(topicPath, adminClient), 1, TimeUnit.MINUTES); + } + + @Override + public void close() { + adminClient.close(); + } + + public int getPartitionCount() { + return supplier.get(); + } +} diff --git a/src/main/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReader.java b/src/main/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReader.java index 5954492f..7bad0ffc 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReader.java +++
b/src/main/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReader.java @@ -27,7 +27,9 @@ import com.google.cloud.pubsublite.internal.TopicStatsClient; import com.google.cloud.pubsublite.proto.Cursor; import com.google.common.annotations.VisibleForTesting; +import com.google.common.flogger.GoogleLogger; import com.google.common.util.concurrent.MoreExecutors; +import java.io.Closeable; import java.util.HashSet; import java.util.Map; import java.util.Set; @@ -40,18 +42,22 @@ * offsets for the topic at most once per minute. */ public class LimitingHeadOffsetReader implements PerTopicHeadOffsetReader { + private static final GoogleLogger log = GoogleLogger.forEnclosingClass(); private final TopicStatsClient topicStatsClient; private final TopicPath topic; - private final long topicPartitionCount; + private final PartitionCountReader partitionCountReader; private final AsyncLoadingCache<Partition, Offset> cachedHeadOffsets; @VisibleForTesting public LimitingHeadOffsetReader( - TopicStatsClient topicStatsClient, TopicPath topic, long topicPartitionCount, Ticker ticker) { + TopicStatsClient topicStatsClient, + TopicPath topic, + PartitionCountReader partitionCountReader, + Ticker ticker) { this.topicStatsClient = topicStatsClient; this.topic = topic; - this.topicPartitionCount = topicPartitionCount; + this.partitionCountReader = partitionCountReader; this.cachedHeadOffsets = Caffeine.newBuilder() .ticker(ticker) @@ -82,7 +88,7 @@ public void onSuccess(Cursor c) { @Override public PslSourceOffset getHeadOffset() { Set<Partition> keySet = new HashSet<>(); - for (int i = 0; i < topicPartitionCount; i++) { + for (int i = 0; i < partitionCountReader.getPartitionCount(); i++) { keySet.add(Partition.of(i)); } CompletableFuture<Map<Partition, Offset>> future = cachedHeadOffsets.getAll(keySet); @@ -95,6 +101,10 @@ public PslSourceOffset getHeadOffset() { @Override public void close() { - topicStatsClient.close(); + try (AutoCloseable a = topicStatsClient; + Closeable b = partitionCountReader) { + } catch (Exception e) {
log.atWarning().withCause(e).log("Unable to close LimitingHeadOffsetReader."); + } } } diff --git a/src/main/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImpl.java b/src/main/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImpl.java index f672242f..7ebec891 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImpl.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImpl.java @@ -25,21 +25,47 @@ import com.google.common.flogger.GoogleLogger; import com.google.common.util.concurrent.MoreExecutors; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import javax.annotation.concurrent.GuardedBy; +/** + * A {@link MultiPartitionCommitter} that lazily adjusts for partition changes when {@link + * MultiPartitionCommitter#commit(PslSourceOffset)} is called. 
+ */ public class MultiPartitionCommitterImpl implements MultiPartitionCommitter { private static final GoogleLogger log = GoogleLogger.forEnclosingClass(); + private final CommitterFactory committerFactory; + + @GuardedBy("this") private final Map<Partition, Committer> committerMap = new HashMap<>(); + @GuardedBy("this") + private final Set<Partition> partitionsCleanUp = new HashSet<>(); + + public MultiPartitionCommitterImpl(long topicPartitionCount, CommitterFactory committerFactory) { + this( + topicPartitionCount, + committerFactory, + MoreExecutors.getExitingScheduledExecutorService(new ScheduledThreadPoolExecutor(1))); + } + @VisibleForTesting - MultiPartitionCommitterImpl(long topicPartitionCount, CommitterFactory committerFactory) { + MultiPartitionCommitterImpl( + long topicPartitionCount, + CommitterFactory committerFactory, + ScheduledExecutorService executorService) { + this.committerFactory = committerFactory; for (int i = 0; i < topicPartitionCount; i++) { Partition p = Partition.of(i); - Committer committer = committerFactory.newCommitter(p); - committer.startAsync().awaitRunning(); - committerMap.put(p, committer); + committerMap.put(p, createCommitter(p)); } + executorService.scheduleWithFixedDelay(this::cleanUpCommitterMap, 10, 10, TimeUnit.MINUTES); } @Override @@ -47,8 +73,47 @@ public synchronized void close() { committerMap.values().forEach(c -> c.stopAsync().awaitTerminated()); } + /** Adjust committerMap based on the partitions that need to be committed.
*/ + private synchronized void updateCommitterMap(PslSourceOffset offset) { + int currentPartitions = committerMap.size(); + int newPartitions = offset.partitionOffsetMap().size(); + + if (currentPartitions == newPartitions) { + return; + } + if (currentPartitions < newPartitions) { + for (int i = currentPartitions; i < newPartitions; i++) { + Partition p = Partition.of(i); + if (!committerMap.containsKey(p)) { + committerMap.put(p, createCommitter(p)); + } + partitionsCleanUp.remove(p); + } + return; + } + partitionsCleanUp.clear(); + for (int i = newPartitions; i < currentPartitions; i++) { + partitionsCleanUp.add(Partition.of(i)); + } + } + + private synchronized Committer createCommitter(Partition p) { + Committer committer = committerFactory.newCommitter(p); + committer.startAsync().awaitRunning(); + return committer; + } + + private synchronized void cleanUpCommitterMap() { + for (Partition p : partitionsCleanUp) { + committerMap.get(p).stopAsync(); + committerMap.remove(p); + } + partitionsCleanUp.clear(); + } + @Override public synchronized void commit(PslSourceOffset offset) { + updateCommitterMap(offset); offset .partitionOffsetMap() .forEach( diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PartitionCountReader.java b/src/main/java/com/google/cloud/pubsublite/spark/PartitionCountReader.java new file mode 100644 index 00000000..934d40be --- /dev/null +++ b/src/main/java/com/google/cloud/pubsublite/spark/PartitionCountReader.java @@ -0,0 +1,26 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.pubsublite.spark; + +import java.io.Closeable; + +public interface PartitionCountReader extends Closeable { + int getPartitionCount(); + + @Override + void close(); +} diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslContinuousReader.java b/src/main/java/com/google/cloud/pubsublite/spark/PslContinuousReader.java index ba6452b7..65953031 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PslContinuousReader.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslContinuousReader.java @@ -41,8 +41,9 @@ public class PslContinuousReader implements ContinuousReader { private final PartitionSubscriberFactory partitionSubscriberFactory; private final SubscriptionPath subscriptionPath; private final FlowControlSettings flowControlSettings; - private final long topicPartitionCount; private SparkSourceOffset startOffset; + private final PartitionCountReader partitionCountReader; + private final long topicPartitionCount; @VisibleForTesting public PslContinuousReader( @@ -51,13 +52,14 @@ public PslContinuousReader( PartitionSubscriberFactory partitionSubscriberFactory, SubscriptionPath subscriptionPath, FlowControlSettings flowControlSettings, - long topicPartitionCount) { + PartitionCountReader partitionCountReader) { this.cursorClient = cursorClient; this.committer = committer; this.partitionSubscriberFactory = partitionSubscriberFactory; this.subscriptionPath = subscriptionPath; this.flowControlSettings = flowControlSettings; - this.topicPartitionCount = topicPartitionCount; + this.partitionCountReader = partitionCountReader; + this.topicPartitionCount = partitionCountReader.getPartitionCount(); } @Override @@ -126,4 +128,9 @@ public List> planInputPartitions() { } return list; } + + @Override + public boolean needsReconfiguration() { + return partitionCountReader.getPartitionCount() != 
topicPartitionCount; + } } diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslDataSource.java b/src/main/java/com/google/cloud/pubsublite/spark/PslDataSource.java index 3f436ddd..08a96ee8 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PslDataSource.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslDataSource.java @@ -21,7 +21,6 @@ import com.github.benmanes.caffeine.cache.Ticker; import com.google.auto.service.AutoService; import com.google.cloud.pubsublite.AdminClient; -import com.google.cloud.pubsublite.PartitionLookupUtils; import com.google.cloud.pubsublite.SubscriptionPath; import com.google.cloud.pubsublite.TopicPath; import java.util.Objects; @@ -55,17 +54,21 @@ public ContinuousReader createContinuousReader( PslDataSourceOptions pslDataSourceOptions = PslDataSourceOptions.fromSparkDataSourceOptions(options); SubscriptionPath subscriptionPath = pslDataSourceOptions.subscriptionPath(); - long topicPartitionCount; + TopicPath topicPath; try (AdminClient adminClient = pslDataSourceOptions.newAdminClient()) { - topicPartitionCount = PartitionLookupUtils.numPartitions(subscriptionPath, adminClient); + topicPath = TopicPath.parse(adminClient.getSubscription(subscriptionPath).get().getTopic()); + } catch (Throwable t) { + throw toCanonical(t).underlying; } + PartitionCountReader partitionCountReader = + new CachedPartitionCountReader(pslDataSourceOptions.newAdminClient(), topicPath); return new PslContinuousReader( pslDataSourceOptions.newCursorClient(), - pslDataSourceOptions.newMultiPartitionCommitter(topicPartitionCount), + pslDataSourceOptions.newMultiPartitionCommitter(partitionCountReader.getPartitionCount()), pslDataSourceOptions.getSubscriberFactory(), subscriptionPath, Objects.requireNonNull(pslDataSourceOptions.flowControlSettings()), - topicPartitionCount); + partitionCountReader); } @Override @@ -80,25 +83,24 @@ public MicroBatchReader createMicroBatchReader( 
PslDataSourceOptions.fromSparkDataSourceOptions(options); SubscriptionPath subscriptionPath = pslDataSourceOptions.subscriptionPath(); TopicPath topicPath; - long topicPartitionCount; try (AdminClient adminClient = pslDataSourceOptions.newAdminClient()) { topicPath = TopicPath.parse(adminClient.getSubscription(subscriptionPath).get().getTopic()); - topicPartitionCount = PartitionLookupUtils.numPartitions(topicPath, adminClient); } catch (Throwable t) { throw toCanonical(t).underlying; } + PartitionCountReader partitionCountReader = + new CachedPartitionCountReader(pslDataSourceOptions.newAdminClient(), topicPath); return new PslMicroBatchReader( pslDataSourceOptions.newCursorClient(), - pslDataSourceOptions.newMultiPartitionCommitter(topicPartitionCount), + pslDataSourceOptions.newMultiPartitionCommitter(partitionCountReader.getPartitionCount()), pslDataSourceOptions.getSubscriberFactory(), new LimitingHeadOffsetReader( pslDataSourceOptions.newTopicStatsClient(), topicPath, - topicPartitionCount, + partitionCountReader, Ticker.systemTicker()), subscriptionPath, Objects.requireNonNull(pslDataSourceOptions.flowControlSettings()), - pslDataSourceOptions.maxMessagesPerBatch(), - topicPartitionCount); + pslDataSourceOptions.maxMessagesPerBatch()); } } diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslMicroBatchReader.java b/src/main/java/com/google/cloud/pubsublite/spark/PslMicroBatchReader.java index 3ae0d91d..b2a346c0 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PslMicroBatchReader.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslMicroBatchReader.java @@ -19,6 +19,7 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; +import com.google.cloud.pubsublite.Partition; import com.google.cloud.pubsublite.SubscriptionPath; import com.google.cloud.pubsublite.cloudpubsub.FlowControlSettings; import com.google.cloud.pubsublite.internal.CursorClient; @@ 
-34,14 +35,12 @@ import org.apache.spark.sql.types.StructType; public class PslMicroBatchReader implements MicroBatchReader { - private final CursorClient cursorClient; private final MultiPartitionCommitter committer; private final PartitionSubscriberFactory partitionSubscriberFactory; private final PerTopicHeadOffsetReader headOffsetReader; private final SubscriptionPath subscriptionPath; private final FlowControlSettings flowControlSettings; - private final long topicPartitionCount; private final long maxMessagesPerBatch; @Nullable private SparkSourceOffset startOffset = null; private SparkSourceOffset endOffset; @@ -53,20 +52,30 @@ public PslMicroBatchReader( PerTopicHeadOffsetReader headOffsetReader, SubscriptionPath subscriptionPath, FlowControlSettings flowControlSettings, - long maxMessagesPerBatch, - long topicPartitionCount) { + long maxMessagesPerBatch) { this.cursorClient = cursorClient; this.committer = committer; this.partitionSubscriberFactory = partitionSubscriberFactory; this.headOffsetReader = headOffsetReader; this.subscriptionPath = subscriptionPath; this.flowControlSettings = flowControlSettings; - this.topicPartitionCount = topicPartitionCount; this.maxMessagesPerBatch = maxMessagesPerBatch; } @Override public void setOffsetRange(Optional start, Optional end) { + int currentTopicPartitionCount; + if (end.isPresent()) { + checkArgument( + end.get() instanceof SparkSourceOffset, + "end offset is not instance of SparkSourceOffset."); + endOffset = (SparkSourceOffset) end.get(); + currentTopicPartitionCount = ((SparkSourceOffset) end.get()).getPartitionOffsetMap().size(); + } else { + endOffset = PslSparkUtils.toSparkSourceOffset(headOffsetReader.getHeadOffset()); + currentTopicPartitionCount = endOffset.getPartitionOffsetMap().size(); + } + if (start.isPresent()) { checkArgument( start.get() instanceof SparkSourceOffset, @@ -74,20 +83,14 @@ public void setOffsetRange(Optional start, Optional end) { startOffset = (SparkSourceOffset) start.get(); } 
else { startOffset = - PslSparkUtils.getSparkStartOffset(cursorClient, subscriptionPath, topicPartitionCount); - } - if (end.isPresent()) { - checkArgument( - end.get() instanceof SparkSourceOffset, - "end offset is not instance of SparkSourceOffset."); - endOffset = (SparkSourceOffset) end.get(); - } else { - SparkSourceOffset headOffset = - PslSparkUtils.toSparkSourceOffset(headOffsetReader.getHeadOffset()); - endOffset = - PslSparkUtils.getSparkEndOffset( - headOffset, startOffset, maxMessagesPerBatch, topicPartitionCount); + PslSparkUtils.getSparkStartOffset( + cursorClient, subscriptionPath, currentTopicPartitionCount); } + + // Limit endOffset by maxMessagesPerBatch. + endOffset = + PslSparkUtils.getSparkEndOffset( + endOffset, startOffset, maxMessagesPerBatch, currentTopicPartitionCount); } @Override @@ -126,23 +129,28 @@ public StructType readSchema() { @Override public List<InputPartition<InternalRow>> planInputPartitions() { - checkState(startOffset != null); + checkState(startOffset != null && endOffset != null); + List<InputPartition<InternalRow>> list = new ArrayList<>(); - for (SparkPartitionOffset offset : startOffset.getPartitionOffsetMap().values()) { - SparkPartitionOffset endPartitionOffset = - endOffset.getPartitionOffsetMap().get(offset.partition()); - if (offset.equals(endPartitionOffset)) { + // Since this is called right after setOffsetRange, we could use partitions in endOffset as + // current partition count. + for (SparkPartitionOffset endPartitionOffset : endOffset.getPartitionOffsetMap().values()) { + Partition p = endPartitionOffset.partition(); + SparkPartitionOffset startPartitionOffset = + startOffset.getPartitionOffsetMap().getOrDefault(p, SparkPartitionOffset.create(p, -1L)); + if (startPartitionOffset.equals(endPartitionOffset)) { // There is no message to pull for this partition.
continue; } PartitionSubscriberFactory partitionSubscriberFactory = this.partitionSubscriberFactory; SubscriberFactory subscriberFactory = - (consumer) -> partitionSubscriberFactory.newSubscriber(offset.partition(), consumer); + (consumer) -> + partitionSubscriberFactory.newSubscriber(endPartitionOffset.partition(), consumer); list.add( new PslMicroBatchInputPartition( subscriptionPath, flowControlSettings, - offset, + startPartitionOffset, endPartitionOffset, subscriberFactory)); } diff --git a/src/test/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReaderTest.java b/src/test/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReaderTest.java index 0007dd89..dcc3025a 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReaderTest.java +++ b/src/test/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReaderTest.java @@ -38,12 +38,15 @@ public class LimitingHeadOffsetReaderTest { private final FakeTicker ticker = new FakeTicker(); private final TopicStatsClient topicStatsClient = mock(TopicStatsClient.class); + private final PartitionCountReader partitionReader = mock(PartitionCountReader.class); private final LimitingHeadOffsetReader reader = new LimitingHeadOffsetReader( - topicStatsClient, UnitTestExamples.exampleTopicPath(), 1, ticker::read); + topicStatsClient, UnitTestExamples.exampleTopicPath(), partitionReader, ticker::read); @Test public void testRead() { + when(partitionReader.getPartitionCount()).thenReturn(1); + Cursor cursor1 = Cursor.newBuilder().setOffset(10).build(); Cursor cursor2 = Cursor.newBuilder().setOffset(13).build(); when(topicStatsClient.computeHeadCursor(UnitTestExamples.exampleTopicPath(), Partition.of(0))) @@ -66,4 +69,32 @@ public void testRead() { .containsExactly(Partition.of(0), Offset.of(cursor2.getOffset())); verify(topicStatsClient).computeHeadCursor(any(), any()); } + + @Test + public void testPartitionChange() { + when(partitionReader.getPartitionCount()).thenReturn(1); + + Cursor 
cursor1 = Cursor.newBuilder().setOffset(10).build(); + when(topicStatsClient.computeHeadCursor(UnitTestExamples.exampleTopicPath(), Partition.of(0))) + .thenReturn(ApiFutures.immediateFuture(cursor1)); + assertThat(reader.getHeadOffset().partitionOffsetMap()) + .containsExactly(Partition.of(0), Offset.of(10)); + verify(topicStatsClient).computeHeadCursor(any(), any()); + + when(partitionReader.getPartitionCount()).thenReturn(3); + + for (int i = 0; i < 3; i++) { + when(topicStatsClient.computeHeadCursor(UnitTestExamples.exampleTopicPath(), Partition.of(i))) + .thenReturn(ApiFutures.immediateFuture(cursor1)); + } + assertThat(reader.getHeadOffset().partitionOffsetMap()) + .containsExactly( + Partition.of(0), + Offset.of(10), + Partition.of(1), + Offset.of(10), + Partition.of(2), + Offset.of(10)); + verify(topicStatsClient, times(3)).computeHeadCursor(any(), any()); + } } diff --git a/src/test/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImplTest.java b/src/test/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImplTest.java index a9fbf3a2..65b4675a 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImplTest.java +++ b/src/test/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImplTest.java @@ -16,75 +16,124 @@ package com.google.cloud.pubsublite.spark; +import static com.google.cloud.pubsublite.spark.TestingUtils.createPslSourceOffset; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.*; import com.google.api.core.SettableApiFuture; import com.google.cloud.pubsublite.*; import com.google.cloud.pubsublite.internal.wire.Committer; -import com.google.common.collect.ImmutableMap; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; import org.junit.Test; +import org.mockito.ArgumentCaptor; public class MultiPartitionCommitterImplTest { - @Test - public void testCommit() { - 
Committer committer1 = mock(Committer.class); - Committer committer2 = mock(Committer.class); - when(committer1.startAsync()) - .thenReturn(committer1) - .thenThrow(new IllegalStateException("should only init once")); - when(committer2.startAsync()) - .thenReturn(committer2) - .thenThrow(new IllegalStateException("should only init once")); + private Runnable task; + private List<Committer> committerList; + + private MultiPartitionCommitterImpl createCommitter(int initialPartitions, int available) { + committerList = new ArrayList<>(); + for (int i = 0; i < available; i++) { + Committer committer = mock(Committer.class); + when(committer.startAsync()) + .thenReturn(committer) + .thenThrow(new IllegalStateException("should only init once")); + when(committer.commitOffset(eq(Offset.of(10L)))).thenReturn(SettableApiFuture.create()); + committerList.add(committer); + } + ScheduledExecutorService mockExecutor = mock(ScheduledExecutorService.class); + ArgumentCaptor<Runnable> taskCaptor = ArgumentCaptor.forClass(Runnable.class); + when(mockExecutor.scheduleWithFixedDelay( + taskCaptor.capture(), anyLong(), anyLong(), any(TimeUnit.class))) + .thenReturn(null); MultiPartitionCommitterImpl multiCommitter = new MultiPartitionCommitterImpl( - 2, - (p) -> { - if (p.value() == 0L) { - return committer1; - } else { - return committer2; - } - }); - verify(committer1, times(1)).startAsync(); - verify(committer2, times(1)).startAsync(); - - PslSourceOffset offset = - PslSourceOffset.builder() - .partitionOffsetMap( - ImmutableMap.of( - Partition.of(0), Offset.of(10L), - Partition.of(1), Offset.of(8L))) - .build(); + initialPartitions, p -> committerList.get((int) p.value()), mockExecutor); + task = taskCaptor.getValue(); + return multiCommitter; + } + + private MultiPartitionCommitterImpl createCommitter(int initialPartitions) { + return createCommitter(initialPartitions, initialPartitions); + } + + @Test + public void testCommit() { + MultiPartitionCommitterImpl multiCommitter = createCommitter(2); + + 
verify(committerList.get(0)).startAsync(); + verify(committerList.get(1)).startAsync(); + + PslSourceOffset offset = createPslSourceOffset(10L, 8L); SettableApiFuture future1 = SettableApiFuture.create(); SettableApiFuture future2 = SettableApiFuture.create(); - when(committer1.commitOffset(eq(Offset.of(10L)))).thenReturn(future1); - when(committer2.commitOffset(eq(Offset.of(8L)))).thenReturn(future2); + when(committerList.get(0).commitOffset(eq(Offset.of(10L)))).thenReturn(future1); + when(committerList.get(1).commitOffset(eq(Offset.of(8L)))).thenReturn(future2); multiCommitter.commit(offset); - verify(committer1, times(1)).commitOffset(eq(Offset.of(10L))); - verify(committer2, times(1)).commitOffset(eq(Offset.of(8L))); + verify(committerList.get(0)).commitOffset(eq(Offset.of(10L))); + verify(committerList.get(1)).commitOffset(eq(Offset.of(8L))); } @Test public void testClose() { - Committer committer = mock(Committer.class); - when(committer.startAsync()) - .thenReturn(committer) - .thenThrow(new IllegalStateException("should only init once")); - MultiPartitionCommitterImpl multiCommitter = - new MultiPartitionCommitterImpl(1, (p) -> committer); + MultiPartitionCommitterImpl multiCommitter = createCommitter(1); - PslSourceOffset offset = - PslSourceOffset.builder() - .partitionOffsetMap(ImmutableMap.of(Partition.of(0), Offset.of(10L))) - .build(); + PslSourceOffset offset = createPslSourceOffset(10L); SettableApiFuture future1 = SettableApiFuture.create(); - when(committer.commitOffset(eq(Offset.of(10L)))).thenReturn(future1); - when(committer.stopAsync()).thenReturn(committer); + when(committerList.get(0).commitOffset(eq(Offset.of(10L)))).thenReturn(future1); multiCommitter.commit(offset); + when(committerList.get(0).stopAsync()).thenReturn(committerList.get(0)); multiCommitter.close(); - verify(committer, times(1)).stopAsync(); + verify(committerList.get(0)).stopAsync(); + } + + @Test + public void testPartitionChange() { + // Creates committer with 2 
partitions + MultiPartitionCommitterImpl multiCommitter = createCommitter(2, 4); + for (int i = 0; i < 2; i++) { + verify(committerList.get(i)).startAsync(); + } + for (int i = 2; i < 4; i++) { + verify(committerList.get(i), times(0)).startAsync(); + } + + // Partitions increased to 4. + multiCommitter.commit(createPslSourceOffset(10L, 10L, 10L, 10L)); + for (int i = 0; i < 2; i++) { + verify(committerList.get(i)).commitOffset(eq(Offset.of(10L))); + } + for (int i = 2; i < 4; i++) { + verify(committerList.get(i)).startAsync(); + verify(committerList.get(i)).commitOffset(eq(Offset.of(10L))); + } + + // Partitions decreased to 2 + multiCommitter.commit(createPslSourceOffset(10L, 10L)); + for (int i = 0; i < 2; i++) { + verify(committerList.get(i), times(2)).commitOffset(eq(Offset.of(10L))); + } + task.run(); + for (int i = 2; i < 4; i++) { + verify(committerList.get(i)).stopAsync(); + } + } + + @Test + public void testDelayedPartitionRemoval() { + // Creates committer with 4 partitions, then decrease to 2, then increase to 3. 
+ MultiPartitionCommitterImpl multiCommitter = createCommitter(4); + multiCommitter.commit(createPslSourceOffset(10L, 10L)); + multiCommitter.commit(createPslSourceOffset(10L, 10L, 10L)); + task.run(); + verify(committerList.get(2)).startAsync(); + verify(committerList.get(2), times(0)).stopAsync(); + verify(committerList.get(3)).startAsync(); + verify(committerList.get(3)).stopAsync(); } } diff --git a/src/test/java/com/google/cloud/pubsublite/spark/PslContinuousReaderTest.java b/src/test/java/com/google/cloud/pubsublite/spark/PslContinuousReaderTest.java index b4982caa..36bcdf91 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/PslContinuousReaderTest.java +++ b/src/test/java/com/google/cloud/pubsublite/spark/PslContinuousReaderTest.java @@ -38,6 +38,14 @@ public class PslContinuousReaderTest { private final MultiPartitionCommitter committer = mock(MultiPartitionCommitter.class); private final PartitionSubscriberFactory partitionSubscriberFactory = mock(PartitionSubscriberFactory.class); + private final PartitionCountReader partitionCountReader; + + { + PartitionCountReader mock = mock(PartitionCountReader.class); + when(mock.getPartitionCount()).thenReturn(2); + partitionCountReader = mock; + } + private final PslContinuousReader reader = new PslContinuousReader( cursorClient, @@ -45,7 +53,7 @@ public class PslContinuousReaderTest { partitionSubscriberFactory, UnitTestExamples.exampleSubscriptionPath(), OPTIONS.flowControlSettings(), - 2); + partitionCountReader); @Test public void testEmptyStartOffset() { @@ -122,4 +130,10 @@ public void testCommit() { reader.commit(offset); verify(committer, times(1)).commit(eq(expectedCommitOffset)); } + + @Test + public void testPartitionIncrease() { + when(partitionCountReader.getPartitionCount()).thenReturn(4); + assertThat(reader.needsReconfiguration()).isTrue(); + } } diff --git a/src/test/java/com/google/cloud/pubsublite/spark/PslMicroBatchReaderTest.java 
b/src/test/java/com/google/cloud/pubsublite/spark/PslMicroBatchReaderTest.java index 13649f05..3692e7a5 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/PslMicroBatchReaderTest.java +++ b/src/test/java/com/google/cloud/pubsublite/spark/PslMicroBatchReaderTest.java @@ -16,6 +16,8 @@ package com.google.cloud.pubsublite.spark; +import static com.google.cloud.pubsublite.spark.TestingUtils.createPslSourceOffset; +import static com.google.cloud.pubsublite.spark.TestingUtils.createSparkSourceOffset; import static com.google.common.truth.Truth.assertThat; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.*; @@ -49,31 +51,33 @@ public class PslMicroBatchReaderTest { headOffsetReader, UnitTestExamples.exampleSubscriptionPath(), OPTIONS.flowControlSettings(), - MAX_MESSAGES_PER_BATCH, - 2); + MAX_MESSAGES_PER_BATCH); - private PslSourceOffset createPslSourceOffsetTwoPartition(long offset0, long offset1) { - return PslSourceOffset.builder() - .partitionOffsetMap( - ImmutableMap.of( - Partition.of(0L), Offset.of(offset0), Partition.of(1L), Offset.of(offset1))) - .build(); - } - - private SparkSourceOffset createSparkSourceOffsetTwoPartition(long offset0, long offset1) { - return new SparkSourceOffset( - ImmutableMap.of( + @Test + public void testNoCommitCursors() { + when(cursorClient.listPartitionCursors(UnitTestExamples.exampleSubscriptionPath())) + .thenReturn(ApiFutures.immediateFuture(ImmutableMap.of())); + when(headOffsetReader.getHeadOffset()).thenReturn(createPslSourceOffset(301L, 200L)); + reader.setOffsetRange(Optional.empty(), Optional.empty()); + assertThat(((SparkSourceOffset) reader.getStartOffset()).getPartitionOffsetMap()) + .containsExactly( + Partition.of(0L), + SparkPartitionOffset.create(Partition.of(0L), -1L), + Partition.of(1L), + SparkPartitionOffset.create(Partition.of(1L), -1L)); + assertThat(((SparkSourceOffset) reader.getEndOffset()).getPartitionOffsetMap()) + .containsExactly( Partition.of(0L), - 
SparkPartitionOffset.create(Partition.of(0L), offset0), + SparkPartitionOffset.create(Partition.of(0L), 300L), Partition.of(1L), - SparkPartitionOffset.create(Partition.of(1L), offset1))); + SparkPartitionOffset.create(Partition.of(1L), 199L)); } @Test public void testEmptyOffsets() { when(cursorClient.listPartitionCursors(UnitTestExamples.exampleSubscriptionPath())) .thenReturn(ApiFutures.immediateFuture(ImmutableMap.of(Partition.of(0L), Offset.of(100L)))); - when(headOffsetReader.getHeadOffset()).thenReturn(createPslSourceOffsetTwoPartition(301L, 0L)); + when(headOffsetReader.getHeadOffset()).thenReturn(createPslSourceOffset(301L, 0L)); reader.setOffsetRange(Optional.empty(), Optional.empty()); assertThat(((SparkSourceOffset) reader.getStartOffset()).getPartitionOffsetMap()) .containsExactly( @@ -91,8 +95,8 @@ public void testEmptyOffsets() { @Test public void testValidOffsets() { - SparkSourceOffset startOffset = createSparkSourceOffsetTwoPartition(10L, 100L); - SparkSourceOffset endOffset = createSparkSourceOffsetTwoPartition(20L, 300L); + SparkSourceOffset startOffset = createSparkSourceOffset(10L, 100L); + SparkSourceOffset endOffset = createSparkSourceOffset(20L, 300L); reader.setOffsetRange(Optional.of(startOffset), Optional.of(endOffset)); assertThat(reader.getStartOffset()).isEqualTo(startOffset); assertThat(reader.getEndOffset()).isEqualTo(endOffset); @@ -108,16 +112,16 @@ public void testDeserializeOffset() { @Test public void testCommit() { - SparkSourceOffset offset = createSparkSourceOffsetTwoPartition(10L, 50L); - PslSourceOffset expectedCommitOffset = createPslSourceOffsetTwoPartition(11L, 51L); + SparkSourceOffset offset = createSparkSourceOffset(10L, 50L); + PslSourceOffset expectedCommitOffset = createPslSourceOffset(11L, 51L); reader.commit(offset); verify(committer, times(1)).commit(eq(expectedCommitOffset)); } @Test public void testPlanInputPartitionNoMessage() { - SparkSourceOffset startOffset = createSparkSourceOffsetTwoPartition(10L, 
100L); - SparkSourceOffset endOffset = createSparkSourceOffsetTwoPartition(20L, 100L); + SparkSourceOffset startOffset = createSparkSourceOffset(10L, 100L); + SparkSourceOffset endOffset = createSparkSourceOffset(20L, 100L); reader.setOffsetRange(Optional.of(startOffset), Optional.of(endOffset)); assertThat(reader.planInputPartitions()).hasSize(1); } @@ -126,8 +130,7 @@ public void testPlanInputPartitionNoMessage() { public void testMaxMessagesPerBatch() { when(cursorClient.listPartitionCursors(UnitTestExamples.exampleSubscriptionPath())) .thenReturn(ApiFutures.immediateFuture(ImmutableMap.of(Partition.of(0L), Offset.of(100L)))); - when(headOffsetReader.getHeadOffset()) - .thenReturn(createPslSourceOffsetTwoPartition(10000000L, 0L)); + when(headOffsetReader.getHeadOffset()).thenReturn(createPslSourceOffset(10000000L, 0L)); reader.setOffsetRange(Optional.empty(), Optional.empty()); assertThat(((SparkSourceOffset) reader.getEndOffset()).getPartitionOffsetMap()) .containsExactly( @@ -139,4 +142,52 @@ public void testMaxMessagesPerBatch() { Partition.of(1L), SparkPartitionOffset.create(Partition.of(1L), -1L)); } + + @Test + public void testPartitionIncreasedRetry() { + SparkSourceOffset startOffset = createSparkSourceOffset(10L, 100L); + SparkSourceOffset endOffset = createSparkSourceOffset(20L, 300L, 100L); + reader.setOffsetRange(Optional.of(startOffset), Optional.of(endOffset)); + assertThat(reader.getStartOffset()).isEqualTo(startOffset); + assertThat(reader.getEndOffset()).isEqualTo(endOffset); + assertThat(reader.planInputPartitions()).hasSize(3); + } + + @Test + public void testPartitionIncreasedNewQuery() { + when(cursorClient.listPartitionCursors(UnitTestExamples.exampleSubscriptionPath())) + .thenReturn(ApiFutures.immediateFuture(ImmutableMap.of(Partition.of(0L), Offset.of(100L)))); + SparkSourceOffset endOffset = createSparkSourceOffset(301L, 200L); + when(headOffsetReader.getHeadOffset()).thenReturn(PslSparkUtils.toPslSourceOffset(endOffset)); + 
reader.setOffsetRange(Optional.empty(), Optional.empty()); + assertThat(reader.getStartOffset()).isEqualTo(createSparkSourceOffset(99L, -1L)); + assertThat(reader.getEndOffset()).isEqualTo(endOffset); + assertThat(reader.planInputPartitions()).hasSize(2); + } + + @Test + public void testPartitionIncreasedBeforeSetOffsets() { + SparkSourceOffset endOffset = createSparkSourceOffset(301L, 200L); + SparkSourceOffset startOffset = createSparkSourceOffset(100L); + when(headOffsetReader.getHeadOffset()).thenReturn(PslSparkUtils.toPslSourceOffset(endOffset)); + reader.setOffsetRange(Optional.of(startOffset), Optional.empty()); + assertThat(reader.getStartOffset()).isEqualTo(startOffset); + assertThat(reader.getEndOffset()).isEqualTo(endOffset); + assertThat(reader.planInputPartitions()).hasSize(2); + } + + @Test + public void testPartitionIncreasedBetweenSetOffsetsAndPlan() { + SparkSourceOffset startOffset = createSparkSourceOffset(100L); + SparkSourceOffset endOffset = createSparkSourceOffset(301L); + SparkSourceOffset newEndOffset = createSparkSourceOffset(600L, 300L); + when(headOffsetReader.getHeadOffset()).thenReturn(PslSparkUtils.toPslSourceOffset(endOffset)); + reader.setOffsetRange(Optional.of(startOffset), Optional.empty()); + assertThat(reader.getStartOffset()).isEqualTo(startOffset); + assertThat(reader.getEndOffset()).isEqualTo(endOffset); + when(headOffsetReader.getHeadOffset()) + .thenReturn(PslSparkUtils.toPslSourceOffset(newEndOffset)); + // headOffsetReader changes between setOffsets and plan should have no effect. 
+ assertThat(reader.planInputPartitions()).hasSize(1); + } } diff --git a/src/test/java/com/google/cloud/pubsublite/spark/TestingUtils.java b/src/test/java/com/google/cloud/pubsublite/spark/TestingUtils.java new file mode 100644 index 00000000..43b466ce --- /dev/null +++ b/src/test/java/com/google/cloud/pubsublite/spark/TestingUtils.java @@ -0,0 +1,43 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.pubsublite.spark; + +import com.google.cloud.pubsublite.Offset; +import com.google.cloud.pubsublite.Partition; +import java.util.HashMap; +import java.util.Map; + +public class TestingUtils { + public static PslSourceOffset createPslSourceOffset(long... offsets) { + Map map = new HashMap<>(); + int idx = 0; + for (long offset : offsets) { + map.put(Partition.of(idx++), Offset.of(offset)); + } + return PslSourceOffset.builder().partitionOffsetMap(map).build(); + } + + public static SparkSourceOffset createSparkSourceOffset(long... offsets) { + Map map = new HashMap<>(); + int idx = 0; + for (long offset : offsets) { + map.put(Partition.of(idx), SparkPartitionOffset.create(Partition.of(idx), offset)); + idx++; + } + return new SparkSourceOffset(map); + } +} From 218898838fc8dc6854c935c227bd07f813db58d5 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 30 Mar 2021 07:54:11 -0700 Subject: [PATCH 27/47] chore: remove staging bucket v2 (#123) This PR was generated using Autosynth. 
:rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/5be7ad29-661e-4d3f-9958-25d07d1854da/targets - [ ] To automatically regenerate this PR, check this box. (May take up to 24 hours.) Source-Link: https://github.com/googleapis/synthtool/commit/572ef8f70edd9041f5bcfa71511aed6aecfc2098 --- .kokoro/release/publish_javadoc.sh | 5 ----- synth.metadata | 4 ++-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/.kokoro/release/publish_javadoc.sh b/.kokoro/release/publish_javadoc.sh index efc49913..5419bbb3 100755 --- a/.kokoro/release/publish_javadoc.sh +++ b/.kokoro/release/publish_javadoc.sh @@ -24,11 +24,6 @@ if [[ -z "${STAGING_BUCKET}" ]]; then exit 1 fi -if [[ -z "${STAGING_BUCKET_V2}" ]]; then - echo "Need to set STAGING_BUCKET_V2 environment variable" - exit 1 -fi - # work from the git root directory pushd $(dirname "$0")/../../ diff --git a/synth.metadata b/synth.metadata index 7a54af80..a92af3d2 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,14 +4,14 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "011e9d567cd0109790f916f545c8ad50138ddbe5" + "sha": "20f336639c261ddb3b61d0bd14f02e6ea5146377" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "0b064d767537e0675fc053e53fca473c5c701fb8" + "sha": "572ef8f70edd9041f5bcfa71511aed6aecfc2098" } } ], From 92cfdfdc85449bb2bf745d59cd9b40e5949ba53c Mon Sep 17 00:00:00 2001 From: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com> Date: Fri, 2 Apr 2021 15:11:23 -0400 Subject: [PATCH 28/47] feat: PSL Connector Writer support (#121) --- clirr-ignored-differences.xml | 25 ++++ pom.xml | 5 + .../cloud/pubsublite/spark/Constants.java | 31 +++- .../pubsublite/spark/PslContinuousReader.java | 3 + .../cloud/pubsublite/spark/PslDataSource.java | 63 ++++++--- .../cloud/pubsublite/spark/PslDataWriter.java | 97 +++++++++++++ 
.../spark/PslDataWriterFactory.java | 45 ++++++ .../pubsublite/spark/PslMicroBatchReader.java | 3 + ...ons.java => PslReadDataSourceOptions.java} | 20 +-- .../cloud/pubsublite/spark/PslSparkUtils.java | 111 ++++++++++++++- .../pubsublite/spark/PslStreamWriter.java | 65 +++++++++ .../spark/PslWriteDataSourceOptions.java | 133 ++++++++++++++++++ .../spark/PslWriterCommitMessage.java | 30 ++++ .../CachedPartitionCountReader.java | 2 +- .../spark/internal/CachedPublishers.java | 64 +++++++++ .../LimitingHeadOffsetReader.java | 3 +- .../MultiPartitionCommitter.java | 3 +- .../MultiPartitionCommitterImpl.java | 3 +- .../{ => internal}/PartitionCountReader.java | 2 +- .../PartitionSubscriberFactory.java | 2 +- .../PerTopicHeadOffsetReader.java | 3 +- .../PslCredentialsProvider.java | 14 +- .../spark/internal/PublisherFactory.java | 26 ++++ .../spark/PslContinuousReaderTest.java | 7 +- .../pubsublite/spark/PslDataWriterTest.java | 84 +++++++++++ .../spark/PslMicroBatchReaderTest.java | 7 +- ...java => PslReadDataSourceOptionsTest.java} | 4 +- .../pubsublite/spark/PslSparkUtilsTest.java | 100 +++++++++++++ .../pubsublite/spark/PslStreamWriterTest.java | 50 +++++++ .../spark/PslWriteDataSourceOptionsTest.java | 35 +++++ .../LimitingHeadOffsetReaderTest.java | 2 +- .../MultiPartitionCommitterImplTest.java | 3 +- 32 files changed, 986 insertions(+), 59 deletions(-) create mode 100644 src/main/java/com/google/cloud/pubsublite/spark/PslDataWriter.java create mode 100644 src/main/java/com/google/cloud/pubsublite/spark/PslDataWriterFactory.java rename src/main/java/com/google/cloud/pubsublite/spark/{PslDataSourceOptions.java => PslReadDataSourceOptions.java} (92%) create mode 100644 src/main/java/com/google/cloud/pubsublite/spark/PslStreamWriter.java create mode 100644 src/main/java/com/google/cloud/pubsublite/spark/PslWriteDataSourceOptions.java create mode 100644 src/main/java/com/google/cloud/pubsublite/spark/PslWriterCommitMessage.java rename 
src/main/java/com/google/cloud/pubsublite/spark/{ => internal}/CachedPartitionCountReader.java (96%) create mode 100644 src/main/java/com/google/cloud/pubsublite/spark/internal/CachedPublishers.java rename src/main/java/com/google/cloud/pubsublite/spark/{ => internal}/LimitingHeadOffsetReader.java (97%) rename src/main/java/com/google/cloud/pubsublite/spark/{ => internal}/MultiPartitionCommitter.java (89%) rename src/main/java/com/google/cloud/pubsublite/spark/{ => internal}/MultiPartitionCommitterImpl.java (97%) rename src/main/java/com/google/cloud/pubsublite/spark/{ => internal}/PartitionCountReader.java (93%) rename src/main/java/com/google/cloud/pubsublite/spark/{ => internal}/PartitionSubscriberFactory.java (95%) rename src/main/java/com/google/cloud/pubsublite/spark/{ => internal}/PerTopicHeadOffsetReader.java (88%) rename src/main/java/com/google/cloud/pubsublite/spark/{ => internal}/PslCredentialsProvider.java (85%) create mode 100644 src/main/java/com/google/cloud/pubsublite/spark/internal/PublisherFactory.java create mode 100644 src/test/java/com/google/cloud/pubsublite/spark/PslDataWriterTest.java rename src/test/java/com/google/cloud/pubsublite/spark/{PslDataSourceOptionsTest.java => PslReadDataSourceOptionsTest.java} (89%) create mode 100644 src/test/java/com/google/cloud/pubsublite/spark/PslStreamWriterTest.java create mode 100644 src/test/java/com/google/cloud/pubsublite/spark/PslWriteDataSourceOptionsTest.java rename src/test/java/com/google/cloud/pubsublite/spark/{ => internal}/LimitingHeadOffsetReaderTest.java (98%) rename src/test/java/com/google/cloud/pubsublite/spark/{ => internal}/MultiPartitionCommitterImplTest.java (97%) diff --git a/clirr-ignored-differences.xml b/clirr-ignored-differences.xml index 1aa41e4f..6aa9dcf9 100644 --- a/clirr-ignored-differences.xml +++ b/clirr-ignored-differences.xml @@ -12,4 +12,29 @@ * * + + 8001 + com/google/cloud/pubsublite/spark/LimitingHeadOffsetReader + + + 8001 + 
com/google/cloud/pubsublite/spark/MultiPartitionCommitter* + + + 8001 + com/google/cloud/pubsublite/spark/PartitionSubscriberFactory + + + 8001 + com/google/cloud/pubsublite/spark/PerTopicHeadOffsetReader + + + 8001 + com/google/cloud/pubsublite/spark/PslCredentialsProvider + + + 8001 + com/google/cloud/pubsublite/spark/PslDataSourceOptions* + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index c377b164..74123ab4 100644 --- a/pom.xml +++ b/pom.xml @@ -113,6 +113,11 @@ ${scala.version} provided + + org.scala-lang.modules + scala-java8-compat_2.11 + 0.9.1 + diff --git a/src/main/java/com/google/cloud/pubsublite/spark/Constants.java b/src/main/java/com/google/cloud/pubsublite/spark/Constants.java index cac4337a..9ad29b23 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/Constants.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/Constants.java @@ -17,7 +17,12 @@ package com.google.cloud.pubsublite.spark; import com.google.cloud.pubsublite.internal.wire.PubsubContext; +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.spark.sql.types.ArrayType; +import org.apache.spark.sql.types.DataType; import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.MapType; import org.apache.spark.sql.types.Metadata; import org.apache.spark.sql.types.StructField; import org.apache.spark.sql.types.StructType; @@ -26,22 +31,33 @@ public class Constants { public static long DEFAULT_BYTES_OUTSTANDING = 50_000_000; public static long DEFAULT_MESSAGES_OUTSTANDING = Long.MAX_VALUE; public static long DEFAULT_MAX_MESSAGES_PER_BATCH = Long.MAX_VALUE; + + public static ArrayType ATTRIBUTES_PER_KEY_DATATYPE = + DataTypes.createArrayType(DataTypes.BinaryType); + public static MapType ATTRIBUTES_DATATYPE = + DataTypes.createMapType(DataTypes.StringType, ATTRIBUTES_PER_KEY_DATATYPE); + public static Map PUBLISH_FIELD_TYPES = + ImmutableMap.of( + "key", DataTypes.BinaryType, + "data", DataTypes.BinaryType, + 
"attributes", ATTRIBUTES_DATATYPE, + "event_timestamp", DataTypes.TimestampType); public static StructType DEFAULT_SCHEMA = new StructType( new StructField[] { new StructField("subscription", DataTypes.StringType, false, Metadata.empty()), new StructField("partition", DataTypes.LongType, false, Metadata.empty()), new StructField("offset", DataTypes.LongType, false, Metadata.empty()), - new StructField("key", DataTypes.BinaryType, false, Metadata.empty()), - new StructField("data", DataTypes.BinaryType, false, Metadata.empty()), + new StructField("key", PUBLISH_FIELD_TYPES.get("key"), false, Metadata.empty()), + new StructField("data", PUBLISH_FIELD_TYPES.get("data"), false, Metadata.empty()), new StructField("publish_timestamp", DataTypes.TimestampType, false, Metadata.empty()), - new StructField("event_timestamp", DataTypes.TimestampType, true, Metadata.empty()), new StructField( - "attributes", - DataTypes.createMapType( - DataTypes.StringType, DataTypes.createArrayType(DataTypes.BinaryType)), + "event_timestamp", + PUBLISH_FIELD_TYPES.get("event_timestamp"), true, - Metadata.empty()) + Metadata.empty()), + new StructField( + "attributes", PUBLISH_FIELD_TYPES.get("attributes"), true, Metadata.empty()) }); public static final PubsubContext.Framework FRAMEWORK = PubsubContext.Framework.of("SPARK"); @@ -52,6 +68,7 @@ public class Constants { "pubsublite.flowcontrol.byteoutstandingperpartition"; public static String MESSAGES_OUTSTANDING_CONFIG_KEY = "pubsublite.flowcontrol.messageoutstandingperparition"; + public static String TOPIC_CONFIG_KEY = "pubsublite.topic"; public static String SUBSCRIPTION_CONFIG_KEY = "pubsublite.subscription"; public static String CREDENTIALS_KEY_CONFIG_KEY = "gcp.credentials.key"; } diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslContinuousReader.java b/src/main/java/com/google/cloud/pubsublite/spark/PslContinuousReader.java index 65953031..ad2ca3da 100644 --- 
a/src/main/java/com/google/cloud/pubsublite/spark/PslContinuousReader.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslContinuousReader.java @@ -22,6 +22,9 @@ import com.google.cloud.pubsublite.cloudpubsub.FlowControlSettings; import com.google.cloud.pubsublite.internal.CursorClient; import com.google.cloud.pubsublite.internal.wire.SubscriberFactory; +import com.google.cloud.pubsublite.spark.internal.MultiPartitionCommitter; +import com.google.cloud.pubsublite.spark.internal.PartitionCountReader; +import com.google.cloud.pubsublite.spark.internal.PartitionSubscriberFactory; import com.google.common.annotations.VisibleForTesting; import java.util.ArrayList; import java.util.Arrays; diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslDataSource.java b/src/main/java/com/google/cloud/pubsublite/spark/PslDataSource.java index 08a96ee8..2ef2535d 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PslDataSource.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslDataSource.java @@ -23,6 +23,9 @@ import com.google.cloud.pubsublite.AdminClient; import com.google.cloud.pubsublite.SubscriptionPath; import com.google.cloud.pubsublite.TopicPath; +import com.google.cloud.pubsublite.spark.internal.CachedPartitionCountReader; +import com.google.cloud.pubsublite.spark.internal.LimitingHeadOffsetReader; +import com.google.cloud.pubsublite.spark.internal.PartitionCountReader; import java.util.Objects; import java.util.Optional; import org.apache.spark.sql.sources.DataSourceRegister; @@ -30,13 +33,20 @@ import org.apache.spark.sql.sources.v2.DataSourceOptions; import org.apache.spark.sql.sources.v2.DataSourceV2; import org.apache.spark.sql.sources.v2.MicroBatchReadSupport; +import org.apache.spark.sql.sources.v2.StreamWriteSupport; import org.apache.spark.sql.sources.v2.reader.streaming.ContinuousReader; import org.apache.spark.sql.sources.v2.reader.streaming.MicroBatchReader; +import 
org.apache.spark.sql.sources.v2.writer.streaming.StreamWriter; +import org.apache.spark.sql.streaming.OutputMode; import org.apache.spark.sql.types.StructType; @AutoService(DataSourceRegister.class) public final class PslDataSource - implements DataSourceV2, ContinuousReadSupport, MicroBatchReadSupport, DataSourceRegister { + implements DataSourceV2, + ContinuousReadSupport, + MicroBatchReadSupport, + StreamWriteSupport, + DataSourceRegister { @Override public String shortName() { @@ -51,23 +61,24 @@ public ContinuousReader createContinuousReader( "PubSub Lite uses fixed schema and custom schema is not allowed"); } - PslDataSourceOptions pslDataSourceOptions = - PslDataSourceOptions.fromSparkDataSourceOptions(options); - SubscriptionPath subscriptionPath = pslDataSourceOptions.subscriptionPath(); + PslReadDataSourceOptions pslReadDataSourceOptions = + PslReadDataSourceOptions.fromSparkDataSourceOptions(options); + SubscriptionPath subscriptionPath = pslReadDataSourceOptions.subscriptionPath(); TopicPath topicPath; - try (AdminClient adminClient = pslDataSourceOptions.newAdminClient()) { + try (AdminClient adminClient = pslReadDataSourceOptions.newAdminClient()) { topicPath = TopicPath.parse(adminClient.getSubscription(subscriptionPath).get().getTopic()); } catch (Throwable t) { throw toCanonical(t).underlying; } PartitionCountReader partitionCountReader = - new CachedPartitionCountReader(pslDataSourceOptions.newAdminClient(), topicPath); + new CachedPartitionCountReader(pslReadDataSourceOptions.newAdminClient(), topicPath); return new PslContinuousReader( - pslDataSourceOptions.newCursorClient(), - pslDataSourceOptions.newMultiPartitionCommitter(partitionCountReader.getPartitionCount()), - pslDataSourceOptions.getSubscriberFactory(), + pslReadDataSourceOptions.newCursorClient(), + pslReadDataSourceOptions.newMultiPartitionCommitter( + partitionCountReader.getPartitionCount()), + pslReadDataSourceOptions.getSubscriberFactory(), subscriptionPath, - 
Objects.requireNonNull(pslDataSourceOptions.flowControlSettings()), + Objects.requireNonNull(pslReadDataSourceOptions.flowControlSettings()), partitionCountReader); } @@ -79,28 +90,38 @@ public MicroBatchReader createMicroBatchReader( "PubSub Lite uses fixed schema and custom schema is not allowed"); } - PslDataSourceOptions pslDataSourceOptions = - PslDataSourceOptions.fromSparkDataSourceOptions(options); - SubscriptionPath subscriptionPath = pslDataSourceOptions.subscriptionPath(); + PslReadDataSourceOptions pslReadDataSourceOptions = + PslReadDataSourceOptions.fromSparkDataSourceOptions(options); + SubscriptionPath subscriptionPath = pslReadDataSourceOptions.subscriptionPath(); TopicPath topicPath; - try (AdminClient adminClient = pslDataSourceOptions.newAdminClient()) { + try (AdminClient adminClient = pslReadDataSourceOptions.newAdminClient()) { topicPath = TopicPath.parse(adminClient.getSubscription(subscriptionPath).get().getTopic()); } catch (Throwable t) { throw toCanonical(t).underlying; } PartitionCountReader partitionCountReader = - new CachedPartitionCountReader(pslDataSourceOptions.newAdminClient(), topicPath); + new CachedPartitionCountReader(pslReadDataSourceOptions.newAdminClient(), topicPath); return new PslMicroBatchReader( - pslDataSourceOptions.newCursorClient(), - pslDataSourceOptions.newMultiPartitionCommitter(partitionCountReader.getPartitionCount()), - pslDataSourceOptions.getSubscriberFactory(), + pslReadDataSourceOptions.newCursorClient(), + pslReadDataSourceOptions.newMultiPartitionCommitter( + partitionCountReader.getPartitionCount()), + pslReadDataSourceOptions.getSubscriberFactory(), new LimitingHeadOffsetReader( - pslDataSourceOptions.newTopicStatsClient(), + pslReadDataSourceOptions.newTopicStatsClient(), topicPath, partitionCountReader, Ticker.systemTicker()), subscriptionPath, - Objects.requireNonNull(pslDataSourceOptions.flowControlSettings()), - pslDataSourceOptions.maxMessagesPerBatch()); + 
Objects.requireNonNull(pslReadDataSourceOptions.flowControlSettings()), + pslReadDataSourceOptions.maxMessagesPerBatch()); + } + + @Override + public StreamWriter createStreamWriter( + String queryId, StructType schema, OutputMode mode, DataSourceOptions options) { + PslSparkUtils.verifyWriteInputSchema(schema); + PslWriteDataSourceOptions pslWriteDataSourceOptions = + PslWriteDataSourceOptions.fromSparkDataSourceOptions(options); + return new PslStreamWriter(schema, pslWriteDataSourceOptions); } } diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslDataWriter.java b/src/main/java/com/google/cloud/pubsublite/spark/PslDataWriter.java new file mode 100644 index 00000000..631fb2d3 --- /dev/null +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslDataWriter.java @@ -0,0 +1,97 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.pubsublite.spark; + +import com.google.api.core.ApiFuture; +import com.google.api.core.ApiService; +import com.google.cloud.pubsublite.MessageMetadata; +import com.google.cloud.pubsublite.internal.Publisher; +import com.google.cloud.pubsublite.spark.internal.PublisherFactory; +import com.google.common.flogger.GoogleLogger; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.ExecutionException; +import javax.annotation.concurrent.GuardedBy; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.sources.v2.writer.DataWriter; +import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage; +import org.apache.spark.sql.types.StructType; + +public class PslDataWriter implements DataWriter { + + private static final GoogleLogger log = GoogleLogger.forEnclosingClass(); + + private final long partitionId, taskId, epochId; + private final StructType inputSchema; + private final PublisherFactory publisherFactory; + + @GuardedBy("this") + private Optional> publisher = Optional.empty(); + + @GuardedBy("this") + private final List> futures = new ArrayList<>(); + + public PslDataWriter( + long partitionId, + long taskId, + long epochId, + StructType schema, + PublisherFactory publisherFactory) { + this.partitionId = partitionId; + this.taskId = taskId; + this.epochId = epochId; + this.inputSchema = schema; + this.publisherFactory = publisherFactory; + } + + @Override + public synchronized void write(InternalRow record) { + if (!publisher.isPresent() || publisher.get().state() != ApiService.State.RUNNING) { + publisher = Optional.of(publisherFactory.newPublisher()); + } + futures.add( + publisher + .get() + .publish(Objects.requireNonNull(PslSparkUtils.toPubSubMessage(inputSchema, record)))); + } + + @Override + public synchronized WriterCommitMessage commit() throws IOException { + for (ApiFuture f : 
futures) { + try { + f.get(); + } catch (InterruptedException | ExecutionException e) { + publisher = Optional.empty(); + throw new IOException(e); + } + } + log.atInfo().log( + "All writes for partitionId:%d, taskId:%d, epochId:%d succeeded, committing...", + partitionId, taskId, epochId); + return PslWriterCommitMessage.create(futures.size()); + } + + @Override + public synchronized void abort() { + log.atWarning().log( + "One or more writes for partitionId:%d, taskId:%d, epochId:%d failed, aborted.", + partitionId, taskId, epochId); + } +} diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslDataWriterFactory.java b/src/main/java/com/google/cloud/pubsublite/spark/PslDataWriterFactory.java new file mode 100644 index 00000000..12d95921 --- /dev/null +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslDataWriterFactory.java @@ -0,0 +1,45 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.pubsublite.spark; + +import com.google.cloud.pubsublite.spark.internal.CachedPublishers; +import com.google.cloud.pubsublite.spark.internal.PublisherFactory; +import java.io.Serializable; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.sources.v2.writer.DataWriter; +import org.apache.spark.sql.sources.v2.writer.DataWriterFactory; +import org.apache.spark.sql.types.StructType; + +public class PslDataWriterFactory implements Serializable, DataWriterFactory { + private static final long serialVersionUID = -6904546364310978844L; + + private static final CachedPublishers CACHED_PUBLISHERS = new CachedPublishers(); + + private final StructType inputSchema; + private final PslWriteDataSourceOptions writeOptions; + + public PslDataWriterFactory(StructType inputSchema, PslWriteDataSourceOptions writeOptions) { + this.inputSchema = inputSchema; + this.writeOptions = writeOptions; + } + + @Override + public DataWriter createDataWriter(int partitionId, long taskId, long epochId) { + PublisherFactory pf = () -> CACHED_PUBLISHERS.getOrCreate(writeOptions); + return new PslDataWriter(partitionId, taskId, epochId, inputSchema, pf); + } +} diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslMicroBatchReader.java b/src/main/java/com/google/cloud/pubsublite/spark/PslMicroBatchReader.java index b2a346c0..a0f0dfee 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PslMicroBatchReader.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslMicroBatchReader.java @@ -24,6 +24,9 @@ import com.google.cloud.pubsublite.cloudpubsub.FlowControlSettings; import com.google.cloud.pubsublite.internal.CursorClient; import com.google.cloud.pubsublite.internal.wire.SubscriberFactory; +import com.google.cloud.pubsublite.spark.internal.MultiPartitionCommitter; +import com.google.cloud.pubsublite.spark.internal.PartitionSubscriberFactory; +import 
com.google.cloud.pubsublite.spark.internal.PerTopicHeadOffsetReader; import java.util.ArrayList; import java.util.List; import java.util.Optional; diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslDataSourceOptions.java b/src/main/java/com/google/cloud/pubsublite/spark/PslReadDataSourceOptions.java similarity index 92% rename from src/main/java/com/google/cloud/pubsublite/spark/PslDataSourceOptions.java rename to src/main/java/com/google/cloud/pubsublite/spark/PslReadDataSourceOptions.java index 380e022a..f5987788 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PslDataSourceOptions.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslReadDataSourceOptions.java @@ -33,6 +33,10 @@ import com.google.cloud.pubsublite.internal.wire.RoutingMetadata; import com.google.cloud.pubsublite.internal.wire.ServiceClients; import com.google.cloud.pubsublite.internal.wire.SubscriberBuilder; +import com.google.cloud.pubsublite.spark.internal.MultiPartitionCommitter; +import com.google.cloud.pubsublite.spark.internal.MultiPartitionCommitterImpl; +import com.google.cloud.pubsublite.spark.internal.PartitionSubscriberFactory; +import com.google.cloud.pubsublite.spark.internal.PslCredentialsProvider; import com.google.cloud.pubsublite.v1.AdminServiceClient; import com.google.cloud.pubsublite.v1.AdminServiceSettings; import com.google.cloud.pubsublite.v1.CursorServiceClient; @@ -47,7 +51,7 @@ import org.apache.spark.sql.sources.v2.DataSourceOptions; @AutoValue -public abstract class PslDataSourceOptions implements Serializable { +public abstract class PslReadDataSourceOptions implements Serializable { private static final long serialVersionUID = 2680059304693561607L; @Nullable @@ -60,7 +64,7 @@ public abstract class PslDataSourceOptions implements Serializable { public abstract long maxMessagesPerBatch(); public static Builder builder() { - return new AutoValue_PslDataSourceOptions.Builder() + return new AutoValue_PslReadDataSourceOptions.Builder() 
.setCredentialsKey(null) .setMaxMessagesPerBatch(Constants.DEFAULT_MAX_MESSAGES_PER_BATCH) .setFlowControlSettings( @@ -70,7 +74,7 @@ public static Builder builder() { .build()); } - public static PslDataSourceOptions fromSparkDataSourceOptions(DataSourceOptions options) { + public static PslReadDataSourceOptions fromSparkDataSourceOptions(DataSourceOptions options) { if (!options.get(Constants.SUBSCRIPTION_CONFIG_KEY).isPresent()) { throw new IllegalArgumentException(Constants.SUBSCRIPTION_CONFIG_KEY + " is required."); } @@ -115,7 +119,7 @@ public abstract static class Builder { public abstract Builder setFlowControlSettings(FlowControlSettings flowControlSettings); - public abstract PslDataSourceOptions build(); + public abstract PslReadDataSourceOptions build(); } MultiPartitionCommitter newMultiPartitionCommitter(long topicPartitionCount) { @@ -135,7 +139,7 @@ PartitionSubscriberFactory getSubscriberFactory() { PubsubContext context = PubsubContext.of(Constants.FRAMEWORK); SubscriberServiceSettings.Builder settingsBuilder = SubscriberServiceSettings.newBuilder() - .setCredentialsProvider(new PslCredentialsProvider(this)); + .setCredentialsProvider(new PslCredentialsProvider(credentialsKey())); ServiceClients.addDefaultMetadata( context, RoutingMetadata.of(this.subscriptionPath(), partition), settingsBuilder); try { @@ -161,7 +165,7 @@ private CursorServiceClient newCursorServiceClient() { addDefaultSettings( this.subscriptionPath().location().region(), CursorServiceSettings.newBuilder() - .setCredentialsProvider(new PslCredentialsProvider(this)))); + .setCredentialsProvider(new PslCredentialsProvider(credentialsKey())))); } catch (IOException e) { throw new IllegalStateException("Unable to create CursorServiceClient."); } @@ -181,7 +185,7 @@ private AdminServiceClient newAdminServiceClient() { addDefaultSettings( this.subscriptionPath().location().region(), AdminServiceSettings.newBuilder() - .setCredentialsProvider(new PslCredentialsProvider(this)))); + 
.setCredentialsProvider(new PslCredentialsProvider(credentialsKey())))); } catch (IOException e) { throw new IllegalStateException("Unable to create AdminServiceClient."); } @@ -201,7 +205,7 @@ private TopicStatsServiceClient newTopicStatsServiceClient() { addDefaultSettings( this.subscriptionPath().location().region(), TopicStatsServiceSettings.newBuilder() - .setCredentialsProvider(new PslCredentialsProvider(this)))); + .setCredentialsProvider(new PslCredentialsProvider(credentialsKey())))); } catch (IOException e) { throw new IllegalStateException("Unable to create TopicStatsServiceClient."); } diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslSparkUtils.java b/src/main/java/com/google/cloud/pubsublite/spark/PslSparkUtils.java index 1d54fe19..2510315a 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PslSparkUtils.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslSparkUtils.java @@ -19,12 +19,16 @@ import static com.google.common.base.Preconditions.checkArgument; import static scala.collection.JavaConverters.asScalaBufferConverter; +import com.google.cloud.pubsublite.Message; import com.google.cloud.pubsublite.Offset; import com.google.cloud.pubsublite.Partition; import com.google.cloud.pubsublite.SequencedMessage; import com.google.cloud.pubsublite.SubscriptionPath; import com.google.cloud.pubsublite.internal.CursorClient; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableListMultimap; import com.google.common.collect.ListMultimap; +import com.google.common.flogger.GoogleLogger; import com.google.common.math.LongMath; import com.google.protobuf.ByteString; import com.google.protobuf.util.Timestamps; @@ -34,15 +38,29 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; import java.util.stream.Collectors; import org.apache.spark.sql.catalyst.InternalRow; import 
org.apache.spark.sql.catalyst.util.ArrayBasedMapData; +import org.apache.spark.sql.catalyst.util.ArrayData; import org.apache.spark.sql.catalyst.util.GenericArrayData; +import org.apache.spark.sql.catalyst.util.MapData; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; import org.apache.spark.unsafe.types.ByteArray; import org.apache.spark.unsafe.types.UTF8String; +import scala.Option; +import scala.compat.java8.functionConverterImpls.FromJavaBiConsumer; public class PslSparkUtils { - private static ArrayBasedMapData convertAttributesToSparkMap( + + private static final GoogleLogger log = GoogleLogger.forEnclosingClass(); + + @VisibleForTesting + public static ArrayBasedMapData convertAttributesToSparkMap( ListMultimap<String, ByteString> attributeMap) { List<UTF8String> keyList = new ArrayList<>(); @@ -83,6 +101,97 @@ public static InternalRow toInternalRow( return InternalRow.apply(asScalaBufferConverter(list).asScala()); } + @SuppressWarnings("unchecked") + private static <T> void extractVal( + StructType inputSchema, + InternalRow row, + String fieldName, + DataType expectedDataType, + Consumer<T> consumer) { + Option<Object> idxOr = inputSchema.getFieldIndex(fieldName); + if (!idxOr.isEmpty()) { + Integer idx = (Integer) idxOr.get(); + // DataType should match and not throw ClassCastException, as we already verified + type match in driver node. 
+ consumer.accept((T) row.get(idx, expectedDataType)); + } + } + + public static Message toPubSubMessage(StructType inputSchema, InternalRow row) { + Message.Builder builder = Message.builder(); + extractVal( + inputSchema, + row, + "key", + Constants.PUBLISH_FIELD_TYPES.get("key"), + (byte[] o) -> builder.setKey(ByteString.copyFrom(o))); + extractVal( + inputSchema, + row, + "data", + Constants.PUBLISH_FIELD_TYPES.get("data"), + (byte[] o) -> builder.setData(ByteString.copyFrom(o))); + extractVal( + inputSchema, + row, + "event_timestamp", + Constants.PUBLISH_FIELD_TYPES.get("event_timestamp"), + (Long o) -> builder.setEventTime(Timestamps.fromMicros(o))); + extractVal( + inputSchema, + row, + "attributes", + Constants.PUBLISH_FIELD_TYPES.get("attributes"), + (MapData o) -> { + ImmutableListMultimap.Builder<String, ByteString> attributeMapBuilder = + ImmutableListMultimap.builder(); + o.foreach( + DataTypes.StringType, + Constants.ATTRIBUTES_PER_KEY_DATATYPE, + new FromJavaBiConsumer<>( + (k, v) -> { + String key = ((UTF8String) k).toString(); + ArrayData values = (ArrayData) v; + values.foreach( + DataTypes.BinaryType, + new FromJavaBiConsumer<>( + (idx, a) -> + attributeMapBuilder.put(key, ByteString.copyFrom((byte[]) a)))); + })); + builder.setAttributes(attributeMapBuilder.build()); + }); + return builder.build(); + } + + /** + * Make sure data fields for publish have expected Spark DataType if they exist. + * + * @param inputSchema input table schema to write to Pub/Sub Lite. + * @throws IllegalArgumentException if any DataType mismatch detected. + */ + public static void verifyWriteInputSchema(StructType inputSchema) { + Constants.PUBLISH_FIELD_TYPES.forEach( + (k, v) -> { + Option<Object> idxOr = inputSchema.getFieldIndex(k); + if (!idxOr.isEmpty()) { + StructField f = inputSchema.apply((int) idxOr.get()); + if (f.dataType() != v) { + throw new IllegalArgumentException( + String.format( + "Column %s in input schema to write to " + + "Pub/Sub Lite has a wrong DataType. 
Actual: %s, expected: %s.", + k, f.dataType(), v)); + } + } else { + log.atInfo().atMostEvery(5, TimeUnit.MINUTES).log( + "Input schema to write " + + "to Pub/Sub Lite doesn't contain %s column, this field for all rows will " + + "be set to empty.", + k); + } + }); + } + public static SparkSourceOffset toSparkSourceOffset(PslSourceOffset pslSourceOffset) { return new SparkSourceOffset( pslSourceOffset.partitionOffsetMap().entrySet().stream() diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslStreamWriter.java b/src/main/java/com/google/cloud/pubsublite/spark/PslStreamWriter.java new file mode 100644 index 00000000..b2efaf80 --- /dev/null +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslStreamWriter.java @@ -0,0 +1,65 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.pubsublite.spark; + +import com.google.common.flogger.GoogleLogger; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.sources.v2.writer.DataWriterFactory; +import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage; +import org.apache.spark.sql.sources.v2.writer.streaming.StreamWriter; +import org.apache.spark.sql.types.StructType; + +public class PslStreamWriter implements StreamWriter { + + private static final GoogleLogger log = GoogleLogger.forEnclosingClass(); + + private final StructType inputSchema; + private final PslWriteDataSourceOptions writeOptions; + + public PslStreamWriter(StructType inputSchema, PslWriteDataSourceOptions writeOptions) { + this.inputSchema = inputSchema; + this.writeOptions = writeOptions; + } + + @Override + public void commit(long epochId, WriterCommitMessage[] messages) { + log.atInfo().log("Committed %d messages for epochId:%d.", countMessages(messages), epochId); + } + + @Override + public void abort(long epochId, WriterCommitMessage[] messages) { + log.atWarning().log( + "Epoch id: %d is aborted, %d messages might have been published.", + epochId, countMessages(messages)); + } + + private long countMessages(WriterCommitMessage[] messages) { + long cnt = 0; + for (WriterCommitMessage m : messages) { + // It's not guaranteed to be typed PslWriterCommitMessage when abort. 
+ if (m instanceof PslWriterCommitMessage) { + cnt += ((PslWriterCommitMessage) m).numMessages(); + } + } + return cnt; + } + + @Override + public DataWriterFactory<InternalRow> createWriterFactory() { + return new PslDataWriterFactory(inputSchema, writeOptions); + } +} diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslWriteDataSourceOptions.java b/src/main/java/com/google/cloud/pubsublite/spark/PslWriteDataSourceOptions.java new file mode 100644 index 00000000..44b6d95d --- /dev/null +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslWriteDataSourceOptions.java @@ -0,0 +1,133 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.pubsublite.spark; + +import static com.google.cloud.pubsublite.internal.ExtractStatus.toCanonical; +import static com.google.cloud.pubsublite.internal.wire.ServiceClients.addDefaultMetadata; +import static com.google.cloud.pubsublite.internal.wire.ServiceClients.addDefaultSettings; + +import com.google.api.gax.rpc.ApiException; +import com.google.auto.value.AutoValue; +import com.google.cloud.pubsublite.AdminClient; +import com.google.cloud.pubsublite.AdminClientSettings; +import com.google.cloud.pubsublite.MessageMetadata; +import com.google.cloud.pubsublite.Partition; +import com.google.cloud.pubsublite.TopicPath; +import com.google.cloud.pubsublite.internal.Publisher; +import com.google.cloud.pubsublite.internal.wire.PartitionCountWatchingPublisherSettings; +import com.google.cloud.pubsublite.internal.wire.PubsubContext; +import com.google.cloud.pubsublite.internal.wire.RoutingMetadata; +import com.google.cloud.pubsublite.internal.wire.SinglePartitionPublisherBuilder; +import com.google.cloud.pubsublite.spark.internal.PslCredentialsProvider; +import com.google.cloud.pubsublite.v1.AdminServiceClient; +import com.google.cloud.pubsublite.v1.AdminServiceSettings; +import com.google.cloud.pubsublite.v1.PublisherServiceClient; +import com.google.cloud.pubsublite.v1.PublisherServiceSettings; +import java.io.Serializable; +import javax.annotation.Nullable; +import org.apache.spark.sql.sources.v2.DataSourceOptions; + +@AutoValue +public abstract class PslWriteDataSourceOptions implements Serializable { + + @Nullable + public abstract String credentialsKey(); + + public abstract TopicPath topicPath(); + + public static Builder builder() { + return new AutoValue_PslWriteDataSourceOptions.Builder().setCredentialsKey(null); + } + + @AutoValue.Builder + public abstract static class Builder { + + public abstract PslWriteDataSourceOptions.Builder setCredentialsKey(String credentialsKey); + + public abstract PslWriteDataSourceOptions.Builder 
setTopicPath(TopicPath topicPath); + + public abstract PslWriteDataSourceOptions build(); + } + + public static PslWriteDataSourceOptions fromSparkDataSourceOptions(DataSourceOptions options) { + if (!options.get(Constants.TOPIC_CONFIG_KEY).isPresent()) { + throw new IllegalArgumentException(Constants.TOPIC_CONFIG_KEY + " is required."); + } + + Builder builder = builder(); + String topicPathVal = options.get(Constants.TOPIC_CONFIG_KEY).get(); + try { + builder.setTopicPath(TopicPath.parse(topicPathVal)); + } catch (ApiException e) { + throw new IllegalArgumentException("Unable to parse topic path " + topicPathVal, e); + } + options.get(Constants.CREDENTIALS_KEY_CONFIG_KEY).ifPresent(builder::setCredentialsKey); + return builder.build(); + } + + public PslCredentialsProvider getCredentialProvider() { + return new PslCredentialsProvider(credentialsKey()); + } + + public Publisher<MessageMetadata> createNewPublisher() { + return PartitionCountWatchingPublisherSettings.newBuilder() + .setTopic(topicPath()) + .setPublisherFactory( + partition -> + SinglePartitionPublisherBuilder.newBuilder() + .setTopic(topicPath()) + .setPartition(partition) + .setServiceClient(newServiceClient(partition)) + .build()) + .setAdminClient(getAdminClient()) + .build() + .instantiate(); + } + + private PublisherServiceClient newServiceClient(Partition partition) throws ApiException { + PublisherServiceSettings.Builder settingsBuilder = PublisherServiceSettings.newBuilder(); + settingsBuilder = settingsBuilder.setCredentialsProvider(getCredentialProvider()); + settingsBuilder = + addDefaultMetadata( + PubsubContext.of(Constants.FRAMEWORK), + RoutingMetadata.of(topicPath(), partition), + settingsBuilder); + try { + return PublisherServiceClient.create( + addDefaultSettings(topicPath().location().region(), settingsBuilder)); + } catch (Throwable t) { + throw toCanonical(t).underlying; + } + } + + private AdminClient getAdminClient() throws ApiException { + try { + return AdminClient.create( + 
AdminClientSettings.newBuilder() + .setServiceClient( + AdminServiceClient.create( + addDefaultSettings( + topicPath().location().region(), + AdminServiceSettings.newBuilder() + .setCredentialsProvider(getCredentialProvider())))) + .setRegion(topicPath().location().region()) + .build()); + } catch (Throwable t) { + throw toCanonical(t).underlying; + } + } +} diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslWriterCommitMessage.java b/src/main/java/com/google/cloud/pubsublite/spark/PslWriterCommitMessage.java new file mode 100644 index 00000000..9204d169 --- /dev/null +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslWriterCommitMessage.java @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.pubsublite.spark; + +import com.google.auto.value.AutoValue; +import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage; + +@AutoValue +public abstract class PslWriterCommitMessage implements WriterCommitMessage { + + public abstract long numMessages(); + + public static PslWriterCommitMessage create(long numMessages) { + return new AutoValue_PslWriterCommitMessage(numMessages); + } +} diff --git a/src/main/java/com/google/cloud/pubsublite/spark/CachedPartitionCountReader.java b/src/main/java/com/google/cloud/pubsublite/spark/internal/CachedPartitionCountReader.java similarity index 96% rename from src/main/java/com/google/cloud/pubsublite/spark/CachedPartitionCountReader.java rename to src/main/java/com/google/cloud/pubsublite/spark/internal/CachedPartitionCountReader.java index 35555805..a144d253 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/CachedPartitionCountReader.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/internal/CachedPartitionCountReader.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.google.cloud.pubsublite.spark; +package com.google.cloud.pubsublite.spark.internal; import com.google.cloud.pubsublite.AdminClient; import com.google.cloud.pubsublite.PartitionLookupUtils; diff --git a/src/main/java/com/google/cloud/pubsublite/spark/internal/CachedPublishers.java b/src/main/java/com/google/cloud/pubsublite/spark/internal/CachedPublishers.java new file mode 100644 index 00000000..711a241a --- /dev/null +++ b/src/main/java/com/google/cloud/pubsublite/spark/internal/CachedPublishers.java @@ -0,0 +1,64 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.pubsublite.spark.internal; + +import com.google.api.core.ApiService; +import com.google.cloud.pubsublite.MessageMetadata; +import com.google.cloud.pubsublite.internal.Publisher; +import com.google.cloud.pubsublite.spark.PslWriteDataSourceOptions; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import javax.annotation.concurrent.GuardedBy; + +/** Cached {@link Publisher}s to reuse publisher of same settings in the same task. */ +public class CachedPublishers { + + // TODO(jiangmichaellll): Use com.google.cloud.pubsublite.internal.wire.SystemExecutors + // once new PSL client library is released. 
+ private final Executor listenerExecutor = Executors.newSingleThreadExecutor(); + + @GuardedBy("this") + private static final Map<PslWriteDataSourceOptions, Publisher<MessageMetadata>> publishers = + new HashMap<>(); + + public synchronized Publisher<MessageMetadata> getOrCreate( + PslWriteDataSourceOptions writeOptions) { + Publisher<MessageMetadata> publisher = publishers.get(writeOptions); + if (publisher != null && publisher.state() == ApiService.State.RUNNING) { + return publisher; + } + + publisher = writeOptions.createNewPublisher(); + publishers.put(writeOptions, publisher); + publisher.addListener( + new ApiService.Listener() { + @Override + public void failed(ApiService.State s, Throwable t) { + removePublisher(writeOptions); + } + }, + listenerExecutor); + publisher.startAsync().awaitRunning(); + return publisher; + } + + private synchronized void removePublisher(PslWriteDataSourceOptions writeOptions) { + publishers.remove(writeOptions); + } +} diff --git a/src/main/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReader.java b/src/main/java/com/google/cloud/pubsublite/spark/internal/LimitingHeadOffsetReader.java similarity index 97% rename from src/main/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReader.java rename to src/main/java/com/google/cloud/pubsublite/spark/internal/LimitingHeadOffsetReader.java index 7bad0ffc..a974ba23 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReader.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/internal/LimitingHeadOffsetReader.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.google.cloud.pubsublite.spark; +package com.google.cloud.pubsublite.spark.internal; import com.github.benmanes.caffeine.cache.AsyncLoadingCache; import com.github.benmanes.caffeine.cache.Caffeine; @@ -26,6 +26,7 @@ import com.google.cloud.pubsublite.TopicPath; import com.google.cloud.pubsublite.internal.TopicStatsClient; import com.google.cloud.pubsublite.proto.Cursor; +import com.google.cloud.pubsublite.spark.PslSourceOffset; import com.google.common.annotations.VisibleForTesting; import com.google.common.flogger.GoogleLogger; import com.google.common.util.concurrent.MoreExecutors; diff --git a/src/main/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitter.java b/src/main/java/com/google/cloud/pubsublite/spark/internal/MultiPartitionCommitter.java similarity index 89% rename from src/main/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitter.java rename to src/main/java/com/google/cloud/pubsublite/spark/internal/MultiPartitionCommitter.java index d42f33ca..bf6441e8 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitter.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/internal/MultiPartitionCommitter.java @@ -14,10 +14,11 @@ * limitations under the License. 
*/ -package com.google.cloud.pubsublite.spark; +package com.google.cloud.pubsublite.spark.internal; import com.google.cloud.pubsublite.Partition; import com.google.cloud.pubsublite.internal.wire.Committer; +import com.google.cloud.pubsublite.spark.PslSourceOffset; import java.io.Closeable; public interface MultiPartitionCommitter extends Closeable { diff --git a/src/main/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImpl.java b/src/main/java/com/google/cloud/pubsublite/spark/internal/MultiPartitionCommitterImpl.java similarity index 97% rename from src/main/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImpl.java rename to src/main/java/com/google/cloud/pubsublite/spark/internal/MultiPartitionCommitterImpl.java index 7ebec891..4c221f1d 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImpl.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/internal/MultiPartitionCommitterImpl.java @@ -14,13 +14,14 @@ * limitations under the License. 
*/ -package com.google.cloud.pubsublite.spark; +package com.google.cloud.pubsublite.spark.internal; import com.google.api.core.ApiFuture; import com.google.api.core.ApiFutureCallback; import com.google.api.core.ApiFutures; import com.google.cloud.pubsublite.Partition; import com.google.cloud.pubsublite.internal.wire.Committer; +import com.google.cloud.pubsublite.spark.PslSourceOffset; import com.google.common.annotations.VisibleForTesting; import com.google.common.flogger.GoogleLogger; import com.google.common.util.concurrent.MoreExecutors; diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PartitionCountReader.java b/src/main/java/com/google/cloud/pubsublite/spark/internal/PartitionCountReader.java similarity index 93% rename from src/main/java/com/google/cloud/pubsublite/spark/PartitionCountReader.java rename to src/main/java/com/google/cloud/pubsublite/spark/internal/PartitionCountReader.java index 934d40be..90991835 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PartitionCountReader.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/internal/PartitionCountReader.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.google.cloud.pubsublite.spark; +package com.google.cloud.pubsublite.spark.internal; import java.io.Closeable; diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PartitionSubscriberFactory.java b/src/main/java/com/google/cloud/pubsublite/spark/internal/PartitionSubscriberFactory.java similarity index 95% rename from src/main/java/com/google/cloud/pubsublite/spark/PartitionSubscriberFactory.java rename to src/main/java/com/google/cloud/pubsublite/spark/internal/PartitionSubscriberFactory.java index 9ea51670..d7a16257 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PartitionSubscriberFactory.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/internal/PartitionSubscriberFactory.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.google.cloud.pubsublite.spark; +package com.google.cloud.pubsublite.spark.internal; import com.google.api.gax.rpc.ApiException; import com.google.cloud.pubsublite.Partition; diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PerTopicHeadOffsetReader.java b/src/main/java/com/google/cloud/pubsublite/spark/internal/PerTopicHeadOffsetReader.java similarity index 88% rename from src/main/java/com/google/cloud/pubsublite/spark/PerTopicHeadOffsetReader.java rename to src/main/java/com/google/cloud/pubsublite/spark/internal/PerTopicHeadOffsetReader.java index 21e0bc63..9ccd72c5 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PerTopicHeadOffsetReader.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/internal/PerTopicHeadOffsetReader.java @@ -14,8 +14,9 @@ * limitations under the License. */ -package com.google.cloud.pubsublite.spark; +package com.google.cloud.pubsublite.spark.internal; +import com.google.cloud.pubsublite.spark.PslSourceOffset; import java.io.Closeable; public interface PerTopicHeadOffsetReader extends Closeable { diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslCredentialsProvider.java b/src/main/java/com/google/cloud/pubsublite/spark/internal/PslCredentialsProvider.java similarity index 85% rename from src/main/java/com/google/cloud/pubsublite/spark/PslCredentialsProvider.java rename to src/main/java/com/google/cloud/pubsublite/spark/internal/PslCredentialsProvider.java index 6dce5272..6022a655 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PslCredentialsProvider.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/internal/PslCredentialsProvider.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.google.cloud.pubsublite.spark; +package com.google.cloud.pubsublite.spark.internal; import com.google.api.client.util.Base64; import com.google.api.gax.core.CredentialsProvider; @@ -23,17 +23,17 @@ import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.UncheckedIOException; +import javax.annotation.Nullable; public class PslCredentialsProvider implements CredentialsProvider { private final Credentials credentials; - public PslCredentialsProvider(PslDataSourceOptions options) { - if (options.credentialsKey() != null) { - this.credentials = createCredentialsFromKey(options.credentialsKey()); - } else { - this.credentials = createDefaultCredentials(); - } + public PslCredentialsProvider(@Nullable String credentialsKey) { + this.credentials = + credentialsKey != null + ? createCredentialsFromKey(credentialsKey) + : createDefaultCredentials(); } private static Credentials createCredentialsFromKey(String key) { diff --git a/src/main/java/com/google/cloud/pubsublite/spark/internal/PublisherFactory.java b/src/main/java/com/google/cloud/pubsublite/spark/internal/PublisherFactory.java new file mode 100644 index 00000000..81750def --- /dev/null +++ b/src/main/java/com/google/cloud/pubsublite/spark/internal/PublisherFactory.java @@ -0,0 +1,26 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.pubsublite.spark.internal; + +import com.google.cloud.pubsublite.MessageMetadata; +import com.google.cloud.pubsublite.internal.Publisher; +import java.io.Serializable; + +public interface PublisherFactory extends Serializable { + + Publisher<MessageMetadata> newPublisher(); +} diff --git a/src/test/java/com/google/cloud/pubsublite/spark/PslContinuousReaderTest.java b/src/test/java/com/google/cloud/pubsublite/spark/PslContinuousReaderTest.java index 36bcdf91..d5cbc30e 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/PslContinuousReaderTest.java +++ b/src/test/java/com/google/cloud/pubsublite/spark/PslContinuousReaderTest.java @@ -24,14 +24,17 @@ import com.google.cloud.pubsublite.*; import com.google.cloud.pubsublite.internal.CursorClient; import com.google.cloud.pubsublite.internal.testing.UnitTestExamples; +import com.google.cloud.pubsublite.spark.internal.MultiPartitionCommitter; +import com.google.cloud.pubsublite.spark.internal.PartitionCountReader; +import com.google.cloud.pubsublite.spark.internal.PartitionSubscriberFactory; import com.google.common.collect.ImmutableMap; import java.util.Optional; import org.junit.Test; public class PslContinuousReaderTest { - private static final PslDataSourceOptions OPTIONS = - PslDataSourceOptions.builder() + private static final PslReadDataSourceOptions OPTIONS = + PslReadDataSourceOptions.builder() .setSubscriptionPath(UnitTestExamples.exampleSubscriptionPath()) .build(); private final CursorClient cursorClient = mock(CursorClient.class); diff --git a/src/test/java/com/google/cloud/pubsublite/spark/PslDataWriterTest.java b/src/test/java/com/google/cloud/pubsublite/spark/PslDataWriterTest.java new file mode 100644 index 00000000..a3f6f1a8 --- /dev/null +++ b/src/test/java/com/google/cloud/pubsublite/spark/PslDataWriterTest.java @@ -0,0 +1,84 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in 
compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.google.cloud.pubsublite.spark; + +import static com.google.common.truth.Truth.assertThat; + import static org.junit.Assert.assertThrows; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyInt; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import com.google.api.core.ApiFutures; +import com.google.cloud.pubsublite.MessageMetadata; +import com.google.cloud.pubsublite.Offset; +import com.google.cloud.pubsublite.Partition; +import com.google.cloud.pubsublite.internal.Publisher; +import com.google.cloud.pubsublite.spark.internal.PublisherFactory; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import org.junit.Test; + +public class PslDataWriterTest { + + private final InternalRow row = mock(InternalRow.class); + + @SuppressWarnings("unchecked") + private final Publisher<MessageMetadata> publisher = mock(Publisher.class); + + private final PublisherFactory publisherFactory = mock(PublisherFactory.class); + private final StructType keyOnly = + new StructType( + new StructField[] { + new StructField( + "key", Constants.PUBLISH_FIELD_TYPES.get("key"), false, Metadata.empty()), + }); + + private final PslDataWriter writer = new 
PslDataWriter(1L, 2L, 3L, keyOnly, publisherFactory); + + @Test + public void testAllSuccess() throws IOException { + when(publisherFactory.newPublisher()).thenReturn(publisher); + when(publisher.publish(any())) + .thenReturn( + ApiFutures.immediateFuture(MessageMetadata.of(Partition.of(0L), Offset.of(0L)))); + when(row.get(anyInt(), eq(DataTypes.BinaryType))) + .thenReturn("abc".getBytes(StandardCharsets.UTF_8)); + writer.write(row); + writer.write(row); + assertThat(writer.commit()).isEqualTo(PslWriterCommitMessage.create(2)); + } + + @Test + public void testPartialFail() { + when(publisherFactory.newPublisher()).thenReturn(publisher); + when(publisher.publish(any())) + .thenReturn(ApiFutures.immediateFuture(MessageMetadata.of(Partition.of(0L), Offset.of(0L)))) + .thenReturn(ApiFutures.immediateFailedFuture(new InternalError(""))); + when(row.get(anyInt(), eq(DataTypes.BinaryType))) + .thenReturn("abc".getBytes(StandardCharsets.UTF_8)); + writer.write(row); + writer.write(row); + assertThrows(IOException.class, writer::commit); + } +} diff --git a/src/test/java/com/google/cloud/pubsublite/spark/PslMicroBatchReaderTest.java b/src/test/java/com/google/cloud/pubsublite/spark/PslMicroBatchReaderTest.java index 3692e7a5..23bee103 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/PslMicroBatchReaderTest.java +++ b/src/test/java/com/google/cloud/pubsublite/spark/PslMicroBatchReaderTest.java @@ -28,13 +28,16 @@ import com.google.cloud.pubsublite.Partition; import com.google.cloud.pubsublite.internal.CursorClient; import com.google.cloud.pubsublite.internal.testing.UnitTestExamples; +import com.google.cloud.pubsublite.spark.internal.MultiPartitionCommitter; +import com.google.cloud.pubsublite.spark.internal.PartitionSubscriberFactory; +import com.google.cloud.pubsublite.spark.internal.PerTopicHeadOffsetReader; import com.google.common.collect.ImmutableMap; import java.util.Optional; import org.junit.Test; public class PslMicroBatchReaderTest { - private static 
final PslDataSourceOptions OPTIONS = - PslDataSourceOptions.builder() + private static final PslReadDataSourceOptions OPTIONS = + PslReadDataSourceOptions.builder() .setSubscriptionPath(UnitTestExamples.exampleSubscriptionPath()) .build(); private final CursorClient cursorClient = mock(CursorClient.class); diff --git a/src/test/java/com/google/cloud/pubsublite/spark/PslDataSourceOptionsTest.java b/src/test/java/com/google/cloud/pubsublite/spark/PslReadDataSourceOptionsTest.java similarity index 89% rename from src/test/java/com/google/cloud/pubsublite/spark/PslDataSourceOptionsTest.java rename to src/test/java/com/google/cloud/pubsublite/spark/PslReadDataSourceOptionsTest.java index bc794ead..2db8f705 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/PslDataSourceOptionsTest.java +++ b/src/test/java/com/google/cloud/pubsublite/spark/PslReadDataSourceOptionsTest.java @@ -22,7 +22,7 @@ import org.apache.spark.sql.sources.v2.DataSourceOptions; import org.junit.Test; -public class PslDataSourceOptionsTest { +public class PslReadDataSourceOptionsTest { @Test public void testInvalidSubPath() { @@ -30,6 +30,6 @@ public void testInvalidSubPath() { new DataSourceOptions(ImmutableMap.of(Constants.SUBSCRIPTION_CONFIG_KEY, "invalid/path")); assertThrows( IllegalArgumentException.class, - () -> PslDataSourceOptions.fromSparkDataSourceOptions(options)); + () -> PslReadDataSourceOptions.fromSparkDataSourceOptions(options)); } } diff --git a/src/test/java/com/google/cloud/pubsublite/spark/PslSparkUtilsTest.java b/src/test/java/com/google/cloud/pubsublite/spark/PslSparkUtilsTest.java index b3b81246..7081082f 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/PslSparkUtilsTest.java +++ b/src/test/java/com/google/cloud/pubsublite/spark/PslSparkUtilsTest.java @@ -17,6 +17,8 @@ package com.google.cloud.pubsublite.spark; import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertThrows; +import static 
scala.collection.JavaConverters.asScalaBufferConverter; import com.google.cloud.pubsublite.Message; import com.google.cloud.pubsublite.Offset; @@ -29,10 +31,18 @@ import com.google.protobuf.Timestamp; import com.google.protobuf.util.Timestamps; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.util.ArrayData; import org.apache.spark.sql.catalyst.util.GenericArrayData; import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.unsafe.types.ByteArray; import org.junit.Test; public class PslSparkUtilsTest { @@ -105,4 +115,94 @@ public void testToPslPartitionOffset() { assertThat(PslSparkUtils.toPslPartitionOffset(sparkPartitionOffset)) .isEqualTo(pslPartitionOffset); } + + @Test + public void testToPubSubMessage() { + Timestamp eventTimestamp = Timestamp.newBuilder().setSeconds(10000000L).build(); + Message message = + Message.builder() + .setKey(ByteString.copyFromUtf8("key")) + .setData(ByteString.copyFromUtf8("data")) + .setEventTime(eventTimestamp) + .setAttributes( + ImmutableListMultimap.of( + "key1", ByteString.copyFromUtf8("val1"), + "key1", ByteString.copyFromUtf8("val2"), + "key2", ByteString.copyFromUtf8("val3"))) + .build(); + List list = + new ArrayList<>( + Arrays.asList( + ByteArray.concat(message.key().toByteArray()), + ByteArray.concat(message.data().toByteArray()), + PslSparkUtils.convertAttributesToSparkMap(message.attributes()), + Timestamps.toMicros(message.eventTime().get()), + "abc".getBytes())); + InternalRow row = InternalRow.apply(asScalaBufferConverter(list).asScala()); + + StructType structType = + new StructType( + new StructField[] { + new StructField("key", DataTypes.BinaryType, false, Metadata.empty()), 
+ new StructField("data", DataTypes.BinaryType, false, Metadata.empty()), + new StructField("attributes", Constants.ATTRIBUTES_DATATYPE, true, Metadata.empty()), + new StructField("event_timestamp", DataTypes.TimestampType, true, Metadata.empty()), + new StructField("random_extra_field", DataTypes.BinaryType, false, Metadata.empty()) + }); + + assertThat(message).isEqualTo(PslSparkUtils.toPubSubMessage(structType, row)); + } + + @Test + public void testToPubSubMessageLongForEventTimestamp() { + Message expectedMsg = Message.builder().setEventTime(Timestamps.fromMicros(100000L)).build(); + + StructType structType = + new StructType( + new StructField[] { + new StructField("event_timestamp", DataTypes.LongType, false, Metadata.empty()) + }); + List list = Collections.singletonList(/*Timestamp=*/ 100000L); + InternalRow row = InternalRow.apply(asScalaBufferConverter(list).asScala()); + + Message message = PslSparkUtils.toPubSubMessage(structType, row); + assertThat(message).isEqualTo(expectedMsg); + } + + @Test + public void testVerifyWriteInputSchema() { + PslSparkUtils.verifyWriteInputSchema(Constants.DEFAULT_SCHEMA); + + StructType goodThoughMissing = + new StructType( + new StructField[] { + new StructField("offset", DataTypes.LongType, false, Metadata.empty()), + new StructField( + "key", Constants.PUBLISH_FIELD_TYPES.get("key"), false, Metadata.empty()), + new StructField( + "publish_timestamp", DataTypes.TimestampType, false, Metadata.empty()), + new StructField( + "attributes", + Constants.PUBLISH_FIELD_TYPES.get("attributes"), + true, + Metadata.empty()) + }); + PslSparkUtils.verifyWriteInputSchema(goodThoughMissing); + + StructType bad = + new StructType( + new StructField[] { + new StructField("offset", DataTypes.LongType, false, Metadata.empty()), + // Key field wrong DataType + new StructField("key", DataTypes.StringType, false, Metadata.empty()), + new StructField( + "publish_timestamp", DataTypes.TimestampType, false, Metadata.empty()), + new 
StructField( + "attributes", + Constants.PUBLISH_FIELD_TYPES.get("attributes"), + true, + Metadata.empty()) + }); + assertThrows(IllegalArgumentException.class, () -> PslSparkUtils.verifyWriteInputSchema(bad)); + } } diff --git a/src/test/java/com/google/cloud/pubsublite/spark/PslStreamWriterTest.java b/src/test/java/com/google/cloud/pubsublite/spark/PslStreamWriterTest.java new file mode 100644 index 00000000..35b525d7 --- /dev/null +++ b/src/test/java/com/google/cloud/pubsublite/spark/PslStreamWriterTest.java @@ -0,0 +1,50 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.pubsublite.spark; + +import com.google.cloud.pubsublite.internal.testing.UnitTestExamples; +import org.apache.spark.sql.sources.v2.writer.WriterCommitMessage; +import org.junit.Test; + +public class PslStreamWriterTest { + + private final PslStreamWriter writer = + new PslStreamWriter( + Constants.DEFAULT_SCHEMA, + PslWriteDataSourceOptions.builder() + .setTopicPath(UnitTestExamples.exampleTopicPath()) + .build()); + private final PslWriterCommitMessage message1 = PslWriterCommitMessage.create(10); + private final PslWriterCommitMessage message2 = PslWriterCommitMessage.create(5); + + private static class AbortCommitMessage implements WriterCommitMessage {} + + @Test + public void testCommit() { + writer.commit(100, new WriterCommitMessage[] {message1, message2}); + } + + @Test + public void testAbort() { + writer.abort(100, new WriterCommitMessage[] {message1, message2, new AbortCommitMessage()}); + } + + @Test + public void testCreateFactory() { + writer.createWriterFactory(); + } +} diff --git a/src/test/java/com/google/cloud/pubsublite/spark/PslWriteDataSourceOptionsTest.java b/src/test/java/com/google/cloud/pubsublite/spark/PslWriteDataSourceOptionsTest.java new file mode 100644 index 00000000..5cf10f50 --- /dev/null +++ b/src/test/java/com/google/cloud/pubsublite/spark/PslWriteDataSourceOptionsTest.java @@ -0,0 +1,35 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.pubsublite.spark; + +import static org.junit.Assert.assertThrows; + +import com.google.common.collect.ImmutableMap; +import org.apache.spark.sql.sources.v2.DataSourceOptions; +import org.junit.Test; + +public class PslWriteDataSourceOptionsTest { + + @Test + public void testInvalidTopicPath() { + DataSourceOptions options = + new DataSourceOptions(ImmutableMap.of(Constants.TOPIC_CONFIG_KEY, "invalid/path")); + assertThrows( + IllegalArgumentException.class, + () -> PslWriteDataSourceOptions.fromSparkDataSourceOptions(options)); + } +} diff --git a/src/test/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReaderTest.java b/src/test/java/com/google/cloud/pubsublite/spark/internal/LimitingHeadOffsetReaderTest.java similarity index 98% rename from src/test/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReaderTest.java rename to src/test/java/com/google/cloud/pubsublite/spark/internal/LimitingHeadOffsetReaderTest.java index dcc3025a..944f86b0 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/LimitingHeadOffsetReaderTest.java +++ b/src/test/java/com/google/cloud/pubsublite/spark/internal/LimitingHeadOffsetReaderTest.java @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package com.google.cloud.pubsublite.spark; +package com.google.cloud.pubsublite.spark.internal; import static com.google.common.truth.Truth.assertThat; import static org.mockito.ArgumentMatchers.any; diff --git a/src/test/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImplTest.java b/src/test/java/com/google/cloud/pubsublite/spark/internal/MultiPartitionCommitterImplTest.java similarity index 97% rename from src/test/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImplTest.java rename to src/test/java/com/google/cloud/pubsublite/spark/internal/MultiPartitionCommitterImplTest.java index 65b4675a..9d801ea2 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/MultiPartitionCommitterImplTest.java +++ b/src/test/java/com/google/cloud/pubsublite/spark/internal/MultiPartitionCommitterImplTest.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package com.google.cloud.pubsublite.spark; +package com.google.cloud.pubsublite.spark.internal; import static com.google.cloud.pubsublite.spark.TestingUtils.createPslSourceOffset; import static org.mockito.ArgumentMatchers.eq; @@ -23,6 +23,7 @@ import com.google.api.core.SettableApiFuture; import com.google.cloud.pubsublite.*; import com.google.cloud.pubsublite.internal.wire.Committer; +import com.google.cloud.pubsublite.spark.PslSourceOffset; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ScheduledExecutorService; From faf1ece43816f28298ad4db54cee968c6f59681b Mon Sep 17 00:00:00 2001 From: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com> Date: Fri, 2 Apr 2021 16:25:57 -0400 Subject: [PATCH 29/47] fix: Move Spark constants into its own class. 
(#127) --- clirr-ignored-differences.xml | 6 +- .../cloud/pubsublite/spark/Constants.java | 41 +------------ .../pubsublite/spark/PslContinuousReader.java | 2 +- .../pubsublite/spark/PslMicroBatchReader.java | 2 +- .../cloud/pubsublite/spark/PslSparkUtils.java | 12 ++-- .../cloud/pubsublite/spark/SparkStructs.java | 58 +++++++++++++++++++ .../pubsublite/spark/PslDataWriterTest.java | 2 +- .../pubsublite/spark/PslSparkUtilsTest.java | 11 ++-- .../pubsublite/spark/PslStreamWriterTest.java | 2 +- 9 files changed, 81 insertions(+), 55 deletions(-) create mode 100644 src/main/java/com/google/cloud/pubsublite/spark/SparkStructs.java diff --git a/clirr-ignored-differences.xml b/clirr-ignored-differences.xml index 6aa9dcf9..2921d7b6 100644 --- a/clirr-ignored-differences.xml +++ b/clirr-ignored-differences.xml @@ -36,5 +36,9 @@ 8001 com/google/cloud/pubsublite/spark/PslDataSourceOptions* - + + 6001 + com/google/cloud/pubsublite/spark/Constants + DEFAULT_SCHEMA + \ No newline at end of file diff --git a/src/main/java/com/google/cloud/pubsublite/spark/Constants.java b/src/main/java/com/google/cloud/pubsublite/spark/Constants.java index 9ad29b23..9587ed23 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/Constants.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/Constants.java @@ -17,51 +17,14 @@ package com.google.cloud.pubsublite.spark; import com.google.cloud.pubsublite.internal.wire.PubsubContext; -import com.google.common.collect.ImmutableMap; -import java.util.Map; -import org.apache.spark.sql.types.ArrayType; -import org.apache.spark.sql.types.DataType; -import org.apache.spark.sql.types.DataTypes; -import org.apache.spark.sql.types.MapType; -import org.apache.spark.sql.types.Metadata; -import org.apache.spark.sql.types.StructField; -import org.apache.spark.sql.types.StructType; public class Constants { + public static final PubsubContext.Framework FRAMEWORK = PubsubContext.Framework.of("SPARK"); + public static long DEFAULT_BYTES_OUTSTANDING = 
50_000_000; public static long DEFAULT_MESSAGES_OUTSTANDING = Long.MAX_VALUE; public static long DEFAULT_MAX_MESSAGES_PER_BATCH = Long.MAX_VALUE; - public static ArrayType ATTRIBUTES_PER_KEY_DATATYPE = - DataTypes.createArrayType(DataTypes.BinaryType); - public static MapType ATTRIBUTES_DATATYPE = - DataTypes.createMapType(DataTypes.StringType, ATTRIBUTES_PER_KEY_DATATYPE); - public static Map PUBLISH_FIELD_TYPES = - ImmutableMap.of( - "key", DataTypes.BinaryType, - "data", DataTypes.BinaryType, - "attributes", ATTRIBUTES_DATATYPE, - "event_timestamp", DataTypes.TimestampType); - public static StructType DEFAULT_SCHEMA = - new StructType( - new StructField[] { - new StructField("subscription", DataTypes.StringType, false, Metadata.empty()), - new StructField("partition", DataTypes.LongType, false, Metadata.empty()), - new StructField("offset", DataTypes.LongType, false, Metadata.empty()), - new StructField("key", PUBLISH_FIELD_TYPES.get("key"), false, Metadata.empty()), - new StructField("data", PUBLISH_FIELD_TYPES.get("data"), false, Metadata.empty()), - new StructField("publish_timestamp", DataTypes.TimestampType, false, Metadata.empty()), - new StructField( - "event_timestamp", - PUBLISH_FIELD_TYPES.get("event_timestamp"), - true, - Metadata.empty()), - new StructField( - "attributes", PUBLISH_FIELD_TYPES.get("attributes"), true, Metadata.empty()) - }); - - public static final PubsubContext.Framework FRAMEWORK = PubsubContext.Framework.of("SPARK"); - public static String MAX_MESSAGE_PER_BATCH_CONFIG_KEY = "pubsublite.flowcontrol.maxmessagesperbatch"; public static String BYTES_OUTSTANDING_CONFIG_KEY = diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslContinuousReader.java b/src/main/java/com/google/cloud/pubsublite/spark/PslContinuousReader.java index ad2ca3da..d984b174 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PslContinuousReader.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslContinuousReader.java @@ -109,7 +109,7 
@@ public void stop() { @Override public StructType readSchema() { - return Constants.DEFAULT_SCHEMA; + return SparkStructs.DEFAULT_SCHEMA; } @Override diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslMicroBatchReader.java b/src/main/java/com/google/cloud/pubsublite/spark/PslMicroBatchReader.java index a0f0dfee..d526526a 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PslMicroBatchReader.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslMicroBatchReader.java @@ -127,7 +127,7 @@ public void stop() { @Override public StructType readSchema() { - return Constants.DEFAULT_SCHEMA; + return SparkStructs.DEFAULT_SCHEMA; } @Override diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslSparkUtils.java b/src/main/java/com/google/cloud/pubsublite/spark/PslSparkUtils.java index 2510315a..cf336a35 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PslSparkUtils.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslSparkUtils.java @@ -123,31 +123,31 @@ public static Message toPubSubMessage(StructType inputSchema, InternalRow row) { inputSchema, row, "key", - Constants.PUBLISH_FIELD_TYPES.get("key"), + SparkStructs.PUBLISH_FIELD_TYPES.get("key"), (byte[] o) -> builder.setKey(ByteString.copyFrom(o))); extractVal( inputSchema, row, "data", - Constants.PUBLISH_FIELD_TYPES.get("data"), + SparkStructs.PUBLISH_FIELD_TYPES.get("data"), (byte[] o) -> builder.setData(ByteString.copyFrom(o))); extractVal( inputSchema, row, "event_timestamp", - Constants.PUBLISH_FIELD_TYPES.get("event_timestamp"), + SparkStructs.PUBLISH_FIELD_TYPES.get("event_timestamp"), (Long o) -> builder.setEventTime(Timestamps.fromMicros(o))); extractVal( inputSchema, row, "attributes", - Constants.PUBLISH_FIELD_TYPES.get("attributes"), + SparkStructs.PUBLISH_FIELD_TYPES.get("attributes"), (MapData o) -> { ImmutableListMultimap.Builder attributeMapBuilder = ImmutableListMultimap.builder(); o.foreach( DataTypes.StringType, - 
Constants.ATTRIBUTES_PER_KEY_DATATYPE, + SparkStructs.ATTRIBUTES_PER_KEY_DATATYPE, new FromJavaBiConsumer<>( (k, v) -> { String key = ((UTF8String) k).toString(); @@ -170,7 +170,7 @@ public static Message toPubSubMessage(StructType inputSchema, InternalRow row) { * @throws IllegalArgumentException if any DataType mismatch detected. */ public static void verifyWriteInputSchema(StructType inputSchema) { - Constants.PUBLISH_FIELD_TYPES.forEach( + SparkStructs.PUBLISH_FIELD_TYPES.forEach( (k, v) -> { Option idxOr = inputSchema.getFieldIndex(k); if (!idxOr.isEmpty()) { diff --git a/src/main/java/com/google/cloud/pubsublite/spark/SparkStructs.java b/src/main/java/com/google/cloud/pubsublite/spark/SparkStructs.java new file mode 100644 index 00000000..329ffbf7 --- /dev/null +++ b/src/main/java/com/google/cloud/pubsublite/spark/SparkStructs.java @@ -0,0 +1,58 @@ +/* + * Copyright 2020 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.google.cloud.pubsublite.spark; + +import com.google.common.collect.ImmutableMap; +import java.util.Map; +import org.apache.spark.sql.types.ArrayType; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.MapType; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +public class SparkStructs { + + public static ArrayType ATTRIBUTES_PER_KEY_DATATYPE = + DataTypes.createArrayType(DataTypes.BinaryType); + public static MapType ATTRIBUTES_DATATYPE = + DataTypes.createMapType(DataTypes.StringType, ATTRIBUTES_PER_KEY_DATATYPE); + public static Map PUBLISH_FIELD_TYPES = + ImmutableMap.of( + "key", DataTypes.BinaryType, + "data", DataTypes.BinaryType, + "attributes", ATTRIBUTES_DATATYPE, + "event_timestamp", DataTypes.TimestampType); + public static StructType DEFAULT_SCHEMA = + new StructType( + new StructField[] { + new StructField("subscription", DataTypes.StringType, false, Metadata.empty()), + new StructField("partition", DataTypes.LongType, false, Metadata.empty()), + new StructField("offset", DataTypes.LongType, false, Metadata.empty()), + new StructField("key", PUBLISH_FIELD_TYPES.get("key"), false, Metadata.empty()), + new StructField("data", PUBLISH_FIELD_TYPES.get("data"), false, Metadata.empty()), + new StructField("publish_timestamp", DataTypes.TimestampType, false, Metadata.empty()), + new StructField( + "event_timestamp", + PUBLISH_FIELD_TYPES.get("event_timestamp"), + true, + Metadata.empty()), + new StructField( + "attributes", PUBLISH_FIELD_TYPES.get("attributes"), true, Metadata.empty()) + }); +} diff --git a/src/test/java/com/google/cloud/pubsublite/spark/PslDataWriterTest.java b/src/test/java/com/google/cloud/pubsublite/spark/PslDataWriterTest.java index a3f6f1a8..137cec63 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/PslDataWriterTest.java +++ 
b/src/test/java/com/google/cloud/pubsublite/spark/PslDataWriterTest.java @@ -51,7 +51,7 @@ public class PslDataWriterTest { new StructType( new StructField[] { new StructField( - "key", Constants.PUBLISH_FIELD_TYPES.get("key"), false, Metadata.empty()), + "key", SparkStructs.PUBLISH_FIELD_TYPES.get("key"), false, Metadata.empty()), }); private final PslDataWriter writer = new PslDataWriter(1L, 2L, 3L, keyOnly, publisherFactory); diff --git a/src/test/java/com/google/cloud/pubsublite/spark/PslSparkUtilsTest.java b/src/test/java/com/google/cloud/pubsublite/spark/PslSparkUtilsTest.java index 7081082f..e7928915 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/PslSparkUtilsTest.java +++ b/src/test/java/com/google/cloud/pubsublite/spark/PslSparkUtilsTest.java @@ -145,7 +145,8 @@ public void testToPubSubMessage() { new StructField[] { new StructField("key", DataTypes.BinaryType, false, Metadata.empty()), new StructField("data", DataTypes.BinaryType, false, Metadata.empty()), - new StructField("attributes", Constants.ATTRIBUTES_DATATYPE, true, Metadata.empty()), + new StructField( + "attributes", SparkStructs.ATTRIBUTES_DATATYPE, true, Metadata.empty()), new StructField("event_timestamp", DataTypes.TimestampType, true, Metadata.empty()), new StructField("random_extra_field", DataTypes.BinaryType, false, Metadata.empty()) }); @@ -171,19 +172,19 @@ public void testToPubSubMessageLongForEventTimestamp() { @Test public void testVerifyWriteInputSchema() { - PslSparkUtils.verifyWriteInputSchema(Constants.DEFAULT_SCHEMA); + PslSparkUtils.verifyWriteInputSchema(SparkStructs.DEFAULT_SCHEMA); StructType goodThoughMissing = new StructType( new StructField[] { new StructField("offset", DataTypes.LongType, false, Metadata.empty()), new StructField( - "key", Constants.PUBLISH_FIELD_TYPES.get("key"), false, Metadata.empty()), + "key", SparkStructs.PUBLISH_FIELD_TYPES.get("key"), false, Metadata.empty()), new StructField( "publish_timestamp", DataTypes.TimestampType, false, 
Metadata.empty()), new StructField( "attributes", - Constants.PUBLISH_FIELD_TYPES.get("attributes"), + SparkStructs.PUBLISH_FIELD_TYPES.get("attributes"), true, Metadata.empty()) }); @@ -199,7 +200,7 @@ public void testVerifyWriteInputSchema() { "publish_timestamp", DataTypes.TimestampType, false, Metadata.empty()), new StructField( "attributes", - Constants.PUBLISH_FIELD_TYPES.get("attributes"), + SparkStructs.PUBLISH_FIELD_TYPES.get("attributes"), true, Metadata.empty()) }); diff --git a/src/test/java/com/google/cloud/pubsublite/spark/PslStreamWriterTest.java b/src/test/java/com/google/cloud/pubsublite/spark/PslStreamWriterTest.java index 35b525d7..48f1c77b 100644 --- a/src/test/java/com/google/cloud/pubsublite/spark/PslStreamWriterTest.java +++ b/src/test/java/com/google/cloud/pubsublite/spark/PslStreamWriterTest.java @@ -24,7 +24,7 @@ public class PslStreamWriterTest { private final PslStreamWriter writer = new PslStreamWriter( - Constants.DEFAULT_SCHEMA, + SparkStructs.DEFAULT_SCHEMA, PslWriteDataSourceOptions.builder() .setTopicPath(UnitTestExamples.exampleTopicPath()) .build()); From 4ef4a043ccaacbca8d103374fd5d07c49bfac0b5 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 5 Apr 2021 22:19:56 +0200 Subject: [PATCH 30/47] deps: update dependency com.google.api.grpc:proto-google-cloud-pubsublite-v1 to v0.13.1 (#124) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 74123ab4..7235c268 100644 --- a/pom.xml +++ b/pom.xml @@ -48,7 +48,7 @@ com.google.api.grpc proto-google-cloud-pubsublite-v1 - 0.12.0 + 0.13.1 com.google.guava From 98f5863245584bf517d4817610dcca0c3979a470 Mon Sep 17 00:00:00 2001 From: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com> Date: Tue, 6 Apr 2021 15:54:00 -0400 Subject: [PATCH 31/47] samples: Spark connector writer support sample and integration test (#122) --- samples/README.md | 52 ++++-- .../java/pubsublite/spark/AdminUtils.java | 79 ++++++++- 
.../java/pubsublite/spark/PublishWords.java | 38 +++-- .../java/pubsublite/spark/ReadResults.java | 55 +++++++ .../main/java/pubsublite/spark/WordCount.java | 31 +++- .../spark/SampleIntegrationTest.java | 151 +++++++++++------- .../spark/PslWriteDataSourceOptions.java | 2 + 7 files changed, 314 insertions(+), 94 deletions(-) create mode 100644 samples/snippets/src/main/java/pubsublite/spark/ReadResults.java diff --git a/samples/README.md b/samples/README.md index a8747527..3091db75 100644 --- a/samples/README.md +++ b/samples/README.md @@ -1,6 +1,9 @@ # Pub/Sub Lite Spark Connector Word Count Samples -This directory contains a word count sample for Pub/Sub Lite Spark Connector. +This directory contains a word count sample for the Pub/Sub Lite Spark Connector. The sample will read +single word count messages from Pub/Sub Lite, do the aggregation (count words) in Spark, and finally +write back to Pub/Sub Lite. Note that the topic/subscription used for reading is different from the topic/subscription +used for writing and verifying the final word count results. ## Authentication @@ -8,17 +11,20 @@ Please see the [Google cloud authentication guide](https://cloud.google.com/docs The recommended approach is to use Application Default Credentials by setting `GOOGLE_APPLICATION_CREDENTIALS`. ## Environment Variables -Set the following environment variables: +Set the following environment variables.
+Note `SOURCE_TOPIC_ID` and `SOURCE_SUBSCRIPTION_ID` are used to read _raw_ single word count messages; +while `DESTINATION_TOPIC_ID` and `DESTINATION_SUBSCRIPTION_ID` are used for the final word count results. They must +be different. ``` PROJECT_NUMBER=12345 # or your project number REGION=us-central1 # or your region ZONE_ID=b # or your zone id -TOPIC_ID=test-topic # or your topic id to create -SUBSCRIPTION_ID=test-subscrciption # or your subscription to create -PARTITIONS=1 # or your number of partitions to create +SOURCE_TOPIC_ID=test-topic # or your topic id to create +SOURCE_SUBSCRIPTION_ID=test-subscription # or your subscription to create +DESTINATION_TOPIC_ID=test-topic-2 # or your topic id to create, this is different from SOURCE_TOPIC_ID! +DESTINATION_SUBSCRIPTION_ID=test-subscription-2 # or your subscription to create, this is different from SOURCE_SUBSCRIPTION_ID! CLUSTER_NAME=waprin-spark7 # or your Dataproc cluster name to create BUCKET=gs://your-gcs-bucket -SUBSCRIPTION_PATH=projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/subscriptions/$SUBSCRIPTION_ID CONNECTOR_VERSION= # latest pubsublite-spark-sql-streaming release version PUBSUBLITE_SPARK_SQL_STREAMING_JAR_LOCATION= # downloaded pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies jar location ``` @@ -36,14 +42,16 @@ To run the word count sample in Dataproc cluster, follow the steps: --non-recursive \ exec:exec) ``` -3. Create the topic and subscription, and publish word count messages to the topic. +3. Create both the source and destination topics and subscriptions, and publish word count messages to the _source_ + topic.
```sh PROJECT_NUMBER=$PROJECT_NUMBER \ REGION=$REGION \ ZONE_ID=$ZONE_ID \ - TOPIC_ID=$TOPIC_ID \ - SUBSCRIPTION_ID=$SUBSCRIPTION_ID \ - PARTITIONS=$PARTITIONS \ + SOURCE_TOPIC_ID=$SOURCE_TOPIC_ID \ + SOURCE_SUBSCRIPTION_ID=$SOURCE_SUBSCRIPTION_ID \ + DESTINATION_TOPIC_ID=$DESTINATION_TOPIC_ID \ + DESTINATION_SUBSCRIPTION_ID=$DESTINATION_SUBSCRIPTION_ID \ mvn compile exec:java -Dexec.mainClass=pubsublite.spark.PublishWords ``` 4. Create a Dataproc cluster @@ -54,8 +62,7 @@ To run the word count sample in Dataproc cluster, follow the steps: ```sh mvn clean package -Dmaven.test.skip=true ``` - -6. Download `pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar` from Maven Central and set `PUBSUBLITE_SPARK_SQL_STREAMING_JAR_LOCATION` environment variable. +6. Download `pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar` from [Maven Central](https://search.maven.org/artifact/com.google.cloud/pubsublite-spark-sql-streaming) and set `PUBSUBLITE_SPARK_SQL_STREAMING_JAR_LOCATION` environment variable. 7. Create GCS bucket and upload both `pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar` and the sample jar onto GCS ```sh gsutil mb $BUCKET @@ -66,19 +73,30 @@ To run the word count sample in Dataproc cluster, follow the steps: ```sh gcloud config set dataproc/region $REGION ``` - -9. Run the sample in Dataproc. You would see the word count result show up in the console output. +9. Run the sample in Dataproc. This will perform word count aggregation and publish word count results to Pub/Sub Lite. 
```sh gcloud dataproc jobs submit spark --cluster=$CLUSTER_NAME \ --jars=$BUCKET/pubsublite-spark-snippets-$SAMPLE_VERSION.jar,$BUCKET/pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar \ - --class=pubsublite.spark.WordCount -- $SUBSCRIPTION_PATH + --class=pubsublite.spark.WordCount -- \ + projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/subscriptions/$SOURCE_SUBSCRIPTION_ID \ + projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/topics/$DESTINATION_TOPIC_ID ``` +10. Read the word count results from Pub/Sub Lite; you should see them in the console output. + ```sh + PROJECT_NUMBER=$PROJECT_NUMBER \ + REGION=$REGION \ + ZONE_ID=$ZONE_ID \ + DESTINATION_SUBSCRIPTION_ID=$DESTINATION_SUBSCRIPTION_ID \ + mvn compile exec:java -Dexec.mainClass=pubsublite.spark.ReadResults + ``` ## Cleaning up 1. Delete Pub/Sub Lite topic and subscription. ```sh - gcloud pubsub lite-subscriptions delete $SUBSCRIPTION_ID --zone=$REGION-$ZONE_ID - gcloud pubsub lite-topics delete $TOPIC_ID --zone=$REGION-$ZONE_ID + gcloud pubsub lite-subscriptions delete $SOURCE_SUBSCRIPTION_ID --zone=$REGION-$ZONE_ID + gcloud pubsub lite-topics delete $SOURCE_TOPIC_ID --zone=$REGION-$ZONE_ID + gcloud pubsub lite-subscriptions delete $DESTINATION_SUBSCRIPTION_ID --zone=$REGION-$ZONE_ID + gcloud pubsub lite-topics delete $DESTINATION_TOPIC_ID --zone=$REGION-$ZONE_ID ``` 2. Delete GCS bucket. 
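Throughout the steps above, the messages exchanged with Pub/Sub Lite encode each word count as `word_count` in the binary message data (`WordCount` concatenates the word and its sum with an underscore, and `ReadResults` replaces the underscore with `": "` for display). A minimal, self-contained sketch of that encoding and display logic — the `WordCountFormat` class and its method names are hypothetical helpers for illustration, not part of the samples:

```java
import java.nio.charset.StandardCharsets;

public class WordCountFormat {
    // Encode a word-count result the way the WordCount job does:
    // "word" + "_" + count, stored as the binary `data` field of a
    // Pub/Sub Lite message.
    static byte[] encode(String word, long count) {
        return (word + "_" + count).getBytes(StandardCharsets.UTF_8);
    }

    // Render a result the way ReadResults does: replace the underscore
    // separator with ": " for console output.
    static String render(byte[] data) {
        return new String(data, StandardCharsets.UTF_8).replace("_", ": ");
    }

    public static void main(String[] args) {
        byte[] data = encode("the", 24);
        System.out.println(render(data)); // prints "the: 24"
    }
}
```

The same underscore convention is what the integration test relies on when it splits each destination message on `_` to rebuild a word-to-count map.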
```sh diff --git a/samples/snippets/src/main/java/pubsublite/spark/AdminUtils.java b/samples/snippets/src/main/java/pubsublite/spark/AdminUtils.java index 09bac836..b6f712f6 100644 --- a/samples/snippets/src/main/java/pubsublite/spark/AdminUtils.java +++ b/samples/snippets/src/main/java/pubsublite/spark/AdminUtils.java @@ -20,6 +20,8 @@ import com.google.api.core.ApiFutures; import com.google.api.gax.rpc.AlreadyExistsException; import com.google.api.gax.rpc.ApiException; +import com.google.cloud.pubsub.v1.AckReplyConsumer; +import com.google.cloud.pubsub.v1.MessageReceiver; import com.google.cloud.pubsublite.AdminClient; import com.google.cloud.pubsublite.AdminClientSettings; import com.google.cloud.pubsublite.CloudRegion; @@ -30,8 +32,11 @@ import com.google.cloud.pubsublite.SubscriptionPath; import com.google.cloud.pubsublite.TopicName; import com.google.cloud.pubsublite.TopicPath; +import com.google.cloud.pubsublite.cloudpubsub.FlowControlSettings; import com.google.cloud.pubsublite.cloudpubsub.Publisher; import com.google.cloud.pubsublite.cloudpubsub.PublisherSettings; +import com.google.cloud.pubsublite.cloudpubsub.Subscriber; +import com.google.cloud.pubsublite.cloudpubsub.SubscriberSettings; import com.google.cloud.pubsublite.proto.Subscription; import com.google.cloud.pubsublite.proto.Topic; import com.google.protobuf.ByteString; @@ -39,7 +44,11 @@ import com.google.pubsub.v1.PubsubMessage; import java.util.ArrayList; import java.util.List; +import java.util.Queue; +import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; public class AdminUtils { @@ -144,14 +153,7 @@ public static void createSubscriptionExample( } public static void deleteSubscriptionExample( - String cloudRegion, char zoneId, long projectNumber, String subscriptionId) throws Exception { - SubscriptionPath subscriptionPath = - SubscriptionPath.newBuilder() - 
.setLocation(CloudZone.of(CloudRegion.of(cloudRegion), zoneId)) - .setProject(ProjectNumber.of(projectNumber)) - .setName(SubscriptionName.of(subscriptionId)) - .build(); - + String cloudRegion, SubscriptionPath subscriptionPath) throws Exception { AdminClientSettings adminClientSettings = AdminClientSettings.newBuilder().setRegion(CloudRegion.of(cloudRegion)).build(); @@ -161,6 +163,16 @@ public static void deleteSubscriptionExample( } } + public static void deleteTopicExample(String cloudRegion, TopicPath topicPath) throws Exception { + AdminClientSettings adminClientSettings = + AdminClientSettings.newBuilder().setRegion(CloudRegion.of(cloudRegion)).build(); + + try (AdminClient adminClient = AdminClient.create(adminClientSettings)) { + adminClient.deleteTopic(topicPath).get(); + System.out.println(topicPath + " deleted successfully."); + } + } + public static void publisherExample( String cloudRegion, char zoneId, long projectNumber, String topicId, List words) throws ApiException, ExecutionException, InterruptedException { @@ -208,4 +220,55 @@ public static void publisherExample( } } } + + public static Queue subscriberExample( + String cloudRegion, char zoneId, long projectNumber, String subscriptionId) + throws ApiException { + // Sample has at most 200 messages. 
+ Queue result = new ArrayBlockingQueue<>(1000); + + SubscriptionPath subscriptionPath = + SubscriptionPath.newBuilder() + .setLocation(CloudZone.of(CloudRegion.of(cloudRegion), zoneId)) + .setProject(ProjectNumber.of(projectNumber)) + .setName(SubscriptionName.of(subscriptionId)) + .build(); + + MessageReceiver receiver = + (PubsubMessage message, AckReplyConsumer consumer) -> { + result.add(message); + consumer.ack(); + }; + FlowControlSettings flowControlSettings = + FlowControlSettings.builder() + .setBytesOutstanding(10 * 1024 * 1024L) + .setMessagesOutstanding(1000L) + .build(); + + SubscriberSettings subscriberSettings = + SubscriberSettings.newBuilder() + .setSubscriptionPath(subscriptionPath) + .setReceiver(receiver) + .setPerPartitionFlowControlSettings(flowControlSettings) + .build(); + + Subscriber subscriber = Subscriber.create(subscriberSettings); + + // Start the subscriber. Upon successful starting, its state will become RUNNING. + subscriber.startAsync().awaitRunning(); + + try { + System.out.println(subscriber.state()); + // Wait 90 seconds for the subscriber to reach TERMINATED state. If it encounters + // unrecoverable errors before then, its state will change to FAILED and an + // IllegalStateException will be thrown. + subscriber.awaitTerminated(90, TimeUnit.SECONDS); + } catch (TimeoutException t) { + // Shut down the subscriber. This will change the state of the subscriber to TERMINATED. 
+ subscriber.stopAsync().awaitTerminated(); + System.out.println("Subscriber is shut down: " + subscriber.state()); + } + + return result; + } } diff --git a/samples/snippets/src/main/java/pubsublite/spark/PublishWords.java b/samples/snippets/src/main/java/pubsublite/spark/PublishWords.java index 5845d2d6..17152f67 100644 --- a/samples/snippets/src/main/java/pubsublite/spark/PublishWords.java +++ b/samples/snippets/src/main/java/pubsublite/spark/PublishWords.java @@ -34,27 +34,37 @@ public class PublishWords { private static final String REGION = "REGION"; private static final String ZONE_ID = "ZONE_ID"; - private static final String TOPIC_ID = "TOPIC_ID"; - private static final String SUBSCRIPTION_ID = "SUBSCRIPTION_ID"; + private static final String SOURCE_TOPIC_ID = "SOURCE_TOPIC_ID"; + private static final String SOURCE_SUBSCRIPTION_ID = "SOURCE_SUBSCRIPTION_ID"; + private static final String DESTINATION_TOPIC_ID = "DESTINATION_TOPIC_ID"; + private static final String DESTINATION_SUBSCRIPTION_ID = "DESTINATION_SUBSCRIPTION_ID"; private static final String PROJECT_NUMBER = "PROJECT_NUMBER"; - private static final String PARTITIONS = "PARTITIONS"; public static void main(String[] args) throws Exception { Map env = System.getenv(); Set missingVars = Sets.difference( - ImmutableSet.of(REGION, ZONE_ID, TOPIC_ID, SUBSCRIPTION_ID, PROJECT_NUMBER, PARTITIONS), + ImmutableSet.of( + REGION, + ZONE_ID, + SOURCE_TOPIC_ID, + SOURCE_SUBSCRIPTION_ID, + DESTINATION_TOPIC_ID, + DESTINATION_SUBSCRIPTION_ID, + PROJECT_NUMBER), env.keySet()); Preconditions.checkState( missingVars.isEmpty(), "Missing required environment variables: " + missingVars); - String cloudRegion = env.get(REGION); + final String cloudRegion = env.get(REGION); char zoneId = env.get(ZONE_ID).charAt(0); - String topicId = env.get(TOPIC_ID); - String subscriptionId = env.get(SUBSCRIPTION_ID); + final String sourceTopicId = env.get(SOURCE_TOPIC_ID); + final String sourceSubscriptionId = 
env.get(SOURCE_SUBSCRIPTION_ID); + final String destinationTopicId = env.get(DESTINATION_TOPIC_ID); + final String destinationSubscriptionId = env.get(DESTINATION_SUBSCRIPTION_ID); long projectNumber = Long.parseLong(env.get(PROJECT_NUMBER)); - int partitions = Integer.parseInt(env.get(PARTITIONS)); + int partitions = 1; String snippets = Resources.toString(Resources.getResource("text_snippets.txt"), Charset.defaultCharset()); @@ -64,12 +74,16 @@ public static void main(String[] args) throws Exception { .replaceAll("\n", " ") .replaceAll("\\s+", " ") .toLowerCase(); - List words = Arrays.asList(snippets.split(" ")); + final List words = Arrays.asList(snippets.split(" ")); - createTopicExample(cloudRegion, zoneId, projectNumber, topicId, partitions); - createSubscriptionExample(cloudRegion, zoneId, projectNumber, topicId, subscriptionId); + createTopicExample(cloudRegion, zoneId, projectNumber, sourceTopicId, partitions); + createSubscriptionExample( + cloudRegion, zoneId, projectNumber, sourceTopicId, sourceSubscriptionId); + createTopicExample(cloudRegion, zoneId, projectNumber, destinationTopicId, partitions); + createSubscriptionExample( + cloudRegion, zoneId, projectNumber, destinationTopicId, destinationSubscriptionId); - publisherExample(cloudRegion, zoneId, projectNumber, topicId, words); + publisherExample(cloudRegion, zoneId, projectNumber, sourceTopicId, words); System.exit(0); } diff --git a/samples/snippets/src/main/java/pubsublite/spark/ReadResults.java b/samples/snippets/src/main/java/pubsublite/spark/ReadResults.java new file mode 100644 index 00000000..e93d08e0 --- /dev/null +++ b/samples/snippets/src/main/java/pubsublite/spark/ReadResults.java @@ -0,0 +1,55 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pubsublite.spark; + +import static pubsublite.spark.AdminUtils.subscriberExample; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Sets; +import java.util.Map; +import java.util.Set; + +public class ReadResults { + + private static final String REGION = "REGION"; + private static final String ZONE_ID = "ZONE_ID"; + private static final String DESTINATION_SUBSCRIPTION_ID = "DESTINATION_SUBSCRIPTION_ID"; + private static final String PROJECT_NUMBER = "PROJECT_NUMBER"; + + public static void main(String[] args) { + + Map env = System.getenv(); + Set missingVars = + Sets.difference( + ImmutableSet.of(REGION, ZONE_ID, DESTINATION_SUBSCRIPTION_ID, PROJECT_NUMBER), + env.keySet()); + Preconditions.checkState( + missingVars.isEmpty(), "Missing required environment variables: " + missingVars); + + String cloudRegion = env.get(REGION); + char zoneId = env.get(ZONE_ID).charAt(0); + String destinationSubscriptionId = env.get(DESTINATION_SUBSCRIPTION_ID); + long projectNumber = Long.parseLong(env.get(PROJECT_NUMBER)); + + System.out.println("Word count results:"); + subscriberExample(cloudRegion, zoneId, projectNumber, destinationSubscriptionId) + .forEach((m) -> System.out.println(m.getData().toStringUtf8().replace("_", ": "))); + + System.exit(0); + } +} diff --git a/samples/snippets/src/main/java/pubsublite/spark/WordCount.java b/samples/snippets/src/main/java/pubsublite/spark/WordCount.java index c6c7ce5f..4696bc69 100644 --- 
a/samples/snippets/src/main/java/pubsublite/spark/WordCount.java +++ b/samples/snippets/src/main/java/pubsublite/spark/WordCount.java @@ -16,8 +16,11 @@ package pubsublite.spark; +import static org.apache.spark.sql.functions.concat; +import static org.apache.spark.sql.functions.lit; import static org.apache.spark.sql.functions.split; +import java.util.UUID; import java.util.concurrent.TimeUnit; import org.apache.spark.sql.Column; import org.apache.spark.sql.Dataset; @@ -31,12 +34,25 @@ public class WordCount { public static void main(String[] args) throws Exception { + final String appId = UUID.randomUUID().toString(); + final String sourceSubscriptionPath = args[0]; + final String destinationTopicPath = args[1]; - SparkSession spark = SparkSession.builder().appName("Word count").master("yarn").getOrCreate(); + SparkSession spark = + SparkSession.builder() + .appName(String.format("Word count (ID: %s)", appId)) + .master("yarn") + .getOrCreate(); + // Read messages from Pub/Sub Lite Dataset df = - spark.readStream().format("pubsublite").option("pubsublite.subscription", args[0]).load(); + spark + .readStream() + .format("pubsublite") + .option("pubsublite.subscription", sourceSubscriptionPath) + .load(); + // Aggregate word counts Column splitCol = split(df.col("data"), "_"); df = df.withColumn("word", splitCol.getItem(0)) @@ -44,9 +60,18 @@ public static void main(String[] args) throws Exception { df = df.groupBy("word").sum("word_count"); df = df.orderBy(df.col("sum(word_count)").desc(), df.col("word").asc()); + // Add Pub/Sub Lite message data field + df = + df.withColumn( + "data", + concat(df.col("word"), lit("_"), df.col("sum(word_count)")).cast(DataTypes.BinaryType)); + + // Write word count results to Pub/Sub Lite StreamingQuery query = df.writeStream() - .format("console") + .format("pubsublite") + .option("pubsublite.topic", destinationTopicPath) + .option("checkpointLocation", String.format("/tmp/checkpoint-%s", appId)) .outputMode(OutputMode.Complete()) 
.trigger(Trigger.ProcessingTime(1, TimeUnit.SECONDS)) .start(); diff --git a/samples/snippets/src/test/java/pubsublite/spark/SampleIntegrationTest.java b/samples/snippets/src/test/java/pubsublite/spark/SampleIntegrationTest.java index d3011e6b..27afa147 100644 --- a/samples/snippets/src/test/java/pubsublite/spark/SampleIntegrationTest.java +++ b/samples/snippets/src/test/java/pubsublite/spark/SampleIntegrationTest.java @@ -18,7 +18,10 @@ import static com.google.common.truth.Truth.assertThat; import static pubsublite.spark.AdminUtils.createSubscriptionExample; +import static pubsublite.spark.AdminUtils.createTopicExample; import static pubsublite.spark.AdminUtils.deleteSubscriptionExample; +import static pubsublite.spark.AdminUtils.deleteTopicExample; +import static pubsublite.spark.AdminUtils.subscriberExample; import com.google.api.gax.longrunning.OperationFuture; import com.google.cloud.dataproc.v1.Job; @@ -34,24 +37,26 @@ import com.google.cloud.pubsublite.SubscriptionName; import com.google.cloud.pubsublite.SubscriptionPath; import com.google.cloud.pubsublite.TopicName; -import com.google.cloud.storage.Blob; +import com.google.cloud.pubsublite.TopicPath; import com.google.cloud.storage.BlobId; import com.google.cloud.storage.BlobInfo; import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageOptions; import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; +import com.google.pubsub.v1.PubsubMessage; import java.io.BufferedReader; import java.io.File; import java.io.InputStreamReader; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.HashMap; import java.util.Map; +import java.util.Queue; import java.util.Set; import java.util.UUID; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import org.apache.commons.lang.StringUtils; import 
org.apache.maven.shared.invoker.DefaultInvocationRequest; import org.apache.maven.shared.invoker.DefaultInvoker; @@ -63,7 +68,6 @@ import org.junit.After; import org.junit.Before; import org.junit.Test; -import org.spark_project.guava.collect.ImmutableList; public class SampleIntegrationTest { @@ -82,9 +86,13 @@ public class SampleIntegrationTest { private CloudZone cloudZone; private ProjectNumber projectNumber; private ProjectId projectId; - private TopicName topicId; - private SubscriptionName subscriptionName; - private SubscriptionPath subscriptionPath; + private TopicName sourceTopicId; + private SubscriptionName sourceSubscriptionName; + private SubscriptionPath sourceSubscriptionPath; + private TopicName destinationTopicId; + private TopicPath destinationTopicPath; + private SubscriptionName destinationSubscriptionName; + private SubscriptionPath destinationSubscriptionPath; private String clusterName; private String bucketName; private String workingDir; @@ -143,7 +151,8 @@ private Job runDataprocJob() throws Exception { .addJarFileUris(String.format("gs://%s/%s", bucketName, sampleJarNameInGCS)) .addJarFileUris(String.format("gs://%s/%s", bucketName, connectorJarNameInGCS)) .setMainClass("pubsublite.spark.WordCount") - .addArgs(subscriptionPath.toString()) + .addArgs(sourceSubscriptionPath.toString()) + .addArgs(destinationTopicPath.toString()) .build(); Job job = Job.newBuilder().setPlacement(jobPlacement).setSparkJob(sparkJob).build(); OperationFuture submitJobAsOperationAsyncRequest = @@ -153,39 +162,40 @@ private Job runDataprocJob() throws Exception { } } - private void verifyDataprocOutput(Storage storage, Job job) { - Matcher matches = Pattern.compile("gs://(.*?)/(.*)").matcher(job.getDriverOutputResourceUri()); - assertThat(matches.matches()).isTrue(); - - Blob blob = storage.get(matches.group(1), String.format("%s.000000000", matches.group(2))); - String sparkJobOutput = new String(blob.getContent()); - String expectedWordCountResult = - 
"+-----+---------------+\n" - + "| word|sum(word_count)|\n" - + "+-----+---------------+\n" - + "| the| 24|\n" - + "| of| 16|\n" - + "| and| 14|\n" - + "| i| 13|\n" - + "| my| 10|\n" - + "| a| 6|\n" - + "| in| 5|\n" - + "| that| 5|\n" - + "| soul| 4|\n" - + "| with| 4|\n" - + "| as| 3|\n" - + "| feel| 3|\n" - + "| like| 3|\n" - + "| me| 3|\n" - + "| so| 3|\n" - + "| then| 3|\n" - + "| us| 3|\n" - + "| when| 3|\n" - + "|which| 3|\n" - + "| am| 2|\n" - + "+-----+---------------+\n" - + "only showing top 20 rows"; - assertThat(sparkJobOutput).contains(expectedWordCountResult); + private void verifyWordCountResult() { + Map expected = new HashMap<>(); + expected.put("the", 24); + expected.put("of", 16); + expected.put("and", 14); + expected.put("i", 13); + expected.put("my", 10); + expected.put("a", 6); + expected.put("in", 5); + expected.put("that", 5); + expected.put("soul", 4); + expected.put("with", 4); + expected.put("as", 3); + expected.put("feel", 3); + expected.put("like", 3); + expected.put("me", 3); + expected.put("so", 3); + expected.put("then", 3); + expected.put("us", 3); + expected.put("when", 3); + expected.put("which", 3); + expected.put("am", 2); + Map actual = new HashMap<>(); + Queue results = + subscriberExample( + cloudRegion.value(), + cloudZone.zoneId(), + projectNumber.value(), + destinationSubscriptionName.value()); + for (PubsubMessage m : results) { + String[] pair = m.getData().toStringUtf8().split("_"); + actual.put(pair[0], Integer.parseInt(pair[1])); + } + assertThat(actual).containsAtLeastEntriesIn(expected); } private void setUpVariables() { @@ -208,13 +218,28 @@ private void setUpVariables() { cloudZone = CloudZone.of(cloudRegion, env.get(CLOUD_ZONE).charAt(0)); projectId = ProjectId.of(env.get(PROJECT_ID)); projectNumber = ProjectNumber.of(Long.parseLong(env.get(PROJECT_NUMBER))); - topicId = TopicName.of(env.get(TOPIC_ID)); - subscriptionName = SubscriptionName.of("sample-integration-sub-" + runId); - subscriptionPath = + 
sourceTopicId = TopicName.of(env.get(TOPIC_ID)); + sourceSubscriptionName = SubscriptionName.of("sample-integration-sub-source-" + runId); + sourceSubscriptionPath = + SubscriptionPath.newBuilder() + .setProject(projectId) + .setLocation(cloudZone) + .setName(sourceSubscriptionName) + .build(); + destinationTopicId = TopicName.of("sample-integration-topic-destination-" + runId); + destinationTopicPath = + TopicPath.newBuilder() + .setProject(projectId) + .setLocation(cloudZone) + .setName(destinationTopicId) + .build(); + destinationSubscriptionName = + SubscriptionName.of("sample-integration-sub-destination-" + runId); + destinationSubscriptionPath = SubscriptionPath.newBuilder() .setProject(projectId) .setLocation(cloudZone) - .setName(subscriptionName) + .setName(destinationSubscriptionName) .build(); clusterName = env.get(CLUSTER_NAME); bucketName = env.get(BUCKET_NAME); @@ -240,22 +265,38 @@ public void setUp() throws Exception { setUpVariables(); findMavenHome(); - // Create a subscription + // Create a subscription to read source word messages createSubscriptionExample( cloudRegion.value(), cloudZone.zoneId(), projectNumber.value(), - topicId.value(), - subscriptionName.value()); + sourceTopicId.value(), + sourceSubscriptionName.value()); + + // Create a topic and subscription for word count final results + createTopicExample( + cloudRegion.value(), + cloudZone.zoneId(), + projectNumber.value(), + destinationTopicId.value(), + /*partitions=*/ 1); + createSubscriptionExample( + cloudRegion.value(), + cloudZone.zoneId(), + projectNumber.value(), + destinationTopicId.value(), + destinationSubscriptionName.value()); } @After public void tearDown() throws Exception { - // Cleanup the subscription - deleteSubscriptionExample( - cloudRegion.value(), cloudZone.zoneId(), projectNumber.value(), subscriptionName.value()); + // Cleanup the topics and subscriptions + deleteSubscriptionExample(cloudRegion.value(), sourceSubscriptionPath); + 
deleteSubscriptionExample(cloudRegion.value(), destinationSubscriptionPath); + deleteTopicExample(cloudRegion.value(), destinationTopicPath); } + /** Note that source single word messages have been published to a permanent topic. */ @Test public void test() throws Exception { // Maven package into jars @@ -268,8 +309,10 @@ public void test() throws Exception { uploadGCS(storage, sampleJarNameInGCS, sampleJarLoc); uploadGCS(storage, connectorJarNameInGCS, connectorJarLoc); - // Run Dataproc job and verify output - Job jobResponse = runDataprocJob(); - verifyDataprocOutput(storage, jobResponse); + // Run Dataproc job, block until it finishes + runDataprocJob(); + + // Verify final destination messages in Pub/Sub Lite + verifyWordCountResult(); } } diff --git a/src/main/java/com/google/cloud/pubsublite/spark/PslWriteDataSourceOptions.java b/src/main/java/com/google/cloud/pubsublite/spark/PslWriteDataSourceOptions.java index 44b6d95d..d36844a0 100644 --- a/src/main/java/com/google/cloud/pubsublite/spark/PslWriteDataSourceOptions.java +++ b/src/main/java/com/google/cloud/pubsublite/spark/PslWriteDataSourceOptions.java @@ -27,6 +27,7 @@ import com.google.cloud.pubsublite.MessageMetadata; import com.google.cloud.pubsublite.Partition; import com.google.cloud.pubsublite.TopicPath; +import com.google.cloud.pubsublite.cloudpubsub.PublisherSettings; import com.google.cloud.pubsublite.internal.Publisher; import com.google.cloud.pubsublite.internal.wire.PartitionCountWatchingPublisherSettings; import com.google.cloud.pubsublite.internal.wire.PubsubContext; @@ -92,6 +93,7 @@ public Publisher createNewPublisher() { .setTopic(topicPath()) .setPartition(partition) .setServiceClient(newServiceClient(partition)) + .setBatchingSettings(PublisherSettings.DEFAULT_BATCHING_SETTINGS) .build()) .setAdminClient(getAdminClient()) .build() From a92ddd9adb714b0768b021bdd92ae3107d40583f Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 6 Apr 2021 13:02:03 -0700 Subject: [PATCH 
32/47] chore: regenerate README (#133) This PR was generated using Autosynth. :rainbow:
Log from Synthtool ``` 2021-04-06 19:56:10,679 synthtool [DEBUG] > Executing /root/.cache/synthtool/java-pubsublite-spark/.github/readme/synth.py. On branch autosynth-readme nothing to commit, working tree clean 2021-04-06 19:56:11,940 synthtool [DEBUG] > Wrote metadata to .github/readme/synth.metadata/synth.metadata. ```
Full log will be available here: https://source.cloud.google.com/results/invocations/3eb00cbc-2c38-4d6d-90de-0da91cf5243b/targets - [ ] To automatically regenerate this PR, check this box. (May take up to 24 hours.) --- .github/readme/synth.metadata/synth.metadata | 4 ++-- README.md | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/readme/synth.metadata/synth.metadata b/.github/readme/synth.metadata/synth.metadata index 6d0f9a81..935ee157 100644 --- a/.github/readme/synth.metadata/synth.metadata +++ b/.github/readme/synth.metadata/synth.metadata @@ -4,14 +4,14 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "1bf772a275e76d6b7229d628b72d5dce4f5c8bc5" + "sha": "98f5863245584bf517d4817610dcca0c3979a470" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "e5fa6d93e42918dd4a000a80b92be23f5f4c6ac7" + "sha": "705743e66f5c0b24a95f7f30619c9d3ef747b317" } } ] diff --git a/README.md b/README.md index 628f6445..d1eb9154 100644 --- a/README.md +++ b/README.md @@ -176,6 +176,7 @@ has instructions for running the samples. 
| --------------------------- | --------------------------------- | ------ | | Admin Utils | [source code](https://github.com/googleapis/java-pubsublite-spark/blob/master/samples/snippets/src/main/java/pubsublite/spark/AdminUtils.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-pubsublite-spark&page=editor&open_in_editor=samples/snippets/src/main/java/pubsublite/spark/AdminUtils.java) | | Publish Words | [source code](https://github.com/googleapis/java-pubsublite-spark/blob/master/samples/snippets/src/main/java/pubsublite/spark/PublishWords.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-pubsublite-spark&page=editor&open_in_editor=samples/snippets/src/main/java/pubsublite/spark/PublishWords.java) | +| Read Results | [source code](https://github.com/googleapis/java-pubsublite-spark/blob/master/samples/snippets/src/main/java/pubsublite/spark/ReadResults.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-pubsublite-spark&page=editor&open_in_editor=samples/snippets/src/main/java/pubsublite/spark/ReadResults.java) | | Word Count | [source code](https://github.com/googleapis/java-pubsublite-spark/blob/master/samples/snippets/src/main/java/pubsublite/spark/WordCount.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-pubsublite-spark&page=editor&open_in_editor=samples/snippets/src/main/java/pubsublite/spark/WordCount.java) | From d3130e76e643321977af7d1487da6b87b578f4ed Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 7 Apr 2021 20:03:37 +0200 Subject: [PATCH 33/47] deps: update dependency com.google.cloud:google-cloud-pubsublite-parent to v0.13.1 (#126) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/pom.xml b/pom.xml index 7235c268..d8941301 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ com.google.cloud google-cloud-pubsublite-parent - 0.12.0 + 0.13.1 4.0.0 com.google.cloud From 52ddd7384fa8315cf3ee69ddcbcbd8c96cd26302 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 7 Apr 2021 20:03:55 +0200 Subject: [PATCH 34/47] test(deps): update dependency org.mockito:mockito-core to v3.9.0 (#134) --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d8941301..94da0515 100644 --- a/pom.xml +++ b/pom.xml @@ -136,7 +136,7 @@ org.mockito mockito-core test - 3.8.0 + 3.9.0
com.google.guava From 3abbc1f778238293bf19176a374b6e56f3a1b5e3 Mon Sep 17 00:00:00 2001 From: Tianzi Cai Date: Wed, 7 Apr 2021 16:18:39 -0700 Subject: [PATCH 35/47] chore: add samples test as a required check during presubmit (#135) * chore: add samples test as a required check during presubmit * reorder --- .github/sync-repo-settings.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 680566d2..4a339b05 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -36,6 +36,7 @@ branchProtectionRules: - "units (8)" - "units (11)" - "Kokoro - Test: Integration" + - "Kokoro - Test: Samples" - "cla/google" # List of explicit permissions to add (additive only) permissionRules: From eddcb420a9dd6f41a1dd07a56bcb6ec16a444a56 Mon Sep 17 00:00:00 2001 From: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com> Date: Fri, 9 Apr 2021 14:37:07 -0400 Subject: [PATCH 36/47] samples: Simple read and simple write (#131) --- samples/README.md | 239 ++++++++++--- samples/simple_read_sample.sh | 62 ++++ samples/simple_write_sample.sh | 62 ++++ .../java/pubsublite/spark/CommonUtils.java | 33 ++ .../java/pubsublite/spark/PublishWords.java | 27 +- .../java/pubsublite/spark/ReadResults.java | 17 +- .../java/pubsublite/spark/SimpleRead.java | 70 ++++ .../java/pubsublite/spark/SimpleWrite.java | 70 ++++ .../main/java/pubsublite/spark/WordCount.java | 18 +- .../spark/SampleIntegrationTest.java | 318 ------------------ .../java/pubsublite/spark/SampleTestBase.java | 173 ++++++++++ .../spark/SamplesIntegrationTest.java | 309 +++++++++++++++++ samples/word_count_sample.sh | 69 ++++ 13 files changed, 1076 insertions(+), 391 deletions(-) create mode 100644 samples/simple_read_sample.sh create mode 100644 samples/simple_write_sample.sh create mode 100644 samples/snippets/src/main/java/pubsublite/spark/CommonUtils.java create mode 100644 
samples/snippets/src/main/java/pubsublite/spark/SimpleRead.java create mode 100644 samples/snippets/src/main/java/pubsublite/spark/SimpleWrite.java delete mode 100644 samples/snippets/src/test/java/pubsublite/spark/SampleIntegrationTest.java create mode 100644 samples/snippets/src/test/java/pubsublite/spark/SampleTestBase.java create mode 100644 samples/snippets/src/test/java/pubsublite/spark/SamplesIntegrationTest.java create mode 100644 samples/word_count_sample.sh diff --git a/samples/README.md b/samples/README.md index 3091db75..ea1ae264 100644 --- a/samples/README.md +++ b/samples/README.md @@ -1,57 +1,57 @@ -# Pub/Sub Lite Spark Connector Word Count Samples +# Pub/Sub Lite Spark Connector Samples -This directory contains a word count sample for Pub/Sub Lite Spark Connector. The sample will read -single word count messages from Pub/Sub Lite, do the aggregation (count words) in Spark, and finally -write back to Pub/Sub Lite. Note the topic/subscription to read is different from the topic/subscription -to write and verify the final word count results. +This directory contains 3 samples for Pub/Sub Lite Spark Connector: +1. [Word count sample](#word-count-sample). The sample reads single word count messages from Pub/Sub Lite, + does the aggregation (count words) in Spark, and finally writes back to Pub/Sub Lite. + Note the topic/subscription to read is different from the topic/subscription to write + and verify the final word count results. +2. [Simple read sample](#simple-read-sample). The sample reads messages from Pub/Sub Lite and outputs them to the console sink. +3. [Simple write sample](#simple-write-sample). The sample creates a DataFrame inside Spark and writes it to Pub/Sub Lite. -## Authentication +### Authentication Please see the [Google cloud authentication guide](https://cloud.google.com/docs/authentication/). The recommended approach is to use Application Default Credentials by setting `GOOGLE_APPLICATION_CREDENTIALS`. 
-## Environment Variables +## Word Count Sample + +### Environment Variables Set the following environment variables.
Note `SOURCE_TOPIC_ID` and `SOURCE_SUBSCRIPTION_ID` are used to read _raw_ single word count messages, while `DESTINATION_TOPIC_ID` and `DESTINATION_SUBSCRIPTION_ID` are used for the final word count results. They must be different. ``` -PROJECT_NUMBER=12345 # or your project number -REGION=us-central1 # or your region -ZONE_ID=b # or your zone id -SOURCE_TOPIC_ID=test-topic # or your topic id to create -SOURCE_SUBSCRIPTION_ID=test-subscription # or your subscription to create -DESTINATION_TOPIC_ID=test-topic-2 # or your topic id to create, this is different from SOURCE_TOPIC_ID! -DESTINATION_SUBSCRIPTION_ID=test-subscription-2 # or your subscription to create, this is different from SOURCE_SUBSCRIPTION_ID! -CLUSTER_NAME=waprin-spark7 # or your Dataproc cluster name to create -BUCKET=gs://your-gcs-bucket -CONNECTOR_VERSION= # latest pubsublite-spark-sql-streaming release version -PUBSUBLITE_SPARK_SQL_STREAMING_JAR_LOCATION= # downloaded pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies jar location +export PROJECT_NUMBER=12345 # or your project number +export REGION=us-central1 # or your region +export ZONE_ID=b # or your zone id +export SOURCE_TOPIC_ID=test-topic # or your topic id to create +export SOURCE_SUBSCRIPTION_ID=test-subscription # or your subscription to create +export DESTINATION_TOPIC_ID=test-topic-2 # or your topic id to create, this is different from SOURCE_TOPIC_ID! +export DESTINATION_SUBSCRIPTION_ID=test-subscription-2 # or your subscription to create, this is different from SOURCE_SUBSCRIPTION_ID!
+export CLUSTER_NAME=waprin-spark7 # or your Dataproc cluster name to create +export BUCKET=gs://your-gcs-bucket +export CONNECTOR_VERSION= # latest pubsublite-spark-sql-streaming release version ``` -## Running word count sample +### Running word count sample -To run the word count sample in Dataproc cluster, follow the steps: +To run the word count sample in Dataproc cluster, either use provided bash script `word_count_sample.sh run` or +follow the steps: -1. `cd samples/snippets` -2. Set the current sample version. +1. `cd samples/snippets` +2. Set extra environment variables. ```sh - SAMPLE_VERSION=$(mvn -q \ + export SAMPLE_VERSION=$(mvn -q \ -Dexec.executable=echo \ -Dexec.args='${project.version}' \ --non-recursive \ exec:exec) + export SOURCE_SUBSCRIPTION_PATH=projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/subscriptions/$SOURCE_SUBSCRIPTION_ID + export DESTINATION_TOPIC_PATH=projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/topics/$DESTINATION_TOPIC_ID ``` -3. Create both the source and destination topics and subscriptions, and publish word count messages to the _source_ - topic. +3. Create both the source and destination topics and subscriptions, and publish word count messages to the _source_ + topic. ```sh - PROJECT_NUMBER=$PROJECT_NUMBER \ - REGION=$REGION \ - ZONE_ID=$ZONE_ID \ - SOURCE_TOPIC_ID=$SOURCE_TOPIC_ID \ - SOURCE_SUBSCRIPTION_ID=$SOURCE_SUBSCRIPTION_ID \ - DESTINATION_TOPIC_ID=$DESTINATION_TOPIC_ID \ - DESTINATION_SUBSCRIPTION_ID=$DESTINATION_SUBSCRIPTION_ID \ mvn compile exec:java -Dexec.mainClass=pubsublite.spark.PublishWords ``` 4. 
Create a Dataproc cluster @@ -77,20 +77,18 @@ To run the word count sample in Dataproc cluster, follow the steps: ```sh gcloud dataproc jobs submit spark --cluster=$CLUSTER_NAME \ --jars=$BUCKET/pubsublite-spark-snippets-$SAMPLE_VERSION.jar,$BUCKET/pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar \ - --class=pubsublite.spark.WordCount -- \ - projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/subscriptions/$SOURCE_SUBSCRIPTION_ID \ - projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/topics/$DESTINATION_TOPIC_ID + --class=pubsublite.spark.WordCount \ + --properties=spark.submit.deployMode=cluster,spark.yarn.appMasterEnv.SOURCE_SUBSCRIPTION_PATH=$SOURCE_SUBSCRIPTION_PATH,spark.yarn.appMasterEnv.DESTINATION_TOPIC_PATH=$DESTINATION_TOPIC_PATH ``` 10. Read word count results from Pub/Sub Lite; you should see the results in the console output. ```sh - PROJECT_NUMBER=$PROJECT_NUMBER \ - REGION=$REGION \ - ZONE_ID=$ZONE_ID \ - DESTINATION_SUBSCRIPTION_ID=$DESTINATION_SUBSCRIPTION_ID \ mvn compile exec:java -Dexec.mainClass=pubsublite.spark.ReadResults ``` ## Cleaning up + +To clean up, either use the provided bash script `word_count_sample.sh clean` or follow the steps: + 1. Delete Pub/Sub Lite topic and subscription. ```sh gcloud pubsub lite-subscriptions delete $SOURCE_SUBSCRIPTION_ID --zone=$REGION-$ZONE_ID @@ -107,10 +105,171 @@ To run the word count sample in Dataproc cluster, follow the steps: gcloud dataproc clusters delete $CLUSTER_NAME --region=$REGION ``` +## Simple Read Sample + +### Environment Variables +Set the following environment variables.
+``` +export PROJECT_NUMBER=12345 # or your project number +export REGION=us-central1 # or your region +export ZONE_ID=b # or your zone id +export SOURCE_TOPIC_ID=test-topic # or your topic id to create +export SOURCE_SUBSCRIPTION_ID=test-subscription # or your subscription to create +export CLUSTER_NAME=waprin-spark7 # or your Dataproc cluster name to create +export BUCKET=gs://your-gcs-bucket +export CONNECTOR_VERSION= # latest pubsublite-spark-sql-streaming release version +``` + +### Running simple read sample + +To run the simple read sample in a Dataproc cluster, either use the provided bash script `simple_read_sample.sh run` or +follow the steps: + +1. `cd samples/snippets` +2. Set extra environment variables. + ```sh + export SAMPLE_VERSION=$(mvn -q \ + -Dexec.executable=echo \ + -Dexec.args='${project.version}' \ + --non-recursive \ + exec:exec) + export SOURCE_SUBSCRIPTION_PATH=projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/subscriptions/$SOURCE_SUBSCRIPTION_ID + ``` +3. Create both the source and destination topics and subscriptions, and publish word count messages to the _source_ + topic. + ```sh + mvn compile exec:java -Dexec.mainClass=pubsublite.spark.PublishWords + ``` +4. Create a Dataproc cluster + ```sh + gcloud dataproc clusters create $CLUSTER_NAME --region=$REGION --zone=$REGION-$ZONE_ID --image-version=1.5-debian10 --scopes=cloud-platform + ``` +5. Package sample jar + ```sh + mvn clean package -Dmaven.test.skip=true + ``` +6. Download `pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar` from [Maven Central](https://search.maven.org/artifact/com.google.cloud/pubsublite-spark-sql-streaming) and set the `PUBSUBLITE_SPARK_SQL_STREAMING_JAR_LOCATION` environment variable. +7.
Create a GCS bucket and upload both `pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar` and the sample jar onto GCS + ```sh + gsutil mb $BUCKET + gsutil cp target/pubsublite-spark-snippets-$SAMPLE_VERSION.jar $BUCKET + gsutil cp $PUBSUBLITE_SPARK_SQL_STREAMING_JAR_LOCATION $BUCKET + ``` +8. Set Dataproc region + ```sh + gcloud config set dataproc/region $REGION + ``` +9. Run the sample in Dataproc. You should see the messages in the console output. + ```sh + gcloud dataproc jobs submit spark --cluster=$CLUSTER_NAME \ + --jars=$BUCKET/pubsublite-spark-snippets-$SAMPLE_VERSION.jar,$BUCKET/pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar \ + --class=pubsublite.spark.SimpleRead -- $SOURCE_SUBSCRIPTION_PATH + ``` + +### Cleaning up + +To clean up, either use the provided bash script `simple_read_sample.sh clean` or follow the steps: + +1. Delete Pub/Sub Lite topic and subscription. + ```sh + gcloud pubsub lite-subscriptions delete $SOURCE_SUBSCRIPTION_ID --zone=$REGION-$ZONE_ID + gcloud pubsub lite-topics delete $SOURCE_TOPIC_ID --zone=$REGION-$ZONE_ID + ``` +2. Delete GCS bucket. + ```sh + gsutil -m rm -rf $BUCKET + ``` +3. Delete Dataproc cluster. + ```sh + gcloud dataproc clusters delete $CLUSTER_NAME --region=$REGION + ``` + +## Simple Write Sample + +### Environment Variables +Set the following environment variables.
+``` +export PROJECT_NUMBER=12345 # or your project number +export REGION=us-central1 # or your region +export ZONE_ID=b # or your zone id +export DESTINATION_TOPIC_ID=test-topic # or your topic id to create +export DESTINATION_SUBSCRIPTION_ID=test-subscription # or your subscription to create +export CLUSTER_NAME=waprin-spark7 # or your Dataproc cluster name to create +export BUCKET=gs://your-gcs-bucket +export CONNECTOR_VERSION= # latest pubsublite-spark-sql-streaming release version +``` + +### Running simple write sample + +To run the simple write sample in a Dataproc cluster, either use the provided bash script `simple_write_sample.sh run` or +follow the steps: + +1. `cd samples/snippets` +2. Set extra environment variables. + ```sh + export SAMPLE_VERSION=$(mvn -q \ + -Dexec.executable=echo \ + -Dexec.args='${project.version}' \ + --non-recursive \ + exec:exec) + export DESTINATION_TOPIC_PATH=projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/topics/$DESTINATION_TOPIC_ID + ``` +3. Create both the source and destination topics and subscriptions, and publish word count messages to the _source_ + topic. + ```sh + mvn compile exec:java -Dexec.mainClass=pubsublite.spark.PublishWords + ``` +4. Create a Dataproc cluster + ```sh + gcloud dataproc clusters create $CLUSTER_NAME --region=$REGION --zone=$REGION-$ZONE_ID --image-version=1.5-debian10 --scopes=cloud-platform + ``` +5. Package sample jar + ```sh + mvn clean package -Dmaven.test.skip=true + ``` +6. Download `pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar` from [Maven Central](https://search.maven.org/artifact/com.google.cloud/pubsublite-spark-sql-streaming) and set the `PUBSUBLITE_SPARK_SQL_STREAMING_JAR_LOCATION` environment variable. +7.
Create a GCS bucket and upload both `pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar` and the sample jar onto GCS + ```sh + gsutil mb $BUCKET + gsutil cp target/pubsublite-spark-snippets-$SAMPLE_VERSION.jar $BUCKET + gsutil cp $PUBSUBLITE_SPARK_SQL_STREAMING_JAR_LOCATION $BUCKET + ``` +8. Set Dataproc region + ```sh + gcloud config set dataproc/region $REGION + ``` +9. Run the sample in Dataproc. This publishes messages from Spark to Pub/Sub Lite. + ```sh + gcloud dataproc jobs submit spark --cluster=$CLUSTER_NAME \ + --jars=$BUCKET/pubsublite-spark-snippets-$SAMPLE_VERSION.jar,$BUCKET/pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar \ + --class=pubsublite.spark.SimpleWrite \ + --properties=spark.submit.deployMode=cluster,spark.yarn.appMasterEnv.DESTINATION_TOPIC_PATH=$DESTINATION_TOPIC_PATH + ``` + +### Cleaning up + +To clean up, either use the provided bash script `simple_write_sample.sh clean` or follow the steps: + +1. Delete Pub/Sub Lite topic and subscription. + ```sh + gcloud pubsub lite-subscriptions delete $DESTINATION_SUBSCRIPTION_ID --zone=$REGION-$ZONE_ID + gcloud pubsub lite-topics delete $DESTINATION_TOPIC_ID --zone=$REGION-$ZONE_ID + ``` +2. Delete GCS bucket. + ```sh + gsutil -m rm -rf $BUCKET + ``` +3. Delete Dataproc cluster. + ```sh + gcloud dataproc clusters delete $CLUSTER_NAME --region=$REGION + ``` + ## Common issues 1. Permission not granted.
This could happen when creating a topic and a subscription, or submitting a job to your Dataproc cluster. - Make sure your service account has at least `Editor` permissions for Pub/Sub Lite and Dataproc. + Make sure your service account has at least `Editor` permissions for Pub/Sub Lite and Dataproc. Your Dataproc cluster needs `scope=cloud-platform` to access other services and resources within the same project. Your `gcloud` and `GOOGLE_APPLICATION_CREDENTIALS` should access the same project. Check out which project your `gcloud` and `gsutil` commands use with `gcloud config get-value project`. diff --git a/samples/simple_read_sample.sh b/samples/simple_read_sample.sh new file mode 100644 index 00000000..bf81cadd --- /dev/null +++ b/samples/simple_read_sample.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Bash script that runs simple read sample. +set -e + +if [ "$1" == "run" ]; then + echo "Running simple read sample..." + + cd samples/snippets + + # Set the current sample version. + export SAMPLE_VERSION=$(mvn -q \ + -Dexec.executable=echo \ + -Dexec.args='${project.version}' \ + --non-recursive \ + exec:exec) + + # Set extra environment variables. + export SOURCE_SUBSCRIPTION_PATH=projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/subscriptions/$SOURCE_SUBSCRIPTION_ID + + # Create both the source and destination topics and subscriptions, + # and publish word count messages to the _source_ topic. + mvn compile exec:java -Dexec.mainClass=pubsublite.spark.PublishWords + + # Create a Dataproc cluster + gcloud dataproc clusters create $CLUSTER_NAME \ + --region=$REGION \ + --zone=$REGION-$ZONE_ID \ + --image-version=1.5-debian10 \ + --scopes=cloud-platform + + # Package sample jar + mvn clean package -Dmaven.test.skip=true + + # Create GCS bucket and upload sample jar onto GCS + gsutil mb $BUCKET + gsutil cp target/pubsublite-spark-snippets-$SAMPLE_VERSION.jar $BUCKET + + # Set Dataproc region + gcloud config set dataproc/region $REGION + + # Run the sample in Dataproc.
You should see the messages in the console output. + gcloud dataproc jobs submit spark --cluster=$CLUSTER_NAME \ + --jars=$BUCKET/pubsublite-spark-snippets-$SAMPLE_VERSION.jar,gs://spark-lib/pubsublite/pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar \ + --class=pubsublite.spark.SimpleRead -- $SOURCE_SUBSCRIPTION_PATH + + echo "Simple read sample finished." +elif [ "$1" == "clean" ]; then + echo "Cleaning up..." + + # Delete Pub/Sub Lite topic and subscription. + gcloud pubsub lite-subscriptions delete $SOURCE_SUBSCRIPTION_ID --zone=$REGION-$ZONE_ID + gcloud pubsub lite-topics delete $SOURCE_TOPIC_ID --zone=$REGION-$ZONE_ID + + # Delete GCS bucket. + gsutil -m rm -rf $BUCKET + + # Delete Dataproc cluster. + gcloud dataproc clusters delete $CLUSTER_NAME --region=$REGION + echo "Clean up finished." +else + echo "Invalid arguments, should be either run or clean." + exit 1 +fi diff --git a/samples/simple_write_sample.sh b/samples/simple_write_sample.sh new file mode 100644 index 00000000..6688b752 --- /dev/null +++ b/samples/simple_write_sample.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Bash script that runs simple write sample. +set -e + +if [ "$1" == "run" ]; then + echo "Running simple write sample..." + + cd samples/snippets + + # Set the current sample version. + export SAMPLE_VERSION=$(mvn -q \ + -Dexec.executable=echo \ + -Dexec.args='${project.version}' \ + --non-recursive \ + exec:exec) + + # Set extra environment variables.
+ export DESTINATION_TOPIC_PATH=projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/topics/$DESTINATION_TOPIC_ID + + # Create a Dataproc cluster + gcloud dataproc clusters create $CLUSTER_NAME \ + --region=$REGION \ + --zone=$REGION-$ZONE_ID \ + --image-version=1.5-debian10 \ + --scopes=cloud-platform + + # Package sample jar + mvn clean package -Dmaven.test.skip=true + + # Create GCS bucket and upload sample jar onto GCS + gsutil mb $BUCKET + gsutil cp target/pubsublite-spark-snippets-$SAMPLE_VERSION.jar $BUCKET + + # Set Dataproc region + gcloud config set dataproc/region $REGION + + # Run the sample in Dataproc. This publishes messages from Spark to Pub/Sub Lite. + gcloud dataproc jobs submit spark --cluster=$CLUSTER_NAME \ + --jars=$BUCKET/pubsublite-spark-snippets-$SAMPLE_VERSION.jar,gs://spark-lib/pubsublite/pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar \ + --class=pubsublite.spark.SimpleWrite \ + --properties=spark.submit.deployMode=cluster,spark.yarn.appMasterEnv.DESTINATION_TOPIC_PATH=$DESTINATION_TOPIC_PATH + + # Read results from Pub/Sub Lite; you should see the results in the console output. + mvn compile exec:java -Dexec.mainClass=pubsublite.spark.ReadResults + + echo "Simple write sample finished." +elif [ "$1" == "clean" ]; then + echo "Cleaning up..." + + # Delete Pub/Sub Lite topic and subscription. + gcloud pubsub lite-subscriptions delete $DESTINATION_SUBSCRIPTION_ID --zone=$REGION-$ZONE_ID + gcloud pubsub lite-topics delete $DESTINATION_TOPIC_ID --zone=$REGION-$ZONE_ID + + # Delete GCS bucket. + gsutil -m rm -rf $BUCKET + + # Delete Dataproc cluster. + gcloud dataproc clusters delete $CLUSTER_NAME --region=$REGION + echo "Clean up finished." +else + echo "Invalid arguments, should be either run or clean."
+ exit 1 +fi diff --git a/samples/snippets/src/main/java/pubsublite/spark/CommonUtils.java b/samples/snippets/src/main/java/pubsublite/spark/CommonUtils.java new file mode 100644 index 00000000..8beddb4e --- /dev/null +++ b/samples/snippets/src/main/java/pubsublite/spark/CommonUtils.java @@ -0,0 +1,33 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pubsublite.spark; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; +import java.util.Map; +import java.util.Set; + +public class CommonUtils { + + public static Map<String, String> getAndValidateEnvVars(String...
keys) { + Map<String, String> env = System.getenv(); + Set<String> missingVars = Sets.difference(Sets.newHashSet(keys), env.keySet()); + Preconditions.checkState( + missingVars.isEmpty(), "Missing required environment variables: " + missingVars); + return env; + } +} diff --git a/samples/snippets/src/main/java/pubsublite/spark/PublishWords.java b/samples/snippets/src/main/java/pubsublite/spark/PublishWords.java index 17152f67..e9538c6e 100644 --- a/samples/snippets/src/main/java/pubsublite/spark/PublishWords.java +++ b/samples/snippets/src/main/java/pubsublite/spark/PublishWords.java @@ -20,15 +20,11 @@ import static pubsublite.spark.AdminUtils.createTopicExample; import static pubsublite.spark.AdminUtils.publisherExample; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Sets; import com.google.common.io.Resources; import java.nio.charset.Charset; import java.util.Arrays; import java.util.List; import java.util.Map; -import java.util.Set; public class PublishWords { @@ -42,20 +38,15 @@ public class PublishWords { public static void main(String[] args) throws Exception { - Map<String, String> env = System.getenv(); - Set<String> missingVars = - Sets.difference( - ImmutableSet.of( - REGION, - ZONE_ID, - SOURCE_TOPIC_ID, - SOURCE_SUBSCRIPTION_ID, - DESTINATION_TOPIC_ID, - DESTINATION_SUBSCRIPTION_ID, - PROJECT_NUMBER), - env.keySet()); - Preconditions.checkState( - missingVars.isEmpty(), "Missing required environment variables: " + missingVars); + Map<String, String> env = + CommonUtils.getAndValidateEnvVars( + REGION, + ZONE_ID, + SOURCE_TOPIC_ID, + SOURCE_SUBSCRIPTION_ID, + DESTINATION_TOPIC_ID, + DESTINATION_SUBSCRIPTION_ID, + PROJECT_NUMBER); final String cloudRegion = env.get(REGION); char zoneId = env.get(ZONE_ID).charAt(0); diff --git a/samples/snippets/src/main/java/pubsublite/spark/ReadResults.java b/samples/snippets/src/main/java/pubsublite/spark/ReadResults.java index e93d08e0..4c3b8041 100644 ---
a/samples/snippets/src/main/java/pubsublite/spark/ReadResults.java +++ b/samples/snippets/src/main/java/pubsublite/spark/ReadResults.java @@ -18,11 +18,7 @@ import static pubsublite.spark.AdminUtils.subscriberExample; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Sets; import java.util.Map; -import java.util.Set; public class ReadResults { @@ -32,21 +28,16 @@ public class ReadResults { private static final String PROJECT_NUMBER = "PROJECT_NUMBER"; public static void main(String[] args) { - - Map<String, String> env = System.getenv(); - Set<String> missingVars = - Sets.difference( - ImmutableSet.of(REGION, ZONE_ID, DESTINATION_SUBSCRIPTION_ID, PROJECT_NUMBER), - env.keySet()); - Preconditions.checkState( - missingVars.isEmpty(), "Missing required environment variables: " + missingVars); + Map<String, String> env = + CommonUtils.getAndValidateEnvVars( + REGION, ZONE_ID, DESTINATION_SUBSCRIPTION_ID, PROJECT_NUMBER); String cloudRegion = env.get(REGION); char zoneId = env.get(ZONE_ID).charAt(0); String destinationSubscriptionId = env.get(DESTINATION_SUBSCRIPTION_ID); long projectNumber = Long.parseLong(env.get(PROJECT_NUMBER)); - System.out.println("Word count results:"); + System.out.println("Results from Pub/Sub Lite:"); subscriberExample(cloudRegion, zoneId, projectNumber, destinationSubscriptionId) .forEach((m) -> System.out.println(m.getData().toStringUtf8().replace("_", ": "))); diff --git a/samples/snippets/src/main/java/pubsublite/spark/SimpleRead.java b/samples/snippets/src/main/java/pubsublite/spark/SimpleRead.java new file mode 100644 index 00000000..ca5eea90 --- /dev/null +++ b/samples/snippets/src/main/java/pubsublite/spark/SimpleRead.java @@ -0,0 +1,70 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pubsublite.spark; + +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.streaming.OutputMode; +import org.apache.spark.sql.streaming.StreamingQuery; +import org.apache.spark.sql.streaming.Trigger; + +public class SimpleRead { + + public static void main(String[] args) throws Exception { + // Takes command line arguments instead of environment variables here. The reason + // is that we want to use client deployment mode so that the user can see the console + // output; using cluster deployment mode would mean the console output ends up on + // one of the worker nodes and is not easily accessible. Unfortunately, the driver's + // environment variables cannot be set dynamically; they can only be set + // at cluster startup time via spark-env.sh.
+ String sourceSubscriptionPath = args[0]; + simpleRead(sourceSubscriptionPath); + } + + private static void simpleRead(String sourceSubscriptionPath) throws Exception { + final String appId = UUID.randomUUID().toString(); + + SparkSession spark = + SparkSession.builder() + .appName(String.format("Simple read (ID: %s)", appId)) + .master("yarn") + .getOrCreate(); + + // Read messages from Pub/Sub Lite + Dataset<Row> df = + spark + .readStream() + .format("pubsublite") + .option("pubsublite.subscription", sourceSubscriptionPath) + .load(); + + // Write messages to the console output + StreamingQuery query = + df.writeStream() + .format("console") + .outputMode(OutputMode.Append()) + .trigger(Trigger.ProcessingTime(1, TimeUnit.SECONDS)) + .start(); + + // Wait enough time to execute query + query.awaitTermination(60 * 1000); // 60s + query.stop(); + } +} diff --git a/samples/snippets/src/main/java/pubsublite/spark/SimpleWrite.java b/samples/snippets/src/main/java/pubsublite/spark/SimpleWrite.java new file mode 100644 index 00000000..489f1de4 --- /dev/null +++ b/samples/snippets/src/main/java/pubsublite/spark/SimpleWrite.java @@ -0,0 +1,70 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package pubsublite.spark; + +import static org.apache.spark.sql.functions.concat; +import static org.apache.spark.sql.functions.lit; + +import java.util.Map; +import java.util.Objects; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.streaming.OutputMode; +import org.apache.spark.sql.streaming.StreamingQuery; +import org.apache.spark.sql.streaming.Trigger; +import org.apache.spark.sql.types.DataTypes; + +public class SimpleWrite { + private static final String DESTINATION_TOPIC_PATH = "DESTINATION_TOPIC_PATH"; + + public static void main(String[] args) throws Exception { + Map<String, String> env = CommonUtils.getAndValidateEnvVars(DESTINATION_TOPIC_PATH); + simpleWrite(Objects.requireNonNull(env.get(DESTINATION_TOPIC_PATH))); + } + + private static void simpleWrite(String destinationTopicPath) throws Exception { + final String appId = UUID.randomUUID().toString(); + SparkSession spark = + SparkSession.builder() + .appName(String.format("Simple write (ID: %s)", appId)) + .master("yarn") + .getOrCreate(); + + // Generate rate source, 1 row per second + Dataset<Row> df = spark.readStream().format("rate").load(); + df = + df.withColumn("key", lit("testkey").cast(DataTypes.BinaryType)) + .withColumn("data", concat(lit("data_"), df.col("value")).cast(DataTypes.BinaryType)); + + // Write the generated rows to Pub/Sub Lite + StreamingQuery query = + df.writeStream() + .format("pubsublite") + .option("pubsublite.topic", destinationTopicPath) + .option("checkpointLocation", String.format("/tmp/checkpoint-%s", appId)) + .outputMode(OutputMode.Append()) + .trigger(Trigger.ProcessingTime(1, TimeUnit.SECONDS)) + .start(); + + // Wait enough time to execute query + query.awaitTermination(60 * 1000); // 60s + query.stop(); + } +} diff --git a/samples/snippets/src/main/java/pubsublite/spark/WordCount.java
b/samples/snippets/src/main/java/pubsublite/spark/WordCount.java index 4696bc69..8bc44330 100644 --- a/samples/snippets/src/main/java/pubsublite/spark/WordCount.java +++ b/samples/snippets/src/main/java/pubsublite/spark/WordCount.java @@ -20,6 +20,8 @@ import static org.apache.spark.sql.functions.lit; import static org.apache.spark.sql.functions.split; +import java.util.Map; +import java.util.Objects; import java.util.UUID; import java.util.concurrent.TimeUnit; import org.apache.spark.sql.Column; @@ -33,10 +35,20 @@ public class WordCount { + private static final String SOURCE_SUBSCRIPTION_PATH = "SOURCE_SUBSCRIPTION_PATH"; + private static final String DESTINATION_TOPIC_PATH = "DESTINATION_TOPIC_PATH"; + public static void main(String[] args) throws Exception { + Map<String, String> env = + CommonUtils.getAndValidateEnvVars(SOURCE_SUBSCRIPTION_PATH, DESTINATION_TOPIC_PATH); + wordCount( + Objects.requireNonNull(env.get(SOURCE_SUBSCRIPTION_PATH)), + Objects.requireNonNull(env.get(DESTINATION_TOPIC_PATH))); + } + + private static void wordCount(String sourceSubscriptionPath, String destinationTopicPath) + throws Exception { final String appId = UUID.randomUUID().toString(); - final String sourceSubscriptionPath = args[0]; - final String destinationTopicPath = args[1]; SparkSession spark = SparkSession.builder() @@ -75,6 +87,8 @@ public static void main(String[] args) throws Exception { .outputMode(OutputMode.Complete()) .trigger(Trigger.ProcessingTime(1, TimeUnit.SECONDS)) .start(); + + // Wait enough time to execute query query.awaitTermination(60 * 1000); // 60s query.stop(); } diff --git a/samples/snippets/src/test/java/pubsublite/spark/SampleIntegrationTest.java b/samples/snippets/src/test/java/pubsublite/spark/SampleIntegrationTest.java deleted file mode 100644 index 27afa147..00000000 --- a/samples/snippets/src/test/java/pubsublite/spark/SampleIntegrationTest.java +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Copyright 2021 Google LLC - * - * Licensed under the Apache License,
Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package pubsublite.spark; - -import static com.google.common.truth.Truth.assertThat; -import static pubsublite.spark.AdminUtils.createSubscriptionExample; -import static pubsublite.spark.AdminUtils.createTopicExample; -import static pubsublite.spark.AdminUtils.deleteSubscriptionExample; -import static pubsublite.spark.AdminUtils.deleteTopicExample; -import static pubsublite.spark.AdminUtils.subscriberExample; - -import com.google.api.gax.longrunning.OperationFuture; -import com.google.cloud.dataproc.v1.Job; -import com.google.cloud.dataproc.v1.JobControllerClient; -import com.google.cloud.dataproc.v1.JobControllerSettings; -import com.google.cloud.dataproc.v1.JobMetadata; -import com.google.cloud.dataproc.v1.JobPlacement; -import com.google.cloud.dataproc.v1.SparkJob; -import com.google.cloud.pubsublite.CloudRegion; -import com.google.cloud.pubsublite.CloudZone; -import com.google.cloud.pubsublite.ProjectId; -import com.google.cloud.pubsublite.ProjectNumber; -import com.google.cloud.pubsublite.SubscriptionName; -import com.google.cloud.pubsublite.SubscriptionPath; -import com.google.cloud.pubsublite.TopicName; -import com.google.cloud.pubsublite.TopicPath; -import com.google.cloud.storage.BlobId; -import com.google.cloud.storage.BlobInfo; -import com.google.cloud.storage.Storage; -import com.google.cloud.storage.StorageOptions; -import com.google.common.base.Preconditions; -import com.google.common.collect.ImmutableList; -import 
com.google.common.collect.ImmutableSet; -import com.google.common.collect.Sets; -import com.google.pubsub.v1.PubsubMessage; -import java.io.BufferedReader; -import java.io.File; -import java.io.InputStreamReader; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.HashMap; -import java.util.Map; -import java.util.Queue; -import java.util.Set; -import java.util.UUID; -import org.apache.commons.lang.StringUtils; -import org.apache.maven.shared.invoker.DefaultInvocationRequest; -import org.apache.maven.shared.invoker.DefaultInvoker; -import org.apache.maven.shared.invoker.InvocationRequest; -import org.apache.maven.shared.invoker.InvocationResult; -import org.apache.maven.shared.invoker.Invoker; -import org.apache.maven.shared.invoker.MavenInvocationException; -import org.apache.maven.shared.utils.cli.CommandLineException; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class SampleIntegrationTest { - - private static final String CLOUD_REGION = "CLOUD_REGION"; - private static final String CLOUD_ZONE = "CLOUD_ZONE"; - private static final String PROJECT_NUMBER = "GOOGLE_CLOUD_PROJECT_NUMBER"; - private static final String PROJECT_ID = "PROJECT_ID"; - private static final String TOPIC_ID = "TOPIC_ID"; - private static final String CLUSTER_NAME = "CLUSTER_NAME"; - private static final String BUCKET_NAME = "BUCKET_NAME"; - private static final String SAMPLE_VERSION = "SAMPLE_VERSION"; - private static final String CONNECTOR_VERSION = "CONNECTOR_VERSION"; - - private final String runId = UUID.randomUUID().toString(); - private CloudRegion cloudRegion; - private CloudZone cloudZone; - private ProjectNumber projectNumber; - private ProjectId projectId; - private TopicName sourceTopicId; - private SubscriptionName sourceSubscriptionName; - private SubscriptionPath sourceSubscriptionPath; - private TopicName destinationTopicId; - private TopicPath destinationTopicPath; - private SubscriptionName 
destinationSubscriptionName; - private SubscriptionPath destinationSubscriptionPath; - private String clusterName; - private String bucketName; - private String workingDir; - private String mavenHome; - private String sampleVersion; - private String connectorVersion; - private String sampleJarName; - private String connectorJarName; - private String sampleJarNameInGCS; - private String connectorJarNameInGCS; - private String sampleJarLoc; - private String connectorJarLoc; - - private void findMavenHome() throws Exception { - Process p = Runtime.getRuntime().exec("mvn --version"); - BufferedReader stdOut = new BufferedReader(new InputStreamReader(p.getInputStream())); - assertThat(p.waitFor()).isEqualTo(0); - String s; - while ((s = stdOut.readLine()) != null) { - if (StringUtils.startsWith(s, "Maven home: ")) { - mavenHome = s.replace("Maven home: ", ""); - } - } - } - - private void mavenPackage(String workingDir) - throws MavenInvocationException, CommandLineException { - InvocationRequest request = new DefaultInvocationRequest(); - request.setPomFile(new File(workingDir + "/pom.xml")); - request.setGoals(ImmutableList.of("clean", "package", "-Dmaven.test.skip=true")); - Invoker invoker = new DefaultInvoker(); - invoker.setMavenHome(new File(mavenHome)); - InvocationResult result = invoker.execute(request); - if (result.getExecutionException() != null) { - throw result.getExecutionException(); - } - assertThat(result.getExitCode()).isEqualTo(0); - } - - private void uploadGCS(Storage storage, String fileNameInGCS, String fileLoc) throws Exception { - BlobId blobId = BlobId.of(bucketName, fileNameInGCS); - BlobInfo blobInfo = BlobInfo.newBuilder(blobId).build(); - storage.create(blobInfo, Files.readAllBytes(Paths.get(fileLoc))); - } - - private Job runDataprocJob() throws Exception { - String myEndpoint = String.format("%s-dataproc.googleapis.com:443", cloudRegion.value()); - JobControllerSettings jobControllerSettings = - 
JobControllerSettings.newBuilder().setEndpoint(myEndpoint).build(); - - try (JobControllerClient jobControllerClient = - JobControllerClient.create(jobControllerSettings)) { - JobPlacement jobPlacement = JobPlacement.newBuilder().setClusterName(clusterName).build(); - SparkJob sparkJob = - SparkJob.newBuilder() - .addJarFileUris(String.format("gs://%s/%s", bucketName, sampleJarNameInGCS)) - .addJarFileUris(String.format("gs://%s/%s", bucketName, connectorJarNameInGCS)) - .setMainClass("pubsublite.spark.WordCount") - .addArgs(sourceSubscriptionPath.toString()) - .addArgs(destinationTopicPath.toString()) - .build(); - Job job = Job.newBuilder().setPlacement(jobPlacement).setSparkJob(sparkJob).build(); - OperationFuture submitJobAsOperationAsyncRequest = - jobControllerClient.submitJobAsOperationAsync( - projectId.value(), cloudRegion.value(), job); - return submitJobAsOperationAsyncRequest.get(); - } - } - - private void verifyWordCountResult() { - Map expected = new HashMap<>(); - expected.put("the", 24); - expected.put("of", 16); - expected.put("and", 14); - expected.put("i", 13); - expected.put("my", 10); - expected.put("a", 6); - expected.put("in", 5); - expected.put("that", 5); - expected.put("soul", 4); - expected.put("with", 4); - expected.put("as", 3); - expected.put("feel", 3); - expected.put("like", 3); - expected.put("me", 3); - expected.put("so", 3); - expected.put("then", 3); - expected.put("us", 3); - expected.put("when", 3); - expected.put("which", 3); - expected.put("am", 2); - Map actual = new HashMap<>(); - Queue results = - subscriberExample( - cloudRegion.value(), - cloudZone.zoneId(), - projectNumber.value(), - destinationSubscriptionName.value()); - for (PubsubMessage m : results) { - String[] pair = m.getData().toStringUtf8().split("_"); - actual.put(pair[0], Integer.parseInt(pair[1])); - } - assertThat(actual).containsAtLeastEntriesIn(expected); - } - - private void setUpVariables() { - Map env = System.getenv(); - Set missingVars = - 
Sets.difference( - ImmutableSet.of( - CLOUD_REGION, - CLOUD_ZONE, - PROJECT_NUMBER, - TOPIC_ID, - CLUSTER_NAME, - BUCKET_NAME, - SAMPLE_VERSION, - CONNECTOR_VERSION), - env.keySet()); - Preconditions.checkState( - missingVars.isEmpty(), "Missing required environment variables: " + missingVars); - cloudRegion = CloudRegion.of(env.get(CLOUD_REGION)); - cloudZone = CloudZone.of(cloudRegion, env.get(CLOUD_ZONE).charAt(0)); - projectId = ProjectId.of(env.get(PROJECT_ID)); - projectNumber = ProjectNumber.of(Long.parseLong(env.get(PROJECT_NUMBER))); - sourceTopicId = TopicName.of(env.get(TOPIC_ID)); - sourceSubscriptionName = SubscriptionName.of("sample-integration-sub-source-" + runId); - sourceSubscriptionPath = - SubscriptionPath.newBuilder() - .setProject(projectId) - .setLocation(cloudZone) - .setName(sourceSubscriptionName) - .build(); - destinationTopicId = TopicName.of("sample-integration-topic-destination-" + runId); - destinationTopicPath = - TopicPath.newBuilder() - .setProject(projectId) - .setLocation(cloudZone) - .setName(destinationTopicId) - .build(); - destinationSubscriptionName = - SubscriptionName.of("sample-integration-sub-destination-" + runId); - destinationSubscriptionPath = - SubscriptionPath.newBuilder() - .setProject(projectId) - .setLocation(cloudZone) - .setName(destinationSubscriptionName) - .build(); - clusterName = env.get(CLUSTER_NAME); - bucketName = env.get(BUCKET_NAME); - workingDir = - System.getProperty("user.dir") - .replace("/samples/snapshot", "") - .replace("/samples/snippets", ""); - sampleVersion = env.get(SAMPLE_VERSION); - connectorVersion = env.get(CONNECTOR_VERSION); - sampleJarName = String.format("pubsublite-spark-snippets-%s.jar", sampleVersion); - connectorJarName = - String.format("pubsublite-spark-sql-streaming-%s-with-dependencies.jar", connectorVersion); - sampleJarNameInGCS = String.format("pubsublite-spark-snippets-%s-%s.jar", sampleVersion, runId); - connectorJarNameInGCS = - String.format( - 
"pubsublite-spark-sql-streaming-%s-with-dependencies-%s.jar", connectorVersion, runId); - sampleJarLoc = String.format("%s/samples/snippets/target/%s", workingDir, sampleJarName); - connectorJarLoc = String.format("%s/target/%s", workingDir, connectorJarName); - } - - @Before - public void setUp() throws Exception { - setUpVariables(); - findMavenHome(); - - // Create a subscription to read source word messages - createSubscriptionExample( - cloudRegion.value(), - cloudZone.zoneId(), - projectNumber.value(), - sourceTopicId.value(), - sourceSubscriptionName.value()); - - // Create a topic and subscription for word count final results - createTopicExample( - cloudRegion.value(), - cloudZone.zoneId(), - projectNumber.value(), - destinationTopicId.value(), - /*partitions=*/ 1); - createSubscriptionExample( - cloudRegion.value(), - cloudZone.zoneId(), - projectNumber.value(), - destinationTopicId.value(), - destinationSubscriptionName.value()); - } - - @After - public void tearDown() throws Exception { - // Cleanup the topics and subscriptions - deleteSubscriptionExample(cloudRegion.value(), sourceSubscriptionPath); - deleteSubscriptionExample(cloudRegion.value(), destinationSubscriptionPath); - deleteTopicExample(cloudRegion.value(), destinationTopicPath); - } - - /** Note that source single word messages have been published to a permanent topic. 
*/ - @Test - public void test() throws Exception { - // Maven package into jars - mavenPackage(workingDir); - mavenPackage(workingDir + "/samples"); - - // Upload to GCS - Storage storage = - StorageOptions.newBuilder().setProjectId(projectId.value()).build().getService(); - uploadGCS(storage, sampleJarNameInGCS, sampleJarLoc); - uploadGCS(storage, connectorJarNameInGCS, connectorJarLoc); - - // Run Dataproc job, block until it finishes - runDataprocJob(); - - // Verify final destination messages in Pub/Sub Lite - verifyWordCountResult(); - } -} diff --git a/samples/snippets/src/test/java/pubsublite/spark/SampleTestBase.java b/samples/snippets/src/test/java/pubsublite/spark/SampleTestBase.java new file mode 100644 index 00000000..6086b2cc --- /dev/null +++ b/samples/snippets/src/test/java/pubsublite/spark/SampleTestBase.java @@ -0,0 +1,173 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package pubsublite.spark; + +import static com.google.common.truth.Truth.assertThat; + +import com.google.api.gax.longrunning.OperationFuture; +import com.google.cloud.dataproc.v1.Job; +import com.google.cloud.dataproc.v1.JobControllerClient; +import com.google.cloud.dataproc.v1.JobControllerSettings; +import com.google.cloud.dataproc.v1.JobMetadata; +import com.google.cloud.dataproc.v1.JobPlacement; +import com.google.cloud.dataproc.v1.SparkJob; +import com.google.cloud.pubsublite.CloudRegion; +import com.google.cloud.pubsublite.CloudZone; +import com.google.cloud.pubsublite.ProjectId; +import com.google.cloud.pubsublite.ProjectNumber; +import com.google.cloud.pubsublite.TopicName; +import com.google.cloud.storage.BlobId; +import com.google.cloud.storage.BlobInfo; +import com.google.cloud.storage.Storage; +import com.google.common.collect.ImmutableList; +import java.io.BufferedReader; +import java.io.File; +import java.io.InputStreamReader; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Map; +import java.util.UUID; +import org.apache.commons.lang.StringUtils; +import org.apache.maven.shared.invoker.DefaultInvocationRequest; +import org.apache.maven.shared.invoker.DefaultInvoker; +import org.apache.maven.shared.invoker.InvocationRequest; +import org.apache.maven.shared.invoker.InvocationResult; +import org.apache.maven.shared.invoker.Invoker; +import org.apache.maven.shared.invoker.MavenInvocationException; +import org.apache.maven.shared.utils.cli.CommandLineException; + +public abstract class SampleTestBase { + + private static final String CLOUD_REGION = "CLOUD_REGION"; + private static final String CLOUD_ZONE = "CLOUD_ZONE"; + private static final String PROJECT_NUMBER = "GOOGLE_CLOUD_PROJECT_NUMBER"; + private static final String PROJECT_ID = "PROJECT_ID"; + private static final String TOPIC_ID = "TOPIC_ID"; + private static final String CLUSTER_NAME = "CLUSTER_NAME"; + private static final String BUCKET_NAME = 
"BUCKET_NAME"; + private static final String SAMPLE_VERSION = "SAMPLE_VERSION"; + private static final String CONNECTOR_VERSION = "CONNECTOR_VERSION"; + + protected final String runId = UUID.randomUUID().toString(); + protected CloudRegion cloudRegion; + protected CloudZone cloudZone; + protected ProjectNumber projectNumber; + protected ProjectId projectId; + protected TopicName sourceTopicId; + protected String clusterName; + protected String bucketName; + protected String workingDir; + protected String mavenHome; + protected String sampleVersion; + protected String connectorVersion; + protected String sampleJarName; + protected String connectorJarName; + protected String sampleJarNameInGCS; + protected String connectorJarNameInGCS; + protected String sampleJarLoc; + protected String connectorJarLoc; + + protected void setupEnvVars() { + Map<String, String> env = + CommonUtils.getAndValidateEnvVars( + CLOUD_REGION, + CLOUD_ZONE, + PROJECT_ID, + PROJECT_NUMBER, + TOPIC_ID, + CLUSTER_NAME, + BUCKET_NAME, + SAMPLE_VERSION, + CONNECTOR_VERSION); + cloudRegion = CloudRegion.of(env.get(CLOUD_REGION)); + cloudZone = CloudZone.of(cloudRegion, env.get(CLOUD_ZONE).charAt(0)); + projectId = ProjectId.of(env.get(PROJECT_ID)); + projectNumber = ProjectNumber.of(Long.parseLong(env.get(PROJECT_NUMBER))); + sourceTopicId = TopicName.of(env.get(TOPIC_ID)); + + clusterName = env.get(CLUSTER_NAME); + bucketName = env.get(BUCKET_NAME); + workingDir = + System.getProperty("user.dir") + .replace("/samples/snapshot", "") + .replace("/samples/snippets", ""); + sampleVersion = env.get(SAMPLE_VERSION); + connectorVersion = env.get(CONNECTOR_VERSION); + sampleJarName = String.format("pubsublite-spark-snippets-%s.jar", sampleVersion); + connectorJarName = + String.format("pubsublite-spark-sql-streaming-%s-with-dependencies.jar", connectorVersion); + sampleJarNameInGCS = String.format("pubsublite-spark-snippets-%s-%s.jar", sampleVersion, runId); + connectorJarNameInGCS = + String.format( + 
"pubsublite-spark-sql-streaming-%s-with-dependencies-%s.jar", connectorVersion, runId); + sampleJarLoc = String.format("%s/samples/snippets/target/%s", workingDir, sampleJarName); + connectorJarLoc = String.format("%s/target/%s", workingDir, connectorJarName); + } + + protected void findMavenHome() throws Exception { + Process p = Runtime.getRuntime().exec("mvn --version"); + BufferedReader stdOut = new BufferedReader(new InputStreamReader(p.getInputStream())); + assertThat(p.waitFor()).isEqualTo(0); + String s; + while ((s = stdOut.readLine()) != null) { + if (StringUtils.startsWith(s, "Maven home: ")) { + mavenHome = s.replace("Maven home: ", ""); + } + } + } + + protected void mavenPackage(String workingDir) + throws MavenInvocationException, CommandLineException { + InvocationRequest request = new DefaultInvocationRequest(); + request.setPomFile(new File(workingDir + "/pom.xml")); + request.setGoals(ImmutableList.of("clean", "package", "-Dmaven.test.skip=true")); + Invoker invoker = new DefaultInvoker(); + invoker.setMavenHome(new File(mavenHome)); + InvocationResult result = invoker.execute(request); + if (result.getExecutionException() != null) { + throw result.getExecutionException(); + } + assertThat(result.getExitCode()).isEqualTo(0); + } + + protected void uploadGCS(Storage storage, String fileNameInGCS, String fileLoc) throws Exception { + BlobId blobId = BlobId.of(bucketName, fileNameInGCS); + BlobInfo blobInfo = BlobInfo.newBuilder(blobId).build(); + storage.create(blobInfo, Files.readAllBytes(Paths.get(fileLoc))); + } + + protected Job runDataprocJob(SparkJob.Builder sparkJobBuilder) throws Exception { + String myEndpoint = String.format("%s-dataproc.googleapis.com:443", cloudRegion.value()); + JobControllerSettings jobControllerSettings = + JobControllerSettings.newBuilder().setEndpoint(myEndpoint).build(); + + try (JobControllerClient jobControllerClient = + JobControllerClient.create(jobControllerSettings)) { + JobPlacement jobPlacement = 
JobPlacement.newBuilder().setClusterName(clusterName).build(); + sparkJobBuilder + .addJarFileUris(String.format("gs://%s/%s", bucketName, sampleJarNameInGCS)) + .addJarFileUris(String.format("gs://%s/%s", bucketName, connectorJarNameInGCS)); + Job job = + Job.newBuilder().setPlacement(jobPlacement).setSparkJob(sparkJobBuilder.build()).build(); + OperationFuture<Job, JobMetadata> submitJobAsOperationAsyncRequest = + jobControllerClient.submitJobAsOperationAsync( + projectId.value(), cloudRegion.value(), job); + return submitJobAsOperationAsyncRequest.get(); + } + } +} diff --git a/samples/snippets/src/test/java/pubsublite/spark/SamplesIntegrationTest.java b/samples/snippets/src/test/java/pubsublite/spark/SamplesIntegrationTest.java new file mode 100644 index 00000000..7be0f801 --- /dev/null +++ b/samples/snippets/src/test/java/pubsublite/spark/SamplesIntegrationTest.java @@ -0,0 +1,309 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package pubsublite.spark; + +import static com.google.common.truth.Truth.assertThat; +import static pubsublite.spark.AdminUtils.createSubscriptionExample; +import static pubsublite.spark.AdminUtils.createTopicExample; +import static pubsublite.spark.AdminUtils.deleteSubscriptionExample; +import static pubsublite.spark.AdminUtils.deleteTopicExample; +import static pubsublite.spark.AdminUtils.subscriberExample; + +import com.google.cloud.dataproc.v1.Job; +import com.google.cloud.dataproc.v1.SparkJob; +import com.google.cloud.pubsublite.SubscriptionName; +import com.google.cloud.pubsublite.SubscriptionPath; +import com.google.cloud.pubsublite.TopicName; +import com.google.cloud.pubsublite.TopicPath; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageOptions; +import com.google.common.flogger.GoogleLogger; +import com.google.pubsub.v1.PubsubMessage; +import java.util.HashMap; +import java.util.Map; +import java.util.Queue; +import java.util.UUID; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.junit.Before; +import org.junit.Test; + +public class SamplesIntegrationTest extends SampleTestBase { + + private static final GoogleLogger log = GoogleLogger.forEnclosingClass(); + + private SubscriptionName sourceSubscriptionName; + private SubscriptionPath sourceSubscriptionPath; + private TopicName destinationTopicId; + private TopicPath destinationTopicPath; + private SubscriptionName destinationSubscriptionName; + private SubscriptionPath destinationSubscriptionPath; + private Boolean initialized = false; + + @Before + public void beforeClass() throws Exception { + if (initialized) { + return; + } + log.atInfo().log("RunId is: %s", runId); + setupEnvVars(); + findMavenHome(); + + // Maven package into jars + mavenPackage(workingDir); + mavenPackage(workingDir + "/samples"); + + // Upload to GCS + Storage storage = + 
StorageOptions.newBuilder().setProjectId(projectId.value()).build().getService(); + uploadGCS(storage, sampleJarNameInGCS, sampleJarLoc); + uploadGCS(storage, connectorJarNameInGCS, connectorJarLoc); + initialized = true; + } + + /** Note that source single word messages have been published to a permanent topic. */ + @Test + public void testWordCount() throws Exception { + UUID testId = UUID.randomUUID(); + setupSourceWithTestId(testId); + setupDestinationWithTestId(testId); + try { + // Run Dataproc job, block until it finishes + SparkJob.Builder sparkJobBuilder = + SparkJob.newBuilder() + .setMainClass("pubsublite.spark.WordCount") + .putProperties("spark.submit.deployMode", "cluster") + .putProperties( + "spark.yarn.appMasterEnv.SOURCE_SUBSCRIPTION_PATH", + sourceSubscriptionPath.toString()) + .putProperties( + "spark.yarn.appMasterEnv.DESTINATION_TOPIC_PATH", + destinationTopicPath.toString()); + runDataprocJob(sparkJobBuilder); + // Verify final destination messages in Pub/Sub Lite + verifyWordCountResultViaPSL(); + } finally { + deleteSubscriptionExample(cloudRegion.value(), sourceSubscriptionPath); + deleteSubscriptionExample(cloudRegion.value(), destinationSubscriptionPath); + deleteTopicExample(cloudRegion.value(), destinationTopicPath); + } + } + + @Test + public void testSimpleRead() throws Exception { + UUID testId = UUID.randomUUID(); + setupSourceWithTestId(testId); + try { + // Run Dataproc job, block until it finishes + SparkJob.Builder sparkJobBuilder = + SparkJob.newBuilder() + .setMainClass("pubsublite.spark.SimpleRead") + .addArgs(sourceSubscriptionPath.toString()); + Job job = runDataprocJob(sparkJobBuilder); + // Verify results in console + verifyConsoleOutput(job); + } finally { + deleteSubscriptionExample(cloudRegion.value(), sourceSubscriptionPath); + } + } + + @Test + public void testSimpleWrite() throws Exception { + UUID testId = UUID.randomUUID(); + setupDestinationWithTestId(testId); + try { + // Run Dataproc job, block until it 
finishes + SparkJob.Builder sparkJobBuilder = + SparkJob.newBuilder() + .setMainClass("pubsublite.spark.SimpleWrite") + .putProperties("spark.submit.deployMode", "cluster") + .putProperties( + "spark.yarn.appMasterEnv.DESTINATION_TOPIC_PATH", + destinationTopicPath.toString()); + runDataprocJob(sparkJobBuilder); + // Verify write results in PSL + verifySimpleWriteResultViaPSL(); + } finally { + deleteSubscriptionExample(cloudRegion.value(), destinationSubscriptionPath); + deleteTopicExample(cloudRegion.value(), destinationTopicPath); + } + } + + private void setupSourceWithTestId(UUID testId) throws Exception { + sourceSubscriptionName = SubscriptionName.of("sample-integration-sub-source-" + testId); + sourceSubscriptionPath = + SubscriptionPath.newBuilder() + .setProject(projectId) + .setLocation(cloudZone) + .setName(sourceSubscriptionName) + .build(); + createSubscriptionExample( + cloudRegion.value(), + cloudZone.zoneId(), + projectNumber.value(), + sourceTopicId.value(), + sourceSubscriptionName.value()); + } + + private void setupDestinationWithTestId(UUID testId) throws Exception { + destinationTopicId = TopicName.of("sample-integration-topic-destination-" + testId); + destinationTopicPath = + TopicPath.newBuilder() + .setProject(projectId) + .setLocation(cloudZone) + .setName(destinationTopicId) + .build(); + destinationSubscriptionName = + SubscriptionName.of("sample-integration-sub-destination-" + runId); + destinationSubscriptionPath = + SubscriptionPath.newBuilder() + .setProject(projectId) + .setLocation(cloudZone) + .setName(destinationSubscriptionName) + .build(); + createTopicExample( + cloudRegion.value(), + cloudZone.zoneId(), + projectNumber.value(), + destinationTopicId.value(), + /*partitions=*/ 1); + createSubscriptionExample( + cloudRegion.value(), + cloudZone.zoneId(), + projectNumber.value(), + destinationTopicId.value(), + destinationSubscriptionName.value()); + } + + private void verifyWordCountResultViaPSL() { + Map<String, Integer> expected = new HashMap<>(); + expected.put("the", 24); + expected.put("of", 16); + expected.put("and", 14); + expected.put("i", 13); + expected.put("my", 10); + expected.put("a", 6); + expected.put("in", 5); + expected.put("that", 5); + expected.put("soul", 4); + expected.put("with", 4); + expected.put("as", 3); + expected.put("feel", 3); + expected.put("like", 3); + expected.put("me", 3); + expected.put("so", 3); + expected.put("then", 3); + expected.put("us", 3); + expected.put("when", 3); + expected.put("which", 3); + expected.put("am", 2); + Map<String, Integer> actual = new HashMap<>(); + Queue<PubsubMessage> results = + subscriberExample( + cloudRegion.value(), + cloudZone.zoneId(), + projectNumber.value(), + destinationSubscriptionName.value()); + for (PubsubMessage m : results) { + String[] pair = m.getData().toStringUtf8().split("_"); + actual.put(pair[0], Integer.parseInt(pair[1])); + } + assertThat(actual).containsAtLeastEntriesIn(expected); + } + + private void verifySimpleWriteResultViaPSL() { + Queue<PubsubMessage> results = + subscriberExample( + cloudRegion.value(), + cloudZone.zoneId(), + projectNumber.value(), + destinationSubscriptionName.value()); + // The streaming query runs for 60 seconds, and the rate source generates one row per second. 
+ assertThat(results.size()).isGreaterThan(10); + for (PubsubMessage m : results) { + assertThat(m.getOrderingKey()).isEqualTo("testkey"); + assertThat(m.getData().toStringUtf8()).startsWith("data_"); + } + } + + private void verifyConsoleOutput(Job job) { + Storage storage = + StorageOptions.newBuilder().setProjectId(projectId.value()).build().getService(); + Matcher matches = Pattern.compile("gs://(.*?)/(.*)").matcher(job.getDriverOutputResourceUri()); + assertThat(matches.matches()).isTrue(); + + Blob blob = storage.get(matches.group(1), String.format("%s.000000000", matches.group(2))); + String sparkJobOutput = new String(blob.getContent()); + log.atInfo().log(sparkJobOutput); + String expectedWordCountResult = + "-------------------------------------------\n" + + "Batch: 0\n" + + "-------------------------------------------\n" + + "+--------------------+---------+------+---+--------------------+" + + "--------------------+---------------+----------+\n" + + "| subscription|partition|offset|key| data|" + + " publish_timestamp|event_timestamp|attributes|\n" + + "+--------------------+---------+------+---+--------------------+" + + "--------------------+---------------+----------+\n" + + "|projects/java-doc...| 0| 0| []| [61 5F 31]|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 1| []|[77 6F 6E 64 65 7...|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 2| []|[73 65 72 65 6E 6...|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 3| []| [68 61 73 5F 31]|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 4| []|[74 61 6B 65 6E 5...|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 5| []|[70 6F 73 73 65 7...|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 6| []| [6F 66 5F 31]|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 7| []| [6D 79 5F 31]|" + + "2021-02-01 23:26:...| null| []|\n" + + 
"|projects/java-doc...| 0| 8| []|[65 6E 74 69 72 6...|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 9| []| [73 6F 75 6C 5F 31]|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 10| []| [6C 69 6B 65 5F 31]|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 11| []|[74 68 65 73 65 5...|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 12| []|[73 77 65 65 74 5...|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 13| []|[6D 6F 72 6E 69 6...|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 14| []| [6F 66 5F 31]|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 15| []|[73 70 72 69 6E 6...|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 16| []|[77 68 69 63 68 5...|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 17| []| [69 5F 31]|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 18| []|[65 6E 6A 6F 79 5...|" + + "2021-02-01 23:26:...| null| []|\n" + + "|projects/java-doc...| 0| 19| []| [77 69 74 68 5F 31]|" + + "2021-02-01 23:26:...| null| []|\n" + + "+--------------------+---------+------+---+--------------------+" + + "--------------------+" + + "---------------+----------+\n" + + "only showing top 20 rows"; + assertThat(sparkJobOutput).contains(expectedWordCountResult); + } +} diff --git a/samples/word_count_sample.sh b/samples/word_count_sample.sh new file mode 100644 index 00000000..052eee36 --- /dev/null +++ b/samples/word_count_sample.sh @@ -0,0 +1,69 @@ +#!/bin/bash +# Bash script that runs word count sample. +set -e + +if [ "$1" == "run" ]; then + echo "Running word count sample..." + + cd samples/snippets + + # Set the current sample version. + export SAMPLE_VERSION=$(mvn -q \ + -Dexec.executable=echo \ + -Dexec.args="${project.version}" \ + --non-recursive \ + exec:exec) + + # Set extra environment variables. 
+ export SOURCE_SUBSCRIPTION_PATH=projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/subscriptions/$SOURCE_SUBSCRIPTION_ID + export DESTINATION_TOPIC_PATH=projects/$PROJECT_NUMBER/locations/$REGION-$ZONE_ID/topics/$DESTINATION_TOPIC_ID + + # Create both the source and destination topics and subscriptions, + # and publish word count messages to the _source_ topic. + mvn compile exec:java -Dexec.mainClass=pubsublite.spark.PublishWords + + # Create a Dataproc cluster + gcloud dataproc clusters create $CLUSTER_NAME \ --region=$REGION \ --zone=$REGION-$ZONE_ID \ --image-version=1.5-debian10 \ --scopes=cloud-platform + + # Package the sample jar + mvn clean package -Dmaven.test.skip=true + + # Create a GCS bucket and upload the sample jar to GCS + gsutil mb $BUCKET + gsutil cp target/pubsublite-spark-snippets-$SAMPLE_VERSION.jar $BUCKET + + # Set Dataproc region + gcloud config set dataproc/region $REGION + + # Run the sample in Dataproc. This will perform word count aggregation + # and publish word count results to Pub/Sub Lite. + gcloud dataproc jobs submit spark --cluster=$CLUSTER_NAME \ --jars=$BUCKET/pubsublite-spark-snippets-$SAMPLE_VERSION.jar,gs://spark-lib/pubsublite/pubsublite-spark-sql-streaming-$CONNECTOR_VERSION-with-dependencies.jar \ --class=pubsublite.spark.WordCount \ --properties=spark.submit.deployMode=cluster,spark.yarn.appMasterEnv.SOURCE_SUBSCRIPTION_PATH=$SOURCE_SUBSCRIPTION_PATH,spark.yarn.appMasterEnv.DESTINATION_TOPIC_PATH=$DESTINATION_TOPIC_PATH + + # Read word count results from Pub/Sub Lite; the results appear in the console output. + mvn compile exec:java -Dexec.mainClass=pubsublite.spark.ReadResults + echo "Word count sample finished." +elif [ "$1" == "clean" ]; then + echo "Cleaning up..." + + # Delete Pub/Sub Lite topics and subscriptions. 
+ gcloud pubsub lite-subscriptions delete $SOURCE_SUBSCRIPTION_ID --zone=$REGION-$ZONE_ID + gcloud pubsub lite-topics delete $SOURCE_TOPIC_ID --zone=$REGION-$ZONE_ID + gcloud pubsub lite-subscriptions delete $DESTINATION_SUBSCRIPTION_ID --zone=$REGION-$ZONE_ID + gcloud pubsub lite-topics delete $DESTINATION_TOPIC_ID --zone=$REGION-$ZONE_ID + + # Delete GCS bucket. + gsutil -m rm -rf $BUCKET + + # Delete Dataproc cluster. + gcloud dataproc clusters delete $CLUSTER_NAME --region=$REGION + echo "Clean up finished." +else + echo "Invalid arguments, should be either run or clean." + exit 1 From 1e20e481aa9452d110695931b5ac6208fd392996 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 9 Apr 2021 12:12:36 -0700 Subject: [PATCH 37/47] chore: regenerate README (#137) This PR was generated using Autosynth. :rainbow:
Log from Synthtool ``` 2021-04-09 18:39:17,746 synthtool [DEBUG] > Executing /root/.cache/synthtool/java-pubsublite-spark/.github/readme/synth.py. On branch autosynth-readme nothing to commit, working tree clean 2021-04-09 18:39:18,946 synthtool [DEBUG] > Wrote metadata to .github/readme/synth.metadata/synth.metadata. ```
Full log will be available here: https://source.cloud.google.com/results/invocations/ad0b016b-42ba-4d50-8f27-55b916c23ff5/targets - [ ] To automatically regenerate this PR, check this box. (May take up to 24 hours.) --- .github/readme/synth.metadata/synth.metadata | 4 ++-- README.md | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/readme/synth.metadata/synth.metadata b/.github/readme/synth.metadata/synth.metadata index 935ee157..9c995dd0 100644 --- a/.github/readme/synth.metadata/synth.metadata +++ b/.github/readme/synth.metadata/synth.metadata @@ -4,14 +4,14 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "98f5863245584bf517d4817610dcca0c3979a470" + "sha": "eddcb420a9dd6f41a1dd07a56bcb6ec16a444a56" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "705743e66f5c0b24a95f7f30619c9d3ef747b317" + "sha": "1f5e6bc8dc8e3661ee550905fc070e55e1b6cea1" } } ] diff --git a/README.md b/README.md index d1eb9154..7aa7b47c 100644 --- a/README.md +++ b/README.md @@ -8,9 +8,12 @@ Java idiomatic client for [Pub/Sub Lite Spark Connector][product-docs]. - [Product Documentation][product-docs] - [Client Library Documentation][javadocs] + > Note: This client is a work-in-progress, and may occasionally > make backwards-incompatible changes. + + ## Quickstart @@ -175,8 +178,11 @@ has instructions for running the samples. 
| Sample | Source Code | Try it | | --------------------------- | --------------------------------- | ------ | | Admin Utils | [source code](https://github.com/googleapis/java-pubsublite-spark/blob/master/samples/snippets/src/main/java/pubsublite/spark/AdminUtils.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-pubsublite-spark&page=editor&open_in_editor=samples/snippets/src/main/java/pubsublite/spark/AdminUtils.java) | +| Common Utils | [source code](https://github.com/googleapis/java-pubsublite-spark/blob/master/samples/snippets/src/main/java/pubsublite/spark/CommonUtils.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-pubsublite-spark&page=editor&open_in_editor=samples/snippets/src/main/java/pubsublite/spark/CommonUtils.java) | | Publish Words | [source code](https://github.com/googleapis/java-pubsublite-spark/blob/master/samples/snippets/src/main/java/pubsublite/spark/PublishWords.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-pubsublite-spark&page=editor&open_in_editor=samples/snippets/src/main/java/pubsublite/spark/PublishWords.java) | | Read Results | [source code](https://github.com/googleapis/java-pubsublite-spark/blob/master/samples/snippets/src/main/java/pubsublite/spark/ReadResults.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-pubsublite-spark&page=editor&open_in_editor=samples/snippets/src/main/java/pubsublite/spark/ReadResults.java) | +| Simple Read | [source code](https://github.com/googleapis/java-pubsublite-spark/blob/master/samples/snippets/src/main/java/pubsublite/spark/SimpleRead.java) | [![Open in Cloud 
Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-pubsublite-spark&page=editor&open_in_editor=samples/snippets/src/main/java/pubsublite/spark/SimpleRead.java) | +| Simple Write | [source code](https://github.com/googleapis/java-pubsublite-spark/blob/master/samples/snippets/src/main/java/pubsublite/spark/SimpleWrite.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-pubsublite-spark&page=editor&open_in_editor=samples/snippets/src/main/java/pubsublite/spark/SimpleWrite.java) | | Word Count | [source code](https://github.com/googleapis/java-pubsublite-spark/blob/master/samples/snippets/src/main/java/pubsublite/spark/WordCount.java) | [![Open in Cloud Shell][shell_img]](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/googleapis/java-pubsublite-spark&page=editor&open_in_editor=samples/snippets/src/main/java/pubsublite/spark/WordCount.java) | From b5edda641b1aa3d3dc7b3e38cd11b65fec086468 Mon Sep 17 00:00:00 2001 From: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com> Date: Fri, 9 Apr 2021 19:13:36 -0400 Subject: [PATCH 38/47] docs: Add write support documentations (#132) --- .readme-partials.yaml | 60 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/.readme-partials.yaml b/.readme-partials.yaml index 9c84e179..260d7a64 100644 --- a/.readme-partials.yaml +++ b/.readme-partials.yaml @@ -28,20 +28,56 @@ custom_content: | ## Usage + ### Samples + + There are 3 Java samples (word count, simple write, simple read) under [samples](https://github.com/googleapis/java-pubsublite-spark/tree/master/samples) that show how to use the connector inside Dataproc.
+ ### Reading data from Pub/Sub Lite + Here is an example in Python: ```python df = spark.readStream \ - .option("pubsublite.subscription", "projects/$PROJECT_NUMBER/locations/$LOCATION/subscriptions/$SUBSCRIPTION_ID") .format("pubsublite") \ + .option("pubsublite.subscription", "projects/$PROJECT_NUMBER/locations/$LOCATION/subscriptions/$SUBSCRIPTION_ID") \ .load ``` + Here is an example in Java: + ```java + Dataset df = spark + .readStream() + .format("pubsublite") + .option("pubsublite.subscription", "projects/$PROJECT_NUMBER/locations/$LOCATION/subscriptions/$SUBSCRIPTION_ID"t ) + .load(); + ``` Note that the connector supports both MicroBatch Processing and [Continuous Processing](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#continuous-processing). + ### Writing data to Pub/Sub Lite + + Here is an example in Python: + ```python + df.writeStream \ + .format("pubsublite") \ + .option("pubsublite.topic", "projects/$PROJECT_NUMBER/locations/$LOCATION/topics/$TOPIC_ID") \ + .option("checkpointLocation", "path/to/HDFS/dir") \ + .outputMode("complete") \ + .trigger(processingTime="2 seconds") \ + .start() + ``` + Here is an example in Java: + ```java + df.writeStream() + .format("pubsublite") + .option("pubsublite.topic", "projects/$PROJECT_NUMBER/locations/$LOCATION/topics/$TOPIC_ID") + .option("checkpointLocation", "path/to/HDFS/dir") + .outputMode(OutputMode.Complete()) + .trigger(Trigger.ProcessingTime(2, TimeUnit.SECONDS)) + .start(); + ``` + ### Properties - The connector supports a number of options to configure the read: + When reading from Pub/Sub Lite, the connector supports a number of configuration options: | Option | Type | Required | Default Value | Meaning | | ------ | ---- | -------- | ------------- | ------- | @@ -51,9 +87,16 @@ custom_content: | | pubsublite.flowcontrol.maxmessagesperbatch | Long | N | Long.MAX | Max number of messages in micro batch.
| | gcp.credentials.key | String | N | [Application Default Credentials](https://cloud.google.com/docs/authentication/production#automatically) | Service account JSON in base64. | + When writing to Pub/Sub Lite, the connector supports a number of configuration options: + + | Option | Type | Required | Default Value | Meaning | + | ------ | ---- | -------- | ------------- | ------- | + | pubsublite.topic | String | Y | | Full topic path that the connector will write to. | + | gcp.credentials.key | String | N | [Application Default Credentials](https://cloud.google.com/docs/authentication/production#automatically) | Service account JSON in base64. | + ### Data Schema - The connector has fixed data schema as follows: + When reading from Pub/Sub Lite, the connector has a fixed data schema as follows: | Data Field | Spark Data Type | Notes | | ---------- | --------------- | ----- | @@ -66,6 +109,17 @@ custom_content: | | publish_timestamp | TimestampType | | | event_timestamp | TimestampType | Nullable | + When writing to Pub/Sub Lite, the connector matches the following data fields and data types as follows: + + | Data Field | Spark Data Type | Required | + | ---------- | --------------- | ----- | + | key | BinaryType | N | + | data | BinaryType | N | + | attributes | MapType\[StringType, ArrayType\[BinaryType\]\] | N | + | event_timestamp | TimestampType | N | + + Note that when a data field is present in the table but the data type mismatches, the connector will throw an IllegalArgumentException that terminates the query. + ## Building the Connector The connector is built using Maven. The following command creates a JAR file with shaded dependencies: From c4ab0c7c9b94f7edd257fb7905473d203aa73c7d Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 9 Apr 2021 16:46:07 -0700 Subject: [PATCH 39/47] chore: regenerate README (#139) This PR was generated using Autosynth. :rainbow:
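The read and write examples in the documentation patch above both pass fully-qualified resource paths built from `$PROJECT_NUMBER`, `$LOCATION`, and an ID. As a minimal illustrative sketch — these helper functions are hypothetical and not part of the connector — the values for the `pubsublite.subscription` and `pubsublite.topic` options can be assembled like this:

```python
# Hypothetical helpers for building the fully-qualified Pub/Sub Lite
# resource paths expected by the pubsublite.subscription and
# pubsublite.topic options. Not part of the connector itself.

def subscription_path(project_number: int, location: str, subscription_id: str) -> str:
    # Matches the projects/.../locations/.../subscriptions/... shape
    # shown in the documentation examples.
    return f"projects/{project_number}/locations/{location}/subscriptions/{subscription_id}"

def topic_path(project_number: int, location: str, topic_id: str) -> str:
    # Same shape, but for the write-side pubsublite.topic option.
    return f"projects/{project_number}/locations/{location}/topics/{topic_id}"

print(subscription_path(123456789, "us-central1-a", "my-subscription"))
# projects/123456789/locations/us-central1-a/subscriptions/my-subscription
```

The resulting strings would be passed to `.option("pubsublite.subscription", ...)` or `.option("pubsublite.topic", ...)` exactly as in the examples above.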
Log from Synthtool ``` 2021-04-09 23:16:42,037 synthtool [DEBUG] > Executing /root/.cache/synthtool/java-pubsublite-spark/.github/readme/synth.py. On branch autosynth-readme nothing to commit, working tree clean 2021-04-09 23:16:43,245 synthtool [DEBUG] > Wrote metadata to .github/readme/synth.metadata/synth.metadata. ```
Full log will be available here: https://source.cloud.google.com/results/invocations/71ea23de-3fa9-4145-bf9d-08fe2a2455f4/targets - [ ] To automatically regenerate this PR, check this box. (May take up to 24 hours.) --- .github/readme/synth.metadata/synth.metadata | 4 ++-- README.md | 63 ++++++++++++++++++-- 2 files changed, 59 insertions(+), 8 deletions(-) diff --git a/.github/readme/synth.metadata/synth.metadata b/.github/readme/synth.metadata/synth.metadata index 9c995dd0..c477db3a 100644 --- a/.github/readme/synth.metadata/synth.metadata +++ b/.github/readme/synth.metadata/synth.metadata @@ -4,14 +4,14 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "eddcb420a9dd6f41a1dd07a56bcb6ec16a444a56" + "sha": "b5edda641b1aa3d3dc7b3e38cd11b65fec086468" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "1f5e6bc8dc8e3661ee550905fc070e55e1b6cea1" + "sha": "d9ddac83a22a600dd33854c9d835a4fe52284207" } } ] diff --git a/README.md b/README.md index 7aa7b47c..f37576d7 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,9 @@ Java idiomatic client for [Pub/Sub Lite Spark Connector][product-docs]. - [Product Documentation][product-docs] - [Client Library Documentation][javadocs] - > Note: This client is a work-in-progress, and may occasionally > make backwards-incompatible changes. - - ## Quickstart @@ -100,20 +97,56 @@ and manual Spark installations. ## Usage +### Samples + + There are 3 Java samples (word count, simple write, simple read) under [samples](https://github.com/googleapis/java-pubsublite-spark/tree/master/samples) that show how to use the connector inside Dataproc.
+ ### Reading data from Pub/Sub Lite + Here is an example in Python: ```python df = spark.readStream \ - .option("pubsublite.subscription", "projects/$PROJECT_NUMBER/locations/$LOCATION/subscriptions/$SUBSCRIPTION_ID") .format("pubsublite") \ + .option("pubsublite.subscription", "projects/$PROJECT_NUMBER/locations/$LOCATION/subscriptions/$SUBSCRIPTION_ID") \ .load ``` + Here is an example in Java: + ```java + Dataset df = spark + .readStream() + .format("pubsublite") + .option("pubsublite.subscription", "projects/$PROJECT_NUMBER/locations/$LOCATION/subscriptions/$SUBSCRIPTION_ID"t ) + .load(); + ``` Note that the connector supports both MicroBatch Processing and [Continuous Processing](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#continuous-processing). +### Writing data to Pub/Sub Lite + + Here is an example in Python: + ```python + df.writeStream \ + .format("pubsublite") \ + .option("pubsublite.topic", "projects/$PROJECT_NUMBER/locations/$LOCATION/topics/$TOPIC_ID") \ + .option("checkpointLocation", "path/to/HDFS/dir") \ + .outputMode("complete") \ + .trigger(processingTime="2 seconds") \ + .start() + ``` + Here is an example in Java: + ```java + df.writeStream() + .format("pubsublite") + .option("pubsublite.topic", "projects/$PROJECT_NUMBER/locations/$LOCATION/topics/$TOPIC_ID") + .option("checkpointLocation", "path/to/HDFS/dir") + .outputMode(OutputMode.Complete()) + .trigger(Trigger.ProcessingTime(2, TimeUnit.SECONDS)) + .start(); + ``` + ### Properties -The connector supports a number of options to configure the read: +When reading from Pub/Sub Lite, the connector supports a number of configuration options: | Option | Type | Required | Default Value | Meaning | | ------ | ---- | -------- | ------------- | ------- | @@ -123,9 +156,16 @@ The connector supports a number of options to configure the read: | pubsublite.flowcontrol.maxmessagesperbatch | Long | N | Long.MAX | Max number of messages in micro batch.
| | gcp.credentials.key | String | N | [Application Default Credentials](https://cloud.google.com/docs/authentication/production#automatically) | Service account JSON in base64. | +When writing to Pub/Sub Lite, the connector supports a number of configuration options: + + | Option | Type | Required | Default Value | Meaning | + | ------ | ---- | -------- | ------------- | ------- | + | pubsublite.topic | String | Y | | Full topic path that the connector will write to. | + | gcp.credentials.key | String | N | [Application Default Credentials](https://cloud.google.com/docs/authentication/production#automatically) | Service account JSON in base64. | + ### Data Schema -The connector has fixed data schema as follows: +When reading from Pub/Sub Lite, the connector has a fixed data schema as follows: | Data Field | Spark Data Type | Notes | | ---------- | --------------- | ----- | @@ -138,6 +178,17 @@ The connector has fixed data schema as follows: | publish_timestamp | TimestampType | | | event_timestamp | TimestampType | Nullable | +When writing to Pub/Sub Lite, the connector matches the following data fields and data types as follows: + + | Data Field | Spark Data Type | Required | + | ---------- | --------------- | ----- | + | key | BinaryType | N | + | data | BinaryType | N | + | attributes | MapType\[StringType, ArrayType\[BinaryType\]\] | N | + | event_timestamp | TimestampType | N | + +Note that when a data field is present in the table but the data type mismatches, the connector will throw an IllegalArgumentException that terminates the query. + ## Building the Connector The connector is built using Maven.
The following command creates a JAR file with shaded dependencies: From 7d8584d10d6c89856a1ae0d0421939b0c5560689 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 9 Apr 2021 17:20:20 -0700 Subject: [PATCH 40/47] build(java): skip javadoc tests during dependencies test (#138) Source-Author: Jeff Ching Source-Date: Fri Apr 9 09:18:54 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 5b0e1592dd7d70b485e157ea4b3eb1704ecbd015 Source-Link: https://github.com/googleapis/synthtool/commit/5b0e1592dd7d70b485e157ea4b3eb1704ecbd015 --- .kokoro/dependencies.sh | 1 + synth.metadata | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.kokoro/dependencies.sh b/.kokoro/dependencies.sh index 0fb8c843..59d2aafc 100755 --- a/.kokoro/dependencies.sh +++ b/.kokoro/dependencies.sh @@ -33,6 +33,7 @@ export MAVEN_OPTS="-Xmx1024m -XX:MaxPermSize=128m" retry_with_backoff 3 10 \ mvn install -B -V -ntp \ -DskipTests=true \ + -Dmaven.javadoc.skip=true \ -Dclirr.skip=true mvn -B dependency:analyze -DfailOnWarning=true diff --git a/synth.metadata b/synth.metadata index a92af3d2..2591f2ba 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,14 +4,14 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "20f336639c261ddb3b61d0bd14f02e6ea5146377" + "sha": "1e20e481aa9452d110695931b5ac6208fd392996" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "572ef8f70edd9041f5bcfa71511aed6aecfc2098" + "sha": "5b0e1592dd7d70b485e157ea4b3eb1704ecbd015" } } ], From 2b82d1cb9c08219b821e1ee665da7d633919f149 Mon Sep 17 00:00:00 2001 From: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com> Date: Sat, 10 Apr 2021 20:26:51 -0400 Subject: [PATCH 41/47] update (#140) --- .readme-partials.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readme-partials.yaml b/.readme-partials.yaml index 260d7a64..d21e422f 100644 --- a/.readme-partials.yaml +++
b/.readme-partials.yaml @@ -46,7 +46,7 @@ custom_content: | Dataset df = spark .readStream() .format("pubsublite") - .option("pubsublite.subscription", "projects/$PROJECT_NUMBER/locations/$LOCATION/subscriptions/$SUBSCRIPTION_ID"t ) + .option("pubsublite.subscription", "projects/$PROJECT_NUMBER/locations/$LOCATION/subscriptions/$SUBSCRIPTION_ID") .load(); ``` From a5a04b67150be62e259fbd37652777bcd1b93ba0 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Sat, 10 Apr 2021 17:46:02 -0700 Subject: [PATCH 42/47] chore: regenerate README (#141) This PR was generated using Autosynth. :rainbow:
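The configuration tables in the documentation patches above describe `gcp.credentials.key` as "Service account JSON in base64", falling back to Application Default Credentials when unset. A hedged sketch of producing that value — the helper name and placeholder JSON are illustrative only, not part of the connector:

```python
import base64

def encode_service_account_key(raw_json: bytes) -> str:
    # gcp.credentials.key expects the service account JSON file's
    # contents encoded as a base64 string.
    return base64.b64encode(raw_json).decode("ascii")

# Illustrative placeholder, not a real credential:
key = encode_service_account_key(b'{"type": "service_account"}')
# The value would then be wired in as:
#   .option("gcp.credentials.key", key)
```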
Log from Synthtool ``` 2021-04-11 00:29:17,094 synthtool [DEBUG] > Executing /root/.cache/synthtool/java-pubsublite-spark/.github/readme/synth.py. On branch autosynth-readme nothing to commit, working tree clean 2021-04-11 00:29:18,109 synthtool [DEBUG] > Wrote metadata to .github/readme/synth.metadata/synth.metadata. ```
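The write-schema note in the README patches above states that a data field present in the table with a mismatched type causes the connector to throw an IllegalArgumentException that terminates the query. A rough Python analogue of that validation — the expected-type table mirrors the documentation, but the function itself is hypothetical, not connector code:

```python
# Expected Spark types for the optional write-schema fields, as listed
# in the documentation table above (every field is optional).
EXPECTED_WRITE_TYPES = {
    "key": "BinaryType",
    "data": "BinaryType",
    "attributes": "MapType[StringType, ArrayType[BinaryType]]",
    "event_timestamp": "TimestampType",
}

def check_write_schema(dataframe_schema: dict) -> None:
    # A field may be absent, but if present its type must match; the
    # real connector raises IllegalArgumentException in the mismatch case.
    for field, spark_type in dataframe_schema.items():
        expected = EXPECTED_WRITE_TYPES.get(field)
        if expected is not None and spark_type != expected:
            raise ValueError(
                f"Field {field!r} has type {spark_type}, expected {expected}"
            )

check_write_schema({"key": "BinaryType", "event_timestamp": "TimestampType"})
```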
Full log will be available here: https://source.cloud.google.com/results/invocations/6e1451c0-f81a-42dc-b3bd-048cdf59b885/targets - [ ] To automatically regenerate this PR, check this box. (May take up to 24 hours.) --- .github/readme/synth.metadata/synth.metadata | 4 ++-- README.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/readme/synth.metadata/synth.metadata b/.github/readme/synth.metadata/synth.metadata index c477db3a..f3762eef 100644 --- a/.github/readme/synth.metadata/synth.metadata +++ b/.github/readme/synth.metadata/synth.metadata @@ -4,14 +4,14 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/java-pubsublite-spark.git", - "sha": "b5edda641b1aa3d3dc7b3e38cd11b65fec086468" + "sha": "2b82d1cb9c08219b821e1ee665da7d633919f149" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "d9ddac83a22a600dd33854c9d835a4fe52284207" + "sha": "0a071b3460344886297a304253bf924aa68ddb7e" } } ] diff --git a/README.md b/README.md index f37576d7..6260163a 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ and manual Spark installations. 
Dataset df = spark .readStream() .format("pubsublite") - .option("pubsublite.subscription", "projects/$PROJECT_NUMBER/locations/$LOCATION/subscriptions/$SUBSCRIPTION_ID"t ) + .option("pubsublite.subscription", "projects/$PROJECT_NUMBER/locations/$LOCATION/subscriptions/$SUBSCRIPTION_ID") .load(); ``` From 176c0bdd86e088ede12208179f84a3f2895b13e3 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 12 Apr 2021 17:29:37 +0200 Subject: [PATCH 43/47] chore(deps): update dependency com.google.cloud:libraries-bom to v20 (#142) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [com.google.cloud:libraries-bom](https://togithub.com/GoogleCloudPlatform/cloud-opensource-java) | `19.2.1` -> `20.0.0` | [![age](https://badges.renovateapi.com/packages/maven/com.google.cloud:libraries-bom/20.0.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/maven/com.google.cloud:libraries-bom/20.0.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/maven/com.google.cloud:libraries-bom/20.0.0/compatibility-slim/19.2.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/maven/com.google.cloud:libraries-bom/20.0.0/confidence-slim/19.2.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. 
--- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/java-pubsublite-spark). --- samples/snapshot/pom.xml | 2 +- samples/snippets/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/snapshot/pom.xml b/samples/snapshot/pom.xml index f08c3d43..217d3a40 100644 --- a/samples/snapshot/pom.xml +++ b/samples/snapshot/pom.xml @@ -28,7 +28,7 @@ com.google.cloud libraries-bom - 19.2.1 + 20.0.0 pom import diff --git a/samples/snippets/pom.xml b/samples/snippets/pom.xml index b301d2d0..82fca5ce 100644 --- a/samples/snippets/pom.xml +++ b/samples/snippets/pom.xml @@ -28,7 +28,7 @@ com.google.cloud libraries-bom - 19.2.1 + 20.0.0 pom import From 062b9923e2d0640d43de24c11f4d600ff87e1f9f Mon Sep 17 00:00:00 2001 From: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com> Date: Mon, 12 Apr 2021 15:13:46 -0400 Subject: [PATCH 44/47] deps: update dependency com.google.cloud:google-cloud-pubsublite to v0.13.1 (#136) * update * update * update --- pom.xml | 2 +- samples/snapshot/pom.xml | 2 +- samples/snippets/pom.xml | 2 +- .../snippets/src/main/java/pubsublite/spark/AdminUtils.java | 4 +++- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 94da0515..b51372f1 100644 --- a/pom.xml +++ b/pom.xml @@ -43,7 +43,7 @@ com.google.cloud google-cloud-pubsublite - 0.12.0 + 0.13.1 com.google.api.grpc diff --git a/samples/snapshot/pom.xml b/samples/snapshot/pom.xml index 217d3a40..dac341b7 100644 --- a/samples/snapshot/pom.xml +++ b/samples/snapshot/pom.xml @@ -44,7 +44,7 @@ com.google.cloud google-cloud-pubsublite - 0.12.0 + 0.13.1 junit diff --git a/samples/snippets/pom.xml b/samples/snippets/pom.xml index 82fca5ce..6e60540f 100644 --- a/samples/snippets/pom.xml +++ b/samples/snippets/pom.xml @@ -44,7 +44,7 @@ com.google.cloud google-cloud-pubsublite - 0.12.0 + 0.13.1 junit diff 
--git a/samples/snippets/src/main/java/pubsublite/spark/AdminUtils.java b/samples/snippets/src/main/java/pubsublite/spark/AdminUtils.java index b6f712f6..4c6d0ece 100644 --- a/samples/snippets/src/main/java/pubsublite/spark/AdminUtils.java +++ b/samples/snippets/src/main/java/pubsublite/spark/AdminUtils.java @@ -23,6 +23,7 @@ import com.google.cloud.pubsub.v1.AckReplyConsumer; import com.google.cloud.pubsub.v1.MessageReceiver; import com.google.cloud.pubsublite.AdminClient; +import com.google.cloud.pubsublite.AdminClient.BacklogLocation; import com.google.cloud.pubsublite.AdminClientSettings; import com.google.cloud.pubsublite.CloudRegion; import com.google.cloud.pubsublite.CloudZone; @@ -141,7 +142,8 @@ public static void createSubscriptionExample( AdminClientSettings.newBuilder().setRegion(CloudRegion.of(cloudRegion)).build(); try (AdminClient adminClient = AdminClient.create(adminClientSettings)) { - Subscription response = adminClient.createSubscription(subscription).get(); + Subscription response = + adminClient.createSubscription(subscription, BacklogLocation.BEGINNING).get(); System.out.println(response.getAllFields() + "created successfully."); } catch (ExecutionException e) { if (e.getCause() instanceof AlreadyExistsException) { From 368d7dfbec237dc0ef4febb37f84a73436e1590d Mon Sep 17 00:00:00 2001 From: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com> Date: Mon, 12 Apr 2021 15:47:46 -0400 Subject: [PATCH 45/47] deps: renovate bot ignore scala-library and caffeine deps. 
(#143) --- renovate.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/renovate.json b/renovate.json index ccb29633..ec1a328f 100644 --- a/renovate.json +++ b/renovate.json @@ -69,5 +69,9 @@ } ], "semanticCommits": true, - "masterIssue": true + "masterIssue": true, + "ignoreDeps": [ + "scala-library", + "caffeine" + ] } From 5238be8779238518d1a2190e42c9b0c36e8785d3 Mon Sep 17 00:00:00 2001 From: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com> Date: Mon, 12 Apr 2021 17:24:21 -0400 Subject: [PATCH 46/47] chore(deps): update dependency com.google.cloud.samples:shared-configuration to v1.0.22 (#144) --- .kokoro/build.sh | 3 - samples/pom.xml | 2 +- samples/snapshot/pom.xml | 2 +- samples/snippets/pom.xml | 2 +- .../java/pubsublite/spark/SampleTestBase.java | 72 ++++++++++++------- .../spark/SamplesIntegrationTest.java | 1 + versions.txt | 1 - 7 files changed, 52 insertions(+), 31 deletions(-) diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 0deb86dc..00089722 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -78,9 +78,6 @@ samples) if [[ -f ${SAMPLES_DIR}/pom.xml ]] then - # get versions for constructing the full names of the packaged JARs - export CONNECTOR_VERSION=$(grep pubsublite-spark-sql-streaming ${scriptDir}/../versions.txt | cut -d: -f3) - export SAMPLE_VERSION=$(grep com.google.cloud.samples.shared-configuration: ${scriptDir}/../versions.txt | cut -d: -f3) if [ -f "${KOKORO_GFILE_DIR}/secret_manager/java-pubsublite-spark-samples-secrets" ] then source "${KOKORO_GFILE_DIR}/secret_manager/java-pubsublite-spark-samples-secrets" diff --git a/samples/pom.xml b/samples/pom.xml index 9beedc36..239a13c7 100644 --- a/samples/pom.xml +++ b/samples/pom.xml @@ -17,7 +17,7 @@ com.google.cloud.samples shared-configuration - 1.0.21 + 1.0.22 diff --git a/samples/snapshot/pom.xml b/samples/snapshot/pom.xml index dac341b7..41f7156f 100644 --- a/samples/snapshot/pom.xml +++ b/samples/snapshot/pom.xml @@ -14,7 +14,7 @@ 
com.google.cloud.samples shared-configuration - 1.0.21 + 1.0.22 diff --git a/samples/snippets/pom.xml b/samples/snippets/pom.xml index 6e60540f..e8e620ae 100644 --- a/samples/snippets/pom.xml +++ b/samples/snippets/pom.xml @@ -14,7 +14,7 @@ com.google.cloud.samples shared-configuration - 1.0.21 + 1.0.22 diff --git a/samples/snippets/src/test/java/pubsublite/spark/SampleTestBase.java b/samples/snippets/src/test/java/pubsublite/spark/SampleTestBase.java index 6086b2cc..9c83b3cb 100644 --- a/samples/snippets/src/test/java/pubsublite/spark/SampleTestBase.java +++ b/samples/snippets/src/test/java/pubsublite/spark/SampleTestBase.java @@ -33,17 +33,20 @@ import com.google.cloud.storage.BlobId; import com.google.cloud.storage.BlobInfo; import com.google.cloud.storage.Storage; -import com.google.common.collect.ImmutableList; +import com.google.common.flogger.GoogleLogger; import java.io.BufferedReader; import java.io.File; import java.io.InputStreamReader; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.Arrays; import java.util.Map; +import java.util.Optional; import java.util.UUID; import org.apache.commons.lang.StringUtils; import org.apache.maven.shared.invoker.DefaultInvocationRequest; import org.apache.maven.shared.invoker.DefaultInvoker; +import org.apache.maven.shared.invoker.InvocationOutputHandler; import org.apache.maven.shared.invoker.InvocationRequest; import org.apache.maven.shared.invoker.InvocationResult; import org.apache.maven.shared.invoker.Invoker; @@ -52,6 +55,8 @@ public abstract class SampleTestBase { + private static final GoogleLogger log = GoogleLogger.forEnclosingClass(); + private static final String CLOUD_REGION = "CLOUD_REGION"; private static final String CLOUD_ZONE = "CLOUD_ZONE"; private static final String PROJECT_NUMBER = "GOOGLE_CLOUD_PROJECT_NUMBER"; @@ -59,8 +64,6 @@ public abstract class SampleTestBase { private static final String TOPIC_ID = "TOPIC_ID"; private static final String CLUSTER_NAME = 
"CLUSTER_NAME"; private static final String BUCKET_NAME = "BUCKET_NAME"; - private static final String SAMPLE_VERSION = "SAMPLE_VERSION"; - private static final String CONNECTOR_VERSION = "CONNECTOR_VERSION"; protected final String runId = UUID.randomUUID().toString(); protected CloudRegion cloudRegion; @@ -91,32 +94,14 @@ protected void setupEnvVars() { PROJECT_NUMBER, TOPIC_ID, CLUSTER_NAME, - BUCKET_NAME, - SAMPLE_VERSION, - CONNECTOR_VERSION); + BUCKET_NAME); cloudRegion = CloudRegion.of(env.get(CLOUD_REGION)); cloudZone = CloudZone.of(cloudRegion, env.get(CLOUD_ZONE).charAt(0)); projectId = ProjectId.of(env.get(PROJECT_ID)); projectNumber = ProjectNumber.of(Long.parseLong(env.get(PROJECT_NUMBER))); sourceTopicId = TopicName.of(env.get(TOPIC_ID)); - clusterName = env.get(CLUSTER_NAME); bucketName = env.get(BUCKET_NAME); - workingDir = - System.getProperty("user.dir") - .replace("/samples/snapshot", "") - .replace("/samples/snippets", ""); - sampleVersion = env.get(SAMPLE_VERSION); - connectorVersion = env.get(CONNECTOR_VERSION); - sampleJarName = String.format("pubsublite-spark-snippets-%s.jar", sampleVersion); - connectorJarName = - String.format("pubsublite-spark-sql-streaming-%s-with-dependencies.jar", connectorVersion); - sampleJarNameInGCS = String.format("pubsublite-spark-snippets-%s-%s.jar", sampleVersion, runId); - connectorJarNameInGCS = - String.format( - "pubsublite-spark-sql-streaming-%s-with-dependencies-%s.jar", connectorVersion, runId); - sampleJarLoc = String.format("%s/samples/snippets/target/%s", workingDir, sampleJarName); - connectorJarLoc = String.format("%s/target/%s", workingDir, connectorJarName); } protected void findMavenHome() throws Exception { @@ -131,12 +116,14 @@ protected void findMavenHome() throws Exception { } } - protected void mavenPackage(String workingDir) + private void runMavenCommand( + String workingDir, Optional outputHandler, String... 
goals) throws MavenInvocationException, CommandLineException { InvocationRequest request = new DefaultInvocationRequest(); request.setPomFile(new File(workingDir + "/pom.xml")); - request.setGoals(ImmutableList.of("clean", "package", "-Dmaven.test.skip=true")); + request.setGoals(Arrays.asList(goals.clone())); Invoker invoker = new DefaultInvoker(); + outputHandler.ifPresent(invoker::setOutputHandler); invoker.setMavenHome(new File(mavenHome)); InvocationResult result = invoker.execute(request); if (result.getExecutionException() != null) { @@ -145,6 +132,43 @@ protected void mavenPackage(String workingDir) assertThat(result.getExitCode()).isEqualTo(0); } + protected void mavenPackage(String workingDir) + throws MavenInvocationException, CommandLineException { + runMavenCommand(workingDir, Optional.empty(), "clean", "package", "-Dmaven.test.skip=true"); + } + + private void getVersion(String workingDir, InvocationOutputHandler outputHandler) + throws MavenInvocationException, CommandLineException { + runMavenCommand( + workingDir, + Optional.of(outputHandler), + "-q", + "-Dexec.executable=echo", + "-Dexec.args='${project.version}'", + "--non-recursive", + "exec:exec"); + } + + protected void setupVersions() throws MavenInvocationException, CommandLineException { + workingDir = + System.getProperty("user.dir") + .replace("/samples/snapshot", "") + .replace("/samples/snippets", ""); + getVersion(workingDir, (l) -> connectorVersion = l); + log.atInfo().log("Connector version is: %s", connectorVersion); + getVersion(workingDir + "/samples", (l) -> sampleVersion = l); + log.atInfo().log("Sample version is: %s", sampleVersion); + sampleJarName = String.format("pubsublite-spark-snippets-%s.jar", sampleVersion); + connectorJarName = + String.format("pubsublite-spark-sql-streaming-%s-with-dependencies.jar", connectorVersion); + sampleJarNameInGCS = String.format("pubsublite-spark-snippets-%s-%s.jar", sampleVersion, runId); + connectorJarNameInGCS = + String.format( + 
        "pubsublite-spark-sql-streaming-%s-with-dependencies-%s.jar", connectorVersion, runId);
+    sampleJarLoc = String.format("%s/samples/snippets/target/%s", workingDir, sampleJarName);
+    connectorJarLoc = String.format("%s/target/%s", workingDir, connectorJarName);
+  }
+
   protected void uploadGCS(Storage storage, String fileNameInGCS, String fileLoc) throws Exception {
     BlobId blobId = BlobId.of(bucketName, fileNameInGCS);
     BlobInfo blobInfo = BlobInfo.newBuilder(blobId).build();
diff --git a/samples/snippets/src/test/java/pubsublite/spark/SamplesIntegrationTest.java b/samples/snippets/src/test/java/pubsublite/spark/SamplesIntegrationTest.java
index 7be0f801..50ff198c 100644
--- a/samples/snippets/src/test/java/pubsublite/spark/SamplesIntegrationTest.java
+++ b/samples/snippets/src/test/java/pubsublite/spark/SamplesIntegrationTest.java
@@ -63,6 +63,7 @@ public void beforeClass() throws Exception {
     log.atInfo().log("RunId is: %s", runId);
     setupEnvVars();
     findMavenHome();
+    setupVersions();
 
     // Maven package into jars
     mavenPackage(workingDir);
diff --git a/versions.txt b/versions.txt
index b62cf13b..01dcba34 100644
--- a/versions.txt
+++ b/versions.txt
@@ -2,4 +2,3 @@ # Format:
 # module:released-version:current-version
 pubsublite-spark-sql-streaming:0.1.0:0.1.1-SNAPSHOT
-com.google.cloud.samples.shared-configuration:1.0.21:1.0.21

From 8853b12ca829608bf5be0781efdb77a0b193730b Mon Sep 17 00:00:00 2001
From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com>
Date: Mon, 12 Apr 2021 21:41:47 -0400
Subject: [PATCH 47/47] chore: release 0.2.0 (#97)

Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
Co-authored-by: jiangmichaellll <40044148+jiangmichaellll@users.noreply.github.com>
---
 CHANGELOG.md | 38 ++++++++++++++++++++++++++++++++++++++
 pom.xml      | 2 +-
 versions.txt | 2 +-
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b7a81df..7cb70eae 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,43 @@
 # Changelog
 
+## [0.2.0](https://www.github.com/googleapis/java-pubsublite-spark/compare/v0.1.0...v0.2.0) (2021-04-12)
+
+
+### Features
+
+* PSL Connector Writer support ([#121](https://www.github.com/googleapis/java-pubsublite-spark/issues/121)) ([92cfdfd](https://www.github.com/googleapis/java-pubsublite-spark/commit/92cfdfdc85449bb2bf745d59cd9b40e5949ba53c))
+* Supports topic partition increase. ([#115](https://www.github.com/googleapis/java-pubsublite-spark/issues/115)) ([20f3366](https://www.github.com/googleapis/java-pubsublite-spark/commit/20f336639c261ddb3b61d0bd14f02e6ea5146377))
+
+
+### Bug Fixes
+
+* Move Spark constants into its own class. ([#127](https://www.github.com/googleapis/java-pubsublite-spark/issues/127)) ([faf1ece](https://www.github.com/googleapis/java-pubsublite-spark/commit/faf1ece43816f28298ad4db54cee968c6f59681b))
+
+
+### Documentation
+
+* Add maven central link. ([#100](https://www.github.com/googleapis/java-pubsublite-spark/issues/100)) ([f82087b](https://www.github.com/googleapis/java-pubsublite-spark/commit/f82087b17a6c6e44af235a7e6e7a4632874aef42))
+* Add write support documentations ([#132](https://www.github.com/googleapis/java-pubsublite-spark/issues/132)) ([b5edda6](https://www.github.com/googleapis/java-pubsublite-spark/commit/b5edda641b1aa3d3dc7b3e38cd11b65fec086468))
+* update client lib documentation link ([#98](https://www.github.com/googleapis/java-pubsublite-spark/issues/98)) ([9187ff3](https://www.github.com/googleapis/java-pubsublite-spark/commit/9187ff382714b810e94758f3ba1e89a75ae99caf))
+* Update gcs public available link ([#109](https://www.github.com/googleapis/java-pubsublite-spark/issues/109)) ([1bf772a](https://www.github.com/googleapis/java-pubsublite-spark/commit/1bf772a275e76d6b7229d628b72d5dce4f5c8bc5))
+
+
+### Dependencies
+
+* renovate bot ignore scala-library and caffeine deps. ([#143](https://www.github.com/googleapis/java-pubsublite-spark/issues/143)) ([368d7df](https://www.github.com/googleapis/java-pubsublite-spark/commit/368d7dfbec237dc0ef4febb37f84a73436e1590d))
+* update dependency com.google.api.grpc:proto-google-cloud-pubsublite-v1 to v0.11.0 ([#94](https://www.github.com/googleapis/java-pubsublite-spark/issues/94)) ([d9b9289](https://www.github.com/googleapis/java-pubsublite-spark/commit/d9b9289160d50ca4b44447287b887249190db9fd))
+* update dependency com.google.api.grpc:proto-google-cloud-pubsublite-v1 to v0.11.1 ([#102](https://www.github.com/googleapis/java-pubsublite-spark/issues/102)) ([4812cbc](https://www.github.com/googleapis/java-pubsublite-spark/commit/4812cbc6710f2a894045b50b8f5f1245e3b80196))
+* update dependency com.google.api.grpc:proto-google-cloud-pubsublite-v1 to v0.12.0 ([#118](https://www.github.com/googleapis/java-pubsublite-spark/issues/118)) ([880da1b](https://www.github.com/googleapis/java-pubsublite-spark/commit/880da1bf953526cd40e4b736a898185751c7bb27))
+* update dependency com.google.api.grpc:proto-google-cloud-pubsublite-v1 to v0.13.1 ([#124](https://www.github.com/googleapis/java-pubsublite-spark/issues/124)) ([4ef4a04](https://www.github.com/googleapis/java-pubsublite-spark/commit/4ef4a043ccaacbca8d103374fd5d07c49bfac0b5))
+* update dependency com.google.cloud:google-cloud-pubsublite to v0.11.0 ([#95](https://www.github.com/googleapis/java-pubsublite-spark/issues/95)) ([508b90c](https://www.github.com/googleapis/java-pubsublite-spark/commit/508b90c3fd0a6045d548c1c6336fe4b542a27d07))
+* update dependency com.google.cloud:google-cloud-pubsublite to v0.11.1 ([#103](https://www.github.com/googleapis/java-pubsublite-spark/issues/103)) ([86c8b7e](https://www.github.com/googleapis/java-pubsublite-spark/commit/86c8b7e9295e5abbb4f491ef1a5295a1ac9b498c))
+* update dependency com.google.cloud:google-cloud-pubsublite to v0.12.0 ([#119](https://www.github.com/googleapis/java-pubsublite-spark/issues/119)) ([821449e](https://www.github.com/googleapis/java-pubsublite-spark/commit/821449eca7259bc36c5ae8f0d12a3c54af1484e7))
+* update dependency com.google.cloud:google-cloud-pubsublite to v0.13.1 ([#136](https://www.github.com/googleapis/java-pubsublite-spark/issues/136)) ([062b992](https://www.github.com/googleapis/java-pubsublite-spark/commit/062b9923e2d0640d43de24c11f4d600ff87e1f9f))
+* update dependency com.google.cloud:google-cloud-pubsublite-parent to v0.11.0 ([#96](https://www.github.com/googleapis/java-pubsublite-spark/issues/96)) ([4356247](https://www.github.com/googleapis/java-pubsublite-spark/commit/435624723d5ffdbfc803ac19e034d65cea33986e))
+* update dependency com.google.cloud:google-cloud-pubsublite-parent to v0.11.1 ([#104](https://www.github.com/googleapis/java-pubsublite-spark/issues/104)) ([bb73ca2](https://www.github.com/googleapis/java-pubsublite-spark/commit/bb73ca2900d3cea9cd6d5c920c0f09871fff73aa))
+* update dependency com.google.cloud:google-cloud-pubsublite-parent to v0.12.0 ([#120](https://www.github.com/googleapis/java-pubsublite-spark/issues/120)) ([ea3596f](https://www.github.com/googleapis/java-pubsublite-spark/commit/ea3596fad872e26b5b0157192762d037d163577f))
+* update dependency com.google.cloud:google-cloud-pubsublite-parent to v0.13.1 ([#126](https://www.github.com/googleapis/java-pubsublite-spark/issues/126)) ([d3130e7](https://www.github.com/googleapis/java-pubsublite-spark/commit/d3130e76e643321977af7d1487da6b87b578f4ed))
+
 ## 0.1.0 (2021-02-24)
diff --git a/pom.xml b/pom.xml
index b51372f1..6ec8944e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -8,7 +8,7 @@
   <modelVersion>4.0.0</modelVersion>
   <groupId>com.google.cloud</groupId>
   <artifactId>pubsublite-spark-sql-streaming</artifactId>
-  <version>0.1.1-SNAPSHOT</version>
+  <version>0.2.0</version>
   <packaging>jar</packaging>
   <name>Pub/Sub Lite Spark SQL Streaming</name>
   <url>https://github.com/googleapis/java-pubsublite-spark</url>
diff --git a/versions.txt b/versions.txt
index 01dcba34..72a9f35a 100644
--- a/versions.txt
+++ b/versions.txt
@@ -1,4 +1,4 @@
 # Format:
 # module:released-version:current-version
-pubsublite-spark-sql-streaming:0.1.0:0.1.1-SNAPSHOT
+pubsublite-spark-sql-streaming:0.2.0:0.2.0
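
The versions.txt changes in the patches above all follow the format declared in the file's own header comment, `module:released-version:current-version`, which release-please keeps in sync with the `<version>` in pom.xml. As an illustration only — `VersionsTxtEntry` is a hypothetical helper, not part of this repo — a minimal sketch of parsing one such line:

```java
// Hypothetical helper (not in the repo): parses one versions.txt entry of the
// form "module:released-version:current-version" used by release-please.
public class VersionsTxtEntry {

  /** Returns {module, releasedVersion, currentVersion} for a single entry line. */
  static String[] parse(String line) {
    String[] parts = line.trim().split(":");
    if (parts.length != 3) {
      throw new IllegalArgumentException(
          "expected module:released-version:current-version, got: " + line);
    }
    return parts;
  }

  public static void main(String[] args) {
    // Entry as it appears after the 0.2.0 release commit above.
    String[] entry = parse("pubsublite-spark-sql-streaming:0.2.0:0.2.0");
    System.out.println(
        "module=" + entry[0] + " released=" + entry[1] + " current=" + entry[2]);
  }
}
```

During the release cycle the two version fields differ (e.g. `0.1.0` released vs. `0.1.1-SNAPSHOT` current); the release commit brings them back in line.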