Skip to content

Commit

Permalink
Added support for native append instead of overwrite (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ellana42 authored Sep 12, 2024
1 parent 52c447c commit 1030640
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 36 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

## [Version 1.2.3](https://github.com/dataiku/dss-plugin-googlesheets/releases/tag/v1.2.3) - Feature - 2024-09-10

- Add support for native append mode for the custom dataset
- Add parameter for batch_size and insertion_delay in recipe to avoid API limits

## [Version 1.2.2](https://github.com/dataiku/dss-plugin-googlesheets/releases/tag/v1.2.2) - Bugfix release - 2022-11-24

- Add a specific error message when trying to import an Excel file
Expand All @@ -8,7 +13,7 @@

- Add support for python 3.7 to 3.11

## [Version 1.2.0](https://github.com/dataiku/dss-plugin-googlesheets/releases/tag/v1.2.0) - Feature abd bugfix release - 2022-11-24
## [Version 1.2.0](https://github.com/dataiku/dss-plugin-googlesheets/releases/tag/v1.2.0) - Feature and bugfix release - 2022-11-24

- Add a preset for storing access tokens
- Add Google Single Sign On capability
Expand Down
25 changes: 1 addition & 24 deletions custom-recipes/googlesheets-append/recipe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from gspread.utils import rowcol_to_a1
from safe_logger import SafeLogger
from googlesheets_common import DSSConstants, extract_credentials, get_tab_ids
from googlesheets_append import append_rows


logger = SafeLogger("googlesheets plugin", ["credentials", "access_token"])
Expand Down Expand Up @@ -43,30 +44,6 @@
# Load worksheet
worksheet = session.get_spreadsheet(doc_id, tab_id)


# Make available a method of later version of gspread (probably 3.4.0)
# from https://github.com/burnash/gspread/pull/556
def append_rows(self, values, value_input_option='RAW'):
"""Adds multiple rows to the worksheet and populates them with values.
Widens the worksheet if there are more values than columns.
:param values: List of rows each row is List of values for the new row.
:param value_input_option: (optional) Determines how input data should
be interpreted. See `ValueInputOption`_ in
the Sheets API.
:type value_input_option: str
.. _ValueInputOption: https://developers.google.com/sheets/api/reference/rest/v4/ValueInputOption
"""
params = {
'valueInputOption': value_input_option
}

body = {
'values': values
}

return self.spreadsheet.values_append(self.title, params, body)


worksheet.append_rows = append_rows.__get__(worksheet, worksheet.__class__)


Expand Down
4 changes: 2 additions & 2 deletions plugin.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"id": "googlesheets",
"version": "1.2.2",
"version": "1.2.3",
"meta": {
"label": "Google Sheets",
"description": "Read from and write to Google Sheets",
Expand All @@ -10,4 +10,4 @@
"url": "https://www.dataiku.com/product/plugins/googlesheets/",
"tags": ["Google", "Connector"]
}
}
}
3 changes: 2 additions & 1 deletion python-connectors/googlesheets-sheet/connector.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"readable" : true,
"writable" : true,
"canCountRecords" : false,
"supportAppend" : true,

"kind": "PYTHON",
"paramsPythonSetup": "browse_sheets.py",
Expand Down Expand Up @@ -128,4 +129,4 @@
"visibilityCondition": "model.show_advanced_parameters==true"
}
]
}
}
22 changes: 14 additions & 8 deletions python-connectors/googlesheets-sheet/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from googlesheets import GoogleSheetsSession
from safe_logger import SafeLogger
from googlesheets_common import DSSConstants, extract_credentials, get_tab_ids, mark_date_columns, convert_dates_in_row
from googlesheets_append import append_rows


logger = SafeLogger("googlesheets plugin", ["credentials", "access_token"])
Expand Down Expand Up @@ -94,7 +95,7 @@ def generate_rows(self, dataset_schema=None, dataset_partitioning=None,
raise Exception("Unimplemented")

def get_writer(self, dataset_schema=None, dataset_partitioning=None,
partition_id=None):
partition_id=None, write_mode="OVERWRITE"):

if self.result_format == 'json':
raise Exception('JSON format not supported in write mode')
Expand All @@ -105,7 +106,7 @@ def get_writer(self, dataset_schema=None, dataset_partitioning=None,
if len(self.tabs_ids) > 1:
raise Exception('Only one target sheet can be selected for writing')

return MyCustomDatasetWriter(self.config, self, dataset_schema, dataset_partitioning, partition_id)
return MyCustomDatasetWriter(self.config, self, dataset_schema, dataset_partitioning, partition_id, write_mode)

def get_records_count(self, partitioning=None, partition_id=None):
"""
Expand All @@ -117,13 +118,14 @@ def get_records_count(self, partitioning=None, partition_id=None):


class MyCustomDatasetWriter(CustomDatasetWriter):
def __init__(self, config, parent, dataset_schema, dataset_partitioning, partition_id):
def __init__(self, config, parent, dataset_schema, dataset_partitioning, partition_id, write_mode):
CustomDatasetWriter.__init__(self)
self.parent = parent
self.config = config
self.dataset_schema = dataset_schema
self.dataset_partitioning = dataset_partitioning
self.partition_id = partition_id
self.write_mode = write_mode
self.buffer = []
self.date_columns = []
if self.parent.write_format == "USER_ENTERED":
Expand All @@ -140,14 +142,18 @@ def write_row(self, row):

def flush(self):
worksheet = self.parent.session.get_spreadsheet(self.parent.doc_id, self.parent.tabs_ids[0])
worksheet.append_rows = append_rows.__get__(worksheet, worksheet.__class__)

num_columns = len(self.buffer[0])
num_lines = len(self.buffer)
if self.write_mode == "APPEND":
worksheet.append_rows(self.buffer[1:], self.parent.write_format) #TODO: batch ?
elif self.write_mode == "OVERWRITE":
num_columns = len(self.buffer[0])
num_lines = len(self.buffer)

worksheet.resize(rows=num_lines, cols=num_columns)
worksheet.resize(rows=num_lines, cols=num_columns)

range = 'A1:%s' % rowcol_to_a1(num_lines, num_columns)
worksheet.update(range, self.buffer, value_input_option=self.parent.write_format)
range = 'A1:%s' % rowcol_to_a1(num_lines, num_columns)
worksheet.update(range, self.buffer, value_input_option=self.parent.write_format)

self.buffer = []

Expand Down
21 changes: 21 additions & 0 deletions python-lib/googlesheets_append.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Backport of ``Worksheet.append_rows`` from a later version of gspread
# (probably 3.4.0), for installations that ship an older gspread.
# From https://github.com/burnash/gspread/pull/556
def append_rows(self, values, value_input_option='RAW'):
    """Adds multiple rows to the worksheet and populates them with values.

    Widens the worksheet if there are more values than columns.

    This function is written as an unbound method: bind it to a worksheet
    object before calling it, e.g.
    ``worksheet.append_rows = append_rows.__get__(worksheet, worksheet.__class__)``.
    The bound object must expose ``title`` and ``spreadsheet.values_append``.

    :param values: List of rows each row is List of values for the new row.
    :param value_input_option: (optional) Determines how input data should
                                be interpreted. See `ValueInputOption`_ in
                                the Sheets API.
    :type value_input_option: str
    :returns: Whatever ``spreadsheet.values_append`` returns for the request.

    .. _ValueInputOption: https://developers.google.com/sheets/api/reference/rest/v4/ValueInputOption
    """
    params = {
        'valueInputOption': value_input_option
    }

    body = {
        'values': values
    }

    # A bare title is ambiguous as an A1 range: a tab named "A1" would be read
    # as a cell reference, and titles with spaces or punctuation are invalid
    # unquoted. Wrap the title in single quotes and double any embedded
    # apostrophe so the range always designates the sheet itself.
    sheet_range = "'{}'".format(self.title.replace("'", "''"))

    return self.spreadsheet.values_append(sheet_range, params, body)

0 comments on commit 1030640

Please sign in to comment.