From 1030640221f3ded867b52806923caad1d788a28b Mon Sep 17 00:00:00 2001 From: "Mathilde K." <58742217+Ellana42@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:46:14 +0200 Subject: [PATCH] Added support for native append instead of overwrite (#16) --- CHANGELOG.md | 7 +++++- custom-recipes/googlesheets-append/recipe.py | 25 +------------------ plugin.json | 4 +-- .../googlesheets-sheet/connector.json | 3 ++- .../googlesheets-sheet/connector.py | 22 ++++++++++------ python-lib/googlesheets_append.py | 21 ++++++++++++++++ 6 files changed, 46 insertions(+), 36 deletions(-) create mode 100644 python-lib/googlesheets_append.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e2384fa..1aa2fa0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## [Version 1.2.3](https://github.com/dataiku/dss-plugin-googlesheets/releases/tag/v1.2.3) - Feature - 2024-09-10 + +- Add support for native append mode for the custom dataset +- Add parameter for batch_size and insertion_delay in recipe to avoid API limits + ## [Version 1.2.2](https://github.com/dataiku/dss-plugin-googlesheets/releases/tag/v1.2.2) - Bugfix release - 2022-11-24 - Add a specific error message when trying to import an Excel file @@ -8,7 +13,7 @@ - Add support for python 3.7 to 3.11 -## [Version 1.2.0](https://github.com/dataiku/dss-plugin-googlesheets/releases/tag/v1.2.0) - Feature abd bugfix release - 2022-11-24 +## [Version 1.2.0](https://github.com/dataiku/dss-plugin-googlesheets/releases/tag/v1.2.0) - Feature and bugfix release - 2022-11-24 - Add a preset for storing access tokens - Add Google Single Sign On capability diff --git a/custom-recipes/googlesheets-append/recipe.py b/custom-recipes/googlesheets-append/recipe.py index b0a1bf8..b072c09 100644 --- a/custom-recipes/googlesheets-append/recipe.py +++ b/custom-recipes/googlesheets-append/recipe.py @@ -6,6 +6,7 @@ from gspread.utils import rowcol_to_a1 from safe_logger import SafeLogger from googlesheets_common import DSSConstants, extract_credentials, get_tab_ids +from googlesheets_append import append_rows logger = SafeLogger("googlesheets plugin", ["credentials", "access_token"]) @@ -43,30 +44,6 @@ # Load worksheet worksheet = session.get_spreadsheet(doc_id, tab_id) - -# Make available a method of later version of gspread (probably 3.4.0) -# from https://github.com/burnash/gspread/pull/556 -def append_rows(self, values, value_input_option='RAW'): - """Adds multiple rows to the worksheet and populates them with values. - Widens the worksheet if there are more values than columns. - :param values: List of rows each row is List of values for the new row. - :param value_input_option: (optional) Determines how input data should - be interpreted. See `ValueInputOption`_ in - the Sheets API. - :type value_input_option: str - .. _ValueInputOption: https://developers.google.com/sheets/api/reference/rest/v4/ValueInputOption - """ - params = { - 'valueInputOption': value_input_option - } - - body = { - 'values': values - } - - return self.spreadsheet.values_append(self.title, params, body) - - worksheet.append_rows = append_rows.__get__(worksheet, worksheet.__class__) diff --git a/plugin.json b/plugin.json index 27f1783..a786611 100644 --- a/plugin.json +++ b/plugin.json @@ -1,6 +1,6 @@ { "id": "googlesheets", - "version": "1.2.2", + "version": "1.2.3", "meta": { "label": "Google Sheets", "description": "Read from and write to Google Sheets", @@ -10,4 +10,4 @@ "url": "https://www.dataiku.com/product/plugins/googlesheets/", "tags": ["Google", "Connector"] } -} \ No newline at end of file +} diff --git a/python-connectors/googlesheets-sheet/connector.json b/python-connectors/googlesheets-sheet/connector.json index b83ebf0..10f5803 100644 --- a/python-connectors/googlesheets-sheet/connector.json +++ b/python-connectors/googlesheets-sheet/connector.json @@ -7,6 +7,7 @@ "readable" : true, "writable" : true, "canCountRecords" : false, + "supportAppend" : true, "kind": "PYTHON", "paramsPythonSetup": "browse_sheets.py", @@ -128,4 +129,4 @@ "visibilityCondition": "model.show_advanced_parameters==true" } ] -} \ No newline at end of file +} diff --git a/python-connectors/googlesheets-sheet/connector.py b/python-connectors/googlesheets-sheet/connector.py index c4eaac2..04b66da 100644 --- a/python-connectors/googlesheets-sheet/connector.py +++ b/python-connectors/googlesheets-sheet/connector.py @@ -6,6 +6,7 @@ from googlesheets import GoogleSheetsSession from safe_logger import SafeLogger from googlesheets_common import DSSConstants, extract_credentials, get_tab_ids, mark_date_columns, convert_dates_in_row +from googlesheets_append import append_rows logger = SafeLogger("googlesheets plugin", ["credentials", "access_token"]) @@ -94,7 +95,7 @@ def generate_rows(self, dataset_schema=None, dataset_partitioning=None, raise Exception("Unimplemented") def get_writer(self, dataset_schema=None, dataset_partitioning=None, - partition_id=None): + partition_id=None, write_mode="OVERWRITE"): if self.result_format == 'json': raise Exception('JSON format not supported in write mode') @@ -105,7 +106,7 @@ def get_writer(self, dataset_schema=None, dataset_partitioning=None, if len(self.tabs_ids) > 1: raise Exception('Only one target sheet can be selected for writing') - return MyCustomDatasetWriter(self.config, self, dataset_schema, dataset_partitioning, partition_id) + return MyCustomDatasetWriter(self.config, self, dataset_schema, dataset_partitioning, partition_id, write_mode) def get_records_count(self, partitioning=None, partition_id=None): """ @@ -117,13 +118,14 @@ def get_records_count(self, partitioning=None, partition_id=None): class MyCustomDatasetWriter(CustomDatasetWriter): - def __init__(self, config, parent, dataset_schema, dataset_partitioning, partition_id): + def __init__(self, config, parent, dataset_schema, dataset_partitioning, partition_id, write_mode): CustomDatasetWriter.__init__(self) self.parent = parent self.config = config self.dataset_schema = dataset_schema self.dataset_partitioning = dataset_partitioning self.partition_id = partition_id + self.write_mode = write_mode self.buffer = [] self.date_columns = [] if self.parent.write_format == "USER_ENTERED": @@ -140,14 +142,18 @@ def write_row(self, row): def flush(self): worksheet = self.parent.session.get_spreadsheet(self.parent.doc_id, self.parent.tabs_ids[0]) + worksheet.append_rows = append_rows.__get__(worksheet, worksheet.__class__) - num_columns = len(self.buffer[0]) - num_lines = len(self.buffer) + if self.write_mode == "APPEND": + worksheet.append_rows(self.buffer[1:], self.parent.write_format) #TODO: batch ? + elif self.write_mode == "OVERWRITE": + num_columns = len(self.buffer[0]) + num_lines = len(self.buffer) - worksheet.resize(rows=num_lines, cols=num_columns) + worksheet.resize(rows=num_lines, cols=num_columns) - range = 'A1:%s' % rowcol_to_a1(num_lines, num_columns) - worksheet.update(range, self.buffer, value_input_option=self.parent.write_format) + range = 'A1:%s' % rowcol_to_a1(num_lines, num_columns) + worksheet.update(range, self.buffer, value_input_option=self.parent.write_format) self.buffer = [] diff --git a/python-lib/googlesheets_append.py b/python-lib/googlesheets_append.py new file mode 100644 index 0000000..4be79c8 --- /dev/null +++ b/python-lib/googlesheets_append.py @@ -0,0 +1,21 @@ +# Make available a method of later version of gspread (probably 3.4.0) +# from https://github.com/burnash/gspread/pull/556 +def append_rows(self, values, value_input_option='RAW'): + """Adds multiple rows to the worksheet and populates them with values. + Widens the worksheet if there are more values than columns. + :param values: List of rows each row is List of values for the new row. + :param value_input_option: (optional) Determines how input data should + be interpreted. See `ValueInputOption`_ in + the Sheets API. + :type value_input_option: str + .. _ValueInputOption: https://developers.google.com/sheets/api/reference/rest/v4/ValueInputOption + """ + params = { + 'valueInputOption': value_input_option + } + + body = { + 'values': values + } + + return self.spreadsheet.values_append(self.title, params, body)