From 1e3259c798332117e5b303ca526298aa44b29fe5 Mon Sep 17 00:00:00 2001 From: Dmitry Bolotin Date: Tue, 11 Feb 2025 23:41:48 +0100 Subject: [PATCH] Added "count" aggregation --- .changeset/proud-doors-type.md | 5 +++++ package.json | 2 +- pnpm-lock.yaml | 18 +++++++++--------- src/aggregate.py | 6 ++++++ src/itest/output_2.tsv | 6 +++--- src/itest/output_empty_2.tsv | 2 +- src/itest/params_2.json | 5 +++++ 7 files changed, 30 insertions(+), 14 deletions(-) create mode 100644 .changeset/proud-doors-type.md diff --git a/.changeset/proud-doors-type.md b/.changeset/proud-doors-type.md new file mode 100644 index 0000000..3c9fd90 --- /dev/null +++ b/.changeset/proud-doors-type.md @@ -0,0 +1,5 @@ +--- +"@platforma-open/milaboratories.software-ptransform": patch +--- + +Added "count" aggregation diff --git a/package.json b/package.json index df89ea0..0e8ea9d 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,7 @@ ], "dependencies": {}, "devDependencies": { - "@changesets/cli": "^2.27.11", + "@changesets/cli": "^2.27.12", "@platforma-sdk/package-builder": "^2.15.0", "@platforma-open/milaboratories.runenv-python-3": "^1.1.5" }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f8f4db3..028d555 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -9,8 +9,8 @@ importers: .: devDependencies: '@changesets/cli': - specifier: ^2.27.11 - version: 2.27.11 + specifier: ^2.27.12 + version: 2.27.12 '@platforma-open/milaboratories.runenv-python-3': specifier: ^1.1.5 version: 1.1.5 @@ -193,8 +193,8 @@ packages: resolution: {integrity: sha512-FDSOghenHTiToteC/QRlv2q3DhPZ/oOXTBoirfWNx1Cx3TMVcGWQtMMmQcSvb/JjpNeGzx8Pq/b4fKEJuWm1sw==} engines: {node: '>=6.9.0'} - '@changesets/apply-release-plan@7.0.7': - resolution: {integrity: sha512-qnPOcmmmnD0MfMg9DjU1/onORFyRpDXkMMl2IJg9mECY6RnxL3wN0TCCc92b2sXt1jt8DgjAUUsZYGUGTdYIXA==} + '@changesets/apply-release-plan@7.0.8': + resolution: {integrity: sha512-qjMUj4DYQ1Z6qHawsn7S71SujrExJ+nceyKKyI9iB+M5p9lCL55afuEd6uLBPRpLGWQwkwvWegDHtwHJb1UjpA==} '@changesets/assemble-release-plan@6.0.5': resolution: {integrity: sha512-IgvBWLNKZd6k4t72MBTBK3nkygi0j3t3zdC1zrfusYo0KpdsvnDjrMM9vPnTCLCMlfNs55jRL4gIMybxa64FCQ==} @@ -202,8 +202,8 @@ packages: '@changesets/changelog-git@0.2.0': resolution: {integrity: sha512-bHOx97iFI4OClIT35Lok3sJAwM31VbUM++gnMBV16fdbtBhgYu4dxsphBF/0AZZsyAHMrnM0yFcj5gZM1py6uQ==} - '@changesets/cli@2.27.11': - resolution: {integrity: sha512-1QislpE+nvJgSZZo9+Lj3Lno5pKBgN46dAV8IVxKJy9wX8AOrs9nn5pYVZuDpoxWJJCALmbfOsHkyxujgetQSg==} + '@changesets/cli@2.27.12': + resolution: {integrity: sha512-9o3fOfHYOvBnyEn0mcahB7wzaA3P4bGJf8PNqGit5PKaMEFdsRixik+txkrJWd2VX+O6wRFXpxQL8j/1ANKE9g==} hasBin: true '@changesets/config@3.0.5': @@ -1678,7 +1678,7 @@ snapshots: dependencies: regenerator-runtime: 0.14.1 - '@changesets/apply-release-plan@7.0.7': + '@changesets/apply-release-plan@7.0.8': dependencies: '@changesets/config': 3.0.5 '@changesets/get-version-range-type': 0.4.0 @@ -1707,9 +1707,9 @@ snapshots: dependencies: '@changesets/types': 6.0.0 - '@changesets/cli@2.27.11': + '@changesets/cli@2.27.12': dependencies: - '@changesets/apply-release-plan': 7.0.7 + '@changesets/apply-release-plan': 7.0.8 '@changesets/assemble-release-plan': 6.0.5 '@changesets/changelog-git': 0.2.0 '@changesets/config': 3.0.5 diff --git a/src/aggregate.py b/src/aggregate.py index b7686b4..c3578f5 100644 --- a/src/aggregate.py +++ b/src/aggregate.py @@ -16,6 +16,7 @@ def aggregate(self, grp_data: DataFrameGroupBy, data: pd.DataFrame, group_by: li type AnyAggregation = Union[ + AggregationCount, AggregationMax, AggregationMin, AggregationMean, AggregationMedian, AggregationSum, AggregationFirst, AggregationMaxBy] @@ -123,6 +124,11 @@ def _aggregate_column(self, grp_data: DataFrameGroupBy, data: pd.DataFrame) -> p return self._column_data(grp_data).first() +class AggregationCount(ColumnAggregationBase, tag="count"): + def _aggregate_column(self, grp_data: DataFrameGroupBy, data: pd.DataFrame) -> pd.Series: + return self._column_data(grp_data).count() + + class MultiAggregationCumsum(ColumnAggregationBase, tag="cumsum"): def _aggregate_column(self, grp_data: DataFrameGroupBy, data: pd.DataFrame) -> pd.Series: return self._column_data(grp_data).cumsum() diff --git a/src/itest/output_2.tsv b/src/itest/output_2.tsv index b70c676..69b6500 100644 --- a/src/itest/output_2.tsv +++ b/src/itest/output_2.tsv @@ -1,3 +1,3 @@ -k1 v1_sum v1_first -a 10.0 1.0 -b 26.0 5.0 +k1 v1_sum v1_first v1_count +a 10.0 1.0 4 +b 26.0 5.0 4 diff --git a/src/itest/output_empty_2.tsv b/src/itest/output_empty_2.tsv index 87f63ae..d5c4db5 100644 --- a/src/itest/output_empty_2.tsv +++ b/src/itest/output_empty_2.tsv @@ -1 +1 @@ -k1 v1_sum v1_first +k1 v1_sum v1_first v1_count diff --git a/src/itest/params_2.json b/src/itest/params_2.json index 3749718..eca0f16 100644 --- a/src/itest/params_2.json +++ b/src/itest/params_2.json @@ -15,6 +15,11 @@ "type": "first", "dst": "v1_first", "src": "v1" + }, + { + "type": "count", + "dst": "v1_count", + "src": "v1" } ] }