diff --git a/c/CHANGELOG.rst b/c/CHANGELOG.rst index 67b341b113..55ae0166f1 100644 --- a/c/CHANGELOG.rst +++ b/c/CHANGELOG.rst @@ -1,3 +1,12 @@ +--------------------- +[0.99.2] - 2019-03-27 +--------------------- + +Bugfix release. Changes: + +- Fix incorrect errors on tbl_collection_dump (#132) +- Catch table overflows (#157) + --------------------- [0.99.1] - 2019-01-24 --------------------- diff --git a/c/tests/test_tables.c b/c/tests/test_tables.c index 6d741e6274..8b43bd59ce 100644 --- a/c/tests/test_tables.c +++ b/c/tests/test_tables.c @@ -274,12 +274,17 @@ test_table_collection_dump_errors(void) ret = tsk_table_collection_init(&tables, 0); CU_ASSERT_EQUAL_FATAL(ret, 0); + tables.sequence_length = 1.0; + ret = tsk_table_collection_dump(&tables, "/", 0); CU_ASSERT_TRUE(tsk_is_kas_error(ret)); CU_ASSERT_EQUAL_FATAL(ret ^ (1 << TSK_KAS_ERR_BIT), KAS_ERR_IO); str = tsk_strerror(ret); CU_ASSERT_TRUE(strlen(str) > 0); + /* We'd like to catch close errors also, but it's hard to provoke them + * without intercepting calls to fclose() */ + tsk_table_collection_free(&tables); } static void @@ -2098,6 +2103,99 @@ test_load_reindex(void) tsk_treeseq_free(&ts); } +static void +test_table_overflow(void) +{ + int ret; + tsk_table_collection_t tables; + tsk_size_t max_rows = ((tsk_size_t) INT32_MAX) + 1; + + ret = tsk_table_collection_init(&tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + + /* Simulate overflows */ + tables.individuals.max_rows = max_rows; + tables.individuals.num_rows = max_rows; + ret = tsk_individual_table_add_row(&tables.individuals, 0, 0, 0, NULL, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW); + + tables.nodes.max_rows = max_rows; + tables.nodes.num_rows = max_rows; + ret = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, NULL, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW); + + tables.edges.max_rows = max_rows; + tables.edges.num_rows = max_rows; + ret = tsk_edge_table_add_row(&tables.edges, 0, 0, 0, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW); + + tables.migrations.max_rows = max_rows; + tables.migrations.num_rows = max_rows; + ret = tsk_migration_table_add_row(&tables.migrations, 0, 0, 0, 0, 0, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW); + + tables.sites.max_rows = max_rows; + tables.sites.num_rows = max_rows; + ret = tsk_site_table_add_row(&tables.sites, 0, 0, 0, 0, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW); + + tables.mutations.max_rows = max_rows; + tables.mutations.num_rows = max_rows; + ret = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, 0, 0, 0, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW); + + tables.provenances.max_rows = max_rows; + tables.provenances.num_rows = max_rows; + ret = tsk_provenance_table_add_row(&tables.provenances, 0, 0, 0, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW); + + tables.populations.max_rows = max_rows; + tables.populations.num_rows = max_rows; + ret = tsk_population_table_add_row(&tables.populations, 0, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW); + + tsk_table_collection_free(&tables); +} + +static void +test_column_overflow(void) +{ + int ret; + tsk_table_collection_t tables; + tsk_size_t too_big = ((tsk_size_t) INT32_MAX) + 2; + + ret = tsk_table_collection_init(&tables, 0); + CU_ASSERT_EQUAL_FATAL(ret, 0); + + ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, too_big, NULL, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); + ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, too_big); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); + + ret = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, NULL, too_big); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); + + ret = tsk_site_table_add_row(&tables.sites, 0, NULL, too_big, NULL, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); + ret = tsk_site_table_add_row(&tables.sites, 0, NULL, 0, NULL, too_big); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); + + ret = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, NULL, 0, NULL, too_big); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); + ret = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, NULL, too_big, NULL, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); + + ret = tsk_provenance_table_add_row(&tables.provenances, NULL, too_big, NULL, 0); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); + ret = tsk_provenance_table_add_row(&tables.provenances, NULL, 0, NULL, too_big); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); + + ret = tsk_population_table_add_row(&tables.populations, NULL, too_big); + CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_COLUMN_OVERFLOW); + + tsk_table_collection_free(&tables); +} + int main(int argc, char **argv) { @@ -2124,8 +2222,11 @@ main(int argc, char **argv) {"test_dump_load_empty", test_dump_load_empty}, {"test_dump_load_unsorted", test_dump_load_unsorted}, {"test_load_reindex", test_load_reindex}, + {"test_table_overflow", test_table_overflow}, + {"test_column_overflow", test_column_overflow}, {NULL, NULL}, }; + return test_main(tests, argc, argv); } diff --git a/c/tskit/core.c b/c/tskit/core.c index a020c99977..2b3417a34b 100644 --- a/c/tskit/core.c +++ b/c/tskit/core.c @@ -293,6 +293,12 @@ tsk_strerror_internal(int err) case TSK_ERR_TABLES_NOT_INDEXED: ret = "Table collection must be indexed"; break; + case TSK_ERR_TABLE_OVERFLOW: + ret = "Table too large; cannot allocate more than 2**31 rows."; + break; + case TSK_ERR_COLUMN_OVERFLOW: + ret = "Table column too large; cannot be more than 2**31 bytes."; + break; /* Limitations */ case TSK_ERR_ONLY_INFINITE_SITES: diff --git a/c/tskit/core.h b/c/tskit/core.h index 1b86afbdd9..c04cbfdb86 100644 --- a/c/tskit/core.h +++ b/c/tskit/core.h @@ -80,7 +80,7 @@ to the API or ABI are introduced, i.e., the addition of a new function. The library patch version. Incremented when any changes not relevant to the to the API or ABI are introduced, i.e., internal refactors of bugfixes. */ -#define TSK_VERSION_PATCH 1 +#define TSK_VERSION_PATCH 2 /** @} */ /* Node flags */ @@ -189,6 +189,8 @@ of tskit. #define TSK_ERR_BAD_TABLE_POSITION -700 #define TSK_ERR_BAD_SEQUENCE_LENGTH -701 #define TSK_ERR_TABLES_NOT_INDEXED -702 +#define TSK_ERR_TABLE_OVERFLOW -703 +#define TSK_ERR_COLUMN_OVERFLOW -704 /* Limitations */ #define TSK_ERR_ONLY_INFINITE_SITES -800 diff --git a/c/tskit/tables.c b/c/tskit/tables.c index eb84de89c3..b0c6545878 100644 --- a/c/tskit/tables.c +++ b/c/tskit/tables.c @@ -56,6 +56,15 @@ typedef struct { int type; } write_table_col_t; +/* Returns true if adding the specifiec number of rows would result in overflow. + * Tables can support indexes from 0 to INT32_MAX, and therefore have at most + * INT32_MAX + 1 rows */ +static bool +check_overflow(tsk_size_t current_size, tsk_size_t additional_rows) +{ + size_t new_size = (size_t) current_size + (size_t) additional_rows; + return new_size > ((size_t) INT32_MAX) + 1; +} static int read_table_cols(kastore_t *store, read_table_col_t *read_cols, size_t num_cols) @@ -167,6 +176,10 @@ tsk_individual_table_expand_main_columns(tsk_individual_table_t *self, tsk_size_t increment = TSK_MAX(additional_rows, self->max_rows_increment); tsk_size_t new_size = self->max_rows + increment; + if (check_overflow(self->max_rows, increment)) { + ret = TSK_ERR_TABLE_OVERFLOW; + goto out; + } if ((self->num_rows + additional_rows) > self->max_rows) { ret = expand_column((void **) &self->flags, new_size, sizeof(tsk_flags_t)); if (ret != 0) { @@ -192,10 +205,13 @@ static int tsk_individual_table_expand_location(tsk_individual_table_t *self, tsk_size_t additional_length) { int ret = 0; - tsk_size_t increment = TSK_MAX(additional_length, - self->max_location_length_increment); + tsk_size_t increment = TSK_MAX(additional_length, self->max_location_length_increment); tsk_size_t new_size = self->max_location_length + increment; + if (check_overflow(self->location_length, increment)) { + ret = TSK_ERR_COLUMN_OVERFLOW; + goto out; + } if ((self->location_length + additional_length) > self->max_location_length) { ret = expand_column((void **) &self->location, new_size, sizeof(double)); if (ret != 0) { @@ -215,6 +231,10 @@ tsk_individual_table_expand_metadata(tsk_individual_table_t *self, tsk_size_t ad self->max_metadata_length_increment); tsk_size_t new_size = self->max_metadata_length + increment; + if (check_overflow(self->metadata_length, increment)) { + ret = TSK_ERR_COLUMN_OVERFLOW; + goto out; + } if ((self->metadata_length + additional_length) > self->max_metadata_length) { ret = expand_column((void **) &self->metadata, new_size, sizeof(char)); if (ret != 0) { @@ -646,6 +666,10 @@ tsk_node_table_expand_main_columns(tsk_node_table_t *self, tsk_size_t additional tsk_size_t increment = TSK_MAX(additional_rows, self->max_rows_increment); tsk_size_t new_size = self->max_rows + increment; + if (check_overflow(self->max_rows, increment)) { + ret = TSK_ERR_TABLE_OVERFLOW; + goto out; + } if ((self->num_rows + additional_rows) > self->max_rows) { ret = expand_column((void **) &self->flags, new_size, sizeof(tsk_flags_t)); if (ret != 0) { @@ -682,6 +706,10 @@ tsk_node_table_expand_metadata(tsk_node_table_t *self, tsk_size_t additional_len self->max_metadata_length_increment); tsk_size_t new_size = self->max_metadata_length + increment; + if (check_overflow(self->metadata_length, increment)) { + ret = TSK_ERR_COLUMN_OVERFLOW; + goto out; + } if ((self->metadata_length + additional_length) > self->max_metadata_length) { ret = expand_column((void **) &self->metadata, new_size, sizeof(char)); if (ret != 0) { @@ -1056,6 +1084,10 @@ tsk_edge_table_expand_columns(tsk_edge_table_t *self, size_t additional_rows) (tsk_size_t) additional_rows, self->max_rows_increment); tsk_size_t new_size = self->max_rows + increment; + if (check_overflow(self->max_rows, increment)) { + ret = TSK_ERR_TABLE_OVERFLOW; + goto out; + } if ((self->num_rows + additional_rows) > self->max_rows) { ret = expand_column((void **) &self->left, new_size, sizeof(double)); if (ret != 0) { @@ -1321,6 +1353,10 @@ tsk_site_table_expand_main_columns(tsk_site_table_t *self, tsk_size_t additional tsk_size_t increment = TSK_MAX(additional_rows, self->max_rows_increment); tsk_size_t new_size = self->max_rows + increment; + if (check_overflow(self->max_rows, increment)) { + ret = TSK_ERR_TABLE_OVERFLOW; + goto out; + } if ((self->num_rows + additional_rows) > self->max_rows) { ret = expand_column((void **) &self->position, new_size, sizeof(double)); if (ret != 0) { @@ -1350,6 +1386,10 @@ tsk_site_table_expand_ancestral_state(tsk_site_table_t *self, size_t additional_ self->max_ancestral_state_length_increment); tsk_size_t new_size = self->max_ancestral_state_length + increment; + if (check_overflow(self->ancestral_state_length, increment)) { + ret = TSK_ERR_COLUMN_OVERFLOW; + goto out; + } if ((self->ancestral_state_length + additional_length) > self->max_ancestral_state_length) { ret = expand_column((void **) &self->ancestral_state, new_size, sizeof(char)); @@ -1370,6 +1410,10 @@ tsk_site_table_expand_metadata(tsk_site_table_t *self, size_t additional_length) self->max_metadata_length_increment); tsk_size_t new_size = self->max_metadata_length + increment; + if (check_overflow(self->metadata_length, increment)) { + ret = TSK_ERR_COLUMN_OVERFLOW; + goto out; + } if ((self->metadata_length + additional_length) > self->max_metadata_length) { ret = expand_column((void **) &self->metadata, new_size, sizeof(char)); @@ -1776,6 +1820,11 @@ tsk_mutation_table_expand_main_columns(tsk_mutation_table_t *self, size_t additi tsk_size_t increment = (tsk_size_t) TSK_MAX(additional_rows, self->max_rows_increment); tsk_size_t new_size = self->max_rows + increment; + + if (check_overflow(self->max_rows, increment)) { + ret = TSK_ERR_TABLE_OVERFLOW; + goto out; + } if ((self->num_rows + additional_rows) > self->max_rows) { ret = expand_column((void **) &self->site, new_size, sizeof(tsk_id_t)); if (ret != 0) { @@ -1813,6 +1862,10 @@ tsk_mutation_table_expand_derived_state(tsk_mutation_table_t *self, size_t addit self->max_derived_state_length_increment); tsk_size_t new_size = self->max_derived_state_length + increment; + if (check_overflow(self->derived_state_length, increment)) { + ret = TSK_ERR_COLUMN_OVERFLOW; + goto out; + } if ((self->derived_state_length + additional_length) > self->max_derived_state_length) { ret = expand_column((void **) &self->derived_state, new_size, sizeof(char)); @@ -1833,6 +1886,10 @@ tsk_mutation_table_expand_metadata(tsk_mutation_table_t *self, size_t additional self->max_metadata_length_increment); tsk_size_t new_size = self->max_metadata_length + increment; + if (check_overflow(self->metadata_length, increment)) { + ret = TSK_ERR_COLUMN_OVERFLOW; + goto out; + } if ((self->metadata_length + additional_length) > self->max_metadata_length) { ret = expand_column((void **) &self->metadata, new_size, sizeof(char)); @@ -2262,6 +2319,10 @@ tsk_migration_table_expand(tsk_migration_table_t *self, size_t additional_rows) (tsk_size_t) additional_rows, self->max_rows_increment); tsk_size_t new_size = self->max_rows + increment; + if (check_overflow(self->max_rows, increment)) { + ret = TSK_ERR_TABLE_OVERFLOW; + goto out; + } if ((self->num_rows + additional_rows) > self->max_rows) { ret = expand_column((void **) &self->left, new_size, sizeof(double)); if (ret != 0) { @@ -2558,6 +2619,10 @@ tsk_population_table_expand_main_columns(tsk_population_table_t *self, tsk_size_ tsk_size_t increment = TSK_MAX(additional_rows, self->max_rows_increment); tsk_size_t new_size = self->max_rows + increment; + if (check_overflow(self->max_rows, increment)) { + ret = TSK_ERR_TABLE_OVERFLOW; + goto out; + } if ((self->num_rows + additional_rows) > self->max_rows) { ret = expand_column((void **) &self->metadata_offset, new_size + 1, sizeof(tsk_size_t)); @@ -2578,6 +2643,10 @@ tsk_population_table_expand_metadata(tsk_population_table_t *self, tsk_size_t ad self->max_metadata_length_increment); tsk_size_t new_size = self->max_metadata_length + increment; + if (check_overflow(self->metadata_length, increment)) { + ret = TSK_ERR_COLUMN_OVERFLOW; + goto out; + } if ((self->metadata_length + additional_length) > self->max_metadata_length) { ret = expand_column((void **) &self->metadata, new_size, sizeof(char)); if (ret != 0) { @@ -2890,6 +2959,10 @@ tsk_provenance_table_expand_main_columns(tsk_provenance_table_t *self, tsk_size_ tsk_size_t increment = TSK_MAX(additional_rows, self->max_rows_increment); tsk_size_t new_size = self->max_rows + increment; + if (check_overflow(self->max_rows, increment)) { + ret = TSK_ERR_TABLE_OVERFLOW; + goto out; + } if ((self->num_rows + additional_rows) > self->max_rows) { ret = expand_column((void **) &self->timestamp_offset, new_size + 1, sizeof(tsk_size_t)); @@ -2915,6 +2988,10 @@ tsk_provenance_table_expand_timestamp(tsk_provenance_table_t *self, tsk_size_t a self->max_timestamp_length_increment); tsk_size_t new_size = self->max_timestamp_length + increment; + if (check_overflow(self->timestamp_length, increment)) { + ret = TSK_ERR_COLUMN_OVERFLOW; + goto out; + } if ((self->timestamp_length + additional_length) > self->max_timestamp_length) { ret = expand_column((void **) &self->timestamp, new_size, sizeof(char)); if (ret != 0) { @@ -2934,6 +3011,10 @@ tsk_provenance_table_expand_provenance(tsk_provenance_table_t *self, tsk_size_t self->max_record_length_increment); tsk_size_t new_size = self->max_record_length + increment; + if (check_overflow(self->record_length, increment)) { + ret = TSK_ERR_COLUMN_OVERFLOW; + goto out; + } if ((self->record_length + additional_length) > self->max_record_length) { ret = expand_column((void **) &self->record, new_size, sizeof(char)); if (ret != 0) { @@ -5733,6 +5814,8 @@ tsk_table_collection_dump(tsk_table_collection_t *self, const char *filename, } } + /* All of these functions will set the kas_error internally, so we don't have + * to modify the return value. */ ret = tsk_table_collection_write_format_data(self, &store); if (ret != 0) { goto out; @@ -5774,7 +5857,11 @@ tsk_table_collection_dump(tsk_table_collection_t *self, const char *filename, goto out; } ret = kastore_close(&store); + if (ret != 0) { + ret = tsk_set_kas_error(ret); + } out: + /* It's safe to close a kastore twice. */ if (ret != 0) { kastore_close(&store); } diff --git a/python/tests/test_lowlevel.py b/python/tests/test_lowlevel.py index 64276483a0..133e004069 100644 --- a/python/tests/test_lowlevel.py +++ b/python/tests/test_lowlevel.py @@ -945,4 +945,4 @@ def test_kastore_version(self): def test_tskit_version(self): version = _tskit.get_tskit_version() - self.assertEqual(version, (0, 99, 1)) + self.assertEqual(version, (0, 99, 2))