diff --git a/README.md b/README.md index 71b60f8e..d436c1da 100644 --- a/README.md +++ b/README.md @@ -2,21 +2,34 @@ This is a hacked up fork of [tilemaker](https://github.com/systemed/tilemaker). -It's eventual goal is to let you do: +It lets you do: ```bash tile-smush foo.mbtiles bar.mbtiles ``` -and get a `smushed.mbtiles` that is the concatenation of the layers in the input files. +and get a `merged.mbtiles` that is the concatenation of the layers in the input files. It's meant to work on mbtiles produced by [mapt](https://github.com/cldellow/mapt/). These mbtiles may have overlapping tiles, but the tiles will not have overlapping layers. This means they can be merged by just concatenating the protobufs, which in theory is a mechanical transformation that should be able to be done very quickly. +## Caveats + +This was very much built just to enable my personal use case: using [mapt](https://github.com/cldellow/mapt/) to generate thematic layers, then merging them into a single file. + +As such, it prioritizes speed and "just enough to work". + +- If two or more input mbtiles files contain the same layers, the result + is undefined. +- The `json` metadata value will get its `vector_layers` entries merged, + but any other entries (such as `tilestats`) are just dropped on the floor. + - This JSON munging is done via string manipulation. It's a total hack, + but it works. If someone wants to send a PR using rapidjson, it'd be welcome. + ## Alternatives -You can also use [tile-join](https://github.com/mapbox/tippecanoe) from Mapbox's tippecanoe. It seems to do more stuff, and be slower than it could be for this use case. +You can also use [tile-join](https://github.com/mapbox/tippecanoe) from Mapbox's tippecanoe. It has a much broader scope, which comes at the cost of being 10-50x slower. 
## Installing diff --git a/src/tilemaker.cpp b/src/tilemaker.cpp index 7a50e54a..0d76596d 100644 --- a/src/tilemaker.cpp +++ b/src/tilemaker.cpp @@ -155,56 +155,6 @@ int main(const int argc, const char* argv[]) { MBTiles merged; merged.openForWriting(MergedFilename); - std::vector matching; - for (int zoom = 0; zoom < 15; zoom++) { - Bbox bbox = inputs[0]->bbox[zoom]; - for (const auto& input : inputs) { - if (input->bbox[zoom].minX < bbox.minX) bbox.minX = input->bbox[zoom].minX; - if (input->bbox[zoom].minY < bbox.minY) bbox.minY = input->bbox[zoom].minY; - if (input->bbox[zoom].maxX > bbox.maxX) bbox.maxX = input->bbox[zoom].maxX; - if (input->bbox[zoom].maxY > bbox.maxY) bbox.maxY = input->bbox[zoom].maxY; - } - - // std::cout << "z=" << std::to_string(zoom) << " minX=" << std::to_string(bbox.minX) << " minY=" << std::to_string(bbox.minY) << " maxX=" << std::to_string(bbox.maxX) << " maxY=" << std::to_string(bbox.maxY) << std::endl; - - for (int x = bbox.minX; x <= bbox.maxX; x++) { - for (int y = bbox.minY; y <= bbox.maxY; y++) { - if ((x * (1 << zoom) + y) % shards != shard) - continue; - - matching.clear(); - for (const auto& input : inputs) { - if (input->zooms[zoom].test(x, y)) - matching.push_back(input.get()); - } - - if (matching.empty()) - continue; - - if (matching.size() == 1) { - // When exactly 1 mbtiles matches, it's a special case and we can - // copy directly between them. - //std::vector old = tlsTiles[matching[0]->index]->readTile(zoom, x, y); - std::vector old = matching[0]->mbtiles.readTile(zoom, x, y); - - std::string buffer(old.data(), old.size()); - // TODO: is this valid? We have a lock, but we'll access - // from different threads. This might be problematic because - // we cache the prepared statement. 
- merged.saveTile(zoom, x, y, &buffer, false); - continue; - } - - // std::cout << "need to merge z=" << std::to_string(zoom) << " x=" << std::to_string(x) << " y=" << std::to_string(y) << std::endl; - // Multiple mbtiles want to contribute a tile at this zxy. - // They'll all have disjoint layers, so decompress each tile - // and concatenate their contents to form the new tile. - - // TODO: do this - } - } - } - if (shard == 0) { // Populate the `metadata` table // See https://github.com/mapbox/mbtiles-spec/blob/master/1.3/spec.md#content @@ -218,6 +168,10 @@ int main(const int argc, const char* argv[]) { double minLonCurrent, maxLonCurrent, minLatCurrent, maxLatCurrent; std::map metadata; + + // Use a map to dedupe. You can have _the exact same_ layer, e.g. + // as hikeratlas does with its hacky parks/city_parks hijinks. + std::map layers; for (auto& input : inputs) { for (const auto& entry : input->mbtiles.readMetadata()) { metadata[entry.first] = entry.second; @@ -230,6 +184,48 @@ int main(const int argc, const char* argv[]) { if (entry.first == "maxzoom" && zoom > maxzoom) maxzoom = zoom; } + + if (entry.first == "json") { + // This is incredibly hacky! I don't want to learn how to use a C++ JSON + // library + const char* vectorLayers = strstr(entry.second.c_str(), "\"vector_layers\":["); + if (!vectorLayers) { + throw std::runtime_error("no vector_layers found for " + input->filename); + } + + vectorLayers += strlen("\"vector_layers\":["); + //std::cout << "INPUT: " << vectorLayers << std::endl; + + const char* start = NULL; + // This is a total hack, it'll fail if you have braces in strings, e.g. 
+ int braces = 0; + while(*vectorLayers != ']') { + if (start == NULL && *vectorLayers == ']') + break; + + if (start == NULL && *vectorLayers == '{') { + start = vectorLayers; + } + + if (*vectorLayers == '{') { + braces++; + } + + if (*vectorLayers == '}') { + braces--; + } + + if (start && braces == 0) { + std::string layer(start, vectorLayers - start + 1); + //std::cout << "LAYER: " << layer << std::endl; + + layers[layer] = ""; + start = NULL; + } + + vectorLayers++; + } + } } input->mbtiles.readBoundingBox(minLonCurrent, maxLonCurrent, minLatCurrent, maxLatCurrent); @@ -255,6 +251,69 @@ int main(const int argc, const char* argv[]) { merged.writeMetadata("minzoom", std::to_string(minzoom)); merged.writeMetadata("maxzoom", std::to_string(maxzoom)); + + std::string vector_layers = "{\"vector_layers\":["; + int i = 0; + for (auto const& entry : layers) { + if (i > 0) + vector_layers += ","; + + vector_layers += entry.first; + i++; + } + + vector_layers += "]}"; + merged.writeMetadata("json", vector_layers); + } + + std::vector matching; + for (int zoom = 0; zoom < 15; zoom++) { + Bbox bbox = inputs[0]->bbox[zoom]; + for (const auto& input : inputs) { + if (input->bbox[zoom].minX < bbox.minX) bbox.minX = input->bbox[zoom].minX; + if (input->bbox[zoom].minY < bbox.minY) bbox.minY = input->bbox[zoom].minY; + if (input->bbox[zoom].maxX > bbox.maxX) bbox.maxX = input->bbox[zoom].maxX; + if (input->bbox[zoom].maxY > bbox.maxY) bbox.maxY = input->bbox[zoom].maxY; + } + + // std::cout << "z=" << std::to_string(zoom) << " minX=" << std::to_string(bbox.minX) << " minY=" << std::to_string(bbox.minY) << " maxX=" << std::to_string(bbox.maxX) << " maxY=" << std::to_string(bbox.maxY) << std::endl; + + for (int x = bbox.minX; x <= bbox.maxX; x++) { + for (int y = bbox.minY; y <= bbox.maxY; y++) { + if ((x * (1 << zoom) + y) % shards != shard) + continue; + + matching.clear(); + for (const auto& input : inputs) { + if (input->zooms[zoom].test(x, y)) + 
matching.push_back(input.get()); + } + + if (matching.empty()) + continue; + + if (matching.size() == 1) { + // When exactly 1 mbtiles matches, it's a special case and we can + // copy directly between them. + //std::vector old = tlsTiles[matching[0]->index]->readTile(zoom, x, y); + std::vector old = matching[0]->mbtiles.readTile(zoom, x, y); + + std::string buffer(old.data(), old.size()); + // TODO: is this valid? We have a lock, but we'll access + // from different threads. This might be problematic because + // we cache the prepared statement. + merged.saveTile(zoom, x, y, &buffer, false); + continue; + } + + // std::cout << "need to merge z=" << std::to_string(zoom) << " x=" << std::to_string(x) << " y=" << std::to_string(y) << std::endl; + // Multiple mbtiles want to contribute a tile at this zxy. + // They'll all have disjoint layers, so decompress each tile + // and concatenate their contents to form the new tile. + + // TODO: do this + } + } } merged.closeForWriting();