Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better datetime/date string parsing performance #2885

Merged
merged 1 commit into from
Dec 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmake/arrow.txt.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ cmake_policy(SET CMP0097 NEW)
include(ExternalProject)
ExternalProject_Add(apachearrow
GIT_REPOSITORY https://github.com/apache/arrow.git
GIT_TAG apache-arrow-17.0.0
GIT_TAG apache-arrow-18.1.0
GIT_SUBMODULES ""
GIT_SHALLOW TRUE
SOURCE_DIR "${CMAKE_BINARY_DIR}/arrow-src"
Expand All @@ -20,5 +20,5 @@ ExternalProject_Add(apachearrow
TEST_COMMAND ""
# This patch is to work around https://github.com/apache/arrow/issues/44384
# It can be removed when a version of Arrow is released with https://github.com/apache/arrow/pull/44385
PATCH_COMMAND "${CMAKE_COMMAND}" -E chdir <SOURCE_DIR> git apply "${CMAKE_SOURCE_DIR}/patches/fix_arrow_libtool.patch"
PATCH_COMMAND "${CMAKE_COMMAND}" -E chdir <SOURCE_DIR> git apply "${CMAKE_SOURCE_DIR}/patches/fix_arrow_libtool.patch" && "${CMAKE_COMMAND}" -E chdir <SOURCE_DIR> git apply "${CMAKE_SOURCE_DIR}/patches/arrow_strptime.patch"
)
13 changes: 13 additions & 0 deletions cpp/perspective/patches/arrow_strptime.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
diff --git a/cpp/src/arrow/util/value_parsing.h b/cpp/src/arrow/util/value_parsing.h
index 609906052..1e3dfae7c 100644
--- a/cpp/src/arrow/util/value_parsing.h
+++ b/cpp/src/arrow/util/value_parsing.h
@@ -804,7 +804,7 @@ static inline bool ParseTimestampStrptime(const char* buf, size_t length,
std::string clean_copy(buf, length);
struct tm result;
memset(&result, 0, sizeof(struct tm));
-#ifdef _WIN32
+#if defined(_WIN32) || defined(__EMSCRIPTEN__)
char* ret = arrow_strptime(clean_copy.c_str(), format, &result);
#else
char* ret = strptime(clean_copy.c_str(), format, &result);
2 changes: 2 additions & 0 deletions tools/perspective-bench/basic_suite.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ perspective_bench.suite(
metadata = { version, version_idx };
}

globalThis.__SEND__ = (x) => process.send(x);

await all_benchmarks.table_suite(client, metadata);
await all_benchmarks.view_suite(client, metadata);
await all_benchmarks.to_data_suite(client, metadata);
Expand Down
11 changes: 7 additions & 4 deletions tools/perspective-bench/cross_platform_suite.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,10 @@ export async function table_suite(perspective, metadata) {
const table = await perspective.table(
new_superstore_table(metadata)
);

const view = await table.view();
const csv = await view.to_csv();
const arrow = await view.to_arrow();
const json = await view.to_json();
const columns = await view.to_columns();
if (check_version_gte(metadata.version, "2.10.9")) {
Expand All @@ -169,7 +171,8 @@ export async function table_suite(perspective, metadata) {
if (check_version_gte(metadata.version, "3.0.0")) {
await table.delete();
}
return { csv, columns, json };

return { csv, arrow, table, json, columns };
} catch (e) {
console.error(e);
}
Expand All @@ -184,8 +187,8 @@ export async function table_suite(perspective, metadata) {
await table.delete();
}
},
async test() {
return await perspective.table(new_superstore_table(metadata));
async test({ table, arrow }) {
return await perspective.table(arrow.slice());
},
});

Expand All @@ -198,7 +201,7 @@ export async function table_suite(perspective, metadata) {
await table.delete();
}
},
async test({ table, csv }) {
async test({ csv }) {
return await perspective.table(csv);
},
});
Expand Down
87 changes: 87 additions & 0 deletions tools/perspective-bench/puppeteer_suite.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃
// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃
// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃
// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃
// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
// ┃ Copyright (c) 2017, the Perspective Authors. ┃
// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
// ┃ This file is part of the Perspective library, distributed under the terms ┃
// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛

import * as all_benchmarks from "./cross_platform_suite.mjs";
import * as perspective_bench from "./src/js/benchmark.mjs";
import * as puppeteer from "puppeteer";

import * as fs from "node:fs";
import * as path from "node:path";
import * as url from "node:url";
import * as process from "node:process";

const __dirname = url.fileURLToPath(new URL(".", import.meta.url)).slice(0, -1);

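/**
 * The benchmark candidate module list, passed to `perspective_bench.suite`
 * below.
 *
 * NOTE(review): these names are hard-coded here rather than read from the
 * `dependencies` of this package, so the two lists must be kept in sync by
 * hand - confirm each entry is also declared as a dependency.
 */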
const VERSIONS = [
"@finos/perspective",
"perspective-3-0-0",
"perspective-2-10-0",
];

/**
 * Registers the in-browser benchmark run.
 *
 * For each candidate the callback launches a headless browser via Puppeteer,
 * opens a blank page from the local server, and runs `table_suite`,
 * `view_suite` and `to_data_suite` from `cross_platform_suite.mjs` inside the
 * page. Everything the page hands to `window.__SEND__` is collected and then
 * forwarded to the parent process with `process.send`.
 *
 * The browser is always closed when the callback finishes, including when a
 * suite throws, so a failed run does not leave a browser process behind.
 */
perspective_bench.suite(
    [...VERSIONS],
    path.join(__dirname, "dist/benchmark-js.arrow"),
    /**
     * @param {string} version_path Value interpolated into the
     *     `node_modules/<...>/dist/esm/perspective.inline.js` import URL.
     *     Presumably one of the `VERSIONS` entries - verify against
     *     `perspective_bench.suite`.
     * @param {number} version_idx Index of the candidate. Currently unused,
     *     see the NOTE(review) on `metadata` below.
     */
    async function (version_path, version_idx) {
        console.log(version_path);
        const browser = await puppeteer.launch({
            headless: true,
            // Very large protocol timeout: each `page.evaluate` call below
            // stays pending for the duration of an entire suite.
            protocolTimeout: 100_000_000,
        });

        try {
            const page = await browser.newPage();

            // NOTE(review): assumes a local server is already listening on
            // port 8081 and serves both this page and the module paths
            // imported below - confirm against the bench setup.
            await page.goto("http://localhost:8081/empty.html");

            // Runs one named suite inside the page, then forwards the
            // collected results to the parent process.
            const run_suite = async (suite) => {
                // This callback is executed in the browser page, so it cannot
                // close over Node-side variables; its inputs arrive through
                // the argument array instead.
                const items = await page.evaluate(
                    async ([version, suite]) => {
                        const { default: perspective } = await import(
                            `/tools/perspective-bench/node_modules/${version}/dist/esm/perspective.inline.js`
                        );
                        const benchmarks = await import(
                            "/tools/perspective-bench/cross_platform_suite.mjs"
                        );

                        // NOTE(review): version and index are hard-coded and
                        // ignore `version_idx`, so every candidate is
                        // reported as "3.2.0" / 0 - confirm this is intended.
                        const metadata = {
                            version: "3.2.0",
                            version_idx: 0,
                        };

                        // Collect results in the page; `process.send` is not
                        // available there.
                        const total = [];
                        window.__SEND__ = (x) => {
                            total.push(x);
                        };

                        await benchmarks[suite](
                            await perspective.worker(),
                            metadata
                        );

                        return total;
                    },
                    [version_path, suite]
                );

                for (const { obs_records, stats } of items) {
                    process.send({ obs_records, stats });
                }
            };

            // Suites run one after another so they do not compete for the
            // same page while being timed.
            await run_suite("table_suite");
            await run_suite("view_suite");
            await run_suite("to_data_suite");
        } finally {
            await browser.close();
        }
    }
);
Loading
Loading