Skip to content

Commit

Permalink
Use unsafe PyList APIs for improved speed
Browse files Browse the repository at this point in the history
In the case of the parse basecoro, we have full control over the
gen->path object, which is a list, so we can directly call a few of its
PyList_* unsafe functions instead of the safer, but more expensive
PySequence_* ones. In the of builder, we also can safely assume
builder->value_stack is a list, and when we access items we know we are
within boundaries.

One of the consequences of using PyList_GET_ITEM in particular in the
parse basecoro is that we now deal with borrowed references, and
therefore need to do less reference tracking. This in turned simplified
the CONCAT macro to the point where it became unnecessary (and it
already was a bit dirty to begin with).

Local benchmarks show a speed increase of ~4% in the parse method across
the different test cases.

Signed-off-by: Rodrigo Tobar <rtobar@icrar.org>
  • Loading branch information
rtobar committed Dec 30, 2024
1 parent e476bf8 commit 48f98b7
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 30 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
* Restructured source code so all code lives under `src/`,
and the `ijson.backends._yajl2` extension under `src/ijson/backends/ext/_yajl2`.
This allows C backend tests to actually run on cibuildwheel.
* Improved performance of `parse` routine in C backend by ~4%.
* Fixed several potential stability issues in C backend
around correct error handling.
* Pointing to our own fork of yajl (for when we build it ourselves)
Expand Down
9 changes: 4 additions & 5 deletions src/ijson/backends/ext/_yajl2/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ int builder_reset(builder_t *builder)
Py_CLEAR(builder->value);
Py_CLEAR(builder->key);

Py_ssize_t nvals = PyList_Size(builder->value_stack);
Py_ssize_t nvals = PyList_GET_SIZE(builder->value_stack);
M1_M1(PyList_SetSlice(builder->value_stack, 0, nvals, NULL));

return 0;
Expand All @@ -121,14 +121,13 @@ int builder_reset(builder_t *builder)
static inline
int _builder_add(builder_t *builder, PyObject *value)
{
Py_ssize_t nvals = PyList_Size(builder->value_stack);
Py_ssize_t nvals = PyList_GET_SIZE(builder->value_stack);
if (nvals == 0) {
Py_INCREF(value);
builder->value = value;
}
else {
PyObject *last;
M1_N(last = PyList_GetItem(builder->value_stack, nvals-1));
PyObject *last = PyList_GET_ITEM(builder->value_stack, nvals - 1);
assert(("stack element not list or dict-like",
PyList_Check(last) || PyMapping_Check(last)));
if (PyList_Check(last)) {
Expand Down Expand Up @@ -182,7 +181,7 @@ int builder_event(builder_t *builder, enames_t enames, PyObject *ename, PyObject
}
else if (ename == enames.end_array_ename || ename == enames.end_map_ename) {
// pop
Py_ssize_t nvals = PyList_Size(builder->value_stack);
Py_ssize_t nvals = PyList_GET_SIZE(builder->value_stack);
M1_M1(PyList_SetSlice(builder->value_stack, nvals-1, nvals, NULL));
}
else {
Expand Down
39 changes: 14 additions & 25 deletions src/ijson/backends/ext/_yajl2/parse_basecoro.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,10 @@ static void parse_basecoro_dealloc(ParseBasecoro *self)
Py_TYPE(self)->tp_free((PyObject*)self);
}

#define CONCAT(tgt, first, second) \
do { \
tgt = PyUnicode_Concat(first, second); \
Py_DECREF(first); \
N_N(tgt); \
} while(0);

PyObject* parse_basecoro_send_impl(PyObject *self, PyObject *event, PyObject *value)
{
ParseBasecoro *gen = (ParseBasecoro *)self;
Py_ssize_t npaths = PyList_Size(gen->path);
Py_ssize_t npaths = PyList_GET_SIZE(gen->path);
enames_t enames = gen->module_state->enames;
PyObject *dot = gen->module_state->dot;
PyObject *dotitem = gen->module_state->dotitem;
Expand All @@ -61,48 +54,45 @@ PyObject* parse_basecoro_send_impl(PyObject *self, PyObject *event, PyObject *va
// pop
N_M1(PyList_SetSlice(gen->path, npaths - 1, npaths, NULL));
npaths--;
prefix = PySequence_GetItem(gen->path, npaths - 1);
prefix = PyList_GET_ITEM(gen->path, npaths - 1);
}
else if (event == enames.map_key_ename) {

// last_path = path_stack[-2]
// to_append = '.' + value if len(path_stack) > 1 else value
// new_path = path_stack[-2] + to_append
PyObject *last_path;
N_N(last_path = PySequence_GetItem(gen->path, npaths - 2));
PyObject *last_path = PyList_GET_ITEM(gen->path, npaths - 2);
PyObject *last_path_dot = NULL;
if (npaths > 2) {
PyObject *last_path_dot;
CONCAT(last_path_dot, last_path, dot);
last_path_dot = PyUnicode_Concat(last_path, dot);
N_N(last_path_dot);
last_path = last_path_dot;
}
PyObject *new_path;
CONCAT(new_path, last_path, value);
PyObject *new_path = PyUnicode_Concat(last_path, value);
N_N(new_path);
Py_XDECREF(last_path_dot);
PyList_SetItem(gen->path, npaths - 1, new_path);

prefix = PySequence_GetItem(gen->path, npaths - 2);
prefix = PyList_GET_ITEM(gen->path, npaths - 2);
}
else {
prefix = PySequence_GetItem(gen->path, npaths - 1);
prefix = PyList_GET_ITEM(gen->path, npaths - 1);
}
N_N(prefix);

// If entering a map/array, append name to path
if (event == enames.start_array_ename) {

// to_append = '.item' if path_stack[-1] else 'item'
// path_stack.append(path_stack[-1] + to_append)
PyObject *last_path;
N_N(last_path = PySequence_GetItem(gen->path, npaths - 1));

PyObject *last_path = PyList_GET_ITEM(gen->path, npaths - 1);
if (PyUnicode_GET_LENGTH(last_path) > 0) {
PyObject *new_path;
CONCAT(new_path, last_path, dotitem);
PyObject *new_path = PyUnicode_Concat(last_path, dotitem);
N_N(new_path);
N_M1(PyList_Append(gen->path, new_path));
Py_DECREF(new_path);
}
else {
N_M1(PyList_Append(gen->path, item));
Py_DECREF(last_path);
}
}
else if (event == enames.start_map_ename) {
Expand All @@ -121,7 +111,6 @@ PyObject* parse_basecoro_send_impl(PyObject *self, PyObject *event, PyObject *va
CORO_SEND(gen->target_send, res);
Py_DECREF(res);
}
Py_DECREF(prefix);
Py_RETURN_NONE;
}

Expand Down

0 comments on commit 48f98b7

Please sign in to comment.