Skip to content

Commit

Permalink
feat: add IntrusiveStringSet
Browse files Browse the repository at this point in the history
  • Loading branch information
BorysTheDev committed Mar 7, 2025
1 parent 777195b commit 72aad85
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 101 deletions.
171 changes: 89 additions & 82 deletions src/core/intrusive_string_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,153 +4,160 @@

#pragma once

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <new>
#include <string_view>
#include <utility>
#include <vector>

#include "base/hash.h"

namespace dfly {

// Non-owning handle to a single node of an intrusive singly linked list of string keys.
//
// A node is one malloc'ed buffer with the layout [next pointer, key_size, key bytes].
// Every field is read and written with memcpy, so the buffer needs no particular alignment.
// Copying an ISLEntry copies only the pointer; the buffer is released solely by Destroy().
class ISLEntry {
 public:
  // Creates a null handle (converts to false).
  ISLEntry() = default;

  // Wraps an existing node buffer. Ownership is not transferred to the handle.
  ISLEntry(char* data) : data_(data) {
  }

  // True when the handle refers to a node.
  // Kept implicit on purpose: existing callers compare entries through this conversion.
  operator bool() const {
    return data_ != nullptr;
  }

  // Allocates a node holding a copy of `key` with its next pointer set to null.
  // The result must eventually be passed to Destroy().
  // Throws std::bad_alloc when the allocation fails.
  static ISLEntry Create(std::string_view key) {
    // The size field is 32 bits wide; a longer key would be silently truncated.
    assert(key.size() <= UINT32_MAX);
    const auto key_size = static_cast<uint32_t>(key.size());

    char* data = static_cast<char*>(std::malloc(kHeaderSize + key_size));
    if (data == nullptr) {
      throw std::bad_alloc();
    }

    char* const no_next = nullptr;
    std::memcpy(data, &no_next, sizeof(no_next));
    std::memcpy(data + kKeySizeOffset, &key_size, sizeof(key_size));

    // An empty string_view may carry a null data(); memcpy requires valid pointers even
    // for a zero length, so skip the call in that case.
    if (key_size > 0) {
      std::memcpy(data + kHeaderSize, key.data(), key_size);
    }

    return ISLEntry(data);
  }

  // Frees the node buffer. Passing a null handle is a no-op. Other handles that refer to
  // the same node dangle afterwards.
  static void Destroy(ISLEntry entry) {
    std::free(entry.data_);
  }

  // Returns a view of the key stored inside the node. Must not be called on a null handle.
  std::string_view Key() const {
    return {GetKeyData(), GetKeySize()};
  }

  // Returns the handle stored in the node's next field (null for the last node).
  // Must not be called on a null handle.
  ISLEntry Next() const {
    char* next = nullptr;
    std::memcpy(&next, data_, sizeof(next));
    return ISLEntry(next);
  }

  // TODO remove from public
  // Stores `next` in the node's next field. Must not be called on a null handle.
  void SetNext(ISLEntry next) {
    std::memcpy(data_, &next.data_, sizeof(next.data_));
  }

 private:
  // Byte offset of the key_size field: it follows the next pointer.
  static constexpr std::size_t kKeySizeOffset = sizeof(char*);
  // Byte offset of the key bytes: they follow the next pointer and the key_size field.
  static constexpr std::size_t kHeaderSize = kKeySizeOffset + sizeof(uint32_t);

  const char* GetKeyData() const {
    return data_ + kHeaderSize;
  }

  uint32_t GetKeySize() const {
    uint32_t size = 0;
    std::memcpy(&size, data_ + kKeySizeOffset, sizeof(size));
    return size;
  }

  // TODO consider use SDS strings or other approach
  // TODO add optimization for big keys
  // memory data layout [ISLEntry*, key_size, key]
  char* data_ = nullptr;
};

class ISMEntry {
class IntrusiveStringList {
public:
ISMEntry(std::string_view key, std::string_view val) {
ISMEntry* next = nullptr;
uint32_t key_size = key.size();
uint32_t val_size = val.size();

auto size = sizeof(next) + sizeof(key_size) + sizeof(val_size) + key_size + val_size;

data_ = (char*)malloc(size);

std::memcpy(data_, &next, sizeof(next));

auto* key_size_pos = data_ + sizeof(next);
std::memcpy(key_size_pos, &key_size, sizeof(key_size));

auto* val_size_pos = key_size_pos + sizeof(key_size);
std::memcpy(val_size_pos, &val_size, sizeof(val_size));

auto* key_pos = val_size_pos + sizeof(val_size);
std::memcpy(key_pos, key.data(), key_size);

auto* val_pos = key_pos + key_size;
std::memcpy(val_pos, val.data(), val_size);
~IntrusiveStringList() {
while (start_) {
auto next = start_.Next();
ISLEntry::Destroy(start_);
start_ = next;
}
}

std::string_view Key() const {
return {GetKeyData(), GetKeySize()};
ISLEntry Emplace(std::string_view key) {
auto e = ISLEntry::Create(key);
e.SetNext(start_);
start_ = e;
return start_;
}

std::string_view Val() const {
return {GetValData(), GetValSize()};
ISLEntry Find(std::string_view str) {
auto it = start_;
for (; it && it.Key() != str; it = it.Next())
;
return it;
}

ISMEntry* Next() const {
ISMEntry* next = nullptr;
std::memcpy(&next, data_, sizeof(next));
return next;
}

void SetVal(std::string_view val) {
// TODO add optimization for the same size key
uint32_t val_size = val.size();
auto new_size =
sizeof(ISMEntry*) + sizeof(uint32_t) + sizeof(uint32_t) + GetKeySize() + val_size;

data_ = (char*)realloc(data_, new_size);
private:
ISLEntry start_;
};

auto* val_size_pos = data_ + sizeof(ISMEntry*) + sizeof(uint32_t);
std::memcpy(val_size_pos, &val_size, sizeof(val_size));
class IntrusiveStringSet {
public:
// TODO add TTL processing
ISLEntry Add(std::string_view str, uint32_t ttl_sec = UINT32_MAX) {
if (size_ >= entries_.size()) {
Grow();
}
auto bucket_id = BucketId(Hash(str));
auto& bucket = entries_[bucket_id];

auto* val_pos = val_size_pos + sizeof(val_size) + GetKeySize();
std::memcpy(val_pos, val.data(), val_size);
}
if (auto existed_item = bucket.Find(str); existed_item) {
// TODO consider common implementation for key value pair
return ISLEntry();
}

void SetNext(ISMEntry* next) {
std::memcpy(data_, &next, sizeof(next));
return bucket.Emplace(str);
}

private:
const char* GetKeyData() const {
return data_ + sizeof(ISMEntry*) + sizeof(uint32_t) + sizeof(uint32_t);
std::uint32_t Capacity() const {
return 1 << capacity_log_;
}

uint32_t GetKeySize() const {
uint32_t size = 0;
std::memcpy(&size, data_ + sizeof(ISMEntry*), sizeof(size));
return size;
}
void Grow() {
++capacity_log_;
entries_.resize(Capacity());

const char* GetValData() const {
return GetKeyData() + GetKeySize();
// TODO rehashing
}

uint32_t GetValSize() const {
uint32_t size = 0;
std::memcpy(&size, data_ + sizeof(ISMEntry*) + sizeof(uint32_t), sizeof(size));
return size;
uint32_t BucketId(uint64_t hash) const {
assert(capacity_log_ > 0);
return hash >> (64 - capacity_log_);
}

// TODO consider use SDS strings or other approach
// TODO add optimization for big keys
// memory daya layout [ISMEntry*, key_size, val_size, key, val]
char* data_;
};
uint64_t Hash(std::string_view str) const {
constexpr XXH64_hash_t kHashSeed = 24061983;
return XXH3_64bits_withSeed(str.data(), str.size(), kHashSeed);
}

template <class EntryT> class IntrusiveStringSet {
public:
private:
std::vector<EntryT*> entries_;
static constexpr size_t kMinSizeShift = 2;
std::uint32_t capacity_log_ = 1;
std::uint32_t size_ = 0; // number of elements in the set.

static_assert(sizeof(IntrusiveStringList) == sizeof(void*),
"IntrusiveStringList should be just a pointer");
std::vector<IntrusiveStringList> entries_;
};

} // namespace dfly
31 changes: 12 additions & 19 deletions src/core/intrusive_string_set_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,31 +25,24 @@ class IntrusiveStringSetTest : public ::testing::Test {
}
};

// Exercises IntrusiveStringList: Emplace() links each new entry in front of the previous
// head, and Find() locates entries by key or returns a null entry.
TEST_F(IntrusiveStringSetTest, IntrusiceStringListTest) {
  IntrusiveStringList isl;
  ISLEntry test = isl.Emplace("0123456789");

  EXPECT_EQ(test.Key(), "0123456789"sv);
  // The first entry has no successor.
  EXPECT_EQ(test.Next(), ISLEntry());

  test = isl.Emplace("123456789");

  // The new entry becomes the head and points at the previous one.
  EXPECT_EQ(test.Next().Key(), "0123456789"sv);
  EXPECT_EQ(test.Key(), "123456789"sv);

  test = isl.Emplace("23456789");

  EXPECT_EQ(test.Key(), "23456789"sv);

  EXPECT_EQ(isl.Find("0123456789").Key(), "0123456789"sv);
  EXPECT_EQ(isl.Find("23456789").Key(), "23456789"sv);
  EXPECT_EQ(isl.Find("123456789").Key(), "123456789"sv);
  // A missing key yields a null entry.
  EXPECT_EQ(isl.Find("test"), ISLEntry());
}

} // namespace dfly

0 comments on commit 72aad85

Please sign in to comment.