-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathstring_table.cc
118 lines (102 loc) · 2.69 KB
/
string_table.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
//
// Copyright RIME Developers
// Distributed under the BSD License
//
// 2014-07-04 GONG Chen <chen.sst@gmail.com>
//
#include <boost/iostreams/device/array.hpp>
#include <boost/iostreams/stream.hpp>
#include "common.h"
#include "string_table.h"
namespace rime {
// Constructs a string table over an existing serialized trie image.
// |ptr| and |size| are handed straight to marisa's map(); no validation is
// done here, so any exception raised by marisa propagates to the caller.
// NOTE(review): map() presumably references the buffer rather than copying
// it, in which case the buffer must outlive this object -- confirm.
StringTable::StringTable(const char* ptr, size_t size) {
  trie_.map(ptr, size);
}
bool StringTable::HasKey(const string& key) {
marisa::Agent agent;
agent.set_query(key.c_str());
return trie_.lookup(agent);
}
// Looks up |key| by exact match.
// Returns the id assigned to the key by the trie, or kInvalidStringId when
// the key is not present.
StringId StringTable::Lookup(const string& key) {
  marisa::Agent agent;
  // Pass the length explicitly so the query is not truncated at an embedded
  // '\0'; StringTableBuilder::Add() stores keys with their full length.
  agent.set_query(key.c_str(), key.length());
  if (!trie_.lookup(agent)) {
    return kInvalidStringId;
  }
  return agent.key().id();
}
void StringTable::CommonPrefixMatch(const string& query,
vector<StringId>* result) {
marisa::Agent agent;
agent.set_query(query.c_str());
result->clear();
while (trie_.common_prefix_search(agent)) {
result->push_back(agent.key().id());
}
}
void StringTable::Predict(const string& query,
vector<StringId>* result) {
marisa::Agent agent;
agent.set_query(query.c_str());
result->clear();
while (trie_.predictive_search(agent)) {
result->push_back(agent.key().id());
}
}
// Maps |string_id| back to the key text it identifies.
// If the reverse lookup raises marisa::Exception, an error is logged and an
// empty string is returned instead.
string StringTable::GetString(StringId string_id) {
  marisa::Agent agent;
  agent.set_query(string_id);
  try {
    trie_.reverse_lookup(agent);
    // The key is exposed as a pointer/length pair, so copy exactly
    // length() bytes rather than relying on a terminator.
    const marisa::Key& found = agent.key();
    return string(found.ptr(), found.length());
  } catch (const marisa::Exception&) {
    LOG(ERROR) << "invalid id for string table: " << string_id;
  }
  return string();
}
// Returns the value of the underlying trie's size().
size_t StringTable::NumKeys() const {
  const size_t key_count = trie_.size();
  return key_count;
}
// Returns the underlying trie's io_size(); StringTableBuilder::Dump() uses
// this as the minimum buffer size it accepts.
size_t StringTable::BinarySize() const {
  const size_t serialized_bytes = trie_.io_size();
  return serialized_bytes;
}
// Queues |key| with the given |weight| for the next Build().
// |reference| may be null; when non-null, UpdateReferences() writes the id
// of this key through it. The pointer is stored, not copied from, so it
// must still be valid at that point.
void StringTableBuilder::Add(const string& key,
                             double weight,
                             StringId* reference) {
  // The weight is narrowed to float before being handed to the keyset.
  const float key_weight = static_cast<float>(weight);
  keys_.push_back(key.c_str(), key.length(), key_weight);
  // Kept parallel to keys_: entry i here belongs to key i.
  references_.push_back(reference);
}
// Resets the builder: drops the pending keys, their id references and the
// trie itself.
void StringTableBuilder::Clear() {
  references_.clear();
  keys_.clear();
  trie_.clear();
}
// Builds the trie from all keys queued via Add(), then publishes the
// resulting key ids to the callers' reference pointers.
void StringTableBuilder::Build() {
  trie_.build(keys_);
  // Must come after build(): UpdateReferences() reads the ids from keys_.
  UpdateReferences();
}
// Writes the id of each key in keys_ through the matching pointer that was
// registered in Add(). Null pointers are skipped.
// Does nothing if keys_ and references_ differ in size, since the two can
// then no longer be paired up by index.
void StringTableBuilder::UpdateReferences() {
  const size_t num_keys = keys_.size();
  if (num_keys != references_.size()) {
    return;
  }
  for (size_t i = 0; i < num_keys; ++i) {
    if (references_[i]) {
      *references_[i] = keys_[i].id();
    }
  }
}
void StringTableBuilder::Dump(char* ptr, size_t size) {
if (size < BinarySize()) {
LOG(ERROR) << "insufficient memory to dump string table.";
return;
}
namespace io = boost::iostreams;
io::basic_array_sink<char> sink(ptr, size);
io::stream<io::basic_array_sink<char>> stream(sink);
stream << trie_;
}
} // namespace rime