-
Notifications
You must be signed in to change notification settings - Fork 61
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor Talk indexing into a Talk::Index record. #540
base: main
Are you sure you want to change the base?
Changes from all commits
72524bf
ce48d01
982a95a
598fdcb
62dd179
2310449
bae444d
8b46e6d
415c127
8ca8eee
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
module ActiveRecord::SQLite::Index | ||
extend ActiveSupport::Concern | ||
|
||
included do | ||
self.ignored_columns = [table_name.to_sym, :rank] # Rails parses our virtual table with these extra non-attributes. | ||
|
||
attribute :rowid, :integer | ||
alias_attribute :id, :rowid | ||
self.primary_key = :rowid | ||
|
||
# Active Record doesn't pick up the `rowid` primary key column. | ||
# So we have have explicitly declare this scope to have `rowid` populated in the result set. | ||
default_scope { select("#{table_name}.rowid, #{table_name}.*") } | ||
end | ||
|
||
private | ||
|
||
def attributes_for_create(attribute_names) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't need to override |
||
# Prevent `super` filtering out the primary key because it isn't in `self.class.column_names`. | ||
[self.class.primary_key, *super] | ||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
class Talk::Index < ApplicationRecord | ||
self.table_name = :talks_search_index | ||
|
||
include ActiveRecord::SQLite::Index # Depends on `table_name` being assigned. | ||
class_attribute :index_columns, default: {title: 0, summary: 1, speaker_names: 2} | ||
|
||
belongs_to :talk, foreign_key: :rowid | ||
|
||
def self.search(query) | ||
query = query&.gsub(/[^[:word:]]/, " ") || "" # remove non-word characters | ||
query = query.split.map { |word| "#{word}*" }.join(" ") # wildcard search | ||
where("#{table_name} match ?", query) | ||
end | ||
|
||
def self.snippets(**) | ||
index_columns.each_key.reduce(all) { |relation, column| relation.snippet(column, **) } | ||
end | ||
|
||
def self.snippet(column, tag: "mark", omission: "…", limit: 32) | ||
offset = index_columns.fetch(column) | ||
select("snippet(#{table_name}, #{offset}, '<#{tag}>', '</#{tag}>', '#{omission}', #{limit}) AS #{column}_snippet") | ||
end | ||
|
||
def reindex | ||
update! id: talk.id, title: talk.title, summary: talk.summary, speaker_names: talk.speaker_names | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We're taking advantage of Active Record automatically generating an update or insert statement based on Internally, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This also means we now take advantage of Active Record skipping needless |
||
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,62 +4,36 @@ module Talk::Searchable | |
DATE_WEIGHT = 0.000000001 | ||
|
||
included do | ||
scope :ft_search, ->(query) do | ||
query = query&.gsub(/[^[:word:]]/, " ") || "" # remove non-word characters | ||
query = query.split.map { |word| "#{word}*" }.join(" ") # wildcard search | ||
joins("join talks_search_index idx on talks.id = idx.rowid") | ||
.where("talks_search_index match ?", query) | ||
end | ||
has_one :index, foreign_key: :rowid, inverse_of: :talk, dependent: :destroy | ||
|
||
scope :ft_search, ->(query) { select("talks.*").joins(:index).merge(Talk::Index.search(query)) } | ||
|
||
scope :with_snippets, ->(**options) do | ||
select("talks.*") | ||
.select_snippet("title", 0, **options) | ||
.select_snippet("summary", 1, **options) | ||
.select_snippet("speaker_names", 2, **options) | ||
select("talks.*").merge(Talk::Index.snippets(**options)) | ||
end | ||
|
||
scope :ranked, -> do | ||
select("talks.*, | ||
bm25(talks_search_index, 10.0, 1.0, 5.0) + | ||
(strftime('%s', 'now') - strftime('%s', talks.date)) * #{DATE_WEIGHT} AS combined_score") | ||
.order(Arel.sql("combined_score ASC")) | ||
.order(combined_score: :asc) | ||
end | ||
|
||
after_create_commit :create_in_index | ||
after_update_commit :update_in_index | ||
after_destroy_commit :remove_from_index | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We get this for free now via |
||
after_save_commit :reindex | ||
end | ||
|
||
class_methods do | ||
def rebuild_search_index | ||
connection.execute("DELETE FROM talks_search_index") | ||
Talk.find_each(&:create_in_index) | ||
end | ||
|
||
def select_snippet(column, offset, tag: "mark", omission: "…", limit: 32) | ||
select("snippet(talks_search_index, #{offset}, '<#{tag}>', '</#{tag}>', '#{omission}', #{limit}) AS #{column}_snippet") | ||
def reindex_all | ||
includes(:index).find_each(&:reindex) | ||
end | ||
end | ||
|
||
def title_with_snippet | ||
try(:title_snippet) || title | ||
end | ||
|
||
def create_in_index | ||
execute_sql_with_binds "insert into talks_search_index(rowid, title, summary, speaker_names) values (?, ?, ?, ?)", id, title, summary, speaker_names | ||
end | ||
|
||
def update_in_index | ||
execute_sql_with_binds "update talks_search_index set title = ?, summary = ?, speaker_names = ? where rowid = ?", title, summary, speaker_names, id | ||
end | ||
|
||
def remove_from_index | ||
execute_sql_with_binds "delete from talks_search_index where rowid = ?", id | ||
end | ||
|
||
private | ||
|
||
def execute_sql_with_binds(*statement) | ||
self.class.connection.execute self.class.sanitize_sql(statement) | ||
def index | ||
super || build_index | ||
end | ||
delegate :reindex, to: :index | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ def up | |
"tokenize = porter" | ||
] | ||
|
||
Talk.rebuild_search_index | ||
Talk.reindex_all | ||
end | ||
|
||
def down | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A SQLite
fts5
table like this is built on arowid
primary key and several columns to index. Active Record now understands these types of virtual tables but it doesn't recognize therowid
at all, not even as a column. (It's probably because the column isn't spelled out anywhere in the schema statements so Active Record has no column to parse there).So this module encapsulates all the work we need to have Active Record recognize the
rowid
column as a column and as the primary key.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh yeah, and then Active Record also parses the table name and something called
rank
as valid columns, so we need to ignore those since they're internal to thefts5
implementation.