wget https://dl.fbaipublicfiles.com/dpr/data/retriever/biencoder-nq-train.json.gz
gzip -d biencoder-nq-train.json.gz
python prepare_retrieve_data.py --input ./biencoder-nq-train.json --output ./nq-train-data
sh embed_pairwise_train.sh
Alternatively, to run it in the background with nohup (output is written to output.log):
nohup sh embed_pairwise_train.sh > output.log 2>&1 &
Download data
# queries
wget https://www.dropbox.com/s/x4abrhszjssq6gl/nq-test-queries.json
wget https://www.dropbox.com/s/b64e07jzlji8zhl/trivia-test-queries.json
# corpus
wget https://www.dropbox.com/s/8ocbt0qpykszgeu/wikipedia-corpus.tar.gz
tar -xvf wikipedia-corpus.tar.gz
Build corpus index
sh encode_corpus.sh
Build query index
sh encode_query.sh
Search
sh retrieve.sh