#!/usr/bin/env bash
#
# Walkthrough for the pdf-ner CLI:
#   1. download a sample PDF plus the GLiNER tokenizer and ONNX model,
#   2. run the tool through `cargo run` with debug logging,
#   3. build the release binary, copy it here, and run it directly.
#
# Abort on the first failing command so that a failed download or build
# never leads to running against missing assets or a stale binary.
set -euo pipefail

# Fetch inputs. An explicit -O on every download keeps re-runs idempotent:
# without it wget would save to "tokenizer.json.1" / "model.onnx.1" when the
# file already exists, and the stale copy would silently keep being used.
wget -O "2501_01104.pdf" "https://arxiv.org/pdf/2501.01104"
wget -O "tokenizer.json" "https://huggingface.co/onnx-community/gliner_small-v2.1/resolve/main/tokenizer.json"
wget -O "model.onnx" "https://huggingface.co/onnx-community/gliner_small-v2.1/resolve/main/onnx/model.onnx"

# NOTE(review): files.txt is consumed below but never created by this script.
# Presumably it lists the PDFs to process (e.g. 2501_01104.pdf) -- confirm
# the expected format and create it before running.

# Development run with debug-level logging enabled via RUST_LOG.
RUST_LOG=debug cargo run -- \
  --pdf-files files.txt \
  --tokenizer-file tokenizer.json \
  --model-file model.onnx \
  --entities technology,organization

# Optimized build; copy the binary into the current directory for direct use.
cargo build --release
cp target/release/pdf-ner .

# Run the release binary against the same inputs with a different entity set.
./pdf-ner \
  --pdf-files files.txt \
  --tokenizer-file tokenizer.json \
  --model-file model.onnx \
  --entities conference,name