+#![warn(missing_docs)]
+//! A tool to manage a basic vector-based search index with candle and
+//! pgvector. It can create the index, add documents and run searches.
+//!
+//! Borrowed heavily from:
+//! - <https://github.com/huggingface/candle/blob/main/candle-examples/examples/bert/main.rs>
+//! - <https://github.com/pgvector/pgvector-rust/blob/master/examples/candle/src/main.rs>
+//!
+//! ## Initialize the database
+//!
+//! The PostgreSQL database must have the pgvector extension
+//! installed. The user must have superuser privileges in the target
+//! database.
+//!
+//! If the target database doesn't exist yet, the user must have the
+//! `CREATEDB` privilege.
+//!
+//! ```text
+//! $ ./candlevec init-database --help
+//!
+//! Initialize the database when the database or table doesn't exist already
+//!
+//! Usage: candlevec init-database [OPTIONS] --password <PASSWORD>
+//!
+//! Options:
+//! --dbname <DBNAME> [default: vsearch]
+//! --host <HOST> [default: localhost]
+//! --user <USER> [default: cvmigrator]
+//! --password <PASSWORD> [env: PASSWORD=]
+//! -h, --help Print help
+//! ```
+//!
+//! For example:
+//!
+//! ```text
+//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec init-database
+//! maybe creating the database
+//! database vsearch exists already
+//! maybe creating database objects
+//! ```
+//!
+//! ## Add documents
+//!
+//! A document is a regular file.
+//!
+//! The user for this operation requires only write access to the
+//! table (not superuser).
+//!
+//! ```text
+//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec index --file testdata/0
+//! indexing a file
+//! Loaded and encoded 59.479µs
+//! Took 14.982262ms
+//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec index --file testdata/1
+//! ...
+//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec index --file testdata/2
+//! ...
+//! ```
+//!
+//! ## Search
+//!
+//! Returns the top five matches for the given search. Note that the
+//! search is semantic, so the right document is returned for e.g.
+//! "meow" or "canine" even if the documents do not contain those
+//! words.
+//!
+//! The user for this operation needs only read access (not write or
+//! superuser).
+//!
+//! ```text
+//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec search --search feline
+//! searching for document matches
+//! Loaded and encoded 49.306µs
+//! Took 14.452557ms
+//! The cat is purring
+//! The bear is growling
+//! The dog is barking
+//! ```
+//!
+//!
+//!
use candle_transformers::models::bert::{BertModel, Config, HiddenAct, DTYPE};
use anyhow::{Error as E, Result};
#[command(subcommand)]
action: Action,
- /// The model to use, check out available models: https://huggingface.co/models?library=sentence-transformers&sort=trending
+ /// The model to use, check out available models:
+ /// <https://huggingface.co/models?library=sentence-transformers&sort=trending>
#[arg(long)]
model_id: Option<String>,
fn search(dbname: String, host: String, user: String, password: String,
search: String, model: BertModel, tokenizer: Tokenizer) -> Result<()> {
- println!("indexing a file");
+ println!("searching for document matches");
let embeddings = get_embeddings(&search,model,tokenizer)?;
let mut client = postgres::Config::new()