Humopery - vecsearch.git/commitdiff
Add crate docstring
author Erik Mackdanz <erikmack@gmail.com>
Thu, 28 Nov 2024 20:51:49 +0000 (14:51 -0600)
committer Erik Mackdanz <erikmack@gmail.com>
Thu, 28 Nov 2024 20:51:49 +0000 (14:51 -0600)
src/main.rs

index e080030dc9cb6ea2eb526940c507cf88b91c159d..c6bea03a40801d95ffb91da2ab680bf2b3a59a62 100644 (file)
@@ -1,3 +1,82 @@
+#![warn(missing_docs)]
+//! A tool to manage a basic vector-based search index with candle and
+//! pgvector. It can create the index, add documents and run searches.
+//! 
+//! Borrowed heavily from:
+//! - <https://github.com/huggingface/candle/blob/main/candle-examples/examples/bert/main.rs>
+//! - <https://github.com/pgvector/pgvector-rust/blob/master/examples/candle/src/main.rs>
+//! 
+//! ## Initialize the database
+//! 
+//! The PostgreSQL database must have the pgvector extension installed.
+//! The user must have superuser privileges in the target database.
+//! 
+//! If the target database doesn't exist yet, then the user must have
+//! the CREATEDB privilege.
+//! 
+//! ```text
+//! $ ./candlevec init-database --help
+//!
+//! Initialize the database when the database or table doesn't exist already
+//! 
+//! Usage: candlevec init-database [OPTIONS] --password <PASSWORD>
+//! 
+//! Options:
+//!       --dbname <DBNAME>      [default: vsearch]
+//!       --host <HOST>          [default: localhost]
+//!       --user <USER>          [default: cvmigrator]
+//!       --password <PASSWORD>  [env: PASSWORD=]
+//!   -h, --help                 Print help
+//! ```
+//! 
+//! For example:
+//! 
+//! ```text
+//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec init-database
+//! maybe creating the database
+//! database vsearch exists already
+//! maybe creating database objects
+//! ```
+//! 
+//! ## Add documents
+//! 
+//! A document is a regular file.
+//! 
+//! The user for this operation requires only write access to the
+//! table (not superuser).
+//! 
+//! ```text
+//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec index --file testdata/0
+//! indexing a file
+//! Loaded and encoded 59.479µs
+//! Took 14.982262ms
+//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec index --file testdata/1
+//! ...
+//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec index --file testdata/2
+//! ...
+//! ```
+//! 
+//! ## Search
+//! 
+//! Return the top five matches for the given search. Note that the
+//! search is semantic, so the right document is returned for e.g. "meow"
+//! or "canine" even when no document contains those words.
+//! 
+//! The user for this operation needs only read access (not write or
+//! superuser).
+//! 
+//! ```text
+//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec search --search feline
+//! searching for document matches
+//! Loaded and encoded 49.306µs
+//! Took 14.452557ms
+//! The cat is purring
+//! The bear is growling
+//! The dog is barking
+//! ```
+//! 
+//! 
+//! 
 use candle_transformers::models::bert::{BertModel, Config, HiddenAct, DTYPE};
 
 use anyhow::{Error as E, Result};
@@ -76,7 +155,8 @@ struct Args {
     #[command(subcommand)]
     action: Action,
 
-    /// The model to use, check out available models: https://huggingface.co/models?library=sentence-transformers&sort=trending
+    /// The model to use, check out available models:
+    /// <https://huggingface.co/models?library=sentence-transformers&sort=trending>
     #[arg(long)]
     model_id: Option<String>,
 
@@ -230,7 +310,7 @@ fn index(dbname: String, host: String, user: String, password: String,
 fn search(dbname: String, host: String, user: String, password: String,
         search: String, model: BertModel, tokenizer: Tokenizer) -> Result<()> {
 
-    println!("indexing a file");
+    println!("searching for document matches");
     let embeddings = get_embeddings(&search,model,tokenizer)?;
 
     let mut client = postgres::Config::new()