From 775e38ff38ef271049afe9356f6c8d1b251f187d Mon Sep 17 00:00:00 2001 From: Erik Mackdanz Date: Thu, 28 Nov 2024 14:51:49 -0600 Subject: [PATCH] Add crate docstring --- src/main.rs | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 82 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index e080030..c6bea03 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,82 @@ +#![warn(missing_docs)] +//! A tool to manage a basic vector-based search index with candle and +//! pgvector. It can create the index, add documents and run searches. +//! +//! Borrowed heavily from: +//! - +//! - +//! +//! ## Initialize the database +//! +//! The postgresql database must have the pgvector extension +//! installed. The user must have superuser in the target database. +//! +//! If the target database doesn't exist yet, then the user must have +//! createdb permission. +//! +//! ```text +//! $ ./candlevec init-database --help +//! +//! Initialize the database when the database or table doesn't exist already +//! +//! Usage: candlevec init-database [OPTIONS] --password +//! +//! Options: +//! --dbname [default: vsearch] +//! --host [default: localhost] +//! --user [default: cvmigrator] +//! --password [env: PASSWORD=] +//! -h, --help Print help +//! ``` +//! +//! For example: +//! +//! ```text +//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec init-database +//! maybe creating the database +//! database vsearch exists already +//! maybe creating database objects +//! ``` +//! +//! ## Add documents +//! +//! A document is a regular file. +//! +//! The user for this operation requires only write access to the +//! table (not superuser). +//! +//! ```text +//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec index --file testdata/0 +//! indexing a file +//! Loaded and encoded 59.479µs +//! Took 14.982262ms +//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec index --file testdata/1 +//! ... +//! 
$ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec index --file testdata/2 +//! ... +//! ``` +//! +//! ## Search +//! +//! Return the top five matches for the given search. Note the search +//! is semantic so the right document is returned for e.g. "meow" or +//! "canine" even without the documents containing those words. +//! +//! The user for this operation needs only read access (not write or +//! superuser). +//! +//! ```text +//! $ PASSWORD=$(gpg -d pw-cvmigrator.gpg) candlevec search --search feline +//! searching for document matches +//! Loaded and encoded 49.306µs +//! Took 14.452557ms +//! The cat is purring +//! The bear is growling +//! The dog is barking +//! ``` +//! +//! +//! use candle_transformers::models::bert::{BertModel, Config, HiddenAct, DTYPE}; use anyhow::{Error as E, Result}; @@ -76,7 +155,8 @@ struct Args { #[command(subcommand)] action: Action, - /// The model to use, check out available models: https://huggingface.co/models?library=sentence-transformers&sort=trending + /// The model to use, check out available models: + /// <https://huggingface.co/models?library=sentence-transformers&sort=trending> #[arg(long)] model_id: Option<String>, @@ -230,7 +310,7 @@ fn index(dbname: String, host: String, user: String, password: String, fn search(dbname: String, host: String, user: String, password: String, search: String, model: BertModel, tokenizer: Tokenizer) -> Result<()> { - println!("indexing a file"); + println!("searching for document matches"); let embeddings = get_embeddings(&search,model,tokenizer)?; let mut client = postgres::Config::new() -- 2.52.0