From: Erik Mackdanz Date: Sat, 25 Nov 2023 03:40:15 +0000 (-0600) Subject: three partitions not two X-Git-Url: https://git.humopery.space/?a=commitdiff_plain;h=5d4e990db3cb8aeaacefa0b236a98d8b9425594a;p=polarsbop.git three partitions not two --- diff --git a/scripts/paraexport b/scripts/paraexport index 972e7cf..be33a5d 100755 --- a/scripts/paraexport +++ b/scripts/paraexport @@ -3,7 +3,8 @@ # start multiple jobs in parallel target/debug/polarsbop 0 & target/debug/polarsbop 1 & +target/debug/polarsbop 2 & -wait %1 %2 +wait %1 %2 %3 echo done diff --git a/src/main.rs b/src/main.rs index 0182cc6..eb31d5e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,16 +4,16 @@ use std::fs::{create_dir_all,File}; fn main() -> Result<(),PolarsError> { - // expect first arg to be a file suffix = 0, 1 + // expect first arg to be a file partition = 0, 1, 2 let mut argsiter = args(); let _ = argsiter.next(); - let suffix = argsiter.next() - .expect("single argument required, 0 or 1"); + let partition = argsiter.next() + .expect("single argument required, 0 or 1 or 2"); // prep output file create_dir_all("out") .expect("couldn't create out dir"); - let outfilename = format!("out/output-{}.json",suffix); + let outfilename = format!("out/output-{}.json",partition); // println!("{}",outfilename); let mut file = File::create(outfilename) .expect("couldn't create output file"); @@ -38,18 +38,30 @@ fn main() -> Result<(),PolarsError> { ))); // read and parse from disk - let df = CsvReader::from_path("data/bop.csv")? + let mut df = CsvReader::from_path("data/bop.csv")? .has_header(true) .with_dtypes(schema_override) .with_null_values(null_values) .finish()?; // split list based on arg - let mut mask = df.column("Country Name")?.lt("Luxemb")?; - if suffix == "1" { - mask = df.column("Country Name")?.gt_eq("Luxemb")?; + match partition.as_str() { + "0" => { + let mask = df.column("Country Name")?.lt("Ethiopia")?; + df = df.filter(&mask)?; + }, + "1" => { + let mask = df.column("Country Name")?.gt_eq("Ethiopia")?; + df = df.filter(&mask)?; + let mask = df.column("Country Name")?.lt("Romania")?; + df = df.filter(&mask)?; + }, + "2" => { + let mask = df.column("Country Name")?.gt_eq("Romania")?; + df = df.filter(&mask)?; + }, + _ => { panic!("partition must be 0, 1 or 2"); } } - let df = df.filter(&mask)?; // values not status, selected cols let mut df = df