From 5d4e990db3cb8aeaacefa0b236a98d8b9425594a Mon Sep 17 00:00:00 2001 From: Erik Mackdanz Date: Fri, 24 Nov 2023 21:40:15 -0600 Subject: [PATCH] three partitions not two --- scripts/paraexport | 3 ++- src/main.rs | 30 +++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/scripts/paraexport b/scripts/paraexport index 972e7cf..be33a5d 100755 --- a/scripts/paraexport +++ b/scripts/paraexport @@ -3,7 +3,8 @@ # start multiple jobs in parallel target/debug/polarsbop 0 & target/debug/polarsbop 1 & +target/debug/polarsbop 2 & -wait %1 %2 +wait %1 %2 %3 echo done diff --git a/src/main.rs b/src/main.rs index 0182cc6..eb31d5e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,16 +4,16 @@ use std::fs::{create_dir_all,File}; fn main() -> Result<(),PolarsError> { - // expect first arg to be a file suffix = 0, 1 + // expect first arg to be a file partition = 0, 1, 2 let mut argsiter = args(); let _ = argsiter.next(); - let suffix = argsiter.next() - .expect("single argument required, 0 or 1"); + let partition = argsiter.next() + .expect("single argument required, 0 or 1 or 2"); // prep output file create_dir_all("out") .expect("couldn't create out dir"); - let outfilename = format!("out/output-{}.json",suffix); + let outfilename = format!("out/output-{}.json",partition); // println!("{}",outfilename); let mut file = File::create(outfilename) .expect("couldn't create output file"); @@ -38,18 +38,30 @@ fn main() -> Result<(),PolarsError> { ))); // read and parse from disk - let df = CsvReader::from_path("data/bop.csv")? + let mut df = CsvReader::from_path("data/bop.csv")? .has_header(true) .with_dtypes(schema_override) .with_null_values(null_values) .finish()?; // split list based on arg - let mut mask = df.column("Country Name")?.lt("Luxemb")?; - if suffix == "1" { - mask = df.column("Country Name")?.gt_eq("Luxemb")?; + match partition.as_str() { + "0" => { + let mask = df.column("Country Name")?.lt("Ethiopia")?; + df = df.filter(&mask)?; + }, + "1" => { + let mask = df.column("Country Name")?.gt_eq("Ethiopia")?; + df = df.filter(&mask)?; + let mask = df.column("Country Name")?.lt("Romania")?; + df = df.filter(&mask)?; + }, + "2" => { + let mask = df.column("Country Name")?.gt_eq("Romania")?; + df = df.filter(&mask)?; + }, + _ => { panic!("partition must be 0, 1 or 2"); } } - let df = df.filter(&mask)?; // values not status, selected cols let mut df = df -- 2.52.0