From 6f66118c21249a3325e427781561d6089fe474bc Mon Sep 17 00:00:00 2001
From: Erik Mackdanz
Date: Fri, 24 Nov 2023 11:56:00 -0600
Subject: [PATCH] parallelize to split output

---
Reviewer notes (this text sits between the "---" marker and the diffstat, so
it is commentary only and is not part of the commit message):

* scripts/paraexport launches `target/debug/polarsbop 0` and
  `target/debug/polarsbop 1` as background jobs, waits on job specs %1 and
  %2, then prints "done".
* src/main.rs now takes its first command-line argument as `suffix` via
  `argsiter.next().unwrap()`, so running the binary with no argument panics.
* Output is written to out/output-<suffix>.json instead of out/output.json.
* Rows are split on "Country Name": suffix "1" keeps names >= "Luxemb";
  every other suffix value keeps names < "Luxemb".
* The final sort key changes from "Indicator Name" alone to "Country Name"
  followed by "Indicator Name".
* NOTE(review): indentation inside the hunks below was restored from a
  whitespace-collapsed copy of this patch; if context lines fail to match,
  try applying with --ignore-whitespace and confirm against the repository.

 README             |  2 ++
 scripts/paraexport |  9 +++++++++
 src/main.rs        | 24 +++++++++++++++++-------
 3 files changed, 28 insertions(+), 7 deletions(-)
 create mode 100644 README
 create mode 100755 scripts/paraexport

diff --git a/README b/README
new file mode 100644
index 0000000..21bb6fa
--- /dev/null
+++ b/README
@@ -0,0 +1,2 @@
+cargo build && ./scripts/paraexport
+ls out/
diff --git a/scripts/paraexport b/scripts/paraexport
new file mode 100755
index 0000000..972e7cf
--- /dev/null
+++ b/scripts/paraexport
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+# start multiple jobs in parallel
+target/debug/polarsbop 0 &
+target/debug/polarsbop 1 &
+
+wait %1 %2
+
+echo done
diff --git a/src/main.rs b/src/main.rs
index 3d4fb57..e4ec700 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,12 +1,19 @@
 use polars::prelude::*;
+use std::env::args;
 use std::fs::{create_dir_all,File};
 
-
 fn main() -> Result<(),PolarsError> {
 
+    // expect first arg to be a file suffix = 0, 1
+    let mut argsiter = args();
+    let _ = argsiter.next();
+    let suffix = argsiter.next().unwrap();
+
+    // prep output file
     create_dir_all("out").unwrap();
-    let mut file =
-        File::create("out/output.json").unwrap();
+    let outfilename = format!("out/output-{}.json",suffix);
+    // println!("{}",outfilename);
+    let mut file = File::create(outfilename).unwrap();
 
     // set some columns to be parsed as numeric
     let mut schema_override = Schema::new();
@@ -34,16 +41,19 @@ fn main() -> Result<(),PolarsError> {
         .with_null_values(null_values)
         .finish()?;
 
-    // one country
-    let df = df.filter(&df.column("Country Name")?.
-                       equal("France")?)?;
+    // split list based on arg
+    let mut mask = df.column("Country Name")?.lt("Luxemb")?;
+    if suffix == "1" {
+        mask = df.column("Country Name")?.gt_eq("Luxemb")?;
+    }
+    let df = df.filter(&mask)?;
 
     // values not status, selected cols
     let mut df = df
         .filter(&df.column("Attribute")?.
                 equal("Value")?)?
         .select(["Country Name","Indicator Name","Indicator Code","2022"])?
-        .sort(["Indicator Name"],false,false)?;
+        .sort(["Country Name","Indicator Name"],vec!(false,false),false)?;
 
     println!("df: {:?}",df);
 
-- 
2.52.0