Humopery - polarsbop.git/commitdiff
parallelize to split output
author Erik Mackdanz <erikmack@gmail.com>
Fri, 24 Nov 2023 17:56:00 +0000 (11:56 -0600)
committer Erik Mackdanz <erikmack@gmail.com>
Fri, 24 Nov 2023 17:56:00 +0000 (11:56 -0600)
README [new file with mode: 0644]
scripts/paraexport [new file with mode: 0755]
src/main.rs

diff --git a/README b/README
new file mode 100644 (file)
index 0000000..21bb6fa
--- /dev/null
+++ b/README
@@ -0,0 +1,2 @@
+cargo build && ./scripts/paraexport
+ls out/
diff --git a/scripts/paraexport b/scripts/paraexport
new file mode 100755 (executable)
index 0000000..972e7cf
--- /dev/null
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+# start multiple jobs in parallel
+target/debug/polarsbop 0 &
+target/debug/polarsbop 1 &
+
+wait %1 %2
+
+echo done
index 3d4fb571f5274f99cb66b8fe21cfadabbcae7e38..e4ec700f1815a2320be6d81e4e68d5015f7fded8 100644 (file)
@@ -1,12 +1,19 @@
 use polars::prelude::*;
+use std::env::args;
 use std::fs::{create_dir_all,File};
 
-
 fn main() -> Result<(),PolarsError> {
 
+    // expect first arg to be a file suffix = 0, 1
+    let mut argsiter = args();
+    let _ = argsiter.next();
+    let suffix = argsiter.next().unwrap();
+
+    // prep output file
     create_dir_all("out").unwrap();
-    let mut file =
-       File::create("out/output.json").unwrap();
+    let outfilename = format!("out/output-{}.json",suffix);
+    // println!("{}",outfilename);
+    let mut file = File::create(outfilename).unwrap();
 
     // set some columns to be parsed as numeric
     let mut schema_override = Schema::new();
@@ -34,16 +41,19 @@ fn main() -> Result<(),PolarsError> {
        .with_null_values(null_values)
        .finish()?;
 
-    // one country
-    let df = df.filter(&df.column("Country Name")?.
-                      equal("France")?)?;
+    // split list based on arg
+    let mut mask = df.column("Country Name")?.lt("Luxemb")?;
+    if suffix == "1" {
+       mask = df.column("Country Name")?.gt_eq("Luxemb")?;
+    }
+    let df = df.filter(&mask)?;
 
     // values not status, selected cols
     let mut df = df
        .filter(&df.column("Attribute")?.
                           equal("Value")?)?
        .select(["Country Name","Indicator Name","Indicator Code","2022"])?
-       .sort(["Indicator Name"],false,false)?;
+       .sort(["Country Name","Indicator Name"],vec!(false,false),false)?;
 
     println!("df: {:?}",df);