Humopery - polarsbop.git/commitdiff
convert eager df to lazy df
author: Erik Mackdanz <erikmack@gmail.com>
Sun, 26 Nov 2023 14:54:15 +0000 (08:54 -0600)
committer: Erik Mackdanz <erikmack@gmail.com>
Sun, 26 Nov 2023 14:54:15 +0000 (08:54 -0600)
src/main.rs

index 89c652d877f86676ae1edee059efc5eaa9eb7b6f..cec9664fbd0c0f0c06024aedf6ed2577c940679b 100644 (file)
@@ -24,7 +24,7 @@ async fn main() -> Result<(),PolarsError> {
     let mut schema_override = Schema::new();
     let _ = schema_override.with_column("2022".into(),DataType::Float64);
     let schema_override =
-       Some(Arc::new(schema_override));
+       Some(&schema_override);
 
     // Some string values in numeric fields should be ignored
     let null_values =
@@ -40,38 +40,40 @@ async fn main() -> Result<(),PolarsError> {
        )));
 
     // read and parse from disk
-    let mut df = CsvReader::from_path("data/bop.csv")?
+    let mut df = LazyCsvReader::new("data/bop.csv")
        .has_header(true)
-       .with_dtypes(schema_override)
+       .with_dtype_overwrite(schema_override)
        .with_null_values(null_values)
        .finish()?;
 
     // split list based on arg
     match partition.as_str() {
        "0" => {
-           let mask = df.column("Country Name")?.lt("Ethiopia")?;
-           df = df.filter(&mask)?;
+           let mask = col("Country Name").lt(lit("Ethiopia"));
+           df = df.filter(mask);
        },
        "1" => {
-           let mask = df.column("Country Name")?.gt_eq("Ethiopia")?;
-           df = df.filter(&mask)?;
-           let mask = df.column("Country Name")?.lt("Romania")?;
-           df = df.filter(&mask)?;
+           let mask = col("Country Name").gt_eq(lit("Ethiopia"));
+           df = df.filter(mask);
+           let mask = col("Country Name").lt(lit("Romania"));
+           df = df.filter(mask);
        },
        "2" => {
-           let mask = df.column("Country Name")?.gt_eq("Romania")?;
-           df = df.filter(&mask)?;
+           let mask = col("Country Name").gt_eq(lit("Romania"));
+           df = df.filter(mask);
        },
        _ => { panic!("partition must be 0, 1 or 2"); }
     }
 
     // values not status, selected cols
     let mut df = df
-       .filter(&df.column("Attribute")?.
-                          equal("Value")?)?
-       .select(["Country Name","Indicator Name","Indicator Code","2022"])?
-       .rename("2022","Value")?
-       .sort(["Country Name","Indicator Name"],vec!(false,false),false)?;
+       .filter(col("Attribute").eq(lit("Value")))
+       .select(&[col("Country Name"),col("Indicator Name"),
+                 col("Indicator Code"),col("2022")])
+       .rename(["2022"],["Value"])
+       .sort_by_exprs([col("Country Name"),col("Indicator Name")],
+                      vec!(false,false),false,false)
+       .collect()?;
 
     // some console output
     println!("df: {:?}",df);