From: Erik Mackdanz Date: Sun, 26 Nov 2023 14:54:15 +0000 (-0600) Subject: convert eager df to lazy df X-Git-Url: https://git.humopery.space/?a=commitdiff_plain;h=672ff1ec7e92d000ee25ed2ad169a74356d01ae9;p=polarsbop.git convert eager df to lazy df --- diff --git a/src/main.rs b/src/main.rs index 89c652d..cec9664 100644 --- a/src/main.rs +++ b/src/main.rs @@ -24,7 +24,7 @@ async fn main() -> Result<(),PolarsError> { let mut schema_override = Schema::new(); let _ = schema_override.with_column("2022".into(),DataType::Float64); let schema_override = - Some(Arc::new(schema_override)); + Some(&schema_override); // Some string values in numeric fields should be ignored let null_values = @@ -40,38 +40,40 @@ async fn main() -> Result<(),PolarsError> { ))); // read and parse from disk - let mut df = CsvReader::from_path("data/bop.csv")? + let mut df = LazyCsvReader::new("data/bop.csv") .has_header(true) - .with_dtypes(schema_override) + .with_dtype_overwrite(schema_override) .with_null_values(null_values) .finish()?; // split list based on arg match partition.as_str() { "0" => { - let mask = df.column("Country Name")?.lt("Ethiopia")?; - df = df.filter(&mask)?; + let mask = col("Country Name").lt(lit("Ethiopia")); + df = df.filter(mask); }, "1" => { - let mask = df.column("Country Name")?.gt_eq("Ethiopia")?; - df = df.filter(&mask)?; - let mask = df.column("Country Name")?.lt("Romania")?; - df = df.filter(&mask)?; + let mask = col("Country Name").gt_eq(lit("Ethiopia")); + df = df.filter(mask); + let mask = col("Country Name").lt(lit("Romania")); + df = df.filter(mask); }, "2" => { - let mask = df.column("Country Name")?.gt_eq("Romania")?; - df = df.filter(&mask)?; + let mask = col("Country Name").gt_eq(lit("Romania")); + df = df.filter(mask); }, _ => { panic!("partition must be 0, 1 or 2"); } } // values not status, selected cols let mut df = df - .filter(&df.column("Attribute")?. - equal("Value")?)? - .select(["Country Name","Indicator Name","Indicator Code","2022"])? - .rename("2022","Value")? - .sort(["Country Name","Indicator Name"],vec!(false,false),false)?; + .filter(col("Attribute").eq(lit("Value"))) + .select(&[col("Country Name"),col("Indicator Name"), + col("Indicator Code"),col("2022")]) + .rename(["2022"],["Value"]) + .sort_by_exprs([col("Country Name"),col("Indicator Name")], + vec!(false,false),false,false) + .collect()?; // some console output println!("df: {:?}",df);