use std::env::args;
use std::io::Write;
use std::fs::{create_dir_all,File};
+use std::path::PathBuf;
async fn map_bucket(partition: &str) -> Result<(),Box<dyn Error>> {
let config = aws_config::load_from_env().await;
let client = aws_sdk_s3::Client::new(&config);
+ // file list for LazyJsonLineReader
+ let mut filepaths: [PathBuf; 3] =
+ [PathBuf::new(),PathBuf::new(),PathBuf::new()];
+
// there are three input files for each income price bucket
- for partition in [0,1,2] {
+ for partition in 0..3 {
println!("downloading partition {}",partition);
let downloadfilepath = format!(
"out/reducebucket/in/bucket-{}/partition-{}.json",
bucket, partition);
+ filepaths[partition] = PathBuf::from(downloadfilepath.clone());
let mut downloadfile = File::create(downloadfilepath)?;
let ins3key =
}
}
- // todo: read partitions to DF
- // todo: concat partitions in bucket
+ // read partitions to DF
+ let bucketdf =
+ LazyJsonLineReader::new_paths(Arc::new(filepaths))
+ .finish()?;
+ println!("bucketdf: {:?}",bucketdf.collect()?);
+
+
// todo: some aggregation on my bucket
Ok(())