Inspecting streams for hashing purposes
This is a personal note on how to inspect an async stream in order to hash its contents on the fly, without having to store the data first and read it a second time. This might become the start of a series of little code snippets that could be interesting to others as well.
Code
As an example, fetch a URL, save it to a file, and compute the SHA-256 hash of its contents along the way:
use anyhow::anyhow;
use futures::StreamExt;
use sha2::Digest;
use std::path::PathBuf;
/// Downloads the URL given as the first command-line argument into a file in
/// the current directory and prints the SHA-256 digest of the downloaded bytes.
///
/// The digest is computed on the fly: every successfully received chunk of the
/// response body is fed to the hasher as it passes through the stream, so the
/// data is neither buffered in full nor read back from disk afterwards.
///
/// # Errors
///
/// Returns an error if no argument is given, the argument is not a valid URL,
/// the URL path has no final file name component, the request fails or is
/// answered with an HTTP error status, or the output file cannot be written.
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), anyhow::Error> {
    // `nth(1)` skips the program name and yields the first real argument.
    let arg = std::env::args()
        .nth(1)
        .ok_or_else(|| anyhow!("no URL given"))?;
    let url = url::Url::parse(&arg)?;

    // The output file is named after the last component of the URL path.
    // Converting a `&str` into a `PathBuf` cannot fail, so no `?` is needed.
    let path = PathBuf::from(url.path());
    let filename = path
        .file_name()
        .ok_or_else(|| anyhow!("URL does not contain a filename"))?;

    let mut hasher = sha2::Sha256::new();

    // The stream mutably borrows `hasher` through the `inspect` closure. Keeping
    // it in its own scope ends that borrow before the digest is finalized.
    {
        let stream = reqwest::get(url)
            .await?
            // `get` succeeds for any HTTP status; without this check the body
            // of a 4xx/5xx response would be saved and hashed as if it were
            // the requested file.
            .error_for_status()?
            .bytes_stream()
            // Peek at every chunk on its way through; errors are left
            // untouched so they still surface from the copy below.
            .inspect(|chunk| {
                if let Ok(bytes) = chunk {
                    hasher.update(bytes);
                }
            })
            // `StreamReader` requires the stream's error type to convert into
            // `std::io::Error`.
            .map(|chunk| chunk.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err)));

        let mut reader = tokio_util::io::StreamReader::new(stream);
        let mut file = tokio::io::BufWriter::new(tokio::fs::File::create(filename).await?);
        tokio::io::copy(&mut reader, &mut file).await?;
    }

    let sum = hasher.finalize();
    println!("{} {:?}", hex::encode(sum), filename);
    Ok(())
}
Cargo.toml dependencies
[dependencies]
anyhow = "1.0.80"
futures = "0.3.30"
hex = "0.4.3"
reqwest = { version = "0.11.24", features = ["stream"] }
sha2 = "0.10.8"
# Enable every tokio feature the example uses directly instead of relying on
# other crates to switch them on: `rt` for the runtime behind `#[tokio::main]`,
# `fs` for `tokio::fs::File`, and `io-util` for `tokio::io::{copy, BufWriter}`.
tokio = { version = "1.36.0", features = ["fs", "io-util", "macros", "rt"] }
tokio-util = { version = "0.7.10", features = ["io"] }
url = "2.5.0"