Inspecting streams for hashing purposes

This is a personal note on how to inspect async streams in order to hash their contents on the fly, without first storing the stream and then reading it a second time. This might become the start of a series of little code snippets that might be interesting to others as well.

Code

As an example, fetch a URL, write the response body to a file, and compute the SHA-256 hash of the body while it streams through:

use anyhow::anyhow;
use futures::StreamExt;
use sha2::Digest;
use std::path::PathBuf;

/// Downloads the URL given as the first command-line argument into a file in
/// the current directory and prints the SHA-256 digest of the downloaded body.
///
/// The response body is consumed exactly once: every chunk is fed into the
/// hasher via `inspect` on its way to the file writer, so the data is neither
/// buffered in full nor read back from disk to compute the digest.
///
/// The output file is named after the last segment of the URL path. On success
/// a line of the form `<hex digest> "<filename>"` is printed to stdout.
///
/// # Errors
///
/// Returns an error if no argument is given, the argument is not a valid URL,
/// the URL path has no final file-name segment, the request fails or the
/// server answers with a 4xx/5xx status, or creating/writing the file fails.
#[tokio::main(flavor = "current_thread")]
async fn main() -> Result<(), anyhow::Error> {
    let arg = std::env::args()
        .nth(1)
        .ok_or_else(|| anyhow!("no URL given"))?;

    let url = url::Url::parse(&arg)?;
    // `PathBuf: From<&str>` is infallible, so there is no error to propagate.
    let path = PathBuf::from(url.path());
    let filename = path
        .file_name()
        .ok_or_else(|| anyhow!("URL does not contain a filename"))?;
    let mut hasher = sha2::Sha256::new();

    let stream = reqwest::get(url)
        .await?
        // Reject 4xx/5xx responses; otherwise the server's error page would be
        // written to disk and hashed as if it were the requested file.
        .error_for_status()?
        .bytes_stream()
        // Peek at each chunk as it passes through. Error items are not hashed
        // here; they are forwarded unchanged and surface as a failure of the
        // copy below.
        .inspect(|chunk| {
            if let Ok(bytes) = chunk {
                hasher.update(bytes);
            }
        })
        // `StreamReader` requires the stream's error type to convert into
        // `std::io::Error`.
        .map(|chunk| chunk.map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err)));

    let mut reader = tokio_util::io::StreamReader::new(stream);
    let mut file = tokio::io::BufWriter::new(tokio::fs::File::create(filename).await?);
    tokio::io::copy(&mut reader, &mut file).await?;

    // The stream (and with it the closure's mutable borrow of `hasher`) is no
    // longer used past this point, so the hasher can be consumed.
    let sum = hasher.finalize();
    println!("{} {:?}", hex::encode(sum), filename);

    Ok(())
}

Cargo.toml dependencies

[dependencies]
anyhow = "1.0.80"
futures = "0.3.30"
hex = "0.4.3"
reqwest = { version = "0.11.24", features = ["stream"] }
sha2 = "0.10.8"
tokio = { version = "1.36.0", features = ["macros"] }
tokio-util = { version = "0.7.10", features = ["io"] }
url = "2.5.0"