feat: remove unused directories when purging cas

Purging the CAS now also cleans up unused
folders. Additionally, because concurrent removal
of directories seems to fail with a PermissionDenied
error on Windows, those errors are ignored on that
platform. Why this happens still needs investigation.
This commit is contained in:
daimond113 2025-02-01 00:46:31 +01:00
parent 75d6aa5443
commit 82b4b858e5
No known key found for this signature in database
GPG key ID: 640DC95EC1190354
3 changed files with 65 additions and 27 deletions

View file

@ -1,6 +1,9 @@
use crate::cli::{ use crate::{
reporters::run_with_reporter, cli::{
style::{INFO_STYLE, SUCCESS_STYLE}, reporters::run_with_reporter,
style::{INFO_STYLE, SUCCESS_STYLE},
},
util::remove_empty_dir,
}; };
use anyhow::Context; use anyhow::Context;
use async_stream::try_stream; use async_stream::try_stream;
@ -150,9 +153,15 @@ async fn discover_cas_packages(cas_dir: &Path) -> anyhow::Result<HashMap<PathBuf
.into_iter() .into_iter()
.map(|index| cas_dir.join(index)) .map(|index| cas_dir.join(index))
.map(|index| async move { .map(|index| async move {
let mut tasks = read_dir_stream(&index) let mut res = HashMap::new();
.await
.context("failed to read index directory")? let tasks = match read_dir_stream(&index).await {
Ok(tasks) => tasks,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(res),
Err(e) => return Err(e).context("failed to read cas index directory"),
};
let mut tasks = tasks
.map(|entry| async move { .map(|entry| async move {
read_entry(entry.context("failed to read cas index dir entry")?).await read_entry(entry.context("failed to read cas index dir entry")?).await
}) })
@ -160,8 +169,6 @@ async fn discover_cas_packages(cas_dir: &Path) -> anyhow::Result<HashMap<PathBuf
.await .await
.0; .0;
let mut res = HashMap::new();
while let Some(task) = tasks.join_next().await { while let Some(task) = tasks.join_next().await {
res.extend(task.unwrap()?); res.extend(task.unwrap()?);
} }
@ -180,8 +187,15 @@ async fn discover_cas_packages(cas_dir: &Path) -> anyhow::Result<HashMap<PathBuf
} }
async fn remove_hashes(cas_dir: &Path) -> anyhow::Result<HashSet<String>> { async fn remove_hashes(cas_dir: &Path) -> anyhow::Result<HashSet<String>> {
let mut tasks = read_dir_stream(cas_dir) let mut res = HashSet::new();
.await?
let tasks = match read_dir_stream(cas_dir).await {
Ok(tasks) => tasks,
Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(res),
Err(e) => return Err(e).context("failed to read cas directory"),
};
let mut tasks = tasks
.map(|cas_entry| async move { .map(|cas_entry| async move {
let cas_entry = cas_entry.context("failed to read cas dir entry")?; let cas_entry = cas_entry.context("failed to read cas dir entry")?;
let prefix = cas_entry.file_name(); let prefix = cas_entry.file_name();
@ -212,10 +226,14 @@ async fn remove_hashes(cas_dir: &Path) -> anyhow::Result<HashSet<String>> {
return Ok(None); return Ok(None);
} }
fs::remove_file(path) fs::remove_file(&path)
.await .await
.context("failed to remove unused file")?; .context("failed to remove unused file")?;
if let Some(parent) = path.parent() {
remove_empty_dir(parent).await?;
}
Ok(Some(hash)) Ok(Some(hash))
} }
}) })
@ -238,8 +256,6 @@ async fn remove_hashes(cas_dir: &Path) -> anyhow::Result<HashSet<String>> {
.await .await
.0; .0;
let mut res = HashSet::new();
while let Some(removed_hashes) = tasks.join_next().await { while let Some(removed_hashes) = tasks.join_next().await {
let Some(removed_hashes) = removed_hashes.unwrap()? else { let Some(removed_hashes) = removed_hashes.unwrap()? else {
continue; continue;
@ -288,10 +304,24 @@ impl PruneCommand {
}; };
if removed_hashes.contains(&hash) { if removed_hashes.contains(&hash) {
let cas_dir = project.cas_dir().to_path_buf();
tasks.spawn(async move { tasks.spawn(async move {
fs::remove_file(path) fs::remove_file(dbg!(&path))
.await .await
.context("failed to remove unused file") .context("failed to remove unused file")?;
// remove empty directories up to the cas dir
let mut path = &*path;
while let Some(parent) = path.parent() {
if parent == cas_dir {
break;
}
remove_empty_dir(parent).await?;
path = parent;
}
Ok::<_, anyhow::Error>(())
}); });
removed_packages += 1; removed_packages += 1;
// if at least one file is removed, the package is not used // if at least one file is removed, the package is not used

View file

@ -1,6 +1,6 @@
use crate::{ use crate::{
all_packages_dirs, graph::DependencyGraphWithTarget, manifest::Alias, Project, all_packages_dirs, graph::DependencyGraphWithTarget, manifest::Alias, util::remove_empty_dir,
PACKAGES_CONTAINER_NAME, SCRIPTS_LINK_FOLDER, Project, PACKAGES_CONTAINER_NAME, SCRIPTS_LINK_FOLDER,
}; };
use fs_err::tokio as fs; use fs_err::tokio as fs;
use futures::FutureExt; use futures::FutureExt;
@ -11,15 +11,6 @@ use std::{
}; };
use tokio::task::JoinSet; use tokio::task::JoinSet;
/// Tries to delete the directory at `path`, treating "nothing to do" outcomes
/// as success.
///
/// A directory that does not exist (`NotFound`) or that still has entries
/// (`DirectoryNotEmpty`) yields `Ok(())`; every other I/O error is returned
/// to the caller unchanged.
async fn remove_empty_dir(path: &Path) -> std::io::Result<()> {
    use std::io::ErrorKind::{DirectoryNotEmpty, NotFound};

    fs::remove_dir(path).await.or_else(|err| {
        // Only these two kinds mean the directory is simply not removable
        // as an empty directory; anything else is a genuine failure.
        if matches!(err.kind(), NotFound | DirectoryNotEmpty) {
            Ok(())
        } else {
            Err(err)
        }
    })
}
fn index_entry( fn index_entry(
entry: fs::DirEntry, entry: fs::DirEntry,
packages_index_dir: &Path, packages_index_dir: &Path,

View file

@ -1,9 +1,13 @@
use crate::AuthConfig; use crate::AuthConfig;
use fs_err::tokio as fs;
use gix::bstr::BStr; use gix::bstr::BStr;
use semver::Version; use semver::Version;
use serde::{Deserialize, Deserializer, Serializer}; use serde::{Deserialize, Deserializer, Serializer};
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use std::collections::{BTreeMap, HashSet}; use std::{
collections::{BTreeMap, HashSet},
path::Path,
};
pub fn authenticate_conn( pub fn authenticate_conn(
conn: &mut gix::remote::Connection< conn: &mut gix::remote::Connection<
@ -95,3 +99,16 @@ pub fn no_build_metadata(version: &Version) -> Version {
version.build = semver::BuildMetadata::EMPTY; version.build = semver::BuildMetadata::EMPTY;
version version
} }
/// Tries to delete the directory at `path`, treating "nothing to do" outcomes
/// as success.
///
/// Returns `Ok(())` when the directory was removed, when it does not exist
/// (`NotFound`), or when it still has entries (`DirectoryNotEmpty`). On
/// Windows builds a `PermissionDenied` error is also swallowed.
///
/// # Errors
///
/// Any other I/O error reported by the underlying `remove_dir` call is
/// returned unchanged.
pub async fn remove_empty_dir(path: &Path) -> std::io::Result<()> {
    use std::io::ErrorKind;

    let Err(err) = fs::remove_dir(path).await else {
        return Ok(());
    };

    match err.kind() {
        ErrorKind::NotFound | ErrorKind::DirectoryNotEmpty => Ok(()),
        // Concurrent removal on Windows seems to fail with PermissionDenied.
        // TODO: investigate why this happens and whether it can be avoided
        // without ignoring every PermissionDenied error.
        #[cfg(windows)]
        ErrorKind::PermissionDenied => Ok(()),
        _ => Err(err),
    }
}