perf(registry): asyncify reading data of top search packages

This commit is contained in:
daimond113 2025-01-09 22:59:20 +01:00
parent 685700f572
commit be6410443f
No known key found for this signature in database
GPG key ID: 3A8ECE51328B513C
2 changed files with 51 additions and 43 deletions

View file

@@ -11,11 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add yanking & deprecating to registry by @daimond113 - Add yanking & deprecating to registry by @daimond113
- Log more information about configured auth & storage by @daimond113 - Log more information about configured auth & storage by @daimond113
### Changed
- Asyncify blocking operations by @daimond113
### Performance ### Performance
- Switch to using a `RwLock` over a `Mutex` to store repository data by @daimond113 - Switch to using a `RwLock` over a `Mutex` to store repository data by @daimond113
- Asyncify blocking operations by @daimond113
- Asyncify reading of package data of top search results by @daimond113
## [0.1.2] ## [0.1.2]
### Changed ### Changed

View file

@@ -1,18 +1,16 @@
use std::collections::HashMap; use std::collections::HashMap;
use crate::{
error::RegistryError,
package::{read_package, PackageResponse},
AppState,
};
use actix_web::{web, HttpResponse}; use actix_web::{web, HttpResponse};
use pesde::names::PackageName;
use semver::Version; use semver::Version;
use serde::Deserialize; use serde::Deserialize;
use tantivy::{collector::Count, query::AllQuery, schema::Value, DateTime, Order}; use tantivy::{collector::Count, query::AllQuery, schema::Value, DateTime, Order};
use tokio::task::JoinSet;
use crate::{error::RegistryError, package::PackageResponse, AppState};
use pesde::{
names::PackageName,
source::{
git_index::{read_file, root_tree, GitBasedSource},
pesde::IndexFile,
},
};
#[derive(Deserialize)] #[derive(Deserialize)]
pub struct Request { pub struct Request {
@@ -52,47 +50,58 @@ pub async fn search_packages(
) )
.unwrap(); .unwrap();
let source = app_state.source.read().await; // prevent a write lock on the source while we're reading the documents
let repo = gix::open(source.path(&app_state.project))?; let _guard = app_state.source.read().await;
let tree = root_tree(&repo)?;
let top_docs = top_docs let mut results = Vec::with_capacity(top_docs.len());
results.extend((0..top_docs.len()).map(|_| None::<PackageResponse>));
let mut tasks = top_docs
.into_iter() .into_iter()
.map(|(_, doc_address)| { .enumerate()
.map(|(i, (_, doc_address))| {
let app_state = app_state.clone();
let doc = searcher.doc::<HashMap<_, _>>(doc_address).unwrap(); let doc = searcher.doc::<HashMap<_, _>>(doc_address).unwrap();
let id = doc async move {
.get(&id) let id = doc
.unwrap() .get(&id)
.as_str() .unwrap()
.unwrap() .as_str()
.parse::<PackageName>() .unwrap()
.unwrap(); .parse::<PackageName>()
let (scope, name) = id.as_str(); .unwrap();
let version = doc let version = doc
.get(&version) .get(&version)
.unwrap() .unwrap()
.as_str() .as_str()
.unwrap() .unwrap()
.parse::<Version>() .parse::<Version>()
.unwrap(); .unwrap();
let file: IndexFile = let file = read_package(&app_state, &id, &*app_state.source.read().await)
toml::de::from_str(&read_file(&tree, [scope, name]).unwrap().unwrap()).unwrap(); .await?
.unwrap();
let version_id = file let version_id = file
.entries .entries
.keys() .keys()
.filter(|v_id| *v_id.version() == version) .filter(|v_id| *v_id.version() == version)
.max() .max()
.unwrap(); .unwrap();
PackageResponse::new(&id, version_id, &file) Ok::<_, RegistryError>((i, PackageResponse::new(&id, version_id, &file)))
}
}) })
.collect::<Vec<_>>(); .collect::<JoinSet<_>>();
while let Some(res) = tasks.join_next().await {
let (i, res) = res.unwrap()?;
results[i] = Some(res);
}
Ok(HttpResponse::Ok().json(serde_json::json!({ Ok(HttpResponse::Ok().json(serde_json::json!({
"data": top_docs, "data": results,
"count": count, "count": count,
}))) })))
} }