From be6410443f74870118ae1da56235daf78e1200e4 Mon Sep 17 00:00:00 2001 From: daimond113 <72147841+daimond113@users.noreply.github.com> Date: Thu, 9 Jan 2025 22:59:20 +0100 Subject: [PATCH] perf(registry): asyncify reading data of top search packages --- registry/CHANGELOG.md | 5 +- registry/src/endpoints/search.rs | 89 ++++++++++++++++++-------------- 2 files changed, 51 insertions(+), 43 deletions(-) diff --git a/registry/CHANGELOG.md b/registry/CHANGELOG.md index d4c5981..9ca582b 100644 --- a/registry/CHANGELOG.md +++ b/registry/CHANGELOG.md @@ -11,11 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add yanking & deprecating to registry by @daimond113 - Log more information about configured auth & storage by @daimond113 -### Changed -- Asyncify blocking operations by @daimond113 - ### Performance - Switch to using a `RwLock` over a `Mutex` to store repository data by @daimond113 +- Asyncify blocking operations by @daimond113 +- Asyncify reading of package data of top search results by @daimond113 ## [0.1.2] ### Changed diff --git a/registry/src/endpoints/search.rs b/registry/src/endpoints/search.rs index 3ba24a8..efbde30 100644 --- a/registry/src/endpoints/search.rs +++ b/registry/src/endpoints/search.rs @@ -1,18 +1,16 @@ use std::collections::HashMap; +use crate::{ + error::RegistryError, + package::{read_package, PackageResponse}, + AppState, +}; use actix_web::{web, HttpResponse}; +use pesde::names::PackageName; use semver::Version; use serde::Deserialize; use tantivy::{collector::Count, query::AllQuery, schema::Value, DateTime, Order}; - -use crate::{error::RegistryError, package::PackageResponse, AppState}; -use pesde::{ - names::PackageName, - source::{ - git_index::{read_file, root_tree, GitBasedSource}, - pesde::IndexFile, - }, -}; +use tokio::task::JoinSet; #[derive(Deserialize)] pub struct Request { @@ -52,47 +50,58 @@ pub async fn search_packages( ) .unwrap(); - let source = app_state.source.read().await; - 
let repo = gix::open(source.path(&app_state.project))?; - let tree = root_tree(&repo)?; + // prevent a write lock on the source while we're reading the documents + let _guard = app_state.source.read().await; - let top_docs = top_docs + let mut results = Vec::with_capacity(top_docs.len()); + results.extend((0..top_docs.len()).map(|_| None::<PackageResponse>)); + + let mut tasks = top_docs .into_iter() - .map(|(_, doc_address)| { + .enumerate() + .map(|(i, (_, doc_address))| { + let app_state = app_state.clone(); let doc = searcher.doc::<HashMap<_, _>>(doc_address).unwrap(); - let id = doc - .get(&id) - .unwrap() - .as_str() - .unwrap() - .parse::<PackageName>() - .unwrap(); - let (scope, name) = id.as_str(); - let version = doc - .get(&version) - .unwrap() - .as_str() - .unwrap() - .parse::<Version>() - .unwrap(); + async move { + let id = doc + .get(&id) + .unwrap() + .as_str() + .unwrap() + .parse::<PackageName>() + .unwrap(); + let version = doc + .get(&version) + .unwrap() + .as_str() + .unwrap() + .parse::<Version>() + .unwrap(); - let file: IndexFile = - toml::de::from_str(&read_file(&tree, [scope, name]).unwrap().unwrap()).unwrap(); + let file = read_package(&app_state, &id, &*app_state.source.read().await) + .await? + .unwrap(); - let version_id = file - .entries - .keys() - .filter(|v_id| *v_id.version() == version) - .max() - .unwrap(); + let version_id = file + .entries + .keys() + .filter(|v_id| *v_id.version() == version) + .max() + .unwrap(); - PackageResponse::new(&id, version_id, &file) + Ok::<_, RegistryError>((i, PackageResponse::new(&id, version_id, &file))) + } }) - .collect::<Vec<_>>(); + .collect::<JoinSet<_>>(); + + while let Some(res) = tasks.join_next().await { + let (i, res) = res.unwrap()?; + results[i] = Some(res); + } Ok(HttpResponse::Ok().json(serde_json::json!({ - "data": top_docs, + "data": results, "count": count, }))) }