From e8c3a6652449fa681a59753e72d6687ac5875184 Mon Sep 17 00:00:00 2001 From: daimond113 <72147841+daimond113@users.noreply.github.com> Date: Fri, 10 Jan 2025 00:00:24 +0100 Subject: [PATCH] feat(registry): add individual job endpoints for package data --- Cargo.lock | 1 + registry/CHANGELOG.md | 1 + registry/Cargo.toml | 1 + registry/src/endpoints/mod.rs | 3 + registry/src/endpoints/package_archive.rs | 27 +++++++ registry/src/endpoints/package_doc.rs | 66 +++++++++++++++++ registry/src/endpoints/package_readme.rs | 27 +++++++ registry/src/endpoints/package_version.rs | 87 ++++++++--------------- registry/src/main.rs | 18 +++++ registry/src/request_path.rs | 32 ++++++++- registry/src/search.rs | 8 +-- registry/src/storage/fs.rs | 7 +- 12 files changed, 212 insertions(+), 66 deletions(-) create mode 100644 registry/src/endpoints/package_archive.rs create mode 100644 registry/src/endpoints/package_doc.rs create mode 100644 registry/src/endpoints/package_readme.rs diff --git a/Cargo.lock b/Cargo.lock index 46a1900..7fb3426 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3730,6 +3730,7 @@ dependencies = [ "thiserror 2.0.7", "tokio", "tokio-tar", + "tokio-util", "toml", "tracing", "tracing-actix-web", diff --git a/registry/CHANGELOG.md b/registry/CHANGELOG.md index 9ca582b..92abb8c 100644 --- a/registry/CHANGELOG.md +++ b/registry/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support deprecating and yanking packages by @daimond113 - Add yanking & deprecating to registry by @daimond113 - Log more information about configured auth & storage by @daimond113 +- Add individual endpoints for package data over using `Accept` header conditional returns by @daimond113 ### Performance - Switch to using a `RwLock` over a `Mutex` to store repository data by @daimond113 diff --git a/registry/Cargo.toml b/registry/Cargo.toml index 60c8c2d..b7a737d 100644 --- a/registry/Cargo.toml +++ b/registry/Cargo.toml @@ -16,6 +16,7 @@ semver = "1.0.24" chrono = { version = "0.4.39", features = ["serde"] } futures = "0.3.31" tokio = "1.42.0" +tokio-util = "0.7.13" tempfile = "3.14.0" fs-err = { version = "3.0.0", features = ["tokio"] } async-stream = "0.3.6" diff --git a/registry/src/endpoints/mod.rs b/registry/src/endpoints/mod.rs index 0870413..48bc3b9 100644 --- a/registry/src/endpoints/mod.rs +++ b/registry/src/endpoints/mod.rs @@ -1,4 +1,7 @@ pub mod deprecate_version; +pub mod package_archive; +pub mod package_doc; +pub mod package_readme; pub mod package_version; pub mod package_versions; pub mod publish_version; diff --git a/registry/src/endpoints/package_archive.rs b/registry/src/endpoints/package_archive.rs new file mode 100644 index 0000000..2554f6f --- /dev/null +++ b/registry/src/endpoints/package_archive.rs @@ -0,0 +1,27 @@ +use actix_web::{web, HttpResponse}; + +use crate::{ + error::RegistryError, + package::read_package, + request_path::{resolve_version_and_target, AnyOrSpecificTarget, LatestOrSpecificVersion}, + storage::StorageImpl, + AppState, +}; +use pesde::names::PackageName; + +pub async fn get_package_archive( + app_state: web::Data, + path: web::Path<(PackageName, LatestOrSpecificVersion, AnyOrSpecificTarget)>, +) -> Result { + let (name, version, target) = path.into_inner(); + + let Some(file) = read_package(&app_state, &name, &*app_state.source.read().await).await? else { + return Ok(HttpResponse::NotFound().finish()); + }; + + let Some(v_id) = resolve_version_and_target(&file, version, target) else { + return Ok(HttpResponse::NotFound().finish()); + }; + + app_state.storage.get_package(&name, v_id).await +} diff --git a/registry/src/endpoints/package_doc.rs b/registry/src/endpoints/package_doc.rs new file mode 100644 index 0000000..72ba0bd --- /dev/null +++ b/registry/src/endpoints/package_doc.rs @@ -0,0 +1,66 @@ +use crate::{ + error::RegistryError, + package::read_package, + request_path::{resolve_version_and_target, AnyOrSpecificTarget, LatestOrSpecificVersion}, + storage::StorageImpl, + AppState, +}; +use actix_web::{web, HttpResponse}; +use pesde::{ + names::PackageName, + source::{ + ids::VersionId, + pesde::{DocEntryKind, IndexFile}, + }, +}; +use serde::Deserialize; + +pub fn find_package_doc<'a>( + file: &'a IndexFile, + v_id: &VersionId, + doc_name: &str, +) -> Option<&'a str> { + let mut queue = file.entries[v_id] + .docs + .iter() + .map(|doc| &doc.kind) + .collect::>(); + while let Some(doc) = queue.pop() { + match doc { + DocEntryKind::Page { name, hash } if name == doc_name => return Some(hash.as_str()), + DocEntryKind::Category { items, .. } => { + queue.extend(items.iter().map(|item| &item.kind)) + } + _ => continue, + }; + } + + None +} + +#[derive(Debug, Deserialize)] +pub struct Query { + doc: String, +} + +pub async fn get_package_doc( + app_state: web::Data, + path: web::Path<(PackageName, LatestOrSpecificVersion, AnyOrSpecificTarget)>, + request_query: web::Query, +) -> Result { + let (name, version, target) = path.into_inner(); + + let Some(file) = read_package(&app_state, &name, &*app_state.source.read().await).await? else { + return Ok(HttpResponse::NotFound().finish()); + }; + + let Some(v_id) = resolve_version_and_target(&file, version, target) else { + return Ok(HttpResponse::NotFound().finish()); + }; + + let Some(hash) = find_package_doc(&file, v_id, &request_query.doc) else { + return Ok(HttpResponse::NotFound().finish()); + }; + + app_state.storage.get_doc(hash).await +} diff --git a/registry/src/endpoints/package_readme.rs b/registry/src/endpoints/package_readme.rs new file mode 100644 index 0000000..29e3578 --- /dev/null +++ b/registry/src/endpoints/package_readme.rs @@ -0,0 +1,27 @@ +use actix_web::{web, HttpResponse}; + +use crate::{ + error::RegistryError, + package::read_package, + request_path::{resolve_version_and_target, AnyOrSpecificTarget, LatestOrSpecificVersion}, + storage::StorageImpl, + AppState, +}; +use pesde::names::PackageName; + +pub async fn get_package_readme( + app_state: web::Data, + path: web::Path<(PackageName, LatestOrSpecificVersion, AnyOrSpecificTarget)>, +) -> Result { + let (name, version, target) = path.into_inner(); + + let Some(file) = read_package(&app_state, &name, &*app_state.source.read().await).await? else { + return Ok(HttpResponse::NotFound().finish()); + }; + + let Some(v_id) = resolve_version_and_target(&file, version, target) else { + return Ok(HttpResponse::NotFound().finish()); + }; + + app_state.storage.get_readme(&name, v_id).await +} diff --git a/registry/src/endpoints/package_version.rs b/registry/src/endpoints/package_version.rs index d212301..838e93b 100644 --- a/registry/src/endpoints/package_version.rs +++ b/registry/src/endpoints/package_version.rs @@ -2,13 +2,14 @@ use actix_web::{http::header::ACCEPT, web, HttpRequest, HttpResponse}; use serde::Deserialize; use crate::{ + endpoints::package_doc::find_package_doc, error::RegistryError, package::{read_package, PackageResponse}, - request_path::{AnyOrSpecificTarget, LatestOrSpecificVersion}, + request_path::{resolve_version_and_target, AnyOrSpecificTarget, LatestOrSpecificVersion}, storage::StorageImpl, AppState, }; -use pesde::{names::PackageName, source::pesde::DocEntryKind}; +use pesde::names::PackageName; #[derive(Debug, Deserialize)] pub struct Query { @@ -27,68 +28,38 @@ pub async fn get_package_version( return Ok(HttpResponse::NotFound().finish()); }; - let Some((v_id, entry)) = ({ - let version = match version { - LatestOrSpecificVersion::Latest => match file.entries.keys().map(|k| k.version()).max() - { - Some(latest) => latest.clone(), - None => return Ok(HttpResponse::NotFound().finish()), - }, - LatestOrSpecificVersion::Specific(version) => version, - }; - - let mut versions = file - .entries - .iter() - .filter(|(v_id, _)| *v_id.version() == version); - - match target { - AnyOrSpecificTarget::Any => versions.min_by_key(|(v_id, _)| *v_id.target()), - AnyOrSpecificTarget::Specific(kind) => { - versions.find(|(_, entry)| entry.target.kind() == kind) - } - } - }) else { + let Some(v_id) = resolve_version_and_target(&file, version, target) else { return Ok(HttpResponse::NotFound().finish()); }; - if let Some(doc_name) = request_query.doc.as_deref() { - let hash = 'finder: { - let mut queue = entry.docs.iter().map(|doc| &doc.kind).collect::>(); - while let Some(doc) = queue.pop() { - match doc { - DocEntryKind::Page { name, hash } if name == doc_name => { - break 'finder hash.clone() - } - DocEntryKind::Category { items, .. } => { - queue.extend(items.iter().map(|item| &item.kind)) - } - _ => continue, - }; - } + // TODO: this is deprecated, since the introduction of the specific endpoints for readme, doc and archive. + // remove this when we drop 0.5 support. + { + if let Some(doc_name) = request_query.doc.as_deref() { + let Some(hash) = find_package_doc(&file, v_id, doc_name) else { + return Ok(HttpResponse::NotFound().finish()); + }; - return Ok(HttpResponse::NotFound().finish()); - }; + return app_state.storage.get_doc(hash).await; + } - return app_state.storage.get_doc(&hash).await; - } + let accept = request + .headers() + .get(ACCEPT) + .and_then(|accept| accept.to_str().ok()) + .and_then(|accept| match accept.to_lowercase().as_str() { + "text/plain" => Some(true), + "application/octet-stream" => Some(false), + _ => None, + }); - let accept = request - .headers() - .get(ACCEPT) - .and_then(|accept| accept.to_str().ok()) - .and_then(|accept| match accept.to_lowercase().as_str() { - "text/plain" => Some(true), - "application/octet-stream" => Some(false), - _ => None, - }); - - if let Some(readme) = accept { - return if readme { - app_state.storage.get_readme(&name, v_id).await - } else { - app_state.storage.get_package(&name, v_id).await - }; + if let Some(readme) = accept { + return if readme { + app_state.storage.get_readme(&name, v_id).await + } else { + app_state.storage.get_package(&name, v_id).await + }; + } } Ok(HttpResponse::Ok().json(PackageResponse::new(&name, v_id, &file))) diff --git a/registry/src/main.rs b/registry/src/main.rs index 8d571d3..2045379 100644 --- a/registry/src/main.rs +++ b/registry/src/main.rs @@ -190,6 +190,24 @@ async fn run() -> std::io::Result<()> { .to(endpoints::package_version::get_package_version) .wrap(from_fn(auth::read_mw)), ) + .route( + "/packages/{name}/{version}/{target}/archive", + web::get() + .to(endpoints::package_archive::get_package_archive) + .wrap(from_fn(auth::read_mw)), + ) + .route( + "/packages/{name}/{version}/{target}/doc", + web::get() + .to(endpoints::package_doc::get_package_doc) + .wrap(from_fn(auth::read_mw)), + ) + .route( + "/packages/{name}/{version}/{target}/readme", + web::get() + .to(endpoints::package_readme::get_package_readme) + .wrap(from_fn(auth::read_mw)), + ) .service( web::resource("/packages/{name}/{version}/{target}/yank") .put(endpoints::yank_version::yank_package_version) diff --git a/registry/src/request_path.rs b/registry/src/request_path.rs index d49abf4..bc21c5f 100644 --- a/registry/src/request_path.rs +++ b/registry/src/request_path.rs @@ -1,4 +1,7 @@ -use pesde::manifest::target::TargetKind; +use pesde::{ + manifest::target::TargetKind, + source::{ids::VersionId, pesde::IndexFile}, +}; use semver::Version; use serde::{Deserialize, Deserializer}; @@ -46,6 +49,33 @@ impl<'de> Deserialize<'de> for AnyOrSpecificTarget { } } +pub fn resolve_version_and_target( + file: &IndexFile, + version: LatestOrSpecificVersion, + target: AnyOrSpecificTarget, +) -> Option<&VersionId> { + let version = match version { + LatestOrSpecificVersion::Latest => match file.entries.keys().map(|k| k.version()).max() { + Some(latest) => latest.clone(), + None => return None, + }, + LatestOrSpecificVersion::Specific(version) => version, + }; + + let mut versions = file + .entries + .iter() + .filter(|(v_id, _)| *v_id.version() == version); + + match target { + AnyOrSpecificTarget::Any => versions.min_by_key(|(v_id, _)| *v_id.target()), + AnyOrSpecificTarget::Specific(kind) => { + versions.find(|(_, entry)| entry.target.kind() == kind) + } + } + .map(|(v_id, _)| v_id) +} + #[derive(Debug)] pub enum AllOrSpecificTarget { All, diff --git a/registry/src/search.rs b/registry/src/search.rs index a4df6b2..036434b 100644 --- a/registry/src/search.rs +++ b/registry/src/search.rs @@ -132,8 +132,8 @@ pub async fn make_search( .add_document(doc!( id_field => pkg_name.to_string(), version => v_id.version().to_string(), - scope => pkg_name.as_str().0, - name => pkg_name.as_str().1, + scope => pkg_name.scope(), + name => pkg_name.name(), description => latest_entry.description.clone().unwrap_or_default(), published_at => DateTime::from_timestamp_secs(latest_entry.published_at.timestamp()), )) @@ -165,8 +165,8 @@ pub fn update_search_version( search_writer.add_document(doc!( id_field => name.to_string(), schema.get_field("version").unwrap() => version.to_string(), - schema.get_field("scope").unwrap() => name.as_str().0, - schema.get_field("name").unwrap() => name.as_str().1, + schema.get_field("scope").unwrap() => name.scope(), + schema.get_field("name").unwrap() => name.name(), schema.get_field("description").unwrap() => entry.description.clone().unwrap_or_default(), schema.get_field("published_at").unwrap() => DateTime::from_timestamp_secs(entry.published_at.timestamp()) )).unwrap(); diff --git a/registry/src/storage/fs.rs b/registry/src/storage/fs.rs index 7ed9d89..25ce90a 100644 --- a/registry/src/storage/fs.rs +++ b/registry/src/storage/fs.rs @@ -9,6 +9,7 @@ use std::{ fmt::Display, path::{Path, PathBuf}, }; +use tokio_util::io::ReaderStream; #[derive(Debug)] pub struct FSStorage { @@ -19,11 +20,11 @@ async fn read_file_to_response( path: &Path, content_type: &str, ) -> Result { - Ok(match fs::read(path).await { - Ok(contents) => HttpResponse::Ok() + Ok(match fs::File::open(path).await { + Ok(file) => HttpResponse::Ok() .append_header((CONTENT_TYPE, content_type)) .append_header((CONTENT_ENCODING, "gzip")) - .body(contents), + .streaming(ReaderStream::new(file)), Err(e) if e.kind() == std::io::ErrorKind::NotFound => HttpResponse::NotFound().finish(), Err(e) => return Err(e.into()), })