From a491ea9739bc996f02f48e3b213b72b8c9af11c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ma=C3=ABl=20Nison?= Date: Tue, 9 Jul 2024 10:55:30 +0200 Subject: [PATCH] API improvements --- Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 123 ++++++++++++++++++++++++++++++++++++++++++++++- src/fs.rs | 61 ++++++++++++----------- src/lib_tests.rs | 38 +++++++++++++++ 5 files changed, 195 insertions(+), 31 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ad44dc8..818f696 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -489,7 +489,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pnp" -version = "0.6.0" +version = "0.7.0" dependencies = [ "arca", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index 6876afd..dcaa4bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "pnp" -version = "0.6.0" +version = "0.7.0" edition = "2021" license = "BSD-2-Clause" description = "Resolution primitives for Yarn PnP" diff --git a/README.md b/README.md index 827012e..5ff0e30 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,124 @@ # `pnp-rs` -This crate implements the Yarn Plug'n'Play [resolution algorithms](https://yarnpkg.com/advanced/pnp-spec). +This crate implements the Yarn Plug'n'Play [resolution algorithms](https://yarnpkg.com/advanced/pnp-spec) for Rust so that it can be easily reused within Rust-based tools. It also includes utilities allowing to transparently read files from within zip archives. 
+ +## Install + +``` +cargo add pnp +``` + +## Resolution + +```rust +fn example() { + let manifest + = load_pnp_manifest(".pnp.cjs").unwrap(); + + let host = ResolutionHost { + find_pnp_manifest: Box::new(move |_| Ok(Some(manifest.clone()))), + ..Default::default() + }; + + let config = ResolutionConfig { + host, + ..Default::default() + }; + + let resolution = resolve_to_unqualified( + "lodash/cloneDeep", + std::path::PathBuf::from("/path/to/index.js"), + &config, + ); + + match resolution { + Ok(Resolution::Package(path, subpath)) => { + // path = "/path/to/lodash.zip" + // subpath = "cloneDeep" + }, + Ok(Resolution::Specifier(specifier)) => { + // This is returned when the PnP resolver decides that it shouldn't + // handle the resolution for this particular specifier. In that case, + // the specifier should be forwarded to the default resolver. + }, + Err(err) => { + // An error happened during the resolution. Falling back to the default + // resolver isn't recommended. + }, + }; +} +``` + +## Filesystem utilities + +While PnP only deals with the resolution, not the filesystem, the file maps generated by Yarn rely on virtual filesystem layers for two reasons: + +- [Virtual packages](https://yarnpkg.com/advanced/lexicon#virtual-package), which require the same package to have different paths to account for different sets of dependencies (this only happens for packages that list peer dependencies). + +- Zip storage, which Yarn uses so the installed files never have to be unpacked from their archives, leading to faster installs and fewer risks of cache corruption. + +To make it easier to work with these virtual filesystems, the `pnp` crate also includes a `VPath` enum that lets you resolve virtual paths, and a set of zip manipulation utils (`open_zip_via_read` by default, and `open_zip_via_mmap` if the `mmap` feature is enabled). 
+ +```rust +use pnp::fs::{VPath, open_zip_via_read}; + +fn read_file(p: PathBuf) -> std::io::Result { + match VPath::from(&p).unwrap() { + VPath::Virtual(info) => { + let physical_path + = info.physical_base_path(); + + match &info.zip_path { + // The path was virtual and stored within a zip file; we need to read from the zip file + // Note that this opens the zip file every time, which is expensive; we'll see how to optimize that + Some(zip_path) => open_zip_via_read(&physical_path) + .unwrap() + .read_to_string(&zip_path), + + // The path was virtual but not a zip file; we just need to read from the provided location + None => std::fs::read_to_string(info.physical_base_path()) + } + }, + + // Nothing special to do, it's a regular path + VPath::Native(p) => { + std::fs::read_to_string(&p) + }, + } +} +``` + +## Cache reuse + +Opening and dropping a zip archive for every single file access would be expensive. To avoid that, `pnp-rs` provides a helper struct called `LruZipCache`, which lets you abstract away the zip opening and closing, and only keep the most recently used archives open. 
+ +```rust +use pnp::fs::{VPath, LruZipCache, open_zip_via_read}; + +const ZIP_CACHE: Lazy>> = Lazy::new(|| { + // It'll keep the last 50 zip archives open + LruZipCache::new(50, open_zip_via_read) +}); + +fn read_file(p: PathBuf) -> std::io::Result { + match VPath::from(&p).unwrap() { + VPath::Virtual(info) => { + let physical_path + = info.physical_base_path(); + + match &info.zip_path { + // The path was virtual and stored within a zip file; we need to read from the zip file + Some(zip_path) => ZIP_CACHE.read_to_string(info.physical_base_path()), + + // The path was virtual but not a zip file; we just need to read from the provided location + None => std::fs::read_to_string(info.physical_base_path()) + } + }, + + // Nothing special to do, it's a regular path + VPath::Native(p) => { + std::fs::read_to_string(&p) + }, + } +} +``` diff --git a/src/fs.rs b/src/fs.rs index 668724b..a6126fb 100644 --- a/src/fs.rs +++ b/src/fs.rs @@ -29,6 +29,12 @@ pub enum VPath { Native(PathBuf), } +impl VPath { + pub fn from(p: &Path) -> std::io::Result { + vpath(p) + } +} + #[derive(thiserror::Error, Debug)] pub enum Error { #[error("Entry not found")] @@ -60,7 +66,7 @@ fn io_bytes_to_str(vec: &[u8]) -> Result<&str, std::io::Error> { } #[cfg(feature = "mmap")] -pub fn open_zip_via_mmap(p: &Path) -> Result, std::io::Error> { +pub fn open_zip_via_mmap>(p: P) -> Result, std::io::Error> { let file = fs::File::open(p)?; let mmap_builder = mmap_rs::MmapOptions::new(file.metadata().unwrap().len().try_into().unwrap()) @@ -78,8 +84,7 @@ pub fn open_zip_via_mmap(p: &Path) -> Result, std::io::Error> Ok(zip) } - -pub fn open_zip_via_read(p: &Path) -> Result>, std::io::Error> { +pub fn open_zip_via_read>(p: P) -> Result>, std::io::Error> { let data = std::fs::read(p)?; let zip = Zip::new(data) @@ -89,26 +94,26 @@ pub fn open_zip_via_read(p: &Path) -> Result>, std::io::Error> { } pub trait ZipCache -where Storage : AsRef<[u8]> + Send + Sync { - fn act) -> T>(&self, p: &Path, cb: F) -> Result; 
+where Storage: AsRef<[u8]> + Send + Sync { + fn act, F : FnOnce(&Zip) -> T>(&self, p: P, cb: F) -> Result; - fn canonicalize(&self, zip_path: &Path, sub: &str) -> Result; + fn canonicalize, S: AsRef>(&self, zip_path: P, sub: S) -> Result; - fn is_dir(&self, zip_path: &Path, sub: &str) -> bool; - fn is_file(&self, zip_path: &Path, sub: &str) -> bool; + fn is_dir, S: AsRef>(&self, zip_path: P, sub: S) -> bool; + fn is_file, S: AsRef>(&self, zip_path: P, sub: S) -> bool; - fn read(&self, zip_path: &Path, sub: &str) -> Result, std::io::Error>; - fn read_to_string(&self, zip_path: &Path, sub: &str) -> Result; + fn read, S: AsRef>(&self, zip_path: P, sub: S) -> Result, std::io::Error>; + fn read_to_string, S: AsRef>(&self, zip_path: P, sub: S) -> Result; } pub struct LruZipCache -where Storage : AsRef<[u8]> + Send + Sync { +where Storage: AsRef<[u8]> + Send + Sync { lru: concurrent_lru::sharded::LruCache>, open: fn(&Path) -> std::io::Result>, } impl LruZipCache -where Storage : AsRef<[u8]> + Send + Sync { +where Storage: AsRef<[u8]> + Send + Sync { pub fn new(n: u64, open: fn(&Path) -> std::io::Result>) -> LruZipCache { LruZipCache { lru: concurrent_lru::sharded::LruCache::new(n), @@ -118,39 +123,39 @@ where Storage : AsRef<[u8]> + Send + Sync { } impl ZipCache for LruZipCache -where Storage : AsRef<[u8]> + Send + Sync { - fn act) -> T>(&self, p: &Path, cb: F) -> Result { - let zip = self.lru.get_or_try_init(p.to_path_buf(), 1, |p| { +where Storage: AsRef<[u8]> + Send + Sync { + fn act, F: FnOnce(&Zip) -> T>(&self, p: P, cb: F) -> Result { + let zip = self.lru.get_or_try_init(p.as_ref().to_path_buf(), 1, |p| { (self.open)(&p) })?; Ok(cb(zip.value())) } - fn canonicalize(&self, zip_path: &Path, sub: &str) -> Result { + fn canonicalize, S: AsRef>(&self, zip_path: P, sub: S) -> Result { let res = std::fs::canonicalize(zip_path)?; - Ok(res.join(sub)) + Ok(res.join(sub.as_ref())) } - fn is_dir(&self, zip_path: &Path, p: &str) -> bool { - self.act(zip_path, |zip| 
zip.is_dir(p)).unwrap_or(false) + fn is_dir, S: AsRef>(&self, zip_path: P, p: S) -> bool { + self.act(zip_path, |zip| zip.is_dir(p.as_ref())).unwrap_or(false) } - fn is_file(&self, zip_path: &Path, p: &str) -> bool { - self.act(zip_path, |zip| zip.is_file(p)).unwrap_or(false) + fn is_file, S: AsRef>(&self, zip_path: P, p: S) -> bool { + self.act(zip_path, |zip| zip.is_file(p.as_ref())).unwrap_or(false) } - fn read(&self, zip_path: &Path, p: &str) -> Result, std::io::Error> { - self.act(zip_path, |zip| zip.read(p))? + fn read, S: AsRef>(&self, zip_path: P, p: S) -> Result, std::io::Error> { + self.act(zip_path, |zip| zip.read(p.as_ref()))? } - fn read_to_string(&self, zip_path: &Path, p: &str) -> Result { - self.act(zip_path, |zip| zip.read_to_string(p))? + fn read_to_string, S: AsRef>(&self, zip_path: P, p: S) -> Result { + self.act(zip_path, |zip| zip.read_to_string(p.as_ref()))? } } -pub fn split_zip(p_bytes: &[u8]) -> (&[u8], Option<&[u8]>) { +fn split_zip(p_bytes: &[u8]) -> (&[u8], Option<&[u8]>) { lazy_static! { static ref ZIP_RE: Regex = Regex::new(r"\.zip").unwrap(); } @@ -179,7 +184,7 @@ pub fn split_zip(p_bytes: &[u8]) -> (&[u8], Option<&[u8]>) { (p_bytes, None) } -pub fn split_virtual(p_bytes: &[u8]) -> std::io::Result<(usize, Option<(usize, usize)>)> { +fn split_virtual(p_bytes: &[u8]) -> std::io::Result<(usize, Option<(usize, usize)>)> { lazy_static! 
{ static ref VIRTUAL_RE: Regex = Regex::new("(?:^|/)((?:\\$\\$virtual|__virtual__)/(?:[^/]+)-[a-f0-9]+/([0-9]+)/)").unwrap(); } @@ -195,7 +200,7 @@ pub fn split_virtual(p_bytes: &[u8]) -> std::io::Result<(usize, Option<(usize, u Ok((p_bytes.len(), None)) } -pub fn vpath(p: &Path) -> std::io::Result { +fn vpath(p: &Path) -> std::io::Result { let p_str = arca::path::normalize_path( &p.as_os_str() .to_string_lossy() diff --git a/src/lib_tests.rs b/src/lib_tests.rs index a7bef1e..099c379 100644 --- a/src/lib_tests.rs +++ b/src/lib_tests.rs @@ -23,6 +23,44 @@ mod tests { use crate::{init_pnp_manifest, load_pnp_manifest, resolve_to_unqualified, ResolutionHost}; use super::*; + #[test] + fn example() { + let manifest + = load_pnp_manifest("data/pnp-yarn-v3.cjs").unwrap(); + + let host = ResolutionHost { + find_pnp_manifest: Box::new(move |_| Ok(Some(manifest.clone()))), + ..Default::default() + }; + + let config = ResolutionConfig { + host, + ..Default::default() + }; + + let resolution = resolve_to_unqualified( + "lodash/cloneDeep", + std::path::PathBuf::from("/path/to/file"), + &config, + ); + + match resolution { + Ok(Resolution::Package(path, subpath)) => { + // path = "/path/to/lodash.zip" + // subpath = "cloneDeep" + }, + Ok(Resolution::Specifier(specifier)) => { + // This is returned when the PnP resolver decides that it shouldn't + // handle the resolution for this particular specifier. In that case, + // the specifier should be forwarded to the default resolver. + }, + Err(err) => { + // An error happened during the resolution. Falling back to the default + // resolver isn't recommended. + }, + }; + } + #[test] fn test_load_pnp_manifest() { load_pnp_manifest("data/pnp-yarn-v3.cjs")