Skip to content

Commit

Permalink
API improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
arcanis committed Jul 9, 2024
1 parent d51fce6 commit a491ea9
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 31 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pnp"
version = "0.6.0"
version = "0.7.0"
edition = "2021"
license = "BSD-2-Clause"
description = "Resolution primitives for Yarn PnP"
Expand Down
123 changes: 122 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,124 @@
# `pnp-rs`

This crate implements the Yarn Plug'n'Play [resolution algorithms](https://yarnpkg.com/advanced/pnp-spec).
This crate implements the Yarn Plug'n'Play [resolution algorithms](https://yarnpkg.com/advanced/pnp-spec) for Rust so that they can be easily reused within Rust-based tools. It also includes utilities for transparently reading files from within zip archives.

## Install

```
cargo add pnp
```

## Resolution

```rust
fn example() {
    let pnp_manifest = load_pnp_manifest(".pnp.cjs").unwrap();

    // The host ignores the argument it receives and always hands back a clone
    // of the manifest loaded above.
    let config = ResolutionConfig {
        host: ResolutionHost {
            find_pnp_manifest: Box::new(move |_| Ok(Some(pnp_manifest.clone()))),
            ..Default::default()
        },
        ..Default::default()
    };

    let parent = std::path::PathBuf::from("/path/to/index.js");

    match resolve_to_unqualified("lodash/cloneDeep", parent, &config) {
        Ok(Resolution::Package(path, subpath)) => {
            // path = "/path/to/lodash.zip"
            // subpath = "cloneDeep"
        },
        Ok(Resolution::Specifier(specifier)) => {
            // This is returned when the PnP resolver decides that it shouldn't
            // handle the resolution for this particular specifier. In that case,
            // the specifier should be forwarded to the default resolver.
        },
        Err(err) => {
            // An error happened during the resolution. Falling back to the default
            // resolver isn't recommended.
        },
    };
}
```

## Filesystem utilities

While PnP only deals with the resolution, not the filesystem, the file maps generated by Yarn rely on virtual filesystem layers for two reasons:

- [Virtual packages](https://yarnpkg.com/advanced/lexicon#virtual-package), which require the same package to have different paths to account for different sets of dependencies (this only happens for packages that list peer dependencies)

- Zip storage, which Yarn uses so the installed files never have to be unpacked from their archives, leading to faster installs and fewer risks of cache corruption.

To make it easier to work with these virtual filesystems, the `pnp` crate also includes a `VPath` enum that lets you resolve virtual paths, and a set of zip manipulation utils (`open_zip_via_read` by default, and `open_zip_via_mmap` if the `mmap` feature is enabled).

```rust
use pnp::fs::{VPath, open_zip_via_read};

/// Reads the file at `p` as a UTF-8 string, reading from inside the zip
/// archive when the path turns out to be stored in one.
///
/// # Errors
///
/// Returns the `std::io::Error` reported while parsing the path, opening the
/// archive, or reading the file.
fn read_file(p: PathBuf) -> std::io::Result<String> {
    // Both `VPath::from` and `open_zip_via_read` already report failures as
    // `std::io::Error`, so they are propagated with `?` instead of panicking.
    match VPath::from(&p)? {
        VPath::Virtual(info) => {
            let physical_path = info.physical_base_path();

            match &info.zip_path {
                // The path was virtual and stored within a zip file; we need to read from the zip file
                // Note that this opens the zip file every time, which is expensive; we'll see how to optimize that
                Some(zip_path) => open_zip_via_read(&physical_path)?.read_to_string(zip_path),

                // The path was virtual but not a zip file; we just need to read from the provided location
                None => std::fs::read_to_string(&physical_path),
            }
        },

        // Nothing special to do, it's a regular path
        VPath::Native(p) => std::fs::read_to_string(&p),
    }
}
```

## Cache reuse

Opening and dropping a zip archive for every single file access would be expensive. To avoid that, `pnp-rs` provides a helper struct called `LruZipCache`, which lets you abstract away the opening and closing of zip archives and keeps only the most recently used archives open.

```rust
use std::path::PathBuf;

use pnp::fs::{VPath, LruZipCache, ZipCache, open_zip_via_read};

const ZIP_CACHE: Lazy<LruZipCache<Vec<u8>>> = Lazy::new(|| {
// It'll keep the last 50 zip archives open
LruZipCache::new(50, open_zip_via_read)
});

/// Reads the file at `p` as a UTF-8 string, going through `ZIP_CACHE` when the
/// path turns out to be stored inside a zip archive.
///
/// # Errors
///
/// Returns the `std::io::Error` reported while parsing the path, opening the
/// archive, or reading the file.
fn read_file(p: PathBuf) -> std::io::Result<String> {
    // `VPath::from` already reports failures as `std::io::Error`, so it is
    // propagated with `?` instead of panicking.
    match VPath::from(&p)? {
        VPath::Virtual(info) => {
            let physical_path = info.physical_base_path();

            match &info.zip_path {
                // The path was virtual and stored within a zip file; we need to read from the zip file.
                // The cache needs both the archive to open and the entry to read inside it.
                Some(zip_path) => ZIP_CACHE.read_to_string(&physical_path, zip_path),

                // The path was virtual but not a zip file; we just need to read from the provided location
                None => std::fs::read_to_string(&physical_path),
            }
        },

        // Nothing special to do, it's a regular path
        VPath::Native(p) => std::fs::read_to_string(&p),
    }
}
```
61 changes: 33 additions & 28 deletions src/fs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ pub enum VPath {
Native(PathBuf),
}

impl VPath {
    /// Builds a [`VPath`] from `p` by delegating to this module's `vpath` helper.
    ///
    /// # Errors
    ///
    /// Returns whatever `std::io::Error` `vpath` reports for `p`.
    pub fn from(p: &Path) -> std::io::Result<Self> {
        vpath(p)
    }
}

#[derive(thiserror::Error, Debug)]
pub enum Error {
#[error("Entry not found")]
Expand Down Expand Up @@ -60,7 +66,7 @@ fn io_bytes_to_str(vec: &[u8]) -> Result<&str, std::io::Error> {
}

#[cfg(feature = "mmap")]
pub fn open_zip_via_mmap(p: &Path) -> Result<Zip<mmap_rs::Mmap>, std::io::Error> {
pub fn open_zip_via_mmap<P: AsRef<Path>>(p: P) -> Result<Zip<mmap_rs::Mmap>, std::io::Error> {
let file = fs::File::open(p)?;

let mmap_builder = mmap_rs::MmapOptions::new(file.metadata().unwrap().len().try_into().unwrap())
Expand All @@ -78,8 +84,7 @@ pub fn open_zip_via_mmap(p: &Path) -> Result<Zip<mmap_rs::Mmap>, std::io::Error>

Ok(zip)
}

pub fn open_zip_via_read(p: &Path) -> Result<Zip<Vec<u8>>, std::io::Error> {
pub fn open_zip_via_read<P: AsRef<Path>>(p: P) -> Result<Zip<Vec<u8>>, std::io::Error> {
let data = std::fs::read(p)?;

let zip = Zip::new(data)
Expand All @@ -89,26 +94,26 @@ pub fn open_zip_via_read(p: &Path) -> Result<Zip<Vec<u8>>, std::io::Error> {
}

pub trait ZipCache<Storage>
where Storage : AsRef<[u8]> + Send + Sync {
fn act<T, F : FnOnce(&Zip<Storage>) -> T>(&self, p: &Path, cb: F) -> Result<T, std::io::Error>;
where Storage: AsRef<[u8]> + Send + Sync {
fn act<T, P: AsRef<Path>, F : FnOnce(&Zip<Storage>) -> T>(&self, p: P, cb: F) -> Result<T, std::io::Error>;

fn canonicalize(&self, zip_path: &Path, sub: &str) -> Result<PathBuf, std::io::Error>;
fn canonicalize<P: AsRef<Path>, S: AsRef<str>>(&self, zip_path: P, sub: S) -> Result<PathBuf, std::io::Error>;

fn is_dir(&self, zip_path: &Path, sub: &str) -> bool;
fn is_file(&self, zip_path: &Path, sub: &str) -> bool;
fn is_dir<P: AsRef<Path>, S: AsRef<str>>(&self, zip_path: P, sub: S) -> bool;
fn is_file<P: AsRef<Path>, S: AsRef<str>>(&self, zip_path: P, sub: S) -> bool;

fn read(&self, zip_path: &Path, sub: &str) -> Result<Vec<u8>, std::io::Error>;
fn read_to_string(&self, zip_path: &Path, sub: &str) -> Result<String, std::io::Error>;
fn read<P: AsRef<Path>, S: AsRef<str>>(&self, zip_path: P, sub: S) -> Result<Vec<u8>, std::io::Error>;
fn read_to_string<P: AsRef<Path>, S: AsRef<str>>(&self, zip_path: P, sub: S) -> Result<String, std::io::Error>;
}

pub struct LruZipCache<Storage>
where Storage : AsRef<[u8]> + Send + Sync {
where Storage: AsRef<[u8]> + Send + Sync {
lru: concurrent_lru::sharded::LruCache<PathBuf, Zip<Storage>>,
open: fn(&Path) -> std::io::Result<Zip<Storage>>,
}

impl<Storage> LruZipCache<Storage>
where Storage : AsRef<[u8]> + Send + Sync {
where Storage: AsRef<[u8]> + Send + Sync {
pub fn new(n: u64, open: fn(&Path) -> std::io::Result<Zip<Storage>>) -> LruZipCache<Storage> {
LruZipCache {
lru: concurrent_lru::sharded::LruCache::new(n),
Expand All @@ -118,39 +123,39 @@ where Storage : AsRef<[u8]> + Send + Sync {
}

impl<Storage> ZipCache<Storage> for LruZipCache<Storage>
where Storage : AsRef<[u8]> + Send + Sync {
fn act<T, F : FnOnce(&Zip<Storage>) -> T>(&self, p: &Path, cb: F) -> Result<T, std::io::Error> {
let zip = self.lru.get_or_try_init(p.to_path_buf(), 1, |p| {
where Storage: AsRef<[u8]> + Send + Sync {
fn act<T, P: AsRef<Path>, F: FnOnce(&Zip<Storage>) -> T>(&self, p: P, cb: F) -> Result<T, std::io::Error> {
let zip = self.lru.get_or_try_init(p.as_ref().to_path_buf(), 1, |p| {
(self.open)(&p)
})?;

Ok(cb(zip.value()))
}

fn canonicalize(&self, zip_path: &Path, sub: &str) -> Result<PathBuf, std::io::Error> {
fn canonicalize<P: AsRef<Path>, S: AsRef<str>>(&self, zip_path: P, sub: S) -> Result<PathBuf, std::io::Error> {
let res = std::fs::canonicalize(zip_path)?;

Ok(res.join(sub))
Ok(res.join(sub.as_ref()))
}

fn is_dir(&self, zip_path: &Path, p: &str) -> bool {
self.act(zip_path, |zip| zip.is_dir(p)).unwrap_or(false)
fn is_dir<P: AsRef<Path>, S: AsRef<str>>(&self, zip_path: P, p: S) -> bool {
self.act(zip_path, |zip| zip.is_dir(p.as_ref())).unwrap_or(false)
}

fn is_file(&self, zip_path: &Path, p: &str) -> bool {
self.act(zip_path, |zip| zip.is_file(p)).unwrap_or(false)
fn is_file<P: AsRef<Path>, S: AsRef<str>>(&self, zip_path: P, p: S) -> bool {
self.act(zip_path, |zip| zip.is_file(p.as_ref())).unwrap_or(false)
}

fn read(&self, zip_path: &Path, p: &str) -> Result<Vec<u8>, std::io::Error> {
self.act(zip_path, |zip| zip.read(p))?
fn read<P: AsRef<Path>, S: AsRef<str>>(&self, zip_path: P, p: S) -> Result<Vec<u8>, std::io::Error> {
self.act(zip_path, |zip| zip.read(p.as_ref()))?
}

fn read_to_string(&self, zip_path: &Path, p: &str) -> Result<String, std::io::Error> {
self.act(zip_path, |zip| zip.read_to_string(p))?
fn read_to_string<P: AsRef<Path>, S: AsRef<str>>(&self, zip_path: P, p: S) -> Result<String, std::io::Error> {
self.act(zip_path, |zip| zip.read_to_string(p.as_ref()))?
}
}

pub fn split_zip(p_bytes: &[u8]) -> (&[u8], Option<&[u8]>) {
fn split_zip(p_bytes: &[u8]) -> (&[u8], Option<&[u8]>) {
lazy_static! {
static ref ZIP_RE: Regex = Regex::new(r"\.zip").unwrap();
}
Expand Down Expand Up @@ -179,7 +184,7 @@ pub fn split_zip(p_bytes: &[u8]) -> (&[u8], Option<&[u8]>) {
(p_bytes, None)
}

pub fn split_virtual(p_bytes: &[u8]) -> std::io::Result<(usize, Option<(usize, usize)>)> {
fn split_virtual(p_bytes: &[u8]) -> std::io::Result<(usize, Option<(usize, usize)>)> {
lazy_static! {
static ref VIRTUAL_RE: Regex = Regex::new("(?:^|/)((?:\\$\\$virtual|__virtual__)/(?:[^/]+)-[a-f0-9]+/([0-9]+)/)").unwrap();
}
Expand All @@ -195,7 +200,7 @@ pub fn split_virtual(p_bytes: &[u8]) -> std::io::Result<(usize, Option<(usize, u
Ok((p_bytes.len(), None))
}

pub fn vpath(p: &Path) -> std::io::Result<VPath> {
fn vpath(p: &Path) -> std::io::Result<VPath> {
let p_str = arca::path::normalize_path(
&p.as_os_str()
.to_string_lossy()
Expand Down
38 changes: 38 additions & 0 deletions src/lib_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,44 @@ mod tests {
use crate::{init_pnp_manifest, load_pnp_manifest, resolve_to_unqualified, ResolutionHost};
use super::*;

/// Smoke test mirroring the README example: it runs a resolution against the
/// bundled Yarn v3 manifest and accepts every outcome.
#[test]
fn example() {
    let loaded = load_pnp_manifest("data/pnp-yarn-v3.cjs").unwrap();

    // The host ignores its argument and always returns a clone of `loaded`.
    let host = ResolutionHost {
        find_pnp_manifest: Box::new(move |_| Ok(Some(loaded.clone()))),
        ..Default::default()
    };
    let config = ResolutionConfig { host, ..Default::default() };

    let parent = std::path::PathBuf::from("/path/to/file");
    let resolution = resolve_to_unqualified("lodash/cloneDeep", parent, &config);

    match resolution {
        Ok(Resolution::Package(path, subpath)) => {
            // path = "/path/to/lodash.zip"
            // subpath = "cloneDeep"
        },
        Ok(Resolution::Specifier(specifier)) => {
            // This is returned when the PnP resolver decides that it shouldn't
            // handle the resolution for this particular specifier. In that case,
            // the specifier should be forwarded to the default resolver.
        },
        Err(err) => {
            // An error happened during the resolution. Falling back to the default
            // resolver isn't recommended.
        },
    };
}

#[test]
fn test_load_pnp_manifest() {
load_pnp_manifest("data/pnp-yarn-v3.cjs")
Expand Down

0 comments on commit a491ea9

Please sign in to comment.