feat: SUPER fast standalone binaries using jemalloc & rayon

This commit is contained in:
Erica Marigold 2023-11-23 19:46:29 +05:30
parent 2af8ed3b9f
commit 1e43f70c92
No known key found for this signature in database
GPG key ID: 2768CC0C23D245D1
4 changed files with 108 additions and 67 deletions

21
Cargo.lock generated
View file

@ -1165,6 +1165,7 @@ dependencies = [
"serde_json", "serde_json",
"serde_yaml", "serde_yaml",
"thiserror", "thiserror",
"tikv-jemallocator",
"tokio", "tokio",
"tokio-tungstenite", "tokio-tungstenite",
"toml", "toml",
@ -2349,6 +2350,26 @@ dependencies = [
"once_cell", "once_cell",
] ]
[[package]]
name = "tikv-jemalloc-sys"
version = "0.5.4+5.3.0-patched"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9402443cb8fd499b6f327e40565234ff34dbda27460c5b47db0db77443dd85d1"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "tikv-jemallocator"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "965fe0c26be5c56c94e38ba547249074803efd52adfb66de62107d95aab3eaca"
dependencies = [
"libc",
"tikv-jemalloc-sys",
]
[[package]] [[package]]
name = "time" name = "time"
version = "0.2.27" version = "0.2.27"

View file

@ -83,6 +83,8 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
mlua = { version = "0.9.1", features = ["luau", "luau-jit", "serialize"] } mlua = { version = "0.9.1", features = ["luau", "luau-jit", "serialize"] }
tokio = { version = "1.24", features = ["full", "tracing"] } tokio = { version = "1.24", features = ["full", "tracing"] }
os_str_bytes = { version = "6.4", features = ["conversions"] } os_str_bytes = { version = "6.4", features = ["conversions"] }
[target.'cfg(not(target_env = "msvc"))'.dependencies]
tikv-jemallocator = "0.5"
### SERDE ### SERDE

View file

@ -1,9 +1,13 @@
use std::{env, fmt::Write as _, path::PathBuf, process::ExitCode}; use std::{env, fmt::Write as _, ops::ControlFlow, path::PathBuf, process::ExitCode, sync::Mutex};
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use clap::Parser; use clap::Parser;
use lune::Lune; use lune::Lune;
use rayon::{
iter::{IndexedParallelIterator, ParallelIterator},
slice::ParallelSlice,
};
use tokio::{ use tokio::{
fs::read as read_to_vec, fs::read as read_to_vec,
io::{stdin, AsyncReadExt}, io::{stdin, AsyncReadExt},
@ -93,8 +97,8 @@ impl Cli {
let is_standalone = bin[bin.len() - signature.len()..bin.len()] == signature; let is_standalone = bin[bin.len() - signature.len()..bin.len()] == signature;
if is_standalone { if is_standalone {
let mut bytecode_offset = 0; let bytecode_offset = Mutex::new(0);
let mut bytecode_size = 0; let bytecode_size = Mutex::new(0);
// standalone binary structure (reversed, 8 bytes per field) // standalone binary structure (reversed, 8 bytes per field)
// [0] => signature // [0] => signature
@ -112,31 +116,45 @@ impl Cli {
// The rchunks will have unequally sized sections in the beginning // The rchunks will have unequally sized sections in the beginning
// but that doesn't matter to us because we don't need anything past the // but that doesn't matter to us because we don't need anything past the
// middle chunks where the bytecode is stored // middle chunks where the bytecode is stored
for (idx, chunk) in bin.rchunks(signature.len()).enumerate() { bin.par_rchunks(signature.len())
if idx == 0 && chunk != signature { .enumerate()
// Binary is guaranteed to be standalone, we've confirmed this before .try_for_each(|(idx, chunk)| {
unreachable!() let mut bytecode_offset = bytecode_offset.lock().unwrap();
} let mut bytecode_size = bytecode_size.lock().unwrap();
if idx == 3 { if *bytecode_offset != 0 && *bytecode_size != 0 {
bytecode_offset = u64::from_ne_bytes(chunk.try_into()?); return ControlFlow::Break(());
} }
if idx == 2 { if idx == 0 && chunk != signature {
bytecode_size = u64::from_ne_bytes(chunk.try_into()?); // Binary is guaranteed to be standalone, we've confirmed this before
} unreachable!("expected proper signature for standalone binary")
} }
if idx == 3 {
*bytecode_offset = u64::from_ne_bytes(chunk.try_into().unwrap());
}
if idx == 2 {
*bytecode_size = u64::from_ne_bytes(chunk.try_into().unwrap());
}
ControlFlow::Continue(())
});
let bytecode_offset_inner = bytecode_offset.into_inner().unwrap();
let bytecode_size_inner = bytecode_size.into_inner().unwrap();
// If we were able to retrieve the required metadata, we load // If we were able to retrieve the required metadata, we load
// and execute the bytecode // and execute the bytecode
if bytecode_offset != 0 && bytecode_size != 0 { // if bytecode_offset_inner != 0 && bytecode_size_inner != 0 {
// FIXME: Passing arguments does not work like it should, because the first // FIXME: Passing arguments does not work like it should, because the first
// argument provided is treated as the script path. We should probably also not // argument provided is treated as the script path. We should probably also not
// allow any runner functionality within standalone binaries // allow any runner functionality within standalone binaries
let mut reserved_args = Vec::new(); let mut reserved_args = Vec::new();
macro_rules! include_reserved_args { macro_rules! include_reserved_args {
($($arg_bool:expr=> $mapping:literal),*) => { ($($arg_bool:expr=> $mapping:literal),*) => {
$( $(
if $arg_bool { if $arg_bool {
@ -146,43 +164,41 @@ impl Cli {
}; };
} }
let mut real_args = Vec::new(); let mut real_args = Vec::new();
if let Some(first_arg) = self.script_path { if let Some(first_arg) = self.script_path {
println!("{first_arg}"); real_args.push(first_arg);
real_args.push(first_arg);
}
include_reserved_args! {
self.setup => "--setup",
self.generate_docs_file => "--generate-docs-file",
self.generate_selene_types => "--generate-selene-types",
self.generate_luau_types => "--generate-luau-types",
self.list => "--list",
self.build => "--build"
}
real_args.append(&mut reserved_args);
real_args.append(&mut self.script_args.clone());
let result = Lune::new()
.with_args(real_args) // TODO: args should also include lune reserved ones
.run(
"STANDALONE",
&bin[usize::try_from(bytecode_offset)?
..usize::try_from(bytecode_offset + bytecode_size)?],
)
.await;
return Ok(match result {
Err(err) => {
eprintln!("{err}");
ExitCode::FAILURE
}
Ok(code) => code,
});
} }
include_reserved_args! {
self.setup => "--setup",
self.generate_docs_file => "--generate-docs-file",
self.generate_selene_types => "--generate-selene-types",
self.generate_luau_types => "--generate-luau-types",
self.list => "--list",
self.build => "--build"
}
real_args.append(&mut reserved_args);
real_args.append(&mut self.script_args.clone());
let result = Lune::new()
.with_args(real_args) // TODO: args should also include lune reserved ones
.run(
"STANDALONE",
&bin[usize::try_from(bytecode_offset_inner)?
..usize::try_from(bytecode_offset_inner + bytecode_size_inner)?],
)
.await;
return Ok(match result {
Err(err) => {
eprintln!("{err}");
ExitCode::FAILURE
}
Ok(code) => code,
});
// }
} }
// List files in `lune` and `.lune` directories, if wanted // List files in `lune` and `.lune` directories, if wanted
@ -284,17 +300,13 @@ impl Cli {
output_path.to_string_lossy() output_path.to_string_lossy()
); );
return Ok( return Ok(match build_standalone(output_path, script_contents).await {
match build_standalone(output_path, strip_shebang(script_contents.clone())) Ok(exitcode) => exitcode,
.await Err(err) => {
{ eprintln!("{err}");
Ok(exitcode) => exitcode, ExitCode::FAILURE
Err(err) => { }
eprintln!("{err}"); });
ExitCode::FAILURE
}
},
);
} }
// Create a new lune object with all globals & run the script // Create a new lune object with all globals & run the script

View file

@ -16,6 +16,12 @@ pub(crate) mod cli;
use cli::Cli; use cli::Cli;
use console::style; use console::style;
#[cfg(not(target_env = "msvc"))]
use tikv_jemallocator::Jemalloc;
// Registers jemalloc (via `tikv_jemallocator`) as the allocator for the whole
// binary through `#[global_allocator]`.
//
// Only compiled when the target environment is not MSVC; on MSVC targets no
// custom allocator is declared here, so the default one is used.
// NOTE(review): the `tikv-jemallocator` dependency is gated by the same
// `cfg(not(target_env = "msvc"))` in the manifest — keep both conditions in sync.
// NOTE(review): presumably MSVC is excluded because jemalloc does not build
// there — confirm against the crate's platform support.
#[cfg(not(target_env = "msvc"))]
#[global_allocator]
static GLOBAL: Jemalloc = Jemalloc;
#[tokio::main(flavor = "multi_thread")] #[tokio::main(flavor = "multi_thread")]
async fn main() -> ExitCode { async fn main() -> ExitCode {