From bc42cf5a37a4edab82aa0138660858e279156331 Mon Sep 17 00:00:00 2001
From: Filip Tibell
Date: Mon, 22 Apr 2024 22:57:24 +0200
Subject: [PATCH] Migrate regex builtin to lune-std-regex crate

---

Notes:
    Summary of what this patch adds, for reviewers:

    * lib.rs: the module table gains `new(pattern)`, which builds a `Regex`
      userdata. A pattern that fails to compile is surfaced as a Lua error
      (the `regex::Error` is wrapped with `LuaError::external`).
    * regex.rs (`LuaRegex`): methods `isMatch`, `find`, `captures`, `split`,
      `replace` and `replaceAll`. `find` and `captures` return nil-able
      values (`Option`), `split` returns a list of owned strings.
    * matches.rs (`LuaMatch`): keeps the searched text behind an `Arc` plus
      the start / end offsets reported by `regex::Match`. The `start` field
      is exposed as offset + 1 (Luau strings are 1-based); `finish` is the
      Rust exclusive end offset returned unchanged; `len` and the `#`
      operator are `end - start`.
    * captures.rs (`LuaCaptures`): a `self_cell` that owns the text and the
      `regex::Captures` borrowing from it. `LuaCaptures::new` returns `None`
      when the pattern does not match, so the stored `Option` is always
      `Some` once a value exists. `get(index)` passes the index straight to
      `Captures::get`, `group(name)` looks up a named group, and
      `format(template)` expands a template via `Captures::expand`. The `#`
      operator excludes the whole-pattern match.

 Cargo.lock                            |  2 +
 crates/lune-std-regex/Cargo.toml      |  3 +
 crates/lune-std-regex/src/captures.rs | 91 +++++++++++++++++++++++++++
 crates/lune-std-regex/src/lib.rs      | 14 ++++-
 crates/lune-std-regex/src/matches.rs  | 53 ++++++++++++++++
 crates/lune-std-regex/src/regex.rs    | 76 ++++++++++++++++++++++
 6 files changed, 238 insertions(+), 1 deletion(-)
 create mode 100644 crates/lune-std-regex/src/captures.rs
 create mode 100644 crates/lune-std-regex/src/matches.rs
 create mode 100644 crates/lune-std-regex/src/regex.rs

diff --git a/Cargo.lock b/Cargo.lock
index b86ff79..5c2a7c2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1614,6 +1614,8 @@ version = "0.1.0"
 dependencies = [
  "lune-utils",
  "mlua",
+ "regex",
+ "self_cell",
 ]
 
 [[package]]
diff --git a/crates/lune-std-regex/Cargo.toml b/crates/lune-std-regex/Cargo.toml
index b4821c7..a7dc859 100644
--- a/crates/lune-std-regex/Cargo.toml
+++ b/crates/lune-std-regex/Cargo.toml
@@ -13,4 +13,7 @@ workspace = true
 [dependencies]
 mlua = { version = "0.9.7", features = ["luau"] }
 
+regex = "1.10"
+self_cell = "1.0"
+
 lune-utils = { version = "0.1.0", path = "../lune-utils" }
diff --git a/crates/lune-std-regex/src/captures.rs b/crates/lune-std-regex/src/captures.rs
new file mode 100644
index 0000000..5dbea74
--- /dev/null
+++ b/crates/lune-std-regex/src/captures.rs
@@ -0,0 +1,91 @@
+use std::sync::Arc;
+
+use mlua::prelude::*;
+use regex::{Captures, Regex};
+use self_cell::self_cell;
+
+use super::matches::LuaMatch;
+
+type OptionalCaptures<'a> = Option<Captures<'a>>;
+
+self_cell! {
+    struct LuaCapturesInner {
+        owner: Arc<String>,
+        #[covariant]
+        dependent: OptionalCaptures,
+    }
+}
+
+/**
+    A wrapper over the `regex::Captures` struct that can be used from Lua.
+*/
+pub struct LuaCaptures {
+    inner: LuaCapturesInner,
+}
+
+impl LuaCaptures {
+    /**
+        Create a new `LuaCaptures` instance from a `Regex` pattern and a `String` text.
+
+        Returns `Some(_)` if captures were found, `None` if no captures were found.
+    */
+    pub fn new(pattern: &Regex, text: String) -> Option<Self> {
+        let inner =
+            LuaCapturesInner::new(Arc::from(text), |owned| pattern.captures(owned.as_str()));
+        if inner.borrow_dependent().is_some() {
+            Some(Self { inner })
+        } else {
+            None
+        }
+    }
+
+    fn captures(&self) -> &Captures {
+        self.inner
+            .borrow_dependent()
+            .as_ref()
+            .expect("None captures should not be used")
+    }
+
+    fn num_captures(&self) -> usize {
+        // NOTE: Here we exclude the match for the entire regex
+        // pattern, only counting the named and numbered captures
+        self.captures().len() - 1
+    }
+
+    fn text(&self) -> Arc<String> {
+        Arc::clone(self.inner.borrow_owner())
+    }
+}
+
+impl LuaUserData for LuaCaptures {
+    fn add_methods<'lua, M: LuaUserDataMethods<'lua, Self>>(methods: &mut M) {
+        methods.add_method("get", |_, this, index: usize| {
+            Ok(this
+                .captures()
+                .get(index)
+                .map(|m| LuaMatch::new(this.text(), m)))
+        });
+
+        methods.add_method("group", |_, this, group: String| {
+            Ok(this
+                .captures()
+                .name(&group)
+                .map(|m| LuaMatch::new(this.text(), m)))
+        });
+
+        methods.add_method("format", |_, this, format: String| {
+            let mut new = String::new();
+            this.captures().expand(&format, &mut new);
+            Ok(new)
+        });
+
+        methods.add_meta_method(LuaMetaMethod::Len, |_, this, ()| Ok(this.num_captures()));
+        methods.add_meta_method(LuaMetaMethod::ToString, |_, this, ()| {
+            Ok(format!("RegexCaptures({})", this.num_captures()))
+        });
+    }
+
+    fn add_fields<'lua, F: LuaUserDataFields<'lua, Self>>(fields: &mut F) {
+        fields.add_meta_field(LuaMetaMethod::Type, "RegexCaptures");
+    }
+}
diff --git a/crates/lune-std-regex/src/lib.rs b/crates/lune-std-regex/src/lib.rs
index ad73214..97fb279 100644
--- a/crates/lune-std-regex/src/lib.rs
+++ b/crates/lune-std-regex/src/lib.rs
@@ -4,6 +4,12 @@ use mlua::prelude::*;
 
 use lune_utils::TableBuilder;
 
+mod captures;
+mod matches;
+mod regex;
+
+use self::regex::LuaRegex;
+
 /**
     Creates the `regex` standard library module.
 
@@ -12,5 +18,11 @@ use lune_utils::TableBuilder;
     Errors when out of memory.
 */
 pub fn module(lua: &Lua) -> LuaResult<LuaTable> {
-    TableBuilder::new(lua)?.build_readonly()
+    TableBuilder::new(lua)?
+        .with_function("new", new_regex)?
+        .build_readonly()
+}
+
+fn new_regex(_: &Lua, pattern: String) -> LuaResult<LuaRegex> {
+    LuaRegex::new(pattern)
 }
diff --git a/crates/lune-std-regex/src/matches.rs b/crates/lune-std-regex/src/matches.rs
new file mode 100644
index 0000000..bc109f8
--- /dev/null
+++ b/crates/lune-std-regex/src/matches.rs
@@ -0,0 +1,53 @@
+use std::{ops::Range, sync::Arc};
+
+use mlua::prelude::*;
+use regex::Match;
+
+/**
+    A wrapper over the `regex::Match` struct that can be used from Lua.
+*/
+pub struct LuaMatch {
+    text: Arc<String>,
+    start: usize,
+    end: usize,
+}
+
+impl LuaMatch {
+    /**
+        Create a new `LuaMatch` instance from a `String` text and a `regex::Match`.
+    */
+    pub fn new(text: Arc<String>, matched: Match) -> Self {
+        Self {
+            text,
+            start: matched.start(),
+            end: matched.end(),
+        }
+    }
+
+    fn range(&self) -> Range<usize> {
+        self.start..self.end
+    }
+
+    fn slice(&self) -> &str {
+        &self.text[self.range()]
+    }
+}
+
+impl LuaUserData for LuaMatch {
+    fn add_fields<'lua, F: LuaUserDataFields<'lua, Self>>(fields: &mut F) {
+        // NOTE: Strings are 0 based in Rust but 1 based in Luau, and end of range in Rust is exclusive
+        fields.add_field_method_get("start", |_, this| Ok(this.start.saturating_add(1)));
+        fields.add_field_method_get("finish", |_, this| Ok(this.end));
+        fields.add_field_method_get("len", |_, this| Ok(this.range().len()));
+        fields.add_field_method_get("text", |_, this| Ok(this.slice().to_string()));
+
+        fields.add_meta_field(LuaMetaMethod::Type, "RegexMatch");
+    }
+
+    fn add_methods<'lua, M: LuaUserDataMethods<'lua, Self>>(methods: &mut M) {
+        methods.add_meta_method(LuaMetaMethod::Len, |_, this, ()| Ok(this.range().len()));
+        methods.add_meta_method(LuaMetaMethod::ToString, |_, this, ()| {
+            Ok(format!("RegexMatch({})", this.slice()))
+        });
+    }
+}
diff --git a/crates/lune-std-regex/src/regex.rs b/crates/lune-std-regex/src/regex.rs
new file mode 100644
index 0000000..9b83544
--- /dev/null
+++ b/crates/lune-std-regex/src/regex.rs
@@ -0,0 +1,76 @@
+use std::sync::Arc;
+
+use mlua::prelude::*;
+use regex::Regex;
+
+use super::{captures::LuaCaptures, matches::LuaMatch};
+
+/**
+    A wrapper over the `regex::Regex` struct that can be used from Lua.
+*/
+#[derive(Debug, Clone)]
+pub struct LuaRegex {
+    inner: Regex,
+}
+
+impl LuaRegex {
+    /**
+        Create a new `LuaRegex` instance from a `String` pattern.
+    */
+    pub fn new(pattern: String) -> LuaResult<Self> {
+        Regex::new(&pattern)
+            .map(|inner| Self { inner })
+            .map_err(LuaError::external)
+    }
+}
+
+impl LuaUserData for LuaRegex {
+    fn add_methods<'lua, M: LuaUserDataMethods<'lua, Self>>(methods: &mut M) {
+        methods.add_method("isMatch", |_, this, text: String| {
+            Ok(this.inner.is_match(&text))
+        });
+
+        methods.add_method("find", |_, this, text: String| {
+            let arc = Arc::new(text);
+            Ok(this
+                .inner
+                .find(&arc)
+                .map(|m| LuaMatch::new(Arc::clone(&arc), m)))
+        });
+
+        methods.add_method("captures", |_, this, text: String| {
+            Ok(LuaCaptures::new(&this.inner, text))
+        });
+
+        methods.add_method("split", |_, this, text: String| {
+            Ok(this
+                .inner
+                .split(&text)
+                .map(ToString::to_string)
+                .collect::<Vec<_>>())
+        });
+
+        // TODO: Determine whether it's desirable and / or feasible to support
+        // using a function or table for `replace` like in the lua string library
+        methods.add_method(
+            "replace",
+            |_, this, (haystack, replacer): (String, String)| {
+                Ok(this.inner.replace(&haystack, replacer).to_string())
+            },
+        );
+        methods.add_method(
+            "replaceAll",
+            |_, this, (haystack, replacer): (String, String)| {
+                Ok(this.inner.replace_all(&haystack, replacer).to_string())
+            },
+        );
+
+        methods.add_meta_method(LuaMetaMethod::ToString, |_, this, ()| {
+            Ok(format!("Regex({})", this.inner.as_str()))
+        });
+    }
+
+    fn add_fields<'lua, F: LuaUserDataFields<'lua, Self>>(fields: &mut F) {
+        fields.add_meta_field(LuaMetaMethod::Type, "Regex");
+    }
+}