From 75afd7747ea074df1d06c34d56bce2234fe4d9a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petri=20H=C3=A4kkinen?=
Date: Wed, 8 Nov 2023 14:47:02 +0200
Subject: [PATCH] Optimize vector literals by storing them in the constant table (supported only for 3 arguments)

---
Reviewer notes (this section is not part of the commit message):

* Bytecode.h: LBC_VERSION_MAX and LBC_VERSION_TARGET go from 4 to 5, and
  LBC_CONSTANT_VECTOR is appended after LBC_CONSTANT_CLOSURE.
* BytecodeBuilder: addConstantVector() narrows x/y/z to float. The
  de-duplication key stores the bit patterns of x and y in ConstantKey::value
  and of z in ConstantKey::extra; ConstantKeyHash takes a separate path for
  Type_Vector keys. The serialized form is the LBC_CONSTANT_VECTOR tag byte
  followed by three writeFloat() values.
* Compiler.cpp: a call is replaced by a constant load only when
  bfid == LBF_VECTOR, there are exactly 3 arguments, targetCount == 1 and all
  three arguments fold to Type_Number constants.
* lvmload.cpp: the loader reads three floats and calls setvvalue() with 0.0f
  as the fourth component.
* NOTE(review): this copy of the patch was rebuilt from a whitespace-collapsed
  dump. Template argument lists (reinterpret_cast<const char*>,
  as<AstExprIndexName>, read<float>) were restored, and context-line
  indentation is presumed to be 4 spaces per level - verify against the target
  tree before applying. The author e-mail on the From: header was not
  recoverable and is left absent.

 Common/include/Luau/Bytecode.h          |  5 +-
 Compiler/include/Luau/BytecodeBuilder.h |  9 ++-
 Compiler/src/BytecodeBuilder.cpp        | 79 ++++++++++++++++++++-----
 Compiler/src/Compiler.cpp               | 22 +++++++
 VM/src/lvmload.cpp                      |  9 +++
 5 files changed, 106 insertions(+), 18 deletions(-)

diff --git a/Common/include/Luau/Bytecode.h b/Common/include/Luau/Bytecode.h
index 8096eec5..65542e93 100644
--- a/Common/include/Luau/Bytecode.h
+++ b/Common/include/Luau/Bytecode.h
@@ -426,8 +426,8 @@ enum LuauBytecodeTag
 {
     // Bytecode version; runtime supports [MIN, MAX], compiler emits TARGET by default but may emit a higher version when flags are enabled
     LBC_VERSION_MIN = 3,
-    LBC_VERSION_MAX = 4,
-    LBC_VERSION_TARGET = 4,
+    LBC_VERSION_MAX = 5,
+    LBC_VERSION_TARGET = 5,
     // Type encoding version
     LBC_TYPE_VERSION = 1,
     // Types of constant table entries
@@ -438,6 +438,7 @@ enum LuauBytecodeTag
     LBC_CONSTANT_IMPORT,
     LBC_CONSTANT_TABLE,
     LBC_CONSTANT_CLOSURE,
+    LBC_CONSTANT_VECTOR,
 };
 
 // Type table tags
diff --git a/Compiler/include/Luau/BytecodeBuilder.h b/Compiler/include/Luau/BytecodeBuilder.h
index f5098d17..88ecd132 100644
--- a/Compiler/include/Luau/BytecodeBuilder.h
+++ b/Compiler/include/Luau/BytecodeBuilder.h
@@ -54,6 +54,7 @@ public:
     int32_t addConstantNil();
     int32_t addConstantBoolean(bool value);
     int32_t addConstantNumber(double value);
+    int32_t addConstantVector(double x, double y, double z);
     int32_t addConstantString(StringRef value);
     int32_t addImport(uint32_t iid);
     int32_t addConstantTable(const TableShape& shape);
@@ -145,6 +146,7 @@ private:
             Type_Nil,
             Type_Boolean,
             Type_Number,
+            Type_Vector,
             Type_String,
             Type_Import,
             Type_Table,
@@ -156,6 +158,7 @@ private:
         {
             bool valueBoolean;
             double valueNumber;
+            float valueVector[ 3 ];
             unsigned int valueString; // index into string table
             uint32_t valueImport;     // 10-10-10-2 encoded import id
             uint32_t valueTable;      // index into tableShapes[]
@@ -166,12 +169,14 @@ private:
     struct ConstantKey
     {
         Constant::Type type;
-        // Note: this stores value* from Constant; when type is Number_Double, this stores the same bits as double does but in uint64_t.
+        // Note: this stores value* from Constant; when type is Type_Number, this stores the same bits as double does but in uint64_t.
+        // for Type_Vector, x and y are stored in value and z is stored in extra.
         uint64_t value;
+        uint32_t extra = 0;
 
         bool operator==(const ConstantKey& key) const
         {
-            return type == key.type && value == key.value;
+            return type == key.type && value == key.value && extra == key.extra;
         }
     };
 
diff --git a/Compiler/src/BytecodeBuilder.cpp b/Compiler/src/BytecodeBuilder.cpp
index 83fb9ce5..5ec42181 100644
--- a/Compiler/src/BytecodeBuilder.cpp
+++ b/Compiler/src/BytecodeBuilder.cpp
@@ -42,6 +42,11 @@ static void writeInt(std::string& ss, int value)
     ss.append(reinterpret_cast<const char*>(&value), sizeof(value));
 }
 
+static void writeFloat(std::string& ss, float value)
+{
+    ss.append(reinterpret_cast<const char*>(&value), sizeof(value));
+}
+
 static void writeDouble(std::string& ss, double value)
 {
     ss.append(reinterpret_cast<const char*>(&value), sizeof(value));
@@ -147,23 +152,42 @@ size_t BytecodeBuilder::StringRefHash::operator()(const StringRef& v) const
 
 size_t BytecodeBuilder::ConstantKeyHash::operator()(const ConstantKey& key) const
 {
-    // finalizer from MurmurHash64B
-    const uint32_t m = 0x5bd1e995;
+    if (key.type == Constant::Type_Vector)
+    {
+        uint32_t i[3];
+        static_assert(sizeof(key.value) + sizeof(key.extra) == sizeof(i), "Expecting vector to have three 32-bit components");
+        memcpy(i, &key.value, sizeof(i));
 
-    uint32_t h1 = uint32_t(key.value);
-    uint32_t h2 = uint32_t(key.value >> 32) ^ (key.type * m);
+        // scramble bits to make sure that integer coordinates have entropy in lower bits
+        i[0] ^= i[0] >> 17;
+        i[1] ^= i[1] >> 17;
+        i[2] ^= i[2] >> 17;
 
-    h1 ^= h2 >> 18;
-    h1 *= m;
-    h2 ^= h1 >> 22;
-    h2 *= m;
-    h1 ^= h2 >> 17;
-    h1 *= m;
-    h2 ^= h1 >> 19;
-    h2 *= m;
+        // Optimized Spatial Hashing for Collision Detection of Deformable Objects
+        uint32_t h = (i[0] * 73856093) ^ (i[1] * 19349663) ^ (i[2] * 83492791);
 
-    // ... truncated to 32-bit output (normally hash is equal to (uint64_t(h1) << 32) | h2, but we only really need the lower 32-bit half)
-    return size_t(h2);
+        return size_t(h);
+    }
+    else
+    {
+        // finalizer from MurmurHash64B
+        const uint32_t m = 0x5bd1e995;
+
+        uint32_t h1 = uint32_t(key.value);
+        uint32_t h2 = uint32_t(key.value >> 32) ^ (key.type * m);
+
+        h1 ^= h2 >> 18;
+        h1 *= m;
+        h2 ^= h1 >> 22;
+        h2 *= m;
+        h1 ^= h2 >> 17;
+        h1 *= m;
+        h2 ^= h1 >> 19;
+        h2 *= m;
+
+        // ... truncated to 32-bit output (normally hash is equal to (uint64_t(h1) << 32) | h2, but we only really need the lower 32-bit half)
+        return size_t(h2);
+    }
 }
 
 size_t BytecodeBuilder::TableShapeHash::operator()(const TableShape& v) const
@@ -330,6 +354,26 @@ int32_t BytecodeBuilder::addConstantNumber(double value)
     return addConstant(k, c);
 }
 
+int32_t BytecodeBuilder::addConstantVector(double x, double y, double z)
+{
+    float fx = (float)x;
+    float fy = (float)y;
+    float fz = (float)z;
+
+    Constant c = {Constant::Type_Vector};
+    c.valueVector[0] = fx;
+    c.valueVector[1] = fy;
+    c.valueVector[2] = fz;
+
+    ConstantKey k = {Constant::Type_Vector};
+    static_assert(sizeof(k.value) == sizeof(fx) + sizeof(fy) && sizeof(k.extra) == sizeof(fz), "Expecting vector to have three 32-bit components");
+    memcpy(&k.value, &fx, sizeof(fx));
+    memcpy((char*)&k.value + sizeof(fx), &fy, sizeof(fy));
+    memcpy(&k.extra, &fz, sizeof(fz));
+
+    return addConstant(k, c);
+}
+
 int32_t BytecodeBuilder::addConstantString(StringRef value)
 {
     unsigned int index = addStringTableEntry(value);
@@ -642,6 +686,13 @@ void BytecodeBuilder::writeFunction(std::string& ss, uint32_t id, uint8_t flags)
             writeDouble(ss, c.valueNumber);
             break;
 
+        case Constant::Type_Vector:
+            writeByte(ss, LBC_CONSTANT_VECTOR);
+            writeFloat(ss, c.valueVector[0]);
+            writeFloat(ss, c.valueVector[1]);
+            writeFloat(ss, c.valueVector[2]);
+            break;
+
         case Constant::Type_String:
             writeByte(ss, LBC_CONSTANT_STRING);
             writeVarInt(ss, c.valueString);
diff --git a/Compiler/src/Compiler.cpp b/Compiler/src/Compiler.cpp
index e0a0cac8..7859b618 100644
--- a/Compiler/src/Compiler.cpp
+++ b/Compiler/src/Compiler.cpp
@@ -816,6 +816,28 @@ struct Compiler
             }
         }
 
+        // Optimization: replace vector constructor calls with constant loads when all arguments are numbers
+        if (bfid == LBF_VECTOR && expr->args.size == 3 && targetCount == 1 && isConstant(expr->args.data[0]) && isConstant(expr->args.data[1]) && isConstant(expr->args.data[2]))
+        {
+            Constant cx = getConstant(expr->args.data[0]);
+            Constant cy = getConstant(expr->args.data[1]);
+            Constant cz = getConstant(expr->args.data[2]);
+
+            if (cx.type == Constant::Type_Number && cy.type == Constant::Type_Number && cz.type == Constant::Type_Number)
+            {
+                double x = cx.valueNumber;
+                double y = cy.valueNumber;
+                double z = cz.valueNumber;
+
+                int32_t cid = bytecode.addConstantVector(x, y, z);
+                if (cid < 0)
+                    CompileError::raise(expr->location, "Exceeded constant limit; simplify the code to compile");
+
+                emitLoadK(target, cid);
+                return;
+            }
+        }
+
         if (expr->self)
         {
             AstExprIndexName* fi = expr->func->as<AstExprIndexName>();
diff --git a/VM/src/lvmload.cpp b/VM/src/lvmload.cpp
index 365aa5d3..dc11dcd1 100644
--- a/VM/src/lvmload.cpp
+++ b/VM/src/lvmload.cpp
@@ -287,6 +287,15 @@ int luau_load(lua_State* L, const char* chunkname, const char* data, size_t size
                 break;
             }
 
+            case LBC_CONSTANT_VECTOR:
+            {
+                float x = read<float>(data, size, offset);
+                float y = read<float>(data, size, offset);
+                float z = read<float>(data, size, offset);
+                setvvalue(&p->k[j], x, y, z, 0.0f);
+                break;
+            }
+
             case LBC_CONSTANT_STRING:
             {
                 TString* v = readString(strings, data, size, offset);