mirror of
https://github.com/nodejs/node.git
synced 2024-11-21 21:19:50 +01:00
src: use simdutf for converting externalized builtins to UTF-16
Remove the dependency on ICU for this part, as well as the hacky way of converting embedder main sources to UTF-8 via V8 APIs. Allow `UnionBytes` to own the memory its pointing to in order to simplify the code on the `BuiltinLoader` side. PR-URL: https://github.com/nodejs/node/pull/46119 Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com> Reviewed-By: Darshan Sen <raisinten@gmail.com>
This commit is contained in:
parent
757c104147
commit
21fb98e2bf
@ -2024,11 +2024,7 @@ output['variables']['node_builtin_shareable_builtins'] = []
|
||||
for builtin in shareable_builtins:
|
||||
builtin_id = 'node_shared_builtin_' + builtin.replace('/', '_') + '_path'
|
||||
if getattr(options, builtin_id):
|
||||
if options.with_intl == 'none':
|
||||
option_name = '--shared-builtin-' + builtin + '-path'
|
||||
error(option_name + ' is incompatible with --with-intl=none' )
|
||||
else:
|
||||
output['defines'] += [builtin_id.upper() + '=' + getattr(options, builtin_id)]
|
||||
output['defines'] += [builtin_id.upper() + '=' + getattr(options, builtin_id)]
|
||||
else:
|
||||
output['variables']['node_builtin_shareable_builtins'] += [shareable_builtins[builtin]]
|
||||
|
||||
|
1
node.gyp
1
node.gyp
@ -973,6 +973,7 @@
|
||||
'deps/googletest/googletest.gyp:gtest_main',
|
||||
'deps/histogram/histogram.gyp:histogram',
|
||||
'deps/uvwasi/uvwasi.gyp:uvwasi',
|
||||
'deps/simdutf/simdutf.gyp:simdutf',
|
||||
],
|
||||
|
||||
'includes': [
|
||||
|
@ -467,21 +467,10 @@ MaybeLocal<Value> LoadEnvironment(
|
||||
Environment* env,
|
||||
const char* main_script_source_utf8) {
|
||||
CHECK_NOT_NULL(main_script_source_utf8);
|
||||
Isolate* isolate = env->isolate();
|
||||
return LoadEnvironment(
|
||||
env,
|
||||
[&](const StartExecutionCallbackInfo& info) -> MaybeLocal<Value> {
|
||||
// This is a slightly hacky way to convert UTF-8 to UTF-16.
|
||||
Local<String> str =
|
||||
String::NewFromUtf8(isolate,
|
||||
main_script_source_utf8).ToLocalChecked();
|
||||
auto main_utf16 = std::make_unique<String::Value>(isolate, str);
|
||||
|
||||
// TODO(addaleax): Avoid having a global table for all scripts.
|
||||
env, [&](const StartExecutionCallbackInfo& info) -> MaybeLocal<Value> {
|
||||
std::string name = "embedder_main_" + std::to_string(env->thread_id());
|
||||
builtins::BuiltinLoader::Add(
|
||||
name.c_str(), UnionBytes(**main_utf16, main_utf16->length()));
|
||||
env->set_main_utf16(std::move(main_utf16));
|
||||
builtins::BuiltinLoader::Add(name.c_str(), main_script_source_utf8);
|
||||
Realm* realm = env->principal_realm();
|
||||
|
||||
return realm->ExecuteBootstrapper(name.c_str());
|
||||
|
@ -805,11 +805,6 @@ void Environment::RemoveCleanupHook(CleanupQueue::Callback fn, void* arg) {
|
||||
cleanup_queue_.Remove(fn, arg);
|
||||
}
|
||||
|
||||
void Environment::set_main_utf16(std::unique_ptr<v8::String::Value> str) {
|
||||
CHECK(!main_utf16_);
|
||||
main_utf16_ = std::move(str);
|
||||
}
|
||||
|
||||
void Environment::set_process_exit_handler(
|
||||
std::function<void(Environment*, ExitCode)>&& handler) {
|
||||
process_exit_handler_ = std::move(handler);
|
||||
|
@ -961,7 +961,6 @@ class Environment : public MemoryRetainer {
|
||||
|
||||
#endif // HAVE_INSPECTOR
|
||||
|
||||
inline void set_main_utf16(std::unique_ptr<v8::String::Value>);
|
||||
inline void set_process_exit_handler(
|
||||
std::function<void(Environment*, ExitCode)>&& handler);
|
||||
|
||||
@ -1144,11 +1143,6 @@ class Environment : public MemoryRetainer {
|
||||
|
||||
std::unique_ptr<Realm> principal_realm_ = nullptr;
|
||||
|
||||
// Keeps the main script source alive is one was passed to LoadEnvironment().
|
||||
// We should probably find a way to just use plain `v8::String`s created from
|
||||
// the source passed to LoadEnvironment() directly instead.
|
||||
std::unique_ptr<v8::String::Value> main_utf16_;
|
||||
|
||||
// Used by allocate_managed_buffer() and release_managed_buffer() to keep
|
||||
// track of the BackingStore for a given pointer.
|
||||
std::unordered_map<char*, std::unique_ptr<v8::BackingStore>>
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "env-inl.h"
|
||||
#include "node_external_reference.h"
|
||||
#include "node_internals.h"
|
||||
#include "simdutf.h"
|
||||
#include "util-inl.h"
|
||||
|
||||
namespace node {
|
||||
@ -35,7 +36,6 @@ BuiltinLoader BuiltinLoader::instance_;
|
||||
|
||||
BuiltinLoader::BuiltinLoader() : config_(GetConfig()), has_code_cache_(false) {
|
||||
LoadJavaScriptSource();
|
||||
#if defined(NODE_HAVE_I18N_SUPPORT)
|
||||
#ifdef NODE_SHARED_BUILTIN_CJS_MODULE_LEXER_LEXER_PATH
|
||||
AddExternalizedBuiltin(
|
||||
"internal/deps/cjs-module-lexer/lexer",
|
||||
@ -52,7 +52,6 @@ BuiltinLoader::BuiltinLoader() : config_(GetConfig()), has_code_cache_(false) {
|
||||
AddExternalizedBuiltin("internal/deps/undici/undici",
|
||||
STRINGIFY(NODE_SHARED_BUILTIN_UNDICI_UNDICI_PATH));
|
||||
#endif // NODE_SHARED_BUILTIN_UNDICI_UNDICI_PATH
|
||||
#endif // NODE_HAVE_I18N_SUPPORT
|
||||
}
|
||||
|
||||
BuiltinLoader* BuiltinLoader::GetInstance() {
|
||||
@ -240,7 +239,6 @@ MaybeLocal<String> BuiltinLoader::LoadBuiltinSource(Isolate* isolate,
|
||||
#endif // NODE_BUILTIN_MODULES_PATH
|
||||
}
|
||||
|
||||
#if defined(NODE_HAVE_I18N_SUPPORT)
|
||||
void BuiltinLoader::AddExternalizedBuiltin(const char* id,
|
||||
const char* filename) {
|
||||
std::string source;
|
||||
@ -252,16 +250,20 @@ void BuiltinLoader::AddExternalizedBuiltin(const char* id,
|
||||
return;
|
||||
}
|
||||
|
||||
icu::UnicodeString utf16 = icu::UnicodeString::fromUTF8(
|
||||
icu::StringPiece(source.data(), source.length()));
|
||||
auto source_utf16 = std::make_unique<icu::UnicodeString>(utf16);
|
||||
Add(id,
|
||||
UnionBytes(reinterpret_cast<const uint16_t*>((*source_utf16).getBuffer()),
|
||||
utf16.length()));
|
||||
// keep source bytes for builtin alive while BuiltinLoader exists
|
||||
GetInstance()->externalized_source_bytes_.push_back(std::move(source_utf16));
|
||||
Add(id, source);
|
||||
}
|
||||
|
||||
bool BuiltinLoader::Add(const char* id, std::string_view utf8source) {
|
||||
size_t expected_u16_length =
|
||||
simdutf::utf16_length_from_utf8(utf8source.data(), utf8source.length());
|
||||
auto out = std::make_shared<std::vector<uint16_t>>(expected_u16_length);
|
||||
size_t u16_length = simdutf::convert_utf8_to_utf16le(
|
||||
utf8source.data(),
|
||||
utf8source.length(),
|
||||
reinterpret_cast<char16_t*>(out->data()));
|
||||
out->resize(u16_length);
|
||||
return Add(id, UnionBytes(out));
|
||||
}
|
||||
#endif // NODE_HAVE_I18N_SUPPORT
|
||||
|
||||
// Returns Local<Function> of the compiled module if return_code_cache
|
||||
// is false (we are only compiling the function).
|
||||
|
@ -8,9 +8,6 @@
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#if defined(NODE_HAVE_I18N_SUPPORT)
|
||||
#include <unicode/unistr.h>
|
||||
#endif // NODE_HAVE_I18N_SUPPORT
|
||||
#include <vector>
|
||||
#include "node_mutex.h"
|
||||
#include "node_union_bytes.h"
|
||||
@ -71,6 +68,7 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
|
||||
static v8::Local<v8::String> GetConfigString(v8::Isolate* isolate);
|
||||
static bool Exists(const char* id);
|
||||
static bool Add(const char* id, const UnionBytes& source);
|
||||
static bool Add(const char* id, std::string_view utf8source);
|
||||
|
||||
static bool CompileAllBuiltins(v8::Local<v8::Context> context);
|
||||
static void RefreshCodeCache(const std::vector<CodeCacheInfo>& in);
|
||||
@ -136,18 +134,13 @@ class NODE_EXTERN_PRIVATE BuiltinLoader {
|
||||
static void HasCachedBuiltins(
|
||||
const v8::FunctionCallbackInfo<v8::Value>& args);
|
||||
|
||||
#if defined(NODE_HAVE_I18N_SUPPORT)
|
||||
static void AddExternalizedBuiltin(const char* id, const char* filename);
|
||||
#endif // NODE_HAVE_I18N_SUPPORT
|
||||
|
||||
static BuiltinLoader instance_;
|
||||
BuiltinCategories builtin_categories_;
|
||||
BuiltinSourceMap source_;
|
||||
BuiltinCodeCacheMap code_cache_;
|
||||
UnionBytes config_;
|
||||
#if defined(NODE_HAVE_I18N_SUPPORT)
|
||||
std::list<std::unique_ptr<icu::UnicodeString>> externalized_source_bytes_;
|
||||
#endif // NODE_HAVE_I18N_SUPPORT
|
||||
|
||||
// Used to synchronize access to the code cache map
|
||||
Mutex code_cache_mutex_;
|
||||
|
@ -11,57 +11,20 @@
|
||||
|
||||
namespace node {
|
||||
|
||||
class NonOwningExternalOneByteResource
|
||||
: public v8::String::ExternalOneByteStringResource {
|
||||
public:
|
||||
explicit NonOwningExternalOneByteResource(const uint8_t* data, size_t length)
|
||||
: data_(data), length_(length) {}
|
||||
~NonOwningExternalOneByteResource() override = default;
|
||||
|
||||
const char* data() const override {
|
||||
return reinterpret_cast<const char*>(data_);
|
||||
}
|
||||
size_t length() const override { return length_; }
|
||||
|
||||
NonOwningExternalOneByteResource(const NonOwningExternalOneByteResource&) =
|
||||
delete;
|
||||
NonOwningExternalOneByteResource& operator=(
|
||||
const NonOwningExternalOneByteResource&) = delete;
|
||||
|
||||
private:
|
||||
const uint8_t* data_;
|
||||
size_t length_;
|
||||
};
|
||||
|
||||
class NonOwningExternalTwoByteResource
|
||||
: public v8::String::ExternalStringResource {
|
||||
public:
|
||||
explicit NonOwningExternalTwoByteResource(const uint16_t* data, size_t length)
|
||||
: data_(data), length_(length) {}
|
||||
~NonOwningExternalTwoByteResource() override = default;
|
||||
|
||||
const uint16_t* data() const override { return data_; }
|
||||
size_t length() const override { return length_; }
|
||||
|
||||
NonOwningExternalTwoByteResource(const NonOwningExternalTwoByteResource&) =
|
||||
delete;
|
||||
NonOwningExternalTwoByteResource& operator=(
|
||||
const NonOwningExternalTwoByteResource&) = delete;
|
||||
|
||||
private:
|
||||
const uint16_t* data_;
|
||||
size_t length_;
|
||||
};
|
||||
|
||||
// Similar to a v8::String, but it's independent from Isolates
|
||||
// and can be materialized in Isolates as external Strings
|
||||
// via ToStringChecked. The data pointers are owned by the caller.
|
||||
// via ToStringChecked.
|
||||
class UnionBytes {
|
||||
public:
|
||||
UnionBytes(const uint16_t* data, size_t length)
|
||||
: one_bytes_(nullptr), two_bytes_(data), length_(length) {}
|
||||
UnionBytes(const uint8_t* data, size_t length)
|
||||
: one_bytes_(data), two_bytes_(nullptr), length_(length) {}
|
||||
template <typename T> // T = uint8_t or uint16_t
|
||||
explicit UnionBytes(std::shared_ptr<std::vector</*const*/ T>> data)
|
||||
: UnionBytes(data->data(), data->size()) {
|
||||
owning_ptr_ = data;
|
||||
}
|
||||
|
||||
UnionBytes(const UnionBytes&) = default;
|
||||
UnionBytes& operator=(const UnionBytes&) = default;
|
||||
@ -77,23 +40,14 @@ class UnionBytes {
|
||||
CHECK_NOT_NULL(one_bytes_);
|
||||
return one_bytes_;
|
||||
}
|
||||
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) const {
|
||||
if (is_one_byte()) {
|
||||
NonOwningExternalOneByteResource* source =
|
||||
new NonOwningExternalOneByteResource(one_bytes_data(), length_);
|
||||
return v8::String::NewExternalOneByte(isolate, source).ToLocalChecked();
|
||||
} else {
|
||||
NonOwningExternalTwoByteResource* source =
|
||||
new NonOwningExternalTwoByteResource(two_bytes_data(), length_);
|
||||
return v8::String::NewExternalTwoByte(isolate, source).ToLocalChecked();
|
||||
}
|
||||
}
|
||||
size_t length() { return length_; }
|
||||
v8::Local<v8::String> ToStringChecked(v8::Isolate* isolate) const;
|
||||
size_t length() const { return length_; }
|
||||
|
||||
private:
|
||||
const uint8_t* one_bytes_;
|
||||
const uint16_t* two_bytes_;
|
||||
size_t length_;
|
||||
std::shared_ptr<void> owning_ptr_;
|
||||
};
|
||||
|
||||
} // namespace node
|
||||
|
62
src/util.cc
62
src/util.cc
@ -515,4 +515,66 @@ void SetConstructorFunction(Isolate* isolate,
|
||||
that->Set(name, tmpl);
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class NonOwningExternalOneByteResource
|
||||
: public v8::String::ExternalOneByteStringResource {
|
||||
public:
|
||||
explicit NonOwningExternalOneByteResource(const UnionBytes& source)
|
||||
: source_(source) {}
|
||||
~NonOwningExternalOneByteResource() override = default;
|
||||
|
||||
const char* data() const override {
|
||||
return reinterpret_cast<const char*>(source_.one_bytes_data());
|
||||
}
|
||||
size_t length() const override { return source_.length(); }
|
||||
|
||||
NonOwningExternalOneByteResource(const NonOwningExternalOneByteResource&) =
|
||||
delete;
|
||||
NonOwningExternalOneByteResource& operator=(
|
||||
const NonOwningExternalOneByteResource&) = delete;
|
||||
|
||||
private:
|
||||
const UnionBytes source_;
|
||||
};
|
||||
|
||||
class NonOwningExternalTwoByteResource
|
||||
: public v8::String::ExternalStringResource {
|
||||
public:
|
||||
explicit NonOwningExternalTwoByteResource(const UnionBytes& source)
|
||||
: source_(source) {}
|
||||
~NonOwningExternalTwoByteResource() override = default;
|
||||
|
||||
const uint16_t* data() const override { return source_.two_bytes_data(); }
|
||||
size_t length() const override { return source_.length(); }
|
||||
|
||||
NonOwningExternalTwoByteResource(const NonOwningExternalTwoByteResource&) =
|
||||
delete;
|
||||
NonOwningExternalTwoByteResource& operator=(
|
||||
const NonOwningExternalTwoByteResource&) = delete;
|
||||
|
||||
private:
|
||||
const UnionBytes source_;
|
||||
};
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
Local<String> UnionBytes::ToStringChecked(Isolate* isolate) const {
|
||||
if (UNLIKELY(length() == 0)) {
|
||||
// V8 requires non-null data pointers for empty external strings,
|
||||
// but we don't guarantee that. Solve this by not creating an
|
||||
// external string at all in that case.
|
||||
return String::Empty(isolate);
|
||||
}
|
||||
if (is_one_byte()) {
|
||||
NonOwningExternalOneByteResource* source =
|
||||
new NonOwningExternalOneByteResource(*this);
|
||||
return String::NewExternalOneByte(isolate, source).ToLocalChecked();
|
||||
} else {
|
||||
NonOwningExternalTwoByteResource* source =
|
||||
new NonOwningExternalTwoByteResource(*this);
|
||||
return String::NewExternalTwoByte(isolate, source).ToLocalChecked();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace node
|
||||
|
@ -1,7 +1,8 @@
|
||||
#include "util-inl.h"
|
||||
#include "debug_utils-inl.h"
|
||||
#include "env-inl.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "simdutf.h"
|
||||
#include "util-inl.h"
|
||||
|
||||
using node::Calloc;
|
||||
using node::Malloc;
|
||||
@ -298,3 +299,17 @@ TEST(UtilTest, SPrintF) {
|
||||
const std::string with_zero = std::string("a") + '\0' + 'b';
|
||||
EXPECT_EQ(SPrintF("%s", with_zero), with_zero);
|
||||
}
|
||||
|
||||
TEST(UtilTest, SimdutfEndiannessDoesNotMeanEndianness) {
|
||||
// In simdutf, "LE" does *not* refer to Little Endian, it refers
|
||||
// to 16-byte code units that are stored using *host* endianness.
|
||||
// This is weird and confusing naming, and so we add this assertion
|
||||
// here to verify that this is actually the case (so that CI tells
|
||||
// us if it changed, because for most people Little Endian is
|
||||
// host endianness, so locally everything would work fine).
|
||||
const char utf8source[] = "\xe7\x8c\xab";
|
||||
char16_t u16output;
|
||||
size_t u16len = simdutf::convert_utf8_to_utf16le(utf8source, 3, &u16output);
|
||||
EXPECT_EQ(u16len, 1u);
|
||||
EXPECT_EQ(u16output, 0x732B);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user