mirror of
https://github.com/nodejs/node.git
synced 2024-11-29 07:00:59 +01:00
5aaa7fee2e
Co-authored-by: Ben Noordhuis <info@bnoordhuis.nl> PR-URL: https://github.com/nodejs/node/pull/27770 Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: Eugene Ostroukhov <eostroukhov@google.com> Reviewed-By: Rich Trott <rtrott@gmail.com>
511 lines
20 KiB
C++
511 lines
20 KiB
C++
// Copyright 2019 The Chromium Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
#ifndef V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_
|
|
#define V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <limits>
|
|
#include <memory>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
namespace v8_inspector_protocol_encoding {
|
|
|
|
// =============================================================================
// span - sequence of bytes
// =============================================================================

// This template is similar to std::span, which will be included in C++20.
//
// A span is a non-owning, read-only view over |size| consecutive elements
// of type T starting at |data|. It stores only the pointer and the element
// count; none of the accessors below perform bounds checking.
template <typename T>
class span {
 public:
  using index_type = size_t;

  // Constructs an empty span (null data pointer, size 0).
  span() : data_(nullptr), size_(0) {}
  // Constructs a view over |size| elements starting at |data|.
  span(const T* data, index_type size) : data_(data), size_(size) {}

  // Pointer to the first element (nullptr for a default-constructed span).
  const T* data() const { return data_; }

  // Iteration support, e.g. for range-based for loops.
  const T* begin() const { return data_; }
  const T* end() const { return data_ + size_; }

  // Unchecked element access.
  const T& operator[](index_type idx) const { return data_[idx]; }

  // Returns the view of |count| elements starting at |offset|. Neither
  // |offset| nor |count| is validated against size().
  span<T> subspan(index_type offset, index_type count) const {
    return span(data_ + offset, count);
  }

  // Returns the view from |offset| to the end of this span. |offset| is not
  // validated; the result's size is computed as size() - offset.
  span<T> subspan(index_type offset) const {
    return span(data_ + offset, size_ - offset);
  }

  bool empty() const { return size_ == 0; }

  // Number of elements, and the number of bytes they occupy.
  index_type size() const { return size_; }
  index_type size_bytes() const { return size_ * sizeof(T); }

 private:
  const T* data_;
  index_type size_;
};
|
|
|
|
template <typename T>
|
|
span<T> SpanFrom(const std::vector<T>& v) {
|
|
return span<T>(v.data(), v.size());
|
|
}
|
|
|
|
template <size_t N>
|
|
span<uint8_t> SpanFrom(const char (&str)[N]) {
|
|
return span<uint8_t>(reinterpret_cast<const uint8_t*>(str), N - 1);
|
|
}
|
|
|
|
// Returns a byte span over the NUL-terminated C string |str| (length taken
// from strlen), or an empty span if |str| is null.
inline span<uint8_t> SpanFrom(const char* str) {
  return str ? span<uint8_t>(reinterpret_cast<const uint8_t*>(str), strlen(str))
             : span<uint8_t>();
}
|
|
|
|
// Returns a byte span over the characters of |v| (v.size() bytes starting at
// v.data()). The span does not own the characters.
inline span<uint8_t> SpanFrom(const std::string& v) {
  return span<uint8_t>(reinterpret_cast<const uint8_t*>(v.data()), v.size());
}
|
|
|
|
// =============================================================================
// Status and Error codes
// =============================================================================

// Error codes carried by Status. The numeric values are assigned explicitly.
enum class Error {
  OK = 0,
  // JSON parsing errors - json_parser.{h,cc}.
  JSON_PARSER_UNPROCESSED_INPUT_REMAINS = 0x01,
  JSON_PARSER_STACK_LIMIT_EXCEEDED = 0x02,
  JSON_PARSER_NO_INPUT = 0x03,
  JSON_PARSER_INVALID_TOKEN = 0x04,
  JSON_PARSER_INVALID_NUMBER = 0x05,
  JSON_PARSER_INVALID_STRING = 0x06,
  JSON_PARSER_UNEXPECTED_ARRAY_END = 0x07,
  JSON_PARSER_COMMA_OR_ARRAY_END_EXPECTED = 0x08,
  JSON_PARSER_STRING_LITERAL_EXPECTED = 0x09,
  JSON_PARSER_COLON_EXPECTED = 0x0a,
  JSON_PARSER_UNEXPECTED_MAP_END = 0x0b,
  JSON_PARSER_COMMA_OR_MAP_END_EXPECTED = 0x0c,
  JSON_PARSER_VALUE_EXPECTED = 0x0d,

  // Errors with the CBOR_ prefix.
  CBOR_INVALID_INT32 = 0x0e,
  CBOR_INVALID_DOUBLE = 0x0f,
  CBOR_INVALID_ENVELOPE = 0x10,
  CBOR_INVALID_STRING8 = 0x11,
  CBOR_INVALID_STRING16 = 0x12,
  CBOR_INVALID_BINARY = 0x13,
  CBOR_UNSUPPORTED_VALUE = 0x14,
  CBOR_NO_INPUT = 0x15,
  CBOR_INVALID_START_BYTE = 0x16,
  CBOR_UNEXPECTED_EOF_EXPECTED_VALUE = 0x17,
  CBOR_UNEXPECTED_EOF_IN_ARRAY = 0x18,
  CBOR_UNEXPECTED_EOF_IN_MAP = 0x19,
  CBOR_INVALID_MAP_KEY = 0x1a,
  CBOR_STACK_LIMIT_EXCEEDED = 0x1b,
  CBOR_TRAILING_JUNK = 0x1c,
  CBOR_MAP_START_EXPECTED = 0x1d,
  CBOR_MAP_STOP_EXPECTED = 0x1e,
  CBOR_ENVELOPE_SIZE_LIMIT_EXCEEDED = 0x1f,
};
|
|
|
|
// A status value with position that can be copied. The default status
|
|
// is OK. Usually, error status values should come with a valid position.
|
|
struct Status {
|
|
static constexpr size_t npos() { return std::numeric_limits<size_t>::max(); }
|
|
|
|
bool ok() const { return error == Error::OK; }
|
|
|
|
Error error = Error::OK;
|
|
size_t pos = npos();
|
|
Status(Error error, size_t pos) : error(error), pos(pos) {}
|
|
Status() = default;
|
|
|
|
// Returns a 7 bit US-ASCII string, either "OK" or an error message
|
|
// that includes the position.
|
|
std::string ToASCIIString() const;
|
|
|
|
private:
|
|
std::string ToASCIIString(const char* msg) const;
|
|
};
|
|
|
|
// Handler interface for parser events emitted by a streaming parser.
|
|
// See cbor::NewCBOREncoder, cbor::ParseCBOR, json::NewJSONEncoder,
|
|
// json::ParseJSON.
|
|
class StreamingParserHandler {
|
|
public:
|
|
virtual ~StreamingParserHandler() = default;
|
|
virtual void HandleMapBegin() = 0;
|
|
virtual void HandleMapEnd() = 0;
|
|
virtual void HandleArrayBegin() = 0;
|
|
virtual void HandleArrayEnd() = 0;
|
|
virtual void HandleString8(span<uint8_t> chars) = 0;
|
|
virtual void HandleString16(span<uint16_t> chars) = 0;
|
|
virtual void HandleBinary(span<uint8_t> bytes) = 0;
|
|
virtual void HandleDouble(double value) = 0;
|
|
virtual void HandleInt32(int32_t value) = 0;
|
|
virtual void HandleBool(bool value) = 0;
|
|
virtual void HandleNull() = 0;
|
|
|
|
// The parser may send one error even after other events have already
|
|
// been received. Client code is reponsible to then discard the
|
|
// already processed events.
|
|
// |error| must be an eror, as in, |error.is_ok()| can't be true.
|
|
virtual void HandleError(Status error) = 0;
|
|
};
|
|
|
|
namespace cbor {
|
|
// The binary encoding for the inspector protocol follows the CBOR specification
// (RFC 7049). Additional constraints:
// - Only indefinite length maps and arrays are supported.
// - Maps and arrays are wrapped with an envelope, that is, a
//   CBOR tag with value 24 followed by a byte string specifying
//   the byte length of the enclosed map / array. The byte string
//   must use a 32 bit wide length.
// - At the top level, a message must be an indefinite length map
//   wrapped by an envelope.
// - Maximal size for messages is 2^32 (4 GB).
// - For scalars, we support only the int32_t range, encoded as
//   UNSIGNED/NEGATIVE (major types 0 / 1).
// - UTF16 strings, including with unbalanced surrogate pairs, are encoded
//   as CBOR BYTE_STRING (major type 2). For such strings, the number of
//   bytes encoded must be even.
// - UTF8 strings (major type 3) are supported.
// - 7 bit US-ASCII strings must always be encoded as UTF8 strings, never
//   as UTF16 strings.
// - Arbitrary byte arrays, in the inspector protocol called 'binary',
//   are encoded as BYTE_STRING (major type 2), prefixed with a byte
//   indicating base64 when rendered as JSON.
|
|
|
|
// =============================================================================
|
|
// Detecting CBOR content
|
|
// =============================================================================
|
|
|
|
// The first byte for an envelope, which we use for wrapping dictionaries
|
|
// and arrays; and the byte that indicates a byte string with 32 bit length.
|
|
// These two bytes start an envelope, and thereby also any CBOR message
|
|
// produced or consumed by this protocol. See also |EnvelopeEncoder| below.
|
|
uint8_t InitialByteForEnvelope();
|
|
uint8_t InitialByteFor32BitLengthByteString();
|
|
|
|
// Checks whether |msg| is a cbor message.
|
|
bool IsCBORMessage(span<uint8_t> msg);
|
|
|
|
// =============================================================================
// Encoding individual CBOR items
// =============================================================================

// Some constants for CBOR tokens that only take a single byte on the wire.
// Each function returns that single byte.
uint8_t EncodeTrue();
uint8_t EncodeFalse();
uint8_t EncodeNull();
uint8_t EncodeIndefiniteLengthArrayStart();
uint8_t EncodeIndefiniteLengthMapStart();
uint8_t EncodeStop();
|
|
|
|
// Encodes |value| as |UNSIGNED| (major type 0) iff >= 0, or |NEGATIVE|
// (major type 1) iff < 0. Overloads exist for both supported output
// containers.
void EncodeInt32(int32_t value, std::vector<uint8_t>* out);
void EncodeInt32(int32_t value, std::string* out);
|
|
|
|
// Encodes a UTF16 string as a BYTE_STRING (major type 2). Each utf16
|
|
// character in |in| is emitted with most significant byte first,
|
|
// appending to |out|.
|
|
void EncodeString16(span<uint16_t> in, std::vector<uint8_t>* out);
|
|
void EncodeString16(span<uint16_t> in, std::string* out);
|
|
|
|
// Encodes a UTF8 string |in| as STRING (major type 3).
|
|
void EncodeString8(span<uint8_t> in, std::vector<uint8_t>* out);
|
|
void EncodeString8(span<uint8_t> in, std::string* out);
|
|
|
|
// Encodes the given |latin1| string as STRING8.
|
|
// If any non-ASCII character is present, it will be represented
|
|
// as a 2 byte UTF8 sequence.
|
|
void EncodeFromLatin1(span<uint8_t> latin1, std::vector<uint8_t>* out);
|
|
void EncodeFromLatin1(span<uint8_t> latin1, std::string* out);
|
|
|
|
// Encodes the given |utf16| string as STRING8 if it's entirely US-ASCII.
|
|
// Otherwise, encodes as STRING16.
|
|
void EncodeFromUTF16(span<uint16_t> utf16, std::vector<uint8_t>* out);
|
|
void EncodeFromUTF16(span<uint16_t> utf16, std::string* out);
|
|
|
|
// Encodes arbitrary binary data in |in| as a BYTE_STRING (major type 2) with
|
|
// definitive length, prefixed with tag 22 indicating expected conversion to
|
|
// base64 (see RFC 7049, Table 3 and Section 2.4.4.2).
|
|
void EncodeBinary(span<uint8_t> in, std::vector<uint8_t>* out);
|
|
void EncodeBinary(span<uint8_t> in, std::string* out);
|
|
|
|
// Encodes a double as Major type 7 (SIMPLE_VALUE),
// with additional info = 27, followed by 8 bytes in big endian.
void EncodeDouble(double value, std::vector<uint8_t>* out);
void EncodeDouble(double value, std::string* out);
|
|
|
|
// =============================================================================
// cbor::EnvelopeEncoder - for wrapping submessages
// =============================================================================

// An envelope indicates the byte length of a wrapped item.
// We use this for maps and arrays, which allows the decoder
// to skip such (nested) values wholesale.
// It's implemented as a CBOR tag (major type 6) with additional
// info = 24, followed by a byte string with a 32 bit length value;
// so the maximal structure that we can wrap is 2^32 bytes long.
// See also: https://tools.ietf.org/html/rfc7049#section-2.4.4.1
class EnvelopeEncoder {
 public:
  // Emits the envelope start bytes and records the position for the
  // byte size in |byte_size_pos_|. Also emits empty bytes for the
  // byte size so that encoding can continue.
  void EncodeStart(std::vector<uint8_t>* out);
  void EncodeStart(std::string* out);
  // This records the current size in |out| at position byte_size_pos_.
  // Returns true iff successful.
  bool EncodeStop(std::vector<uint8_t>* out);
  bool EncodeStop(std::string* out);

 private:
  // Position within the output at which the byte size is recorded.
  size_t byte_size_pos_ = 0;
};
|
|
|
|
// =============================================================================
|
|
// cbor::NewCBOREncoder - for encoding from a streaming parser
|
|
// =============================================================================
|
|
|
|
// This can be used to convert to CBOR, by passing the return value to a parser
|
|
// that drives it. The handler will encode into |out|, and iff an error occurs
|
|
// it will set |status| to an error and clear |out|. Otherwise, |status.ok()|
|
|
// will be |true|.
|
|
std::unique_ptr<StreamingParserHandler> NewCBOREncoder(
|
|
std::vector<uint8_t>* out,
|
|
Status* status);
|
|
std::unique_ptr<StreamingParserHandler> NewCBOREncoder(std::string* out,
|
|
Status* status);
|
|
|
|
// =============================================================================
// cbor::CBORTokenizer - for parsing individual CBOR items
// =============================================================================

// Tags for the tokens within a CBOR message that CBORTokenizer understands.
// Note that this is not the same terminology as the CBOR spec (RFC 7049),
// but rather, our adaptation. For instance, we lump unsigned and signed
// major type into INT32 here (and disallow values outside the int32_t range).
enum class CBORTokenTag {
  // Encountered an error in the structure of the message. Consult
  // status() for details.
  ERROR_VALUE,
  // Booleans and NULL.
  TRUE_VALUE,
  FALSE_VALUE,
  NULL_VALUE,
  // An int32_t (signed 32 bit integer).
  INT32,
  // A double (64 bit floating point).
  DOUBLE,
  // A UTF8 string.
  STRING8,
  // A UTF16 string.
  STRING16,
  // A binary string.
  BINARY,
  // Starts an indefinite length map; after the map start we expect
  // alternating keys and values, followed by STOP.
  MAP_START,
  // Starts an indefinite length array; after the array start we
  // expect values, followed by STOP.
  ARRAY_START,
  // Ends a map or an array.
  STOP,
  // An envelope indicator, wrapping a map or array.
  // Internally this carries the byte length of the wrapped
  // map or array. While CBORTokenizer::Next() will read / skip the entire
  // envelope, CBORTokenizer::EnterEnvelope() reads the tokens
  // inside of it.
  ENVELOPE,
  // We've reached the end; there is nothing else to read.
  DONE,
};
|
|
|
|
// The major types from RFC 7049 Section 2.1. The enumerator values are the
// major type numbers used in this header's comments (e.g. BYTE_STRING is
// major type 2, STRING is major type 3).
enum class MajorType {
  UNSIGNED = 0,
  NEGATIVE = 1,
  BYTE_STRING = 2,
  STRING = 3,
  ARRAY = 4,
  MAP = 5,
  TAG = 6,
  SIMPLE_VALUE = 7
};
|
|
|
|
// CBORTokenizer segments a CBOR message, presenting the tokens therein as
|
|
// numbers, strings, etc. This is not a complete CBOR parser, but makes it much
|
|
// easier to implement one (e.g. ParseCBOR, above). It can also be used to parse
|
|
// messages partially.
|
|
class CBORTokenizer {
|
|
public:
|
|
explicit CBORTokenizer(span<uint8_t> bytes);
|
|
~CBORTokenizer();
|
|
|
|
// Identifies the current token that we're looking at,
|
|
// or ERROR_VALUE (in which ase ::Status() has details)
|
|
// or DONE (if we're past the last token).
|
|
CBORTokenTag TokenTag() const;
|
|
|
|
// Advances to the next token.
|
|
void Next();
|
|
// Can only be called if TokenTag() == CBORTokenTag::ENVELOPE.
|
|
// While Next() would skip past the entire envelope / what it's
|
|
// wrapping, EnterEnvelope positions the cursor inside of the envelope,
|
|
// letting the client explore the nested structure.
|
|
void EnterEnvelope();
|
|
|
|
// If TokenTag() is CBORTokenTag::ERROR_VALUE, then Status().error describes
|
|
// the error more precisely; otherwise it'll be set to Error::OK.
|
|
// In either case, Status().pos is the current position.
|
|
struct Status Status() const;
|
|
|
|
// The following methods retrieve the token values. They can only
|
|
// be called if TokenTag() matches.
|
|
|
|
// To be called only if ::TokenTag() == CBORTokenTag::INT32.
|
|
int32_t GetInt32() const;
|
|
|
|
// To be called only if ::TokenTag() == CBORTokenTag::DOUBLE.
|
|
double GetDouble() const;
|
|
|
|
// To be called only if ::TokenTag() == CBORTokenTag::STRING8.
|
|
span<uint8_t> GetString8() const;
|
|
|
|
// Wire representation for STRING16 is low byte first (little endian).
|
|
// To be called only if ::TokenTag() == CBORTokenTag::STRING16.
|
|
span<uint8_t> GetString16WireRep() const;
|
|
|
|
// To be called only if ::TokenTag() == CBORTokenTag::BINARY.
|
|
span<uint8_t> GetBinary() const;
|
|
|
|
// To be called only if ::TokenTag() == CBORTokenTag::ENVELOPE.
|
|
span<uint8_t> GetEnvelopeContents() const;
|
|
|
|
private:
|
|
void ReadNextToken(bool enter_envelope);
|
|
void SetToken(CBORTokenTag token, size_t token_byte_length);
|
|
void SetError(Error error);
|
|
|
|
span<uint8_t> bytes_;
|
|
CBORTokenTag token_tag_;
|
|
struct Status status_;
|
|
size_t token_byte_length_;
|
|
MajorType token_start_type_;
|
|
uint64_t token_start_internal_value_;
|
|
};
|
|
|
|
// =============================================================================
|
|
// cbor::ParseCBOR - for receiving streaming parser events for CBOR messages
|
|
// =============================================================================
|
|
|
|
// Parses a CBOR encoded message from |bytes|, sending events to
|
|
// |out|. If an error occurs, sends |out->HandleError|, and parsing stops.
|
|
// The client is responsible for discarding the already received information in
|
|
// that case.
|
|
void ParseCBOR(span<uint8_t> bytes, StreamingParserHandler* out);
|
|
|
|
// =============================================================================
|
|
// cbor::AppendString8EntryToMap - for limited in-place editing of messages
|
|
// =============================================================================
|
|
|
|
// Modifies the |cbor| message by appending a new key/value entry at the end
|
|
// of the map. Patches up the envelope size; Status.ok() iff successful.
|
|
// If not successful, |cbor| may be corrupted after this call.
|
|
Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
|
|
span<uint8_t> string8_value,
|
|
std::vector<uint8_t>* cbor);
|
|
Status AppendString8EntryToCBORMap(span<uint8_t> string8_key,
|
|
span<uint8_t> string8_value,
|
|
std::string* cbor);
|
|
|
|
namespace internals { // Exposed only for writing tests.
|
|
int8_t ReadTokenStart(span<uint8_t> bytes,
|
|
cbor::MajorType* type,
|
|
uint64_t* value);
|
|
|
|
void WriteTokenStart(cbor::MajorType type,
|
|
uint64_t value,
|
|
std::vector<uint8_t>* encoded);
|
|
void WriteTokenStart(cbor::MajorType type,
|
|
uint64_t value,
|
|
std::string* encoded);
|
|
} // namespace internals
|
|
} // namespace cbor
|
|
|
|
namespace json {
|
|
// Client code must provide an instance. Implementation should delegate
// to whatever is appropriate.
class Platform {
 public:
  virtual ~Platform() = default;
  // Parses |str| into |result|. Returns false iff there are
  // leftover characters or parsing errors.
  virtual bool StrToD(const char* str, double* result) const = 0;

  // Prints |value| in a format suitable for JSON. The caller owns the
  // returned character buffer.
  virtual std::unique_ptr<char[]> DToStr(double value) const = 0;
};
|
|
|
|
// =============================================================================
|
|
// json::NewJSONEncoder - for encoding streaming parser events as JSON
|
|
// =============================================================================
|
|
|
|
// Returns a handler object which will write ascii characters to |out|.
|
|
// |status->ok()| will be false iff the handler routine HandleError() is called.
|
|
// In that case, we'll stop emitting output.
|
|
// Except for calling the HandleError routine at any time, the client
|
|
// code must call the Handle* methods in an order in which they'd occur
|
|
// in valid JSON; otherwise we may crash (the code uses assert).
|
|
std::unique_ptr<StreamingParserHandler> NewJSONEncoder(
|
|
const Platform* platform,
|
|
std::vector<uint8_t>* out,
|
|
Status* status);
|
|
std::unique_ptr<StreamingParserHandler> NewJSONEncoder(const Platform* platform,
|
|
std::string* out,
|
|
Status* status);
|
|
|
|
// =============================================================================
|
|
// json::ParseJSON - for receiving streaming parser events for JSON
|
|
// =============================================================================
|
|
|
|
void ParseJSON(const Platform& platform,
|
|
span<uint8_t> chars,
|
|
StreamingParserHandler* handler);
|
|
void ParseJSON(const Platform& platform,
|
|
span<uint16_t> chars,
|
|
StreamingParserHandler* handler);
|
|
|
|
// =============================================================================
|
|
// json::ConvertCBORToJSON, json::ConvertJSONToCBOR - for transcoding
|
|
// =============================================================================
|
|
Status ConvertCBORToJSON(const Platform& platform,
|
|
span<uint8_t> cbor,
|
|
std::string* json);
|
|
Status ConvertCBORToJSON(const Platform& platform,
|
|
span<uint8_t> cbor,
|
|
std::vector<uint8_t>* json);
|
|
Status ConvertJSONToCBOR(const Platform& platform,
|
|
span<uint8_t> json,
|
|
std::vector<uint8_t>* cbor);
|
|
Status ConvertJSONToCBOR(const Platform& platform,
|
|
span<uint16_t> json,
|
|
std::vector<uint8_t>* cbor);
|
|
Status ConvertJSONToCBOR(const Platform& platform,
|
|
span<uint8_t> json,
|
|
std::string* cbor);
|
|
Status ConvertJSONToCBOR(const Platform& platform,
|
|
span<uint16_t> json,
|
|
std::string* cbor);
|
|
} // namespace json
|
|
} // namespace v8_inspector_protocol_encoding
|
|
|
|
#endif // V8_INSPECTOR_PROTOCOL_ENCODING_ENCODING_H_
|