From e8f177aa2d5b1e7e45d07df7e218482fb3b35d91 Mon Sep 17 00:00:00 2001 From: Ryan Date: Tue, 26 May 2009 17:46:56 +0200 Subject: [PATCH] Clean up outgoing encoding API. Generally: send(chunk, encoding). --- src/file.js | 7 ++- src/http.js | 152 +++++++++++++++++++-------------------------- src/net.cc | 53 ++++++---------- src/node.cc | 19 ++++++ src/node.h | 3 +- src/node.js | 2 +- website/index.html | 45 +++++++++----- 7 files changed, 139 insertions(+), 142 deletions(-) diff --git a/src/file.js b/src/file.js index 6e0cead1c44..12c95fa7f32 100644 --- a/src/file.js +++ b/src/file.js @@ -41,10 +41,11 @@ node.fs.File = function (options) { var self = this; options = options || {}; - if (options.encoding === undefined) + if (options.encoding === "utf8") { + self.encoding = node.fs.UTF8; + } else { self.encoding = node.fs.RAW; - else - self.encoding = options.encoding + } //node.debug("encoding: opts=" + options.encoding + " self=" + self.encoding); self.fd = options.fd || null; diff --git a/src/http.js b/src/http.js index 05aee5d37cb..fad9535ea9c 100644 --- a/src/http.js +++ b/src/http.js @@ -102,55 +102,54 @@ function toRaw(string) { return a; } +// The send method appends data onto the output array. The deal is, +// the data is either an array of integer, representing binary or it +// is a string in which case it's UTF8 encoded. +// Two things to be considered: +// - we should be able to send mixed encodings. +// - we don't want to call connection.send("smallstring") because that +// is wasteful. *I think* its rather faster to concat inside of JS +// Thus I attempt to concat as much as possible. 
+// +// XXX this function is extremely ugly +function send (output, data, encoding) { + if (data.constructor === String) + encoding = encoding || "ascii"; + else + encoding = "raw"; + + if (output.length == 0) { + output.push([data, encoding]); + return; + } + + var li = output.length-1; + var last_encoding = output[li][1]; + + if (data.constructor === String) { + if ( last_encoding === encoding + || (last_encoding === "utf8" && encoding === "ascii") + ) + { + output[li][0] += data; + return; + } + } + + if (data.constructor === Array && last_encoding === encoding) { + output[li][0] = output[li][0].concat(data); + return; + } + + output.push([data, encoding]); +}; + node.http.ServerResponse = function (connection, responses) { responses.push(this); this.connection = connection; this.closeOnFinish = false; var output = []; - // The send method appends data onto the output array. The deal is, - // the data is either an array of integer, representing binary or it - // is a string in which case it's UTF8 encoded. - // Two things to considered: - // - we should be able to send mixed encodings. - // - we don't want to call connection.send("smallstring") because that - // is wasteful. *I think* its rather faster to concat inside of JS - // Thus I attempt to concat as much as possible. 
- function send (data) { - if (connection.readyState === "closed" || connection.readyState === "readOnly") - { - responses = []; - return; - } - - if (output.length == 0) { - output.push(data); - return; - } - - var li = output.length-1; - - if (data.constructor == String && output[li].constructor == String) { - output[li] += data; - return; - } - - if (data.constructor == Array && output[li].constructor == Array) { - output[li] = output[li].concat(data); - return; - } - - // If the string is small enough, just convert it to binary - if (data.constructor == String - && data.length < 128 - && output[li].constructor == Array) - { - output[li] = output[li].concat(toRaw(data)); - return; - } - - output.push(data); - }; var chunked_encoding = false; @@ -206,34 +205,39 @@ node.http.ServerResponse = function (connection, responses) { header += CRLF; - send(header); + send(output, header); }; - this.sendBody = function (chunk) { + this.sendBody = function (chunk, encoding) { if (chunked_encoding) { - send(chunk.length.toString(16)); - send(CRLF); - send(chunk); - send(CRLF); + send(output, chunk.length.toString(16)); + send(output, CRLF); + send(output, chunk, encoding); + send(output, CRLF); } else { - send(chunk); + send(output, chunk, encoding); } this.flush(); }; this.flush = function () { + if (connection.readyState === "closed" || connection.readyState === "readOnly") + { + responses = []; + return; + } if (responses.length > 0 && responses[0] === this) while (output.length > 0) { var out = output.shift(); - connection.send(out); + connection.send(out[0], out[1]); } }; this.finished = false; this.finish = function () { if (chunked_encoding) - send("0\r\n\r\n"); // last chunk + send(output, "0\r\n\r\n"); // last chunk this.finished = true; @@ -386,44 +390,14 @@ node.http.Client = function (port, host) { var output = [header]; - function send (data) { - if (output.length == 0) { - output.push(data); - return; - } - - var li = output.length-1; - - if (data.constructor 
== String && output[li].constructor == String) { - output[li] += data; - return; - } - - if (data.constructor == Array && output[li].constructor == Array) { - output[li] = output[li].concat(data); - return; - } - - // If the string is small enough, just convert it to binary - if (data.constructor == String - && data.length < 128 - && output[li].constructor == Array) - { - output[li] = output[li].concat(toRaw(data)); - return; - } - - output.push(data); - }; - - this.sendBody = function (chunk) { + this.sendBody = function (chunk, encoding) { if (chunked_encoding) { - send(chunk.length.toString(16)); - send(CRLF); - send(chunk); - send(CRLF); + send(output, chunk.length.toString(16)); + send(output, CRLF); + send(output, chunk, encoding); + send(output, CRLF); } else { - send(chunk); + send(output, chunk, encoding); } this.flush(); @@ -443,7 +417,7 @@ node.http.Client = function (port, host) { this.finish = function (responseHandler) { this.responseHandler = responseHandler; if (chunked_encoding) - send("0\r\n\r\n"); // last chunk + send(output, "0\r\n\r\n"); // last chunk this.flush(); }; diff --git a/src/net.cc b/src/net.cc index b2695e93d0a..8a216a8d32f 100644 --- a/src/net.cc +++ b/src/net.cc @@ -62,7 +62,6 @@ Connection::Initialize (v8::Handle target) NODE_SET_PROTOTYPE_METHOD(constructor_template, "connect", Connect); NODE_SET_PROTOTYPE_METHOD(constructor_template, "send", Send); - NODE_SET_PROTOTYPE_METHOD(constructor_template, "sendUtf8", SendUtf8); NODE_SET_PROTOTYPE_METHOD(constructor_template, "close", Close); NODE_SET_PROTOTYPE_METHOD(constructor_template, "fullClose", FullClose); NODE_SET_PROTOTYPE_METHOD(constructor_template, "forceClose", ForceClose); @@ -326,31 +325,6 @@ new_buf (size_t size) return b; } -Handle -Connection::SendUtf8 (const Arguments& args) -{ - HandleScope scope; - Connection *connection = NODE_UNWRAP(Connection, args.Holder()); - if (!connection) return Handle(); - - if ( connection->ReadyState() != OPEN - && 
connection->ReadyState() != WRITE_ONLY - ) - return ThrowException(String::New("Socket is not open for writing")); - - if (!args[0]->IsString()) - return ThrowException(String::New("Must have string argument")); - - // utf8 encoding - Local s = args[0]->ToString(); - size_t length = s->Utf8Length(); - oi_buf *buf = new_buf(length); - s->WriteUtf8(buf->base, length); - connection->Send(buf); - - return Undefined(); -} - Handle Connection::Send (const Arguments& args) { @@ -372,19 +346,30 @@ Connection::Send (const Arguments& args) // addressed. if (args[0]->IsString()) { - // ASCII encoding + enum encoding enc = ParseEncoding(args[1]); Local s = args[0]->ToString(); - size_t length = s->Utf8Length(); - oi_buf *buf = new_buf(length); - s->WriteAscii(buf->base, 0, length); + size_t len = s->Utf8Length(); + oi_buf *buf = new_buf(len); + switch (enc) { + case RAW: + case ASCII: + s->WriteAscii(buf->base, 0, len); + break; + + case UTF8: + s->WriteUtf8(buf->base, len); + break; + + default: + assert(0 && "unhandled string encoding"); + } connection->Send(buf); } else if (args[0]->IsArray()) { - // raw encoding Handle array = Handle::Cast(args[0]); - size_t length = array->Length(); - oi_buf *buf = new_buf(length); - for (size_t i = 0; i < length; i++) { + size_t len = array->Length(); + oi_buf *buf = new_buf(len); + for (size_t i = 0; i < len; i++) { Local int_value = array->Get(Integer::New(i)); buf->base[i] = int_value->IntegerValue(); } diff --git a/src/node.cc b/src/node.cc index f189ea38550..9df86554d1c 100644 --- a/src/node.cc +++ b/src/node.cc @@ -243,6 +243,25 @@ node::eio_warmup (void) ev_async_start(EV_DEFAULT_UC_ &eio_watcher); } +enum encoding +node::ParseEncoding (Handle encoding_v) +{ + HandleScope scope; + + if (!encoding_v->IsString()) + return RAW; + + String::Utf8Value encoding(encoding_v->ToString()); + + if(strcasecmp(*encoding, "utf8") == 0) { + return UTF8; + } else if (strcasecmp(*encoding, "ascii") == 0) { + return ASCII; + } else { + return RAW; 
+ } +} + int main (int argc, char *argv[]) { diff --git a/src/node.h b/src/node.h index 7d1d2960787..d207d11124c 100644 --- a/src/node.h +++ b/src/node.h @@ -23,7 +23,8 @@ do { \ templ->PrototypeTemplate()->Set(NODE_SYMBOL(name), __callback##_TEM); \ } while(0) -enum encoding {UTF8, RAW}; +enum encoding {ASCII, UTF8, RAW}; +enum encoding ParseEncoding (v8::Handle encoding_v); void fatal_exception (v8::TryCatch &try_catch); void eio_warmup (void); // call this before creating a new eio event. diff --git a/src/node.js b/src/node.js index 6bdef379b26..e3d9d1688c5 100644 --- a/src/node.js +++ b/src/node.js @@ -134,7 +134,7 @@ clearInterval = clearTimeout; } function loadScript (filename, target, callback) { - node.fs.cat(filename, node.fs.UTF8, function (status, content) { + node.fs.cat(filename, "utf8", function (status, content) { if (status != 0) { stderr.puts("Error reading " + filename + ": " + node.fs.strerror(status)); node.exit(1); diff --git a/website/index.html b/website/index.html index 2833245ba71..933264775ba 100644 --- a/website/index.html +++ b/website/index.html @@ -37,7 +37,8 @@ h1, h2, h3, h4 { margin: 2em 0; } -h1 a { color: inherit; } +h1 code, h2 code, h3 code, h4 code { color: inherit; } +h1 a, h2 a, h3 a, h4 a { color: inherit; } pre, code { @@ -165,11 +166,15 @@ make install on. All methods and members are camel cased. Constructors always have a capital first letter. -

Node uses strings to represent ASCII or UTF-8 encoded data. For the -moment, arrays of integers are used to represent raw binary data—this -representation is rather inefficient. In the future, when V8 natively supports binary -Blob objects, Node will use them. +

+Node supports 3 byte-string encodings: +ASCII ("ascii"), +UTF-8 ("utf8"), and +raw binary ("raw"). +It uses strings to represent ASCII and UTF-8 encoded data. For the moment, +arrays of integers are used to represent raw binary data—this +representation is rather inefficient. This will change in the future, when V8 supports Blob objects.

The following are global functions:

@@ -463,18 +468,27 @@ server.listen(7000, "localhost");
Creates a new connection object.
+
connection.readyState
+
Either "closed", "open", + "readOnly", or "writeOnly". +
+
connection.setEncoding(encoding)
Sets the encoding (either "utf8" or "raw") for data that is received.
-
connection.send(data)
-
sends data on the connection +
connection.send(data, encoding="ascii")
+
Sends data on the connection. The data should be either an array of + integers (for raw binary) or a string (for utf8 or ascii). The second + parameter specifies the encoding in the case of a string—it defaults + to ASCII because encoding to UTF8 is rather slow.
connection.close()
Half-closes the connection. I.E. sends a FIN packet. It is possible the server will still send some data. + After calling this readyState will be "readOnly".
connection.fullClose()
@@ -500,8 +514,9 @@ server.listen(7000, "localhost");
connection.onEOF = function () { };
Called when the other end of the connection sends a FIN packet. onReceive will not be called after this. + After receiving this readyState will be "writeOnly". You should probably just call connection.close() in this - callback. + callback.
connection.onDisconnect = function () { };
Called once the connection is fully disconnected.
@@ -611,7 +626,7 @@ req.onBody = function (chunk) { }; A chunk of the body is given as the single argument. The transfer-encoding - has been removed. + has been decoded.

The body chunk is either a String in the case of UTF-8 encoding or an array of numbers in the case of raw encoding. The body encoding is set with @@ -654,11 +669,15 @@ res.sendHeader(200, [ ["Content-Length", body.length] before res.finish() is called. -

res.sendBody(chunk)
+
res.sendBody(chunk, encoding="ascii")
This method must be called after sendHeader was called. It sends a chunk of the response body. This method may be called multiple times to provide successive parts of the body. + +

If chunk is a string, the second parameter specifies how + to encode it into a byte stream. By default the encoding is + "ascii".

res.finish()
@@ -730,7 +749,7 @@ it, so neither do we. whose header has already been sent.
-
req.sendBody(chunk, encoding)
+
req.sendBody(chunk, encoding="ascii")
Sends a successive piece of the body. By calling this method many times, the user can stream a request body to a server—in that case it is suggested to use the ["Transfer-Encoding", @@ -743,8 +762,6 @@ suggested to use the ["Transfer-Encoding", "utf8" or "ascii". By default the body uses ASCII encoding, as it is faster. -

TODO -

req.finish(response_handler)
Finishes sending the request. If any parts of the body are