From ac2857b12cd819b68405b15c3f8e95e48bcc32d8 Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Thu, 4 Sep 2014 22:03:24 -0700 Subject: [PATCH] build, i18n: improve Intl build, add "--with-intl" The two main goals of this change are: - To make it easier to build the Intl option using ICU (particularly, using a newer ICU than v8/Chromium's version) - To enable a much smaller ICU build with only English support The goal here is to get node.js binaries built this way by default so that the Intl API can be used. Additional data can be added at execution time (see Readme and wiki) More details are at https://github.com/joyent/node/pull/7719 In particular, this change adds the "--with-intl=" configure option to provide more ways of building "Intl": - "full-icu" picks up an ICU from deps/icu - "small-icu" is similar, but builds only English - "system-icu" uses pkg-config to find an installed ICU - "none" does nothing (no Intl) For Windows builds, the "full-icu" or "small-icu" options are added to vcbuild.bat. Note that the existing "--with-icu-path" option is not removed from configure, but may not be used alongside the new option. Wiki changes have already been made on https://github.com/joyent/node/wiki/Installation and a new page created at https://github.com/joyent/node/wiki/Intl (marked as provisional until this change lands.) Summary of changes: * README.md : doc updates * .gitignore : added "deps/icu" as this is the location where ICU is unpacked to. * Makefile : added the tools/icu/* files to cpplint, but excluded a problematic file. * configure : added the "--with-intl" option mentioned above. Calculate at config time the list of ICU source files to use and data packaging options. * node.gyp : add the new files src/node_i18n.cc/.h as well as ICU linkage. * src/node.cc : add call into node::i18n::InitializeICUDirectory(icu_data_dir) as well as new --icu-data-dir option and NODE_ICU_DATA env variable to configure ICU data loading. 
This loading is only relevant in the "small" configuration. * src/node_i18n.cc : new source file for the above InitializeICUDirectory() function, to set up ICU as needed. * tools/icu : new directory with some tools needed for this build. * tools/icu/icu-generic.gyp : new .gyp file that builds ICU in some new ways, both on unix/mac and windows. * tools/icu/icu-system.gyp : new .gyp file to build node against a pkg-config detected ICU. * tools/icu/icu_small.json : new config file for the "English-only" small build. * tools/icu/icutrim.py : new tool for trimming down ICU data. Reads the above .json file. * tools/icu/iculslocs.cc : new tool for repairing ICU data manifests after a trim operation. * tools/icu/no-op.cc : dummy file to force .gyp into using a C++ linker. * vcbuild.bat : added small-icu and full-icu options, to call into configure. * Fixed toolset dependencies, see https://github.com/joyent/node/pull/7719#issuecomment-54641687 Note that because of a bug in gyp {CC,CXX}_host must also be set. Otherwise gcc/g++ will be used by default for part of the build. 
Reviewed-by: Trevor Norris Reviewed-by: Fedor Indutny --- .gitignore | 1 + Makefile | 2 +- README.md | 44 ++-- configure | 138 ++++++++++++- node.gyp | 13 ++ src/node.cc | 39 ++++ src/node_i18n.cc | 88 ++++++++ src/node_i18n.h | 39 ++++ tools/icu/README.md | 26 +++ tools/icu/icu-generic.gyp | 414 ++++++++++++++++++++++++++++++++++++++ tools/icu/icu-system.gyp | 18 ++ tools/icu/icu_small.json | 47 +++++ tools/icu/iculslocs.cc | 388 +++++++++++++++++++++++++++++++++++ tools/icu/icutrim.py | 338 +++++++++++++++++++++++++++++++ tools/icu/no-op.cc | 18 ++ vcbuild.bat | 8 +- 16 files changed, 1603 insertions(+), 18 deletions(-) create mode 100644 src/node_i18n.cc create mode 100644 src/node_i18n.h create mode 100644 tools/icu/README.md create mode 100644 tools/icu/icu-generic.gyp create mode 100644 tools/icu/icu-system.gyp create mode 100644 tools/icu/icu_small.json create mode 100644 tools/icu/iculslocs.cc create mode 100755 tools/icu/icutrim.py create mode 100644 tools/icu/no-op.cc diff --git a/.gitignore b/.gitignore index e00450690da..f5b0105840e 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ ipch/ /test/addons/doc-*/ email.md deps/v8-* +deps/icu ./node_modules .svn/ diff --git a/Makefile b/Makefile index 11304e118f3..fb854603d8b 100644 --- a/Makefile +++ b/Makefile @@ -406,7 +406,7 @@ CPPLINT_EXCLUDE += src/queue.h CPPLINT_EXCLUDE += src/tree.h CPPLINT_EXCLUDE += src/v8abbr.h -CPPLINT_FILES = $(filter-out $(CPPLINT_EXCLUDE), $(wildcard src/*.cc src/*.h src/*.c)) +CPPLINT_FILES = $(filter-out $(CPPLINT_EXCLUDE), $(wildcard src/*.cc src/*.h src/*.c tools/icu/*.h tools/icu/*.cc)) cpplint: @$(PYTHON) tools/cpplint.py $(CPPLINT_FILES) diff --git a/README.md b/README.md index f1301a0df76..0032c63c073 100644 --- a/README.md +++ b/README.md @@ -19,17 +19,6 @@ make make install ``` -With libicu i18n support: - -```sh -svn checkout --force --revision 214189 \ - http://src.chromium.org/svn/trunk/deps/third_party/icu46 \ - deps/v8/third_party/icu46 -./configure 
--with-icu-path=deps/v8/third_party/icu46/icu.gyp -make -make install -``` - If your python binary is in a non-standard location or has a non-standard name, run the following instead: @@ -47,7 +36,9 @@ Prerequisites (Windows only): Windows: - vcbuild nosign +```sh +vcbuild nosign +``` You can download pre-built binaries for various operating systems from [http://nodejs.org/download/](http://nodejs.org/download/). The Windows @@ -92,6 +83,35 @@ make doc man doc/node.1 ``` +### To build `Intl` (ECMA-402) support: + +*Note:* more docs, including how to reduce disk footprint, are on +[the wiki](https://github.com/joyent/node/wiki/Intl). + +#### Use existing installed ICU (Unix/Macintosh only): + +```sh +pkg-config --modversion icu-i18n && ./configure --with-intl=system-icu +``` + +#### Build ICU from source: + +First: Unpack latest ICU + [icu4c-**##.#**-src.tgz](http://icu-project.org/download) (or `.zip`) + as `deps/icu` (You'll have: `deps/icu/source/...`) + +Unix/Macintosh: + +```sh +./configure --with-intl=full-icu +``` + +Windows: + +```sh +vcbuild full-icu +``` + Resources for Newcomers --- - [The Wiki](https://github.com/joyent/node/wiki) diff --git a/configure b/configure index 2e085832546..6db8703201e 100755 --- a/configure +++ b/configure @@ -241,6 +241,11 @@ parser.add_option('--with-icu-path', dest='with_icu_path', help='Path to icu.gyp (ICU i18n, Chromium version only.)') +parser.add_option('--with-intl', + action='store', + dest='with_intl', + help='Intl mode: none, full-icu, small-icu (default is none)') + parser.add_option('--with-perfctr', action='store_true', dest='with_perfctr', @@ -686,13 +691,138 @@ def configure_winsdk(o): print('ctrpp not found in WinSDK path--using pre-gen files ' 'from tools/msvs/genfiles.') +def glob_to_var(dir_base, dir_sub): + list = [] + dir_all = os.path.join(dir_base, dir_sub) + files = os.walk(dir_all) + for ent in files: + (path, dirs, files) = ent + for file in files: + if file.endswith('.cpp') or file.endswith('.c') 
or file.endswith('.h'): + list.append('%s/%s' % (dir_sub, file)) + break + return list -def configure_icu(o): + +def configure_intl(o): + # small ICU is off by default. + # always set icu_small, node.gyp depends on it being defined. + o['variables']['icu_small'] = b(False) + + with_intl = options.with_intl have_icu_path = bool(options.with_icu_path) - o['variables']['v8_enable_i18n_support'] = int(have_icu_path) - if have_icu_path: + if have_icu_path and with_intl: + print 'Error: Cannot specify both --with-icu-path and --with-intl' + sys.exit(1) + elif have_icu_path: + # Chromium .gyp mode: --with-icu-path + o['variables']['v8_enable_i18n_support'] = 1 + # use the .gyp given o['variables']['icu_gyp_path'] = options.with_icu_path + return + # --with-intl= + if with_intl == 'none' or with_intl is None: + o['variables']['v8_enable_i18n_support'] = 0 + return # no Intl + elif with_intl == 'small-icu': + # small ICU (English only) + o['variables']['v8_enable_i18n_support'] = 1 + o['variables']['icu_small'] = b(True) + elif with_intl == 'full-icu': + # full ICU + o['variables']['v8_enable_i18n_support'] = 1 + elif with_intl == 'system-icu': + # ICU from pkg-config. + o['variables']['v8_enable_i18n_support'] = 1 + pkgicu = pkg_config('icu-i18n') + if not pkgicu: + print 'Error: could not load pkg-config data for "icu-i18n".' + print 'See above errors or the README.md.' + sys.exit(1) + (libs, cflags) = pkgicu + o['libraries'] += libs.split() + o['cflags'] += cflags.split() + # use the "system" .gyp + o['variables']['icu_gyp_path'] = 'tools/icu/icu-system.gyp' + return + else: + print 'Error: unknown value --with-intl=%s' % with_intl + sys.exit(1) + # Note: non-ICU implementations could use other 'with_intl' + # values. + # ICU mode. 
(icu-generic.gyp) + byteorder = sys.byteorder + o['variables']['icu_gyp_path'] = 'tools/icu/icu-generic.gyp' + # ICU source dir relative to root + icu_full_path = os.path.join(root_dir, 'deps/icu') + o['variables']['icu_path'] = icu_full_path + if not os.path.isdir(icu_full_path): + print 'Error: ICU path is not a directory: %s' % (icu_full_path) + sys.exit(1) + # Now, what version of ICU is it? We just need the "major", such as 54. + # uvernum.h contains it as a #define. + uvernum_h = os.path.join(icu_full_path, 'source/common/unicode/uvernum.h') + if not os.path.isfile(uvernum_h): + print 'Error: could not load %s - is ICU installed?' % uvernum_h + sys.exit(1) + icu_ver_major = None + matchVerExp = r'^\s*#define\s+U_ICU_VERSION_SHORT\s+"([^"]*)".*' + match_version = re.compile(matchVerExp) + for line in open(uvernum_h).readlines(): + m = match_version.match(line) + if m: + icu_ver_major = m.group(1) + if not icu_ver_major: + print 'Could not read U_ICU_VERSION_SHORT version from %s' % uvernum_h + sys.exit(1) + icu_endianness = sys.byteorder[0]; # TODO(srl295): EBCDIC should be 'e' + o['variables']['icu_ver_major'] = icu_ver_major + o['variables']['icu_endianness'] = icu_endianness + icu_data_file_l = 'icudt%s%s.dat' % (icu_ver_major, 'l') + icu_data_file = 'icudt%s%s.dat' % (icu_ver_major, icu_endianness) + # relative to configure + icu_data_path = os.path.join(icu_full_path, + 'source/data/in', + icu_data_file_l) + # relative to dep.. + icu_data_in = os.path.join('../../deps/icu/source/data/in', icu_data_file_l) + if not os.path.isfile(icu_data_path) and icu_endianness != 'l': + # use host endianness + icu_data_path = os.path.join(icu_full_path, + 'source/data/in', + icu_data_file) + # relative to dep.. + icu_data_in = os.path.join('icu/source/data/in', + icu_data_file) + # this is the input '.dat' file to use .. 
icudt*.dat + # may be little-endian if from a icu-project.org tarball + o['variables']['icu_data_in'] = icu_data_in + # this is the icudt*.dat file which node will be using (platform endianness) + o['variables']['icu_data_file'] = icu_data_file + if not os.path.isfile(icu_data_path): + print 'Error: ICU prebuilt data file %s does not exist.' % icu_data_path + print 'See the README.md.' + # .. and we're not about to build it from .gyp! + sys.exit(1) + # map from variable name to subdirs + icu_src = { + 'stubdata': 'stubdata', + 'common': 'common', + 'i18n': 'i18n', + 'io': 'io', + 'tools': 'tools/toolutil', + 'genccode': 'tools/genccode', + 'genrb': 'tools/genrb', + 'icupkg': 'tools/icupkg', + } + # this creates a variable icu_src_XXX for each of the subdirs + # with a list of the src files to use + for i in icu_src: + var = 'icu_src_%s' % i + path = '../../deps/icu/source/%s' % icu_src[i] + o['variables'][var] = glob_to_var('tools/icu', path) + return # end of configure_intl # determine the "flavor" (operating system) we're building for, # leveraging gyp's GetFlavor function @@ -717,7 +847,7 @@ configure_libuv(output) configure_v8(output) configure_openssl(output) configure_winsdk(output) -configure_icu(output) +configure_intl(output) configure_fullystatic(output) # variables should be a root level element, diff --git a/node.gyp b/node.gyp index 24c8860ac17..de48ae83785 100644 --- a/node.gyp +++ b/node.gyp @@ -103,6 +103,7 @@ 'src/node_stat_watcher.cc', 'src/node_watchdog.cc', 'src/node_zlib.cc', + 'src/node_i18n.cc', 'src/pipe_wrap.cc', 'src/signal_wrap.cc', 'src/smalloc.cc', @@ -134,6 +135,7 @@ 'src/node_version.h', 'src/node_watchdog.h', 'src/node_wrap.h', + 'src/node_i18n.h', 'src/pipe_wrap.h', 'src/queue.h', 'src/smalloc.h', @@ -164,6 +166,17 @@ ], 'conditions': [ + [ 'v8_enable_i18n_support==1', { + 'defines': [ 'NODE_HAVE_I18N_SUPPORT=1' ], + 'dependencies': [ + '<(icu_gyp_path):icui18n', + '<(icu_gyp_path):icuuc', + ], + 'conditions': [ + [ 
'icu_small=="true"', { + 'defines': [ 'NODE_HAVE_SMALL_ICU=1' ], + }]], + }], [ 'node_use_openssl=="true"', { 'defines': [ 'HAVE_OPENSSL=1' ], 'sources': [ diff --git a/src/node.cc b/src/node.cc index cf8f4ccbb5a..23edf4d4e2c 100644 --- a/src/node.cc +++ b/src/node.cc @@ -35,6 +35,10 @@ #include "node_crypto.h" #endif +#if defined(NODE_HAVE_I18N_SUPPORT) +#include "node_i18n.h" +#endif + #if defined HAVE_DTRACE || defined HAVE_ETW #include "node_dtrace.h" #endif @@ -135,6 +139,11 @@ static node_module* modpending; static node_module* modlist_builtin; static node_module* modlist_addon; +#if defined(NODE_HAVE_I18N_SUPPORT) +// Path to ICU data (for i18n / Intl) +static const char* icu_data_dir = NULL; +#endif + // used by C++ modules as well bool no_deprecation = false; @@ -2953,6 +2962,14 @@ static void PrintHelp() { " --trace-deprecation show stack traces on deprecations\n" " --v8-options print v8 command line options\n" " --max-stack-size=val set max v8 stack size (bytes)\n" +#if defined(NODE_HAVE_I18N_SUPPORT) + " --icu-data-dir=dir set ICU data load path to dir\n" + " (overrides NODE_ICU_DATA)\n" +#if !defined(NODE_HAVE_SMALL_ICU) + " Note: linked-in ICU data is\n" + " present.\n" +#endif +#endif "\n" "Environment variables:\n" #ifdef _WIN32 @@ -2964,6 +2981,12 @@ static void PrintHelp() { "NODE_MODULE_CONTEXTS Set to 1 to load modules in their own\n" " global contexts.\n" "NODE_DISABLE_COLORS Set to 1 to disable colors in the REPL\n" +#if defined(NODE_HAVE_I18N_SUPPORT) + "NODE_ICU_DATA Data path for ICU (Intl object) data\n" +#if !defined(NODE_HAVE_SMALL_ICU) + " (will extend linked-in data)\n" +#endif +#endif "\n" "Documentation can be found at http://nodejs.org/\n"); } @@ -3054,6 +3077,10 @@ static void ParseArgs(int* argc, } else if (strcmp(arg, "--v8-options") == 0) { new_v8_argv[new_v8_argc] = "--help"; new_v8_argc += 1; +#if defined(NODE_HAVE_I18N_SUPPORT) + } else if (strncmp(arg, "--icu-data-dir=", 15) == 0) { + icu_data_dir = arg + 15; +#endif } else 
{ // V8 option. Pass through as-is. new_v8_argv[new_v8_argc] = arg; @@ -3410,6 +3437,18 @@ void Init(int* argc, } } +#if defined(NODE_HAVE_I18N_SUPPORT) + if (icu_data_dir == NULL) { + // if the parameter isn't given, use the env variable. + icu_data_dir = getenv("NODE_ICU_DATA"); + } + // Initialize ICU. + // If icu_data_dir is NULL here, it will load the 'minimal' data. + if (!i18n::InitializeICUDirectory(icu_data_dir)) { + FatalError(NULL, "Could not initialize ICU " + "(check NODE_ICU_DATA or --icu-data-dir parameters)"); + } +#endif // The const_cast doesn't violate conceptual const-ness. V8 doesn't modify // the argv array or the elements it points to. V8::SetFlagsFromCommandLine(&v8_argc, const_cast(v8_argv), true); diff --git a/src/node_i18n.cc b/src/node_i18n.cc new file mode 100644 index 00000000000..6d6144dc782 --- /dev/null +++ b/src/node_i18n.cc @@ -0,0 +1,88 @@ +// Copyright Joyent, Inc. and other Node contributors. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit +// persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN +// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +// USE OR OTHER DEALINGS IN THE SOFTWARE. + + +/* + * notes: by srl295 + * - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data + * ( stubdata/libicudata.a ) containing nothing, no data, and it's also + * linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT + * macro names. That's the "english+root" data. + * + * If icu_data_path is non-null, the user has provided a path and we assume + * it goes somewhere useful. We set that path in ICU, and exit. + * If icu_data_path is null, they haven't set a path and we want the + * "english+root" data. We call + * udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...) + * to load up the english+root data. + * + * - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full + * data. All of the variables and command line options for changing data at + * runtime are disabled, as they wouldn't fully override the internal data. + * See: http://bugs.icu-project.org/trac/ticket/10924 + */ + + +#include "node_i18n.h" + +#if defined(NODE_HAVE_I18N_SUPPORT) + +#include +#include + +#ifdef NODE_HAVE_SMALL_ICU +/* if this is defined, we have a 'secondary' entry point. 
+ compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */ +#define SMALL_ICUDATA_ENTRY_POINT \ + SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME) +#define SMALL_DEF2(major, suff) SMALL_DEF(major, suff) +#ifndef U_LIB_SUFFIX_C_NAME +#define SMALL_DEF(major, suff) icusmdt##major##_dat +#else +#define SMALL_DEF(major, suff) icusmdt##suff##major##_dat +#endif + +extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[]; +#endif + +namespace node { +namespace i18n { + +bool InitializeICUDirectory(const char* icu_data_path) { + if (icu_data_path != NULL) { + u_setDataDirectory(icu_data_path); + return true; // no error + } else { + UErrorCode status = U_ZERO_ERROR; +#ifdef NODE_HAVE_SMALL_ICU + // install the 'small' data. + udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status); +#else // !NODE_HAVE_SMALL_ICU + // no small data, so nothing to do. +#endif // !NODE_HAVE_SMALL_ICU + return (status == U_ZERO_ERROR); + } +} + +} // namespace i18n +} // namespace node + +#endif // NODE_HAVE_I18N_SUPPORT diff --git a/src/node_i18n.h b/src/node_i18n.h new file mode 100644 index 00000000000..f6807a911ec --- /dev/null +++ b/src/node_i18n.h @@ -0,0 +1,39 @@ +// Copyright Joyent, Inc. and other Node contributors. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit +// persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +// USE OR OTHER DEALINGS IN THE SOFTWARE. + +#ifndef SRC_NODE_I18N_H_ +#define SRC_NODE_I18N_H_ + +#include "node.h" + +#if defined(NODE_HAVE_I18N_SUPPORT) + +namespace node { +namespace i18n { + +NODE_EXTERN bool InitializeICUDirectory(const char* icu_data_path); + +} // namespace i18n +} // namespace node + +#endif // NODE_HAVE_I18N_SUPPORT + +#endif // SRC_NODE_I18N_H_ diff --git a/tools/icu/README.md b/tools/icu/README.md new file mode 100644 index 00000000000..40d5287d9f1 --- /dev/null +++ b/tools/icu/README.md @@ -0,0 +1,26 @@ +Notes about the icu directory. +=== + +The files in this directory were written for the node.js effort. It's +the intent of their author (Steven R. Loomis / srl295) to merge them +upstream into ICU, pending much discussion within the ICU-PMC. + +`icu_small.json` is somewhat node-specific as it specifies a "small ICU" +configuration file for the `icutrim.py` script. `icutrim.py` and +`iculslocs.cpp` may themselves be superseded by components built into +ICU in the future. 
+ +The following tickets were opened during this work, and their +resolution may inform the reader as to the current state of icu-trim +upstream: + + * [#10919](http://bugs.icu-project.org/trac/ticket/10919) + (experimental branch - may copy any source patches here) + * [#10922](http://bugs.icu-project.org/trac/ticket/10922) + (data packaging improvements) + * [#10923](http://bugs.icu-project.org/trac/ticket/10923) + (rewrite data building in python) + +When/if components (not including the `.json` file) are merged into +ICU, this code and `configure` will be updated to detect and use those +variants rather than the ones in this directory. diff --git a/tools/icu/icu-generic.gyp b/tools/icu/icu-generic.gyp new file mode 100644 index 00000000000..fa5e5773b2b --- /dev/null +++ b/tools/icu/icu-generic.gyp @@ -0,0 +1,414 @@ +# Copyright (c) IBM Corporation and Others. All Rights Reserved. +# very loosely based on icu.gyp from Chromium: +# Copyright (c) 2012 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + + +{ + 'variables': { + 'icu_src_derb': [ '../../deps/icu/source/tools/genrb/derb.c' ], + }, + 'targets': [ + { + # a target to hold uconfig defines. + # for now these are hard coded, but could be defined. + 'target_name': 'icu_uconfig', + 'type': 'none', + 'toolsets': [ 'host', 'target' ], + 'direct_dependent_settings': { + 'defines': [ + 'UCONFIG_NO_LEGACY_CONVERSION=1', + 'UCONFIG_NO_IDNA=1', + 'UCONFIG_NO_TRANSLITERATION=1', + 'UCONFIG_NO_SERVICE=1', + 'UCONFIG_NO_REGULAR_EXPRESSIONS=1', + 'U_ENABLE_DYLOAD=0', + 'U_STATIC_IMPLEMENTATION=1', + # TODO(srl295): reenable following pending + # https://code.google.com/p/v8/issues/detail?id=3345 + # (saves some space) + 'UCONFIG_NO_BREAK_ITERATION=0', + ], + } + }, + { + # a target to hold common settings. + # make any target that is ICU implementation depend on this. 
+ 'target_name': 'icu_implementation', + 'toolsets': [ 'host', 'target' ], + 'type': 'none', + 'direct_dependent_settings': { + 'conditions': [ + [ 'os_posix == 1 and OS != "mac" and OS != "ios"', { + 'cflags': [ '-Wno-deprecated-declarations' ], + 'cflags_cc': [ '-frtti' ], + }], + [ 'OS == "mac" or OS == "ios"', { + 'xcode_settings': {'GCC_ENABLE_CPP_RTTI': 'YES' }, + }], + [ 'OS == "win"', { + 'msvs_settings': { + 'VCCLCompilerTool': {'RuntimeTypeInfo': 'true'}, + } + }], + ], + 'msvs_settings': { + 'VCCLCompilerTool': { + 'RuntimeTypeInfo': 'true', + 'ExceptionHandling': '1', + }, + }, + 'configurations': { + # TODO: why does this need to be redefined for Release and Debug? + # Maybe this should be pushed into common.gypi with an "if v8 i18n"? + 'Release': { + 'msvs_settings': { + 'VCCLCompilerTool': { + 'RuntimeTypeInfo': 'true', + 'ExceptionHandling': '1', + }, + }, + }, + 'Debug': { + 'msvs_settings': { + 'VCCLCompilerTool': { + 'RuntimeTypeInfo': 'true', + 'ExceptionHandling': '1', + }, + }, + }, + }, + 'defines': [ + 'U_ATTRIBUTE_DEPRECATED=', + '_CRT_SECURE_NO_DEPRECATE=', + 'U_STATIC_IMPLEMENTATION=1', + ], + }, + }, + { + 'target_name': 'icui18n', + 'type': '<(library)', + 'toolsets': [ 'host', 'target' ], + 'sources': [ + '<@(icu_src_i18n)' + ], + 'include_dirs': [ + '../../deps/icu/source/i18n', + ], + 'defines': [ + 'U_I18N_IMPLEMENTATION=1', + ], + 'dependencies': [ 'icuucx', 'icu_implementation', 'icu_uconfig' ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../../deps/icu/source/i18n', + ], + }, + 'export_dependent_settings': [ 'icuucx' ], + }, + # this library is only built for derb.. 
+ { + 'target_name': 'icuio', + 'type': '<(library)', + 'toolsets': [ 'host' ], + 'sources': [ + '<@(icu_src_io)' + ], + 'include_dirs': [ + '../../deps/icu/source/io', + ], + 'defines': [ + 'U_IO_IMPLEMENTATION=1', + ], + 'dependencies': [ 'icuucx', 'icui18n', 'icu_implementation', 'icu_uconfig' ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../../deps/icu/source/io', + ], + }, + 'export_dependent_settings': [ 'icuucx', 'icui18n' ], + }, + # This exports actual ICU data + { + 'target_name': 'icudata', + 'type': '<(library)', + 'toolsets': [ 'target' ], + 'conditions': [ + [ 'OS == "win"', { + 'conditions': [ + [ 'icu_small == "false"', { # and OS=win + # full data - just build the full data file, then we are done. + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'dependencies': [ 'genccode#host' ], + 'actions': [ + { + 'action_name': 'icudata', + 'inputs': [ '<(icu_data_in)' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'action': [ '<(PRODUCT_DIR)/genccode', + '-o', + '-d', '<(SHARED_INTERMEDIATE_DIR)', + '-n', 'icudata', + '-e', 'icudt<(icu_ver_major)', + '<@(_inputs)' ], + }, + ], + }, { # icu_small == TRUE and OS == win + # link against stub data primarily + # then, use icupkg and genccode to rebuild data + 'dependencies': [ 'icustubdata', 'genccode#host', 'icupkg#host', 'genrb#host', 'iculslocs#host' ], + 'export_dependent_settings': [ 'icustubdata' ], + 'actions': [ + { + # trim down ICU + 'action_name': 'icutrim', + 'inputs': [ '<(icu_data_in)', 'icu_small.json' ], + 'outputs': [ '../../out/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'action': [ 'python', + 'icutrim.py', + '-P', '../../<(CONFIGURATION_NAME)', + '-D', '<(icu_data_in)', + '--delete-tmp', + '-T', '../../out/icutmp', + '-F', 'icu_small.json', + '-O', 'icudt<(icu_ver_major)<(icu_endianness).dat', + '-v' ], + }, + { + # build final .dat -> .obj + 'action_name': 'genccode', + 
'inputs': [ '../../out/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'outputs': [ '../../out/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'action': [ '../../<(CONFIGURATION_NAME)/genccode', + '-o', + '-d', '../../out/', + '-n', 'icudata', + '-e', 'icusmdt<(icu_ver_major)', + '<@(_inputs)' ], + }, + ], + # This file contains the small ICU data. + 'sources': [ '../../out/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + } ] ], #end of OS==win and icu_small == true + }, { # OS != win + 'conditions': [ + [ 'icu_small == "false"', { + # full data - just build the full data file, then we are done. + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.c' ], + 'dependencies': [ 'genccode#host', 'icupkg#host', 'icu_implementation#host', 'icu_uconfig' ], + 'include_dirs': [ + '../../deps/icu/source/common', + ], + 'actions': [ + { + # Swap endianness (if needed), or at least copy the file + 'action_name': 'icupkg', + 'inputs': [ '<(icu_data_in)' ], + 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'action': [ '<(PRODUCT_DIR)/icupkg', + '-t<(icu_endianness)', + '<@(_inputs)', + '<@(_outputs)', + ], + }, + { + # Rename without the endianness marker + 'action_name': 'copy', + 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major).dat' ], + 'action': [ 'cp', + '<@(_inputs)', + '<@(_outputs)', + ], + }, + { + 'action_name': 'icudata', + 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major).dat' ], + 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.c' ], + 'action': [ '<(PRODUCT_DIR)/genccode', + '-e', 'icudt<(icu_ver_major)', + '-d', '<(SHARED_INTERMEDIATE_DIR)', + '-f', 'icudt<(icu_ver_major)_dat', + '<@(_inputs)' ], + }, + ], # end actions + }, { # icu_small == true ( and OS != win ) + # link against stub data (as primary data) + # then, use icupkg and genccode to rebuild small data 
+ 'dependencies': [ 'icustubdata', 'genccode#host', 'icupkg#host', 'genrb#host', 'iculslocs#host', + 'icu_implementation', 'icu_uconfig' ], + 'export_dependent_settings': [ 'icustubdata' ], + 'actions': [ + { + # trim down ICU + 'action_name': 'icutrim', + 'inputs': [ '<(icu_data_in)', 'icu_small.json' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'action': [ 'python', + 'icutrim.py', + '-P', '<(PRODUCT_DIR)', + '-D', '<(icu_data_in)', + '--delete-tmp', + '-T', '<(SHARED_INTERMEDIATE_DIR)/icutmp', + '-F', 'icu_small.json', + '-O', 'icudt<(icu_ver_major)<(icu_endianness).dat', + '-v' ], + }, { + # rename to get the final entrypoint name right + 'action_name': 'rename', + 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icusmdt<(icu_ver_major).dat' ], + 'action': [ 'cp', + '<@(_inputs)', + '<@(_outputs)', + ], + }, { + # build final .dat -> .obj + 'action_name': 'genccode', + 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icusmdt<(icu_ver_major).dat' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.c' ], + 'action': [ '<(PRODUCT_DIR)/genccode', + '-d', '<(SHARED_INTERMEDIATE_DIR)', + '<@(_inputs)' ], + }, + ], + # This file contains the small ICU data + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.c' ], + # for umachine.h + 'include_dirs': [ + '../../deps/icu/source/common', + ], + }]], # end icu_small == true + }]], # end OS != win + }, # end icudata + # icustubdata is a tiny (~1k) symbol with no ICU data in it. + # tools must link against it as they are generating the full data. + { + 'target_name': 'icustubdata', + 'type': '<(library)', + 'toolsets': [ 'host', 'target' ], + 'dependencies': [ 'icu_implementation' ], + 'sources': [ + '<@(icu_src_stubdata)' + ], + 'include_dirs': [ + '../../deps/icu/source/common', + ], + }, + # this target is for v8 consumption. 
+ # it is icuuc + stubdata + # it is only built for target + { + 'target_name': 'icuuc', + 'type': 'none', + 'toolsets': [ 'target' ], + 'dependencies': [ 'icuucx', 'icudata' ], + 'export_dependent_settings': [ 'icuucx', 'icudata' ], + }, + # This is the 'real' icuuc. + # tools can depend on 'icuuc + stubdata' + { + 'target_name': 'icuucx', + 'type': '<(library)', + 'dependencies': [ 'icu_implementation', 'icu_uconfig' ], + 'toolsets': [ 'host', 'target' ], + 'sources': [ + '<@(icu_src_common)' + ], + 'include_dirs': [ + '../../deps/icu/source/common', + ], + 'defines': [ + 'U_COMMON_IMPLEMENTATION=1', + ], + 'export_dependent_settings': [ 'icu_uconfig' ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../../deps/icu/source/common', + ], + 'conditions': [ + [ 'OS=="win"', { + 'link_settings': { + 'libraries': [ '-lAdvAPI32.Lib', '-lUser32.lib' ], + }, + }], + ], + }, + }, + # tools library + { + 'target_name': 'icutools', + 'type': '<(library)', + 'toolsets': [ 'host' ], + 'dependencies': [ 'icuucx', 'icui18n', 'icustubdata' ], + 'sources': [ + '<@(icu_src_tools)' + ], + 'include_dirs': [ + '../../deps/icu/source/tools/toolutil', + ], + 'defines': [ + 'U_TOOLUTIL_IMPLEMENTATION=1', + #'DEBUG=0', # http://bugs.icu-project.org/trac/ticket/10977 + ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../../deps/icu/source/tools/toolutil', + ], + }, + 'export_dependent_settings': [ 'icuucx', 'icui18n', 'icustubdata' ], + }, + # This tool is needed to rebuild .res files from .txt, + # or to build index (res_index.txt) files for small-icu + { + 'target_name': 'genrb', + 'type': 'executable', + 'toolsets': [ 'host' ], + 'dependencies': [ 'icutools', 'icuucx', 'icui18n' ], + 'sources': [ + '<@(icu_src_genrb)' + ], + # derb is a separate executable + # (which is not currently built) + 'sources!': [ + '<@(icu_src_derb)', + 'no-op.cc', + ], + }, + # This tool is used to rebuild res_index.res manifests + { + 'target_name': 'iculslocs', + 'toolsets': [ 'host' ], + 
'type': 'executable', + 'dependencies': [ 'icutools', 'icuucx', 'icui18n', 'icuio' ], + 'sources': [ + 'iculslocs.cc', + 'no-op.cc', + ], + }, + # This tool is used to package, unpackage, repackage .dat files + # and convert endianesses + { + 'target_name': 'icupkg', + 'toolsets': [ 'host' ], + 'type': 'executable', + 'dependencies': [ 'icutools', 'icuucx', 'icui18n' ], + 'sources': [ + '<@(icu_src_icupkg)', + 'no-op.cc', + ], + }, + # this is used to convert .dat directly into .obj + { + 'target_name': 'genccode', + 'toolsets': [ 'host' ], + 'type': 'executable', + 'dependencies': [ 'icutools', 'icuucx', 'icui18n' ], + 'sources': [ + '<@(icu_src_genccode)', + 'no-op.cc', + ], + }, + ], +} diff --git a/tools/icu/icu-system.gyp b/tools/icu/icu-system.gyp new file mode 100644 index 00000000000..44d4f5feff4 --- /dev/null +++ b/tools/icu/icu-system.gyp @@ -0,0 +1,18 @@ +# Copyright (c) 2014 IBM Corporation and Others. All Rights Reserved. + +# This variant is used for the '--with-intl=system-icu' option. +# 'configure' has already set 'libs' and 'cflags' - so, +# there's nothing to do in these targets. + +{ + 'targets': [ + { + 'target_name': 'icuuc', + 'type': 'none', + }, + { + 'target_name': 'icui18n', + 'type': 'none', + }, + ], +} diff --git a/tools/icu/icu_small.json b/tools/icu/icu_small.json new file mode 100644 index 00000000000..ddf7d1204e8 --- /dev/null +++ b/tools/icu/icu_small.json @@ -0,0 +1,47 @@ +{ + "copyright": "Copyright (c) 2014 IBM Corporation and Others. 
All Rights Reserved.", + "comment": "icutrim.py config: Trim down ICU to just English, needed for node.js use.", + "variables": { + "none": { + "only": [] + }, + "en_only": { + "only": [ + "root", + "en" + ] + }, + "leavealone": { + } + }, + "trees": { + "ROOT": "en_only", + "brkitr": "none", + "coll": "en_only", + "curr": "en_only", + "lang": "none", + "rbnf": "none", + "region": "none", + "zone": "en_only", + "converters": "none", + "stringprep": "none", + "translit": "none", + "brkfiles": "none", + "brkdict": "none", + "confusables": "none" + }, + "remove": [ + "cnvalias.icu", + "postalCodeData.res", + "uts46.nrm", + "genderList.res", + "brkitr/root.res", + "unames.icu" + ], + "keep": [ + "pool.res", + "supplementalData.res", + "zoneinfo64.res", + "likelySubtags.res" + ] +} diff --git a/tools/icu/iculslocs.cc b/tools/icu/iculslocs.cc new file mode 100644 index 00000000000..66becace0a4 --- /dev/null +++ b/tools/icu/iculslocs.cc @@ -0,0 +1,388 @@ +/* +********************************************************************** +* Copyright (C) 2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* Created 2014-06-20 by Steven R. Loomis +* +* See: http://bugs.icu-project.org/trac/ticket/10922 +* +*/ + +/* +WHAT IS THIS? + +Here's the problem: It's difficult to reconfigure ICU from the command +line without using the full makefiles. You can do a lot, but not +everything. + +Consider: + + $ icupkg -r 'ja*' icudt53l.dat + +Great, you've now removed the (main) Japanese data. But something's +still wrong-- res_index (and thus, getAvailable* functions) still +claim the locale is present. + +You are reading the source to a tool (using only public API C code) +that can solve this problem. Use as follows: + + $ iculslocs -i . -N icudt53l -b res_index.txt + +.. 
Generates a NEW res_index.txt (by looking at the .dat file, and +figuring out which locales are actually available. Has commented out +the ones which are no longer available: + + ... + it_SM {""} +// ja {""} +// ja_JP {""} + jgo {""} + ... + +Then you can build and in-place patch it with existing ICU tools: + $ genrb res_index.txt + $ icupkg -a res_index.res icudt53l.dat + +.. Now you have a patched icudt53l.dat that not only doesn't have +Japanese, it doesn't *claim* to have Japanese. + +*/ + +#include "string.h" +#include "charstr.h" // ICU internal header +#include <unicode/ustdio.h> +#include <unicode/ures.h> +#include <unicode/udata.h> + +const char* PROG = "iculslocs"; +const char* NAME = U_ICUDATA_NAME; // assume ICU data +const char* TREE = "ROOT"; +int VERBOSE = 0; + +#define RES_INDEX "res_index" +#define INSTALLEDLOCALES "InstalledLocales" + +CharString packageName; +const char* locale = RES_INDEX; // locale referring to our index + +// Prints the command-line help text to stdout. +void usage() { + u_printf("Usage: %s [options]\n", PROG); + u_printf( + "This program lists and optionally regenerates the locale " + "manifests\n" + " in ICU 'res_index.res' files.\n"); + u_printf( + " -i ICUDATA Set ICUDATA dir to ICUDATA.\n" + " NOTE: this must be the first option given.\n"); + u_printf(" -h This Help\n"); + u_printf(" -v Verbose Mode on\n"); + u_printf(" -l List locales to stdout\n"); + u_printf( + " if Verbose mode, then missing (unopenable)" + "locales\n" + " will be listed preceded by a '#'.\n"); + u_printf( + " -b res_index.txt Write 'corrected' bundle " + "to res_index.txt\n" + " missing bundles will be " + "OMITTED\n"); + u_printf( + " -T TREE Choose tree TREE\n" + " (TREE should be one of: \n" + " ROOT, brkitr, coll, curr, lang, rbnf, region, zone)\n"); + // see ureslocs.h and elsewhere + u_printf( + " -N NAME Choose name NAME\n" + " (default: '%s')\n", + U_ICUDATA_NAME); + u_printf( + "\nNOTE: for best results, this tool ought to be " + "linked against\n" + "stubdata. i.e.
'%s -l' SHOULD return an error with " + " no data.\n", + PROG); +} + +// Reports a failed UErrorCode in the local variable 'status' and makes the +// enclosing function return 1. Only usable in functions returning int. +#define ASSERT_SUCCESS(what) \ + if (U_FAILURE(status)) { \ + u_printf("%s:%d: %s: ERROR: %s %s\n", \ + __FILE__, \ + __LINE__, \ + PROG, \ + u_errorName(status), \ + what); \ + return 1; \ + } + +/** + * Rebuilds the global packageName from NAME and TREE: + * empty if NAME is "NONE", otherwise NAME, followed by + * U_TREE_SEPARATOR_STRING and TREE unless TREE is "ROOT". + * @param status changed from reference to pointer to match node.js style + */ +void calculatePackageName(UErrorCode* status) { + packageName.clear(); + if (strcmp(NAME, "NONE")) { + packageName.append(NAME, *status); + if (strcmp(TREE, "ROOT")) { + packageName.append(U_TREE_SEPARATOR_STRING, *status); + packageName.append(TREE, *status); + } + } + if (VERBOSE) { + u_printf("packageName: %s\n", packageName.data()); + } +} + +/** + * Does the locale exist? + * The answer is reported through *exists, NOT through the return value. + * Assumes calculatePackageName was called. + * @param loc name of the bundle to try to open in packageName + * @param exists set to TRUE if exists, FALSE otherwise. + * Changed from reference to pointer to match node.js style + * @return 0 on "OK" (success or resource-missing), + * 1 on "FAILURE" (unexpected error) + */ +int localeExists(const char* loc, UBool* exists) { + UErrorCode status = U_ZERO_ERROR; + if (VERBOSE > 1) { + u_printf("Trying to open %s:%s\n", packageName.data(), loc); + } + LocalUResourceBundlePointer aResource( + ures_openDirect(packageName.data(), loc, &status)); + *exists = FALSE; + if (U_SUCCESS(status)) { + *exists = true; + if (VERBOSE > 1) { + u_printf("%s:%s existed!\n", packageName.data(), loc); + } + return 0; + } else if (status == U_MISSING_RESOURCE_ERROR) { + *exists = false; + if (VERBOSE > 1) { + u_printf("%s:%s did NOT exist (%s)!\n", + packageName.data(), + loc, + u_errorName(status)); + } + return 0; // "good" failure + } else { + // some other failure..
+ u_printf("%s:%d: %s: ERROR %s opening %s:%s for test.\n", + __FILE__, + __LINE__, + u_errorName(status), + packageName.data(), + loc); + return 1; // abort + } +} + +void printIndent(const LocalUFILEPointer& bf, int indent) { + for (int i = 0; i < indent + 1; i++) { + u_fprintf(bf.getAlias(), " "); + } +} + +/** + * Dumps a table resource contents + * if lev==0, skips INSTALLEDLOCALES + * @return 0 for OK, 1 for err + */ +int dumpAllButInstalledLocales(int lev, + LocalUResourceBundlePointer& bund, + LocalUFILEPointer& bf, + UErrorCode& status) { + ures_resetIterator(bund.getAlias()); + const UBool isTable = (UBool)(ures_getType(bund.getAlias()) == URES_TABLE); + LocalUResourceBundlePointer t; + while (U_SUCCESS(status) && ures_hasNext(bund.getAlias())) { + t.adoptInstead(ures_getNextResource(bund.getAlias(), t.orphan(), &status)); + ASSERT_SUCCESS("while processing table"); + const char* key = ures_getKey(t.getAlias()); + if (VERBOSE > 1) { + u_printf("dump@%d: got key %s\n", lev, key); + } + if (lev == 0 && !strcmp(key, INSTALLEDLOCALES)) { + if (VERBOSE > 1) { + u_printf("dump: skipping '%s' as it must be evaluated.\n", key); + } + } else { + printIndent(bf, lev); + u_fprintf(bf.getAlias(), "%s", key); + switch (ures_getType(t.getAlias())) { + case URES_STRING: { + int32_t len = 0; + const UChar* s = ures_getString(t.getAlias(), &len, &status); + ASSERT_SUCCESS("getting string"); + u_fprintf(bf.getAlias(), ":string {\""); + u_file_write(s, len, bf.getAlias()); + u_fprintf(bf.getAlias(), "\"}"); + } break; + default: { + u_printf("ERROR: unhandled type in dumpAllButInstalledLocales().\n"); + return 1; + } break; + } + u_fprintf(bf.getAlias(), "\n"); + } + } + return 0; +} + +int list(const char* toBundle) { + UErrorCode status = U_ZERO_ERROR; + + LocalUFILEPointer bf; + + if (toBundle != NULL) { + if (VERBOSE) { + u_printf("writing to bundle %s\n", toBundle); + } + // we write UTF-8 with BOM only. No exceptions. 
+ bf.adoptInstead(u_fopen(toBundle, "w", "en_US_POSIX", "UTF-8")); + if (bf.isNull()) { + u_printf("ERROR: Could not open '%s' for writing.\n", toBundle); + return 1; + } + u_fputc(0xFEFF, bf.getAlias()); // write BOM + u_fprintf(bf.getAlias(), "// -*- Coding: utf-8; -*-\n//\n"); + } + + // first, calculate the bundle name. + calculatePackageName(&status); + ASSERT_SUCCESS("calculating package name"); + + if (VERBOSE) { + u_printf("\"locale\": %s\n", locale); + } + + LocalUResourceBundlePointer bund( + ures_openDirect(packageName.data(), locale, &status)); + ASSERT_SUCCESS("while opening the bundle"); + LocalUResourceBundlePointer installedLocales( + ures_getByKey(bund.getAlias(), INSTALLEDLOCALES, NULL, &status)); + ASSERT_SUCCESS("while fetching installed locales"); + + int32_t count = ures_getSize(installedLocales.getAlias()); + if (VERBOSE) { + u_printf("Locales: %d\n", count); + } + + if (bf.isValid()) { + // write the HEADER + u_fprintf(bf.getAlias(), + "// Warning this file is automatically generated\n" + "// Updated by %s based on %s:%s.txt\n", + PROG, + packageName.data(), + locale); + u_fprintf(bf.getAlias(), + "%s:table(nofallback) {\n" + " // First, everything besides InstalledLocales:\n", + locale); + if (dumpAllButInstalledLocales(0, bund, bf, status)) { + u_printf("Error dumping prolog for %s\n", toBundle); + return 1; + } + ASSERT_SUCCESS("while writing prolog"); // in case an error was missed + + u_fprintf(bf.getAlias(), + " %s:table { // %d locales in input %s.res\n", + INSTALLEDLOCALES, + count, + locale); + } + + // OK, now list them. 
+ LocalUResourceBundlePointer subkey; + + int validCount = 0; + for (int32_t i = 0; i < count; i++) { + subkey.adoptInstead(ures_getByIndex( + installedLocales.getAlias(), i, subkey.orphan(), &status)); + ASSERT_SUCCESS("while fetching an installed locale's name"); + + const char* key = ures_getKey(subkey.getAlias()); + if (VERBOSE > 1) { + u_printf("@%d: %s\n", i, key); + } + // now, see if the locale is installed.. + + UBool exists; + if (localeExists(key, &exists)) { + return 1; // get out. + } + if (exists) { + validCount++; + u_printf("%s\n", key); + if (bf.isValid()) { + u_fprintf(bf.getAlias(), " %s {\"\"}\n", key); + } + } else { + // missing locale: keep it in the bundle text, but commented out + if (bf.isValid()) { + u_fprintf(bf.getAlias(), "// %s {\"\"}\n", key); + } + if (VERBOSE) { + u_printf("#%s\n", key); // verbosity one - '' vs '#' + } + } + } + + if (bf.isValid()) { + u_fprintf(bf.getAlias(), " } // %d/%d valid\n", validCount, count); + // write the FOOTER (closes the top-level table) + u_fprintf(bf.getAlias(), "}\n"); + } + return 0; +} + +int main(int argc, const char* argv[]) { + PROG = argv[0]; + for (int i = 1; i < argc; i++) { + const char* arg = argv[i]; + int argsLeft = argc - i - 1; /* how many remain?
*/ + if (!strcmp(arg, "-v")) { + VERBOSE++; + } else if (!strcmp(arg, "-i") && (argsLeft >= 1)) { + if (i != 1) { + u_printf("ERROR: -i must be the first argument given.\n"); + usage(); + return 1; + } + const char* dir = argv[++i]; + u_setDataDirectory(dir); + if (VERBOSE) { + u_printf("ICUDATA is now %s\n", dir); + } + } else if (!strcmp(arg, "-T") && (argsLeft >= 1)) { + TREE = argv[++i]; + if (VERBOSE) { + u_printf("TREE is now %s\n", TREE); + } + } else if (!strcmp(arg, "-N") && (argsLeft >= 1)) { + NAME = argv[++i]; + if (VERBOSE) { + u_printf("NAME is now %s\n", NAME); + } + } else if (!strcmp(arg, "-?") || !strcmp(arg, "-h")) { + usage(); + return 0; + } else if (!strcmp(arg, "-l")) { + if (list(NULL)) { + return 1; + } + } else if (!strcmp(arg, "-b") && (argsLeft >= 1)) { + if (list(argv[++i])) { + return 1; + } + } else { + u_printf("Unknown or malformed option: %s\n", arg); + usage(); + return 1; + } + } +} + +// Local Variables: +// compile-command: "icurun iculslocs.cpp" +// End: diff --git a/tools/icu/icutrim.py b/tools/icu/icutrim.py new file mode 100755 index 00000000000..f6b54956c55 --- /dev/null +++ b/tools/icu/icutrim.py @@ -0,0 +1,338 @@ +#!/usr/bin/python +# +# Copyright (C) 2014 IBM Corporation and Others. All Rights Reserved. +# +# @author Steven R. Loomis +# +# This tool slims down an ICU data (.dat) file according to a config file. +# +# See: http://bugs.icu-project.org/trac/ticket/10922 +# +# Usage: +# Use "-h" to get help options. + +import sys +import shutil +# for utf-8 +reload(sys) +sys.setdefaultencoding("utf-8") + +import argparse +import os +import json +import re + +endian=sys.byteorder + +parser = argparse.ArgumentParser(description="ICU Datafile repackager. Example of use: \"mkdir tmp ; python icutrim.py -D ~/Downloads/icudt53l.dat -T tmp -F trim_en.json -O icudt53l.dat\" you will then find a smaller icudt53l.dat in 'tmp'. 
", + epilog="ICU tool, http://icu-project.org - master copy at http://source.icu-project.org/repos/icu/tools/trunk/scripts/icutrim.py") + +parser.add_argument("-P","--tool-path", + action="store", + dest="toolpath", + help="set the prefix directory for ICU tools") + +parser.add_argument("-D","--input-file", + action="store", + dest="datfile", + help="input data file (icudt__.dat)", + required=True) + +parser.add_argument("-F","--filter-file", + action="store", + dest="filterfile", + help="filter file (JSON format)", + required=True) + +parser.add_argument("-T","--tmp-dir", + action="store", + dest="tmpdir", + help="working directory.", + required=True) + +parser.add_argument("--delete-tmp", + action="count", + dest="deltmpdir", + help="delete working directory.", + default=0) + +parser.add_argument("-O","--outfile", + action="store", + dest="outfile", + help="outfile (NOT a full path)", + required=True) + +parser.add_argument("-v","--verbose", + action="count", + default=0) + +parser.add_argument('-e', '--endian', action='store', dest='endian', help='endian, big, little or host, your default is "%s".' % endian, default=endian, metavar='endianness') + + +args = parser.parse_args() + +if args.verbose>0: + print "Options: "+str(args) + +if (os.path.isdir(args.tmpdir) and args.deltmpdir): + if args.verbose>1: + print "Deleting tmp dir %s.." % (args.tmpdir) + shutil.rmtree(args.tmpdir) + +if not (os.path.isdir(args.tmpdir)): + os.mkdir(args.tmpdir) +else: + print "Please delete tmpdir %s before beginning." 
% args.tmpdir + sys.exit(1) + +if args.endian not in ("big","little","host"): + print "Unknown endianness: %s" % args.endian + sys.exit(1) + +if args.endian is "host": + args.endian = endian + +if not os.path.isdir(args.tmpdir): + print "Error, tmpdir not a directory: %s" % (args.tmpdir) + sys.exit(1) + +if not os.path.isfile(args.filterfile): + print "Filterfile doesn't exist: %s" % (args.filterfile) + sys.exit(1) + +if not os.path.isfile(args.datfile): + print "Datfile doesn't exist: %s" % (args.datfile) + sys.exit(1) + +if not args.datfile.endswith(".dat"): + print "Datfile doesn't end with .dat: %s" % (args.datfile) + sys.exit(1) + +outfile = os.path.join(args.tmpdir, args.outfile) + +if os.path.isfile(outfile): + print "Error, output file does exist: %s" % (outfile) + sys.exit(1) + +if not args.outfile.endswith(".dat"): + print "Outfile doesn't end with .dat: %s" % (args.outfile) + sys.exit(1) + +dataname=args.outfile[0:-4] + + +## TODO: need to improve this. Quotes, etc. +def runcmd(tool, cmd, doContinue=False): + if(args.toolpath): + cmd = os.path.join(args.toolpath, tool) + " " + cmd + else: + cmd = tool + " " + cmd + + if(args.verbose>4): + print "# " + cmd + + rc = os.system(cmd) + if rc is not 0 and not doContinue: + print "FAILED: %s" % cmd + sys.exit(1) + return rc + +## STEP 0 - read in json config +fi= open(args.filterfile, "rb") +config=json.load(fi) +fi.close() + +if (args.verbose > 6): + print config + +if(config.has_key("comment")): + print "%s: %s" % (args.filterfile, config["comment"]) + +## STEP 1 - copy the data file, swapping endianness +endian_letter = "l" + + +runcmd("icupkg", "-t%s %s %s""" % (endian_letter, args.datfile, outfile)) + +## STEP 2 - get listing +listfile = os.path.join(args.tmpdir,"icudata.lst") +runcmd("icupkg", "-l %s > %s""" % (outfile, listfile)) + +fi = open(listfile, 'rb') +items = fi.readlines() +items = [items[i].strip() for i in range(len(items))] +fi.close() + +itemset = set(items) + +if (args.verbose>1): + print 
"input file: %d items" % (len(items)) + +# list of all trees +trees = {} +RES_INDX = "res_index.res" +remove = None +# remove - always remove these +if config.has_key("remove"): + remove = set(config["remove"]) +else: + remove = set() + +# keep - always keep these +if config.has_key("keep"): + keep = set(config["keep"]) +else: + keep = set() + +def queueForRemoval(tree): + global remove + if not config.has_key("trees"): + # no config + return + if not config["trees"].has_key(tree): + return + mytree = trees[tree] + if(args.verbose>0): + print "* %s: %d items" % (tree, len(mytree["locs"])) + # do varible substitution for this tree here + if type(config["trees"][tree]) == str or type(config["trees"][tree]) == unicode: + treeStr = config["trees"][tree] + if(args.verbose>5): + print " Substituting $%s for tree %s" % (treeStr, tree) + if(not config.has_key("variables") or not config["variables"].has_key(treeStr)): + print " ERROR: no variable: variables.%s for tree %s" % (treeStr, tree) + sys.exit(1) + config["trees"][tree] = config["variables"][treeStr] + myconfig = config["trees"][tree] + if(args.verbose>4): + print " Config: %s" % (myconfig) + # Process this tree + if(len(myconfig)==0 or len(mytree["locs"])==0): + if(args.verbose>2): + print " No processing for %s - skipping" % (tree) + else: + only = None + if myconfig.has_key("only"): + only = set(myconfig["only"]) + if (len(only)==0) and (mytree["treeprefix"] != ""): + thePool = "%spool.res" % (mytree["treeprefix"]) + if (thePool in itemset): + if(args.verbose>0): + print "Removing %s because tree %s is empty." 
% (thePool, tree) + remove.add(thePool) + else: + print "tree %s - no ONLY" + for l in range(len(mytree["locs"])): + loc = mytree["locs"][l] + if (only is not None) and not loc in only: + # REMOVE loc + toRemove = "%s%s%s" % (mytree["treeprefix"], loc, mytree["extension"]) + if(args.verbose>6): + print "Queueing for removal: %s" % toRemove + remove.add(toRemove) + +def addTreeByType(tree, mytree): + if(args.verbose>1): + print "(considering %s): %s" % (tree, mytree) + trees[tree] = mytree + mytree["locs"]=[] + for i in range(len(items)): + item = items[i] + if item.startswith(mytree["treeprefix"]) and item.endswith(mytree["extension"]): + mytree["locs"].append(item[len(mytree["treeprefix"]):-4]) + # now, process + queueForRemoval(tree) + +addTreeByType("converters",{"treeprefix":"", "extension":".cnv"}) +addTreeByType("stringprep",{"treeprefix":"", "extension":".spp"}) +addTreeByType("translit",{"treeprefix":"translit/", "extension":".res"}) +addTreeByType("brkfiles",{"treeprefix":"brkitr/", "extension":".brk"}) +addTreeByType("brkdict",{"treeprefix":"brkitr/", "extension":"dict"}) +addTreeByType("confusables",{"treeprefix":"", "extension":".cfu"}) + +for i in range(len(items)): + item = items[i] + if item.endswith(RES_INDX): + treeprefix = item[0:item.rindex(RES_INDX)] + tree = None + if treeprefix == "": + tree = "ROOT" + else: + tree = treeprefix[0:-1] + if(args.verbose>6): + print "procesing %s" % (tree) + trees[tree] = { "extension": ".res", "treeprefix": treeprefix, "hasIndex": True } + # read in the resource list for the tree + treelistfile = os.path.join(args.tmpdir,"%s.lst" % tree) + runcmd("iculslocs", "-i %s -N %s -T %s -l > %s" % (outfile, dataname, tree, treelistfile)) + fi = open(treelistfile, 'rb') + treeitems = fi.readlines() + trees[tree]["locs"] = [treeitems[i].strip() for i in range(len(treeitems))] + fi.close() + if(not config.has_key("trees") or not config["trees"].has_key(tree)): + print " Warning: filter file %s does not mention trees.%s - 
will be kept as-is" % (args.filterfile, tree) + else: + queueForRemoval(tree) + +def removeList(count=0): + # don't allow "keep" items to creep in here. + global remove + remove = remove - keep + if(count > 10): + print "Giving up - %dth attempt at removal." % count + sys.exit(1) + if(args.verbose>1): + print "%d items to remove - try #%d" % (len(remove),count) + if(len(remove)>0): + oldcount = len(remove) + hackerrfile=os.path.join(args.tmpdir, "REMOVE.err") + removefile = os.path.join(args.tmpdir, "REMOVE.lst") + fi = open(removefile, 'wb') + for i in remove: + print >>fi, i + fi.close() + rc = runcmd("icupkg","-r %s %s 2> %s" % (removefile,outfile,hackerrfile),True) + if rc is not 0: + if(args.verbose>5): + print "## Damage control, trying to parse stderr from icupkg.." + fi = open(hackerrfile, 'rb') + erritems = fi.readlines() + fi.close() + #Item zone/zh_Hant_TW.res depends on missing item zone/zh_Hant.res + pat = re.compile("""^Item ([^ ]+) depends on missing item ([^ ]+).*""") + for i in range(len(erritems)): + line = erritems[i].strip() + m = pat.match(line) + if m: + toDelete = m.group(1) + if(args.verbose > 5): + print "<< %s added to delete" % toDelete + remove.add(toDelete) + else: + print "ERROR: could not match errline: %s" % line + sys.exit(1) + if(args.verbose > 5): + print " now %d items to remove" % len(remove) + if(oldcount == len(remove)): + print " ERROR: could not add any mor eitems to remove. Fail." 
+ sys.exit(1) + removeList(count+1) + +# fire it up +removeList(1) + +# now, fixup res_index, one at a time +for tree in trees: + # skip trees that don't have res_index + if not trees[tree].has_key("hasIndex"): + continue + treebunddir = args.tmpdir + if(trees[tree]["treeprefix"]): + treebunddir = os.path.join(treebunddir, trees[tree]["treeprefix"]) + if not (os.path.isdir(treebunddir)): + os.mkdir(treebunddir) + treebundres = os.path.join(treebunddir,RES_INDX) + treebundtxt = "%s.txt" % (treebundres[0:-4]) + runcmd("iculslocs", "-i %s -N %s -T %s -b %s" % (outfile, dataname, tree, treebundtxt)) + runcmd("genrb","-d %s -s %s res_index.txt" % (treebunddir, treebunddir)) + runcmd("icupkg","-s %s -a %s%s %s" % (args.tmpdir, trees[tree]["treeprefix"], RES_INDX, outfile)) diff --git a/tools/icu/no-op.cc b/tools/icu/no-op.cc new file mode 100644 index 00000000000..08d1599a264 --- /dev/null +++ b/tools/icu/no-op.cc @@ -0,0 +1,18 @@ +/* +********************************************************************** +* Copyright (C) 2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +*/ + +// +// ICU needs the C++, not the C linker to be used, even if the main function +// is in C. +// +// This is a dummy function just to get gyp to compile some internal +// tools as C++. +// +// It should not appear in production node binaries. 
+ +extern void icu_dummy_cxx() {} diff --git a/vcbuild.bat b/vcbuild.bat index 0c9b0b285a6..bd1afd77ec5 100644 --- a/vcbuild.bat +++ b/vcbuild.bat @@ -35,6 +35,7 @@ set noetw_msi_arg= set noperfctr= set noperfctr_arg= set noperfctr_msi_arg= +set i18n_arg= :next-arg if "%1"=="" goto args-done @@ -62,6 +63,8 @@ if /i "%1"=="test" set test=test&goto arg-ok if /i "%1"=="msi" set msi=1&set licensertf=1&goto arg-ok if /i "%1"=="upload" set upload=1&goto arg-ok if /i "%1"=="jslint" set jslint=1&goto arg-ok +if /i "%1"=="small-icu" set i18n_arg=%1&goto arg-ok +if /i "%1"=="full-icu" set i18n_arg=%1&goto arg-ok echo Warning: ignoring invalid command line option `%1`. @@ -80,6 +83,9 @@ if defined nosnapshot set nosnapshot_arg=--without-snapshot if defined noetw set noetw_arg=--without-etw& set noetw_msi_arg=/p:NoETW=1 if defined noperfctr set noperfctr_arg=--without-perfctr& set noperfctr_msi_arg=/p:NoPerfCtr=1 +if "%i18n_arg%"=="full-icu" set i18n_arg=--with-intl=full-icu +if "%i18n_arg%"=="small-icu" set i18n_arg=--with-intl=small-icu + :project-gen @rem Skip project generation if requested. if defined noprojgen goto msbuild @@ -89,7 +95,7 @@ if defined NIGHTLY set TAG=nightly-%NIGHTLY% @rem Generate the VS project. SETLOCAL if defined VS100COMNTOOLS call "%VS100COMNTOOLS%\VCVarsQueryRegistry.bat" - python configure %debug_arg% %nosnapshot_arg% %noetw_arg% %noperfctr_arg% --dest-cpu=%target_arch% --tag=%TAG% + python configure %i18n_arg% %debug_arg% %nosnapshot_arg% %noetw_arg% %noperfctr_arg% --dest-cpu=%target_arch% --tag=%TAG% if errorlevel 1 goto create-msvs-files-failed if not exist node.sln goto create-msvs-files-failed echo Project files generated.