diff --git a/.gitignore b/.gitignore index e00450690da..f5b0105840e 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ ipch/ /test/addons/doc-*/ email.md deps/v8-* +deps/icu ./node_modules .svn/ diff --git a/Makefile b/Makefile index 11304e118f3..fb854603d8b 100644 --- a/Makefile +++ b/Makefile @@ -406,7 +406,7 @@ CPPLINT_EXCLUDE += src/queue.h CPPLINT_EXCLUDE += src/tree.h CPPLINT_EXCLUDE += src/v8abbr.h -CPPLINT_FILES = $(filter-out $(CPPLINT_EXCLUDE), $(wildcard src/*.cc src/*.h src/*.c)) +CPPLINT_FILES = $(filter-out $(CPPLINT_EXCLUDE), $(wildcard src/*.cc src/*.h src/*.c tools/icu/*.h tools/icu/*.cc)) cpplint: @$(PYTHON) tools/cpplint.py $(CPPLINT_FILES) diff --git a/README.md b/README.md index f1301a0df76..0032c63c073 100644 --- a/README.md +++ b/README.md @@ -19,17 +19,6 @@ make make install ``` -With libicu i18n support: - -```sh -svn checkout --force --revision 214189 \ - http://src.chromium.org/svn/trunk/deps/third_party/icu46 \ - deps/v8/third_party/icu46 -./configure --with-icu-path=deps/v8/third_party/icu46/icu.gyp -make -make install -``` - If your python binary is in a non-standard location or has a non-standard name, run the following instead: @@ -47,7 +36,9 @@ Prerequisites (Windows only): Windows: - vcbuild nosign +```sh +vcbuild nosign +``` You can download pre-built binaries for various operating systems from [http://nodejs.org/download/](http://nodejs.org/download/). The Windows @@ -92,6 +83,35 @@ make doc man doc/node.1 ``` +### To build `Intl` (ECMA-402) support: + +*Note:* more docs, including how to reduce disk footprint, are on +[the wiki](https://github.com/joyent/node/wiki/Intl). 
def glob_to_var(dir_base, dir_sub):
  """Return the C/C++ source and header files directly inside a directory.

  dir_base -- directory that dir_sub is resolved against on disk.
  dir_sub  -- sub-path to scan; it is also used as the prefix of every
              returned entry, so results look like '<dir_sub>/<file>'.

  Only the top level of dir_base/dir_sub is inspected (sub-directories are
  not descended into).  Files ending in .cpp, .c or .h are kept.  The result
  is sorted so the generated build configuration does not depend on the
  order in which the filesystem happens to list the directory.  A missing
  directory yields an empty list.
  """
  dir_all = os.path.join(dir_base, dir_sub)
  # os.walk() reports the top directory first; returning from inside the
  # loop keeps the scan non-recursive.  If the directory does not exist the
  # walk produces nothing and we fall through to the empty result.
  for _path, _dirs, names in os.walk(dir_all):
    return ['%s/%s' % (dir_sub, name)
            for name in sorted(names)
            if name.endswith(('.cpp', '.c', '.h'))]
  return []
def configure_intl(o):
  """Fill in the gyp variables that select Intl (ECMA-402) support.

  Reads options.with_intl and options.with_icu_path and updates
  o['variables'] (plus o['libraries'] and o['cflags'] for system-icu).
  Any configuration error is reported on stdout and ends the process with
  exit status 1.

  Recognised modes:
    --with-icu-path=<gyp>   use the given icu.gyp (Chromium version)
    --with-intl=none        no Intl support (also the default when unset)
    --with-intl=small-icu   ICU built from deps/icu, English-only data
    --with-intl=full-icu    ICU built from deps/icu, full data
    --with-intl=system-icu  ICU located through pkg-config
  """
  # small ICU is off by default.
  # always set icu_small, node.gyp depends on it being defined.
  o['variables']['icu_small'] = b(False)

  with_intl = options.with_intl
  have_icu_path = bool(options.with_icu_path)
  if have_icu_path and with_intl:
    print('Error: Cannot specify both --with-icu-path and --with-intl')
    sys.exit(1)
  elif have_icu_path:
    # Chromium .gyp mode: --with-icu-path
    o['variables']['v8_enable_i18n_support'] = 1
    # use the .gyp given
    o['variables']['icu_gyp_path'] = options.with_icu_path
    return

  # --with-intl=<mode>
  if with_intl == 'none' or with_intl is None:
    o['variables']['v8_enable_i18n_support'] = 0
    return  # no Intl
  elif with_intl == 'small-icu':
    # small ICU (English only)
    o['variables']['v8_enable_i18n_support'] = 1
    o['variables']['icu_small'] = b(True)
  elif with_intl == 'full-icu':
    # full ICU
    o['variables']['v8_enable_i18n_support'] = 1
  elif with_intl == 'system-icu':
    # ICU from pkg-config.
    o['variables']['v8_enable_i18n_support'] = 1
    pkgicu = pkg_config('icu-i18n')
    if not pkgicu:
      print('Error: could not load pkg-config data for "icu-i18n".')
      print('See above errors or the README.md.')
      sys.exit(1)
    (libs, cflags) = pkgicu
    o['libraries'] += libs.split()
    o['cflags'] += cflags.split()
    # use the "system" .gyp
    o['variables']['icu_gyp_path'] = 'tools/icu/icu-system.gyp'
    return
  else:
    print('Error: unknown value --with-intl=%s' % with_intl)
    sys.exit(1)
  # Note: non-ICU implementations could use other 'with_intl'
  # values.

  # ICU mode. (icu-generic.gyp)
  o['variables']['icu_gyp_path'] = 'tools/icu/icu-generic.gyp'
  # ICU source dir relative to root
  icu_full_path = os.path.join(root_dir, 'deps/icu')
  o['variables']['icu_path'] = icu_full_path
  if not os.path.isdir(icu_full_path):
    print('Error: ICU path is not a directory: %s' % (icu_full_path))
    sys.exit(1)

  # Now, what version of ICU is it? We just need the "major", such as 54.
  # uvernum.h contains it as a #define.
  uvernum_h = os.path.join(icu_full_path, 'source/common/unicode/uvernum.h')
  if not os.path.isfile(uvernum_h):
    print('Error: could not load %s - is ICU installed?' % uvernum_h)
    sys.exit(1)
  icu_ver_major = None
  match_version = re.compile(
      r'^\s*#define\s+U_ICU_VERSION_SHORT\s+"([^"]*)".*')
  # 'with' makes sure the header is closed again once it has been scanned.
  with open(uvernum_h) as uvernum:
    for line in uvernum:
      m = match_version.match(line)
      if m:
        icu_ver_major = m.group(1)
  if not icu_ver_major:
    print('Could not read U_ICU_VERSION_SHORT version from %s' % uvernum_h)
    sys.exit(1)

  icu_endianness = sys.byteorder[0]  # TODO(srl295): EBCDIC should be 'e'
  o['variables']['icu_ver_major'] = icu_ver_major
  o['variables']['icu_endianness'] = icu_endianness
  icu_data_file_l = 'icudt%s%s.dat' % (icu_ver_major, 'l')
  icu_data_file = 'icudt%s%s.dat' % (icu_ver_major, icu_endianness)

  # icu_data_path is relative to configure (used for the existence check);
  # icu_data_in is handed to tools/icu/icu-generic.gyp, so it has to use the
  # same '../../deps/icu/...' form as every other path given to that file.
  icu_data_in_dir = '../../deps/icu/source/data/in'
  icu_data_path = os.path.join(icu_full_path,
                               'source/data/in',
                               icu_data_file_l)
  icu_data_in = os.path.join(icu_data_in_dir, icu_data_file_l)
  if not os.path.isfile(icu_data_path) and icu_endianness != 'l':
    # No little-endian file shipped: use the host-endian one instead.
    icu_data_path = os.path.join(icu_full_path,
                                 'source/data/in',
                                 icu_data_file)
    icu_data_in = os.path.join(icu_data_in_dir, icu_data_file)
  # this is the input '.dat' file to use .. icudt*.dat
  # may be little-endian if from a icu-project.org tarball
  o['variables']['icu_data_in'] = icu_data_in
  # this is the icudt*.dat file which node will be using (platform endianness)
  o['variables']['icu_data_file'] = icu_data_file
  if not os.path.isfile(icu_data_path):
    print('Error: ICU prebuilt data file %s does not exist.' % icu_data_path)
    print('See the README.md.')
    # .. and we're not about to build it from .gyp!
    sys.exit(1)

  # map from variable name to subdirs
  icu_src = {
    'stubdata': 'stubdata',
    'common': 'common',
    'i18n': 'i18n',
    'io': 'io',
    'tools': 'tools/toolutil',
    'genccode': 'tools/genccode',
    'genrb': 'tools/genrb',
    'icupkg': 'tools/icupkg',
  }
  # this creates a variable icu_src_XXX for each of the subdirs
  # with a list of the src files to use
  for i in icu_src:
    var = 'icu_src_%s' % i
    path = '../../deps/icu/source/%s' % icu_src[i]
    o['variables'][var] = glob_to_var('tools/icu', path)
  return  # end of configure_intl
"node_dtrace.h" #endif @@ -135,6 +139,11 @@ static node_module* modpending; static node_module* modlist_builtin; static node_module* modlist_addon; +#if defined(NODE_HAVE_I18N_SUPPORT) +// Path to ICU data (for i18n / Intl) +static const char* icu_data_dir = NULL; +#endif + // used by C++ modules as well bool no_deprecation = false; @@ -2953,6 +2962,14 @@ static void PrintHelp() { " --trace-deprecation show stack traces on deprecations\n" " --v8-options print v8 command line options\n" " --max-stack-size=val set max v8 stack size (bytes)\n" +#if defined(NODE_HAVE_I18N_SUPPORT) + " --icu-data-dir=dir set ICU data load path to dir\n" + " (overrides NODE_ICU_DATA)\n" +#if !defined(NODE_HAVE_SMALL_ICU) + " Note: linked-in ICU data is\n" + " present.\n" +#endif +#endif "\n" "Environment variables:\n" #ifdef _WIN32 @@ -2964,6 +2981,12 @@ static void PrintHelp() { "NODE_MODULE_CONTEXTS Set to 1 to load modules in their own\n" " global contexts.\n" "NODE_DISABLE_COLORS Set to 1 to disable colors in the REPL\n" +#if defined(NODE_HAVE_I18N_SUPPORT) + "NODE_ICU_DATA Data path for ICU (Intl object) data\n" +#if !defined(NODE_HAVE_SMALL_ICU) + " (will extend linked-in data)\n" +#endif +#endif "\n" "Documentation can be found at http://nodejs.org/\n"); } @@ -3054,6 +3077,10 @@ static void ParseArgs(int* argc, } else if (strcmp(arg, "--v8-options") == 0) { new_v8_argv[new_v8_argc] = "--help"; new_v8_argc += 1; +#if defined(NODE_HAVE_I18N_SUPPORT) + } else if (strncmp(arg, "--icu-data-dir=", 15) == 0) { + icu_data_dir = arg + 15; +#endif } else { // V8 option. Pass through as-is. new_v8_argv[new_v8_argc] = arg; @@ -3410,6 +3437,18 @@ void Init(int* argc, } } +#if defined(NODE_HAVE_I18N_SUPPORT) + if (icu_data_dir == NULL) { + // if the parameter isn't given, use the env variable. + icu_data_dir = getenv("NODE_ICU_DATA"); + } + // Initialize ICU. + // If icu_data_dir is NULL here, it will load the 'minimal' data. 
+ if (!i18n::InitializeICUDirectory(icu_data_dir)) { + FatalError(NULL, "Could not initialize ICU " + "(check NODE_ICU_DATA or --icu-data-dir parameters)"); + } +#endif // The const_cast doesn't violate conceptual const-ness. V8 doesn't modify // the argv array or the elements it points to. V8::SetFlagsFromCommandLine(&v8_argc, const_cast(v8_argv), true); diff --git a/src/node_i18n.cc b/src/node_i18n.cc new file mode 100644 index 00000000000..6d6144dc782 --- /dev/null +++ b/src/node_i18n.cc @@ -0,0 +1,88 @@ +// Copyright Joyent, Inc. and other Node contributors. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit +// persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +// USE OR OTHER DEALINGS IN THE SOFTWARE. + + +/* + * notes: by srl295 + * - When in NODE_HAVE_SMALL_ICU mode, ICU is linked against "stub" (null) data + * ( stubdata/libicudata.a ) containing nothing, no data, and it's also + * linked against a "small" data file which the SMALL_ICUDATA_ENTRY_POINT + * macro names. That's the "english+root" data. 
+ * + * If icu_data_path is non-null, the user has provided a path and we assume + * it goes somewhere useful. We set that path in ICU, and exit. + * If icu_data_path is null, they haven't set a path and we want the + * "english+root" data. We call + * udata_setCommonData(SMALL_ICUDATA_ENTRY_POINT,...) + * to load up the english+root data. + * + * - when NOT in NODE_HAVE_SMALL_ICU mode, ICU is linked directly with its full + * data. All of the variables and command line options for changing data at + * runtime are disabled, as they wouldn't fully override the internal data. + * See: http://bugs.icu-project.org/trac/ticket/10924 + */ + + +#include "node_i18n.h" + +#if defined(NODE_HAVE_I18N_SUPPORT) + +#include +#include + +#ifdef NODE_HAVE_SMALL_ICU +/* if this is defined, we have a 'secondary' entry point. + compare following to utypes.h defs for U_ICUDATA_ENTRY_POINT */ +#define SMALL_ICUDATA_ENTRY_POINT \ + SMALL_DEF2(U_ICU_VERSION_MAJOR_NUM, U_LIB_SUFFIX_C_NAME) +#define SMALL_DEF2(major, suff) SMALL_DEF(major, suff) +#ifndef U_LIB_SUFFIX_C_NAME +#define SMALL_DEF(major, suff) icusmdt##major##_dat +#else +#define SMALL_DEF(major, suff) icusmdt##suff##major##_dat +#endif + +extern "C" const char U_DATA_API SMALL_ICUDATA_ENTRY_POINT[]; +#endif + +namespace node { +namespace i18n { + +bool InitializeICUDirectory(const char* icu_data_path) { + if (icu_data_path != NULL) { + u_setDataDirectory(icu_data_path); + return true; // no error + } else { + UErrorCode status = U_ZERO_ERROR; +#ifdef NODE_HAVE_SMALL_ICU + // install the 'small' data. + udata_setCommonData(&SMALL_ICUDATA_ENTRY_POINT, &status); +#else // !NODE_HAVE_SMALL_ICU + // no small data, so nothing to do. 
+#endif // !NODE_HAVE_SMALL_ICU + return (status == U_ZERO_ERROR); + } +} + +} // namespace i18n +} // namespace node + +#endif // NODE_HAVE_I18N_SUPPORT diff --git a/src/node_i18n.h b/src/node_i18n.h new file mode 100644 index 00000000000..f6807a911ec --- /dev/null +++ b/src/node_i18n.h @@ -0,0 +1,39 @@ +// Copyright Joyent, Inc. and other Node contributors. +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the +// "Software"), to deal in the Software without restriction, including +// without limitation the rights to use, copy, modify, merge, publish, +// distribute, sublicense, and/or sell copies of the Software, and to permit +// persons to whom the Software is furnished to do so, subject to the +// following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN +// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +// USE OR OTHER DEALINGS IN THE SOFTWARE. + +#ifndef SRC_NODE_I18N_H_ +#define SRC_NODE_I18N_H_ + +#include "node.h" + +#if defined(NODE_HAVE_I18N_SUPPORT) + +namespace node { +namespace i18n { + +NODE_EXTERN bool InitializeICUDirectory(const char* icu_data_path); + +} // namespace i18n +} // namespace node + +#endif // NODE_HAVE_I18N_SUPPORT + +#endif // SRC_NODE_I18N_H_ diff --git a/tools/icu/README.md b/tools/icu/README.md new file mode 100644 index 00000000000..40d5287d9f1 --- /dev/null +++ b/tools/icu/README.md @@ -0,0 +1,26 @@ +Notes about the icu directory. 
+=== + +The files in this directory were written for the node.js effort. It's +the intent of their author (Steven R. Loomis / srl295) to merge them +upstream into ICU, pending much discussion within the ICU-PMC. + +`icu_small.json` is somewhat node-specific as it specifies a "small ICU" +configuration file for the `icutrim.py` script. `icutrim.py` and +`iculslocs.cpp` may themselves be superseded by components built into +ICU in the future. + +The following tickets were opened during this work, and their +resolution may inform the reader as to the current state of icu-trim +upstream: + + * [#10919](http://bugs.icu-project.org/trac/ticket/10919) + (experimental branch - may copy any source patches here) + * [#10922](http://bugs.icu-project.org/trac/ticket/10922) + (data packaging improvements) + * [#10923](http://bugs.icu-project.org/trac/ticket/10923) + (rewrite data building in python) + +When/if components (not including the `.json` file) are merged into +ICU, this code and `configure` will be updated to detect and use those +variants rather than the ones in this directory. diff --git a/tools/icu/icu-generic.gyp b/tools/icu/icu-generic.gyp new file mode 100644 index 00000000000..fa5e5773b2b --- /dev/null +++ b/tools/icu/icu-generic.gyp @@ -0,0 +1,414 @@ +# Copyright (c) IBM Corporation and Others. All Rights Reserved. +# very loosely based on icu.gyp from Chromium: +# Copyright (c) 2012 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + + +{ + 'variables': { + 'icu_src_derb': [ '../../deps/icu/source/tools/genrb/derb.c' ], + }, + 'targets': [ + { + # a target to hold uconfig defines. + # for now these are hard coded, but could be defined. 
+ 'target_name': 'icu_uconfig', + 'type': 'none', + 'toolsets': [ 'host', 'target' ], + 'direct_dependent_settings': { + 'defines': [ + 'UCONFIG_NO_LEGACY_CONVERSION=1', + 'UCONFIG_NO_IDNA=1', + 'UCONFIG_NO_TRANSLITERATION=1', + 'UCONFIG_NO_SERVICE=1', + 'UCONFIG_NO_REGULAR_EXPRESSIONS=1', + 'U_ENABLE_DYLOAD=0', + 'U_STATIC_IMPLEMENTATION=1', + # TODO(srl295): reenable following pending + # https://code.google.com/p/v8/issues/detail?id=3345 + # (saves some space) + 'UCONFIG_NO_BREAK_ITERATION=0', + ], + } + }, + { + # a target to hold common settings. + # make any target that is ICU implementation depend on this. + 'target_name': 'icu_implementation', + 'toolsets': [ 'host', 'target' ], + 'type': 'none', + 'direct_dependent_settings': { + 'conditions': [ + [ 'os_posix == 1 and OS != "mac" and OS != "ios"', { + 'cflags': [ '-Wno-deprecated-declarations' ], + 'cflags_cc': [ '-frtti' ], + }], + [ 'OS == "mac" or OS == "ios"', { + 'xcode_settings': {'GCC_ENABLE_CPP_RTTI': 'YES' }, + }], + [ 'OS == "win"', { + 'msvs_settings': { + 'VCCLCompilerTool': {'RuntimeTypeInfo': 'true'}, + } + }], + ], + 'msvs_settings': { + 'VCCLCompilerTool': { + 'RuntimeTypeInfo': 'true', + 'ExceptionHandling': '1', + }, + }, + 'configurations': { + # TODO: why does this need to be redefined for Release and Debug? + # Maybe this should be pushed into common.gypi with an "if v8 i18n"? 
+ 'Release': { + 'msvs_settings': { + 'VCCLCompilerTool': { + 'RuntimeTypeInfo': 'true', + 'ExceptionHandling': '1', + }, + }, + }, + 'Debug': { + 'msvs_settings': { + 'VCCLCompilerTool': { + 'RuntimeTypeInfo': 'true', + 'ExceptionHandling': '1', + }, + }, + }, + }, + 'defines': [ + 'U_ATTRIBUTE_DEPRECATED=', + '_CRT_SECURE_NO_DEPRECATE=', + 'U_STATIC_IMPLEMENTATION=1', + ], + }, + }, + { + 'target_name': 'icui18n', + 'type': '<(library)', + 'toolsets': [ 'host', 'target' ], + 'sources': [ + '<@(icu_src_i18n)' + ], + 'include_dirs': [ + '../../deps/icu/source/i18n', + ], + 'defines': [ + 'U_I18N_IMPLEMENTATION=1', + ], + 'dependencies': [ 'icuucx', 'icu_implementation', 'icu_uconfig' ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../../deps/icu/source/i18n', + ], + }, + 'export_dependent_settings': [ 'icuucx' ], + }, + # this library is only built for derb.. + { + 'target_name': 'icuio', + 'type': '<(library)', + 'toolsets': [ 'host' ], + 'sources': [ + '<@(icu_src_io)' + ], + 'include_dirs': [ + '../../deps/icu/source/io', + ], + 'defines': [ + 'U_IO_IMPLEMENTATION=1', + ], + 'dependencies': [ 'icuucx', 'icui18n', 'icu_implementation', 'icu_uconfig' ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../../deps/icu/source/io', + ], + }, + 'export_dependent_settings': [ 'icuucx', 'icui18n' ], + }, + # This exports actual ICU data + { + 'target_name': 'icudata', + 'type': '<(library)', + 'toolsets': [ 'target' ], + 'conditions': [ + [ 'OS == "win"', { + 'conditions': [ + [ 'icu_small == "false"', { # and OS=win + # full data - just build the full data file, then we are done. 
+ 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'dependencies': [ 'genccode#host' ], + 'actions': [ + { + 'action_name': 'icudata', + 'inputs': [ '<(icu_data_in)' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'action': [ '<(PRODUCT_DIR)/genccode', + '-o', + '-d', '<(SHARED_INTERMEDIATE_DIR)', + '-n', 'icudata', + '-e', 'icudt<(icu_ver_major)', + '<@(_inputs)' ], + }, + ], + }, { # icu_small == TRUE and OS == win + # link against stub data primarily + # then, use icupkg and genccode to rebuild data + 'dependencies': [ 'icustubdata', 'genccode#host', 'icupkg#host', 'genrb#host', 'iculslocs#host' ], + 'export_dependent_settings': [ 'icustubdata' ], + 'actions': [ + { + # trim down ICU + 'action_name': 'icutrim', + 'inputs': [ '<(icu_data_in)', 'icu_small.json' ], + 'outputs': [ '../../out/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'action': [ 'python', + 'icutrim.py', + '-P', '../../<(CONFIGURATION_NAME)', + '-D', '<(icu_data_in)', + '--delete-tmp', + '-T', '../../out/icutmp', + '-F', 'icu_small.json', + '-O', 'icudt<(icu_ver_major)<(icu_endianness).dat', + '-v' ], + }, + { + # build final .dat -> .obj + 'action_name': 'genccode', + 'inputs': [ '../../out/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'outputs': [ '../../out/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + 'action': [ '../../<(CONFIGURATION_NAME)/genccode', + '-o', + '-d', '../../out/', + '-n', 'icudata', + '-e', 'icusmdt<(icu_ver_major)', + '<@(_inputs)' ], + }, + ], + # This file contains the small ICU data. + 'sources': [ '../../out/icudt<(icu_ver_major)<(icu_endianness)_dat.obj' ], + } ] ], #end of OS==win and icu_small == true + }, { # OS != win + 'conditions': [ + [ 'icu_small == "false"', { + # full data - just build the full data file, then we are done. 
+ 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.c' ], + 'dependencies': [ 'genccode#host', 'icupkg#host', 'icu_implementation#host', 'icu_uconfig' ], + 'include_dirs': [ + '../../deps/icu/source/common', + ], + 'actions': [ + { + # Swap endianness (if needed), or at least copy the file + 'action_name': 'icupkg', + 'inputs': [ '<(icu_data_in)' ], + 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'action': [ '<(PRODUCT_DIR)/icupkg', + '-t<(icu_endianness)', + '<@(_inputs)', + '<@(_outputs)', + ], + }, + { + # Rename without the endianness marker + 'action_name': 'copy', + 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major).dat' ], + 'action': [ 'cp', + '<@(_inputs)', + '<@(_outputs)', + ], + }, + { + 'action_name': 'icudata', + 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major).dat' ], + 'outputs':[ '<(SHARED_INTERMEDIATE_DIR)/icudt<(icu_ver_major)_dat.c' ], + 'action': [ '<(PRODUCT_DIR)/genccode', + '-e', 'icudt<(icu_ver_major)', + '-d', '<(SHARED_INTERMEDIATE_DIR)', + '-f', 'icudt<(icu_ver_major)_dat', + '<@(_inputs)' ], + }, + ], # end actions + }, { # icu_small == true ( and OS != win ) + # link against stub data (as primary data) + # then, use icupkg and genccode to rebuild small data + 'dependencies': [ 'icustubdata', 'genccode#host', 'icupkg#host', 'genrb#host', 'iculslocs#host', + 'icu_implementation', 'icu_uconfig' ], + 'export_dependent_settings': [ 'icustubdata' ], + 'actions': [ + { + # trim down ICU + 'action_name': 'icutrim', + 'inputs': [ '<(icu_data_in)', 'icu_small.json' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'action': [ 'python', + 'icutrim.py', + '-P', '<(PRODUCT_DIR)', + '-D', '<(icu_data_in)', + '--delete-tmp', + '-T', '<(SHARED_INTERMEDIATE_DIR)/icutmp', + '-F', 'icu_small.json', + '-O', 
'icudt<(icu_ver_major)<(icu_endianness).dat', + '-v' ], + }, { + # rename to get the final entrypoint name right + 'action_name': 'rename', + 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icudt<(icu_ver_major)<(icu_endianness).dat' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icusmdt<(icu_ver_major).dat' ], + 'action': [ 'cp', + '<@(_inputs)', + '<@(_outputs)', + ], + }, { + # build final .dat -> .obj + 'action_name': 'genccode', + 'inputs': [ '<(SHARED_INTERMEDIATE_DIR)/icutmp/icusmdt<(icu_ver_major).dat' ], + 'outputs': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.c' ], + 'action': [ '<(PRODUCT_DIR)/genccode', + '-d', '<(SHARED_INTERMEDIATE_DIR)', + '<@(_inputs)' ], + }, + ], + # This file contains the small ICU data + 'sources': [ '<(SHARED_INTERMEDIATE_DIR)/icusmdt<(icu_ver_major)_dat.c' ], + # for umachine.h + 'include_dirs': [ + '../../deps/icu/source/common', + ], + }]], # end icu_small == true + }]], # end OS != win + }, # end icudata + # icustubdata is a tiny (~1k) symbol with no ICU data in it. + # tools must link against it as they are generating the full data. + { + 'target_name': 'icustubdata', + 'type': '<(library)', + 'toolsets': [ 'host', 'target' ], + 'dependencies': [ 'icu_implementation' ], + 'sources': [ + '<@(icu_src_stubdata)' + ], + 'include_dirs': [ + '../../deps/icu/source/common', + ], + }, + # this target is for v8 consumption. + # it is icuuc + stubdata + # it is only built for target + { + 'target_name': 'icuuc', + 'type': 'none', + 'toolsets': [ 'target' ], + 'dependencies': [ 'icuucx', 'icudata' ], + 'export_dependent_settings': [ 'icuucx', 'icudata' ], + }, + # This is the 'real' icuuc. 
+ # tools can depend on 'icuuc + stubdata' + { + 'target_name': 'icuucx', + 'type': '<(library)', + 'dependencies': [ 'icu_implementation', 'icu_uconfig' ], + 'toolsets': [ 'host', 'target' ], + 'sources': [ + '<@(icu_src_common)' + ], + 'include_dirs': [ + '../../deps/icu/source/common', + ], + 'defines': [ + 'U_COMMON_IMPLEMENTATION=1', + ], + 'export_dependent_settings': [ 'icu_uconfig' ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../../deps/icu/source/common', + ], + 'conditions': [ + [ 'OS=="win"', { + 'link_settings': { + 'libraries': [ '-lAdvAPI32.Lib', '-lUser32.lib' ], + }, + }], + ], + }, + }, + # tools library + { + 'target_name': 'icutools', + 'type': '<(library)', + 'toolsets': [ 'host' ], + 'dependencies': [ 'icuucx', 'icui18n', 'icustubdata' ], + 'sources': [ + '<@(icu_src_tools)' + ], + 'include_dirs': [ + '../../deps/icu/source/tools/toolutil', + ], + 'defines': [ + 'U_TOOLUTIL_IMPLEMENTATION=1', + #'DEBUG=0', # http://bugs.icu-project.org/trac/ticket/10977 + ], + 'direct_dependent_settings': { + 'include_dirs': [ + '../../deps/icu/source/tools/toolutil', + ], + }, + 'export_dependent_settings': [ 'icuucx', 'icui18n', 'icustubdata' ], + }, + # This tool is needed to rebuild .res files from .txt, + # or to build index (res_index.txt) files for small-icu + { + 'target_name': 'genrb', + 'type': 'executable', + 'toolsets': [ 'host' ], + 'dependencies': [ 'icutools', 'icuucx', 'icui18n' ], + 'sources': [ + '<@(icu_src_genrb)' + ], + # derb is a separate executable + # (which is not currently built) + 'sources!': [ + '<@(icu_src_derb)', + 'no-op.cc', + ], + }, + # This tool is used to rebuild res_index.res manifests + { + 'target_name': 'iculslocs', + 'toolsets': [ 'host' ], + 'type': 'executable', + 'dependencies': [ 'icutools', 'icuucx', 'icui18n', 'icuio' ], + 'sources': [ + 'iculslocs.cc', + 'no-op.cc', + ], + }, + # This tool is used to package, unpackage, repackage .dat files + # and convert endianesses + { + 'target_name': 'icupkg', 
+ 'toolsets': [ 'host' ], + 'type': 'executable', + 'dependencies': [ 'icutools', 'icuucx', 'icui18n' ], + 'sources': [ + '<@(icu_src_icupkg)', + 'no-op.cc', + ], + }, + # this is used to convert .dat directly into .obj + { + 'target_name': 'genccode', + 'toolsets': [ 'host' ], + 'type': 'executable', + 'dependencies': [ 'icutools', 'icuucx', 'icui18n' ], + 'sources': [ + '<@(icu_src_genccode)', + 'no-op.cc', + ], + }, + ], +} diff --git a/tools/icu/icu-system.gyp b/tools/icu/icu-system.gyp new file mode 100644 index 00000000000..44d4f5feff4 --- /dev/null +++ b/tools/icu/icu-system.gyp @@ -0,0 +1,18 @@ +# Copyright (c) 2014 IBM Corporation and Others. All Rights Reserved. + +# This variant is used for the '--with-intl=system-icu' option. +# 'configure' has already set 'libs' and 'cflags' - so, +# there's nothing to do in these targets. + +{ + 'targets': [ + { + 'target_name': 'icuuc', + 'type': 'none', + }, + { + 'target_name': 'icui18n', + 'type': 'none', + }, + ], +} diff --git a/tools/icu/icu_small.json b/tools/icu/icu_small.json new file mode 100644 index 00000000000..ddf7d1204e8 --- /dev/null +++ b/tools/icu/icu_small.json @@ -0,0 +1,47 @@ +{ + "copyright": "Copyright (c) 2014 IBM Corporation and Others. 
All Rights Reserved.", + "comment": "icutrim.py config: Trim down ICU to just English, needed for node.js use.", + "variables": { + "none": { + "only": [] + }, + "en_only": { + "only": [ + "root", + "en" + ] + }, + "leavealone": { + } + }, + "trees": { + "ROOT": "en_only", + "brkitr": "none", + "coll": "en_only", + "curr": "en_only", + "lang": "none", + "rbnf": "none", + "region": "none", + "zone": "en_only", + "converters": "none", + "stringprep": "none", + "translit": "none", + "brkfiles": "none", + "brkdict": "none", + "confusables": "none" + }, + "remove": [ + "cnvalias.icu", + "postalCodeData.res", + "uts46.nrm", + "genderList.res", + "brkitr/root.res", + "unames.icu" + ], + "keep": [ + "pool.res", + "supplementalData.res", + "zoneinfo64.res", + "likelySubtags.res" + ] +} diff --git a/tools/icu/iculslocs.cc b/tools/icu/iculslocs.cc new file mode 100644 index 00000000000..66becace0a4 --- /dev/null +++ b/tools/icu/iculslocs.cc @@ -0,0 +1,388 @@ +/* +********************************************************************** +* Copyright (C) 2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* +* Created 2014-06-20 by Steven R. Loomis +* +* See: http://bugs.icu-project.org/trac/ticket/10922 +* +*/ + +/* +WHAT IS THIS? + +Here's the problem: It's difficult to reconfigure ICU from the command +line without using the full makefiles. You can do a lot, but not +everything. + +Consider: + + $ icupkg -r 'ja*' icudt53l.dat + +Great, you've now removed the (main) Japanese data. But something's +still wrong-- res_index (and thus, getAvailable* functions) still +claim the locale is present. + +You are reading the source to a tool (using only public API C code) +that can solve this problem. Use as follows: + + $ iculslocs -i . -N icudt53l -b res_index.txt + +.. 
Generates a NEW res_index.txt (by looking at the .dat file, and +figuring out which locales are actually available). Has commented out +the ones which are no longer available: + + ... + it_SM {""} +// ja {""} +// ja_JP {""} + jgo {""} + ... + +Then you can build and in-place patch it with existing ICU tools: + $ genrb res_index.txt + $ icupkg -a res_index.res icudt53l.dat + +.. Now you have a patched icudt53l.dat that not only doesn't have +Japanese, it doesn't *claim* to have Japanese. + +*/ + +#include "string.h" +#include "charstr.h" // ICU internal header +#include +#include +#include + +const char* PROG = "iculslocs"; +const char* NAME = U_ICUDATA_NAME; // assume ICU data +const char* TREE = "ROOT"; +int VERBOSE = 0; + +#define RES_INDEX "res_index" +#define INSTALLEDLOCALES "InstalledLocales" + +CharString packageName; +const char* locale = RES_INDEX; // locale referring to our index + +void usage() { + u_printf("Usage: %s [options]\n", PROG); + u_printf( + "This program lists and optionally regenerates the locale " + "manifests\n" + " in ICU 'res_index.res' files.\n"); + u_printf( + " -i ICUDATA Set ICUDATA dir to ICUDATA.\n" + " NOTE: this must be the first option given.\n"); + u_printf(" -h This Help\n"); + u_printf(" -v Verbose Mode on\n"); + u_printf(" -l List locales to stdout\n"); + u_printf( + " if Verbose mode, then missing (unopenable)" + "locales\n" + " will be listed preceded by a '#'.\n"); + u_printf( + " -b res_index.txt Write 'corrected' bundle " + "to res_index.txt\n" + " missing bundles will be " + "OMITTED\n"); + u_printf( + " -T TREE Choose tree TREE\n" + " (TREE should be one of: \n" + " ROOT, brkitr, coll, curr, lang, rbnf, region, zone)\n"); + // see ureslocs.h and elsewhere + u_printf( + " -N NAME Choose name NAME\n" + " (default: '%s')\n", + U_ICUDATA_NAME); + u_printf( + "\nNOTE: for best results, this tool ought to be " + "linked against\n" + "stubdata. i.e. 
'%s -l' SHOULD return an error with " + " no data.\n", + PROG); +} + +#define ASSERT_SUCCESS(what) \ + if (U_FAILURE(status)) { \ + u_printf("%s:%d: %s: ERROR: %s %s\n", \ + __FILE__, \ + __LINE__, \ + PROG, \ + u_errorName(status), \ + what); \ + return 1; \ + } + +/** + * @param status changed from reference to pointer to match node.js style + */ +void calculatePackageName(UErrorCode* status) { + packageName.clear(); + if (strcmp(NAME, "NONE")) { + packageName.append(NAME, *status); + if (strcmp(TREE, "ROOT")) { + packageName.append(U_TREE_SEPARATOR_STRING, *status); + packageName.append(TREE, *status); + } + } + if (VERBOSE) { + u_printf("packageName: %s\n", packageName.data()); + } +} + +/** + * Does the locale exist? + * return zero for false, or nonzero if it was openable. + * Assumes calculatePackageName was called. + * @param exists set to TRUE if exists, FALSE otherwise. + * Changed from reference to pointer to match node.js style + * @return 0 on "OK" (success or resource-missing), + * 1 on "FAILURE" (unexpected error) + */ +int localeExists(const char* loc, UBool* exists) { + UErrorCode status = U_ZERO_ERROR; + if (VERBOSE > 1) { + u_printf("Trying to open %s:%s\n", packageName.data(), loc); + } + LocalUResourceBundlePointer aResource( + ures_openDirect(packageName.data(), loc, &status)); + *exists = FALSE; + if (U_SUCCESS(status)) { + *exists = true; + if (VERBOSE > 1) { + u_printf("%s:%s existed!\n", packageName.data(), loc); + } + return 0; + } else if (status == U_MISSING_RESOURCE_ERROR) { + *exists = false; + if (VERBOSE > 1) { + u_printf("%s:%s did NOT exist (%s)!\n", + packageName.data(), + loc, + u_errorName(status)); + } + return 0; // "good" failure + } else { + // some other failure.. 
+ u_printf("%s:%d: %s: ERROR %s opening %s:%s for test.\n", + __FILE__, + __LINE__, + u_errorName(status), + packageName.data(), + loc); + return 1; // abort + } +} + +void printIndent(const LocalUFILEPointer& bf, int indent) { + for (int i = 0; i < indent + 1; i++) { + u_fprintf(bf.getAlias(), " "); + } +} + +/** + * Dumps a table resource contents + * if lev==0, skips INSTALLEDLOCALES + * @return 0 for OK, 1 for err + */ +int dumpAllButInstalledLocales(int lev, + LocalUResourceBundlePointer& bund, + LocalUFILEPointer& bf, + UErrorCode& status) { + ures_resetIterator(bund.getAlias()); + const UBool isTable = (UBool)(ures_getType(bund.getAlias()) == URES_TABLE); + LocalUResourceBundlePointer t; + while (U_SUCCESS(status) && ures_hasNext(bund.getAlias())) { + t.adoptInstead(ures_getNextResource(bund.getAlias(), t.orphan(), &status)); + ASSERT_SUCCESS("while processing table"); + const char* key = ures_getKey(t.getAlias()); + if (VERBOSE > 1) { + u_printf("dump@%d: got key %s\n", lev, key); + } + if (lev == 0 && !strcmp(key, INSTALLEDLOCALES)) { + if (VERBOSE > 1) { + u_printf("dump: skipping '%s' as it must be evaluated.\n", key); + } + } else { + printIndent(bf, lev); + u_fprintf(bf.getAlias(), "%s", key); + switch (ures_getType(t.getAlias())) { + case URES_STRING: { + int32_t len = 0; + const UChar* s = ures_getString(t.getAlias(), &len, &status); + ASSERT_SUCCESS("getting string"); + u_fprintf(bf.getAlias(), ":string {\""); + u_file_write(s, len, bf.getAlias()); + u_fprintf(bf.getAlias(), "\"}"); + } break; + default: { + u_printf("ERROR: unhandled type in dumpAllButInstalledLocales().\n"); + return 1; + } break; + } + u_fprintf(bf.getAlias(), "\n"); + } + } + return 0; +} + +int list(const char* toBundle) { + UErrorCode status = U_ZERO_ERROR; + + LocalUFILEPointer bf; + + if (toBundle != NULL) { + if (VERBOSE) { + u_printf("writing to bundle %s\n", toBundle); + } + // we write UTF-8 with BOM only. No exceptions. 
+ bf.adoptInstead(u_fopen(toBundle, "w", "en_US_POSIX", "UTF-8")); + if (bf.isNull()) { + u_printf("ERROR: Could not open '%s' for writing.\n", toBundle); + return 1; + } + u_fputc(0xFEFF, bf.getAlias()); // write BOM + u_fprintf(bf.getAlias(), "// -*- Coding: utf-8; -*-\n//\n"); + } + + // first, calculate the bundle name. + calculatePackageName(&status); + ASSERT_SUCCESS("calculating package name"); + + if (VERBOSE) { + u_printf("\"locale\": %s\n", locale); + } + + LocalUResourceBundlePointer bund( + ures_openDirect(packageName.data(), locale, &status)); + ASSERT_SUCCESS("while opening the bundle"); + LocalUResourceBundlePointer installedLocales( + ures_getByKey(bund.getAlias(), INSTALLEDLOCALES, NULL, &status)); + ASSERT_SUCCESS("while fetching installed locales"); + + int32_t count = ures_getSize(installedLocales.getAlias()); + if (VERBOSE) { + u_printf("Locales: %d\n", count); + } + + if (bf.isValid()) { + // write the HEADER + u_fprintf(bf.getAlias(), + "// Warning this file is automatically generated\n" + "// Updated by %s based on %s:%s.txt\n", + PROG, + packageName.data(), + locale); + u_fprintf(bf.getAlias(), + "%s:table(nofallback) {\n" + " // First, everything besides InstalledLocales:\n", + locale); + if (dumpAllButInstalledLocales(0, bund, bf, status)) { + u_printf("Error dumping prolog for %s\n", toBundle); + return 1; + } + ASSERT_SUCCESS("while writing prolog"); // in case an error was missed + + u_fprintf(bf.getAlias(), + " %s:table { // %d locales in input %s.res\n", + INSTALLEDLOCALES, + count, + locale); + } + + // OK, now list them. 
+ LocalUResourceBundlePointer subkey; + + int validCount = 0; + for (int32_t i = 0; i < count; i++) { + subkey.adoptInstead(ures_getByIndex( + installedLocales.getAlias(), i, subkey.orphan(), &status)); + ASSERT_SUCCESS("while fetching an installed locale's name"); + + const char* key = ures_getKey(subkey.getAlias()); + if (VERBOSE > 1) { + u_printf("@%d: %s\n", i, key); + } + // now, see if the locale is installed.. + + UBool exists; + if (localeExists(key, &exists)) { + return 1; // get out. + } + if (exists) { + validCount++; + u_printf("%s\n", key); + if (bf.isValid()) { + u_fprintf(bf.getAlias(), " %s {\"\"}\n", key); + } + } else { + if (bf.isValid()) { + u_fprintf(bf.getAlias(), "// %s {\"\"}\n", key); + } + if (VERBOSE) { + u_printf("#%s\n", key); // verbosity one - '' vs '#' + } + } + } + + if (bf.isValid()) { + u_fprintf(bf.getAlias(), " } // %d/%d valid\n", validCount, count); + // write the HEADER + u_fprintf(bf.getAlias(), "}\n"); + } + return 0; +} + +int main(int argc, const char* argv[]) { + PROG = argv[0]; + for (int i = 1; i < argc; i++) { + const char* arg = argv[i]; + int argsLeft = argc - i - 1; /* how many remain? 
*/ + if (!strcmp(arg, "-v")) { + VERBOSE++; + } else if (!strcmp(arg, "-i") && (argsLeft >= 1)) { + if (i != 1) { + u_printf("ERROR: -i must be the first argument given.\n"); + usage(); + return 1; + } + const char* dir = argv[++i]; + u_setDataDirectory(dir); + if (VERBOSE) { + u_printf("ICUDATA is now %s\n", dir); + } + } else if (!strcmp(arg, "-T") && (argsLeft >= 1)) { + TREE = argv[++i]; + if (VERBOSE) { + u_printf("TREE is now %s\n", TREE); + } + } else if (!strcmp(arg, "-N") && (argsLeft >= 1)) { + NAME = argv[++i]; + if (VERBOSE) { + u_printf("NAME is now %s\n", NAME); + } + } else if (!strcmp(arg, "-?") || !strcmp(arg, "-h")) { + usage(); + return 0; + } else if (!strcmp(arg, "-l")) { + if (list(NULL)) { + return 1; + } + } else if (!strcmp(arg, "-b") && (argsLeft >= 1)) { + if (list(argv[++i])) { + return 1; + } + } else { + u_printf("Unknown or malformed option: %s\n", arg); + usage(); + return 1; + } + } +} + +// Local Variables: +// compile-command: "icurun iculslocs.cpp" +// End: diff --git a/tools/icu/icutrim.py b/tools/icu/icutrim.py new file mode 100755 index 00000000000..f6b54956c55 --- /dev/null +++ b/tools/icu/icutrim.py @@ -0,0 +1,338 @@ +#!/usr/bin/python +# +# Copyright (C) 2014 IBM Corporation and Others. All Rights Reserved. +# +# @author Steven R. Loomis +# +# This tool slims down an ICU data (.dat) file according to a config file. +# +# See: http://bugs.icu-project.org/trac/ticket/10922 +# +# Usage: +# Use "-h" to get help options. + +import sys +import shutil +# for utf-8 +reload(sys) +sys.setdefaultencoding("utf-8") + +import argparse +import os +import json +import re + +endian=sys.byteorder + +parser = argparse.ArgumentParser(description="ICU Datafile repackager. Example of use: \"mkdir tmp ; python icutrim.py -D ~/Downloads/icudt53l.dat -T tmp -F trim_en.json -O icudt53l.dat\" you will then find a smaller icudt53l.dat in 'tmp'. 
", + epilog="ICU tool, http://icu-project.org - master copy at http://source.icu-project.org/repos/icu/tools/trunk/scripts/icutrim.py") + +parser.add_argument("-P","--tool-path", + action="store", + dest="toolpath", + help="set the prefix directory for ICU tools") + +parser.add_argument("-D","--input-file", + action="store", + dest="datfile", + help="input data file (icudt__.dat)", + required=True) + +parser.add_argument("-F","--filter-file", + action="store", + dest="filterfile", + help="filter file (JSON format)", + required=True) + +parser.add_argument("-T","--tmp-dir", + action="store", + dest="tmpdir", + help="working directory.", + required=True) + +parser.add_argument("--delete-tmp", + action="count", + dest="deltmpdir", + help="delete working directory.", + default=0) + +parser.add_argument("-O","--outfile", + action="store", + dest="outfile", + help="outfile (NOT a full path)", + required=True) + +parser.add_argument("-v","--verbose", + action="count", + default=0) + +parser.add_argument('-e', '--endian', action='store', dest='endian', help='endian, big, little or host, your default is "%s".' % endian, default=endian, metavar='endianness') + + +args = parser.parse_args() + +if args.verbose>0: + print "Options: "+str(args) + +if (os.path.isdir(args.tmpdir) and args.deltmpdir): + if args.verbose>1: + print "Deleting tmp dir %s.." % (args.tmpdir) + shutil.rmtree(args.tmpdir) + +if not (os.path.isdir(args.tmpdir)): + os.mkdir(args.tmpdir) +else: + print "Please delete tmpdir %s before beginning." 
## TODO: need to improve this.  Quotes, etc.
def runcmd(tool, cmd, doContinue=False):
    """Run an ICU tool through the shell and return its exit status.

    The command line is built by plain string concatenation (no quoting)
    and passed to os.system(), so callers may include shell redirections
    in ``cmd``.

    tool       -- tool name; prefixed with args.toolpath when that is set
    cmd        -- argument string appended after the tool name
    doContinue -- if true, a non-zero status is returned to the caller
                  instead of terminating the script

    Returns the value of os.system().  Calls sys.exit(1) when the status
    is non-zero and doContinue is false.
    """
    if args.toolpath:
        tool = os.path.join(args.toolpath, tool)
    cmd = tool + " " + cmd

    if args.verbose > 4:
        print("# " + cmd)

    rc = os.system(cmd)
    # Compare by value: "rc is not 0" tests object identity, which only
    # happens to work for small ints in CPython.
    if rc != 0 and not doContinue:
        print("FAILED: %s" % cmd)
        sys.exit(1)
    return rc
def queueForRemoval(tree):
    """Queue the items of one tree that the filter config does not keep.

    Reads the module-level ``config``, ``trees``, ``itemset`` and ``args``
    and adds item names to the module-level ``remove`` set; the data file
    itself is not touched here.

    If ``config["trees"][tree]`` is a string it names an entry of
    ``config["variables"]``; that entry replaces the string inside
    ``config`` before it is used.

    tree -- tree name, used as key into config["trees"] and trees

    Returns None.  Calls sys.exit(1) when the tree refers to a variable
    that is not defined.
    """
    if "trees" not in config or tree not in config["trees"]:
        # no config at all, or the config says nothing about this tree
        return
    mytree = trees[tree]
    locs = mytree["locs"]
    if args.verbose > 0:
        print("* %s: %d items" % (tree, len(locs)))

    # do variable substitution for this tree here
    entry = config["trees"][tree]
    # type(u"") is unicode on Python 2, so both string kinds are accepted
    if isinstance(entry, (str, type(u""))):
        if args.verbose > 5:
            print(" Substituting $%s for tree %s" % (entry, tree))
        if "variables" not in config or entry not in config["variables"]:
            print(" ERROR: no variable: variables.%s for tree %s" % (entry, tree))
            sys.exit(1)
        config["trees"][tree] = config["variables"][entry]
    myconfig = config["trees"][tree]
    if args.verbose > 4:
        print(" Config: %s" % (myconfig))

    # Process this tree
    if len(myconfig) == 0 or len(locs) == 0:
        if args.verbose > 2:
            print(" No processing for %s - skipping" % (tree))
        return

    if "only" not in myconfig:
        # Without an "only" list there is nothing to filter against.
        # (The message previously printed a literal "%s".)
        print("tree %s - no ONLY" % tree)
        return

    only = set(myconfig["only"])
    prefix = mytree["treeprefix"]
    if len(only) == 0 and prefix != "":
        # Nothing of this tree is kept, so its pool bundle goes as well.
        thePool = "%spool.res" % (prefix)
        if thePool in itemset:
            if args.verbose > 0:
                print("Removing %s because tree %s is empty." % (thePool, tree))
            remove.add(thePool)

    for loc in locs:
        if loc not in only:
            # REMOVE loc
            toRemove = "%s%s%s" % (prefix, loc, mytree["extension"])
            if args.verbose > 6:
                print("Queueing for removal: %s" % toRemove)
            remove.add(toRemove)
def removeList(count=0):
    """Remove all queued items from the output data file with icupkg.

    The module-level ``remove`` set (minus ``keep``) is written to
    REMOVE.lst in args.tmpdir and handed to ``icupkg -r``.  When icupkg
    fails, its stderr (captured in REMOVE.err) is scanned for
    "Item X depends on missing item Y" lines; every such X is added to
    ``remove`` and the function calls itself with count+1.

    count -- attempt number, used in messages and as the retry limit

    Returns None.  Calls sys.exit(1) when count exceeds 10, when a stderr
    line cannot be parsed, or when a failed attempt adds no new items.
    """
    # don't allow "keep" items to creep in here.
    global remove
    remove = remove - keep
    if count > 10:
        print("Giving up - %dth attempt at removal." % count)
        sys.exit(1)
    if args.verbose > 1:
        print("%d items to remove - try #%d" % (len(remove), count))
    if not remove:
        return

    oldcount = len(remove)
    hackerrfile = os.path.join(args.tmpdir, "REMOVE.err")
    removefile = os.path.join(args.tmpdir, "REMOVE.lst")
    # "with" closes the list file even if a write fails
    with open(removefile, 'wb') as fi:
        for item in remove:
            fi.write("%s\n" % item)
    rc = runcmd("icupkg", "-r %s %s 2> %s" % (removefile, outfile, hackerrfile), True)
    # Compare by value; "rc is not 0" is an identity test
    if rc == 0:
        return

    if args.verbose > 5:
        print("## Damage control, trying to parse stderr from icupkg..")
    with open(hackerrfile, 'rb') as fi:
        erritems = fi.readlines()
    #Item zone/zh_Hant_TW.res depends on missing item zone/zh_Hant.res
    pat = re.compile("""^Item ([^ ]+) depends on missing item ([^ ]+).*""")
    for errline in erritems:
        line = errline.strip()
        m = pat.match(line)
        if not m:
            print("ERROR: could not match errline: %s" % line)
            sys.exit(1)
        toDelete = m.group(1)
        if args.verbose > 5:
            print("<< %s added to delete" % toDelete)
        remove.add(toDelete)
    if args.verbose > 5:
        print(" now %d items to remove" % len(remove))
    if oldcount == len(remove):
        # No progress was made, so another attempt would fail the same way.
        print(" ERROR: could not add any more items to remove. Fail.")
        sys.exit(1)
    removeList(count + 1)
+ +extern void icu_dummy_cxx() {} diff --git a/vcbuild.bat b/vcbuild.bat index 0c9b0b285a6..bd1afd77ec5 100644 --- a/vcbuild.bat +++ b/vcbuild.bat @@ -35,6 +35,7 @@ set noetw_msi_arg= set noperfctr= set noperfctr_arg= set noperfctr_msi_arg= +set i18n_arg= :next-arg if "%1"=="" goto args-done @@ -62,6 +63,8 @@ if /i "%1"=="test" set test=test&goto arg-ok if /i "%1"=="msi" set msi=1&set licensertf=1&goto arg-ok if /i "%1"=="upload" set upload=1&goto arg-ok if /i "%1"=="jslint" set jslint=1&goto arg-ok +if /i "%1"=="small-icu" set i18n_arg=%1&goto arg-ok +if /i "%1"=="full-icu" set i18n_arg=%1&goto arg-ok echo Warning: ignoring invalid command line option `%1`. @@ -80,6 +83,9 @@ if defined nosnapshot set nosnapshot_arg=--without-snapshot if defined noetw set noetw_arg=--without-etw& set noetw_msi_arg=/p:NoETW=1 if defined noperfctr set noperfctr_arg=--without-perfctr& set noperfctr_msi_arg=/p:NoPerfCtr=1 +if "%i18n_arg%"=="full-icu" set i18n_arg=--with-intl=full-icu +if "%i18n_arg%"=="small-icu" set i18n_arg=--with-intl=small-icu + :project-gen @rem Skip project generation if requested. if defined noprojgen goto msbuild @@ -89,7 +95,7 @@ if defined NIGHTLY set TAG=nightly-%NIGHTLY% @rem Generate the VS project. SETLOCAL if defined VS100COMNTOOLS call "%VS100COMNTOOLS%\VCVarsQueryRegistry.bat" - python configure %debug_arg% %nosnapshot_arg% %noetw_arg% %noperfctr_arg% --dest-cpu=%target_arch% --tag=%TAG% + python configure %i18n_arg% %debug_arg% %nosnapshot_arg% %noetw_arg% %noperfctr_arg% --dest-cpu=%target_arch% --tag=%TAG% if errorlevel 1 goto create-msvs-files-failed if not exist node.sln goto create-msvs-files-failed echo Project files generated.