2010-05-26 21:04:56 +02:00
|
|
|
// text.h
|
2010-06-04 16:25:07 +02:00
|
|
|
/*
|
|
|
|
* Copyright 2010 10gen Inc.
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
2010-05-26 21:04:56 +02:00
|
|
|
|
2010-06-03 20:03:10 +02:00
|
|
|
/* Copyright 2009 10gen Inc.
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2010-05-26 21:04:56 +02:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
namespace mongo {
|
|
|
|
|
|
|
|
/**
 * Splits a C string into fragments around a delimiter string.
 *
 * The splitter stores the two pointers it is given and never copies the
 * underlying characters, so both buffers must stay alive for as long as the
 * StringSplitter is used.
 *
 * A run of consecutive delimiters is treated as a single delimiter, so no
 * empty fragments are produced in the middle of the input. A delimiter at the
 * very start of the input still yields one empty first fragment.
 */
class StringSplitter {
public:
    /**
     * @param big      the string to be split
     * @param splitter the delimiter; may be longer than one character
     */
    StringSplitter( const char * big , const char * splitter )
        : _big( big ) , _splitter( splitter ) {
    }

    /** @return true if there is unconsumed input left for next() */
    bool more() const {
        return _big[0] != 0;
    }

    /**
     * Consumes and returns the next fragment.
     *
     * @return the text up to (not including) the next delimiter, or the whole
     *         remaining input when no delimiter is left
     */
    std::string next() {
        const char * hit = strstr( _big , _splitter );
        if ( ! hit ) {
            // No delimiter left: hand back the remainder and move to the terminator.
            std::string rest = _big;
            _big += strlen( _big );
            return rest;
        }

        std::string piece( _big , hit - _big );

        // Step over the WHOLE delimiter, not just its first character, so that
        // multi-character delimiters do not leak into the next fragment.
        const size_t len = strlen( _splitter );
        // An empty delimiter matches everywhere; step one character at a time
        // in that case so the skip loop below still terminates.
        const size_t step = len ? len : 1;

        _big = hit + len;
        // Collapse any delimiters that follow immediately.
        while ( *_big && strncmp( _big , _splitter , len ) == 0 )
            _big += step;

        return piece;
    }

    /** Appends every remaining fragment to l. */
    void split( std::vector<std::string>& l ) {
        while ( more() ) {
            l.push_back( next() );
        }
    }

    /** @return every remaining fragment, in order */
    std::vector<std::string> split() {
        std::vector<std::string> l;
        split( l );
        return l;
    }

    /** Convenience wrapper: splits big around splitter and returns the fragments. */
    static std::vector<std::string> split( const std::string& big , const std::string& splitter ) {
        StringSplitter ss( big.c_str() , splitter.c_str() );
        return ss.split();
    }

    /**
     * Concatenates the elements of l, inserting split between neighbours.
     *
     * @return the joined string; empty when l is empty
     */
    static std::string join( const std::vector<std::string>& l , const std::string& split ) {
        std::stringstream ss;
        for ( size_t i = 0; i < l.size(); i++ ) {
            if ( i > 0 )
                ss << split;
            ss << l[i];
        }
        return ss.str();
    }

private:
    const char * _big;       // current read position in the input (not owned)
    const char * _splitter;  // delimiter (not owned)
};
|
|
|
|
|
2010-06-03 21:35:03 +02:00
|
|
|
/* This doesn't defend against ALL bad UTF8, but it will guarantee that the
 * string can be converted to a sequence of codepoints. However, it doesn't
 * guarantee that the codepoints themselves are valid.
 */
bool isValidUTF8(const char *s);

/* Overload for std::string: forwards s.c_str() to the const char* version.
 * Takes the string by const reference so the call does not copy it.
 */
inline bool isValidUTF8(const std::string& s) { return isValidUTF8(s.c_str()); }
|
|
|
|
|
2010-06-14 18:40:39 +02:00
|
|
|
#ifdef _WIN32

/* Conversions between UTF-8 encoded narrow strings and wide strings.
 * Only declared here; the definitions live elsewhere.
 */
std::string toUtf8String(const std::wstring& wide);

std::wstring toWideString(const char *s);

/* toNativeString: like toWideString, but sensitive to the _UNICODE macro.
 * With _UNICODE defined it returns a wide string; otherwise it would return
 * the narrow string unchanged, but that configuration is currently rejected
 * at compile time by the #error below.
 */
# if defined(_UNICODE)
inline std::wstring toNativeString(const char *s) { return toWideString(s); }
# else
#error temp error
inline std::string toNativeString(const char *s) { return s; }
# endif

#endif
|
2010-06-23 05:44:22 +02:00
|
|
|
|
|
|
|
// expect that n contains a base ten number and nothing else after it
|
|
|
|
// NOTE win version hasn't been tested directly
|
|
|
|
inline long long parseLL( const char *n ) {
|
|
|
|
long long ret;
|
2010-06-23 07:14:08 +02:00
|
|
|
uassert( 13307, "cannot convert empty string to long long", *n != 0 );
|
2010-06-23 05:44:22 +02:00
|
|
|
#if !defined(_WIN32)
|
|
|
|
char *endPtr = 0;
|
|
|
|
errno = 0;
|
|
|
|
ret = strtoll( n, &endPtr, 10 );
|
|
|
|
uassert( 13305, "could not convert string to long long", *endPtr == 0 && errno == 0 );
|
2010-07-03 00:14:45 +02:00
|
|
|
#elif _MSC_VER>=1600 // 1600 is VS2k10 1500 is VS2k8
|
2010-06-23 05:44:22 +02:00
|
|
|
size_t endLen = 0;
|
|
|
|
try {
|
|
|
|
ret = stoll( n, &endLen, 10 );
|
|
|
|
} catch ( ... ) {
|
|
|
|
endLen = 0;
|
|
|
|
}
|
|
|
|
uassert( 13306, "could not convert string to long long", endLen != 0 && n[ endLen ] == 0 );
|
2010-06-26 18:28:17 +02:00
|
|
|
#else // stoll() wasn't introduced until VS 2010.
|
2010-07-03 00:18:01 +02:00
|
|
|
char* endPtr = 0;
|
|
|
|
ret = _strtoi64( n, &endPtr, 10 );
|
2010-07-03 00:23:22 +02:00
|
|
|
uassert( 13310, "could not convert string to long long", (*endPtr == 0) && (ret != _I64_MAX) && (ret != _I64_MIN) );
|
2010-06-26 18:28:17 +02:00
|
|
|
#endif // !defined(_WIN32)
|
2010-06-23 05:44:22 +02:00
|
|
|
return ret;
|
|
|
|
}
|
2010-05-26 21:04:56 +02:00
|
|
|
}
|