0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 09:32:32 +01:00
mongodb/util/text.h

147 lines
4.5 KiB
C
Raw Normal View History

2010-05-26 21:04:56 +02:00
// text.h
2010-06-04 16:25:07 +02:00
/*
* Copyright 2010 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
2010-05-26 21:04:56 +02:00
2010-06-03 20:03:10 +02:00
/* Copyright 2009 10gen Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
2010-05-26 21:04:56 +02:00
#pragma once
namespace mongo {
/**
 * Splits a NUL-terminated string into tokens separated by a delimiter string,
 * and joins tokens back together.
 *
 * Tokenizing rules:
 *  - a run of consecutive delimiters counts as a single separator, so no
 *    empty tokens are produced in the middle of the input;
 *  - a delimiter at the very start of the input yields one leading empty token;
 *  - a delimiter at the very end of the input yields no trailing empty token;
 *  - an empty delimiter never matches: the remaining input is one token.
 *
 * The splitter stores the two pointers it is given and does not copy the
 * text, so both strings must stay alive while the splitter is in use.
 */
class StringSplitter {
public:
    /**
     * @param big       NUL-terminated text to tokenize (not copied)
     * @param splitter  NUL-terminated delimiter (not copied)
     */
    StringSplitter( const char * big , const char * splitter )
        : _big( big ) , _splitter( splitter ) {
    }

    /** @return true while there is unconsumed input left */
    bool more() const {
        return _big[0] != 0;
    }

    /**
     * Consume and return the next token, together with the delimiter run
     * that follows it.
     * @return the text up to the next delimiter, or all remaining text when
     *         no delimiter is left
     */
    std::string next() {
        const size_t splitterLen = strlen( _splitter );

        // An empty delimiter would "match" at every position; treat it as
        // absent so the remaining text comes back as one token.
        const char * hit = splitterLen ? strstr( _big , _splitter ) : 0;
        if ( hit ) {
            std::string token( _big , hit - _big );

            // Step over the whole delimiter, not just its first character,
            // otherwise the tail of a multi-character delimiter ends up at
            // the front of the next token.
            _big = hit + splitterLen;

            // Collapse any immediately repeated delimiters. strncmp stops at
            // the terminating NUL, so this cannot run past the end.
            while ( strncmp( _big , _splitter , splitterLen ) == 0 )
                _big += splitterLen;

            return token;
        }

        // No delimiter left: everything that remains is the final token.
        std::string token = _big;
        _big += strlen( _big );
        return token;
    }

    /** Append every remaining token to l. */
    void split( std::vector<std::string>& l ) {
        while ( more() ) {
            l.push_back( next() );
        }
    }

    /** @return every remaining token */
    std::vector<std::string> split() {
        std::vector<std::string> l;
        split( l );
        return l;
    }

    /**
     * One-shot helper: tokenize big on splitter.
     * Both arguments are read through c_str(), so an embedded NUL ends them.
     */
    static std::vector<std::string> split( const std::string& big , const std::string& splitter ) {
        StringSplitter ss( big.c_str() , splitter.c_str() );
        return ss.split();
    }

    /**
     * Concatenate the elements of l, inserting split between neighbours.
     * @return the joined string; empty when l is empty
     */
    static std::string join( const std::vector<std::string>& l , const std::string& split ) {
        std::string joined;
        for ( size_t i = 0; i < l.size(); i++ ) {
            if ( i > 0 )
                joined += split;
            joined += l[i];
        }
        return joined;
    }

private:
    const char * _big;       // current read position in the input
    const char * _splitter;  // delimiter text
};
2010-06-03 21:35:03 +02:00
/* This doesn't defend against ALL bad UTF8, but it will guarantee that the
 * string can be converted to a sequence of codepoints. However, it doesn't
 * guarantee that the codepoints are valid.
 */
bool isValidUTF8(const char *s);
/* Convenience overload for std::string. Takes the string by const reference
 * so that calling it does not copy the text. The check is delegated to the
 * const char* overload through c_str(); that overload receives no length, so
 * presumably only the text before the first NUL is examined.
 */
inline bool isValidUTF8(const std::string& s) { return isValidUTF8(s.c_str()); }
#if defined(_WIN32)
// Windows-only string conversion helpers. toUtf8String and toWideString are
// only declared here; their definitions are not part of this header.
// Presumably they convert between UTF-16 wide strings and UTF-8 - confirm
// against the implementation.
std::string toUtf8String(const std::wstring& wide);
std::wstring toWideString(const char *s);
/* like toWideString but UNICODE macro sensitive */
# if !defined(_UNICODE)
// NOTE(review): the #error below aborts any build where _UNICODE is not
// defined, so the narrow toNativeString that follows it is never compiled.
// The message reads like a temporary guard - confirm whether non-Unicode
// builds are meant to be supported before removing it.
#error temp error
// Narrow build: the argument is returned as a std::string.
inline std::string toNativeString(const char *s) { return s; }
# else
// _UNICODE build: the argument is passed through toWideString.
inline std::wstring toNativeString(const char *s) { return toWideString(s); }
# endif
2010-06-23 05:44:22 +02:00
#endif
2010-06-23 05:44:22 +02:00
// Parse n as a long long.
// Expect that n contains a base ten number and nothing else after it.
// Each failure is reported through uassert with its own code:
//   13307 - n is the empty string
//   13305 / 13306 / 13310 - n is not a number, has trailing characters, or
//                           the conversion reported an error (one code per
//                           platform branch)
// n must not be null; it is dereferenced before any other check.
// NOTE win version hasn't been tested directly
inline long long parseLL( const char *n ) {
    long long ret = 0;
    uassert( 13307, "cannot convert empty string to long long", *n != 0 );
#if !defined(_WIN32)
    char *endPtr = 0;
    errno = 0; // strtoll reports range errors only through errno
    ret = strtoll( n, &endPtr, 10 );
    uassert( 13305, "could not convert string to long long", *endPtr == 0 && errno == 0 );
#elif _MSC_VER>=1600 // 1600 is VS2k10 1500 is VS2k8
    size_t endLen = 0;
    try {
        ret = stoll( n, &endLen, 10 );
    } catch ( ... ) {
        // stoll signals bad or out-of-range input by throwing; map that
        // onto the same uassert as a partial parse.
        endLen = 0;
    }
    uassert( 13306, "could not convert string to long long", endLen != 0 && n[ endLen ] == 0 );
#else // stoll() wasn't introduced until VS 2010.
    // _strtoi64 follows the strtol contract: it does not throw, it stores
    // the stop position in endPtr and reports overflow through errno.
    // Check both, exactly as the non-Windows branch does.
    char *endPtr = 0;
    errno = 0;
    ret = _strtoi64( n, &endPtr, 10 );
    uassert( 13310, "could not convert string to long long", *endPtr == 0 && errno == 0 );
#endif // !defined(_WIN32)
    return ret;
}
2010-05-26 21:04:56 +02:00
}