mirror of
https://github.com/mongodb/mongo.git
synced 2024-12-01 01:21:03 +01:00
431 lines
15 KiB
C++
431 lines
15 KiB
C++
/**
|
|
* Copyright (C) 2008 10gen Inc.
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License, version 3,
|
|
* as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Affero General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "pch.h"
|
|
#include "rs.h"
|
|
#include "health.h"
|
|
#include "../../util/background.h"
|
|
#include "../../client/dbclient.h"
|
|
#include "../../client/connpool.h"
|
|
#include "../commands.h"
|
|
#include "../../util/concurrency/value.h"
|
|
#include "../../util/concurrency/task.h"
|
|
#include "../../util/mongoutils/html.h"
|
|
#include "../../util/goodies.h"
|
|
#include "../../util/ramlog.h"
|
|
#include "../helpers/dblogger.h"
|
|
#include "connections.h"
|
|
#include "../../util/unittest.h"
|
|
#include "../dbhelpers.h"
|
|
|
|
namespace mongo {
|
|
|
|
/* decls for connections.h */
|
|
ScopedConn::M& ScopedConn::_map = *(new ScopedConn::M());
|
|
mutex ScopedConn::mapMutex("ScopedConn::mapMutex");
|
|
}
|
|
|
|
namespace mongo {
|
|
|
|
using namespace mongoutils::html;
|
|
using namespace bson;
|
|
|
|
static RamLog * _rsLog = new RamLog( "rs" );
|
|
Tee *rsLog = _rsLog;
|
|
extern bool replSetBlind;
|
|
|
|
/**
 * Renders the time elapsed between `t` and now as a short human-readable duration.
 *
 * @param t  a point in time; 0 means "not set".
 * @return   "" when t is 0. Otherwise:
 *             - under 180 seconds: whole seconds, e.g. "42 secs" ("1 sec" for exactly one),
 *             - under one hour:    minutes with two significant digits, e.g. "7.5 mins",
 *             - one hour or more:  hours, e.g. "3.2 hrs" or "111 hrs".
 */
std::string ago(time_t t) {
    if( t == 0 )
        return "";

    const time_t elapsed = time(0) - t;
    std::stringstream out;
    if( elapsed < 180 ) {
        out << elapsed << " sec";
        if( elapsed != 1 )
            out << 's';
    }
    else if( elapsed < 3600 ) {
        out.precision(2);
        out << elapsed / 60.0 << " mins";
    }
    else {
        const double hours = elapsed / 3600.0;
        if( hours < 99.5 ) {
            out.precision(2);
        }
        else {
            // Two significant digits cannot represent 100+ in the default float format; the
            // stream would fall back to scientific notation ("1.1e+02 hrs"). Print whole
            // hours instead.
            out << std::fixed;
            out.precision(0);
        }
        out << hours << " hrs";
    }
    return out.str();
}
|
|
|
|
/**
 * Appends one HTML table row (<tr>...</tr>) describing this member to `s`.
 *
 * Cells, in order: member name (linked to the /_replSet page on the member's port + 1000),
 * id, health, time since upSince, time since the last heartbeat, votes, priority, state,
 * last heartbeat message, optime (linked to /_replSetOplog for this id), and clock skew.
 * When health is not positive the state, message and skew cells are passed through grey().
 */
void Member::summarizeMember(stringstream& s) const {
    s << tr();

    {
        stringstream url;
        url << "http://" << h().host() << ':' << (h().port() + 1000) << "/_replSet";
        s << td( a(url.str(), "", fullName()) );
    }

    s << td( id() );

    const double health = hbinfo().health;
    const bool ok = health > 0;
    s << td( red(str::stream() << health, health == 0) );
    s << td( ago(hbinfo().upSince) );

    // a zero timestamp means no heartbeat response has been recorded
    const time_t lastHb = hbinfo().lastHeartbeat;
    const bool never = ( lastHb == 0 );
    {
        string hbText;
        if( never )
            hbText = "never";
        else
            hbText = ago(lastHb) + " ago";
        s << td(hbText);
    }

    s << td( config().votes );
    s << td( config().priority );

    {
        string stateText = state().toString();
        if( _config.hidden )
            stateText += " (hidden)";
        if( !ok && !stateText.empty() )
            s << td( grey(str::stream() << "(was " << state().toString() << ')', true) );
        else
            s << td(stateText); // text blank if we've never connected
    }

    s << td( grey(hbinfo().lastHeartbeatMsg, !ok) );

    stringstream oplogLink;
    oplogLink << "/_replSetOplog?_id=" << id();
    s << td( a(oplogLink.str(), "", never ? "?" : hbinfo().opTime.toString()) );

    // INT_MIN is treated as "no skew value"; an empty cell is emitted in that case
    if( hbinfo().skew > INT_MIN )
        s << td( grey(str::stream() << hbinfo().skew, !ok) );
    else
        s << td("");

    s << _tr();
}
|
|
|
|
/**
 * Maps a member state to an HTML anchor whose text is the state name and whose title
 * (second argument to a()) is a one-line explanation.
 *
 * @return the anchor markup, or "" for any state not listed below.
 */
string ReplSetImpl::stateAsHtml(MemberState s) {
    const char *tip = 0;
    const char *label = 0;

    switch( s.s ) {
    case MemberState::RS_STARTUP:
        tip = "serving still starting up, or still trying to initiate the set";
        label = "STARTUP";
        break;
    case MemberState::RS_PRIMARY:
        tip = "this server thinks it is primary";
        label = "PRIMARY";
        break;
    case MemberState::RS_SECONDARY:
        tip = "this server thinks it is a secondary (slave mode)";
        label = "SECONDARY";
        break;
    case MemberState::RS_RECOVERING:
        tip = "recovering/resyncing; after recovery usually auto-transitions to secondary";
        label = "RECOVERING";
        break;
    case MemberState::RS_FATAL:
        tip = "something bad has occurred and server is not completely offline with regard to the replica set. fatal error.";
        label = "FATAL";
        break;
    case MemberState::RS_STARTUP2:
        tip = "loaded config, still determining who is primary";
        label = "STARTUP2";
        break;
    case MemberState::RS_ARBITER:
        tip = "this server is an arbiter only";
        label = "ARBITER";
        break;
    case MemberState::RS_DOWN:
        tip = "member is down, slow, or unreachable";
        label = "DOWN";
        break;
    case MemberState::RS_ROLLBACK:
        tip = "rolling back operations to get in sync";
        label = "ROLLBACK";
        break;
    default:
        return "";
    }

    return a("", tip, label);
}
|
|
|
|
extern time_t started;
|
|
|
|
// oplogdiags in web ui
|
|
static void say(stringstream&ss, const bo& op) {
|
|
ss << "<tr>";
|
|
|
|
set<string> skip;
|
|
be e = op["ts"];
|
|
if( e.type() == Date || e.type() == Timestamp ) {
|
|
OpTime ot = e._opTime();
|
|
ss << td( time_t_to_String_short( ot.getSecs() ) );
|
|
ss << td( ot.toString() );
|
|
skip.insert("ts");
|
|
}
|
|
else ss << td("?") << td("?");
|
|
|
|
e = op["h"];
|
|
if( e.type() == NumberLong ) {
|
|
ss << "<td>" << hex << e.Long() << "</td>\n";
|
|
skip.insert("h");
|
|
}
|
|
else
|
|
ss << td("?");
|
|
|
|
ss << td(op["op"].valuestrsafe());
|
|
ss << td(op["ns"].valuestrsafe());
|
|
skip.insert("op");
|
|
skip.insert("ns");
|
|
|
|
ss << "<td>";
|
|
for( bo::iterator i(op); i.more(); ) {
|
|
be e = i.next();
|
|
if( skip.count(e.fieldName()) ) continue;
|
|
ss << e.toString() << ' ';
|
|
}
|
|
ss << "</td></tr>\n";
|
|
}
|
|
|
|
void ReplSetImpl::_getOplogDiagsAsHtml(unsigned server_id, stringstream& ss) const {
|
|
const Member *m = findById(server_id);
|
|
if( m == 0 ) {
|
|
ss << "Error : can't find a member with id: " << server_id << '\n';
|
|
return;
|
|
}
|
|
|
|
ss << p("Server : " + m->fullName() + "<br>ns : " + rsoplog );
|
|
|
|
//const bo fields = BSON( "o" << false << "o2" << false );
|
|
const bo fields;
|
|
|
|
/** todo fix we might want an so timeout here */
|
|
DBClientConnection conn(false, 0, /*timeout*/ 20);
|
|
{
|
|
string errmsg;
|
|
if( !conn.connect(m->fullName(), errmsg) ) {
|
|
ss << "couldn't connect to " << m->fullName() << ' ' << errmsg;
|
|
return;
|
|
}
|
|
}
|
|
|
|
auto_ptr<DBClientCursor> c = conn.query(rsoplog, Query().sort("$natural",1), 20, 0, &fields);
|
|
if( c.get() == 0 ) {
|
|
ss << "couldn't query " << rsoplog;
|
|
return;
|
|
}
|
|
static const char *h[] = {"ts","optime", "h","op","ns","rest",0};
|
|
|
|
ss << "<style type=\"text/css\" media=\"screen\">"
|
|
"table { font-size:75% }\n"
|
|
// "th { background-color:#bbb; color:#000 }\n"
|
|
// "td,th { padding:.25em }\n"
|
|
"</style>\n";
|
|
|
|
ss << table(h, true);
|
|
//ss << "<pre>\n";
|
|
int n = 0;
|
|
OpTime otFirst;
|
|
OpTime otLast;
|
|
OpTime otEnd;
|
|
while( c->more() ) {
|
|
bo o = c->next();
|
|
otLast = o["ts"]._opTime();
|
|
if( otFirst.isNull() )
|
|
otFirst = otLast;
|
|
say(ss, o);
|
|
n++;
|
|
}
|
|
if( n == 0 ) {
|
|
ss << rsoplog << " is empty\n";
|
|
}
|
|
else {
|
|
auto_ptr<DBClientCursor> c = conn.query(rsoplog, Query().sort("$natural",-1), 20, 0, &fields);
|
|
if( c.get() == 0 ) {
|
|
ss << "couldn't query [2] " << rsoplog;
|
|
return;
|
|
}
|
|
string x;
|
|
bo o = c->next();
|
|
otEnd = o["ts"]._opTime();
|
|
while( 1 ) {
|
|
stringstream z;
|
|
if( o["ts"]._opTime() == otLast )
|
|
break;
|
|
say(z, o);
|
|
x = z.str() + x;
|
|
if( !c->more() )
|
|
break;
|
|
o = c->next();
|
|
}
|
|
if( !x.empty() ) {
|
|
ss << "<tr><td>...</td><td>...</td><td>...</td><td>...</td><td>...</td></tr>\n" << x;
|
|
//ss << "\n...\n\n" << x;
|
|
}
|
|
}
|
|
ss << _table();
|
|
ss << p(time_t_to_String_short(time(0)) + " current time");
|
|
|
|
if( !otEnd.isNull() ) {
|
|
ss << "<p>Log length in time: ";
|
|
unsigned d = otEnd.getSecs() - otFirst.getSecs();
|
|
double h = d / 3600.0;
|
|
ss.precision(3);
|
|
if( h < 72 )
|
|
ss << h << " hours";
|
|
else
|
|
ss << h / 24.0 << " days";
|
|
ss << "</p>\n";
|
|
}
|
|
}
|
|
|
|
/**
 * Writes the replica set status page body to `s`: a small table with the set name and
 * whether a majority seems to be up, followed by a table with one row per member
 * (this server first built by hand, the others via Member::summarizeMember()).
 *
 * Rows are collected in a map keyed by hbinfo().id() and then emitted in key order.
 * The optime link of the self row carries the minvalid value from
 * "local.replset.minvalid" as its title when it could be read.
 */
void ReplSetImpl::_summarizeAsHtml(stringstream& s) const {
    s << table(0, false);
    s << tr("Set name:", _name);
    s << tr("Majority up:", elect.aMajoritySeemsToBeUp()?"yes":"no" );
    s << _table();

    const char *h[] = {"Member",
                       "<a title=\"member id in the replset config\">id</a>",
                       "Up",
                       "<a title=\"length of time we have been continuously connected to the other member with no reconnects (for self, shows uptime)\">cctime</a>",
                       "<a title=\"when this server last received a heartbeat response - includes error code responses\">Last heartbeat</a>",
                       "Votes", "Priority", "State", "Messages",
                       "<a title=\"how up to date this server is. this value polled every few seconds so actually lag is typically much lower than value shown here.\">optime</a>",
                       "<a title=\"Clock skew in seconds relative to this server. Informational; server clock variances will make the diagnostics hard to read, but otherwise are benign..\">skew</a>",
                       0
                      };
    s << table(h);

    /* this is to sort the member rows by their ordinal _id, so they show up in the same
       order on all the different web ui's; that is less confusing for the operator. */
    map<int,string> mp;

    // Best effort: "." when the lock was not obtained, a fixed message on any exception.
    // (300 is the second argument to readlocktry -- presumably a timeout in ms; confirm.)
    string myMinValid;
    try {
        readlocktry lk("local.replset.minvalid", 300);
        if( lk.got() ) {
            BSONObj mv;
            if( Helpers::getSingleton("local.replset.minvalid", mv) ) {
                myMinValid = "minvalid:" + mv["ts"]._opTime().toString();
            }
        }
        else myMinValid = ".";
    }
    catch(...) {
        myMinValid = "exception fetching minvalid";
    }

    // work on a local copy of the member pointer for the rest of the function
    const Member *_self = this->_self;
    assert(_self);
    {
        stringstream row;
        /* self row */
        row << tr() << td(_self->fullName() + " (me)") <<
            td(_self->id()) <<
            td("1") <<  //up
            td(ago(started)) <<
            td("") << // last heartbeat
            td(ToString(_self->config().votes)) <<
            td(ToString(_self->config().priority)) <<
            td( stateAsHtml(box.getState()) + (_self->config().hidden?" (hidden)":"") );
        row << td( _hbmsg );
        stringstream q;
        q << "/_replSetOplog?_id=" << _self->id();
        // read our own lastOpTimeWritten (as _summarizeStatus does) rather than going
        // through the global theReplSet pointer
        row << td( a(q.str(), myMinValid, lastOpTimeWritten.toString()) );
        row << td(""); // skew
        row << _tr();
        mp[_self->hbinfo().id()] = row.str();
    }

    for( Member *m = head(); m; m = m->next() ) {
        stringstream row;
        m->summarizeMember(row);
        mp[m->hbinfo().id()] = row.str();
    }

    for( map<int,string>::const_iterator i = mp.begin(); i != mp.end(); ++i )
        s << i->second;
    s << _table();
}
|
|
|
|
|
|
/** Writes the contents of the "rs" RamLog to `s` via RamLog::toHTML(). */
void fillRsLog(stringstream& s) {
    RamLog* const log = _rsLog;
    log->toHTML( s );
}
|
|
|
|
/**
 * Looks up a member by its id.
 *
 * _self is checked first (when set), then the list starting at head() is walked.
 *
 * @return the matching member, or 0 when no member has that id.
 */
const Member* ReplSetImpl::findById(unsigned id) const {
    if( _self && id == _self->id() )
        return _self;

    Member *cur = head();
    while( cur ) {
        if( cur->id() == id )
            break;
        cur = cur->next();
    }
    return cur;   // 0 when the walk ran off the end of the list
}
|
|
|
|
/**
 * Returns the greatest hbinfo().opTime among the members in _members whose heartbeat info
 * reports up(); members that are not up are ignored.
 *
 * @return that optime, or OpTime(0,0) when no member qualifies.
 */
const OpTime ReplSetImpl::lastOtherOpTime() const {
    OpTime newest(0,0);

    Member *m = _members.head();
    while( m ) {
        if( m->hbinfo().up() && m->hbinfo().opTime > newest )
            newest = m->hbinfo().opTime;
        m = m->next();
    }

    return newest;
}
|
|
|
|
/**
 * Fills `b` with a status document for the set.
 *
 * Fields appended: "set", "date", "myState", "syncingTo" (only when a sync target is set),
 * "members" (one sub-document for this server plus one per entry in _members, sorted with
 * std::sort), and "blind" (only when replSetBlind is set).
 */
void ReplSetImpl::_summarizeStatus(BSONObjBuilder& b) const {
    vector<BSONObj> members;

    const Member *_self = this->_self;
    assert( _self );

    // entry for this server: health is always 1.0 and the state comes from box
    {
        BSONObjBuilder me;
        me.append("_id", static_cast<int>(_self->id()));
        me.append("name", _self->fullName());
        me.append("health", 1.0);
        me.append("state", static_cast<int>(box.getState().s));
        me.append("stateStr", box.getState().toString());
        me.appendTimestamp("optime", lastOpTimeWritten.asDate());
        me.appendDate("optimeDate", lastOpTimeWritten.getSecs() * 1000LL);
        const string lastMsg = _self->lhb();
        if( !lastMsg.empty() )
            me.append("errmsg", lastMsg);
        me.append("self", true);
        members.push_back(me.obj());
    }

    // one entry per other member, built from its heartbeat info
    for( Member *m = _members.head(); m; m = m->next() ) {
        BSONObjBuilder other;
        other.append("_id", static_cast<int>(m->id()));
        other.append("name", m->fullName());
        const double health = m->hbinfo().health;
        other.append("health", health);
        other.append("state", static_cast<int>(m->state().s));
        if( health != 0 ) {
            other.append("stateStr", m->state().toString());
        }
        else {
            // if we can't connect the state info is from the past and could be confusing to show
            other.append("stateStr", "(not reachable/healthy)");
        }
        other.append("uptime", (unsigned) (m->hbinfo().upSince ? (time(0)-m->hbinfo().upSince) : 0));
        other.appendTimestamp("optime", m->hbinfo().opTime.asDate());
        other.appendDate("optimeDate", m->hbinfo().opTime.getSecs() * 1000LL);
        other.appendTimeT("lastHeartbeat", m->hbinfo().lastHeartbeat);
        other.append("pingMs", m->hbinfo().ping);
        const string lastMsg = m->lhb();
        if( !lastMsg.empty() )
            other.append("errmsg", lastMsg);
        members.push_back(other.obj());
    }

    sort(members.begin(), members.end());

    b.append("set", name());
    b.appendTimeT("date", time(0));
    b.append("myState", box.getState().s);

    // copy the pointer once so the test and the use see the same value
    const Member *syncTarget = _currentSyncTarget;
    if( syncTarget )
        b.append("syncingTo", syncTarget->fullName());

    b.append("members", members);
    if( replSetBlind )
        b.append("blind",true); // to avoid confusion if set...normally never set except for testing.
}
|
|
|
|
static struct Test : public UnitTest {
|
|
void run() {
|
|
HealthOptions a,b;
|
|
assert( a == b );
|
|
assert( a.isDefault() );
|
|
}
|
|
} test;
|
|
|
|
}
|