// Mirror of https://github.com/mongodb/mongo.git (synced 2024-12-01 01:21:03 +01:00)
// File: mongodb/db/reccache.h (261 lines, 7.1 KiB)
// reccache.h
/*
* Copyright (C) 2010 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* CachedBasicRecStore
This is our record store that implements a traditional page-cache style of
storage (as opposed to memory-mapped files).
*/
/* LOCK HIERARCHY
dblock
RecCache::rcmutex
i.e. always lock dblock first if you lock both
*/
#pragma once
#include "reci.h"
#include "recstore.h"
namespace mongo {
class RecCache {
struct Node {
Node(void* _data) : data((char *) _data) { dirty = false; newer = 0; }
~Node() {
free(data);
data = 0;
}
char *data;
DiskLoc loc;
bool dirty;
Node *older, *newer; // lru
};
mongo::mutex rcmutex; // mainly to coordinate with the lazy writer thread
2009-05-16 00:47:54 +02:00
unsigned recsize;
map<DiskLoc, Node*> m; // the cache
Node *newest, *oldest;
unsigned nnodes;
set<DiskLoc> dirtyl;
vector<BasicRecStore*> stores; // DiskLoc::a() indicates the index into this vector
map<string, BasicRecStore*> storesByNsKey; // nskey -> BasicRecStore*
public:
static unsigned MAXNODES;
enum BaseValue { Base = 10000 };
private:
BasicRecStore* _initStore(string fname);
BasicRecStore* initStore(int n);
string findStoreFilename(const char *_ns, bool& found);
void initStoreByNs(const char *ns, const string& nskey);
void closeStore(BasicRecStore *rs);
static string directory();
static string mknskey(const char *ns) {
return directory() + ns;
}
/* get the right file for a given diskloc */
BasicRecStore& store(DiskLoc& d) {
int n = d.a() - Base;
if( (int) stores.size() > n ) {
BasicRecStore *rs = stores[n];
if( rs ) {
assert( rs->fileNumber == n );
return *rs;
}
}
return *initStore(n);
}
BasicRecStore& store(const char *ns) {
string nskey = mknskey(ns);
BasicRecStore *&rs = storesByNsKey[nskey];
if( rs )
return *rs;
initStoreByNs(ns, nskey);
return *rs;
}
void writeDirty( set<DiskLoc>::iterator i, bool rawLog = false );
void writeIfDirty(Node *n);
void touch(Node* n) {
if( n == newest )
return;
if( n == oldest ) {
oldest = oldest->newer;
assert( oldest || nnodes == 1 );
}
if( n->older )
n->older->newer = n->newer;
if( n->newer )
n->newer->older = n->older;
n->newer = 0;
n->older = newest;
newest->newer = n;
newest = n;
}
Node* mkNode() {
Node *n = new Node(calloc(recsize,1)); // calloc is TEMP for testing. change to malloc
n->older = newest;
if( newest )
newest->newer = n;
else {
assert( oldest == 0 );
oldest = n;
}
newest = n;
nnodes++;
return n;
}
fileofs fileOfs(DiskLoc d) {
return ((fileofs) d.getOfs()) * recsize;
}
void dump();
void _ejectOld();
public:
/* all public functions (except constructor) should use the mutex */
RecCache(unsigned recsz) : recsize(recsz) {
2009-05-16 00:47:54 +02:00
nnodes = 0;
newest = oldest = 0;
}
/* call this after doing some work, after you are sure you are done with modifications.
we call it from dbunlocking().
*/
void ejectOld() {
if( nnodes > MAXNODES ) // just enough here to be inlineable for speed reasons. _ejectOld does the real work
_ejectOld();
}
/* bg writer thread invokes this */
void writeLazily();
/* Note that this may be called BEFORE the actual writing to the node
takes place. We do flushing later on a dbunlocking() call, which happens
after the writing.
*/
void dirty(DiskLoc d) {
assert( d.a() >= Base );
scoped_lock lk(rcmutex);
2009-05-16 00:47:54 +02:00
map<DiskLoc, Node*>::iterator i = m.find(d);
if( i != m.end() ) {
Node *n = i->second;
if( !n->dirty ) {
n->dirty = true;
dirtyl.insert(n->loc);
}
}
}
char* get(DiskLoc d, unsigned len) {
assert( d.a() >= Base );
assert( len == recsize );
scoped_lock lk(rcmutex);
2009-05-16 00:47:54 +02:00
map<DiskLoc, Node*>::iterator i = m.find(d);
if( i != m.end() ) {
touch(i->second);
return i->second->data;
}
Node *n = mkNode();
n->loc = d;
store(d).get(fileOfs(d), n->data, recsize); // could throw exception
m.insert( pair<DiskLoc, Node*>(d, n) );
return n->data;
}
void drop(const char *ns);
DiskLoc insert(const char *ns, const void *obuf, int len, bool god) {
scoped_lock lk(rcmutex);
2009-05-16 00:47:54 +02:00
BasicRecStore& rs = store(ns);
fileofs o = rs.insert((const char *) obuf, len);
assert( o % recsize == 0 );
fileofs recnum = o / recsize;
massert( 10377 , "RecCache file too large?", recnum <= 0x7fffffff );
2009-05-16 00:47:54 +02:00
Node *n = mkNode();
memcpy(n->data, obuf, len);
DiskLoc d(rs.fileNumber + Base, (int) recnum);
n->loc = d;
m[d] = n;
return d;
}
void closeFiles(string dbname, string path);
// at termination: write dirty pages and close all files
void closing();
};
/* The RecCache instance that CachedBasicRecStore and dbunlocking_write()
   below operate on.  Only declared here; the definition is not in this header. */
extern RecCache theRecCache;
class CachedBasicRecStore : public RecStoreInterface {
public:
2010-04-11 03:18:38 +02:00
VIRT char* get(DiskLoc d, unsigned len) {
2009-05-16 00:47:54 +02:00
return theRecCache.get(d, len);
}
2010-04-11 03:18:38 +02:00
VIRT DiskLoc insert(const char *ns, const void *obuf, int len, bool god) {
2009-05-16 00:47:54 +02:00
return theRecCache.insert(ns, obuf, len, god);
}
2010-04-11 03:18:38 +02:00
VIRT void modified(DiskLoc d) {
2009-05-16 00:47:54 +02:00
theRecCache.dirty(d);
}
/* drop collection */
2010-04-11 03:18:38 +02:00
VIRT void drop(const char *ns) {
2009-05-16 00:47:54 +02:00
theRecCache.drop(ns);
}
2010-04-11 03:18:38 +02:00
VIRT void rename(const char *fromNs, const char *toNs) {
massert( 10378 , "rename not yet implemented for CachedBasicRecStore", false );
}
2009-05-16 00:47:54 +02:00
/* close datafiles associated with the db specified. */
2010-04-11 03:18:38 +02:00
VIRT void closeFiles(string dbname, string path) {
2009-05-16 00:47:54 +02:00
theRecCache.closeFiles(dbname, dbpath);
}
};
/* See concurrency.h.  Note: when a lock is reset from read to write we do not
   call dbunlocking_read(); we just wait for the final dbunlocking_write()
   call.
*/
/* Companion to dbunlocking_write() for read locks (see concurrency.h).
   Currently a no-op: the only code in the body is commented out and kept
   for reference. */
inline void dbunlocking_read() {
    /*
    Client *c = currentClient.get();
    if ( c )
        c->top.clientStop();
    */
}
2009-11-27 22:40:58 +01:00
inline void dbunlocking_write() {
theRecCache.ejectOld();
dbunlocking_read();
}
} /*namespace*/