0
0
mirror of https://github.com/mongodb/mongo.git synced 2024-12-01 01:21:03 +01:00
mongodb/db/reccache.h

228 lines
6.2 KiB
C
Raw Normal View History

2009-02-03 00:18:22 +01:00
// reccache.h
/* CachedBasicRecStore
This is our store which implements a traditional page-cache type of storage
(not memory mapped files).
*/
/* LOCK HIERARCHY
dblock
RecCache::rcmutex
i.e. always lock dblock first if you lock both
*/
2009-02-03 00:18:22 +01:00
#pragma once
#include "reci.h"
#include "recstore.h"
namespace mongo {
class RecCache {
/* One cache entry: a heap buffer holding a record's bytes, the location it
   belongs to, a dirty flag, and its links in the LRU chain.
   The node takes ownership of _data and releases it with free(), so the
   buffer must come from malloc/calloc. Nodes are meant to be handled by
   pointer; copying one would make two nodes free the same buffer.
*/
struct Node {
    // Every pointer member starts out null so a freshly built node is never
    // observed with an indeterminate `older` link.
    Node(void* _data) : data((char *) _data), dirty(false), older(0), newer(0) { }
    ~Node() {
        free(data);
        data = 0;
    }
    char *data;          // record bytes (owned)
    DiskLoc loc;         // which record this buffer caches
    bool dirty;          // set by RecCache::dirty()
    Node *older, *newer; // lru
};
boost::mutex rcmutex; // mainly to coordinate with the lazy writer thread
2009-02-03 00:18:22 +01:00
unsigned recsize;
map<DiskLoc, Node*> m; // the cache
2009-02-03 00:18:22 +01:00
Node *newest, *oldest;
unsigned nnodes;
set<DiskLoc> dirtyl;
vector<BasicRecStore*> stores; // DiskLoc::a() indicates the index into this vector
map<string, BasicRecStore*> storesByNsKey; // nskey -> BasicRecStore*
2009-03-20 17:19:21 +01:00
public:
// Node-count threshold: ejectOld() calls _ejectOld() once nnodes exceeds it.
// Only declared here; the definition is not in this header.
static unsigned MAXNODES;
// Offset between a store's fileNumber and the DiskLoc::a() value used for
// cached records: insert() builds a() as fileNumber + Base, and
// store(DiskLoc&) subtracts Base again to index `stores`.
enum BaseValue { Base = 10000 };
2009-03-20 17:19:21 +01:00
private:
// Store set-up helpers; both are defined outside this header.
// NOTE(review): presumably _initStore opens/creates the store file named
// fname -- confirm against the implementation.
BasicRecStore* _initStore(string fname);
// Called by store(DiskLoc&) when stores[n] is missing or null. That caller
// dereferences the result without a null check.
BasicRecStore* initStore(int n);
2009-03-19 20:38:49 +01:00
// Defined outside this header.
// NOTE(review): presumably maps namespace _ns to its store file name and
// reports through `found` whether one exists -- confirm.
string findStoreFilename(const char *_ns, bool& found);
// Called by store(const char*) when storesByNsKey[nskey] is null; that caller
// dereferences the same map slot right afterwards, so this is expected to
// fill it in.
void initStoreByNs(const char *ns, const string& nskey);
// Defined outside this header.
void closeStore(BasicRecStore *rs);
// Prefix that mknskey() puts in front of a namespace name.
static string directory();
/* Build the storesByNsKey lookup key for a namespace: directory() followed by ns. */
static string mknskey(const char *ns) {
    string key = directory();
    key += ns;
    return key;
}
/* Return the store backing a given diskloc, initializing it on first use.
   The file number is d.a() with the Base offset removed. */
BasicRecStore& store(DiskLoc& d) {
    const int fileNum = d.a() - Base;
    BasicRecStore *cached = 0;
    if( fileNum < (int) stores.size() )
        cached = stores[fileNum];
    if( cached == 0 )
        return *initStore(fileNum); // not opened yet
    assert( cached->fileNumber == fileNum );
    return *cached;
}
/* Return the store for a namespace, keyed by mknskey(ns). On a miss,
   initStoreByNs() is invoked and the same map slot is then returned. */
BasicRecStore& store(const char *ns) {
    const string nskey = mknskey(ns);
    // operator[] creates a null slot on first sight of this key; the
    // reference stays bound to that slot across the init call.
    BasicRecStore *&slot = storesByNsKey[nskey];
    if( !slot )
        initStoreByNs(ns, nskey);
    return *slot;
}
// Defined outside this header. Takes an iterator into a set<DiskLoc>
// (dirtyl is the only such set in this class).
// NOTE(review): presumably writes that dirty node back to its store; the
// meaning of rawLog is not visible here -- confirm.
void writeDirty( set<DiskLoc>::iterator i, bool rawLog = false );
2009-02-03 00:18:22 +01:00
// Defined outside this header.
// NOTE(review): presumably writes node n back only when n->dirty is set -- confirm.
void writeIfDirty(Node *n);
/* Mark n as the most recently used node: unlink it from wherever it sits in
   the LRU chain and re-attach it at the `newest` end. */
void touch(Node* n) {
    if( n == newest )
        return; // already at the front
    Node *prev = n->older;
    Node *next = n->newer;
    if( n == oldest ) {
        // the tail is moving away, so its successor becomes the new tail
        oldest = next;
        assert( oldest || nnodes == 1 );
    }
    // splice n out of the chain
    if( prev )
        prev->newer = next;
    if( next )
        next->older = prev;
    // and hook it back in as the head
    n->newer = 0;
    n->older = newest;
    newest->newer = n;
    newest = n;
}
/* Allocate a zero-filled node of recsize bytes and link it in as the newest
   (most recently used) entry of the LRU chain, bumping nnodes.
   The caller still has to set n->loc and register the node in m.
   A failed buffer allocation is reported through massert, the same mechanism
   insert() uses for its file-size check.
*/
Node* mkNode() {
    void *buf = calloc(recsize,1); // calloc is TEMP for testing. change to malloc
    // Never hand out a node with a null data pointer: get() and insert()
    // write through it immediately. (A zero-byte request may legitimately
    // yield null, so that case is not treated as a failure.)
    massert( "RecCache: out of memory", buf != 0 || recsize == 0 );
    Node *n = new Node(buf);
    n->older = newest;
    if( newest )
        newest->newer = n;
    else {
        // first node in an empty chain is both head and tail
        assert( oldest == 0 );
        oldest = n;
    }
    newest = n;
    nnodes++;
    return n;
}
/* Byte offset of record d within its store file: record number times recsize. */
fileofs fileOfs(DiskLoc d) {
    const fileofs recnum = (fileofs) d.getOfs(); // widen before multiplying
    return recnum * recsize;
}
2009-02-04 17:26:51 +01:00
// Defined outside this header. NOTE(review): presumably a debugging aid -- confirm.
void dump();
// Out-of-line part of ejectOld(), which calls it when nnodes > MAXNODES.
void _ejectOld();
2009-02-04 17:26:51 +01:00
2009-02-03 00:18:22 +01:00
public:
/* all public functions (except constructor) should use the mutex */
/* Create an empty cache whose records are recsz bytes each. */
RecCache(unsigned recsz)
    : recsize(recsz), newest(0), oldest(0), nnodes(0) {
}
/* Trim the cache if it has grown past MAXNODES.
   Call this after doing some work, once you are sure you are done with
   modifications; it is called from dbunlocking().
   Only the cheap size test lives here so the call can be inlined;
   _ejectOld does the real work.
*/
void ejectOld() {
    if( nnodes <= MAXNODES )
        return;
    _ejectOld();
}
/* bg writer thread invokes this.
   Defined outside this header. */
void writeLazily();
/* Note that this may be called BEFORE the actual writing to the node
takes place. We do flushing later on a dbunlocking() call, which happens
after the writing.
*/
2009-02-03 00:18:22 +01:00
void dirty(DiskLoc d) {
assert( d.a() >= Base );
boostlock lk(rcmutex);
2009-02-03 00:18:22 +01:00
map<DiskLoc, Node*>::iterator i = m.find(d);
if( i != m.end() ) {
Node *n = i->second;
if( !n->dirty ) {
n->dirty = true;
2009-02-03 23:39:44 +01:00
dirtyl.insert(n->loc);
2009-02-03 00:18:22 +01:00
}
}
}
/* Return a pointer to the cached bytes of record d, reading the record from
   its store on a cache miss.
   d   - record location; d.a() must be >= Base.
   len - must equal recsize.
   The returned buffer belongs to the cache. If the store read throws, the
   exception propagates and the cache is left exactly as it was before the
   call.
*/
char* get(DiskLoc d, unsigned len) {
    assert( d.a() >= Base );
    assert( len == recsize );
    boostlock lk(rcmutex);
    map<DiskLoc, Node*>::iterator i = m.find(d);
    if( i != m.end() ) {
        touch(i->second);
        return i->second->data;
    }
    Node *n = mkNode();
    n->loc = d;
    try {
        store(d).get(fileOfs(d), n->data, recsize); // could throw exception
        m.insert( pair<DiskLoc, Node*>(d, n) );
    }
    catch(...) {
        // Undo mkNode(): n was linked in as `newest` and counted in nnodes
        // but never reached the map, so without this it would linger in the
        // LRU chain as an orphan.
        newest = n->older;
        if( newest )
            newest->newer = 0;
        else
            oldest = 0;
        nnodes--;
        delete n;
        throw;
    }
    return n->data;
}
// Defined outside this header. CachedBasicRecStore::drop() ("drop collection")
// forwards to this.
void drop(const char *ns);
2009-02-03 00:18:22 +01:00
/* Insert a new record into the store for namespace ns and cache a copy.
   ns   - namespace whose store receives the record.
   obuf - record bytes to insert.
   len  - number of bytes at obuf; must not exceed recsize.
   god  - accepted for interface compatibility; not used here.
   Returns the DiskLoc of the new record: a() is the store's fileNumber plus
   Base, and the offset part is the record number (file offset / recsize).
*/
DiskLoc insert(const char *ns, const void *obuf, int len, bool god) {
    // The cached buffer is recsize bytes (see mkNode), so the memcpy below
    // must never be asked to copy more than that. Checked before anything
    // is written.
    assert( len >= 0 && (unsigned) len <= recsize );
    boostlock lk(rcmutex);
    BasicRecStore& rs = store(ns);
    fileofs o = rs.insert((const char *) obuf, len);
    assert( o % recsize == 0 );
    fileofs recnum = o / recsize;
    // the record number has to fit in the int offset field of a DiskLoc
    massert( "RecCache file too large?", recnum <= 0x7fffffff );
    Node *n = mkNode();
    memcpy(n->data, obuf, len);
    DiskLoc d(rs.fileNumber + Base, (int) recnum);
    n->loc = d;
    m[d] = n;
    return d;
}
2009-03-24 17:53:13 +01:00
// Defined outside this header. CachedBasicRecStore::closeFiles() forwards
// here to close the datafiles associated with a db.
void closeFiles(string dbname, string path);
// at termination: write dirty pages and close all files
void closing();
2009-02-03 00:18:22 +01:00
};
extern RecCache theRecCache;
/* RecStoreInterface implementation backed by the page cache: every call is
   forwarded to the global theRecCache. */
class CachedBasicRecStore : public RecStoreInterface {
public:
    /* fetch the cached bytes for record d; len is passed through to the cache */
    virtual char* get(DiskLoc d, unsigned len) {
        return theRecCache.get(d, len);
    }
    /* insert a record into namespace ns; returns its location */
    virtual DiskLoc insert(const char *ns, const void *obuf, int len, bool god) {
        return theRecCache.insert(ns, obuf, len, god);
    }
    /* tell the cache that record d has been (or is about to be) modified */
    virtual void modified(DiskLoc d) {
        theRecCache.dirty(d);
    }
    /* drop collection */
    virtual void drop(const char *ns) {
        theRecCache.drop(ns);
    }
    /* close datafiles associated with the db specified. */
    virtual void closeFiles(string dbname, string path) {
        // Forward the caller's path; previously the `path` argument was
        // ignored and a variable named dbpath was passed in its place.
        theRecCache.closeFiles(dbname, path);
    }
};
2009-02-03 00:18:22 +01:00
inline void dbunlocking() {
// dassert( dbMutexInfo.isLocked() );
theRecCache.ejectOld();
}
2009-02-03 00:18:22 +01:00
} /*namespace*/