mirror of
https://github.com/mongodb/mongo.git
synced 2024-12-01 09:32:32 +01:00
Import wiredtiger: 90a2282276ccc25fcc662e6ebef6fbc91d0ff0af from branch mongodb-4.4
ref: 4cf221a61f..90a2282276
for: 4.5.1

WT-6042 Changes to checkpoint04 to remove or debug failures on Windows
WT-6165 Fix cursor operation hung on WT_PREPARE_CONFLICT
WT-6173 Change __wt_time_window_ functions to macros
WT-6174 Fix dhandle->session_inuse counter leak in __evict_walk
WT-6195 Ensure that we don't perform rollback to stable without a history store
WT-6214 format.sh must disassociate from the child process
This commit is contained in:
parent
a3ab985544
commit
213de387cd
2
src/third_party/wiredtiger/import.data
vendored
2
src/third_party/wiredtiger/import.data
vendored
@ -2,5 +2,5 @@
|
||||
"vendor": "wiredtiger",
|
||||
"github": "wiredtiger/wiredtiger.git",
|
||||
"branch": "mongodb-4.4",
|
||||
"commit": "4cf221a61f19db1ed706f923d18d3d0b507101c9"
|
||||
"commit": "90a2282276ccc25fcc662e6ebef6fbc91d0ff0af"
|
||||
}
|
||||
|
@ -932,7 +932,7 @@ __wt_debug_cursor_hs(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor)
|
||||
uint8_t hs_upd_type;
|
||||
|
||||
ds = &_ds;
|
||||
__wt_time_window_init(&tw);
|
||||
WT_TIME_WINDOW_INIT(&tw);
|
||||
|
||||
WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
|
||||
WT_ERR(__wt_scr_alloc(session, 0, &hs_value));
|
||||
|
@ -383,7 +383,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
* don't need to append a tombstone.
|
||||
*/
|
||||
__wt_read_row_time_window(session, page, rip, &tw);
|
||||
if (!__wt_time_window_has_stop(&tw)) {
|
||||
if (!WT_TIME_WINDOW_HAS_STOP(&tw)) {
|
||||
WT_ERR(__tombstone_update_alloc(session, page_del, &upd, &size));
|
||||
upd->next = upd_array[WT_ROW_SLOT(page, rip)];
|
||||
upd_array[WT_ROW_SLOT(page, rip)] = upd;
|
||||
|
@ -574,8 +574,8 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
* The visibility information is not referenced on the page so we need to ensure that
|
||||
* the value is globally visible at the point in time where we read the page into cache.
|
||||
*/
|
||||
if (!btree->huffman_value && (__wt_time_window_is_empty(&unpack.tw) ||
|
||||
(!__wt_time_window_has_stop(&unpack.tw) &&
|
||||
if (!btree->huffman_value && (WT_TIME_WINDOW_IS_EMPTY(&unpack.tw) ||
|
||||
(!WT_TIME_WINDOW_HAS_STOP(&unpack.tw) &&
|
||||
__wt_txn_tw_start_visible_all(session, &unpack.tw))))
|
||||
__wt_row_leaf_value_set(page, rip - 1, &unpack);
|
||||
break;
|
||||
@ -607,7 +607,7 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
/* Unpack the on-page value cell. */
|
||||
__wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
|
||||
if (unpack.tw.prepare) {
|
||||
if (!__wt_time_window_has_stop(&unpack.tw)) {
|
||||
if (!WT_TIME_WINDOW_HAS_STOP(&unpack.tw)) {
|
||||
/* Take the value from the original page cell. */
|
||||
WT_RET(__wt_page_cell_data_ref(session, page, &unpack, &buf));
|
||||
|
||||
|
@ -76,7 +76,7 @@ __rebalance_leaf_append(WT_SESSION_IMPL *session, const uint8_t *key, size_t key
|
||||
|
||||
WT_RET(__wt_calloc_one(session, &copy_addr));
|
||||
copy->addr = copy_addr;
|
||||
__wt_time_aggregate_copy(&copy_addr->ta, &unpack->ta);
|
||||
WT_TIME_AGGREGATE_COPY(&copy_addr->ta, &unpack->ta);
|
||||
WT_RET(__wt_memdup(session, unpack->data, unpack->size, &copy_addr->addr));
|
||||
copy_addr->size = (uint8_t)unpack->size;
|
||||
copy_addr->type = unpack->type == WT_CELL_ADDR_LEAF ? WT_ADDR_LEAF : WT_ADDR_LEAF_NO;
|
||||
|
16
src/third_party/wiredtiger/src/btree/bt_ret.c
vendored
16
src/third_party/wiredtiger/src/btree/bt_ret.c
vendored
@ -79,7 +79,7 @@ __read_col_time_window(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell, W
|
||||
WT_CELL_UNPACK_KV unpack;
|
||||
|
||||
__wt_cell_unpack_kv(session, page->dsk, cell, &unpack);
|
||||
__wt_time_window_copy(tw, &unpack.tw);
|
||||
WT_TIME_WINDOW_COPY(tw, &unpack.tw);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -91,7 +91,7 @@ __wt_read_row_time_window(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip,
|
||||
{
|
||||
WT_CELL_UNPACK_KV unpack;
|
||||
|
||||
__wt_time_window_init(tw);
|
||||
WT_TIME_WINDOW_INIT(tw);
|
||||
/*
|
||||
* If a value is simple and is globally visible at the time of reading a page into cache, we set
|
||||
* the time pairs as globally visible.
|
||||
@ -100,7 +100,7 @@ __wt_read_row_time_window(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip,
|
||||
return;
|
||||
|
||||
__wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
|
||||
__wt_time_window_copy(tw, &unpack.tw);
|
||||
WT_TIME_WINDOW_COPY(tw, &unpack.tw);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -125,7 +125,7 @@ __wt_read_cell_time_window(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_TIME_WINDOW *tw
|
||||
__read_col_time_window(session, page, WT_COL_PTR(page, &page->pg_var[cbt->slot]), tw);
|
||||
} else {
|
||||
/* WT_PAGE_COL_FIX: return the default time pairs. */
|
||||
__wt_time_window_init(tw);
|
||||
WT_TIME_WINDOW_INIT(tw);
|
||||
}
|
||||
}
|
||||
|
||||
@ -160,14 +160,14 @@ __wt_value_return_buf(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_ITEM *buf, WT_TIME_W
|
||||
*/
|
||||
if (__wt_row_leaf_value(page, rip, buf)) {
|
||||
if (tw != NULL)
|
||||
__wt_time_window_init(tw);
|
||||
WT_TIME_WINDOW_INIT(tw);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Take the value from the original page cell. */
|
||||
__wt_row_leaf_value_cell(session, page, rip, NULL, &unpack);
|
||||
if (tw != NULL)
|
||||
__wt_time_window_copy(tw, &unpack.tw);
|
||||
WT_TIME_WINDOW_COPY(tw, &unpack.tw);
|
||||
return (__wt_page_cell_data_ref(session, page, &unpack, buf));
|
||||
}
|
||||
|
||||
@ -176,7 +176,7 @@ __wt_value_return_buf(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_ITEM *buf, WT_TIME_W
|
||||
cell = WT_COL_PTR(page, &page->pg_var[cbt->slot]);
|
||||
__wt_cell_unpack_kv(session, page->dsk, cell, &unpack);
|
||||
if (tw != NULL)
|
||||
__wt_time_window_copy(tw, &unpack.tw);
|
||||
WT_TIME_WINDOW_COPY(tw, &unpack.tw);
|
||||
return (__wt_page_cell_data_ref(session, page, &unpack, buf));
|
||||
}
|
||||
|
||||
@ -186,7 +186,7 @@ __wt_value_return_buf(WT_CURSOR_BTREE *cbt, WT_REF *ref, WT_ITEM *buf, WT_TIME_W
|
||||
* FIXME-WT-6126: Should also check visibility here
|
||||
*/
|
||||
if (tw != NULL)
|
||||
__wt_time_window_init(tw);
|
||||
WT_TIME_WINDOW_INIT(tw);
|
||||
v = __bit_getv_recno(ref, cursor->recno, btree->bitcnt);
|
||||
return (__wt_buf_set(session, buf, &v, 1));
|
||||
}
|
||||
|
@ -186,7 +186,7 @@ __slvg_checkpoint(WT_SESSION_IMPL *session, WT_REF *root)
|
||||
__wt_seconds(session, &ckptbase->sec);
|
||||
WT_ERR(__wt_metadata_search(session, dhandle->name, &config));
|
||||
WT_ERR(__wt_meta_block_metadata(session, config, ckptbase));
|
||||
__wt_time_aggregate_init(&ckptbase->ta);
|
||||
WT_TIME_AGGREGATE_INIT(&ckptbase->ta);
|
||||
ckptbase->write_gen = btree->write_gen;
|
||||
F_SET(ckptbase, WT_CKPT_ADD);
|
||||
|
||||
@ -1164,7 +1164,7 @@ __slvg_col_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF
|
||||
* regardless of a value's timestamps or transaction IDs.
|
||||
*/
|
||||
WT_ERR(__wt_calloc_one(session, &addr));
|
||||
__wt_time_aggregate_init(&addr->ta);
|
||||
WT_TIME_AGGREGATE_INIT(&addr->ta);
|
||||
WT_ERR(__wt_memdup(session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
|
||||
addr->size = trk->trk_addr_size;
|
||||
addr->type = trk->trk_ovfl_cnt == 0 ? WT_ADDR_LEAF_NO : WT_ADDR_LEAF;
|
||||
@ -1767,7 +1767,7 @@ __slvg_row_build_internal(WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF
|
||||
* regardless of a value's timestamps or transaction IDs.
|
||||
*/
|
||||
WT_ERR(__wt_calloc_one(session, &addr));
|
||||
__wt_time_aggregate_init(&addr->ta);
|
||||
WT_TIME_AGGREGATE_INIT(&addr->ta);
|
||||
WT_ERR(__wt_memdup(session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
|
||||
addr->size = trk->trk_addr_size;
|
||||
addr->type = trk->trk_ovfl_cnt == 0 ? WT_ADDR_LEAF_NO : WT_ADDR_LEAF;
|
||||
|
@ -249,7 +249,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, WT_REF **from_ref
|
||||
if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) {
|
||||
__wt_cell_unpack_addr(session, from_home->dsk, (WT_CELL *)ref_addr, &unpack);
|
||||
WT_RET(__wt_calloc_one(session, &addr));
|
||||
__wt_time_aggregate_copy(&addr->ta, &unpack.ta);
|
||||
WT_TIME_AGGREGATE_COPY(&addr->ta, &unpack.ta);
|
||||
WT_ERR(__wt_memdup(session, unpack.data, unpack.size, &addr->addr));
|
||||
addr->size = (uint8_t)unpack.size;
|
||||
switch (unpack.raw) {
|
||||
@ -1699,7 +1699,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_R
|
||||
if (multi->addr.addr != NULL) {
|
||||
WT_RET(__wt_calloc_one(session, &addr));
|
||||
ref->addr = addr;
|
||||
__wt_time_aggregate_copy(&addr->ta, &multi->addr.ta);
|
||||
WT_TIME_AGGREGATE_COPY(&addr->ta, &multi->addr.ta);
|
||||
WT_RET(__wt_memdup(session, multi->addr.addr, multi->addr.size, &addr->addr));
|
||||
addr->size = multi->addr.size;
|
||||
addr->type = multi->addr.type;
|
||||
|
@ -258,7 +258,7 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
|
||||
* Create a fake, unpacked parent cell for the tree based on the checkpoint information.
|
||||
*/
|
||||
memset(&addr_unpack, 0, sizeof(addr_unpack));
|
||||
__wt_time_aggregate_copy(&addr_unpack.ta, &ckpt->ta);
|
||||
WT_TIME_AGGREGATE_COPY(&addr_unpack.ta, &ckpt->ta);
|
||||
if (ckpt->write_gen <= S2C(session)->base_write_gen) {
|
||||
addr_unpack.ta.oldest_start_txn = WT_TXN_NONE;
|
||||
addr_unpack.ta.newest_stop_txn = WT_TXN_MAX;
|
||||
|
@ -927,7 +927,7 @@ __verify_dsk_col_var(
|
||||
|
||||
last.data = NULL;
|
||||
last.size = 0;
|
||||
__wt_time_window_init(&last.tw);
|
||||
WT_TIME_WINDOW_INIT(&last.tw);
|
||||
last.deleted = false;
|
||||
|
||||
cell_num = 0;
|
||||
@ -960,7 +960,7 @@ __verify_dsk_col_var(
|
||||
* The time windows must match and we otherwise don't have to care about data encoding, a
|
||||
* byte comparison is enough.
|
||||
*/
|
||||
if (!__wt_time_windows_equal(&unpack->tw, &last.tw))
|
||||
if (!WT_TIME_WINDOWS_EQUAL(&unpack->tw, &last.tw))
|
||||
;
|
||||
else if (last.deleted) {
|
||||
if (cell_type == WT_CELL_DEL)
|
||||
@ -972,7 +972,7 @@ match_err:
|
||||
" on page at %s are identical and should have been run-length encoded",
|
||||
cell_num - 1, cell_num, tag);
|
||||
|
||||
__wt_time_window_copy(&last.tw, &unpack->tw);
|
||||
WT_TIME_WINDOW_COPY(&last.tw, &unpack->tw);
|
||||
switch (cell_type) {
|
||||
case WT_CELL_DEL:
|
||||
last.data = NULL;
|
||||
|
@ -1548,6 +1548,11 @@ err:
|
||||
if (dhandle_locked)
|
||||
__wt_readunlock(session, &conn->dhandle_lock);
|
||||
|
||||
if (incr) {
|
||||
WT_ASSERT(session, dhandle->session_inuse > 0);
|
||||
(void)__wt_atomic_subi32(&dhandle->session_inuse, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we didn't find any entries on a walk when we weren't interrupted, let our caller know.
|
||||
*/
|
||||
|
@ -1115,7 +1115,7 @@ __wt_ref_addr_copy(WT_SESSION_IMPL *session, WT_REF *ref, WT_ADDR_COPY *copy)
|
||||
|
||||
/* If off-page, the pointer references a WT_ADDR structure. */
|
||||
if (__wt_off_page(page, addr)) {
|
||||
__wt_time_aggregate_copy(&copy->ta, &addr->ta);
|
||||
WT_TIME_AGGREGATE_COPY(&copy->ta, &addr->ta);
|
||||
copy->type = addr->type;
|
||||
memcpy(copy->addr, addr->addr, copy->size = addr->size);
|
||||
return (true);
|
||||
@ -1123,7 +1123,7 @@ __wt_ref_addr_copy(WT_SESSION_IMPL *session, WT_REF *ref, WT_ADDR_COPY *copy)
|
||||
|
||||
/* If on-page, the pointer references a cell. */
|
||||
__wt_cell_unpack_addr(session, page->dsk, (WT_CELL *)addr, unpack);
|
||||
__wt_time_aggregate_copy(&copy->ta, &unpack->ta);
|
||||
WT_TIME_AGGREGATE_COPY(&copy->ta, &unpack->ta);
|
||||
copy->type = 0; /* Avoid static analyzer uninitialized value complaints. */
|
||||
switch (unpack->raw) {
|
||||
case WT_CELL_ADDR_INT:
|
||||
|
12
src/third_party/wiredtiger/src/include/cell.i
vendored
12
src/third_party/wiredtiger/src/include/cell.i
vendored
@ -66,7 +66,7 @@ __cell_pack_value_validity(
|
||||
uint8_t flags, *flagsp;
|
||||
|
||||
/* Globally visible values have no associated validity window. */
|
||||
if (__wt_time_window_is_empty(tw)) {
|
||||
if (WT_TIME_WINDOW_IS_EMPTY(tw)) {
|
||||
++*pp;
|
||||
return;
|
||||
}
|
||||
@ -184,7 +184,7 @@ __cell_pack_addr_validity(WT_SESSION_IMPL *session, uint8_t **pp, WT_TIME_AGGREG
|
||||
uint8_t flags, *flagsp;
|
||||
|
||||
/* Globally visible values have no associated validity window. */
|
||||
if (__wt_time_aggregate_is_empty(ta)) {
|
||||
if (WT_TIME_AGGREGATE_IS_EMPTY(ta)) {
|
||||
++*pp;
|
||||
return;
|
||||
}
|
||||
@ -748,14 +748,14 @@ __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CE
|
||||
if (unpack_addr == NULL) {
|
||||
unpack = (WT_CELL_UNPACK_COMMON *)unpack_value;
|
||||
tw = &unpack_value->tw;
|
||||
__wt_time_window_init(tw);
|
||||
WT_TIME_WINDOW_INIT(tw);
|
||||
ta = NULL;
|
||||
} else {
|
||||
WT_ASSERT(session, unpack_value == NULL);
|
||||
|
||||
unpack = (WT_CELL_UNPACK_COMMON *)unpack_addr;
|
||||
ta = &unpack_addr->ta;
|
||||
__wt_time_aggregate_init(ta);
|
||||
WT_TIME_AGGREGATE_INIT(ta);
|
||||
tw = NULL;
|
||||
}
|
||||
|
||||
@ -950,7 +950,7 @@ copy_cell_restart:
|
||||
copy.v = unpack->v;
|
||||
copy.len = WT_PTRDIFF32(p, cell);
|
||||
tw = &copy.tw;
|
||||
__wt_time_window_init(tw);
|
||||
WT_TIME_WINDOW_INIT(tw);
|
||||
cell = (WT_CELL *)((uint8_t *)cell - v);
|
||||
goto copy_cell_restart;
|
||||
|
||||
@ -1114,7 +1114,7 @@ __wt_cell_unpack_kv(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_CELL
|
||||
* If there isn't any value validity window (which is what it will take to get to a
|
||||
* zero-length item), the value must be stable.
|
||||
*/
|
||||
__wt_time_window_init(&unpack_value->tw);
|
||||
WT_TIME_WINDOW_INIT(&unpack_value->tw);
|
||||
|
||||
return;
|
||||
}
|
||||
|
18
src/third_party/wiredtiger/src/include/extern.h
vendored
18
src/third_party/wiredtiger/src/include/extern.h
vendored
@ -1831,14 +1831,6 @@ static inline bool __wt_session_can_wait(WT_SESSION_IMPL *session)
|
||||
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
static inline bool __wt_split_descent_race(WT_SESSION_IMPL *session, WT_REF *ref,
|
||||
WT_PAGE_INDEX *saved_pindex) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
static inline bool __wt_time_aggregate_is_empty(WT_TIME_AGGREGATE *ta)
|
||||
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
static inline bool __wt_time_window_has_stop(WT_TIME_WINDOW *tw)
|
||||
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
static inline bool __wt_time_window_is_empty(WT_TIME_WINDOW *tw)
|
||||
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
static inline bool __wt_time_windows_equal(WT_TIME_WINDOW *tw1, WT_TIME_WINDOW *tw2)
|
||||
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
static inline bool __wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
|
||||
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
static inline bool __wt_txn_tw_start_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
|
||||
@ -2192,18 +2184,8 @@ static inline void __wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t);
|
||||
static inline void __wt_spin_lock_track(WT_SESSION_IMPL *session, WT_SPINLOCK *t);
|
||||
static inline void __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t);
|
||||
static inline void __wt_struct_size_adjust(WT_SESSION_IMPL *session, size_t *sizep);
|
||||
static inline void __wt_time_aggregate_copy(WT_TIME_AGGREGATE *dest, WT_TIME_AGGREGATE *source);
|
||||
static inline void __wt_time_aggregate_init(WT_TIME_AGGREGATE *ta);
|
||||
static inline void __wt_time_aggregate_init_max(WT_TIME_AGGREGATE *ta);
|
||||
static inline void __wt_time_aggregate_merge(WT_TIME_AGGREGATE *dest, WT_TIME_AGGREGATE *source);
|
||||
static inline void __wt_time_aggregate_update(WT_TIME_AGGREGATE *ta, WT_TIME_WINDOW *tw);
|
||||
static inline void __wt_time_window_clear_obsolete(
|
||||
WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw, uint64_t oldest_id, wt_timestamp_t oldest_ts);
|
||||
static inline void __wt_time_window_copy(WT_TIME_WINDOW *dest, WT_TIME_WINDOW *source);
|
||||
static inline void __wt_time_window_init(WT_TIME_WINDOW *tw);
|
||||
static inline void __wt_time_window_init_max(WT_TIME_WINDOW *tw);
|
||||
static inline void __wt_time_window_set_start(WT_TIME_WINDOW *tw, WT_UPDATE *upd);
|
||||
static inline void __wt_time_window_set_stop(WT_TIME_WINDOW *tw, WT_UPDATE *upd);
|
||||
static inline void __wt_timing_stress(WT_SESSION_IMPL *session, u_int flag);
|
||||
static inline void __wt_tree_modify_set(WT_SESSION_IMPL *session);
|
||||
static inline void __wt_txn_cursor_op(WT_SESSION_IMPL *session);
|
||||
|
@ -52,9 +52,9 @@ __wt_rec_addr_ts_init(WT_RECONCILE *r, WT_TIME_AGGREGATE *ta)
|
||||
* simple durability.
|
||||
*/
|
||||
if (r->page->type == WT_PAGE_COL_FIX)
|
||||
__wt_time_aggregate_init(ta);
|
||||
WT_TIME_AGGREGATE_INIT(ta);
|
||||
else
|
||||
__wt_time_aggregate_init_max(ta);
|
||||
WT_TIME_AGGREGATE_INIT_MAX(ta);
|
||||
}
|
||||
|
||||
/*
|
||||
|
353
src/third_party/wiredtiger/src/include/timestamp.i
vendored
353
src/third_party/wiredtiger/src/include/timestamp.i
vendored
@ -6,51 +6,148 @@
|
||||
* See the file LICENSE for redistribution information.
|
||||
*/
|
||||
|
||||
/*
|
||||
* __wt_time_window_init --
|
||||
* Initialize the fields in a time window to their defaults.
|
||||
*/
|
||||
static inline void
|
||||
__wt_time_window_init(WT_TIME_WINDOW *tw)
|
||||
{
|
||||
tw->durable_start_ts = WT_TS_NONE;
|
||||
tw->start_ts = WT_TS_NONE;
|
||||
tw->start_txn = WT_TXN_NONE;
|
||||
/* Initialize the fields in a time window to their defaults. */
|
||||
#define WT_TIME_WINDOW_INIT(tw) \
|
||||
do { \
|
||||
(tw)->durable_start_ts = WT_TS_NONE; \
|
||||
(tw)->start_ts = WT_TS_NONE; \
|
||||
(tw)->start_txn = WT_TXN_NONE; \
|
||||
(tw)->durable_stop_ts = WT_TS_NONE; \
|
||||
(tw)->stop_ts = WT_TS_MAX; \
|
||||
(tw)->stop_txn = WT_TXN_MAX; \
|
||||
(tw)->prepare = 0; \
|
||||
} while (0)
|
||||
|
||||
tw->durable_stop_ts = WT_TS_NONE;
|
||||
tw->stop_ts = WT_TS_MAX;
|
||||
tw->stop_txn = WT_TXN_MAX;
|
||||
/* Initialize the fields in a time window to values that force an override. */
|
||||
#define WT_TIME_WINDOW_INIT_MAX(tw) \
|
||||
do { \
|
||||
(tw)->durable_start_ts = WT_TS_MAX; \
|
||||
(tw)->start_ts = WT_TS_MAX; \
|
||||
(tw)->start_txn = WT_TXN_MAX; \
|
||||
(tw)->durable_stop_ts = WT_TS_MAX; \
|
||||
(tw)->stop_ts = WT_TS_NONE; \
|
||||
(tw)->stop_txn = WT_TXN_NONE; \
|
||||
(tw)->prepare = 0; \
|
||||
} while (0)
|
||||
|
||||
tw->prepare = 0;
|
||||
}
|
||||
/* Copy the values from one time window structure to another. */
|
||||
#define WT_TIME_WINDOW_COPY(dest, source) (*(dest) = *(source))
|
||||
|
||||
/* Return true if the time window is equivalent to the default time window. */
|
||||
#define WT_TIME_WINDOW_IS_EMPTY(tw) \
|
||||
((tw)->durable_start_ts == WT_TS_NONE && (tw)->start_ts == WT_TS_NONE && \
|
||||
(tw)->start_txn == WT_TXN_NONE && (tw)->durable_stop_ts == WT_TS_NONE && \
|
||||
(tw)->stop_ts == WT_TS_MAX && (tw)->stop_txn == WT_TXN_MAX && (tw)->prepare == 0)
|
||||
|
||||
/* Check if the stop time window is set. */
|
||||
#define WT_TIME_WINDOW_HAS_STOP(tw) ((tw)->stop_txn != WT_TXN_MAX || (tw)->stop_ts != WT_TS_MAX)
|
||||
|
||||
/* Return true if the time windows are the same. */
|
||||
#define WT_TIME_WINDOWS_EQUAL(tw1, tw2) \
|
||||
((tw1)->durable_start_ts == (tw2)->durable_start_ts && (tw1)->start_ts == (tw2)->start_ts && \
|
||||
(tw1)->start_txn == (tw2)->start_txn && (tw1)->durable_stop_ts == (tw2)->durable_stop_ts && \
|
||||
(tw1)->stop_ts == (tw2)->stop_ts && (tw1)->stop_txn == (tw2)->stop_txn && \
|
||||
(tw1)->prepare == (tw2)->prepare)
|
||||
|
||||
/*
|
||||
* __wt_time_window_init_max --
|
||||
* Initialize the fields in a time window to values that force an override.
|
||||
* Set the start values of a time window from those in an update structure. Durable timestamp can be
|
||||
* 0 for prepared updates, in those cases use the prepared timestamp as durable timestamp.
|
||||
*/
|
||||
static inline void
|
||||
__wt_time_window_init_max(WT_TIME_WINDOW *tw)
|
||||
{
|
||||
tw->durable_start_ts = WT_TS_MAX;
|
||||
tw->start_ts = WT_TS_MAX;
|
||||
tw->start_txn = WT_TXN_MAX;
|
||||
|
||||
tw->durable_stop_ts = WT_TS_MAX;
|
||||
tw->stop_ts = WT_TS_NONE;
|
||||
tw->stop_txn = WT_TXN_NONE;
|
||||
|
||||
tw->prepare = 0;
|
||||
}
|
||||
#define WT_TIME_WINDOW_SET_START(tw, upd) \
|
||||
do { \
|
||||
(tw)->durable_start_ts = (tw)->start_ts = (upd)->start_ts; \
|
||||
if ((upd)->durable_ts != WT_TS_NONE) \
|
||||
(tw)->durable_start_ts = (upd)->durable_ts; \
|
||||
(tw)->start_txn = (upd)->txnid; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* __wt_time_window_copy --
|
||||
* Copy the values from one time window structure to another.
|
||||
* Set the stop values of a time window from those in an update structure. Durable timestamp can be
|
||||
* 0 for prepared updates, in those cases use the prepared timestamp as durable timestamp.
|
||||
*/
|
||||
static inline void
|
||||
__wt_time_window_copy(WT_TIME_WINDOW *dest, WT_TIME_WINDOW *source)
|
||||
{
|
||||
*dest = *source;
|
||||
}
|
||||
#define WT_TIME_WINDOW_SET_STOP(tw, upd) \
|
||||
do { \
|
||||
(tw)->durable_stop_ts = (tw)->stop_ts = (upd)->start_ts; \
|
||||
if ((upd)->durable_ts != WT_TS_NONE) \
|
||||
(tw)->durable_stop_ts = (upd)->durable_ts; \
|
||||
(tw)->stop_txn = (upd)->txnid; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Initialize the fields in an aggregated time window to their defaults. The aggregated durable
|
||||
* timestamp values represent the maximum durable timestamp over set of timestamps. These aggregated
|
||||
* max values are used for rollback to stable operation to find out whether the page has any
|
||||
* timestamp updates more than stable timestamp.
|
||||
*/
|
||||
#define WT_TIME_AGGREGATE_INIT(ta) \
|
||||
do { \
|
||||
(ta)->newest_start_durable_ts = WT_TS_NONE; \
|
||||
(ta)->newest_stop_durable_ts = WT_TS_NONE; \
|
||||
(ta)->oldest_start_ts = WT_TS_NONE; \
|
||||
(ta)->oldest_start_txn = WT_TXN_NONE; \
|
||||
(ta)->newest_stop_ts = WT_TS_MAX; \
|
||||
(ta)->newest_stop_txn = WT_TXN_MAX; \
|
||||
(ta)->prepare = 0; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Initialize the fields in an aggregated time window to maximum values, since this structure is
|
||||
* generally populated by iterating over a set of timestamps and calculating max/min seen for each
|
||||
* value, it's useful to be able to start with a negatively initialized structure. The aggregated
|
||||
* durable timestamp values represent the maximum durable timestamp over set of timestamps. These
|
||||
* aggregated max values are used for rollback to stable operation to find out whether the page has
|
||||
* any timestamp updates more than stable timestamp.
|
||||
*/
|
||||
#define WT_TIME_AGGREGATE_INIT_MAX(ta) \
|
||||
do { \
|
||||
(ta)->newest_start_durable_ts = WT_TS_NONE; \
|
||||
(ta)->newest_stop_durable_ts = WT_TS_NONE; \
|
||||
(ta)->oldest_start_ts = WT_TS_MAX; \
|
||||
(ta)->oldest_start_txn = WT_TXN_MAX; \
|
||||
(ta)->newest_stop_ts = WT_TS_NONE; \
|
||||
(ta)->newest_stop_txn = WT_TXN_NONE; \
|
||||
(ta)->prepare = 0; \
|
||||
} while (0)
|
||||
|
||||
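/*
 * Return true if the time aggregate holds exactly the values assigned by
 * WT_TIME_AGGREGATE_INIT_MAX. NOTE(review): these are not the WT_TIME_AGGREGATE_INIT defaults;
 * confirm that comparing against the max-initialized values is intended.
 */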
|
||||
#define WT_TIME_AGGREGATE_IS_EMPTY(ta) \
|
||||
((ta)->newest_start_durable_ts == WT_TS_NONE && (ta)->newest_stop_durable_ts == WT_TS_NONE && \
|
||||
(ta)->oldest_start_ts == WT_TS_MAX && (ta)->oldest_start_txn == WT_TXN_MAX && \
|
||||
(ta)->newest_stop_ts == WT_TS_NONE && (ta)->newest_stop_txn == WT_TXN_NONE && \
|
||||
(ta)->prepare == 0)
|
||||
|
||||
/* Copy the values from one time aggregate structure to another. */
|
||||
#define WT_TIME_AGGREGATE_COPY(dest, source) (*(dest) = *(source))
|
||||
|
||||
/* Update the aggregated window to reflect a new time window. */
|
||||
#define WT_TIME_AGGREGATE_UPDATE(ta, tw) \
|
||||
do { \
|
||||
(ta)->newest_start_durable_ts = \
|
||||
WT_MAX((tw)->durable_start_ts, (ta)->newest_start_durable_ts); \
|
||||
(ta)->newest_stop_durable_ts = \
|
||||
WT_MAX((tw)->durable_stop_ts, (ta)->newest_stop_durable_ts); \
|
||||
(ta)->oldest_start_ts = WT_MIN((tw)->start_ts, (ta)->oldest_start_ts); \
|
||||
(ta)->oldest_start_txn = WT_MIN((tw)->start_txn, (ta)->oldest_start_txn); \
|
||||
(ta)->newest_stop_ts = WT_MAX((tw)->stop_ts, (ta)->newest_stop_ts); \
|
||||
(ta)->newest_stop_txn = WT_MAX((tw)->stop_txn, (ta)->newest_stop_txn); \
|
||||
if ((tw)->prepare != 0) \
|
||||
(ta)->prepare = 1; \
|
||||
} while (0)
|
||||
|
||||
/* Merge an aggregated time window into another - choosing the most conservative value from each. */
|
||||
#define WT_TIME_AGGREGATE_MERGE(dest, source) \
|
||||
do { \
|
||||
(dest)->newest_start_durable_ts = \
|
||||
WT_MAX((dest)->newest_start_durable_ts, (source)->newest_start_durable_ts); \
|
||||
(dest)->newest_stop_durable_ts = \
|
||||
WT_MAX((dest)->newest_stop_durable_ts, (source)->newest_stop_durable_ts); \
|
||||
(dest)->oldest_start_ts = WT_MIN((dest)->oldest_start_ts, (source)->oldest_start_ts); \
|
||||
(dest)->oldest_start_txn = WT_MIN((dest)->oldest_start_txn, (source)->oldest_start_txn); \
|
||||
(dest)->newest_stop_ts = WT_MAX((dest)->newest_stop_ts, (source)->newest_stop_ts); \
|
||||
(dest)->newest_stop_txn = WT_MAX((dest)->newest_stop_txn, (source)->newest_stop_txn); \
|
||||
if ((source)->prepare != 0) \
|
||||
(dest)->prepare = 1; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* __wt_time_window_clear_obsolete --
|
||||
@ -85,185 +182,3 @@ __wt_time_window_clear_obsolete(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_time_window_is_empty --
|
||||
* Return true if the time window is equivalent to the default time window.
|
||||
*/
|
||||
static inline bool
|
||||
__wt_time_window_is_empty(WT_TIME_WINDOW *tw)
|
||||
{
|
||||
return (tw->durable_start_ts == WT_TS_NONE && tw->start_ts == WT_TS_NONE &&
|
||||
tw->start_txn == WT_TXN_NONE && tw->durable_stop_ts == WT_TS_NONE &&
|
||||
tw->stop_ts == WT_TS_MAX && tw->stop_txn == WT_TXN_MAX && tw->prepare == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_time_window_has_stop --
|
||||
* Check if the stop time window is set.
|
||||
*/
|
||||
static inline bool
|
||||
__wt_time_window_has_stop(WT_TIME_WINDOW *tw)
|
||||
{
|
||||
return (tw->stop_txn != WT_TXN_MAX || tw->stop_ts != WT_TS_MAX);
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_time_windows_equal --
|
||||
* Return true if the time windows are the same.
|
||||
*/
|
||||
static inline bool
|
||||
__wt_time_windows_equal(WT_TIME_WINDOW *tw1, WT_TIME_WINDOW *tw2)
|
||||
{
|
||||
return (tw1->durable_start_ts == tw2->durable_start_ts && tw1->start_ts == tw2->start_ts &&
|
||||
tw1->start_txn == tw2->start_txn && tw1->durable_stop_ts == tw2->durable_stop_ts &&
|
||||
tw1->stop_ts == tw2->stop_ts && tw1->stop_txn == tw2->stop_txn &&
|
||||
tw1->prepare == tw2->prepare);
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_time_window_set_start --
|
||||
* Set the start values of a time window from those in an update structure.
|
||||
*/
|
||||
static inline void
|
||||
__wt_time_window_set_start(WT_TIME_WINDOW *tw, WT_UPDATE *upd)
|
||||
{
|
||||
/*
|
||||
* Durable timestamp can be 0 for prepared updates, in those cases use the prepared timestamp as
|
||||
* durable timestamp.
|
||||
*/
|
||||
tw->durable_start_ts = tw->start_ts = upd->start_ts;
|
||||
if (upd->durable_ts != WT_TS_NONE)
|
||||
tw->durable_start_ts = upd->durable_ts;
|
||||
tw->start_txn = upd->txnid;
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_time_window_set_stop --
|
||||
* Set the start values of a time window from those in an update structure.
|
||||
*/
|
||||
static inline void
|
||||
__wt_time_window_set_stop(WT_TIME_WINDOW *tw, WT_UPDATE *upd)
|
||||
{
|
||||
/*
|
||||
* Durable timestamp can be 0 for prepared updates, in those cases use the prepared timestamp as
|
||||
* durable timestamp.
|
||||
*/
|
||||
tw->durable_stop_ts = tw->stop_ts = upd->start_ts;
|
||||
if (upd->durable_ts != WT_TS_NONE)
|
||||
tw->durable_stop_ts = upd->durable_ts;
|
||||
tw->stop_txn = upd->txnid;
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_time_aggregate_init --
|
||||
* Initialize the fields in an aggregated time window to their defaults.
|
||||
*/
|
||||
static inline void
|
||||
__wt_time_aggregate_init(WT_TIME_AGGREGATE *ta)
|
||||
{
|
||||
/*
|
||||
* The aggregated durable timestamp values represent the maximum durable timestamp over set of
|
||||
* timestamps. These aggregated max values are used for rollback to stable operation to find out
|
||||
* whether the page has any timestamp updates more than stable timestamp.
|
||||
*/
|
||||
ta->newest_start_durable_ts = WT_TS_NONE;
|
||||
ta->newest_stop_durable_ts = WT_TS_NONE;
|
||||
|
||||
ta->oldest_start_ts = WT_TS_NONE;
|
||||
ta->oldest_start_txn = WT_TXN_NONE;
|
||||
|
||||
ta->newest_stop_ts = WT_TS_MAX;
|
||||
ta->newest_stop_txn = WT_TXN_MAX;
|
||||
|
||||
ta->prepare = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_time_aggregate_init_max --
|
||||
* Initialize the fields in an aggregated time window to maximum values, since this structure is
|
||||
* generally populated by iterating over a set of timestamps and calculating max/min seen for
|
||||
* each value, it's useful to be able to start with a negatively initialized structure.
|
||||
*/
|
||||
static inline void
|
||||
__wt_time_aggregate_init_max(WT_TIME_AGGREGATE *ta)
|
||||
{
|
||||
/*
|
||||
* The aggregated durable timestamp values represent the maximum durable timestamp over set of
|
||||
* timestamps. These aggregated max values are used for rollback to stable operation to find out
|
||||
* whether the page has any timestamp updates more than stable timestamp.
|
||||
*/
|
||||
ta->newest_start_durable_ts = WT_TS_NONE;
|
||||
ta->newest_stop_durable_ts = WT_TS_NONE;
|
||||
|
||||
ta->oldest_start_ts = WT_TS_MAX;
|
||||
ta->oldest_start_txn = WT_TXN_MAX;
|
||||
|
||||
ta->newest_stop_ts = WT_TS_NONE;
|
||||
ta->newest_stop_txn = WT_TXN_NONE;
|
||||
|
||||
ta->prepare = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_time_aggregate_is_empty --
|
||||
* Return true if the time aggregate is equivalent to the default time aggregate.
|
||||
*/
|
||||
static inline bool
|
||||
__wt_time_aggregate_is_empty(WT_TIME_AGGREGATE *ta)
|
||||
{
|
||||
return (ta->newest_start_durable_ts == WT_TS_NONE && ta->newest_stop_durable_ts == WT_TS_NONE &&
|
||||
ta->oldest_start_ts == WT_TS_MAX && ta->oldest_start_txn == WT_TXN_MAX &&
|
||||
ta->newest_stop_ts == WT_TS_NONE && ta->newest_stop_txn == WT_TXN_NONE && ta->prepare == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_time_aggregate_copy --
|
||||
* Copy the values from one time aggregate structure to another.
|
||||
*/
|
||||
static inline void
|
||||
__wt_time_aggregate_copy(WT_TIME_AGGREGATE *dest, WT_TIME_AGGREGATE *source)
|
||||
{
|
||||
*dest = *source;
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_time_aggregate_update --
|
||||
* Update the aggregated window to reflect for a new time window.
|
||||
*/
|
||||
static inline void
|
||||
__wt_time_aggregate_update(WT_TIME_AGGREGATE *ta, WT_TIME_WINDOW *tw)
|
||||
{
|
||||
ta->newest_start_durable_ts = WT_MAX(tw->durable_start_ts, ta->newest_start_durable_ts);
|
||||
ta->newest_stop_durable_ts = WT_MAX(tw->durable_stop_ts, ta->newest_stop_durable_ts);
|
||||
|
||||
ta->oldest_start_ts = WT_MIN(tw->start_ts, ta->oldest_start_ts);
|
||||
ta->oldest_start_txn = WT_MIN(tw->start_txn, ta->oldest_start_txn);
|
||||
ta->newest_stop_ts = WT_MAX(tw->stop_ts, ta->newest_stop_ts);
|
||||
ta->newest_stop_txn = WT_MAX(tw->stop_txn, ta->newest_stop_txn);
|
||||
|
||||
if (tw->prepare != 0)
|
||||
ta->prepare = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_time_aggregate_merge --
|
||||
* Merge an aggregated time window into another - choosing the most conservative value from
|
||||
* each.
|
||||
*/
|
||||
static inline void
|
||||
__wt_time_aggregate_merge(WT_TIME_AGGREGATE *dest, WT_TIME_AGGREGATE *source)
|
||||
{
|
||||
dest->newest_start_durable_ts =
|
||||
WT_MAX(dest->newest_start_durable_ts, source->newest_start_durable_ts);
|
||||
dest->newest_stop_durable_ts =
|
||||
WT_MAX(dest->newest_stop_durable_ts, source->newest_stop_durable_ts);
|
||||
|
||||
dest->oldest_start_ts = WT_MIN(dest->oldest_start_ts, source->oldest_start_ts);
|
||||
dest->oldest_start_txn = WT_MIN(dest->oldest_start_txn, source->oldest_start_txn);
|
||||
dest->newest_stop_ts = WT_MAX(dest->newest_stop_ts, source->newest_stop_ts);
|
||||
dest->newest_stop_txn = WT_MAX(dest->newest_stop_txn, source->newest_stop_txn);
|
||||
|
||||
if (source->prepare != 0)
|
||||
dest->prepare = 1;
|
||||
}
|
||||
|
12
src/third_party/wiredtiger/src/include/txn.i
vendored
12
src/third_party/wiredtiger/src/include/txn.i
vendored
@ -608,7 +608,7 @@ __wt_txn_upd_value_visible_all(WT_SESSION_IMPL *session, WT_UPDATE_VALUE *upd_va
|
||||
static inline bool
|
||||
__wt_txn_tw_stop_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
|
||||
{
|
||||
return (__wt_time_window_has_stop(tw) && !tw->prepare &&
|
||||
return (WT_TIME_WINDOW_HAS_STOP(tw) && !tw->prepare &&
|
||||
__wt_txn_visible(session, tw->stop_txn, tw->stop_ts));
|
||||
}
|
||||
|
||||
@ -619,7 +619,7 @@ __wt_txn_tw_stop_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
|
||||
static inline bool
|
||||
__wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
|
||||
{
|
||||
return ((__wt_time_window_has_stop(tw) || !tw->prepare) &&
|
||||
return ((WT_TIME_WINDOW_HAS_STOP(tw) || !tw->prepare) &&
|
||||
__wt_txn_visible(session, tw->start_txn, tw->start_ts));
|
||||
}
|
||||
|
||||
@ -630,7 +630,7 @@ __wt_txn_tw_start_visible(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
|
||||
static inline bool
|
||||
__wt_txn_tw_start_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
|
||||
{
|
||||
return ((__wt_time_window_has_stop(tw) || !tw->prepare) &&
|
||||
return ((WT_TIME_WINDOW_HAS_STOP(tw) || !tw->prepare) &&
|
||||
__wt_txn_visible_all(session, tw->start_txn, tw->durable_start_ts));
|
||||
}
|
||||
|
||||
@ -641,7 +641,7 @@ __wt_txn_tw_start_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
|
||||
static inline bool
|
||||
__wt_txn_tw_stop_visible_all(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
|
||||
{
|
||||
return (__wt_time_window_has_stop(tw) && !tw->prepare &&
|
||||
return (WT_TIME_WINDOW_HAS_STOP(tw) && !tw->prepare &&
|
||||
__wt_txn_visible_all(session, tw->stop_txn, tw->durable_stop_ts));
|
||||
}
|
||||
|
||||
@ -903,10 +903,10 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *key, uint
|
||||
|
||||
/* Check the ondisk value. */
|
||||
if (vpack == NULL) {
|
||||
__wt_time_window_init(&tw);
|
||||
WT_TIME_WINDOW_INIT(&tw);
|
||||
WT_RET(__wt_value_return_buf(cbt, cbt->ref, &cbt->upd_value->buf, &tw));
|
||||
} else {
|
||||
__wt_time_window_copy(&tw, &vpack->tw);
|
||||
WT_TIME_WINDOW_COPY(&tw, &vpack->tw);
|
||||
cbt->upd_value->buf.data = vpack->data;
|
||||
cbt->upd_value->buf.size = vpack->size;
|
||||
}
|
||||
|
@ -596,7 +596,7 @@ __ckpt_load(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v, WT_C
|
||||
ckpt->size = (uint64_t)a.val;
|
||||
|
||||
/* Default to durability. */
|
||||
__wt_time_aggregate_init(&ckpt->ta);
|
||||
WT_TIME_AGGREGATE_INIT(&ckpt->ta);
|
||||
|
||||
ret = __wt_config_subgets(session, v, "oldest_start_ts", &a);
|
||||
WT_RET_NOTFOUND_OK(ret);
|
||||
|
@ -114,7 +114,7 @@ __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool delet
|
||||
|
||||
r = cbulk->reconcile;
|
||||
btree = S2BT(session);
|
||||
__wt_time_window_init(&tw);
|
||||
WT_TIME_WINDOW_INIT(&tw);
|
||||
|
||||
val = &r->v;
|
||||
if (deleted) {
|
||||
@ -138,7 +138,7 @@ __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool delet
|
||||
if (btree->dictionary)
|
||||
WT_RET(__wt_rec_dict_replace(session, r, &tw, cbulk->rle, val));
|
||||
__wt_rec_image_copy(session, r, val);
|
||||
__wt_time_aggregate_update(&r->cur_ptr->ta, &tw);
|
||||
WT_TIME_AGGREGATE_UPDATE(&r->cur_ptr->ta, &tw);
|
||||
|
||||
/* Update the starting record number in case we split. */
|
||||
r->recno += cbulk->rle;
|
||||
@ -178,7 +178,7 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
|
||||
|
||||
/* Copy the value onto the page. */
|
||||
__wt_rec_image_copy(session, r, val);
|
||||
__wt_time_aggregate_merge(&r->cur_ptr->ta, &addr->ta);
|
||||
WT_TIME_AGGREGATE_MERGE(&r->cur_ptr->ta, &addr->ta);
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
@ -205,7 +205,7 @@ __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
|
||||
page = pageref->page;
|
||||
child = NULL;
|
||||
hazard = false;
|
||||
__wt_time_aggregate_init(&ta);
|
||||
WT_TIME_AGGREGATE_INIT(&ta);
|
||||
|
||||
val = &r->v;
|
||||
vpack = &_vpack;
|
||||
@ -280,10 +280,10 @@ __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
|
||||
val->buf.size = __wt_cell_total_len(vpack);
|
||||
val->cell_len = 0;
|
||||
val->len = val->buf.size;
|
||||
__wt_time_aggregate_copy(&ta, &vpack->ta);
|
||||
WT_TIME_AGGREGATE_COPY(&ta, &vpack->ta);
|
||||
} else {
|
||||
__wt_rec_cell_build_addr(session, r, addr, NULL, false, ref->ref_recno);
|
||||
__wt_time_aggregate_copy(&ta, &addr->ta);
|
||||
WT_TIME_AGGREGATE_COPY(&ta, &addr->ta);
|
||||
}
|
||||
WT_CHILD_RELEASE_ERR(session, hazard, ref);
|
||||
|
||||
@ -293,7 +293,7 @@ __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
|
||||
|
||||
/* Copy the value onto the page. */
|
||||
__wt_rec_image_copy(session, r, val);
|
||||
__wt_time_aggregate_merge(&r->cur_ptr->ta, &ta);
|
||||
WT_TIME_AGGREGATE_MERGE(&r->cur_ptr->ta, &ta);
|
||||
}
|
||||
WT_INTL_FOREACH_END;
|
||||
|
||||
@ -547,7 +547,7 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_SALVAGE_COOKI
|
||||
if (!deleted && !overflow_type && btree->dictionary)
|
||||
WT_RET(__wt_rec_dict_replace(session, r, tw, rle, val));
|
||||
__wt_rec_image_copy(session, r, val);
|
||||
__wt_time_aggregate_update(&r->cur_ptr->ta, tw);
|
||||
WT_TIME_AGGREGATE_UPDATE(&r->cur_ptr->ta, tw);
|
||||
|
||||
/* Update the starting record number in case we split. */
|
||||
r->recno += rle;
|
||||
@ -592,14 +592,14 @@ __wt_rec_col_var(
|
||||
upd = NULL;
|
||||
size = 0;
|
||||
data = NULL;
|
||||
__wt_time_window_init(&default_tw);
|
||||
WT_TIME_WINDOW_INIT(&default_tw);
|
||||
|
||||
cbt = &r->update_modify_cbt;
|
||||
cbt->iface.session = (WT_SESSION *)session;
|
||||
|
||||
/* Set the "last" values to cause failure if they're not set. */
|
||||
last.value = r->last;
|
||||
__wt_time_window_init_max(&last.tw);
|
||||
WT_TIME_WINDOW_INIT_MAX(&last.tw);
|
||||
last.deleted = false;
|
||||
|
||||
/*
|
||||
@ -607,7 +607,7 @@ __wt_rec_col_var(
|
||||
* [-Werror=maybe-uninitialized]
|
||||
*/
|
||||
/* NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) */
|
||||
__wt_time_window_init_max(&tw);
|
||||
WT_TIME_WINDOW_INIT_MAX(&tw);
|
||||
|
||||
WT_RET(__wt_rec_split_init(session, r, page, pageref->ref_recno, btree->maxleafpage_precomp));
|
||||
|
||||
@ -626,7 +626,7 @@ __wt_rec_col_var(
|
||||
if (salvage != NULL && salvage->missing != 0) {
|
||||
if (salvage->skip == 0) {
|
||||
rle = salvage->missing;
|
||||
__wt_time_window_init(&last.tw);
|
||||
WT_TIME_WINDOW_INIT(&last.tw);
|
||||
last.deleted = true;
|
||||
|
||||
/*
|
||||
@ -726,12 +726,12 @@ record_loop:
|
||||
*/
|
||||
deleted = orig_deleted;
|
||||
if (deleted || salvage) {
|
||||
__wt_time_window_init(&tw);
|
||||
WT_TIME_WINDOW_INIT(&tw);
|
||||
|
||||
if (deleted)
|
||||
goto compare;
|
||||
} else
|
||||
__wt_time_window_copy(&tw, &vpack->tw);
|
||||
WT_TIME_WINDOW_COPY(&tw, &vpack->tw);
|
||||
|
||||
/*
|
||||
* If we are handling overflow items, use the overflow item itself exactly once,
|
||||
@ -780,7 +780,7 @@ record_loop:
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
__wt_time_window_copy(&tw, &upd_select.tw);
|
||||
WT_TIME_WINDOW_COPY(&tw, &upd_select.tw);
|
||||
|
||||
switch (upd->type) {
|
||||
case WT_UPDATE_MODIFY:
|
||||
@ -797,7 +797,7 @@ record_loop:
|
||||
size = upd->size;
|
||||
break;
|
||||
case WT_UPDATE_TOMBSTONE:
|
||||
__wt_time_window_init(&tw);
|
||||
WT_TIME_WINDOW_INIT(&tw);
|
||||
deleted = true;
|
||||
break;
|
||||
default:
|
||||
@ -813,7 +813,7 @@ compare:
|
||||
* record number, we've been doing that all along.
|
||||
*/
|
||||
if (rle != 0) {
|
||||
if (__wt_time_windows_equal(&tw, &last.tw) &&
|
||||
if (WT_TIME_WINDOWS_EQUAL(&tw, &last.tw) &&
|
||||
((deleted && last.deleted) ||
|
||||
(!deleted && !last.deleted && last.value->size == size &&
|
||||
memcmp(last.value->data, data, size) == 0))) {
|
||||
@ -824,7 +824,7 @@ compare:
|
||||
* visible.
|
||||
*/
|
||||
WT_ASSERT(
|
||||
session, (!deleted && !last.deleted) || __wt_time_window_is_empty(&last.tw));
|
||||
session, (!deleted && !last.deleted) || WT_TIME_WINDOW_IS_EMPTY(&last.tw));
|
||||
rle += repeat_count;
|
||||
continue;
|
||||
}
|
||||
@ -853,7 +853,7 @@ compare:
|
||||
WT_ERR(__wt_buf_set(session, last.value, data, size));
|
||||
}
|
||||
|
||||
__wt_time_window_copy(&last.tw, &tw);
|
||||
WT_TIME_WINDOW_COPY(&last.tw, &tw);
|
||||
last.deleted = deleted;
|
||||
rle = repeat_count;
|
||||
}
|
||||
@ -916,7 +916,7 @@ compare:
|
||||
* tombstone to write to disk and the deletion of the keys must be globally
|
||||
* visible.
|
||||
*/
|
||||
WT_ASSERT(session, __wt_time_window_is_empty(&last.tw));
|
||||
WT_ASSERT(session, WT_TIME_WINDOW_IS_EMPTY(&last.tw));
|
||||
/*
|
||||
* The record adjustment is decremented by one so we can naturally fall into the
|
||||
* RLE accounting below, where we increment rle by one, then continue in the
|
||||
@ -927,14 +927,14 @@ compare:
|
||||
src_recno += skip;
|
||||
} else
|
||||
/* Set time pairs for the first deleted key in a deleted range. */
|
||||
__wt_time_window_init(&tw);
|
||||
WT_TIME_WINDOW_INIT(&tw);
|
||||
} else if (upd == NULL) {
|
||||
/* The updates on the key are all uncommitted so we write a deleted key to disk. */
|
||||
__wt_time_window_init(&tw);
|
||||
WT_TIME_WINDOW_INIT(&tw);
|
||||
deleted = true;
|
||||
} else {
|
||||
/* Set time pairs for a key. */
|
||||
__wt_time_window_copy(&tw, &upd_select.tw);
|
||||
WT_TIME_WINDOW_COPY(&tw, &upd_select.tw);
|
||||
|
||||
switch (upd->type) {
|
||||
case WT_UPDATE_MODIFY:
|
||||
@ -954,7 +954,7 @@ compare:
|
||||
size = upd->size;
|
||||
break;
|
||||
case WT_UPDATE_TOMBSTONE:
|
||||
__wt_time_window_init(&tw);
|
||||
WT_TIME_WINDOW_INIT(&tw);
|
||||
deleted = true;
|
||||
break;
|
||||
default:
|
||||
@ -967,9 +967,9 @@ compare:
|
||||
* the same thing.
|
||||
*/
|
||||
if (rle != 0) {
|
||||
if (__wt_time_windows_equal(&last.tw, &tw) &&
|
||||
if (WT_TIME_WINDOWS_EQUAL(&last.tw, &tw) &&
|
||||
((deleted && last.deleted) ||
|
||||
(!deleted && !last.deleted && last.value->size == size &&
|
||||
(!deleted && !last.deleted && size != 0 && last.value->size == size &&
|
||||
memcmp(last.value->data, data, size) == 0))) {
|
||||
/*
|
||||
* The start time pair for deleted keys must be (WT_TS_NONE, WT_TXN_NONE) and
|
||||
@ -982,7 +982,7 @@ compare:
|
||||
(last.tw.durable_start_ts == tw.durable_start_ts &&
|
||||
last.tw.start_ts == WT_TS_NONE && last.tw.start_txn == WT_TXN_NONE &&
|
||||
last.tw.durable_stop_ts == tw.durable_stop_ts &&
|
||||
!__wt_time_window_has_stop(&last.tw)));
|
||||
!WT_TIME_WINDOW_HAS_STOP(&last.tw)));
|
||||
++rle;
|
||||
goto next;
|
||||
}
|
||||
@ -1006,7 +1006,7 @@ compare:
|
||||
}
|
||||
|
||||
/* Ready for the next loop, reset the RLE counter. */
|
||||
__wt_time_window_copy(&last.tw, &tw);
|
||||
WT_TIME_WINDOW_COPY(&last.tw, &tw);
|
||||
last.deleted = deleted;
|
||||
rle = 1;
|
||||
|
||||
|
@ -199,7 +199,7 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
|
||||
r = cbulk->reconcile;
|
||||
btree = S2BT(session);
|
||||
cursor = &cbulk->cbt.iface;
|
||||
__wt_time_window_init(&tw);
|
||||
WT_TIME_WINDOW_INIT(&tw);
|
||||
|
||||
key = &r->k;
|
||||
val = &r->v;
|
||||
@ -232,7 +232,7 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
|
||||
WT_RET(__wt_rec_dict_replace(session, r, &tw, 0, val));
|
||||
__wt_rec_image_copy(session, r, val);
|
||||
}
|
||||
__wt_time_aggregate_update(&r->cur_ptr->ta, &tw);
|
||||
WT_TIME_AGGREGATE_UPDATE(&r->cur_ptr->ta, &tw);
|
||||
|
||||
/* Update compression state. */
|
||||
__rec_key_state_update(r, ovfl_key);
|
||||
@ -276,7 +276,7 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
|
||||
/* Copy the key and value onto the page. */
|
||||
__wt_rec_image_copy(session, r, key);
|
||||
__wt_rec_image_copy(session, r, val);
|
||||
__wt_time_aggregate_merge(&r->cur_ptr->ta, &addr->ta);
|
||||
WT_TIME_AGGREGATE_MERGE(&r->cur_ptr->ta, &addr->ta);
|
||||
|
||||
/* Update compression state. */
|
||||
__rec_key_state_update(r, ovfl_key);
|
||||
@ -431,7 +431,7 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
|
||||
*/
|
||||
if (__wt_off_page(page, addr)) {
|
||||
__wt_rec_cell_build_addr(session, r, addr, NULL, state == WT_CHILD_PROXY, WT_RECNO_OOB);
|
||||
__wt_time_aggregate_copy(&ta, &addr->ta);
|
||||
WT_TIME_AGGREGATE_COPY(&ta, &addr->ta);
|
||||
} else {
|
||||
__wt_cell_unpack_addr(session, page->dsk, ref->addr, vpack);
|
||||
if (F_ISSET(vpack, WT_CELL_UNPACK_TIME_WINDOW_CLEARED)) {
|
||||
@ -452,7 +452,7 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
|
||||
val->cell_len = 0;
|
||||
val->len = val->buf.size;
|
||||
}
|
||||
__wt_time_aggregate_copy(&ta, &vpack->ta);
|
||||
WT_TIME_AGGREGATE_COPY(&ta, &vpack->ta);
|
||||
}
|
||||
WT_CHILD_RELEASE_ERR(session, hazard, ref);
|
||||
|
||||
@ -506,7 +506,7 @@ __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
|
||||
/* Copy the key and value onto the page. */
|
||||
__wt_rec_image_copy(session, r, key);
|
||||
__wt_rec_image_copy(session, r, val);
|
||||
__wt_time_aggregate_merge(&r->cur_ptr->ta, &ta);
|
||||
WT_TIME_AGGREGATE_MERGE(&r->cur_ptr->ta, &ta);
|
||||
|
||||
/* Update compression state. */
|
||||
__rec_key_state_update(r, ovfl_key);
|
||||
@ -533,7 +533,7 @@ __rec_row_zero_len(WT_SESSION_IMPL *session, WT_TIME_WINDOW *tw)
|
||||
* tempted to check the time window against the default here - the check is subtly different due
|
||||
* to the grouping.
|
||||
*/
|
||||
return (!__wt_time_window_has_stop(tw) &&
|
||||
return (!WT_TIME_WINDOW_HAS_STOP(tw) &&
|
||||
((tw->start_ts == WT_TS_NONE && tw->start_txn == WT_TXN_NONE) ||
|
||||
__wt_txn_tw_start_visible_all(session, tw)));
|
||||
}
|
||||
@ -568,7 +568,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
|
||||
if ((upd = upd_select.upd) == NULL)
|
||||
continue;
|
||||
|
||||
__wt_time_window_copy(&tw, &upd_select.tw);
|
||||
WT_TIME_WINDOW_COPY(&tw, &upd_select.tw);
|
||||
|
||||
switch (upd->type) {
|
||||
case WT_UPDATE_MODIFY:
|
||||
@ -619,7 +619,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
|
||||
WT_RET(__wt_rec_dict_replace(session, r, &tw, 0, val));
|
||||
__wt_rec_image_copy(session, r, val);
|
||||
}
|
||||
__wt_time_aggregate_update(&r->cur_ptr->ta, &tw);
|
||||
WT_TIME_AGGREGATE_UPDATE(&r->cur_ptr->ta, &tw);
|
||||
|
||||
/* Update compression state. */
|
||||
__rec_key_state_update(r, ovfl_key);
|
||||
@ -691,7 +691,7 @@ __wt_rec_row_leaf(
|
||||
btree = S2BT(session);
|
||||
page = pageref->page;
|
||||
slvg_skip = salvage == NULL ? 0 : salvage->skip;
|
||||
__wt_time_window_init(&tw);
|
||||
WT_TIME_WINDOW_INIT(&tw);
|
||||
|
||||
cbt = &r->update_modify_cbt;
|
||||
cbt->iface.session = (WT_SESSION *)session;
|
||||
@ -756,11 +756,11 @@ __wt_rec_row_leaf(
|
||||
*/
|
||||
if (upd == NULL) {
|
||||
if (!salvage)
|
||||
__wt_time_window_copy(&tw, &vpack->tw);
|
||||
WT_TIME_WINDOW_COPY(&tw, &vpack->tw);
|
||||
else
|
||||
__wt_time_window_init(&tw);
|
||||
WT_TIME_WINDOW_INIT(&tw);
|
||||
} else
|
||||
__wt_time_window_copy(&tw, &upd_select.tw);
|
||||
WT_TIME_WINDOW_COPY(&tw, &upd_select.tw);
|
||||
|
||||
/*
|
||||
* If we reconcile an on disk key with a globally visible stop time pair and there are no
|
||||
@ -964,7 +964,7 @@ build:
|
||||
WT_ERR(__wt_rec_dict_replace(session, r, &tw, 0, val));
|
||||
__wt_rec_image_copy(session, r, val);
|
||||
}
|
||||
__wt_time_aggregate_update(&r->cur_ptr->ta, &tw);
|
||||
WT_TIME_AGGREGATE_UPDATE(&r->cur_ptr->ta, &tw);
|
||||
|
||||
/* Update compression state. */
|
||||
__rec_key_state_update(r, ovfl_key);
|
||||
|
@ -128,7 +128,7 @@ __rec_append_orig_value(
|
||||
* delete a value respectively at timestamp 0 and 10, and later insert it again at 20. We need
|
||||
* the tombstone to tell us there is no value between 10 and 20.
|
||||
*/
|
||||
if (__wt_time_window_has_stop(&unpack->tw)) {
|
||||
if (WT_TIME_WINDOW_HAS_STOP(&unpack->tw)) {
|
||||
tombstone_globally_visible = __wt_txn_tw_stop_visible_all(session, &unpack->tw);
|
||||
|
||||
/* No need to append the tombstone if it is already in the update chain. */
|
||||
@ -249,7 +249,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
|
||||
*/
|
||||
upd_select->upd = NULL;
|
||||
select_tw = &upd_select->tw;
|
||||
__wt_time_window_init(select_tw);
|
||||
WT_TIME_WINDOW_INIT(select_tw);
|
||||
|
||||
page = r->page;
|
||||
first_txn_upd = upd = last_upd = tombstone = NULL;
|
||||
@ -399,7 +399,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
|
||||
* indicate that the value is visible to any timestamp/transaction id ahead of it.
|
||||
*/
|
||||
if (upd->type == WT_UPDATE_TOMBSTONE) {
|
||||
__wt_time_window_set_stop(select_tw, upd);
|
||||
WT_TIME_WINDOW_SET_STOP(select_tw, upd);
|
||||
tombstone = upd;
|
||||
|
||||
/* Find the update this tombstone applies to. */
|
||||
@ -414,7 +414,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
|
||||
}
|
||||
if (upd != NULL)
|
||||
/* The beginning of the validity window is the selected update's time pair. */
|
||||
__wt_time_window_set_start(select_tw, upd);
|
||||
WT_TIME_WINDOW_SET_START(select_tw, upd);
|
||||
else if (select_tw->stop_ts != WT_TS_NONE || select_tw->stop_txn != WT_TXN_NONE) {
|
||||
/* If we only have a tombstone in the update list, we must have an ondisk value. */
|
||||
WT_ASSERT(session, vpack != NULL && tombstone != NULL && last_upd->next == NULL);
|
||||
@ -439,7 +439,7 @@ __wt_rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, v
|
||||
last_upd->next->start_ts == vpack->tw.start_ts &&
|
||||
last_upd->next->type == WT_UPDATE_STANDARD && last_upd->next->next == NULL);
|
||||
upd_select->upd = last_upd->next;
|
||||
__wt_time_window_set_start(select_tw, last_upd->next);
|
||||
WT_TIME_WINDOW_SET_START(select_tw, last_upd->next);
|
||||
} else {
|
||||
WT_ASSERT(
|
||||
session, __wt_txn_upd_visible_all(session, tombstone) && upd_select->upd == NULL);
|
||||
|
@ -1232,7 +1232,7 @@ __wt_rec_split_crossing_bnd(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t ne
|
||||
r->cur_ptr->min_recno = r->recno;
|
||||
if (S2BT(session)->type == BTREE_ROW)
|
||||
WT_RET(__rec_split_row_promote(session, r, &r->cur_ptr->min_key, r->page->type));
|
||||
__wt_time_aggregate_copy(&r->cur_ptr->ta_min, &r->cur_ptr->ta);
|
||||
WT_TIME_AGGREGATE_COPY(&r->cur_ptr->ta_min, &r->cur_ptr->ta);
|
||||
|
||||
/* Assert we're not re-entering this code. */
|
||||
WT_ASSERT(session, r->cur_ptr->min_offset == 0);
|
||||
@ -1283,7 +1283,7 @@ __rec_split_finish_process_prev(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
||||
* boundaries and create a single chunk.
|
||||
*/
|
||||
prev_ptr->entries += cur_ptr->entries;
|
||||
__wt_time_aggregate_merge(&prev_ptr->ta, &cur_ptr->ta);
|
||||
WT_TIME_AGGREGATE_MERGE(&prev_ptr->ta, &cur_ptr->ta);
|
||||
dsk = r->cur_ptr->image.mem;
|
||||
memcpy((uint8_t *)r->prev_ptr->image.mem + prev_ptr->image.size,
|
||||
WT_PAGE_HEADER_BYTE(btree, dsk), cur_ptr->image.size - WT_PAGE_HEADER_BYTE_SIZE(btree));
|
||||
@ -1326,11 +1326,11 @@ __rec_split_finish_process_prev(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
||||
cur_ptr->recno = prev_ptr->min_recno;
|
||||
WT_RET(
|
||||
__wt_buf_set(session, &cur_ptr->key, prev_ptr->min_key.data, prev_ptr->min_key.size));
|
||||
__wt_time_aggregate_merge(&cur_ptr->ta, &prev_ptr->ta);
|
||||
WT_TIME_AGGREGATE_MERGE(&cur_ptr->ta, &prev_ptr->ta);
|
||||
cur_ptr->image.size += len_to_move;
|
||||
|
||||
prev_ptr->entries = prev_ptr->min_entries;
|
||||
__wt_time_aggregate_copy(&prev_ptr->ta, &prev_ptr->ta_min);
|
||||
WT_TIME_AGGREGATE_COPY(&prev_ptr->ta, &prev_ptr->ta_min);
|
||||
prev_ptr->image.size -= len_to_move;
|
||||
}
|
||||
|
||||
@ -1711,7 +1711,7 @@ __rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_CHUNK *chunk
|
||||
multi = &r->multi[r->multi_next++];
|
||||
|
||||
/* Initialize the address (set the addr type for the parent). */
|
||||
__wt_time_aggregate_copy(&multi->addr.ta, &chunk->ta);
|
||||
WT_TIME_AGGREGATE_COPY(&multi->addr.ta, &chunk->ta);
|
||||
|
||||
switch (page->type) {
|
||||
case WT_PAGE_COL_FIX:
|
||||
@ -2026,7 +2026,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
|
||||
bm = btree->bm;
|
||||
mod = page->modify;
|
||||
ref = r->ref;
|
||||
__wt_time_aggregate_init(&ta);
|
||||
WT_TIME_AGGREGATE_INIT(&ta);
|
||||
|
||||
/*
|
||||
* This page may have previously been reconciled, and that information is now about to be
|
||||
|
@ -1515,7 +1515,7 @@ __wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session, WT_TIME_AGGREGAT
|
||||
WT_CKPT_FOREACH (ckptbase, ckpt)
|
||||
if (F_ISSET(ckpt, WT_CKPT_ADD)) {
|
||||
ckpt->write_gen = btree->write_gen;
|
||||
__wt_time_aggregate_copy(&ckpt->ta, ta);
|
||||
WT_TIME_AGGREGATE_COPY(&ckpt->ta, ta);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1542,7 +1542,7 @@ __checkpoint_tree(WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[
|
||||
conn = S2C(session);
|
||||
dhandle = session->dhandle;
|
||||
fake_ckpt = resolve_bm = false;
|
||||
__wt_time_aggregate_init(&ta);
|
||||
WT_TIME_AGGREGATE_INIT(&ta);
|
||||
|
||||
/*
|
||||
* Set the checkpoint LSN to the maximum LSN so that if logging is disabled, recovery will never
|
||||
|
115
src/third_party/wiredtiger/src/txn/txn_recover.c
vendored
115
src/third_party/wiredtiger/src/txn/txn_recover.c
vendored
@ -511,6 +511,71 @@ __recovery_file_scan(WT_RECOVERY *r)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* __hs_exists --
|
||||
* Check whether the history store exists. This function looks for both the history store URI in
|
||||
* the metadata file and for the history store data file itself. If we're running salvage, we'll
|
||||
* attempt to salvage the history store here.
|
||||
*/
|
||||
static int
|
||||
__hs_exists(WT_SESSION_IMPL *session, WT_CURSOR *metac, const char *cfg[], bool *hs_exists)
|
||||
{
|
||||
WT_CONNECTION_IMPL *conn;
|
||||
WT_DECL_RET;
|
||||
WT_SESSION *wt_session;
|
||||
|
||||
conn = S2C(session);
|
||||
|
||||
/*
|
||||
* We should check whether the history store file exists in the metadata or not. If it does not,
|
||||
* then we should skip rollback to stable for each table. This might happen if we're upgrading
|
||||
* from an older version. If it does exist in the metadata we should check that it exists on
|
||||
* disk to confirm that it wasn't deleted between runs.
|
||||
*
|
||||
* This needs to happen after we apply the logs as they may contain the metadata changes which
|
||||
* include the history store creation. As such the on disk metadata file won't contain the
|
||||
* history store but will after log application.
|
||||
*/
|
||||
metac->set_key(metac, WT_HS_URI);
|
||||
WT_ERR_NOTFOUND_OK(metac->search(metac), true);
|
||||
if (ret == WT_NOTFOUND) {
|
||||
*hs_exists = false;
|
||||
ret = 0;
|
||||
} else {
|
||||
/* Given the history store exists in the metadata validate whether it exists on disk. */
|
||||
WT_ERR(__wt_fs_exist(session, WT_HS_FILE, hs_exists));
|
||||
if (*hs_exists) {
|
||||
/*
|
||||
* Attempt to configure the history store; a failure here indicates corruption.
|
||||
*/
|
||||
ret = __wt_hs_config(session, cfg);
|
||||
if (ret != 0) {
|
||||
if (F_ISSET(conn, WT_CONN_SALVAGE)) {
|
||||
wt_session = &session->iface;
|
||||
WT_ERR(wt_session->salvage(wt_session, WT_HS_URI, NULL));
|
||||
} else
|
||||
WT_ERR(ret);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* We're attempting to salvage the database with a missing history store, remove it from
|
||||
* the metadata and pretend it never existed. As such we won't run rollback to stable
|
||||
* later.
|
||||
*/
|
||||
if (F_ISSET(conn, WT_CONN_SALVAGE)) {
|
||||
*hs_exists = false;
|
||||
metac->remove(metac);
|
||||
} else
|
||||
/* The history store file has likely been deleted, we cannot recover from this. */
|
||||
WT_ERR_MSG(session, WT_TRY_SALVAGE, "%s file is corrupted or missing", WT_HS_FILE);
|
||||
}
|
||||
}
|
||||
err:
|
||||
/* Unpin the page from cache. */
|
||||
WT_TRET(metac->reset(metac));
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_txn_recover --
|
||||
* Run recovery.
|
||||
@ -523,7 +588,6 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[])
|
||||
WT_DECL_RET;
|
||||
WT_RECOVERY r;
|
||||
WT_RECOVERY_FILE *metafile;
|
||||
WT_SESSION *wt_session;
|
||||
char *config;
|
||||
char ts_string[2][WT_TS_INT_STRING_SIZE];
|
||||
bool do_checkpoint, eviction_started, hs_exists, needs_rec, was_backup;
|
||||
@ -576,6 +640,7 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[])
|
||||
WT_ERR(__wt_log_reset(session, r.max_ckpt_lsn.l.file));
|
||||
else
|
||||
do_checkpoint = false;
|
||||
WT_ERR(__hs_exists(session, metac, cfg, &hs_exists));
|
||||
goto done;
|
||||
}
|
||||
|
||||
@ -624,52 +689,8 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[])
|
||||
WT_ERR(ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* We should check whether the history store file exists in the metadata or not. If it does not,
|
||||
* then we should skip rollback to stable for each table. This might happen if we're upgrading
|
||||
* from an older version. If it does exist in the metadata we should check that it exists on
|
||||
* disk to confirm that it wasn't deleted between runs.
|
||||
*
|
||||
* This needs to happen after we apply the logs as they may contain the metadata changes which
|
||||
* include the history store creation. As such the on disk metadata file won't contain the
|
||||
* history store but will after log application.
|
||||
*/
|
||||
metac->set_key(metac, WT_HS_URI);
|
||||
WT_ERR_NOTFOUND_OK(metac->search(metac), true);
|
||||
if (ret == WT_NOTFOUND) {
|
||||
hs_exists = false;
|
||||
} else {
|
||||
/* Given the history store exists in the metadata validate whether it exists on disk. */
|
||||
WT_ERR(__wt_fs_exist(session, WT_HS_FILE, &hs_exists));
|
||||
if (hs_exists) {
|
||||
/*
|
||||
* Attempt to configure the history store; a failure here indicates corruption.
|
||||
*/
|
||||
ret = __wt_hs_config(session, cfg);
|
||||
if (ret != 0) {
|
||||
if (F_ISSET(conn, WT_CONN_SALVAGE)) {
|
||||
wt_session = &session->iface;
|
||||
WT_ERR(wt_session->salvage(wt_session, WT_HS_URI, NULL));
|
||||
} else
|
||||
WT_ERR(ret);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* We're attempting to salvage the database with a missing history store, remove it from
|
||||
* the metadata and pretend it never existed. As such we won't run rollback to stable
|
||||
* later.
|
||||
*/
|
||||
if (F_ISSET(conn, WT_CONN_SALVAGE)) {
|
||||
hs_exists = false;
|
||||
metac->remove(metac);
|
||||
} else
|
||||
/* The history store file has likely been deleted, we cannot recover from this. */
|
||||
WT_ERR_MSG(session, WT_TRY_SALVAGE, "%s file is corrupted or missing", WT_HS_FILE);
|
||||
}
|
||||
}
|
||||
|
||||
/* Unpin the page from cache. */
|
||||
WT_ERR(metac->reset(metac));
|
||||
/* Check whether the history store exists. */
|
||||
WT_ERR(__hs_exists(session, metac, cfg, &hs_exists));
|
||||
|
||||
/* Scan the metadata to find the live files and their IDs. */
|
||||
WT_ERR(__recovery_file_scan(&r));
|
||||
|
34
src/third_party/wiredtiger/test/format/format.i
vendored
34
src/third_party/wiredtiger/test/format/format.i
vendored
@ -34,27 +34,53 @@ static inline int
|
||||
read_op(WT_CURSOR *cursor, read_operation op, int *exactp)
|
||||
{
|
||||
WT_DECL_RET;
|
||||
uint64_t start, now;
|
||||
|
||||
/*
|
||||
* Read operations wait out prepare-conflicts. (As part of the snapshot isolation checks, we
|
||||
* repeat reads that succeeded before, they should be repeatable.)
|
||||
*/
|
||||
__wt_seconds(NULL, &start);
|
||||
switch (op) {
|
||||
case NEXT:
|
||||
while ((ret = cursor->next(cursor)) == WT_PREPARE_CONFLICT)
|
||||
while ((ret = cursor->next(cursor)) == WT_PREPARE_CONFLICT) {
|
||||
__wt_yield();
|
||||
|
||||
/* Ignore clock reset. */
|
||||
__wt_seconds(NULL, &now);
|
||||
testutil_assertfmt(now < start || now - start < 60,
|
||||
"%s: timed out with prepare-conflict", "WT_CURSOR.next");
|
||||
}
|
||||
break;
|
||||
case PREV:
|
||||
while ((ret = cursor->prev(cursor)) == WT_PREPARE_CONFLICT)
|
||||
while ((ret = cursor->prev(cursor)) == WT_PREPARE_CONFLICT) {
|
||||
__wt_yield();
|
||||
|
||||
/* Ignore clock reset. */
|
||||
__wt_seconds(NULL, &now);
|
||||
testutil_assertfmt(now < start || now - start < 60,
|
||||
"%s: timed out with prepare-conflict", "WT_CURSOR.prev");
|
||||
}
|
||||
break;
|
||||
case SEARCH:
|
||||
while ((ret = cursor->search(cursor)) == WT_PREPARE_CONFLICT)
|
||||
while ((ret = cursor->search(cursor)) == WT_PREPARE_CONFLICT) {
|
||||
__wt_yield();
|
||||
|
||||
/* Ignore clock reset. */
|
||||
__wt_seconds(NULL, &now);
|
||||
testutil_assertfmt(now < start || now - start < 60,
|
||||
"%s: timed out with prepare-conflict", "WT_CURSOR.search");
|
||||
}
|
||||
break;
|
||||
case SEARCH_NEAR:
|
||||
while ((ret = cursor->search_near(cursor, exactp)) == WT_PREPARE_CONFLICT)
|
||||
while ((ret = cursor->search_near(cursor, exactp)) == WT_PREPARE_CONFLICT) {
|
||||
__wt_yield();
|
||||
|
||||
/* Ignore clock reset. */
|
||||
__wt_seconds(NULL, &now);
|
||||
testutil_assertfmt(now < start || now - start < 60,
|
||||
"%s: timed out with prepare-conflict", "WT_CURSOR.search_near");
|
||||
}
|
||||
break;
|
||||
}
|
||||
return (ret);
|
||||
|
10
src/third_party/wiredtiger/test/format/format.sh
vendored
10
src/third_party/wiredtiger/test/format/format.sh
vendored
@ -78,7 +78,6 @@ timing_stress_split_test=0
|
||||
total_jobs=0
|
||||
verbose=0
|
||||
format_binary="./t"
|
||||
env_var=""
|
||||
|
||||
while :; do
|
||||
case "$1" in
|
||||
@ -92,7 +91,7 @@ while :; do
|
||||
config="$2"
|
||||
shift ; shift ;;
|
||||
-e)
|
||||
env_var="$2"
|
||||
export "$2"
|
||||
shift ; shift ;;
|
||||
-E)
|
||||
skip_errors=1
|
||||
@ -448,10 +447,11 @@ format()
|
||||
# continue to run.
|
||||
# Run format in its own session so child processes are in their own process groups
|
||||
# and we can individually terminate (and clean up) running jobs and their children.
|
||||
eval $env_var setsid $cmd > $log 2>&1 &
|
||||
nohup setsid $cmd > $log 2>&1 &
|
||||
|
||||
# Check for setsid command failed execution, and forcibly quit.
|
||||
# The RUNDIR is not successfully created in this failure type.
|
||||
# Check for setsid command failed execution, and forcibly quit (setsid exits 0 if the
|
||||
# command execution fails so we can't check the exit status). The RUNDIR directory is
|
||||
# not created in this failure type, check the log file explicitly.
|
||||
sleep 1
|
||||
grep -E -i 'setsid: failed to execute' $log > /dev/null && {
|
||||
failure=$(($failure + 1))
|
||||
|
@ -50,13 +50,14 @@ class test_checkpoint04(wttest.WiredTigerTestCase):
|
||||
tables[uri] = ds
|
||||
return tables
|
||||
|
||||
def add_updates(self, uri, ds, value, nrows, ts):
|
||||
def add_updates(self, uri, ds, value, nrows):
|
||||
session = self.session
|
||||
cursor = session.open_cursor(uri)
|
||||
self.pr('update: ' + uri + ' for ' + str(nrows) + ' rows')
|
||||
for i in range(0, nrows):
|
||||
session.begin_transaction()
|
||||
cursor[ds.key(i)] = value
|
||||
session.commit_transaction('commit_timestamp=' + timestamp_str(ts))
|
||||
session.commit_transaction()
|
||||
cursor.close()
|
||||
|
||||
def get_stat(self, stat):
|
||||
@ -69,14 +70,11 @@ class test_checkpoint04(wttest.WiredTigerTestCase):
|
||||
nrows = 100
|
||||
ntables = 5
|
||||
|
||||
self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(10) +
|
||||
',stable_timestamp=' + timestamp_str(10))
|
||||
|
||||
# Create many tables and perform many updates so our checkpoint stats are populated.
|
||||
value = "wired" * 100
|
||||
tables = self.create_tables(ntables)
|
||||
for uri, ds in tables.items():
|
||||
self.add_updates(uri, ds, value, nrows, 20)
|
||||
self.add_updates(uri, ds, value, nrows)
|
||||
|
||||
# Perform a checkpoint.
|
||||
self.session.checkpoint()
|
||||
@ -85,23 +83,48 @@ class test_checkpoint04(wttest.WiredTigerTestCase):
|
||||
value = "tiger" * 100
|
||||
tables = self.create_tables(ntables)
|
||||
for uri, ds in tables.items():
|
||||
self.add_updates(uri, ds, value, nrows, 30)
|
||||
self.add_updates(uri, ds, value, nrows)
|
||||
|
||||
# Perform a checkpoint.
|
||||
self.session.checkpoint()
|
||||
|
||||
# Check the statistics.
|
||||
self.assertEqual(self.get_stat(stat.conn.txn_checkpoint), 2)
|
||||
self.assertEqual(self.get_stat(stat.conn.txn_checkpoint_running), 0)
|
||||
self.assertEqual(self.get_stat(stat.conn.txn_checkpoint_prep_running), 0)
|
||||
self.assertLess(self.get_stat(stat.conn.txn_checkpoint_prep_min),
|
||||
self.get_stat(stat.conn.txn_checkpoint_time_min))
|
||||
self.assertLess(self.get_stat(stat.conn.txn_checkpoint_prep_max),
|
||||
self.get_stat(stat.conn.txn_checkpoint_time_max))
|
||||
self.assertLess(self.get_stat(stat.conn.txn_checkpoint_prep_recent),
|
||||
self.get_stat(stat.conn.txn_checkpoint_time_recent))
|
||||
self.assertLess(self.get_stat(stat.conn.txn_checkpoint_prep_total),
|
||||
self.get_stat(stat.conn.txn_checkpoint_time_total))
|
||||
# Set them into a variable so that we can print them all out. We've had a failure
|
||||
# on Windows that is very difficult to reproduce so collect what info we can.
|
||||
num_ckpt = self.get_stat(stat.conn.txn_checkpoint)
|
||||
self.pr('txn_checkpoint, number of checkpoints ' + str(num_ckpt))
|
||||
running = self.get_stat(stat.conn.txn_checkpoint_running)
|
||||
self.pr('txn_checkpoint_running ' + str(running))
|
||||
prep_running = self.get_stat(stat.conn.txn_checkpoint_prep_running)
|
||||
self.pr('txn_checkpoint_prep_running ' + str(prep_running))
|
||||
|
||||
prep_min = self.get_stat(stat.conn.txn_checkpoint_prep_min)
|
||||
self.pr('txn_checkpoint_prep_min ' + str(prep_min))
|
||||
time_min = self.get_stat(stat.conn.txn_checkpoint_time_min)
|
||||
self.pr('txn_checkpoint_time_min ' + str(time_min))
|
||||
|
||||
prep_max = self.get_stat(stat.conn.txn_checkpoint_prep_max)
|
||||
self.pr('txn_checkpoint_prep_max ' + str(prep_max))
|
||||
time_max = self.get_stat(stat.conn.txn_checkpoint_time_max)
|
||||
self.pr('txn_checkpoint_time_max ' + str(time_max))
|
||||
|
||||
prep_recent = self.get_stat(stat.conn.txn_checkpoint_prep_recent)
|
||||
self.pr('txn_checkpoint_prep_recent ' + str(prep_recent))
|
||||
time_recent = self.get_stat(stat.conn.txn_checkpoint_time_recent)
|
||||
self.pr('txn_checkpoint_time_recent ' + str(time_recent))
|
||||
|
||||
prep_total = self.get_stat(stat.conn.txn_checkpoint_prep_total)
|
||||
self.pr('txn_checkpoint_prep_total ' + str(prep_total))
|
||||
time_total = self.get_stat(stat.conn.txn_checkpoint_time_total)
|
||||
self.pr('txn_checkpoint_time_total ' + str(time_total))
|
||||
|
||||
self.assertEqual(num_ckpt, 2)
|
||||
self.assertEqual(running, 0)
|
||||
self.assertEqual(prep_running, 0)
|
||||
self.assertLess(prep_min, time_min)
|
||||
self.assertLess(prep_max, time_max)
|
||||
self.assertLess(prep_recent, time_recent)
|
||||
self.assertLess(prep_total, time_total)
|
||||
|
||||
if __name__ == '__main__':
|
||||
wttest.run()
|
||||
|
Loading…
Reference in New Issue
Block a user