mirror of
https://github.com/MariaDB/server.git
synced 2025-07-20 16:56:36 +00:00
MDEV-37244: Avoid page lookup after read
buf_read_page(): Return a pointer to a buffer-fixed, non-read-fixed page, or nullptr in case of an error. buf_inc_get(): Wrapper for buf_inc_get(ha_handler_stats*), to read the thread-local variable mariadb_stats before updating it. IORequest::read_complete(): Assert that the page is both read-fixed and buffer-fixed. Sample recv_sys.recovery_on only once. Buffer-unfix the page when the asynchronous read completes. buf_page_t::read_complete(): Assert that the page is both read-fixed and buffer-fixed. buf_page_t::read_wait(): Wait for a read-fixed and buffer-fixed page to become only buffer-fixed (no longer read-fixed). Return the state and optionally the page identifier as observed while holding a shared latch. buf_page_init_for_read(): Return a pointer to a buffer-fixed block descriptor, bitwise-ORed with 1 in case the block already exists in the buffer pool. buf_read_ahead_update(), buf_read_ahead_update_sql(): Common code for updating some statistics counters. buf_read_page_low(): Replace the parameter sync with err, which will return an error code to a synchronous caller. Add a parameter for thread-local mariadb_stats. Return the pointer to the block, or the special values nullptr (read failure) or -1 or -2 for asynchronous reads. Increment the statistics when a synchronous read was requested. In a synchronous read, if the page has already been allocated in the buffer pool but it is read-fixed, wait for the read to complete. buf_page_get_zip(): Get a buffer-fixed page from buf_read_page(), and unfix() it. Our caller is relying solely on a page latch. buf_read_page_background(): Update the statistics if supplied.
This commit is contained in:
@ -65,6 +65,7 @@ Created 10/16/1994 Heikki Tuuri
|
||||
#include "mysql/service_wsrep.h"
|
||||
#endif /* WITH_WSREP */
|
||||
#include "log.h"
|
||||
#include "mariadb_stats.h"
|
||||
|
||||
/** Modification types for the B-tree operation.
|
||||
Note that the order must be DELETE, BOTH, INSERT !!
|
||||
@ -2257,7 +2258,7 @@ Prefetch siblings of the leaf for the pessimistic operation.
|
||||
@param block leaf page
|
||||
@param index index of the page */
|
||||
static void btr_cur_prefetch_siblings(const buf_block_t *block,
|
||||
const dict_index_t *index)
|
||||
const dict_index_t *index) noexcept
|
||||
{
|
||||
ut_ad(page_is_leaf(block->page.frame));
|
||||
|
||||
@ -2266,15 +2267,23 @@ static void btr_cur_prefetch_siblings(const buf_block_t *block,
|
||||
uint32_t next= mach_read_from_4(my_assume_aligned<4>(page + FIL_PAGE_NEXT));
|
||||
|
||||
fil_space_t *space= index->table->space;
|
||||
page_id_t id{space->id, 0};
|
||||
|
||||
if (prev == FIL_NULL);
|
||||
else if (space->acquire())
|
||||
buf_read_page_background(space, page_id_t(space->id, prev),
|
||||
block->zip_size());
|
||||
if (next == FIL_NULL);
|
||||
else if (space->acquire())
|
||||
buf_read_page_background(space, page_id_t(space->id, next),
|
||||
block->zip_size());
|
||||
ha_handler_stats *stats= mariadb_stats;
|
||||
|
||||
if (prev != FIL_NULL)
|
||||
{
|
||||
id.set_page_no(prev);
|
||||
if (space->acquire())
|
||||
buf_read_page_background(id, space, stats);
|
||||
}
|
||||
|
||||
if (next != FIL_NULL)
|
||||
{
|
||||
id.set_page_no(next);
|
||||
if (space->acquire())
|
||||
buf_read_page_background(id, space, stats);
|
||||
}
|
||||
}
|
||||
|
||||
/*************************************************************//**
|
||||
|
@ -2196,17 +2196,22 @@ void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr)
|
||||
mtr->memo_push(block, MTR_MEMO_PAGE_X_MODIFY);
|
||||
}
|
||||
|
||||
static void buf_inc_get(ha_handler_stats *stats)
|
||||
static void buf_inc_get(ha_handler_stats *stats) noexcept
|
||||
{
|
||||
mariadb_increment_pages_accessed(stats);
|
||||
if (stats)
|
||||
mariadb_increment_pages_accessed(stats);
|
||||
++buf_pool.stat.n_page_gets;
|
||||
}
|
||||
|
||||
static void buf_inc_get() noexcept
|
||||
{
|
||||
buf_inc_get(mariadb_stats);
|
||||
}
|
||||
|
||||
TRANSACTIONAL_TARGET
|
||||
buf_page_t *buf_page_get_zip(const page_id_t page_id) noexcept
|
||||
{
|
||||
ha_handler_stats *const stats= mariadb_stats;
|
||||
buf_inc_get(stats);
|
||||
buf_inc_get();
|
||||
|
||||
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
|
||||
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
|
||||
@ -2235,29 +2240,33 @@ buf_page_t *buf_page_get_zip(const page_id_t page_id) noexcept
|
||||
if (!bpage)
|
||||
{
|
||||
hash_lock.unlock_shared();
|
||||
switch (dberr_t err= buf_read_page(page_id, chain, false)) {
|
||||
case DB_SUCCESS:
|
||||
case DB_SUCCESS_LOCKED_REC:
|
||||
mariadb_increment_pages_read(stats);
|
||||
continue;
|
||||
case DB_TABLESPACE_DELETED:
|
||||
return nullptr;
|
||||
default:
|
||||
sql_print_error("InnoDB: Reading compressed page "
|
||||
"[page id: space=" UINT32PF ", page number=" UINT32PF
|
||||
"] failed with error: %s",
|
||||
page_id.space(), page_id.page_no(), ut_strerr(err));
|
||||
dberr_t err;
|
||||
bpage= &buf_read_page(page_id, &err, chain, false)->page;
|
||||
if (!bpage)
|
||||
{
|
||||
if (err != DB_TABLESPACE_DELETED)
|
||||
sql_print_error("InnoDB: Reading compressed page "
|
||||
"[page id: space=" UINT32PF ", page number=" UINT32PF
|
||||
"] failed with error: %s",
|
||||
page_id.space(), page_id.page_no(), ut_strerr(err));
|
||||
return nullptr;
|
||||
}
|
||||
hash_lock.lock_shared();
|
||||
bpage->unfix();
|
||||
}
|
||||
|
||||
ut_ad(bpage->in_file());
|
||||
ut_d(uint32_t state= bpage->state());
|
||||
ut_ad(state >= buf_page_t::UNFIXED);
|
||||
ut_ad(page_id == bpage->id());
|
||||
|
||||
const bool got_s_latch= bpage->lock.s_lock_try();
|
||||
hash_lock.unlock_shared();
|
||||
if (UNIV_LIKELY(got_s_latch))
|
||||
{
|
||||
ut_ad(!bpage->is_read_fixed());
|
||||
break;
|
||||
}
|
||||
|
||||
/* We may fail to acquire bpage->lock because a read is holding an
|
||||
exclusive latch on this block and either in progress or invoking
|
||||
buf_pool_t::corrupted_evict().
|
||||
@ -2481,6 +2490,34 @@ buf_block_t *buf_pool_t::unzip(buf_page_t *b, buf_pool_t::hash_chain &chain)
|
||||
return block;
|
||||
}
|
||||
|
||||
/** Wait for a pending read of this page by acquiring and releasing a
shared page latch, and report what was observed under that latch.
@param page_id  if not nullptr, receives id() as sampled while the
                shared latch was held
@param stats    per-thread statistics to update, or nullptr; when nullptr,
                neither the counters nor the wait notifications are touched
@return state() as sampled while holding the shared latch */
uint32_t buf_page_t::read_wait(page_id_t *page_id, ha_handler_stats *stats)
  noexcept
{
  ulonglong start= 0;

  if (stats)
  {
    tpool::tpool_wait_begin();
    thd_wait_begin(nullptr, THD_WAIT_DISKIO);
    stats->pages_read_count++;
    /* The elapsed time is only measured when the statistics are active. */
    if (stats->active)
      start= mariadb_measure();
  }

  lock.s_lock();
  const uint32_t latched_state= state();
  ut_ad(latched_state > FREED);
  /* Once the shared latch is granted, the page must not be read-fixed. */
  ut_ad(latched_state < READ_FIX || latched_state > WRITE_FIX);
  if (page_id)
    *page_id= id();
  lock.s_unlock();

  if (stats)
  {
    if (start)
      stats->pages_read_time+= mariadb_measure() - start;
    /* Pair every wait_begin above with a wait_end. This must not depend
    on whether timing was enabled (start != 0), or the wait notification
    would be left open whenever stats is non-null but inactive. */
    tpool::tpool_wait_end();
    thd_wait_end(nullptr);
  }

  return latched_state;
}
|
||||
|
||||
buf_block_t *buf_pool_t::page_fix(const page_id_t id,
|
||||
dberr_t *err,
|
||||
buf_pool_t::page_fix_conflicts c) noexcept
|
||||
@ -2493,9 +2530,10 @@ buf_block_t *buf_pool_t::page_fix(const page_id_t id,
|
||||
{
|
||||
hash_lock.lock_shared();
|
||||
buf_page_t *b= page_hash.get(id, chain);
|
||||
uint32_t state;
|
||||
if (b)
|
||||
{
|
||||
uint32_t state= b->fix() + 1;
|
||||
state= b->fix() + 1;
|
||||
hash_lock.unlock_shared();
|
||||
|
||||
if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED))
|
||||
@ -2534,11 +2572,8 @@ buf_block_t *buf_pool_t::page_fix(const page_id_t id,
|
||||
std::this_thread::sleep_for(std::chrono::microseconds(100));
|
||||
continue;
|
||||
}
|
||||
b->lock.s_lock();
|
||||
state= b->state();
|
||||
ut_ad(state < buf_page_t::READ_FIX || state >= buf_page_t::WRITE_FIX);
|
||||
|
||||
b->lock.s_unlock();
|
||||
state= b->read_wait(nullptr, stats);
|
||||
}
|
||||
|
||||
if (UNIV_UNLIKELY(!b->frame))
|
||||
@ -2566,17 +2601,21 @@ buf_block_t *buf_pool_t::page_fix(const page_id_t id,
|
||||
|
||||
if (c == FIX_NOWAIT)
|
||||
return reinterpret_cast<buf_block_t*>(-1);
|
||||
|
||||
switch (dberr_t local_err= buf_read_page(id, chain)) {
|
||||
default:
|
||||
if (err)
|
||||
*err= local_err;
|
||||
buf_block_t *block= buf_read_page(id, err, chain);
|
||||
if (!block)
|
||||
return nullptr;
|
||||
case DB_SUCCESS:
|
||||
case DB_SUCCESS_LOCKED_REC:
|
||||
mariadb_increment_pages_read(stats);
|
||||
buf_read_ahead_random(id);
|
||||
buf_read_ahead_random(id);
|
||||
if (err)
|
||||
{
|
||||
ut_ad(*err == DB_SUCCESS || *err == DB_SUCCESS_LOCKED_REC);
|
||||
*err= DB_SUCCESS;
|
||||
}
|
||||
if (UNIV_UNLIKELY(!block->page.frame))
|
||||
{
|
||||
b= &block->page;
|
||||
goto unzip;
|
||||
}
|
||||
return block;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2687,7 +2726,7 @@ buf_page_get_gen(
|
||||
}
|
||||
#endif /* UNIV_DEBUG */
|
||||
|
||||
ha_handler_stats* const stats = mariadb_stats;
|
||||
ha_handler_stats *const stats = mariadb_stats;
|
||||
buf_inc_get(stats);
|
||||
auto& chain= buf_pool.page_hash.cell_get(page_id.fold());
|
||||
page_hash_latch& hash_lock = buf_pool.page_hash.lock_get(chain);
|
||||
@ -2717,47 +2756,25 @@ loop:
|
||||
switch (mode) {
|
||||
case BUF_GET_IF_IN_POOL:
|
||||
case BUF_PEEK_IF_IN_POOL:
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* The call path is buf_read_page() ->
|
||||
buf_read_page_low() (fil_space_t::io()) ->
|
||||
buf_page_t::read_complete() ->
|
||||
buf_decrypt_after_read(). Here fil_space_t* is used
|
||||
and we decrypt -> buf_page_check_corrupt() where page
|
||||
checksums are compared. Decryption, decompression as
|
||||
well as error handling takes place at a lower level.
|
||||
Here we only need to know whether the page really is
|
||||
corrupted, or if an encrypted page with a valid
|
||||
checksum cannot be decrypted. */
|
||||
|
||||
switch (dberr_t local_err = buf_read_page(page_id, chain)) {
|
||||
case DB_SUCCESS:
|
||||
case DB_SUCCESS_LOCKED_REC:
|
||||
mariadb_increment_pages_read(stats);
|
||||
buf_read_ahead_random(page_id);
|
||||
break;
|
||||
default:
|
||||
if (mode != BUF_GET_POSSIBLY_FREED
|
||||
&& retries++ < BUF_PAGE_READ_MAX_RETRIES) {
|
||||
DBUG_EXECUTE_IF("intermittent_read_failure",
|
||||
retries = BUF_PAGE_READ_MAX_RETRIES;);
|
||||
block = buf_read_page(page_id, err, chain);
|
||||
if (!block) {
|
||||
break;
|
||||
} else if (err) {
|
||||
*err = DB_SUCCESS;
|
||||
}
|
||||
/* fall through */
|
||||
case DB_PAGE_CORRUPTED:
|
||||
if (err) {
|
||||
*err = local_err;
|
||||
}
|
||||
return nullptr;
|
||||
|
||||
ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate());
|
||||
buf_read_ahead_random(page_id);
|
||||
state = block->page.state();
|
||||
goto not_read_fixed;
|
||||
}
|
||||
|
||||
ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate());
|
||||
goto loop;
|
||||
return nullptr;
|
||||
|
||||
got_block:
|
||||
state++;
|
||||
ut_ad(state > buf_page_t::FREED);
|
||||
|
||||
if (state > buf_page_t::READ_FIX && state < buf_page_t::WRITE_FIX) {
|
||||
if (mode == BUF_PEEK_IF_IN_POOL) {
|
||||
ignore_block:
|
||||
@ -2778,12 +2795,8 @@ ignore_unfixed:
|
||||
in buf_page_t::read_complete() or
|
||||
buf_pool_t::corrupted_evict(), or
|
||||
after buf_zip_decompress() in this function. */
|
||||
block->page.lock.s_lock();
|
||||
state = block->page.state();
|
||||
ut_ad(state < buf_page_t::READ_FIX
|
||||
|| state >= buf_page_t::WRITE_FIX);
|
||||
const page_id_t id{block->page.id()};
|
||||
block->page.lock.s_unlock();
|
||||
page_id_t id{0};
|
||||
state = block->page.read_wait(&id, stats);
|
||||
|
||||
if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) {
|
||||
block->page.unfix();
|
||||
@ -2807,12 +2820,19 @@ ignore_unfixed:
|
||||
return nullptr;
|
||||
}
|
||||
ut_ad(id == page_id);
|
||||
} else if (mode != BUF_PEEK_IF_IN_POOL) {
|
||||
} else if (UNIV_UNLIKELY(!block->page.frame)) {
|
||||
/* The BUF_PEEK_IF_IN_POOL mode is mainly used for dropping an
|
||||
adaptive hash index. There cannot be an
|
||||
adaptive hash index for a compressed-only page. */
|
||||
goto ignore_block;
|
||||
} else {
|
||||
not_read_fixed:
|
||||
ut_ad(state > buf_page_t::FREED);
|
||||
ut_ad(state < buf_page_t::READ_FIX
|
||||
|| state > buf_page_t::WRITE_FIX);
|
||||
if (mode != BUF_PEEK_IF_IN_POOL) {
|
||||
} else if (UNIV_UNLIKELY(!block->page.frame)) {
|
||||
/* The BUF_PEEK_IF_IN_POOL mode is mainly used
|
||||
for dropping an adaptive hash index. There
|
||||
cannot be an adaptive hash index for a
|
||||
compressed-only page. */
|
||||
goto ignore_block;
|
||||
}
|
||||
}
|
||||
|
||||
ut_ad(mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL
|
||||
@ -3008,7 +3028,7 @@ buf_block_t *buf_page_try_get(const page_id_t page_id, mtr_t *mtr) noexcept
|
||||
ut_ad(block->page.buf_fix_count());
|
||||
ut_ad(block->page.id() == page_id);
|
||||
|
||||
buf_inc_get(mariadb_stats);
|
||||
buf_inc_get();
|
||||
return block;
|
||||
}
|
||||
|
||||
@ -3411,17 +3431,26 @@ static dberr_t buf_page_check_corrupt(buf_page_t *bpage,
|
||||
|
||||
/** Complete a read of a page.
|
||||
@param node data file
|
||||
@param recovery recv_recovery_is_on()
|
||||
@return whether the operation succeeded
|
||||
@retval DB_SUCCESS if the read succeeded; caller must unfix()
|
||||
@retval DB_PAGE_CORRUPTED if the checksum or the page ID is incorrect
|
||||
@retval DB_DECRYPTION_FAILED if the page cannot be decrypted */
|
||||
dberr_t buf_page_t::read_complete(const fil_node_t &node) noexcept
|
||||
dberr_t buf_page_t::read_complete(const fil_node_t &node,
|
||||
bool recovery) noexcept
|
||||
{
|
||||
const page_id_t expected_id{id()};
|
||||
ut_ad(is_read_fixed());
|
||||
{
|
||||
/* The block must be read-fixed and buffer-fixed. */
|
||||
ut_d(const auto s= state());
|
||||
ut_ad(s > READ_FIX);
|
||||
ut_ad(s < WRITE_FIX);
|
||||
}
|
||||
ut_ad(!buf_dblwr.is_inside(id()));
|
||||
ut_ad(id().space() == node.space->id);
|
||||
ut_ad(zip_size() == node.space->zip_size());
|
||||
ut_ad(!!zip.ssize == !!zip.data);
|
||||
ut_ad(recovery == recv_sys.recovery_on);
|
||||
|
||||
const byte *read_frame= zip.data ? zip.data : frame;
|
||||
ut_ad(read_frame);
|
||||
@ -3500,8 +3529,7 @@ database_corrupted_compressed:
|
||||
if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED)
|
||||
{
|
||||
release_page:
|
||||
if (node.space->full_crc32() && node.space->crypt_data &&
|
||||
recv_recovery_is_on() &&
|
||||
if (recovery && node.space->full_crc32() && node.space->crypt_data &&
|
||||
recv_sys.dblwr.find_deferred_page(node, id().page_no(),
|
||||
const_cast<byte*>(read_frame)))
|
||||
{
|
||||
@ -3510,7 +3538,7 @@ release_page:
|
||||
goto success_page;
|
||||
}
|
||||
|
||||
if (recv_sys.free_corrupted_page(expected_id, node));
|
||||
if (recovery && recv_sys.free_corrupted_page(expected_id, node));
|
||||
else if (err == DB_FAIL)
|
||||
err= DB_PAGE_CORRUPTED;
|
||||
else
|
||||
@ -3528,28 +3556,25 @@ release_page:
|
||||
FORCE_RECOVERY_MSG);
|
||||
}
|
||||
|
||||
buf_pool.corrupted_evict(this, buf_page_t::READ_FIX);
|
||||
buf_pool.corrupted_evict(this, buf_page_t::READ_FIX + 1);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
success_page:
|
||||
|
||||
const bool recovery= frame && recv_recovery_is_on();
|
||||
|
||||
if (recovery && !recv_recover_page(node.space, this))
|
||||
if (!recovery || !frame)
|
||||
{
|
||||
ut_d(auto f=) zip.fix.fetch_sub(READ_FIX - UNFIXED);
|
||||
ut_ad(f > READ_FIX);
|
||||
ut_ad(f < WRITE_FIX);
|
||||
}
|
||||
else if (!recv_recover_page(node.space, this))
|
||||
return DB_PAGE_CORRUPTED;
|
||||
|
||||
if (UNIV_UNLIKELY(MONITOR_IS_ON(MONITOR_MODULE_BUF_PAGE)))
|
||||
buf_page_monitor(*this, true);
|
||||
DBUG_PRINT("ib_buf", ("read page %u:%u", id().space(), id().page_no()));
|
||||
|
||||
if (!recovery)
|
||||
{
|
||||
ut_d(auto f=) zip.fix.fetch_sub(READ_FIX - UNFIXED);
|
||||
ut_ad(f >= READ_FIX);
|
||||
ut_ad(f < WRITE_FIX);
|
||||
}
|
||||
|
||||
lock.x_unlock(true);
|
||||
|
||||
return DB_SUCCESS;
|
||||
|
@ -585,7 +585,6 @@ buf_load()
|
||||
so all pages from a given tablespace are consecutive. */
|
||||
uint32_t cur_space_id = dump[0].space();
|
||||
fil_space_t* space = fil_space_t::get(cur_space_id);
|
||||
ulint zip_size = space ? space->zip_size() : 0;
|
||||
|
||||
PSI_stage_progress* pfs_stage_progress __attribute__((unused))
|
||||
= mysql_set_stage(srv_stage_buffer_pool_load.m_key);
|
||||
@ -608,12 +607,6 @@ buf_load()
|
||||
|
||||
cur_space_id = this_space_id;
|
||||
space = fil_space_t::get(cur_space_id);
|
||||
|
||||
if (!space) {
|
||||
continue;
|
||||
}
|
||||
|
||||
zip_size = space->zip_size();
|
||||
}
|
||||
|
||||
/* JAN: TODO: As we use background page read below,
|
||||
@ -632,7 +625,7 @@ buf_load()
|
||||
}
|
||||
|
||||
space->reacquire();
|
||||
buf_read_page_background(space, dump[i], zip_size);
|
||||
buf_read_page_background(dump[i], space, nullptr);
|
||||
|
||||
if (buf_load_abort_flag) {
|
||||
if (space) {
|
||||
|
@ -71,18 +71,20 @@ and the lock released later.
|
||||
@param chain buf_pool.page_hash cell for page_id
|
||||
@param block preallocated buffer block (set to nullptr if consumed)
|
||||
@return pointer to the block
|
||||
@retval nullptr in case of an error */
|
||||
@retval nullptr in case of an error
|
||||
@retval pointer to block | 1 if the page already exists in buf pool */
|
||||
TRANSACTIONAL_TARGET
|
||||
static buf_page_t *buf_page_init_for_read(const page_id_t page_id,
|
||||
ulint zip_size,
|
||||
buf_pool_t::hash_chain &chain,
|
||||
buf_block_t *&block)
|
||||
buf_block_t *&block) noexcept
|
||||
{
|
||||
buf_page_t *bpage= nullptr;
|
||||
constexpr uint32_t READ_BUF_FIX{buf_page_t::READ_FIX + 1};
|
||||
if (!zip_size || (zip_size & 1))
|
||||
{
|
||||
bpage= &block->page;
|
||||
block->initialise(page_id, zip_size & ~1, buf_page_t::READ_FIX);
|
||||
block->initialise(page_id, zip_size & ~1, READ_BUF_FIX);
|
||||
/* x_unlock() will be invoked
|
||||
in buf_page_t::read_complete() by the io-handler thread. */
|
||||
block->page.lock.x_lock(true);
|
||||
@ -90,11 +92,14 @@ static buf_page_t *buf_page_init_for_read(const page_id_t page_id,
|
||||
|
||||
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
|
||||
hash_lock.lock();
|
||||
if (buf_pool.page_hash.get(page_id, chain))
|
||||
buf_page_t *hash_page= buf_pool.page_hash.get(page_id, chain);
|
||||
if (hash_page)
|
||||
{
|
||||
page_exists:
|
||||
hash_lock.unlock();
|
||||
page_exists:
|
||||
/* The page is already in the buffer pool. */
|
||||
ut_d(const uint32_t state=) hash_page->fix();
|
||||
ut_ad(state >= buf_page_t::FREED);
|
||||
hash_lock.unlock();
|
||||
if (bpage)
|
||||
{
|
||||
bpage->lock.x_unlock(true);
|
||||
@ -102,7 +107,7 @@ page_exists:
|
||||
ut_d(bpage->set_state(buf_page_t::MEMORY));
|
||||
ut_d(mysql_mutex_unlock(&buf_pool.mutex));
|
||||
}
|
||||
return nullptr;
|
||||
return reinterpret_cast<buf_page_t*>(uintptr_t(hash_page) | 1);
|
||||
}
|
||||
|
||||
if (UNIV_UNLIKELY(mysql_mutex_trylock(&buf_pool.mutex)))
|
||||
@ -110,7 +115,8 @@ page_exists:
|
||||
hash_lock.unlock();
|
||||
mysql_mutex_lock(&buf_pool.mutex);
|
||||
hash_lock.lock();
|
||||
if (buf_pool.page_hash.get(page_id, chain))
|
||||
hash_page= buf_pool.page_hash.get(page_id, chain);
|
||||
if (hash_page)
|
||||
{
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
goto page_exists;
|
||||
@ -160,11 +166,15 @@ page_exists:
|
||||
check the page_hash again, as it may have been modified. */
|
||||
if (UNIV_UNLIKELY(lru))
|
||||
{
|
||||
if (UNIV_LIKELY_NULL(buf_pool.page_hash.get(page_id, chain)))
|
||||
hash_page= buf_pool.page_hash.get(page_id, chain);
|
||||
if (UNIV_LIKELY_NULL(hash_page))
|
||||
{
|
||||
/* The block was added by some other thread. */
|
||||
ut_d(const uint32_t state=) hash_page->fix();
|
||||
ut_ad(state >= buf_page_t::FREED);
|
||||
buf_buddy_free(data, zip_size);
|
||||
goto func_exit;
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
return reinterpret_cast<buf_page_t*>(uintptr_t(hash_page) | 1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -175,7 +185,7 @@ page_exists:
|
||||
bpage->zip.data = (page_zip_t*) data;
|
||||
|
||||
bpage->lock.init();
|
||||
bpage->init(buf_page_t::READ_FIX, page_id);
|
||||
bpage->init(READ_BUF_FIX, page_id);
|
||||
bpage->lock.x_lock(true);
|
||||
|
||||
{
|
||||
@ -190,10 +200,8 @@ page_exists:
|
||||
}
|
||||
|
||||
buf_pool.stat.n_pages_read++;
|
||||
func_exit:
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
ut_ad(!bpage || bpage->in_file());
|
||||
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
return bpage;
|
||||
}
|
||||
|
||||
@ -205,76 +213,148 @@ flag is cleared and the x-lock released by an i/o-handler thread.
|
||||
@param[in] page_id page id
|
||||
@param[in] zip_size 0 or ROW_FORMAT=COMPRESSED page size
|
||||
bitwise-ORed with 1 to allocate an uncompressed frame
|
||||
@param[out] err nullptr for asynchronous; error code for synchronous:
|
||||
DB_SUCCESS if the page was successfully read,
|
||||
DB_SUCCESS_LOCKED_REC if the page exists in the pool,
|
||||
DB_PAGE_CORRUPTED on page checksum mismatch,
|
||||
DB_DECRYPTION_FAILED if page post encryption checksum
|
||||
matches but after decryption normal page checksum
|
||||
does not match
|
||||
@param[in,out] chain buf_pool.page_hash cell for page_id
|
||||
@param[in,out] space tablespace
|
||||
@param[in,out] block preallocated buffer block
|
||||
@param[in] sync true if synchronous aio is desired
|
||||
@return error code
|
||||
@retval DB_SUCCESS if the page was read
|
||||
@retval DB_SUCCESS_LOCKED_REC if the page exists in the buffer pool already */
|
||||
@param[in,out] stats per-thread statistics
|
||||
@return buffer-fixed block (*err may be set to DB_SUCCESS_LOCKED_REC)
|
||||
@retval -1 if err==nullptr and an asynchronous read was submitted
|
||||
@retval -2 if err==nullptr and the page exists in the buffer pool
|
||||
@retval nullptr if the page was not successfully read (*err will be set) */
|
||||
static
|
||||
dberr_t
|
||||
buf_page_t*
|
||||
buf_read_page_low(
|
||||
const page_id_t page_id,
|
||||
ulint zip_size,
|
||||
dberr_t* err,
|
||||
buf_pool_t::hash_chain& chain,
|
||||
fil_space_t* space,
|
||||
buf_block_t*& block,
|
||||
bool sync = false) noexcept
|
||||
ha_handler_stats* stats) noexcept
|
||||
{
|
||||
buf_page_t* bpage;
|
||||
if (buf_dblwr.is_inside(page_id))
|
||||
{
|
||||
fail:
|
||||
space->release();
|
||||
if (err)
|
||||
*err= DB_PAGE_CORRUPTED;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (buf_dblwr.is_inside(page_id)) {
|
||||
space->release();
|
||||
return DB_PAGE_CORRUPTED;
|
||||
}
|
||||
buf_page_t *bpage= buf_page_init_for_read(page_id, zip_size, chain, block);
|
||||
if (UNIV_UNLIKELY(!bpage))
|
||||
goto fail;
|
||||
const bool exist(uintptr_t(bpage) & 1);
|
||||
bpage= reinterpret_cast<buf_page_t*>(uintptr_t(bpage) & ~uintptr_t{1});
|
||||
ulonglong start= 0;
|
||||
if (exist)
|
||||
{
|
||||
if (!err)
|
||||
{
|
||||
bpage->unfix();
|
||||
bpage= reinterpret_cast<buf_page_t*>(-2);
|
||||
}
|
||||
else
|
||||
{
|
||||
const uint32_t state{bpage->state()};
|
||||
IF_DBUG(page_id_t id{bpage->id()},);
|
||||
ut_ad(state > buf_page_t::FREED);
|
||||
if (state < buf_page_t::UNFIXED)
|
||||
{
|
||||
corrupted:
|
||||
DBUG_ASSERT(id == page_id || id == page_id_t{~0ULL});
|
||||
bpage->unfix();
|
||||
bpage= nullptr;
|
||||
*err= DB_PAGE_CORRUPTED;
|
||||
}
|
||||
else if (!bpage->is_read_fixed(state))
|
||||
*err= DB_SUCCESS_LOCKED_REC;
|
||||
else if (bpage->read_wait(IF_DBUG(&id,nullptr), stats) <
|
||||
buf_page_t::UNFIXED)
|
||||
goto corrupted;
|
||||
}
|
||||
|
||||
bpage = buf_page_init_for_read(page_id, zip_size, chain, block);
|
||||
space->release();
|
||||
return bpage;
|
||||
}
|
||||
|
||||
if (!bpage) {
|
||||
space->release();
|
||||
return DB_SUCCESS_LOCKED_REC;
|
||||
}
|
||||
ut_ad(bpage->in_file());
|
||||
if (err)
|
||||
{
|
||||
thd_wait_begin(nullptr, THD_WAIT_DISKIO);
|
||||
if (stats != nullptr && stats->active)
|
||||
start= mariadb_measure();
|
||||
}
|
||||
|
||||
ut_ad(bpage->in_file());
|
||||
ulonglong mariadb_timer = 0;
|
||||
void* dst= zip_size > 1 ? bpage->zip.data : bpage->frame;
|
||||
const ulint len= zip_size & ~1 ? zip_size & ~1 : srv_page_size;
|
||||
|
||||
if (sync) {
|
||||
thd_wait_begin(nullptr, THD_WAIT_DISKIO);
|
||||
if (const ha_handler_stats *stats = mariadb_stats) {
|
||||
if (stats->active) {
|
||||
mariadb_timer = mariadb_measure();
|
||||
}
|
||||
}
|
||||
}
|
||||
auto fio=
|
||||
space->io(IORequest(err
|
||||
? IORequest::READ_SYNC : IORequest::READ_ASYNC),
|
||||
os_offset_t{page_id.page_no()} * len, len, dst, bpage);
|
||||
|
||||
DBUG_LOG("ib_buf",
|
||||
"read page " << page_id << " zip_size=" << zip_size
|
||||
<< (sync ? " sync" : " async"));
|
||||
if (UNIV_UNLIKELY(fio.err != DB_SUCCESS))
|
||||
{
|
||||
if (UNIV_LIKELY(err != nullptr))
|
||||
*err= fio.err;
|
||||
recv_sys.free_corrupted_page(page_id, *space->chain.start);
|
||||
buf_pool.corrupted_evict(bpage, buf_page_t::READ_FIX + 1);
|
||||
bpage= nullptr;
|
||||
}
|
||||
else if (err != nullptr)
|
||||
{
|
||||
const bool recovery{recv_sys.recovery_on};
|
||||
if ((*err= fio.err= bpage->read_complete(*fio.node, recovery)))
|
||||
bpage= nullptr;
|
||||
thd_wait_end(nullptr);
|
||||
space->release();
|
||||
if (stats)
|
||||
{
|
||||
stats->pages_read_count++;
|
||||
if (start)
|
||||
stats->pages_read_time+= mariadb_measure() - start;
|
||||
}
|
||||
|
||||
void* dst = zip_size > 1 ? bpage->zip.data : bpage->frame;
|
||||
const ulint len = zip_size & ~1 ? zip_size & ~1 : srv_page_size;
|
||||
/* FIXME: Remove this, and accumulate stats->pages_read_count to
|
||||
global statistics somewhere! */
|
||||
buf_LRU_stat_inc_io();
|
||||
mysql_mutex_assert_not_owner(&buf_pool.mutex);
|
||||
}
|
||||
else
|
||||
bpage= reinterpret_cast<buf_page_t*>(-1);
|
||||
|
||||
auto fio = space->io(IORequest(sync
|
||||
? IORequest::READ_SYNC
|
||||
: IORequest::READ_ASYNC),
|
||||
os_offset_t{page_id.page_no()} * len, len,
|
||||
dst, bpage);
|
||||
return bpage;
|
||||
}
|
||||
|
||||
if (UNIV_UNLIKELY(fio.err != DB_SUCCESS)) {
|
||||
recv_sys.free_corrupted_page(page_id, *space->chain.start);
|
||||
buf_pool.corrupted_evict(bpage, buf_page_t::READ_FIX);
|
||||
} else if (sync) {
|
||||
thd_wait_end(nullptr);
|
||||
/* The i/o was already completed in space->io() */
|
||||
fio.err = bpage->read_complete(*fio.node);
|
||||
space->release();
|
||||
if (mariadb_timer) {
|
||||
mariadb_increment_pages_read_time(mariadb_timer);
|
||||
}
|
||||
}
|
||||
/** Update the read-ahead statistics.
|
||||
@param count number of pages that were read ahead */
|
||||
static void buf_read_ahead_update(size_t count) noexcept
|
||||
{
|
||||
mysql_mutex_lock(&buf_pool.mutex);
|
||||
/* Read ahead is considered one I/O operation for the purpose of
|
||||
LRU policy decision. */
|
||||
buf_LRU_stat_inc_io();
|
||||
buf_pool.stat.n_ra_pages_read+= count;
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
}
|
||||
|
||||
return fio.err;
|
||||
/** Update the statistics for a read-ahead that is triggered from SQL.
|
||||
@param count number of pages that were read ahead
|
||||
@param stats per-thread statistics */
|
||||
static void buf_read_ahead_update_sql(size_t count, ha_handler_stats *stats)
|
||||
noexcept
|
||||
{
|
||||
if (stats)
|
||||
stats->pages_prefetched+= count;
|
||||
buf_read_ahead_update(count);
|
||||
}
|
||||
|
||||
/** Acquire a buffer block. */
|
||||
@ -284,7 +364,7 @@ static buf_block_t *buf_read_acquire()
|
||||
}
|
||||
|
||||
/** Free a buffer block if needed. */
|
||||
static void buf_read_release(buf_block_t *block)
|
||||
static buf_block_t *buf_read_release(buf_block_t *block) noexcept
|
||||
{
|
||||
if (block)
|
||||
{
|
||||
@ -292,6 +372,8 @@ static void buf_read_release(buf_block_t *block)
|
||||
buf_LRU_block_free_non_file_page(block);
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
}
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
/** Applies a random read-ahead in buf_pool if there are at least a threshold
|
||||
@ -364,13 +446,15 @@ read_ahead:
|
||||
goto allocate_block;
|
||||
}
|
||||
|
||||
/* Read all the suitable blocks within the area */
|
||||
for (page_id_t i= low; i < high; ++i)
|
||||
{
|
||||
if (space->is_stopping())
|
||||
break;
|
||||
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold());
|
||||
space->reacquire();
|
||||
if (buf_read_page_low(i, zip_size, chain, space, block) == DB_SUCCESS)
|
||||
if (reinterpret_cast<buf_page_t*>(-1) ==
|
||||
buf_read_page_low(i, zip_size, nullptr, chain, space, block, nullptr))
|
||||
{
|
||||
count++;
|
||||
ut_ad(!block);
|
||||
@ -382,16 +466,10 @@ read_ahead:
|
||||
|
||||
if (count)
|
||||
{
|
||||
mariadb_increment_pages_prefetched(count);
|
||||
DBUG_PRINT("ib_buf", ("random read-ahead %zu pages from %s: %u",
|
||||
count, space->chain.start->name,
|
||||
low.page_no()));
|
||||
mysql_mutex_lock(&buf_pool.mutex);
|
||||
/* Read ahead is considered one I/O operation for the purpose of
|
||||
LRU policy decision. */
|
||||
buf_LRU_stat_inc_io();
|
||||
buf_pool.stat.n_ra_pages_read_rnd+= count;
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
buf_read_ahead_update_sql(count, mariadb_stats);
|
||||
}
|
||||
|
||||
space->release();
|
||||
@ -399,10 +477,13 @@ read_ahead:
|
||||
return count;
|
||||
}
|
||||
|
||||
dberr_t buf_read_page(const page_id_t page_id,
|
||||
buf_pool_t::hash_chain &chain, bool unzip) noexcept
|
||||
buf_block_t *buf_read_page(const page_id_t page_id, dberr_t *err,
|
||||
buf_pool_t::hash_chain &chain, bool unzip) noexcept
|
||||
{
|
||||
fil_space_t *space= fil_space_t::get(page_id.space());
|
||||
dberr_t local_err;
|
||||
if (!err)
|
||||
err= &local_err;
|
||||
if (UNIV_UNLIKELY(!space))
|
||||
{
|
||||
sql_print_information("InnoDB: trying to read page "
|
||||
@ -410,7 +491,8 @@ dberr_t buf_read_page(const page_id_t page_id,
|
||||
", page number=" UINT32PF "]"
|
||||
" in nonexisting or being-dropped tablespace",
|
||||
page_id.space(), page_id.page_no());
|
||||
return DB_TABLESPACE_DELETED;
|
||||
*err= DB_TABLESPACE_DELETED;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/* Our caller should already have ensured that the page does not
|
||||
@ -422,7 +504,6 @@ dberr_t buf_read_page(const page_id_t page_id,
|
||||
{
|
||||
allocate_block:
|
||||
mysql_mutex_lock(&buf_pool.mutex);
|
||||
buf_LRU_stat_inc_io();
|
||||
block= buf_LRU_get_free_block(have_mutex);
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
}
|
||||
@ -432,54 +513,41 @@ dberr_t buf_read_page(const page_id_t page_id,
|
||||
goto allocate_block;
|
||||
}
|
||||
|
||||
dberr_t err= buf_read_page_low(page_id, zip_size, chain, space, block, true);
|
||||
buf_page_t *b= buf_read_page_low(page_id, zip_size, err, chain, space,
|
||||
block, mariadb_stats);
|
||||
buf_read_release(block);
|
||||
return err;
|
||||
return reinterpret_cast<buf_block_t*>(b);
|
||||
}
|
||||
|
||||
/** High-level function which reads a page asynchronously from a file to the
|
||||
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
|
||||
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
|
||||
released by the i/o-handler thread.
|
||||
@param[in,out] space tablespace
|
||||
@param[in] page_id page id
|
||||
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 */
|
||||
void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
|
||||
ulint zip_size) noexcept
|
||||
/** Read a page asynchronously into buf_pool if it is not already there.
|
||||
@param page_id page identifier
|
||||
@param space tablespace
|
||||
@param stats statistics */
|
||||
void buf_read_page_background(const page_id_t page_id, fil_space_t *space,
|
||||
ha_handler_stats *stats) noexcept
|
||||
{
|
||||
ut_ad(!recv_recovery_is_on());
|
||||
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
|
||||
if (buf_pool.page_hash_contains(page_id, chain))
|
||||
{
|
||||
skip:
|
||||
space->release();
|
||||
return;
|
||||
}
|
||||
|
||||
buf_block_t *block= nullptr;
|
||||
if (UNIV_LIKELY(!zip_size))
|
||||
{
|
||||
allocate_block:
|
||||
if (UNIV_UNLIKELY(!(block= buf_read_acquire())))
|
||||
goto skip;
|
||||
}
|
||||
else if (recv_recovery_is_on())
|
||||
{
|
||||
zip_size|= 1;
|
||||
goto allocate_block;
|
||||
}
|
||||
|
||||
if (buf_read_page_low(page_id, zip_size, chain, space, block) ==
|
||||
DB_SUCCESS)
|
||||
ut_ad(!block);
|
||||
else
|
||||
buf_read_release(block);
|
||||
|
||||
/* We do not increment number of I/O operations used for LRU policy
|
||||
here (buf_LRU_stat_inc_io()). We use this in heuristics to decide
|
||||
about evicting uncompressed version of ROW_FORMAT=COMPRESSED pages
|
||||
from the buffer pool. Since this function is called from buffer pool
|
||||
load these IOs are deliberate and are not part of normal workload we
|
||||
can ignore these in our heuristics. */
|
||||
{
|
||||
buf_block_t *b= nullptr;
|
||||
ulint zip_size{space->zip_size()};
|
||||
if (UNIV_LIKELY(!zip_size) && UNIV_UNLIKELY(!(b= buf_read_acquire())))
|
||||
goto skip;
|
||||
buf_read_page_low(page_id, zip_size, nullptr, chain, space, b, nullptr);
|
||||
if (!buf_read_release(b) && stats)
|
||||
{
|
||||
stats->pages_prefetched++;
|
||||
buf_read_ahead_update(1);
|
||||
}
|
||||
/* buf_load() invokes this with stats=nullptr. In that case, we skip
|
||||
the call to buf_read_ahead_update() or buf_LRU_stat_inc_io(); these
|
||||
deliberate page reads are not part of a normal workload and therefore
|
||||
should not affect the unzip_LRU heuristics. */
|
||||
}
|
||||
}
|
||||
|
||||
/** Applies linear read-ahead if in the buf_pool the page is a border page of
|
||||
@ -662,8 +730,9 @@ failed:
|
||||
break;
|
||||
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(new_low.fold());
|
||||
space->reacquire();
|
||||
if (buf_read_page_low(new_low, zip_size, chain, space, block) ==
|
||||
DB_SUCCESS)
|
||||
if (reinterpret_cast<buf_page_t*>(-1) ==
|
||||
buf_read_page_low(new_low, zip_size, nullptr,
|
||||
chain, space, block, nullptr))
|
||||
{
|
||||
count++;
|
||||
ut_ad(!block);
|
||||
@ -675,16 +744,10 @@ failed:
|
||||
|
||||
if (count)
|
||||
{
|
||||
mariadb_increment_pages_prefetched(count);
|
||||
DBUG_PRINT("ib_buf", ("random read-ahead %zu pages from %s: %u",
|
||||
DBUG_PRINT("ib_buf", ("linear read-ahead %zu pages from %s: %u",
|
||||
count, space->chain.start->name,
|
||||
new_low.page_no()));
|
||||
mysql_mutex_lock(&buf_pool.mutex);
|
||||
/* Read ahead is considered one I/O operation for the purpose of
|
||||
LRU policy decision. */
|
||||
buf_LRU_stat_inc_io();
|
||||
buf_pool.stat.n_ra_pages_read+= count;
|
||||
mysql_mutex_unlock(&buf_pool.mutex);
|
||||
buf_read_ahead_update_sql(count, mariadb_stats);
|
||||
}
|
||||
|
||||
space->release();
|
||||
@ -708,29 +771,23 @@ void buf_read_recover(fil_space_t *space, const page_id_t page_id,
|
||||
|
||||
if (init_lsn)
|
||||
{
|
||||
if (buf_page_t *bpage=
|
||||
buf_page_init_for_read(page_id, zip_size, chain, block))
|
||||
{
|
||||
ut_ad(bpage->in_file());
|
||||
buf_page_t *bpage= buf_page_init_for_read(page_id, zip_size, chain, block);
|
||||
if (UNIV_UNLIKELY(!bpage))
|
||||
goto fail;
|
||||
const bool exist(uintptr_t(bpage) & 1);
|
||||
bpage= reinterpret_cast<buf_page_t*>(uintptr_t(bpage) & ~uintptr_t{1});
|
||||
bpage->unfix();
|
||||
|
||||
if (!exist)
|
||||
os_fake_read(IORequest{bpage, (buf_tmp_buffer_t*) &recs,
|
||||
UT_LIST_GET_FIRST(space->chain),
|
||||
IORequest::READ_ASYNC}, init_lsn);
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (dberr_t err=
|
||||
buf_read_page_low(page_id, zip_size, chain, space, block))
|
||||
{
|
||||
if (err != DB_SUCCESS_LOCKED_REC)
|
||||
sql_print_error("InnoDB: Recovery failed to read page "
|
||||
UINT32PF " from %s",
|
||||
page_id.page_no(), space->chain.start->name);
|
||||
}
|
||||
else
|
||||
{
|
||||
ut_ad(!block);
|
||||
return;
|
||||
}
|
||||
else if (!buf_read_page_low(page_id, zip_size, nullptr, chain, space, block,
|
||||
nullptr))
|
||||
fail:
|
||||
sql_print_error("InnoDB: Recovery failed to read page %" PRIu32 " from %s",
|
||||
page_id.page_no(), space->chain.start->name);
|
||||
|
||||
buf_LRU_block_free_non_file_page(block);
|
||||
buf_read_release(block);
|
||||
}
|
||||
|
@ -2927,25 +2927,31 @@ void IORequest::read_complete(int io_error) const noexcept
|
||||
ut_ad(node);
|
||||
ut_ad(is_read());
|
||||
ut_ad(bpage);
|
||||
ut_d(auto s= bpage->state());
|
||||
ut_ad(s > buf_page_t::READ_FIX);
|
||||
ut_ad(s <= buf_page_t::WRITE_FIX);
|
||||
|
||||
const page_id_t id(bpage->id());
|
||||
const bool in_recovery{recv_sys.recovery_on};
|
||||
|
||||
if (UNIV_UNLIKELY(io_error != 0))
|
||||
{
|
||||
sql_print_error("InnoDB: Read error %d of page " UINT32PF " in file %s",
|
||||
io_error, id.page_no(), node->name);
|
||||
recv_sys.free_corrupted_page(id, *node);
|
||||
buf_pool.corrupted_evict(bpage, buf_page_t::READ_FIX);
|
||||
buf_pool.corrupted_evict(bpage, buf_page_t::READ_FIX + 1);
|
||||
corrupted:
|
||||
if (recv_recovery_is_on() && !srv_force_recovery)
|
||||
if (in_recovery && !srv_force_recovery)
|
||||
{
|
||||
mysql_mutex_lock(&recv_sys.mutex);
|
||||
recv_sys.set_corrupt_fs();
|
||||
mysql_mutex_unlock(&recv_sys.mutex);
|
||||
}
|
||||
}
|
||||
else if (bpage->read_complete(*node))
|
||||
else if (bpage->read_complete(*node, in_recovery))
|
||||
goto corrupted;
|
||||
else
|
||||
bpage->unfix();
|
||||
|
||||
node->space->release();
|
||||
}
|
||||
|
@ -39,6 +39,7 @@ Created 11/5/1995 Heikki Tuuri
|
||||
#include "log0log.h"
|
||||
#include "srv0srv.h"
|
||||
#include "transactional_lock_guard.h"
|
||||
#include "ha_handler_stats.h"
|
||||
#include <ostream>
|
||||
|
||||
/** The allocation granularity of innodb_buffer_pool_size */
|
||||
@ -703,10 +704,18 @@ public:
|
||||
|
||||
/** Complete a read of a page.
|
||||
@param node data file
|
||||
@param recovery recv_recovery_is_on()
|
||||
@return whether the operation succeeded
|
||||
@retval DB_SUCCESS if the read succeeded; caller must unfix()
|
||||
@retval DB_PAGE_CORRUPTED if the checksum or the page ID is incorrect
|
||||
@retval DB_DECRYPTION_FAILED if the page cannot be decrypted */
|
||||
dberr_t read_complete(const fil_node_t &node) noexcept;
|
||||
dberr_t read_complete(const fil_node_t &node, bool recovery) noexcept;
|
||||
|
||||
/** Wait for read_complete().
|
||||
@param page_id id() at the time we were holding lock
|
||||
@param stats per-thread statistics to update
|
||||
@return state() at the time we were holding lock */
|
||||
uint32_t read_wait(page_id_t *page_id, ha_handler_stats *stats) noexcept;
|
||||
|
||||
/** Release a write fix after a page write was completed.
|
||||
@param persistent whether the page belongs to a persistent tablespace
|
||||
@ -1842,7 +1851,7 @@ inline void buf_page_t::set_state(uint32_t s) noexcept
|
||||
mysql_mutex_assert_owner(&buf_pool.mutex);
|
||||
ut_ad(s <= REMOVE_HASH || s >= UNFIXED);
|
||||
ut_ad(s < WRITE_FIX);
|
||||
ut_ad(s <= READ_FIX || zip.fix == READ_FIX);
|
||||
ut_ad(s <= READ_FIX + 1 || zip.fix == READ_FIX + 1);
|
||||
zip.fix= s;
|
||||
}
|
||||
|
||||
|
@ -32,27 +32,26 @@ Created 11/5/1995 Heikki Tuuri
|
||||
will be invoked on read completion.
|
||||
@param page_id page identifier
|
||||
@param chain buf_pool.page_hash cell for page_id
|
||||
@param err error code: DB_SUCCESS if the page was successfully read,
|
||||
DB_SUCCESS_LOCKED_REC if the page was not read,
|
||||
DB_PAGE_CORRUPTED on page checksum mismatch,
|
||||
DB_DECRYPTION_FAILED if page post encryption checksum matches but
|
||||
after decryption normal page checksum does not match,
|
||||
DB_TABLESPACE_DELETED if tablespace .ibd file is missing
|
||||
@param unzip whether to decompress ROW_FORMAT=COMPRESSED pages
|
||||
@retval DB_SUCCESS if the page was read and is not corrupted
|
||||
@retval DB_SUCCESS_LOCKED_REC if the page was not read
|
||||
@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
|
||||
@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
|
||||
after decryption normal page checksum does not match.
|
||||
@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
|
||||
dberr_t buf_read_page(const page_id_t page_id,
|
||||
buf_pool_t::hash_chain &chain, bool unzip= true)
|
||||
@return buffer-fixed block (*err may be set to DB_SUCCESS_LOCKED_REC)
|
||||
@retval nullptr if the page is not available (*err will be set) */
|
||||
buf_block_t *buf_read_page(const page_id_t page_id, dberr_t *err,
|
||||
buf_pool_t::hash_chain &chain, bool unzip= true)
|
||||
noexcept;
|
||||
|
||||
/** High-level function which reads a page asynchronously from a file to the
|
||||
buffer buf_pool if it is not already there. Sets the io_fix flag and sets
|
||||
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
|
||||
released by the i/o-handler thread.
|
||||
@param[in,out] space tablespace
|
||||
@param[in] page_id page id
|
||||
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 */
|
||||
void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
|
||||
ulint zip_size) noexcept
|
||||
MY_ATTRIBUTE((nonnull));
|
||||
/** Read a page asynchronously into buf_pool if it is not already there.
|
||||
@param page_id page identifier
|
||||
@param space tablespace
|
||||
@param stats statistics */
|
||||
void buf_read_page_background(const page_id_t page_id, fil_space_t *space,
|
||||
ha_handler_stats *stats) noexcept
|
||||
MY_ATTRIBUTE((nonnull(2)));
|
||||
|
||||
/** Applies a random read-ahead in buf_pool if there are at least a threshold
|
||||
value of accessed pages from the random read-ahead area. Does not read any
|
||||
|
@ -100,23 +100,6 @@ inline ulonglong mariadb_measure()
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
Call this only if start_time != 0
|
||||
See buf0rea.cc for an example of how to use it efficiently
|
||||
*/
|
||||
|
||||
inline void mariadb_increment_pages_read_time(ulonglong start_time)
|
||||
{
|
||||
ha_handler_stats *stats= mariadb_stats;
|
||||
ulonglong end_time= mariadb_measure();
|
||||
/* Check that we only call this if active, see example! */
|
||||
DBUG_ASSERT(start_time);
|
||||
DBUG_ASSERT(stats->active);
|
||||
|
||||
stats->pages_read_time+= (end_time - start_time);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Helper class to set mariadb_stats temporarily for one call in handler.cc
|
||||
*/
|
||||
|
Reference in New Issue
Block a user