MariaDB/storage/innobase/buf/buf0rea.cc
Marko Mäkelä a027160cec MDEV-37244: Avoid page lookup after read
buf_read_page(): Return a pointer to a buffer-fixed, non-read-fixed page,
or nullptr in case of an error.

buf_inc_get(): Wrapper for buf_inc_get(ha_handler_stats*),
to read the thread-local variable mariadb_stats before updating it.

IORequest::read_complete(): Assert that the page is both read-fixed
and buffer-fixed. Sample recv_sys.recovery_on only once.
Buffer-unfix the page when the asynchronous read completes.

buf_page_t::read_complete(): Assert that the page is both
read-fixed and buffer-fixed.

buf_page_t::read_wait(): Wait for a read-fixed and buffer-fixed page
to be only read-fixed. Return the state and optionally page identifier
when holding a shared latch.

buf_page_init_for_read(): Return a pointer to a buffer-fixed block
descriptor, bitwise-ORed with 1 in case the block already exists in
the buffer pool.

buf_read_ahead_update(), buf_read_ahead_update_sql(): Common code
for updating some statistics counters.

buf_read_page_low(): Replace the parameter sync with err, which will
return an error code to a synchronous caller. Add a parameter for
thread-local mariadb_stats.
Return the pointer to the block, or the special values nullptr
(read failure) or -1 or -2 for asynchronous reads.
Increment the statistics when a synchronous read was requested.
In a synchronous read, if the page has already been allocated in
the buffer pool but it is read-fixed, wait for the read to complete.

buf_page_get_zip(): Get a buffer-fixed page from buf_read_page(),
and unfix() it. Our caller is relying solely on a page latch.

buf_read_page_background(): Update the statistics if supplied.
2025-07-18 14:43:30 +03:00
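
The change above leans on a tagged-pointer convention: buf_page_init_for_read()
and buf_read_page_low() return an ordinary block pointer whose low-order bit is
set when the block already exists in the buffer pool, while the integer values
-1 and -2, cast to a pointer, act as sentinels for asynchronous reads. Below is
a minimal, self-contained sketch of that convention; it is not MariaDB code,
and the names (page, tag_existing, the sentinel variables) are invented purely
for illustration.

#include <cstdint>
#include <cstdio>

struct page { int payload; };   // stand-in for a block descriptor

// Set the low-order bit to signal "this page already existed".
static page *tag_existing(page *p)
{
  return reinterpret_cast<page*>(reinterpret_cast<uintptr_t>(p) | 1);
}

int main()
{
  // page objects are at least int-aligned, so the low address bit is
  // always zero and is free for use as a tag.
  page p{42};

  // A caller receives either a clean pointer, a tagged pointer, or a sentinel.
  page *ret= tag_existing(&p);
  const bool existed= reinterpret_cast<uintptr_t>(ret) & 1;
  page *clean= reinterpret_cast<page*>(reinterpret_cast<uintptr_t>(ret) &
                                       ~uintptr_t{1});
  std::printf("existed=%d payload=%d\n", existed, clean->payload);

  // Sentinels for asynchronous reads, mirroring the -1/-2 return values.
  const page *async_submitted= reinterpret_cast<page*>(-1);
  const page *already_in_pool= reinterpret_cast<page*>(-2);
  std::printf("distinct sentinels: %d\n", async_submitted != already_in_pool);
  return 0;
}

The trick depends only on block descriptors being at least word-aligned, which
leaves the low address bit otherwise always zero.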


/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2023, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
@file buf/buf0rea.cc
The database buffer read
Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "univ.i"
#include <mysql/service_thd_wait.h>
#include "buf0rea.h"
#include "fil0fil.h"
#include "mtr0mtr.h"
#include "buf0buf.h"
#include "buf0flu.h"
#include "buf0lru.h"
#include "buf0buddy.h"
#include "buf0dblwr.h"
#include "page0zip.h"
#include "log0recv.h"
#include "trx0sys.h"
#include "os0file.h"
#include "srv0start.h"
#include "srv0srv.h"
#include "log.h"
#include "mariadb_stats.h"
TRANSACTIONAL_TARGET
bool buf_pool_t::page_hash_contains(const page_id_t page_id, hash_chain &chain)
noexcept
{
transactional_shared_lock_guard<page_hash_latch> g
{page_hash.lock_get(chain)};
return page_hash.get(page_id, chain);
}
/** If there are buf_pool.curr_size() per the number below pending reads, then
read-ahead is not done: this is to prevent flooding the buffer pool with
i/o-fixed buffer blocks */
#define BUF_READ_AHEAD_PEND_LIMIT 2
/** Initialize a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if the tablespace has been or is being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
@param page_id page identifier
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0,
bitwise-ORed with 1 in recovery
@param chain buf_pool.page_hash cell for page_id
@param block preallocated buffer block (set to nullptr if consumed)
@return pointer to the block
@retval nullptr in case of an error
@retval pointer to block | 1 if the page already exists in buf pool */
TRANSACTIONAL_TARGET
static buf_page_t *buf_page_init_for_read(const page_id_t page_id,
ulint zip_size,
buf_pool_t::hash_chain &chain,
buf_block_t *&block) noexcept
{
buf_page_t *bpage= nullptr;
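/* A newly created block descriptor starts out both read-fixed and
buffer-fixed; the buffer-fix is handed to a synchronous caller or
released when the asynchronous read completes. */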
constexpr uint32_t READ_BUF_FIX{buf_page_t::READ_FIX + 1};
if (!zip_size || (zip_size & 1))
{
bpage= &block->page;
block->initialise(page_id, zip_size & ~1, READ_BUF_FIX);
/* x_unlock() will be invoked
in buf_page_t::read_complete() by the io-handler thread. */
block->page.lock.x_lock(true);
}
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
hash_lock.lock();
buf_page_t *hash_page= buf_pool.page_hash.get(page_id, chain);
if (hash_page)
{
page_exists:
/* The page is already in the buffer pool. */
ut_d(const uint32_t state=) hash_page->fix();
ut_ad(state >= buf_page_t::FREED);
hash_lock.unlock();
if (bpage)
{
bpage->lock.x_unlock(true);
ut_d(mysql_mutex_lock(&buf_pool.mutex));
ut_d(bpage->set_state(buf_page_t::MEMORY));
ut_d(mysql_mutex_unlock(&buf_pool.mutex));
}
return reinterpret_cast<buf_page_t*>(uintptr_t(hash_page) | 1);
}
if (UNIV_UNLIKELY(mysql_mutex_trylock(&buf_pool.mutex)))
{
hash_lock.unlock();
mysql_mutex_lock(&buf_pool.mutex);
hash_lock.lock();
hash_page= buf_pool.page_hash.get(page_id, chain);
if (hash_page)
{
mysql_mutex_unlock(&buf_pool.mutex);
goto page_exists;
}
}
zip_size&= ~1;
if (UNIV_LIKELY(bpage != nullptr))
{
block= nullptr;
/* Insert into the hash table of file pages */
buf_pool.page_hash.append(chain, bpage);
hash_lock.unlock();
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block(bpage, true/* to old blocks */);
if (UNIV_UNLIKELY(zip_size))
{
/* buf_pool.mutex may be released and reacquired by
buf_buddy_alloc(). We must defer this operation until after the
block descriptor has been added to buf_pool.LRU and
buf_pool.page_hash. */
bpage->zip.data= static_cast<page_zip_t*>(buf_buddy_alloc(zip_size));
/* To maintain the invariant
block->in_unzip_LRU_list == block->page.belongs_to_unzip_LRU()
we have to add this block to unzip_LRU
after block->page.zip.data is set. */
ut_ad(bpage->belongs_to_unzip_LRU());
buf_unzip_LRU_add_block(reinterpret_cast<buf_block_t*>(bpage), TRUE);
}
}
else
{
hash_lock.unlock();
/* The compressed page must be allocated before the
control block (bpage), in order to avoid the
invocation of buf_buddy_relocate_block() on
uninitialized data. */
bool lru= false;
void *data= buf_buddy_alloc(zip_size, &lru);
/* If buf_buddy_alloc() allocated storage from the LRU list,
it released and reacquired buf_pool.mutex. Thus, we must
check the page_hash again, as it may have been modified. */
if (UNIV_UNLIKELY(lru))
{
hash_page= buf_pool.page_hash.get(page_id, chain);
if (UNIV_LIKELY_NULL(hash_page))
{
/* The block was added by some other thread. */
ut_d(const uint32_t state=) hash_page->fix();
ut_ad(state >= buf_page_t::FREED);
buf_buddy_free(data, zip_size);
mysql_mutex_unlock(&buf_pool.mutex);
return reinterpret_cast<buf_page_t*>(uintptr_t(hash_page) | 1);
}
}
bpage= static_cast<buf_page_t*>(ut_zalloc_nokey(sizeof *bpage));
page_zip_des_init(&bpage->zip);
page_zip_set_size(&bpage->zip, zip_size);
bpage->zip.data = (page_zip_t*) data;
bpage->lock.init();
bpage->init(READ_BUF_FIX, page_id);
bpage->lock.x_lock(true);
{
transactional_lock_guard<page_hash_latch> g
{buf_pool.page_hash.lock_get(chain)};
buf_pool.page_hash.append(chain, bpage);
}
/* The block must be put to the LRU list, to the old blocks.
The zip size is already set into the page zip */
buf_LRU_add_block(bpage, true/* to old blocks */);
}
buf_pool.stat.n_pages_read++;
ut_ad(!bpage || bpage->in_file());
mysql_mutex_unlock(&buf_pool.mutex);
return bpage;
}
/** Low-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there, in which case does nothing.
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
flag is cleared and the x-lock released by an i/o-handler thread.
@param[in] page_id page id
@param[in] zip_size 0 or ROW_FORMAT=COMPRESSED page size
bitwise-ORed with 1 to allocate an uncompressed frame
@param[out] err nullptr for asynchronous; error code for synchronous:
DB_SUCCESS if the page was successfully read,
DB_SUCCESS_LOCKED_REC if the page already exists in the buffer pool,
DB_PAGE_CORRUPTED on page checksum mismatch,
DB_DECRYPTION_FAILED if page post encryption checksum
matches but after decryption normal page checksum
does not match
@param[in,out] chain buf_pool.page_hash cell for page_id
@param[in,out] space tablespace
@param[in,out] block preallocated buffer block
@param[in,out] stats per-thread statistics
@return buffer-fixed block (*err may be set to DB_SUCCESS_LOCKED_REC)
@retval -1 if err==nullptr and an asynchronous read was submitted
@retval -2 if err==nullptr and the page exists in the buffer pool
@retval nullptr if the page was not successfully read (*err will be set) */
static
buf_page_t*
buf_read_page_low(
const page_id_t page_id,
ulint zip_size,
dberr_t* err,
buf_pool_t::hash_chain& chain,
fil_space_t* space,
buf_block_t*& block,
ha_handler_stats* stats) noexcept
{
if (buf_dblwr.is_inside(page_id))
{
fail:
space->release();
if (err)
*err= DB_PAGE_CORRUPTED;
return nullptr;
}
buf_page_t *bpage= buf_page_init_for_read(page_id, zip_size, chain, block);
if (UNIV_UNLIKELY(!bpage))
goto fail;
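/* The low-order bit of the pointer returned by buf_page_init_for_read()
is set if the page already exists in the buffer pool. */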
const bool exist(uintptr_t(bpage) & 1);
bpage= reinterpret_cast<buf_page_t*>(uintptr_t(bpage) & ~uintptr_t{1});
ulonglong start= 0;
if (exist)
{
if (!err)
{
bpage->unfix();
bpage= reinterpret_cast<buf_page_t*>(-2);
}
else
{
const uint32_t state{bpage->state()};
IF_DBUG(page_id_t id{bpage->id()},);
ut_ad(state > buf_page_t::FREED);
if (state < buf_page_t::UNFIXED)
{
corrupted:
DBUG_ASSERT(id == page_id || id == page_id_t{~0ULL});
bpage->unfix();
bpage= nullptr;
*err= DB_PAGE_CORRUPTED;
}
else if (!bpage->is_read_fixed(state))
*err= DB_SUCCESS_LOCKED_REC;
else if (bpage->read_wait(IF_DBUG(&id,nullptr), stats) <
buf_page_t::UNFIXED)
goto corrupted;
}
space->release();
return bpage;
}
ut_ad(bpage->in_file());
if (err)
{
thd_wait_begin(nullptr, THD_WAIT_DISKIO);
if (stats != nullptr && stats->active)
start= mariadb_measure();
}
void* dst= zip_size > 1 ? bpage->zip.data : bpage->frame;
const ulint len= zip_size & ~1 ? zip_size & ~1 : srv_page_size;
auto fio=
space->io(IORequest(err
? IORequest::READ_SYNC : IORequest::READ_ASYNC),
os_offset_t{page_id.page_no()} * len, len, dst, bpage);
if (UNIV_UNLIKELY(fio.err != DB_SUCCESS))
{
if (UNIV_LIKELY(err != nullptr))
*err= fio.err;
recv_sys.free_corrupted_page(page_id, *space->chain.start);
buf_pool.corrupted_evict(bpage, buf_page_t::READ_FIX + 1);
bpage= nullptr;
}
else if (err != nullptr)
{
const bool recovery{recv_sys.recovery_on};
if ((*err= fio.err= bpage->read_complete(*fio.node, recovery)))
bpage= nullptr;
thd_wait_end(nullptr);
space->release();
if (stats)
{
stats->pages_read_count++;
if (start)
stats->pages_read_time+= mariadb_measure() - start;
}
/* FIXME: Remove this, and accumulate stats->pages_read_count to
global statistics somewhere! */
buf_LRU_stat_inc_io();
mysql_mutex_assert_not_owner(&buf_pool.mutex);
}
else
bpage= reinterpret_cast<buf_page_t*>(-1);
return bpage;
}
/** Update the read-ahead statistics.
@param count number of pages that were read ahead */
static void buf_read_ahead_update(size_t count) noexcept
{
mysql_mutex_lock(&buf_pool.mutex);
/* Read ahead is considered one I/O operation for the purpose of
LRU policy decision. */
buf_LRU_stat_inc_io();
buf_pool.stat.n_ra_pages_read+= count;
mysql_mutex_unlock(&buf_pool.mutex);
}
/** Update the statistics for a read-ahead that is triggered from SQL.
@param count number of pages that were read ahead
@param stats per-thread statistics */
static void buf_read_ahead_update_sql(size_t count, ha_handler_stats *stats)
noexcept
{
if (stats)
stats->pages_prefetched+= count;
buf_read_ahead_update(count);
}
/** Acquire a buffer block. */
static buf_block_t *buf_read_acquire()
{
return buf_LRU_get_free_block(have_no_mutex_soft);
}
/** Free a buffer block if needed. */
static buf_block_t *buf_read_release(buf_block_t *block) noexcept
{
if (block)
{
mysql_mutex_lock(&buf_pool.mutex);
buf_LRU_block_free_non_file_page(block);
mysql_mutex_unlock(&buf_pool.mutex);
}
return block;
}
/** Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
mechanism is not activated. NOTE: the calling thread may own latches on
pages: to avoid deadlocks this function must be written such that it cannot
end up waiting for these latches!
@param[in] page_id page id of a page which the current thread
wants to access
@return number of page read requests issued */
TRANSACTIONAL_TARGET
ulint buf_read_ahead_random(const page_id_t page_id) noexcept
{
if (!srv_random_read_ahead || page_id.space() >= SRV_TMP_SPACE_ID)
/* Disable the read-ahead for temporary tablespace */
return 0;
if (srv_startup_is_before_trx_rollback_phase)
/* No read-ahead to avoid thread deadlocks */
return 0;
if (os_aio_pending_reads_approx() >
buf_pool.curr_size() / BUF_READ_AHEAD_PEND_LIMIT)
return 0;
fil_space_t* space= fil_space_t::get(page_id.space());
if (!space)
return 0;
const uint32_t buf_read_ahead_area= buf_pool.read_ahead_area;
ulint count= 5 + buf_read_ahead_area / 8;
const page_id_t low= page_id - (page_id.page_no() % buf_read_ahead_area);
page_id_t high= low + buf_read_ahead_area;
high.set_page_no(std::min(high.page_no(), space->last_page_number()));
/* Count how many blocks in the area have been recently accessed,
that is, reside near the start of the LRU list. */
for (page_id_t i= low; i < high; ++i)
{
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold());
transactional_shared_lock_guard<page_hash_latch> g
{buf_pool.page_hash.lock_get(chain)};
if (const buf_page_t *bpage= buf_pool.page_hash.get(i, chain))
if (bpage->is_accessed() && buf_page_peek_if_young(bpage) && !--count)
goto read_ahead;
}
no_read_ahead:
space->release();
return 0;
read_ahead:
if (space->is_stopping())
goto no_read_ahead;
/* Read all the suitable blocks within the area */
buf_block_t *block= nullptr;
unsigned zip_size{space->zip_size()};
if (UNIV_LIKELY(!zip_size))
{
allocate_block:
if (UNIV_UNLIKELY(!(block= buf_read_acquire())))
goto no_read_ahead;
}
else if (recv_recovery_is_on())
{
zip_size|= 1;
goto allocate_block;
}
/* Read all the suitable blocks within the area */
for (page_id_t i= low; i < high; ++i)
{
if (space->is_stopping())
break;
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold());
space->reacquire();
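/* A return value of -1 means that an asynchronous read was submitted
(and any preallocated block was consumed). */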
if (reinterpret_cast<buf_page_t*>(-1) ==
buf_read_page_low(i, zip_size, nullptr, chain, space, block, nullptr))
{
count++;
ut_ad(!block);
if ((UNIV_LIKELY(!zip_size) || (zip_size & 1)) &&
UNIV_UNLIKELY(!(block= buf_read_acquire())))
break;
}
}
if (count)
{
DBUG_PRINT("ib_buf", ("random read-ahead %zu pages from %s: %u",
count, space->chain.start->name,
low.page_no()));
buf_read_ahead_update_sql(count, mariadb_stats);
}
space->release();
buf_read_release(block);
return count;
}
buf_block_t *buf_read_page(const page_id_t page_id, dberr_t *err,
buf_pool_t::hash_chain &chain, bool unzip) noexcept
{
fil_space_t *space= fil_space_t::get(page_id.space());
dberr_t local_err;
if (!err)
err= &local_err;
if (UNIV_UNLIKELY(!space))
{
sql_print_information("InnoDB: trying to read page "
"[page id: space=" UINT32PF
", page number=" UINT32PF "]"
" in nonexisting or being-dropped tablespace",
page_id.space(), page_id.page_no());
*err= DB_TABLESPACE_DELETED;
return nullptr;
}
/* Our caller should already have ensured that the page does not
exist in buf_pool.page_hash. */
buf_block_t *block= nullptr;
unsigned zip_size= space->zip_size();
if (UNIV_LIKELY(!zip_size))
{
allocate_block:
mysql_mutex_lock(&buf_pool.mutex);
block= buf_LRU_get_free_block(have_mutex);
mysql_mutex_unlock(&buf_pool.mutex);
}
else if (unzip)
{
zip_size|= 1;
goto allocate_block;
}
buf_page_t *b= buf_read_page_low(page_id, zip_size, err, chain, space,
block, mariadb_stats);
buf_read_release(block);
return reinterpret_cast<buf_block_t*>(b);
}
/** Read a page asynchronously into buf_pool if it is not already there.
@param page_id page identifier
@param space tablespace
@param stats statistics */
void buf_read_page_background(const page_id_t page_id, fil_space_t *space,
ha_handler_stats *stats) noexcept
{
ut_ad(!recv_recovery_is_on());
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
if (buf_pool.page_hash_contains(page_id, chain))
skip:
space->release();
else
{
buf_block_t *b= nullptr;
ulint zip_size{space->zip_size()};
if (UNIV_LIKELY(!zip_size) && UNIV_UNLIKELY(!(b= buf_read_acquire())))
goto skip;
buf_read_page_low(page_id, zip_size, nullptr, chain, space, b, nullptr);
if (!buf_read_release(b) && stats)
{
stats->pages_prefetched++;
buf_read_ahead_update(1);
}
/* buf_load() invokes this with stats=nullptr. In that case, we skip
the call to buf_read_ahead_update() or buf_LRU_stat_inc_io(); these
deliberate page reads are not part of a normal workload and therefore
should not affect the unzip_LRU heuristics. */
}
}
/** Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
natural way to use this function is to call it when a page in the buf_pool
is accessed the first time, calling this function just after it has been
bufferfixed.
NOTE 1: as this function looks at the natural predecessor and successor
fields on the page, what happens, if these are not initialized to any
sensible value? No problem, before applying read-ahead we check that the
area to read is within the span of the space, if not, read-ahead is not
applied. An uninitialized value may result in a useless read operation, but
only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
@param[in] page_id page id
@return number of page read requests issued */
TRANSACTIONAL_TARGET
ulint buf_read_ahead_linear(const page_id_t page_id) noexcept
{
/* check if readahead is disabled.
Disable the read ahead logic for temporary tablespace */
if (!srv_read_ahead_threshold || page_id.space() >= SRV_TMP_SPACE_ID)
return 0;
if (srv_startup_is_before_trx_rollback_phase)
/* No read-ahead to avoid thread deadlocks */
return 0;
if (os_aio_pending_reads_approx() >
buf_pool.curr_size() / BUF_READ_AHEAD_PEND_LIMIT)
return 0;
const uint32_t buf_read_ahead_area= buf_pool.read_ahead_area;
const page_id_t low= page_id - (page_id.page_no() % buf_read_ahead_area);
const page_id_t high_1= low + (buf_read_ahead_area - 1);
/* We will check that almost all pages in the area have been accessed
in the desired order. */
const bool descending= page_id != low;
if (!descending && page_id != high_1)
/* This is not a border page of the area */
return 0;
fil_space_t *space= fil_space_t::get(page_id.space());
if (!space)
return 0;
if (high_1.page_no() > space->last_page_number())
{
/* The area is not whole. */
fail:
space->release();
return 0;
}
if (trx_sys_hdr_page(page_id))
/* If it is an ibuf bitmap page or trx sys hdr, we do no
read-ahead, as that could break the ibuf page access order */
goto fail;
/* How many out of order accessed pages can we ignore
when working out the access pattern for linear readahead */
ulint count= std::min<ulint>(buf_pool_t::READ_AHEAD_PAGES -
srv_read_ahead_threshold,
uint32_t{buf_pool.read_ahead_area});
page_id_t new_low= low, new_high_1= high_1;
unsigned prev_accessed= 0;
for (page_id_t i= low; i <= high_1; ++i)
{
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(i.fold());
page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
/* It does not make sense to use transactional_lock_guard here,
because we would have many complex conditions inside the memory
transaction. */
hash_lock.lock_shared();
const buf_page_t* bpage= buf_pool.page_hash.get(i, chain);
if (!bpage)
{
hash_lock.unlock_shared();
if (i == page_id)
goto fail;
failed:
if (--count)
continue;
goto fail;
}
const unsigned accessed= bpage->is_accessed();
if (i == page_id)
{
/* Read the natural predecessor and successor page addresses from
the page; NOTE that because the calling thread may have an x-latch
on the page, we do not acquire an s-latch on the page, this is to
prevent deadlocks. The hash_lock is only protecting the
buf_pool.page_hash for page i, not the bpage contents itself. */
const byte *f= bpage->frame ? bpage->frame : bpage->zip.data;
uint32_t prev= mach_read_from_4(my_assume_aligned<4>(f + FIL_PAGE_PREV));
uint32_t next= mach_read_from_4(my_assume_aligned<4>(f + FIL_PAGE_NEXT));
hash_lock.unlock_shared();
/* The underlying file page of this buffer pool page could actually
be marked as freed, or a read of the page into the buffer pool might
be in progress. We may read uninitialized data here.
Suppress warnings of comparing uninitialized values. */
MEM_MAKE_DEFINED(&prev, sizeof prev);
MEM_MAKE_DEFINED(&next, sizeof next);
if (prev == FIL_NULL || next == FIL_NULL)
goto fail;
page_id_t id= page_id;
if (descending)
{
if (id == high_1)
++id;
else if (next - 1 != page_id.page_no())
goto fail;
else
id.set_page_no(prev);
}
else
{
if (prev + 1 != page_id.page_no())
goto fail;
id.set_page_no(next);
}
new_low= id - (id.page_no() % buf_read_ahead_area);
new_high_1= new_low + (buf_read_ahead_area - 1);
if (id != new_low && id != new_high_1)
/* This is not a border page of the area: return */
goto fail;
if (new_high_1.page_no() > space->last_page_number())
/* The area is not whole */
goto fail;
}
else
hash_lock.unlock_shared();
if (!accessed)
goto failed;
/* Note that buf_page_t::is_accessed() returns the time of the
first access. If some blocks of the extent existed in the buffer
pool at the time of a linear access pattern, the first access
times may be nonmonotonic, even though the latest access times
were linear. The threshold (srv_read_ahead_threshold) should help a
little against this. */
bool fail= prev_accessed &&
(descending ? prev_accessed > accessed : prev_accessed < accessed);
prev_accessed= accessed;
if (fail)
goto failed;
}
/* If we got this far, read-ahead can be sensible: do it */
buf_block_t *block= nullptr;
unsigned zip_size{space->zip_size()};
if (UNIV_LIKELY(!zip_size))
{
allocate_block:
if (UNIV_UNLIKELY(!(block= buf_read_acquire())))
goto fail;
}
else if (recv_recovery_is_on())
{
zip_size|= 1;
goto allocate_block;
}
count= 0;
for (; new_low <= new_high_1; ++new_low)
{
if (space->is_stopping())
break;
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(new_low.fold());
space->reacquire();
if (reinterpret_cast<buf_page_t*>(-1) ==
buf_read_page_low(new_low, zip_size, nullptr,
chain, space, block, nullptr))
{
count++;
ut_ad(!block);
if ((UNIV_LIKELY(!zip_size) || (zip_size & 1)) &&
UNIV_UNLIKELY(!(block= buf_read_acquire())))
break;
}
}
if (count)
{
DBUG_PRINT("ib_buf", ("linear read-ahead %zu pages from %s: %u",
count, space->chain.start->name,
new_low.page_no()));
buf_read_ahead_update_sql(count, mariadb_stats);
}
space->release();
buf_read_release(block);
return count;
}
/** Schedule a page for recovery.
@param space tablespace
@param page_id page identifier
@param recs log records
@param init_lsn page initialization, or 0 if the page needs to be read */
void buf_read_recover(fil_space_t *space, const page_id_t page_id,
page_recv_t &recs, lsn_t init_lsn) noexcept
{
ut_ad(space->id == page_id.space());
space->reacquire();
const ulint zip_size= space->zip_size() | 1;
buf_pool_t::hash_chain &chain= buf_pool.page_hash.cell_get(page_id.fold());
buf_block_t *block= buf_LRU_get_free_block(have_no_mutex);
if (init_lsn)
{
buf_page_t *bpage= buf_page_init_for_read(page_id, zip_size, chain, block);
if (UNIV_UNLIKELY(!bpage))
goto fail;
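/* The low-order bit of the returned pointer signals that the page
already existed in the buffer pool. */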
const bool exist(uintptr_t(bpage) & 1);
bpage= reinterpret_cast<buf_page_t*>(uintptr_t(bpage) & ~uintptr_t{1});
bpage->unfix();
if (!exist)
os_fake_read(IORequest{bpage, (buf_tmp_buffer_t*) &recs,
UT_LIST_GET_FIRST(space->chain),
IORequest::READ_ASYNC}, init_lsn);
}
else if (!buf_read_page_low(page_id, zip_size, nullptr, chain, space, block,
nullptr))
fail:
sql_print_error("InnoDB: Recovery failed to read page %" PRIu32 " from %s",
page_id.page_no(), space->chain.start->name);
buf_read_release(block);
}