mirror of
https://github.com/MariaDB/server.git
synced 2025-07-25 15:08:40 +00:00

* Log rows in online_alter_binlog. * Table online data is replicated within a dedicated binlog file. * Cached data is written on commit. * Versioning is fully supported. * Works both with and without binlog enabled. * For now, savepoint setup is forbidden while ONLINE ALTER goes on. Extra support is required. We can simply log the SAVEPOINT query events and replicate them together with row events. But it's not implemented for now. * Cache flipping: We want to care for the possible bottleneck in the online alter binlog reading/writing in advance. IO_CACHE does not provide anything better than sequential access; besides, only a single write is mutex-protected, which is not suitable, since we should write a transaction atomically. To solve this, a special layer on top of Event_log is implemented. There are two IO_CACHE files underneath: one for reading, and one for writing. Once the read cache is empty, an exclusive lock is acquired (we can wait for a currently active transaction to finish writing), and flip() is emitted, i.e. the write cache is reopened for reading, and the read cache is emptied and reopened for writing. This resembles the buffer flip that happens in accelerated graphics (DirectX/OpenGL/etc). Cache_flip_event_log is considered non-blocking for a single reader and a single writer in this sense, with the only lock held by the reader during the flip. An alternative approach, implementing a fair concurrent circular buffer, is described in MDEV-24676. * Cache managers: We have two cache sinks: statement and transactional. It is important that the changes are first cached per-statement and per-transaction. If a statement fails, then only statement data is rolled back. The transaction moves along, however. It turns out there's no guarantee that TABLE will persist in thd->open_tables until the transaction commit moment. If an error occurs, tables from the statement are purged. Therefore, we can't store the caches in TABLE.
Ideally, it should be handlerton, but we cut a corner and store it in THD in a list.
347 lines
11 KiB
C++
347 lines
11 KiB
C++
/*
|
|
Copyright (c) 2006, 2010, Oracle and/or its affiliates.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
|
|
|
|
#ifndef RPL_UTILITY_H
|
|
#define RPL_UTILITY_H
|
|
|
|
#ifndef __cplusplus
|
|
#error "Don't include this C++ header file from a non-C++ file!"
|
|
#endif
|
|
|
|
#include "sql_priv.h"
|
|
#include "m_string.h" /* bzero, memcpy */
|
|
#ifdef MYSQL_SERVER
|
|
#include "table.h" /* TABLE_LIST */
|
|
#endif
|
|
#include "mysql_com.h"
|
|
|
|
class Relay_log_info;
|
|
class Log_event;
|
|
struct rpl_group_info;
|
|
|
|
/**
|
|
A table definition from the master.
|
|
|
|
The responsibilities of this class is:
|
|
- Extract and decode table definition data from the table map event
|
|
- Check if table definition in table map is compatible with table
|
|
definition on slave
|
|
*/
|
|
|
|
class table_def
|
|
{
|
|
table_def(const table_def&) = default;
|
|
public:
|
|
/**
|
|
Constructor.
|
|
|
|
@param types Array of types, each stored as a byte
|
|
@param size Number of elements in array 'types'
|
|
@param field_metadata Array of extra information about fields
|
|
@param metadata_size Size of the field_metadata array
|
|
@param null_bitmap The bitmap of fields that can be null
|
|
*/
|
|
table_def(unsigned char *types, ulong size, uchar *field_metadata,
|
|
int metadata_size, uchar *null_bitmap, uint16 flags);
|
|
|
|
|
|
/**
|
|
Move constructor
|
|
Since it deallocates a memory during destruction, we can't safely copy it.
|
|
We should instead move it to zero m_memory in an old object
|
|
*/
|
|
table_def(table_def &&tabledef)
|
|
: table_def(tabledef)
|
|
{
|
|
tabledef.m_memory= NULL;
|
|
}
|
|
|
|
~table_def();
|
|
|
|
/**
|
|
Return the number of fields there is type data for.
|
|
|
|
@return The number of fields that there is type data for.
|
|
*/
|
|
uint size() const { return m_size; }
|
|
|
|
|
|
/**
|
|
Returns internal binlog type code for one field,
|
|
without translation to real types.
|
|
*/
|
|
enum_field_types binlog_type(ulong index) const
|
|
{
|
|
return static_cast<enum_field_types>(m_type[index]);
|
|
}
|
|
/*
|
|
Return a representation of the type data for one field.
|
|
|
|
@param index Field index to return data for
|
|
|
|
@return Will return a representation of the type data for field
|
|
<code>index</code>. Currently, only the type identifier is
|
|
returned.
|
|
*/
|
|
enum_field_types type(ulong index) const
|
|
{
|
|
DBUG_ASSERT(index < m_size);
|
|
/*
|
|
If the source type is MYSQL_TYPE_STRING, it can in reality be
|
|
either MYSQL_TYPE_STRING, MYSQL_TYPE_ENUM, or MYSQL_TYPE_SET, so
|
|
we might need to modify the type to get the real type.
|
|
*/
|
|
enum_field_types source_type= binlog_type(index);
|
|
uint16 source_metadata= m_field_metadata[index];
|
|
switch (source_type)
|
|
{
|
|
case MYSQL_TYPE_STRING:
|
|
{
|
|
int real_type= source_metadata >> 8;
|
|
if (real_type == MYSQL_TYPE_ENUM || real_type == MYSQL_TYPE_SET)
|
|
source_type= static_cast<enum_field_types>(real_type);
|
|
break;
|
|
}
|
|
|
|
/*
|
|
This type has not been used since before row-based replication,
|
|
so we can safely assume that it really is MYSQL_TYPE_NEWDATE.
|
|
*/
|
|
case MYSQL_TYPE_DATE:
|
|
source_type= MYSQL_TYPE_NEWDATE;
|
|
break;
|
|
|
|
default:
|
|
/* Do nothing */
|
|
break;
|
|
}
|
|
|
|
return source_type;
|
|
}
|
|
#ifdef MYSQL_SERVER
|
|
const Type_handler *field_type_handler(uint index) const;
|
|
#endif
|
|
|
|
/*
|
|
This function allows callers to get the extra field data from the
|
|
table map for a given field. If there is no metadata for that field
|
|
or there is no extra metadata at all, the function returns 0.
|
|
|
|
The function returns the value for the field metadata for column at
|
|
position indicated by index. As mentioned, if the field was a type
|
|
that stores field metadata, that value is returned else zero (0) is
|
|
returned. This method is used in the unpack() methods of the
|
|
corresponding fields to properly extract the data from the binary log
|
|
in the event that the master's field is smaller than the slave.
|
|
*/
|
|
uint16 field_metadata(uint index) const
|
|
{
|
|
DBUG_ASSERT(index < m_size);
|
|
if (m_field_metadata_size)
|
|
return m_field_metadata[index];
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
This function returns whether the field on the master can be null.
|
|
This value is derived from field->maybe_null().
|
|
*/
|
|
my_bool maybe_null(uint index) const
|
|
{
|
|
DBUG_ASSERT(index < m_size);
|
|
return ((m_null_bits[(index / 8)] &
|
|
(1 << (index % 8))) == (1 << (index %8)));
|
|
}
|
|
|
|
/*
|
|
This function returns the field size in raw bytes based on the type
|
|
and the encoded field data from the master's raw data. This method can
|
|
be used for situations where the slave needs to skip a column (e.g.,
|
|
WL#3915) or needs to advance the pointer for the fields in the raw
|
|
data from the master to a specific column.
|
|
*/
|
|
uint32 calc_field_size(uint col, uchar *master_data) const;
|
|
|
|
/**
|
|
Decide if the table definition is compatible with a table.
|
|
|
|
Compare the definition with a table to see if it is compatible
|
|
with it.
|
|
|
|
A table definition is compatible with a table if:
|
|
- The columns types of the table definition is a (not
|
|
necessarily proper) prefix of the column type of the table.
|
|
|
|
- The other way around.
|
|
|
|
- Each column on the master that also exists on the slave can be
|
|
converted according to the current settings of @c
|
|
SLAVE_TYPE_CONVERSIONS.
|
|
|
|
@param thd
|
|
@param rli Pointer to relay log info
|
|
@param table Pointer to table to compare with.
|
|
|
|
@param[out] tmp_table_var Pointer to temporary table for holding
|
|
conversion table.
|
|
|
|
@retval 1 if the table definition is not compatible with @c table
|
|
@retval 0 if the table definition is compatible with @c table
|
|
*/
|
|
#ifndef MYSQL_CLIENT
|
|
bool compatible_with(THD *thd, rpl_group_info *rgi, TABLE *table,
|
|
TABLE **conv_table_var) const;
|
|
|
|
/**
|
|
Create a virtual in-memory temporary table structure.
|
|
|
|
The table structure has records and field array so that a row can
|
|
be unpacked into the record for further processing.
|
|
|
|
In the virtual table, each field that requires conversion will
|
|
have a non-NULL value, while fields that do not require
|
|
conversion will have a NULL value.
|
|
|
|
Some information that is missing in the events, such as the
|
|
character set for string types, are taken from the table that the
|
|
field is going to be pushed into, so the target table that the data
|
|
eventually need to be pushed into need to be supplied.
|
|
|
|
@param thd Thread to allocate memory from.
|
|
@param rli Relay log info structure, for error reporting.
|
|
@param target_table Target table for fields.
|
|
|
|
@return A pointer to a temporary table with memory allocated in the
|
|
thread's memroot, NULL if the table could not be created
|
|
*/
|
|
TABLE *create_conversion_table(THD *thd, rpl_group_info *rgi,
|
|
TABLE *target_table) const;
|
|
#endif
|
|
|
|
|
|
private:
|
|
unsigned char *m_type; // Array of type descriptors
|
|
uint m_size; // Number of elements in the types array
|
|
uint m_field_metadata_size;
|
|
uint16 *m_field_metadata;
|
|
uint16 m_flags; // Table flags
|
|
uchar *m_null_bits;
|
|
uchar *m_memory;
|
|
};
|
|
|
|
|
|
#ifndef MYSQL_CLIENT
|
|
/**
|
|
Extend the normal table list with a few new fields needed by the
|
|
slave thread, but nowhere else.
|
|
*/
|
|
struct RPL_TABLE_LIST
|
|
: public TABLE_LIST
|
|
{
|
|
bool m_tabledef_valid;
|
|
table_def m_tabledef;
|
|
TABLE *m_conv_table;
|
|
bool master_had_triggers;
|
|
const Copy_field *m_online_alter_copy_fields;
|
|
const Copy_field *m_online_alter_copy_fields_end;
|
|
|
|
RPL_TABLE_LIST(const LEX_CSTRING *db_arg, const LEX_CSTRING *table_name_arg,
|
|
thr_lock_type thr_lock_type,
|
|
table_def &&tabledef, bool master_had_trigers)
|
|
: TABLE_LIST(db_arg, table_name_arg, NULL, thr_lock_type),
|
|
m_tabledef_valid(true), m_tabledef(std::move(tabledef)),
|
|
m_conv_table(NULL), master_had_triggers(master_had_trigers),
|
|
m_online_alter_copy_fields(NULL),
|
|
m_online_alter_copy_fields_end(NULL)
|
|
{}
|
|
|
|
RPL_TABLE_LIST(TABLE *table, thr_lock_type lock_type, TABLE *conv_table,
|
|
table_def &&tabledef,
|
|
const Copy_field online_alter_copy_fields[],
|
|
const Copy_field *online_alter_copy_fields_end)
|
|
: TABLE_LIST(table, lock_type),
|
|
m_tabledef_valid(true),
|
|
m_tabledef(std::move(tabledef)),
|
|
m_conv_table(conv_table), master_had_triggers(false),
|
|
m_online_alter_copy_fields(online_alter_copy_fields),
|
|
m_online_alter_copy_fields_end(online_alter_copy_fields_end)
|
|
{}
|
|
};
|
|
|
|
|
|
/* Anonymous namespace for template functions/classes */
|
|
CPP_UNNAMED_NS_START
|
|
|
|
/*
  Smart pointer that automatically calls my_afree (a macro) on the held
  pointer when the smart pointer goes out of scope, so that callers do
  not have to remember to call my_afree() before each return. There is
  no overhead associated with this, since all functions are inline.

  Passing the free function as a template parameter would be preferable,
  but that is not possible when the "function" is a macro.

  The class is the sole owner of the pointer it holds, hence it is not
  copyable: a copy would make two destructors pass the same pointer to
  my_afree().
*/
template <class Obj>
class auto_afree_ptr
{
  Obj* m_ptr;
public:
  /* @param ptr  Pointer to take over; may be NULL and assigned later. */
  auto_afree_ptr(Obj* ptr) : m_ptr(ptr) { }

  /* Copying would release the same pointer twice, so it is disabled. */
  auto_afree_ptr(const auto_afree_ptr&) = delete;
  auto_afree_ptr& operator=(const auto_afree_ptr&) = delete;

  /* Releases the held pointer, if there is one. */
  ~auto_afree_ptr() { if (m_ptr) my_afree(m_ptr); }

  /*
    Give the smart pointer a value.
    Only to be called if it hasn't been given a value before.
  */
  void assign(Obj* ptr)
  {
    DBUG_ASSERT(m_ptr == NULL);
    m_ptr= ptr;
  }

  /* The held pointer; ownership stays with this object. */
  Obj* get() const { return m_ptr; }
};
|
|
|
|
CPP_UNNAMED_NS_END
|
|
|
|
class Deferred_log_events
|
|
{
|
|
private:
|
|
DYNAMIC_ARRAY array;
|
|
Log_event *last_added;
|
|
|
|
public:
|
|
Deferred_log_events(Relay_log_info *rli);
|
|
~Deferred_log_events();
|
|
/* queue for exection at Query-log-event time prior the Query */
|
|
int add(Log_event *ev);
|
|
bool is_empty();
|
|
bool execute(struct rpl_group_info *rgi);
|
|
void rewind();
|
|
bool is_last(Log_event *ev) { return ev == last_added; };
|
|
};
|
|
|
|
#endif
|
|
|
|
/*
  Print the bits of bitmap BS as a string of '0'/'1' characters through
  DBUG_PRINT, using keyword N and format FRM.

  NB. the number of printed bit values is limited to sizeof(buf) - 1;
  bits beyond that are silently left out.
*/
#define DBUG_PRINT_BITSET(N,FRM,BS)                                   \
  do {                                                                \
    char buf[256];                                                    \
    uint i= 0;                                                        \
    while (i < MY_MIN(sizeof(buf) - 1, (BS)->n_bits))                 \
    {                                                                 \
      buf[i]= bitmap_is_set((BS), i) ? '1' : '0';                     \
      i++;                                                            \
    }                                                                 \
    buf[i]= '\0';                                                     \
    DBUG_PRINT((N), ((FRM), buf));                                    \
  } while (0)
|
|
|
|
#endif /* RPL_UTILITY_H */
|