MDEV-34859: Failed to initialise non-blocking API on OpenBSD arm64

Implement native my_context for arm64 (aarch64). This is more
efficient than ucontext, and also makes the non-blocking API available
on arm64 platforms that do not have ucontext such as OpenBSD.

Tested-by: Brad Smith <brad@comstyle.com>
Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
Kristian Nielsen
2024-09-04 21:30:22 +02:00
parent ae3854159a
commit d2285fb830
2 changed files with 352 additions and 8 deletions

View File

@ -32,6 +32,8 @@
#define MY_CONTEXT_USE_X86_64_GCC_ASM
#elif defined(__GNUC__) && __GNUC__ >= 3 && defined(__i386__)
#define MY_CONTEXT_USE_I386_GCC_ASM
#elif defined(__GNUC__) && __GNUC__ >= 3 && defined(__aarch64__)
#define MY_CONTEXT_USE_AARCH64_GCC_ASM
#elif defined(HAVE_UCONTEXT_H)
#define MY_CONTEXT_USE_UCONTEXT
#else
@ -107,6 +109,23 @@ struct my_context {
#endif
#ifdef MY_CONTEXT_USE_AARCH64_GCC_ASM
#include <stdint.h>
struct my_context {
uint64_t save[22];
void *stack_top;
void *stack_bot;
#ifdef HAVE_VALGRIND
unsigned int valgrind_stack_id;
#endif
#ifndef DBUG_OFF
void *dbug_state;
#endif
};
#endif
#ifdef MY_CONTEXT_DISABLE
struct my_context {
int dummy;

View File

@ -21,6 +21,9 @@
swapcontext().
*/
#include <stdint.h>
#include <stdlib.h>
#include "ma_global.h"
#include "ma_string.h"
#include "ma_context.h"
@ -167,9 +170,6 @@ my_context_destroy(struct my_context *c)
save them as we cannot know if we will yield or not in advance).
*/
#include <stdint.h>
#include <stdlib.h>
/*
Layout of saved registers etc.
Since this is accessed through gcc inline assembler, it is simpler to just
@ -204,7 +204,7 @@ my_context_spawn(struct my_context *c, void (*f)(void *), void *d)
(
"movq %%rsp, (%[save])\n\t"
"movq %[stack], %%rsp\n\t"
#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4 && !defined(__INTEL_COMPILER)
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __clang__) && !defined(__INTEL_COMPILER)
/*
This emits a DWARF DW_CFA_undefined directive to make the return address
undefined. This indicates that this is the top of the stack frame, and
@ -408,9 +408,6 @@ my_context_destroy(struct my_context *c)
save them as we cannot know if we will yield or not in advance).
*/
#include <stdint.h>
#include <stdlib.h>
/*
Layout of saved registers etc.
Since this is accessed through gcc inline assembler, it is simpler to just
@ -443,7 +440,7 @@ my_context_spawn(struct my_context *c, void (*f)(void *), void *d)
(
"movl %%esp, (%[save])\n\t"
"movl %[stack], %%esp\n\t"
#if __GNUC__ >= 4 && __GNUC_MINOR__ >= 4 && !defined(__INTEL_COMPILER)
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __clang__) && !defined(__INTEL_COMPILER)
/*
This emits a DWARF DW_CFA_undefined directive to make the return address
undefined. This indicates that this is the top of the stack frame, and
@ -625,6 +622,334 @@ my_context_destroy(struct my_context *c)
#endif /* MY_CONTEXT_USE_I386_GCC_ASM */
#ifdef MY_CONTEXT_USE_AARCH64_GCC_ASM
/*
GCC-aarch64 (arm64) implementation of my_context.
This is slightly optimized in the common case where we never yield
(eg. fetch next row and it is already fully received in buffer). In this
case we do not need to restore registers at return (though we still need to
save them as we cannot know if we will yield or not in advance).
*/
/*
Layout of saved registers etc.
Since this is accessed through gcc inline assembler, it is simpler to just
use numbers than to try to define nice constants or structs.
0 0 x19
1 8 x20
2 16 x21
...
9 72 x28
10 80 x29 (frame pointer)
11 88 sp
12 96 d8
13 104 d9
...
19 152 d15
20 160 pc for done
21 168 pc for yield/continue
*/
int
my_context_spawn(struct my_context *c, void (*f)(void *), void *d)
{
register int ret asm("w0");
register void (*f_reg)(void *) asm("x1") = f;
register void *d_reg asm("x2") = d;
register void *stack asm("x13") = c->stack_top;
/* Need this in callee-save register to preserve in function call. */
register const uint64_t *save asm("x19") = &c->save[0];
/*
There are a total of 20 callee-save registers (including frame pointer and
link register) we need to save and restore when suspending and continuing,
plus stack pointer sp and program counter pc.
However, if we never suspend, the user-supplied function will in any case
restore the callee-save registers, so we can avoid restoring them in this
case.
*/
__asm__ __volatile__
(
"mov x10, sp\n\t"
"mov sp, %[stack]\n\t"
#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4) || __clang__) && !defined(__INTEL_COMPILER)
/*
This emits a DWARF DW_CFA_undefined directive to make the return address
(UNW_AARCH64_X30) undefined. This indicates that this is the top of the
stack frame, and helps tools that use DWARF stack unwinding to obtain
stack traces. (I use numeric constant to avoid a dependency on libdwarf
includes).
*/
".cfi_escape 0x07, 30\n\t"
#endif
"stp x19, x20, [%[save], #0]\n\t"
"stp x21, x22, [%[save], #16]\n\t"
"stp x23, x24, [%[save], #32]\n\t"
"stp x25, x26, [%[save], #48]\n\t"
"stp x27, x28, [%[save], #64]\n\t"
"stp x29, x10, [%[save], #80]\n\t"
"stp d8, d9, [%[save], #96]\n\t"
"stp d10, d11, [%[save], #112]\n\t"
"stp d12, d13, [%[save], #128]\n\t"
"stp d14, d15, [%[save], #144]\n\t"
"adr x10, 1f\n\t"
"adr x11, 2f\n\t"
"stp x10, x11, [%[save], #160]\n\t"
/* Need this in x0 to follow calling convention. */
"mov x0, %[d]\n\t"
"blr %[f]\n\t"
"ldr x11, [%[save], #160]\n\t"
"br x11\n"
/*
Come here when operation is done.
We do not need to restore callee-save registers, as the called function
will do this for us if needed.
*/
"1:\n\t"
"ldr x10, [%[save], #88]\n\t"
"mov sp, x10\n\t"
"mov %w[ret], #0\n\t"
"b 3f\n"
/* Come here when operation was suspended. */
"2:\n\t"
"mov %w[ret], #1\n"
"3:\n"
: [ret] "=r" (ret),
[f] "+r" (f_reg),
[d] "+r" (d_reg),
[stack] "+r" (stack)
: [save] "r" (save)
: "x3", "x4", "x5", "x6", "x7",
"x9", "x10", "x11", "x14", "x15", "x18", "x30",
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
"memory", "cc"
);
return ret;
}
int
my_context_continue(struct my_context *c)
{
register int ret asm("w0");
/* Need this in callee-save register to preserve in function call. */
register const uint64_t *save asm("x19") = &c->save[0];
__asm__ __volatile__
(
"ldp x13, x11, [%[save], #0]\n\t"
"stp x19, x20, [%[save], #0]\n\t"
/* x19 is %[save], delay restoring it until %[save] is no longer needed. */
"mov x20, x11\n\t"
"ldp x10, x11, [%[save], #16]\n\t"
"stp x21, x22, [%[save], #16]\n\t"
"mov x21, x10\n\t"
"mov x22, x11\n\t"
"ldp x10, x11, [%[save], #32]\n\t"
"stp x23, x24, [%[save], #32]\n\t"
"mov x23, x10\n\t"
"mov x24, x11\n\t"
"ldp x10, x11, [%[save], #48]\n\t"
"stp x25, x26, [%[save], #48]\n\t"
"mov x25, x10\n\t"
"mov x26, x11\n\t"
"ldp x10, x11, [%[save], #64]\n\t"
"stp x27, x28, [%[save], #64]\n\t"
"mov x27, x10\n\t"
"mov x28, x11\n\t"
"ldp x10, x11, [%[save], #80]\n\t"
"mov x14, sp\n\t"
"stp x29, x14, [%[save], #80]\n\t"
"mov x29, x10\n\t"
"mov sp, x11\n\t"
"ldp d0, d1, [%[save], #96]\n\t"
"stp d8, d9, [%[save], #96]\n\t"
"fmov d8, d0\n\t"
"fmov d9, d1\n\t"
"ldp d0, d1, [%[save], #112]\n\t"
"stp d10, d11, [%[save], #112]\n\t"
"fmov d10, d0\n\t"
"fmov d11, d1\n\t"
"ldp d0, d1, [%[save], #128]\n\t"
"stp d12, d13, [%[save], #128]\n\t"
"fmov d12, d0\n\t"
"fmov d13, d1\n\t"
"ldp d0, d1, [%[save], #144]\n\t"
"stp d14, d15, [%[save], #144]\n\t"
"fmov d14, d0\n\t"
"fmov d15, d1\n\t"
"adr x10, 1f\n\t"
"adr x11, 2f\n\t"
"ldr x15, [%[save], #168]\n\t"
"stp x10, x11, [%[save], #160]\n\t"
"mov x19, x13\n\t"
"br x15\n"
/*
Come here when operation is done.
Be sure to use the same callee-save register for %[save] here and in
my_context_spawn(), so we preserve the value correctly at this point.
*/
"1:\n\t"
/* x19 (aka %[save]) is preserved from my_context_spawn() in this case. */
"ldr x20, [%[save], #8]\n\t"
"ldp x21, x22, [%[save], #16]\n\t"
"ldp x21, x22, [%[save], #32]\n\t"
"ldp x21, x22, [%[save], #48]\n\t"
"ldp x21, x22, [%[save], #64]\n\t"
"ldp x29, x10, [%[save], #80]\n\t"
"mov sp, x10\n\t"
"ldp d8, d9, [%[save], #96]\n\t"
"ldp d10, d11, [%[save], #112]\n\t"
"ldp d12, d13, [%[save], #128]\n\t"
"ldp d14, d15, [%[save], #144]\n\t"
"mov %w[ret], #0\n\t"
"b 3f\n"
/* Come here when operation is suspended. */
"2:\n\t"
"mov %w[ret], #1\n"
"3:\n"
: [ret] "=r" (ret)
: [save] "r" (save)
: "x1", "x2", "x3", "x4", "x5", "x6", "x7",
"x9", "x10", "x11", "x12", "x13", "x14", "x15", "x18", "x30",
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
"memory", "cc"
);
return ret;
}
int
my_context_yield(struct my_context *c)
{
register const uint64_t *save asm("x19") = &c->save[0];
__asm__ __volatile__
(
"ldp x13, x11, [%[save], #0]\n\t"
"stp x19, x20, [%[save], #0]\n\t"
/* x19 is %[save], delay restoring it until %[save] is no longer needed. */
"mov x20, x11\n\t"
"ldp x10, x11, [%[save], #16]\n\t"
"stp x21, x22, [%[save], #16]\n\t"
"mov x21, x10\n\t"
"mov x22, x11\n\t"
"ldp x10, x11, [%[save], #32]\n\t"
"stp x23, x24, [%[save], #32]\n\t"
"mov x23, x10\n\t"
"mov x24, x11\n\t"
"ldp x10, x11, [%[save], #48]\n\t"
"stp x25, x26, [%[save], #48]\n\t"
"mov x25, x10\n\t"
"mov x26, x11\n\t"
"ldp x10, x11, [%[save], #64]\n\t"
"stp x27, x28, [%[save], #64]\n\t"
"mov x27, x10\n\t"
"mov x28, x11\n\t"
"ldp x10, x11, [%[save], #80]\n\t"
"mov x14, sp\n\t"
"stp x29, x14, [%[save], #80]\n\t"
"mov x29, x10\n\t"
"mov sp, x11\n\t"
"ldp d0, d1, [%[save], #96]\n\t"
"stp d8, d9, [%[save], #96]\n\t"
"fmov d8, d0\n\t"
"fmov d9, d1\n\t"
"ldp d0, d1, [%[save], #112]\n\t"
"stp d10, d11, [%[save], #112]\n\t"
"fmov d10, d0\n\t"
"fmov d11, d1\n\t"
"ldp d0, d1, [%[save], #128]\n\t"
"stp d12, d13, [%[save], #128]\n\t"
"fmov d12, d0\n\t"
"fmov d13, d1\n\t"
"ldp d0, d1, [%[save], #144]\n\t"
"stp d14, d15, [%[save], #144]\n\t"
"fmov d14, d0\n\t"
"fmov d15, d1\n\t"
"ldr x11, [%[save], #168]\n\t"
"adr x10, 1f\n\t"
"str x10, [%[save], #168]\n\t"
"mov x19, x13\n\t"
"br x11\n"
"1:\n"
:
: [save] "r" (save)
: "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
"x9", "x10", "x11", "x12", "x13", "x14", "x15", "x18", "x30",
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
"memory", "cc"
);
return 0;
}
int
my_context_init(struct my_context *c, size_t stack_size)
{
memset(c, 0, sizeof(*c));
if (!(c->stack_bot= malloc(stack_size)))
return -1; /* Out of memory */
/*
Align stack to 16-byte boundary.
Also put two zero words at the top of the stack.
*/
c->stack_top= (void *)
(( ((intptr)c->stack_bot + stack_size) & ~(intptr)0xf) - 16);
memset(c->stack_top, 0, 16);
#ifdef HAVE_VALGRIND
c->valgrind_stack_id=
VALGRIND_STACK_REGISTER(c->stack_bot, c->stack_top);
#endif
return 0;
}
void
my_context_destroy(struct my_context *c)
{
if (c->stack_bot)
{
free(c->stack_bot);
#ifdef HAVE_VALGRIND
VALGRIND_STACK_DEREGISTER(c->valgrind_stack_id);
#endif
}
}
#endif /* MY_CONTEXT_USE_AARCH64_GCC_ASM */
#ifdef MY_CONTEXT_USE_WIN32_FIBERS
int
my_context_yield(struct my_context *c)