play a bit with the ARM PMU

This commit is contained in:
Ciro Santilli 六四事件 法轮功
2020-11-18 00:00:00 +00:00
parent 95430c7c0c
commit d4a27987d6
7 changed files with 142 additions and 22 deletions

View File

@ -10206,6 +10206,7 @@ TODO exercise DMA on the kernel module. The `edu` hardware model has that featur
* https://stackoverflow.com/questions/32592734/are-there-any-dma-driver-example-pcie-and-fpga/44716747#44716747
* https://stackoverflow.com/questions/62831327/add-memory-device-to-qemu
* https://stackoverflow.com/questions/64539528/qemu-pci-dma-read-and-pci-dma-write-does-not-work
* https://stackoverflow.com/questions/64842929/general-protection-error-while-tring-to-perform-ioctl
===== Manipulate PCI registers directly
@ -21881,6 +21882,16 @@ and:
Due to the way that <<gem5-syscall-emulation-multithreading>> works, however, the output is more deterministic in that case; see that section for further details.
[[perf-event-open]]
==== `perf_event_open` system call
link:userland/linux/perf_event_open.c[] counts instructions of a given loop: https://stackoverflow.com/questions/13313510/quick-way-to-count-number-of-instructions-executed-in-a-c-program/64863392#64863392
Bibliography:
* `man perf_event_open`
* https://community.arm.com/developer/ip-products/system/b/embedded-blog/posts/using-the-arm-performance-monitor-unit-pmu-linux-driver
=== Linux calling conventions
A summary of results is shown at: xref:table-linux-calling-conventions[xrefstyle=full].
@ -22758,15 +22769,10 @@ produces:
There is also the RDPID instruction that reads just the processor ID, but it appears to be very new for QEMU 4.0.0 or <<p51>>, as it fails with SIGILL on both.
Bibliography: https://stackoverflow.com/questions/22310028/is-there-an-x86-instruction-to-tell-which-core-the-instruction-is-being-run-on/56622112#56622112
Bibliography:
===== ARM PMCCNTR register
TODO We didn't manage to find a working ARM analogue to <<x86-rdtsc-instruction>>: link:kernel_modules/pmccntr.c[] is oopsing, and even if it weren't, it likely wouldn't give the cycle count since boot, since it needs to be activated before it starts counting anything:
* https://stackoverflow.com/questions/40454157/is-there-an-equivalent-instruction-to-rdtsc-in-arm
* https://stackoverflow.com/questions/31620375/arm-cortex-a7-returning-pmccntr-0-in-kernel-mode-and-illegal-instruction-in-u/31649809#31649809
* https://blog.regehr.org/archives/794
* ARM has an analogous <<arm-pmccntr-register>>
* https://stackoverflow.com/questions/22310028/is-there-an-x86-instruction-to-tell-which-core-the-instruction-is-being-run-on/56622112#56622112
=== x86 thread synchronization primitives
@ -23965,6 +23971,24 @@ Bibliography:
* <<arm-lse>>
=== ARM PMU
The PMU (Performance Monitor Unit) is a unit in the ARM CPU that counts performance events of interest. These can be used to benchmark, and sometimes debug, code running on ARM CPUs.
The <<linux-kernel>> exposes some (all?) of those events through the arch-agnostic <<perf-event-open>> system call.
The PMU is exposed through <<arm-system-register-instructions>>, with registers that start with the prefix `PM*`.
Bibliography: https://community.arm.com/developer/ip-products/system/b/embedded-blog/posts/using-the-arm-performance-monitor-unit-pmu-linux-driver
==== ARM PMCCNTR register
TODO We didn't manage to find a working ARM analogue to <<x86-rdtsc-instruction>>: link:kernel_modules/pmccntr.c[] is oopsing, and even if it weren't, it likely wouldn't give the cycle count since boot, since it needs to be activated before it starts counting anything:
* https://stackoverflow.com/questions/40454157/is-there-an-equivalent-instruction-to-rdtsc-in-arm
* https://stackoverflow.com/questions/31620375/arm-cortex-a7-returning-pmccntr-0-in-kernel-mode-and-illegal-instruction-in-u/31649809#31649809
* https://blog.regehr.org/archives/794
=== ARM assembly bibliography
==== ARM non-official bibliography
@ -28171,6 +28195,12 @@ Those files also contain arch specific helpers under ifdefs like:
We try to keep as much as possible in those files. It bloats builds a little, but just makes everything simpler to understand.
Link with lkmc.o is enabled with the <<path-properties>>
....
'extra_objs_lkmc_common': False,
....
[[lkmc-home]]
==== lkmc_home

11
lkmc.c
View File

@ -57,6 +57,17 @@ void lkmc_assert_memcmp(
}
}
/* Spin for max2 outer iterations of max inner iterations each.
 *
 * The loop body is an empty inline asm statement that takes both counters
 * as read-write operands, so each iteration touches them without doing any
 * other work. The function is marked noinline.
 */
void __attribute__ ((noinline)) lkmc_busy_loop(
    unsigned long long max,
    unsigned long long max2
) {
    unsigned long long outer = 0;
    while (outer < max2) {
        unsigned long long inner = 0;
        while (inner < max) {
            __asm__ __volatile__ ("" : "+g" (outer), "+g" (inner) : :);
            inner++;
        }
        outer++;
    }
}
/* Write x to stdout as "0x" followed by eight zero-padded uppercase
 * hexadecimal digits. No trailing newline is emitted. */
void lkmc_print_hex_32(uint32_t x) {
    fprintf(stdout, "0x%08" PRIX32, x);
}

6
lkmc.h
View File

@ -34,6 +34,12 @@ void lkmc_assert_memcmp(const void *s1, const void *s2, size_t n, uint32_t line)
/* Temporary per C source file name that our examples can safely create. */
#define LKMC_TMP_FILE __FILE__ LKMC_TMP_EXT
#define LKMC_TMP_FILE_NAMED(name) __FILE__ "__" name LKMC_TMP_EXT
/* Busy loop: runs max2 outer iterations of max inner iterations each.
 *
 * The loop body is an empty inline asm statement that takes both loop
 * counters as read-write operands. Declared noinline.
 *
 * https://cirosantilli.com/linux-kernel-module-cheat#c-busy-loop */
void __attribute__ ((noinline)) lkmc_busy_loop(
unsigned long long max,
unsigned long long max2
);
#endif
/* Assert that the given branch instruction is taken. */

View File

@ -49,6 +49,7 @@ class PathProperties:
# added to baremetal examples.
'extra_objs_disable_baremetal_bootloader': False,
# We should get rid of this if we ever properly implement dependency graphs.
# Enable: https://cirosantilli.com/linux-kernel-module-cheat#lkmc-c
'extra_objs_lkmc_common': False,
'freestanding': False,
'gem5_unimplemented_instruction': False,
@ -735,7 +736,10 @@ path_properties_tuples = (
'gcc': (
{**gnu_extension_properties, **{'cc_pedantic': False}},
{
'busy_loop.c': {'baremetal': True},
'busy_loop.c': {
'baremetal': True,
'extra_objs_lkmc_common': True,
},
'openmp.c': {'cc_flags': ['-fopenmp', LF]},
}
),
@ -783,6 +787,7 @@ path_properties_tuples = (
'gem5_unimplemented_syscall': True
},
'pagemap_dump.c': {'requires_argument': True},
'perf_event_open.c': {'extra_objs_lkmc_common': True},
'poweroff.c': {'requires_sudo': True},
'proc_events.c': {'requires_sudo': True},
'proc_events.c': {'requires_sudo': True},

View File

@ -28,6 +28,11 @@ class MyMap {
auto pair = *it;
return std::make_pair(2*pair.first, 3*pair.second);
}
// TODO. How to return that new object by address?
//value_type& operator->() {
// auto pair = *it;
// return std::make_pair(2*pair.first, 3*pair.second);
//}
};
iterator begin() { return iterator(map.begin()); }
iterator end() { return iterator(map.end()); }
@ -44,7 +49,14 @@ int main() {
assert((*it++ == std::pair<const int, int>(2, 33)));
assert((*it++ == std::pair<const int, int>(4, 36)));
// TODO operator->()
it = map.begin();
//assert((it->first == 0));
auto stl_it = map.map.begin();
assert((stl_it->first == 0));
for (const auto& v : map) {
std::cout << v.first << " " << v.second << std::endl;
}
}

View File

@ -2,18 +2,7 @@
* https://cirosantilli.com/linux-kernel-module-cheat#c-busy-loop
* https://cirosantilli.com/linux-kernel-module-cheat#benchmark-emulators-on-userland-executables */
#include <stdlib.h>
/* Spin for max2 outer iterations of max inner iterations each.
 *
 * Every inner iteration executes an empty inline asm statement that lists
 * both counters as read-write operands. The function is marked noinline.
 */
void __attribute__ ((noinline)) busy_loop(
    unsigned long long max,
    unsigned long long max2
) {
    unsigned long long outer = 0;
    while (outer < max2) {
        unsigned long long inner = 0;
        while (inner < max) {
            __asm__ __volatile__ ("" : "+g" (outer), "+g" (inner) : :);
            inner++;
        }
        outer++;
    }
}
#include <lkmc.h>
int main(int argc, char **argv) {
unsigned long long max, max2;
@ -27,5 +16,5 @@ int main(int argc, char **argv) {
} else {
max2 = 1;
}
busy_loop(max, max2);
lkmc_busy_loop(max, max2);
}

View File

@ -0,0 +1,67 @@
/* https://cirosantilli.com/linux-kernel-module-cheat#perf-event-open
 *
 * Count the instructions executed by a busy loop of n iterations, where n is
 * given on the command line (default: 100).
 */
#include <asm/unistd.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <lkmc.h>
/* Thin wrapper that issues the raw perf_event_open system call, forwarding
 * all five arguments unchanged.
 *
 * The syscall result is narrowed to int before being returned as long;
 * the caller compares it against -1 to detect failure. */
static long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                int cpu, int group_fd, unsigned long flags)
{
    const int result = syscall(__NR_perf_event_open, hw_event, pid, cpu,
                               group_fd, flags);
    return result;
}
/* Count the user-space instructions executed by lkmc_busy_loop(n, 1).
 *
 * n is parsed from argv[1] (base auto-detected from the prefix) and
 * defaults to 100 when no argument is given.
 *
 * Exits with EXIT_FAILURE if the counter cannot be opened, reset, enabled,
 * disabled or read, so that a garbage count is never printed.
 */
int
main(int argc, char **argv)
{
    struct perf_event_attr pe;
    /* Initialized so that it is never read indeterminate. */
    long long count = 0;
    int fd;
    uint64_t n;

    if (argc > 1) {
        n = strtoll(argv[1], NULL, 0);
    } else {
        n = 100;
    }

    memset(&pe, 0, sizeof(struct perf_event_attr));
    pe.type = PERF_TYPE_HARDWARE;
    pe.size = sizeof(struct perf_event_attr);
    pe.config = PERF_COUNT_HW_INSTRUCTIONS;
    /* Start disabled: counting is switched on explicitly right before the
     * code under measurement. */
    pe.disabled = 1;
    /* Don't count kernel events. */
    pe.exclude_kernel = 1;
    /* Don't count hypervisor events. */
    pe.exclude_hv = 1;

    /* Per perf_event_open(2): pid 0 and cpu -1 measure the calling thread
     * on any CPU; group_fd -1 means the event is not part of a group. */
    fd = perf_event_open(&pe, 0, -1, -1, 0);
    if (fd == -1) {
        /* pe.config is a __u64, which is not unsigned long long on every
         * architecture, hence the cast to match %llx. */
        fprintf(stderr, "Error opening leader %llx\n",
                (unsigned long long)pe.config);
        exit(EXIT_FAILURE);
    }

    if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1 ||
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
        fprintf(stderr, "Error enabling counter\n");
        close(fd);
        exit(EXIT_FAILURE);
    }

    lkmc_busy_loop(n, 1);

    if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) {
        fprintf(stderr, "Error disabling counter\n");
        close(fd);
        exit(EXIT_FAILURE);
    }

    /* A short or failed read would leave count meaningless. */
    if (read(fd, &count, sizeof(count)) != (ssize_t)sizeof(count)) {
        fprintf(stderr, "Error reading counter\n");
        close(fd);
        exit(EXIT_FAILURE);
    }

    printf("Used %lld instructions\n", count);
    close(fd);
}