mirror of
https://github.com/cirosantilli/linux-kernel-module-cheat.git
synced 2026-01-13 20:12:26 +00:00
play a bit with the ARM PMU
This commit is contained in:
46
README.adoc
46
README.adoc
@ -10206,6 +10206,7 @@ TODO exercise DMA on the kernel module. The `edu` hardware model has that featur
|
||||
* https://stackoverflow.com/questions/32592734/are-there-any-dma-driver-example-pcie-and-fpga/44716747#44716747
|
||||
* https://stackoverflow.com/questions/62831327/add-memory-device-to-qemu
|
||||
* https://stackoverflow.com/questions/64539528/qemu-pci-dma-read-and-pci-dma-write-does-not-work
|
||||
* https://stackoverflow.com/questions/64842929/general-protection-error-while-tring-to-perform-ioctl
|
||||
|
||||
===== Manipulate PCI registers directly
|
||||
|
||||
@ -21881,6 +21882,16 @@ and:
|
||||
|
||||
Due to the way that <<gem5-syscall-emulation-multithreading>> works however, the output is more deterministic in that case, see that section for further details.
|
||||
|
||||
[[perf-event-open]]
|
||||
==== `perf_event_open` system call
|
||||
|
||||
link:userland/linux/perf_event_open.c[] counts instructions of a given loop: https://stackoverflow.com/questions/13313510/quick-way-to-count-number-of-instructions-executed-in-a-c-program/64863392#64863392
|
||||
|
||||
Bibliography:
|
||||
|
||||
* `man perf_event_open`
|
||||
* https://community.arm.com/developer/ip-products/system/b/embedded-blog/posts/using-the-arm-performance-monitor-unit-pmu-linux-driver
|
||||
|
||||
=== Linux calling conventions
|
||||
|
||||
A summary of results is shown at: xref:table-linux-calling-conventions[xrefstyle=full].
|
||||
@ -22758,15 +22769,10 @@ produces:
|
||||
|
||||
There is also the RDPID instruction that reads just the processor ID, but it appears to be very new for QEMU 4.0.0 or <<p51>>, as it fails with SIGILL on both.
|
||||
|
||||
Bibliography: https://stackoverflow.com/questions/22310028/is-there-an-x86-instruction-to-tell-which-core-the-instruction-is-being-run-on/56622112#56622112
|
||||
Bibliography:
|
||||
|
||||
===== ARM PMCCNTR register
|
||||
|
||||
TODO We didn't manage to find a working ARM analogue to <<x86-rdtsc-instruction>>: link:kernel_modules/pmccntr.c[] is oopsing, and even if it weren't, it likely won't give the cycle count since boot since it needs to be activated before it starts counting anything:
|
||||
|
||||
* https://stackoverflow.com/questions/40454157/is-there-an-equivalent-instruction-to-rdtsc-in-arm
|
||||
* https://stackoverflow.com/questions/31620375/arm-cortex-a7-returning-pmccntr-0-in-kernel-mode-and-illegal-instruction-in-u/31649809#31649809
|
||||
* https://blog.regehr.org/archives/794
|
||||
* ARM has an analogous <<arm-pmccntr-register>>
|
||||
* https://stackoverflow.com/questions/22310028/is-there-an-x86-instruction-to-tell-which-core-the-instruction-is-being-run-on/56622112#56622112
|
||||
|
||||
=== x86 thread synchronization primitives
|
||||
|
||||
@ -23965,6 +23971,24 @@ Bibliography:
|
||||
|
||||
* <<arm-lse>>
|
||||
|
||||
=== ARM PMU
|
||||
|
||||
The PMU (Performance Monitor Unit) is a unit in the ARM CPU that counts performance events of interest. These can be used to benchmark, and sometimes debug, code running on ARM CPUs.
|
||||
|
||||
The <<linux-kernel>> exposes some (all?) of those events through the arch-agnostic <<perf-event-open>> system call.
|
||||
|
||||
The PMU is exposed through <<arm-system-register-instructions>>, with registers that start with the prefix `PM*`.
|
||||
|
||||
Bibliography: https://community.arm.com/developer/ip-products/system/b/embedded-blog/posts/using-the-arm-performance-monitor-unit-pmu-linux-driver
|
||||
|
||||
==== ARM PMCCNTR register
|
||||
|
||||
TODO We didn't manage to find a working ARM analogue to <<x86-rdtsc-instruction>>: link:kernel_modules/pmccntr.c[] is oopsing, and even if it weren't, it likely won't give the cycle count since boot since it needs to be activated before it starts counting anything:
|
||||
|
||||
* https://stackoverflow.com/questions/40454157/is-there-an-equivalent-instruction-to-rdtsc-in-arm
|
||||
* https://stackoverflow.com/questions/31620375/arm-cortex-a7-returning-pmccntr-0-in-kernel-mode-and-illegal-instruction-in-u/31649809#31649809
|
||||
* https://blog.regehr.org/archives/794
|
||||
|
||||
=== ARM assembly bibliography
|
||||
|
||||
==== ARM non-official bibliography
|
||||
@ -28171,6 +28195,12 @@ Those files also contain arch specific helpers under ifdefs like:
|
||||
|
||||
We try to keep as much as possible in those files. It bloats builds a little, but just makes everything simpler to understand.
|
||||
|
||||
Linking with lkmc.o is enabled with the following <<path-properties>> entry:
|
||||
|
||||
....
|
||||
'extra_objs_lkmc_common': False,
|
||||
....
|
||||
|
||||
[[lkmc-home]]
|
||||
==== lkmc_home
|
||||
|
||||
|
||||
11
lkmc.c
11
lkmc.c
@ -57,6 +57,17 @@ void lkmc_assert_memcmp(
|
||||
}
|
||||
}
|
||||
|
||||
/* Spin through max2 outer rounds of max inner iterations each, doing no
 * other work. Marked noinline so the loop stays a separate function. */
void __attribute__ ((noinline)) lkmc_busy_loop(
    unsigned long long max,
    unsigned long long max2
) {
    unsigned long long outer = 0;
    while (outer < max2) {
        unsigned long long inner = 0;
        while (inner < max) {
            /* Empty asm statement that declares both counters as
             * read-write operands, so the compiler has to treat them
             * as possibly modified on every iteration. */
            __asm__ __volatile__ ("" : "+g" (outer), "+g" (inner) : :);
            inner++;
        }
        outer++;
    }
}
|
||||
|
||||
/* Write x to stdout as "0x" followed by eight zero-padded uppercase
 * hexadecimal digits. No trailing newline is printed. */
void lkmc_print_hex_32(uint32_t x) {
    fprintf(stdout, "0x%08" PRIX32, x);
}
|
||||
|
||||
6
lkmc.h
6
lkmc.h
@ -34,6 +34,12 @@ void lkmc_assert_memcmp(const void *s1, const void *s2, size_t n, uint32_t line)
|
||||
/* Temporary per C source file name that our examples can safely create. */
|
||||
#define LKMC_TMP_FILE __FILE__ LKMC_TMP_EXT
|
||||
#define LKMC_TMP_FILE_NAMED(name) __FILE__ "__" name LKMC_TMP_EXT
|
||||
|
||||
/* https://cirosantilli.com/linux-kernel-module-cheat#c-busy-loop */
|
||||
void __attribute__ ((noinline)) lkmc_busy_loop(
|
||||
unsigned long long max,
|
||||
unsigned long long max2
|
||||
);
|
||||
#endif
|
||||
|
||||
/* Assert that the given branch instruction is taken. */
|
||||
|
||||
@ -49,6 +49,7 @@ class PathProperties:
|
||||
# added to baremetal examples.
|
||||
'extra_objs_disable_baremetal_bootloader': False,
|
||||
# We should get rid of this if we ever properly implement dependency graphs.
|
||||
# Enable: https://cirosantilli.com/linux-kernel-module-cheat#lkmc-c
|
||||
'extra_objs_lkmc_common': False,
|
||||
'freestanding': False,
|
||||
'gem5_unimplemented_instruction': False,
|
||||
@ -735,7 +736,10 @@ path_properties_tuples = (
|
||||
'gcc': (
|
||||
{**gnu_extension_properties, **{'cc_pedantic': False}},
|
||||
{
|
||||
'busy_loop.c': {'baremetal': True},
|
||||
'busy_loop.c': {
|
||||
'baremetal': True,
|
||||
'extra_objs_lkmc_common': True,
|
||||
},
|
||||
'openmp.c': {'cc_flags': ['-fopenmp', LF]},
|
||||
}
|
||||
),
|
||||
@ -783,6 +787,7 @@ path_properties_tuples = (
|
||||
'gem5_unimplemented_syscall': True
|
||||
},
|
||||
'pagemap_dump.c': {'requires_argument': True},
|
||||
'perf_event_open.c': {'extra_objs_lkmc_common': True},
|
||||
'poweroff.c': {'requires_sudo': True},
|
||||
'proc_events.c': {'requires_sudo': True},
|
||||
'proc_events.c': {'requires_sudo': True},
|
||||
|
||||
@ -28,6 +28,11 @@ class MyMap {
|
||||
auto pair = *it;
|
||||
return std::make_pair(2*pair.first, 3*pair.second);
|
||||
}
|
||||
// TODO. How to return that new object by address?
|
||||
//value_type& operator->() {
|
||||
// auto pair = *it;
|
||||
// return std::make_pair(2*pair.first, 3*pair.second);
|
||||
//}
|
||||
};
|
||||
iterator begin() { return iterator(map.begin()); }
|
||||
iterator end() { return iterator(map.end()); }
|
||||
@ -44,7 +49,14 @@ int main() {
|
||||
assert((*it++ == std::pair<const int, int>(2, 33)));
|
||||
assert((*it++ == std::pair<const int, int>(4, 36)));
|
||||
|
||||
// TODO operator->()
|
||||
it = map.begin();
|
||||
//assert((it->first == 0));
|
||||
auto stl_it = map.map.begin();
|
||||
assert((stl_it->first == 0));
|
||||
|
||||
for (const auto& v : map) {
|
||||
std::cout << v.first << " " << v.second << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -2,18 +2,7 @@
|
||||
* https://cirosantilli.com/linux-kernel-module-cheat#c-busy-loop
|
||||
* https://cirosantilli.com/linux-kernel-module-cheat#benchmark-emulators-on-userland-executables */
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Run max2 outer rounds of max inner iterations each without doing any
 * other work. Marked noinline so the loop stays a separate function. */
void __attribute__ ((noinline)) busy_loop(
    unsigned long long max,
    unsigned long long max2
) {
    unsigned long long round, step;

    for (round = 0; round < max2; round++) {
        for (step = 0; step < max; step++) {
            /* Empty asm statement listing both counters as read-write
             * operands, so the compiler must assume they may change. */
            __asm__ __volatile__ ("" : "+g" (round), "+g" (step) : :);
        }
    }
}
|
||||
#include <lkmc.h>
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
unsigned long long max, max2;
|
||||
@ -27,5 +16,5 @@ int main(int argc, char **argv) {
|
||||
} else {
|
||||
max2 = 1;
|
||||
}
|
||||
busy_loop(max, max2);
|
||||
lkmc_busy_loop(max, max2);
|
||||
}
|
||||
|
||||
67
userland/linux/perf_event_open.c
Normal file
67
userland/linux/perf_event_open.c
Normal file
@ -0,0 +1,67 @@
|
||||
/* https://cirosantilli.com/linux-kernel-module-cheat#perf-event-open
|
||||
*
|
||||
Count the instructions executed by lkmc_busy_loop(n, 1), with n given from the command line (default 100).
|
||||
*/
|
||||
|
||||
#include <asm/unistd.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <lkmc.h>
|
||||
|
||||
/*
 * Invoke the perf_event_open system call through syscall().
 *
 * All arguments are forwarded unchanged. Returns whatever syscall()
 * returns; the caller in this file treats -1 as failure and any other
 * value as the event file descriptor.
 */
static long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                int cpu, int group_fd, unsigned long flags)
{
    /* syscall() already yields a long: return it directly instead of
     * narrowing it through an int first. */
    return syscall(__NR_perf_event_open, hw_event, pid, cpu,
                   group_fd, flags);
}
|
||||
|
||||
/*
 * Count the instructions executed while running lkmc_busy_loop(n, 1)
 * and print the total.
 *
 * argv[1], when present, is the iteration count n (base auto-detected
 * by strtoull); otherwise n defaults to 100.
 *
 * Kernel and hypervisor events are excluded from the count.
 *
 * Exits with EXIT_FAILURE if the counter cannot be opened, controlled
 * or read.
 */
int
main(int argc, char **argv)
{
    struct perf_event_attr pe;
    long long count = 0;
    /* Same type as the lkmc_busy_loop() parameters. */
    unsigned long long n = 100;
    ssize_t nread;
    int status = EXIT_FAILURE;
    int fd;

    if (argc > 1) {
        /* n is unsigned, so parse it with the unsigned conversion. */
        n = strtoull(argv[1], NULL, 0);
    }

    memset(&pe, 0, sizeof(pe));
    pe.type = PERF_TYPE_HARDWARE;
    pe.size = sizeof(pe);
    pe.config = PERF_COUNT_HW_INSTRUCTIONS;
    /* Created disabled: counting is switched on explicitly below, right
     * before the code under measurement. */
    pe.disabled = 1;
    pe.exclude_kernel = 1;
    /* Don't count hypervisor events. */
    pe.exclude_hv = 1;

    fd = perf_event_open(&pe, 0, -1, -1, 0);
    if (fd == -1) {
        /* pe.config is a __u64, which is not unsigned long long on
         * every ABI: cast so that it matches %llx. */
        fprintf(stderr, "Error opening leader %llx\n",
                (unsigned long long)pe.config);
        exit(EXIT_FAILURE);
    }

    if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1) {
        perror("ioctl(PERF_EVENT_IOC_RESET)");
        goto out;
    }
    if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
        perror("ioctl(PERF_EVENT_IOC_ENABLE)");
        goto out;
    }

    lkmc_busy_loop(n, 1);

    if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) {
        perror("ioctl(PERF_EVENT_IOC_DISABLE)");
        goto out;
    }

    /* Only report the counter if it was read in full; a failed or
     * short read would otherwise print a meaningless value. */
    nread = read(fd, &count, sizeof(count));
    if (nread != (ssize_t)sizeof(count)) {
        if (nread == -1) {
            perror("read");
        } else {
            fprintf(stderr, "Short read of the event counter\n");
        }
        goto out;
    }

    printf("Used %lld instructions\n", count);
    status = EXIT_SUCCESS;

out:
    close(fd);
    return status;
}
|
||||
Reference in New Issue
Block a user