/*
 * Test patching code, running in one thread, from another thread.
 *
 * The Intel SDM calls this "cross-modifying code" and recommends a special
 * sequence, which requires both threads to cooperate.
 *
 * The Linux kernel uses a different sequence that does not require
 * cooperation and involves patching the first byte with int3.
 *
 * Finally, there is user-mode software out there that simply uses atomics,
 * and that seems to be good enough in practice. Test that QEMU has no
 * problems with this as well.
 */

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

void add1_or_nop(long *x);
asm(".pushsection .rwx,\"awx\",@progbits\n"
    ".globl add1_or_nop\n"
    /* addq $0x1,(%rdi) */
    "add1_or_nop: .byte 0x48, 0x83, 0x07, 0x01\n"
    "ret\n"
    ".popsection\n");
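
/*
 * Both the addq encoding above and the nop that later replaces it are exactly
 * 4 bytes, so the patcher thread below can swap the complete instruction with
 * a single 32-bit atomic exchange; the thread executing add1_or_nop should
 * only ever observe one of the two full encodings, never a torn mix.
 */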

#define THREAD_WAIT 0
#define THREAD_PATCH 1
#define THREAD_STOP 2

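/*
 * Patcher thread, driven by main() through the command word passed in arg.
 * While the command is THREAD_PATCH, it keeps exchanging the first four bytes
 * of add1_or_nop with the saved alternative encoding, so the function flips
 * back and forth between addq and nop while the main thread is calling it.
 */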
static void *thread_func(void *arg)
{
    int val = 0x0026748d; /* nop */

    while (true) {
        switch (__atomic_load_n((int *)arg, __ATOMIC_SEQ_CST)) {
        case THREAD_WAIT:
            break;
        case THREAD_PATCH:
            val = __atomic_exchange_n((int *)&add1_or_nop, val,
                                      __ATOMIC_SEQ_CST);
            break;
        case THREAD_STOP:
            return NULL;
        default:
            assert(false);
            __builtin_unreachable();
        }
    }
}

#define INITIAL 42
#define COUNT 1000000

int main(void)
{
    int command = THREAD_WAIT;
    pthread_t thread;
    long x = INITIAL;
    int err;
    int i;

    err = pthread_create(&thread, NULL, &thread_func, &command);
    assert(err == 0);

    /* Start patching and hammer the function while it is being patched. */
    __atomic_store_n(&command, THREAD_PATCH, __ATOMIC_SEQ_CST);
    for (i = 0; i < COUNT; i++) {
        add1_or_nop(&x);
    }
    __atomic_store_n(&command, THREAD_STOP, __ATOMIC_SEQ_CST);

    err = pthread_join(thread, NULL);
    assert(err == 0);

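    /*
     * Every call executed either the addq (adding 1) or the nop (adding
     * nothing), so the final value must lie between INITIAL and
     * INITIAL + COUNT.
     */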
    assert(x >= INITIAL);
    assert(x <= INITIAL + COUNT);

    return EXIT_SUCCESS;
}