amd64 fork/swap data corruptions: A ~110 line C program demonstrating an example (Pine64+ 2GB context)
Mark Millard
markmi at dsl-only.net
Tue Mar 14 06:52:51 UTC 2017
I'm still at a loss about how to figure out what stages are messed
up. (Memory coherency? Some memory not swapped out? Bad data swapped
out? Wrong data swapped in?)
But at least I've found a much smaller/simpler example to demonstrate
some problem with in my Pine64+_ 2GB context.
The Pine64+ 2GB is the only amd64 context that I have access to.
The following program fails its check for data
having its expected byte pattern in dynamically
allocated memory after a fork/swap-out/swap-in
sequence.
I'll note that the program sleeps for 60s after
forking to give time to do something else to
cause the parent and child processes to swap
out (RES=0 as seen in top).
Note the source code line:
// test_check(); // Adding this line prevents failure.
It seem that accessing the region contents before forking
and swapping avoids the problem. But there is a problem
if the region was only written-to before the fork/swap.
Another point is the size of the region matters: <= 14K Bytes
fails and > 14K Bytes works for as much has I have tested.
# more swap_testing.c
// swap_testing.c
// Built via (c++ was clang++ 4.0 in my case):
//
// cc -g -std=c11 -Wpedantic swap_testing.c
// -O0 and -O2 also gets the problem.
#include <unistd.h> // for fork(), sleep(.)
#include <sys/types.h> // for pid_t
#include <sys/wait.h> // for wait(.)
extern void test_setup(void); // Sets up the memory byte pattern.
extern void test_check(void); // Tests the memory byte pattern.
int main(void)
{
test_setup();
// test_check(); // Adding this line prevents failure.
pid_t pid = fork();
int wait_status = 0;;
if (0<pid) { wait(&wait_status); }
if (-1!=wait_status && 0<=pid)
{
if (0==pid)
{
sleep(60);
// During this manually force this process to
// swap out. I use something like:
// stress -m 1 --vm-bytes 1800M
// in another shell and ^C'ing it after top
// shows the swapped status desired. 1800M
// just happened to work on the Pine64+ 2GB
// that I was using.
}
test_check();
}
}
// The memory and test code follows.
#include <stdbool.h> // for bool, true, false
#include <stddef.h> // for size_t, NULL
#include <stdlib.h> // for malloc(.), free(.)
#include <signal.h> // for raise(.), SIGABRT
#define region_size (14u*1024u)
// Bad dyn_region pattern, parent and child
// processes:
// 256u, 4u*1024u, 8u*1024u, 9u*1024u,
// 12u*1024u, 14u*1024u
// Works:
// 14u*1024u+1u, 15u*1024u, 16u*1024u,
// 32u*1024u, 256u*1024u*1024u
typedef volatile unsigned char value_type;
struct region_struct { value_type array[region_size]; };
typedef struct region_struct region;
static region gbl_region;
static region * volatile dyn_region = NULL;
static value_type value(size_t v) { return (value_type)v; }
void test_setup(void) {
dyn_region = malloc(sizeof(region));
if (!dyn_region) raise(SIGABRT);
for(size_t i=0u; i<region_size; i++) {
(*dyn_region).array[i] = gbl_region.array[i] = value(i);
}
}
static volatile bool gbl_failed = false; // Until potentially disproved
static volatile size_t gbl_pos = 0u;
static volatile bool dyn_failed = false; // Until potentially disproved
static volatile size_t dyn_pos = 0u;
void test_check(void) {
while (!gbl_failed && gbl_pos<region_size) {
gbl_failed = (value(gbl_pos) != gbl_region.array[gbl_pos]);
gbl_pos++;
}
while (!dyn_failed && dyn_pos<region_size) {
dyn_failed = (value(dyn_pos) != (*dyn_region).array[dyn_pos]);
// Note: When the memory pattern fails this case is that
// records the failure.
dyn_pos++;
}
if (gbl_failed) raise(SIGABRT);
if (dyn_failed) raise(SIGABRT); // lldb reports this line for the __raise call.
// when it fails (both parent and child processes).
}
Other details from lldb (not using -O2 so things are
simpler, not presented in the order examined):
# lldb a.out -c /var/crash/a.out.11575.core
(lldb) target create "a.out" --core "/var/crash/a.out.11575.core"
Core file '/var/crash/a.out.11575.core' (aarch64) was loaded.
(lldb) bt
* thread #1, name = 'a.out', stop reason = signal SIGABRT
* frame #0: 0x0000000040113d38 libc.so.7`_thr_kill + 8
frame #1: libc.so.7`__raise(s=6) at raise.c:52
frame #2: a.out`test_check at swap_testing.c:103
frame #3: a.out`main at swap_testing.c:42
frame #4: 0x0000000000020184 a.out`__start + 364
frame #5: ld-elf.so.1`.rtld_start at rtld_start.S:41
(lldb) up 2
frame #2: a.out`test_check at swap_testing.c:103
100 }
101
102 if (gbl_failed) raise(SIGABRT);
-> 103 if (dyn_failed) raise(SIGABRT); // lldb reports this line for the __raise call.
104 // when it fails (both parent and child processes).
105 }
(lldb) print dyn_pos
(size_t) $0 = 2
(That is one after the failure position.)
(lldb) print dyn_region
(region *volatile) $3 = 0x0000000040616000
(lldb) print *dyn_region
(region) $1 = {
array = {
[0] = '\0'
[1] = '\0'
[2] = '\0'
. . . (all '\0' bytes) . . .
[251] = '\0'
[252] = '\0'
[253] = '\0'
[254] = '\0'
[255] = '\0'
...
}
}
(lldb) print gbl_region
(region) $2 = {
array = {
[0] = '\0'
[1] = '\x01'
[2] = '\x02'
. . .
[251] = '\xfb'
[252] = '\xfc'
[253] = '\xfd'
[254] = '\xfe'
[255] = '\xff'
...
}
}
(lldb) disass -n main
a.out`main:
0x2022c <+0>: sub sp, sp, #0x30 ; =0x30
0x20230 <+4>: stp x29, x30, [sp, #0x20]
0x20234 <+8>: add x29, sp, #0x20 ; =0x20
0x20238 <+12>: stur wzr, [x29, #-0x4]
0x2023c <+16>: bl 0x202b0 ; test_setup at swap_testing.c:74
0x20240 <+20>: bl 0x20580 ; symbol stub for: fork
0x20244 <+24>: mov w8, wzr
0x20248 <+28>: stur w0, [x29, #-0x8]
0x2024c <+32>: stur wzr, [x29, #-0xc]
0x20250 <+36>: ldur w0, [x29, #-0x8]
0x20254 <+40>: cmp w8, w0
0x20258 <+44>: b.ge 0x20268 ; <+60> at swap_testing.c
0x2025c <+48>: sub x0, x29, #0xc ; =0xc
0x20260 <+52>: bl 0x20590 ; symbol stub for: wait
0x20264 <+56>: str w0, [sp, #0x10]
0x20268 <+60>: mov w8, #-0x1
0x2026c <+64>: ldur w9, [x29, #-0xc]
0x20270 <+68>: cmp w8, w9
0x20274 <+72>: b.eq 0x202a0 ; <+116> at swap_testing.c:44
0x20278 <+76>: mov w8, wzr
0x2027c <+80>: ldur w9, [x29, #-0x8]
0x20280 <+84>: cmp w8, w9
0x20284 <+88>: b.gt 0x202a0 ; <+116> at swap_testing.c:44
0x20288 <+92>: ldur w8, [x29, #-0x8]
0x2028c <+96>: cbnz w8, 0x2029c ; <+112> at swap_testing.c:42
0x20290 <+100>: orr w0, wzr, #0x3c
0x20294 <+104>: bl 0x205a0 ; symbol stub for: sleep
0x20298 <+108>: str w0, [sp, #0xc]
0x2029c <+112>: bl 0x20348 ; test_check at swap_testing.c:89
0x202a0 <+116>: ldur w0, [x29, #-0x4]
0x202a4 <+120>: ldp x29, x30, [sp, #0x20]
0x202a8 <+124>: add sp, sp, #0x30 ; =0x30
0x202ac <+128>: ret
(lldb) disass -n value
a.out`value:
0x204cc <+0>: sub sp, sp, #0x10 ; =0x10
0x204d0 <+4>: str x0, [sp, #0x8]
0x204d4 <+8>: ldrb w8, [sp, #0x8]
0x204d8 <+12>: mov w1, w8
0x204dc <+16>: mov w0, w8
0x204e0 <+20>: str w1, [sp, #0x4]
0x204e4 <+24>: add sp, sp, #0x10 ; =0x10
0x204e8 <+28>: ret
(lldb) disass -n test_setup
a.out`test_setup:
0x202b0 <+0>: sub sp, sp, #0x20 ; =0x20
0x202b4 <+4>: stp x29, x30, [sp, #0x10]
0x202b8 <+8>: add x29, sp, #0x10 ; =0x10
0x202bc <+12>: orr x0, xzr, #0x3800
0x202c0 <+16>: bl 0x205b0 ; symbol stub for: malloc
0x202c4 <+20>: adrp x30, 48
0x202c8 <+24>: add x30, x30, #0x0 ; =0x0
0x202cc <+28>: str x0, [x30]
0x202d0 <+32>: ldr x0, [x30]
0x202d4 <+36>: cbnz x0, 0x202e4 ; <+52> at swap_testing.c:78
0x202d8 <+40>: orr w0, wzr, #0x6
0x202dc <+44>: bl 0x205c0 ; symbol stub for: raise
0x202e0 <+48>: str w0, [sp, #0x4]
0x202e4 <+52>: str xzr, [sp, #0x8]
0x202e8 <+56>: orr x8, xzr, #0x3800
0x202ec <+60>: ldr x9, [sp, #0x8]
0x202f0 <+64>: cmp x9, x8
0x202f4 <+68>: b.hs 0x2033c ; <+140> at swap_testing.c:81
0x202f8 <+72>: ldr x0, [sp, #0x8]
0x202fc <+76>: bl 0x204cc ; value at swap_testing.c:72
0x20300 <+80>: adrp x30, 48
0x20304 <+84>: add x30, x30, #0x0 ; =0x0
0x20308 <+88>: adrp x8, 48
0x2030c <+92>: add x8, x8, #0x8 ; =0x8
0x20310 <+96>: ldr x9, [sp, #0x8]
0x20314 <+100>: add x8, x8, x9
0x20318 <+104>: strb w0, [x8]
0x2031c <+108>: ldr x8, [x30]
0x20320 <+112>: ldr x9, [sp, #0x8]
0x20324 <+116>: add x8, x8, x9
0x20328 <+120>: strb w0, [x8]
0x2032c <+124>: ldr x8, [sp, #0x8]
0x20330 <+128>: add x8, x8, #0x1 ; =0x1
0x20334 <+132>: str x8, [sp, #0x8]
0x20338 <+136>: b 0x202e8 ; <+56> at swap_testing.c
0x2033c <+140>: ldp x29, x30, [sp, #0x10]
0x20340 <+144>: add sp, sp, #0x20 ; =0x20
0x20344 <+148>: ret
(lldb) disass -n test_check
a.out`test_check:
0x20348 <+0>: sub sp, sp, #0x20 ; =0x20
0x2034c <+4>: stp x29, x30, [sp, #0x10]
0x20350 <+8>: add x29, sp, #0x10 ; =0x10
0x20354 <+12>: b 0x20358 ; <+16> at swap_testing.c
0x20358 <+16>: mov w8, wzr
0x2035c <+20>: adrp x9, 51
0x20360 <+24>: add x9, x9, #0x808 ; =0x808
0x20364 <+28>: ldrb w10, [x9]
0x20368 <+32>: stur w8, [x29, #-0x4]
0x2036c <+36>: tbnz w10, #0x0, 0x2038c ; <+68> at swap_testing.c
0x20370 <+40>: orr x8, xzr, #0x3800
0x20374 <+44>: adrp x9, 51
0x20378 <+48>: add x9, x9, #0x810 ; =0x810
0x2037c <+52>: ldr x9, [x9]
0x20380 <+56>: cmp x9, x8
0x20384 <+60>: cset w10, lo
0x20388 <+64>: stur w10, [x29, #-0x4]
0x2038c <+68>: ldur w8, [x29, #-0x4]
0x20390 <+72>: tbz w8, #0x0, 0x203ec ; <+164> at swap_testing.c:95
0x20394 <+76>: adrp x8, 51
0x20398 <+80>: add x8, x8, #0x810 ; =0x810
0x2039c <+84>: ldr x0, [x8]
0x203a0 <+88>: bl 0x204cc ; value at swap_testing.c:72
0x203a4 <+92>: adrp x8, 51
0x203a8 <+96>: add x8, x8, #0x810 ; =0x810
0x203ac <+100>: adrp x30, 51
0x203b0 <+104>: add x30, x30, #0x808 ; =0x808
0x203b4 <+108>: adrp x9, 48
0x203b8 <+112>: add x9, x9, #0x8 ; =0x8
0x203bc <+116>: uxtb w0, w0
0x203c0 <+120>: ldr x10, [x8]
0x203c4 <+124>: add x9, x9, x10
0x203c8 <+128>: ldrb w11, [x9]
0x203cc <+132>: cmp w0, w11
0x203d0 <+136>: cset w11, ne
0x203d4 <+140>: and w11, w11, #0x1
0x203d8 <+144>: strb w11, [x30]
0x203dc <+148>: ldr x9, [x8]
0x203e0 <+152>: add x9, x9, #0x1 ; =0x1
0x203e4 <+156>: str x9, [x8]
0x203e8 <+160>: b 0x20358 ; <+16> at swap_testing.c
0x203ec <+164>: b 0x203f0 ; <+168> at swap_testing.c
0x203f0 <+168>: mov w8, wzr
0x203f4 <+172>: adrp x9, 51
0x203f8 <+176>: add x9, x9, #0x818 ; =0x818
0x203fc <+180>: ldrb w10, [x9]
0x20400 <+184>: str w8, [sp, #0x8]
0x20404 <+188>: tbnz w10, #0x0, 0x20424 ; <+220> at swap_testing.c
0x20408 <+192>: orr x8, xzr, #0x3800
0x2040c <+196>: adrp x9, 51
0x20410 <+200>: add x9, x9, #0x820 ; =0x820
0x20414 <+204>: ldr x9, [x9]
0x20418 <+208>: cmp x9, x8
0x2041c <+212>: cset w10, lo
0x20420 <+216>: str w10, [sp, #0x8]
0x20424 <+220>: ldr w8, [sp, #0x8]
0x20428 <+224>: tbz w8, #0x0, 0x20488 ; <+320> at swap_testing.c
0x2042c <+228>: adrp x8, 51
0x20430 <+232>: add x8, x8, #0x820 ; =0x820
0x20434 <+236>: ldr x0, [x8]
0x20438 <+240>: bl 0x204cc ; value at swap_testing.c:72
0x2043c <+244>: adrp x8, 51
0x20440 <+248>: add x8, x8, #0x820 ; =0x820
0x20444 <+252>: adrp x30, 51
0x20448 <+256>: add x30, x30, #0x818 ; =0x818
0x2044c <+260>: adrp x9, 48
0x20450 <+264>: add x9, x9, #0x0 ; =0x0
0x20454 <+268>: uxtb w0, w0
0x20458 <+272>: ldr x9, [x9]
0x2045c <+276>: ldr x10, [x8]
0x20460 <+280>: add x9, x9, x10
0x20464 <+284>: ldrb w11, [x9]
0x20468 <+288>: cmp w0, w11
0x2046c <+292>: cset w11, ne
0x20470 <+296>: and w11, w11, #0x1
0x20474 <+300>: strb w11, [x30]
0x20478 <+304>: ldr x9, [x8]
0x2047c <+308>: add x9, x9, #0x1 ; =0x1
0x20480 <+312>: str x9, [x8]
0x20484 <+316>: b 0x203f0 ; <+168> at swap_testing.c
0x20488 <+320>: adrp x8, 51
0x2048c <+324>: add x8, x8, #0x808 ; =0x808
0x20490 <+328>: ldrb w9, [x8]
0x20494 <+332>: tbz w9, #0x0, 0x204a4 ; <+348> at swap_testing.c
0x20498 <+336>: orr w0, wzr, #0x6
0x2049c <+340>: bl 0x205c0 ; symbol stub for: raise
0x204a0 <+344>: str w0, [sp, #0x4]
0x204a4 <+348>: adrp x8, 51
0x204a8 <+352>: add x8, x8, #0x818 ; =0x818
0x204ac <+356>: ldrb w9, [x8]
0x204b0 <+360>: tbz w9, #0x0, 0x204c0 ; <+376> at swap_testing.c:105
0x204b4 <+364>: orr w0, wzr, #0x6
0x204b8 <+368>: bl 0x205c0 ; symbol stub for: raise
-> 0x204bc <+372>: str w0, [sp]
0x204c0 <+376>: ldp x29, x30, [sp, #0x10]
0x204c4 <+380>: add sp, sp, #0x20 ; =0x20
0x204c8 <+384>: ret
# uname -apKU
FreeBSD pine64 12.0-CURRENT FreeBSD 12.0-CURRENT r314638M arm64 aarch64 1200023 1200023
buildworld buildlkernel did not have MALLOC_PRODUCTION= defined. The kernel is a
non-debug kernel. (Previous to these experiments my other corruption examples
were not caught by a debug kernel. I'm not hopeful that this simpler context
would either.)
===
Mark Millard
markmi at dsl-only.net
More information about the freebsd-arm
mailing list