arm/174461: [patch] Fix off-by-one in arm9/arm10 cache maintenance routines
Ian Lepore
freebsd at damnhippie.dyndns.org
Sat Dec 15 20:00:00 UTC 2012
>Number: 174461
>Category: arm
>Synopsis: [patch] Fix off-by-one in arm9/arm10 cache maintenance routines
>Confidential: no
>Severity: serious
>Priority: medium
>Responsible: freebsd-arm
>State: open
>Quarter:
>Keywords:
>Date-Required:
>Class: sw-bug
>Submitter-Id: current-users
>Arrival-Date: Sat Dec 15 20:00:00 UTC 2012
>Closed-Date:
>Last-Modified:
>Originator: Ian Lepore <freebsd at damnhippie.dyndns.org>
>Release: FreeBSD 10.0-CURRENT arm
>Organization:
Symmetricom, Inc.
>Environment:
FreeBSD dpcur 10.0-CURRENT FreeBSD 10.0-CURRENT #23 r243920M: Sat Dec 15 11:31:47 MST 2012 ilepore at revolution.hippie.lan:/local/build/staging/freebsd/dp10/obj/arm.arm/local/build/staging/freebsd/dp10/src/sys/DP arm
>Description:
In all the routines that loop through a range of virtual addresses, the loop
is controlled by subtracting the cache line size from the total length of the
request. After the subtract, a 'bpl' instruction was used, which branches if
the result of the subtraction is zero or greater, but we need to exit the
loop when the count hits zero. Thus, all the bpl instructions in those loops
have been changed to 'bhi' (branch if unsigned higher, i.e. only while the remaining count is still greater than zero).
In addition, the two routines that walk through the cache using set-and-index
were correct, but confusing. The loop control for those has been simplified,
just so that it's easier to see by examination that the code is correct.
Routines for other arm architectures and generations still have the bpl
instruction, but compensate for the off-by-one situation by decrementing
the count register by one before entering the loop. Just for the sake of
consistency, these should probably all be changed to remove the decrement
and use the correct branch instruction. That would then make it easier to
see what appears superficially to be lots of duplication in these routines,
and some consolidation could happen.
>How-To-Repeat:
Build a kernel for Atmel ARM with INVARIANTS and INVARIANT_SUPPORT enabled.
When it boots, the uart devices fail to instantiate.
The reason was that the driver allocates a dma tag, then a dma buffer,
and accidentally the tag ended up laid out in memory immediately after the
buffer. When the driver did the bus_dmamap_sync(PREREAD) on the buffer, the
off-by-one error caused the cache line immediately following that buffer (the
first 32 bytes of the dma tag) to be erroneously invalidated. Because of
INVARIANTS, the physical memory under that dirty cache line was full of
0xdeadc0de, so the invalidate operation corrupted the dma tag. When the
driver then attempted to allocate another buffer using that tag it failed,
because the 0xdeadc0de values in the tag led to insane allocation decisions
that caused malloc() to fail.
Without INVARIANTS, either things end up in different places in memory, or
the values in underlying memory that become exposed after the bad invalidate
are harmless; either way, it accidentally works right most of the time.
>Fix:
--- arm9_arm10_cacheops_offbyone_fix.diff begins here ---
diff -r 0f2004466772 sys/arm/arm/cpufunc_asm_arm10.S
--- sys/arm/arm/cpufunc_asm_arm10.S Thu Dec 06 08:24:00 2012 -0700
+++ sys/arm/arm/cpufunc_asm_arm10.S Sat Dec 15 11:30:41 2012 -0700
@@ -87,7 +87,7 @@ ENTRY_NP(arm10_icache_sync_range)
mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */
add r0, r0, ip
subs r1, r1, ip
- bpl .Larm10_sync_next
+ bhi .Larm10_sync_next
mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */
bx lr
@@ -108,12 +108,10 @@ ENTRY_NP(arm10_icache_sync_all)
orr ip, s_max, i_max
.Lnext_index:
mcr p15, 0, ip, c7, c10, 2 /* Clean D cache SE with Set/Index */
- sub ip, ip, i_inc
- tst ip, i_max /* Index 0 is last one */
- bne .Lnext_index /* Next index */
- mcr p15, 0, ip, c7, c10, 2 /* Clean D cache SE with Set/Index */
+ subs ip, ip, i_inc
+ bhs .Lnext_index /* Next index */
subs s_max, s_max, s_inc
- bpl .Lnext_set /* Next set */
+ bhs .Lnext_set /* Next set */
mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */
bx lr
@@ -133,7 +131,7 @@ ENTRY(arm10_dcache_wb_range)
mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */
add r0, r0, ip
subs r1, r1, ip
- bpl .Larm10_wb_next
+ bhi .Larm10_wb_next
mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */
bx lr
@@ -150,7 +148,7 @@ ENTRY(arm10_dcache_wbinv_range)
mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */
add r0, r0, ip
subs r1, r1, ip
- bpl .Larm10_wbinv_next
+ bhi .Larm10_wbinv_next
mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */
bx lr
@@ -171,7 +169,7 @@ ENTRY(arm10_dcache_inv_range)
mcr p15, 0, r0, c7, c6, 1 /* Invalidate D cache SE with VA */
add r0, r0, ip
subs r1, r1, ip
- bpl .Larm10_inv_next
+ bhi .Larm10_inv_next
mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */
bx lr
@@ -189,7 +187,7 @@ ENTRY(arm10_idcache_wbinv_range)
mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */
add r0, r0, ip
subs r1, r1, ip
- bpl .Larm10_id_wbinv_next
+ bhi .Larm10_id_wbinv_next
mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */
bx lr
@@ -211,12 +209,10 @@ ENTRY(arm10_dcache_wbinv_all)
orr ip, s_max, i_max
.Lnext_index_inv:
mcr p15, 0, ip, c7, c14, 2 /* Purge D cache SE with Set/Index */
- sub ip, ip, i_inc
- tst ip, i_max /* Index 0 is last one */
- bne .Lnext_index_inv /* Next index */
- mcr p15, 0, ip, c7, c14, 2 /* Purge D cache SE with Set/Index */
+ subs ip, ip, i_inc
+ bhs .Lnext_index_inv /* Next index */
subs s_max, s_max, s_inc
- bpl .Lnext_set_inv /* Next set */
+ bhs .Lnext_set_inv /* Next set */
mcr p15, 0, r0, c7, c10, 4 /* drain the write buffer */
bx lr
diff -r 0f2004466772 sys/arm/arm/cpufunc_asm_arm9.S
--- sys/arm/arm/cpufunc_asm_arm9.S Thu Dec 06 08:24:00 2012 -0700
+++ sys/arm/arm/cpufunc_asm_arm9.S Sat Dec 15 11:30:41 2012 -0700
@@ -81,7 +81,7 @@ ENTRY_NP(arm9_icache_sync_range)
mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */
add r0, r0, ip
subs r1, r1, ip
- bpl .Larm9_sync_next
+ bhi .Larm9_sync_next
mov pc, lr
ENTRY_NP(arm9_icache_sync_all)
@@ -101,12 +101,10 @@ ENTRY_NP(arm9_icache_sync_all)
orr ip, s_max, i_max
.Lnext_index:
mcr p15, 0, ip, c7, c10, 2 /* Clean D cache SE with Set/Index */
- sub ip, ip, i_inc
- tst ip, i_max /* Index 0 is last one */
- bne .Lnext_index /* Next index */
- mcr p15, 0, ip, c7, c10, 2 /* Clean D cache SE with Set/Index */
+ subs ip, ip, i_inc
+ bhs .Lnext_index /* Next index */
subs s_max, s_max, s_inc
- bpl .Lnext_set /* Next set */
+ bhs .Lnext_set /* Next set */
mov pc, lr
.Larm9_line_size:
@@ -125,7 +123,7 @@ ENTRY(arm9_dcache_wb_range)
mcr p15, 0, r0, c7, c10, 1 /* Clean D cache SE with VA */
add r0, r0, ip
subs r1, r1, ip
- bpl .Larm9_wb_next
+ bhi .Larm9_wb_next
mov pc, lr
ENTRY(arm9_dcache_wbinv_range)
@@ -141,7 +139,7 @@ ENTRY(arm9_dcache_wbinv_range)
mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */
add r0, r0, ip
subs r1, r1, ip
- bpl .Larm9_wbinv_next
+ bhi .Larm9_wbinv_next
mov pc, lr
/*
@@ -161,7 +159,7 @@ ENTRY(arm9_dcache_inv_range)
mcr p15, 0, r0, c7, c6, 1 /* Invalidate D cache SE with VA */
add r0, r0, ip
subs r1, r1, ip
- bpl .Larm9_inv_next
+ bhi .Larm9_inv_next
mov pc, lr
ENTRY(arm9_idcache_wbinv_range)
@@ -178,7 +176,7 @@ ENTRY(arm9_idcache_wbinv_range)
mcr p15, 0, r0, c7, c14, 1 /* Purge D cache SE with VA */
add r0, r0, ip
subs r1, r1, ip
- bpl .Larm9_id_wbinv_next
+ bhi .Larm9_id_wbinv_next
mov pc, lr
ENTRY_NP(arm9_idcache_wbinv_all)
@@ -199,12 +197,10 @@ ENTRY(arm9_dcache_wbinv_all)
orr ip, s_max, i_max
.Lnext_index_inv:
mcr p15, 0, ip, c7, c14, 2 /* Purge D cache SE with Set/Index */
- sub ip, ip, i_inc
- tst ip, i_max /* Index 0 is last one */
- bne .Lnext_index_inv /* Next index */
- mcr p15, 0, ip, c7, c14, 2 /* Purge D cache SE with Set/Index */
+ subs ip, ip, i_inc
+ bhs .Lnext_index_inv /* Next index */
subs s_max, s_max, s_inc
- bpl .Lnext_set_inv /* Next set */
+ bhs .Lnext_set_inv /* Next set */
mov pc, lr
.Larm9_cache_data:
--- arm9_arm10_cacheops_offbyone_fix.diff ends here ---
>Release-Note:
>Audit-Trail:
>Unformatted:
More information about the freebsd-arm
mailing list