svn commit: r266353 - in stable/10/sys/arm: arm include
Ian Lepore
ian at FreeBSD.org
Sat May 17 21:07:55 UTC 2014
Author: ian
Date: Sat May 17 21:07:54 2014
New Revision: 266353
URL: http://svnweb.freebsd.org/changeset/base/266353
Log:
MFC r264128, r264129, r264130, r264135:
Fix TTB set operation for armv7. Perform synchronization (by "isb" barrier)
after TTB is set.
Fix TLB maintenance issues for armv6 and armv7.
- Add cpu_cpwait to comply with the convention.
- Add missing TLB invalidations, especially in pmap_kenter & pmap_kremove
with distinguishing between D and ID pages.
- Modify pmap init/bootstrap invalidations to ID, just to be safe.
- Fix TLB-inv and PTE_SYNC ordering.
Allocate per-cpu resources for doing pmap_zero_page() and pmap_copy_page().
This is a performance enhancement rather than a bugfix.
We don't support any ARM systems with an ISA bus and don't need a freelist
of memory to support ISA addressing limitations.
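For context, the per-CPU copy/zero machinery added here follows a simple
pattern: pin to the current CPU, take that CPU's czpages lock, map the target
page at the per-CPU window, sync the PTE and invalidate its TLB entry, do the
zero or copy, clean the caches, then unlock and unpin. The sketch below is a
condensed illustration of the zeroing path drawn from the diff; the function
name czpages_zero_sketch is hypothetical and not part of the commit.

	static void
	czpages_zero_sketch(vm_paddr_t pa)
	{
		struct czpages *czp;

		sched_pin();				/* stay on this CPU */
		czp = &cpu_czpages[PCPU_GET(cpuid)];
		mtx_lock(&czp->lock);

		/* Map the page at this CPU's window, sync the PTE, invalidate TLB. */
		*czp->dstptep = L2_S_PROTO | pa | pte_l2_s_cache_mode | L2_S_REF;
		pmap_set_prot(czp->dstptep, VM_PROT_WRITE, 0);
		PTE_SYNC(czp->dstptep);
		cpu_tlb_flushD_SE(czp->dstva);
		cpu_cpwait();				/* wait for cp15 ops to complete */

		bzero_page(czp->dstva);

		/* Clean caches so later non-cached mappings see the data. */
		cpu_idcache_wbinv_range(czp->dstva, PAGE_SIZE);
		pmap_l2cache_wbinv_range(czp->dstva, pa, PAGE_SIZE);

		mtx_unlock(&czp->lock);
		sched_unpin();
	}
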
Modified:
stable/10/sys/arm/arm/cpufunc_asm_armv7.S
stable/10/sys/arm/arm/pmap-v6.c
stable/10/sys/arm/include/vmparam.h
Directory Properties:
stable/10/ (props changed)
Modified: stable/10/sys/arm/arm/cpufunc_asm_armv7.S
==============================================================================
--- stable/10/sys/arm/arm/cpufunc_asm_armv7.S Sat May 17 20:52:10 2014 (r266352)
+++ stable/10/sys/arm/arm/cpufunc_asm_armv7.S Sat May 17 21:07:54 2014 (r266353)
@@ -71,6 +71,7 @@ ENTRY(armv7_setttb)
orr r0, r0, #PT_ATTR
mcr p15, 0, r0, c2, c0, 0 /* Translation Table Base Register 0 (TTBR0) */
+ isb
#ifdef SMP
mcr p15, 0, r0, c8, c3, 0 /* invalidate I+D TLBs Inner Shareable*/
#else
@@ -273,6 +274,7 @@ ENTRY(armv7_context_switch)
orr r0, r0, #PT_ATTR
mcr p15, 0, r0, c2, c0, 0 /* set the new TTB */
+ isb
#ifdef SMP
mcr p15, 0, r0, c8, c3, 0 /* and flush the I+D tlbs Inner Sharable */
#else
Modified: stable/10/sys/arm/arm/pmap-v6.c
==============================================================================
--- stable/10/sys/arm/arm/pmap-v6.c Sat May 17 20:52:10 2014 (r266352)
+++ stable/10/sys/arm/arm/pmap-v6.c Sat May 17 21:07:54 2014 (r266353)
@@ -265,9 +265,18 @@ vm_offset_t vm_max_kernel_address;
struct pmap kernel_pmap_store;
-static pt_entry_t *csrc_pte, *cdst_pte;
-static vm_offset_t csrcp, cdstp;
-static struct mtx cmtx;
+/*
+ * Resources for quickly copying and zeroing pages using virtual address space
+ * and page table entries that are pre-allocated per-CPU by pmap_init().
+ */
+struct czpages {
+ struct mtx lock;
+ pt_entry_t *srcptep;
+ pt_entry_t *dstptep;
+ vm_offset_t srcva;
+ vm_offset_t dstva;
+};
+static struct czpages cpu_czpages[MAXCPU];
static void pmap_init_l1(struct l1_ttable *, pd_entry_t *);
/*
@@ -1047,6 +1056,7 @@ small_mappings:
cpu_tlb_flushID_SE(pv->pv_va);
else if (PTE_BEEN_REFD(opte))
cpu_tlb_flushD_SE(pv->pv_va);
+ cpu_cpwait();
}
PMAP_UNLOCK(pmap);
@@ -1134,8 +1144,8 @@ vector_page_setprot(int prot)
*ptep |= L2_S_REF;
pmap_set_prot(ptep, prot|VM_PROT_EXECUTE, 0);
-
- cpu_tlb_flushD_SE(vector_page);
+ PTE_SYNC(ptep);
+ cpu_tlb_flushID_SE(vector_page);
cpu_cpwait();
}
@@ -1643,8 +1653,8 @@ pmap_postinit(void)
pte = (pte & ~L2_S_CACHE_MASK) | pte_l2_s_cache_mode_pt;
*ptep = pte;
PTE_SYNC(ptep);
- cpu_tlb_flushD_SE(va);
-
+ cpu_tlb_flushID_SE(va);
+ cpu_cpwait();
va += PAGE_SIZE;
}
pmap_init_l1(l1, pl1pt);
@@ -1802,13 +1812,14 @@ pmap_bootstrap(vm_offset_t firstaddr, st
struct l1_ttable *l1 = &static_l1;
struct l2_dtable *l2;
struct l2_bucket *l2b;
+ struct czpages *czp;
pd_entry_t pde;
pd_entry_t *kernel_l1pt = (pd_entry_t *)l1pt->pv_va;
pt_entry_t *ptep;
vm_paddr_t pa;
vm_offset_t va;
vm_size_t size;
- int l1idx, l2idx, l2next = 0;
+ int i, l1idx, l2idx, l2next = 0;
PDEBUG(1, printf("firstaddr = %08x, lastaddr = %08x\n",
firstaddr, vm_max_kernel_address));
@@ -1920,13 +1931,16 @@ pmap_bootstrap(vm_offset_t firstaddr, st
/*
* Reserve some special page table entries/VA space for temporary
- * mapping of pages.
+ * mapping of pages that are being copied or zeroed.
*/
+ for (czp = cpu_czpages, i = 0; i < MAXCPU; ++i, ++czp) {
+ mtx_init(&czp->lock, "czpages", NULL, MTX_DEF);
+ pmap_alloc_specials(&virtual_avail, 1, &czp->srcva, &czp->srcptep);
+ pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)czp->srcptep);
+ pmap_alloc_specials(&virtual_avail, 1, &czp->dstva, &czp->dstptep);
+ pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)czp->dstptep);
+ }
- pmap_alloc_specials(&virtual_avail, 1, &csrcp, &csrc_pte);
- pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)csrc_pte);
- pmap_alloc_specials(&virtual_avail, 1, &cdstp, &cdst_pte);
- pmap_set_pt_cache_mode(kernel_l1pt, (vm_offset_t)cdst_pte);
size = ((vm_max_kernel_address - pmap_curmaxkvaddr) + L1_S_OFFSET) /
L1_S_SIZE;
pmap_alloc_specials(&virtual_avail,
@@ -1948,11 +1962,12 @@ pmap_bootstrap(vm_offset_t firstaddr, st
pmap_init_l1(l1, kernel_l1pt);
cpu_dcache_wbinv_all();
cpu_l2cache_wbinv_all();
+ cpu_tlb_flushID();
+ cpu_cpwait();
virtual_avail = round_page(virtual_avail);
virtual_end = vm_max_kernel_address;
kernel_vm_end = pmap_curmaxkvaddr;
- mtx_init(&cmtx, "TMP mappings mtx", NULL, MTX_DEF);
pmap_set_pcb_pagedir(kernel_pmap, thread0.td_pcb);
}
@@ -2034,6 +2049,8 @@ pmap_grow_map(vm_offset_t va, pt_entry_t
*ptep = L2_S_PROTO | pa | cache_mode | L2_S_REF;
pmap_set_prot(ptep, VM_PROT_READ | VM_PROT_WRITE, 0);
PTE_SYNC(ptep);
+ cpu_tlb_flushD_SE(va);
+ cpu_cpwait();
return (0);
}
@@ -2348,6 +2365,8 @@ pmap_kenter_section(vm_offset_t va, vm_o
l1->l1_kva[L1_IDX(va)] = pd;
PTE_SYNC(&l1->l1_kva[L1_IDX(va)]);
}
+ cpu_tlb_flushID_SE(va);
+ cpu_cpwait();
}
/*
@@ -2387,13 +2406,6 @@ pmap_kenter_internal(vm_offset_t va, vm_
ptep = &l2b->l2b_kva[l2pte_index(va)];
opte = *ptep;
- if (l2pte_valid(opte)) {
- cpu_tlb_flushD_SE(va);
- cpu_cpwait();
- } else {
- if (opte == 0)
- l2b->l2b_occupancy++;
- }
if (flags & KENTER_CACHE) {
*ptep = L2_S_PROTO | pa | pte_l2_s_cache_mode | L2_S_REF;
@@ -2405,10 +2417,19 @@ pmap_kenter_internal(vm_offset_t va, vm_
0);
}
+ PTE_SYNC(ptep);
+ if (l2pte_valid(opte)) {
+ if (L2_S_EXECUTABLE(opte) || L2_S_EXECUTABLE(*ptep))
+ cpu_tlb_flushID_SE(va);
+ else
+ cpu_tlb_flushD_SE(va);
+ } else {
+ if (opte == 0)
+ l2b->l2b_occupancy++;
+ }
+
PDEBUG(1, printf("pmap_kenter: pte = %08x, opte = %08x, npte = %08x\n",
(uint32_t) ptep, opte, *ptep));
- PTE_SYNC(ptep);
- cpu_cpwait();
}
void
@@ -2474,10 +2495,13 @@ pmap_kremove(vm_offset_t va)
opte = *ptep;
if (l2pte_valid(opte)) {
va = va & ~PAGE_MASK;
- cpu_tlb_flushD_SE(va);
- cpu_cpwait();
*ptep = 0;
PTE_SYNC(ptep);
+ if (L2_S_EXECUTABLE(opte))
+ cpu_tlb_flushID_SE(va);
+ else
+ cpu_tlb_flushD_SE(va);
+ cpu_cpwait();
}
}
@@ -2710,6 +2734,7 @@ small_mappings:
cpu_tlb_flushID();
else
cpu_tlb_flushD();
+ cpu_cpwait();
}
vm_page_aflag_clear(m, PGA_WRITEABLE);
rw_wunlock(&pvh_global_lock);
@@ -2763,6 +2788,7 @@ pmap_change_attr(vm_offset_t sva, vm_siz
pmap_l2cache_wbinv_range(tmpva, pte & L2_S_FRAME, PAGE_SIZE);
*ptep = pte;
cpu_tlb_flushID_SE(tmpva);
+ cpu_cpwait();
dprintf("%s: for va:%x ptep:%x pte:%x\n",
__func__, tmpva, (uint32_t)ptep, pte);
@@ -2900,6 +2926,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sv
else
if (is_refd)
cpu_tlb_flushD();
+ cpu_cpwait();
}
rw_wunlock(&pvh_global_lock);
@@ -3166,6 +3193,7 @@ validate:
cpu_tlb_flushID_SE(va);
else if (is_refd)
cpu_tlb_flushD_SE(va);
+ cpu_cpwait();
}
if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
@@ -3715,6 +3743,7 @@ pmap_remove_section(pmap_t pmap, vm_offs
cpu_tlb_flushID_SE(sva);
else
cpu_tlb_flushD_SE(sva);
+ cpu_cpwait();
}
/*
@@ -3887,6 +3916,7 @@ pmap_promote_section(pmap_t pmap, vm_off
cpu_tlb_flushID();
else
cpu_tlb_flushD();
+ cpu_cpwait();
pmap_section_promotions++;
CTR2(KTR_PMAP, "pmap_promote_section: success for va %#x"
@@ -4011,6 +4041,7 @@ pmap_demote_section(pmap_t pmap, vm_offs
cpu_tlb_flushID_SE(va);
else if (L1_S_REFERENCED(l1pd))
cpu_tlb_flushD_SE(va);
+ cpu_cpwait();
pmap_section_demotions++;
CTR2(KTR_PMAP, "pmap_demote_section: success for va %#x"
@@ -4382,6 +4413,8 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
}
}
+ *ptep = 0;
+ PTE_SYNC(ptep);
if (pmap_is_current(pmap)) {
total++;
if (total < PMAP_REMOVE_CLEAN_LIST_SIZE) {
@@ -4392,8 +4425,6 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
} else if (total == PMAP_REMOVE_CLEAN_LIST_SIZE)
flushall = 1;
}
- *ptep = 0;
- PTE_SYNC(ptep);
sva += PAGE_SIZE;
ptep++;
@@ -4406,6 +4437,8 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
rw_wunlock(&pvh_global_lock);
if (flushall)
cpu_tlb_flushID();
+ cpu_cpwait();
+
PMAP_UNLOCK(pmap);
}
@@ -4420,39 +4453,42 @@ pmap_remove(pmap_t pmap, vm_offset_t sva
static void
pmap_zero_page_gen(vm_page_t m, int off, int size)
{
+ struct czpages *czp;
+
+ KASSERT(TAILQ_EMPTY(&m->md.pv_list),
+ ("pmap_zero_page_gen: page has mappings"));
vm_paddr_t phys = VM_PAGE_TO_PHYS(m);
- if (!TAILQ_EMPTY(&m->md.pv_list))
- panic("pmap_zero_page: page has mappings");
- mtx_lock(&cmtx);
+ sched_pin();
+ czp = &cpu_czpages[PCPU_GET(cpuid)];
+ mtx_lock(&czp->lock);
+
/*
- * Hook in the page, zero it, invalidate the TLB as needed.
- *
- * Note the temporary zero-page mapping must be a non-cached page in
- * order to work without corruption when write-allocate is enabled.
+ * Hook in the page, zero it.
*/
- *cdst_pte = L2_S_PROTO | phys | pte_l2_s_cache_mode | L2_S_REF;
- pmap_set_prot(cdst_pte, VM_PROT_WRITE, 0);
- PTE_SYNC(cdst_pte);
- cpu_tlb_flushD_SE(cdstp);
+ *czp->dstptep = L2_S_PROTO | phys | pte_l2_s_cache_mode | L2_S_REF;
+ pmap_set_prot(czp->dstptep, VM_PROT_WRITE, 0);
+ PTE_SYNC(czp->dstptep);
+ cpu_tlb_flushD_SE(czp->dstva);
cpu_cpwait();
+
if (off || size != PAGE_SIZE)
- bzero((void *)(cdstp + off), size);
+ bzero((void *)(czp->dstva + off), size);
else
- bzero_page(cdstp);
+ bzero_page(czp->dstva);
/*
- * Although aliasing is not possible if we use
- * cdstp temporary mappings with memory that
- * will be mapped later as non-cached or with write-through
- * caches we might end up overwriting it when calling wbinv_all
- * So make sure caches are clean after copy operation
+ * Although aliasing is not possible, if we use temporary mappings with
+ * memory that will be mapped later as non-cached or with write-through
+ * caches, we might end up overwriting it when calling wbinv_all. So
+ * make sure caches are clean after the operation.
*/
- cpu_idcache_wbinv_range(cdstp, size);
- pmap_l2cache_wbinv_range(cdstp, phys, size);
+ cpu_idcache_wbinv_range(czp->dstva, size);
+ pmap_l2cache_wbinv_range(czp->dstva, phys, size);
- mtx_unlock(&cmtx);
+ mtx_unlock(&czp->lock);
+ sched_unpin();
}
/*
@@ -4510,45 +4546,39 @@ pmap_zero_page_idle(vm_page_t m)
void
pmap_copy_page_generic(vm_paddr_t src, vm_paddr_t dst)
{
- /*
- * Hold the source page's lock for the duration of the copy
- * so that no other mappings can be created while we have a
- * potentially aliased mapping.
- * Map the pages into the page hook points, copy them, and purge
- * the cache for the appropriate page. Invalidate the TLB
- * as required.
- */
- mtx_lock(&cmtx);
-
- /* For ARMv6 using System bit is deprecated and mapping with AP
- * bits set to 0x0 makes page not accessible. csrc_pte is mapped
- * read/write until proper mapping defines are created for ARMv6.
- */
- *csrc_pte = L2_S_PROTO | src | pte_l2_s_cache_mode | L2_S_REF;
- pmap_set_prot(csrc_pte, VM_PROT_READ, 0);
- PTE_SYNC(csrc_pte);
-
- *cdst_pte = L2_S_PROTO | dst | pte_l2_s_cache_mode | L2_S_REF;
- pmap_set_prot(cdst_pte, VM_PROT_READ | VM_PROT_WRITE, 0);
- PTE_SYNC(cdst_pte);
+ struct czpages *czp;
- cpu_tlb_flushD_SE(csrcp);
- cpu_tlb_flushD_SE(cdstp);
+ sched_pin();
+ czp = &cpu_czpages[PCPU_GET(cpuid)];
+ mtx_lock(&czp->lock);
+
+ /*
+ * Map the pages into the page hook points, copy them, and purge the
+ * cache for the appropriate page.
+ */
+ *czp->srcptep = L2_S_PROTO | src | pte_l2_s_cache_mode | L2_S_REF;
+ pmap_set_prot(czp->srcptep, VM_PROT_READ, 0);
+ PTE_SYNC(czp->srcptep);
+ cpu_tlb_flushD_SE(czp->srcva);
+ *czp->dstptep = L2_S_PROTO | dst | pte_l2_s_cache_mode | L2_S_REF;
+ pmap_set_prot(czp->dstptep, VM_PROT_READ | VM_PROT_WRITE, 0);
+ PTE_SYNC(czp->dstptep);
+ cpu_tlb_flushD_SE(czp->dstva);
cpu_cpwait();
+ bcopy_page(czp->srcva, czp->dstva);
+
/*
- * Although aliasing is not possible if we use
- * cdstp temporary mappings with memory that
- * will be mapped later as non-cached or with write-through
- * caches we might end up overwriting it when calling wbinv_all
- * So make sure caches are clean after copy operation
+ * Although aliasing is not possible, if we use temporary mappings with
+ * memory that will be mapped later as non-cached or with write-through
+ * caches, we might end up overwriting it when calling wbinv_all. So
+ * make sure caches are clean after the operation.
*/
- bcopy_page(csrcp, cdstp);
+ cpu_idcache_wbinv_range(czp->dstva, PAGE_SIZE);
+ pmap_l2cache_wbinv_range(czp->dstva, dst, PAGE_SIZE);
- cpu_idcache_wbinv_range(cdstp, PAGE_SIZE);
- pmap_l2cache_wbinv_range(cdstp, dst, PAGE_SIZE);
-
- mtx_unlock(&cmtx);
+ mtx_unlock(&czp->lock);
+ sched_unpin();
}
int unmapped_buf_allowed = 1;
@@ -4560,8 +4590,12 @@ pmap_copy_pages(vm_page_t ma[], vm_offse
vm_page_t a_pg, b_pg;
vm_offset_t a_pg_offset, b_pg_offset;
int cnt;
+ struct czpages *czp;
+
+ sched_pin();
+ czp = &cpu_czpages[PCPU_GET(cpuid)];
+ mtx_lock(&czp->lock);
- mtx_lock(&cmtx);
while (xfersize > 0) {
a_pg = ma[a_offset >> PAGE_SHIFT];
a_pg_offset = a_offset & PAGE_MASK;
@@ -4569,27 +4603,29 @@ pmap_copy_pages(vm_page_t ma[], vm_offse
b_pg = mb[b_offset >> PAGE_SHIFT];
b_pg_offset = b_offset & PAGE_MASK;
cnt = min(cnt, PAGE_SIZE - b_pg_offset);
- *csrc_pte = L2_S_PROTO | VM_PAGE_TO_PHYS(a_pg) |
+ *czp->srcptep = L2_S_PROTO | VM_PAGE_TO_PHYS(a_pg) |
pte_l2_s_cache_mode | L2_S_REF;
- pmap_set_prot(csrc_pte, VM_PROT_READ, 0);
- PTE_SYNC(csrc_pte);
- *cdst_pte = L2_S_PROTO | VM_PAGE_TO_PHYS(b_pg) |
+ pmap_set_prot(czp->srcptep, VM_PROT_READ, 0);
+ PTE_SYNC(czp->srcptep);
+ cpu_tlb_flushD_SE(czp->srcva);
+ *czp->dstptep = L2_S_PROTO | VM_PAGE_TO_PHYS(b_pg) |
pte_l2_s_cache_mode | L2_S_REF;
- pmap_set_prot(cdst_pte, VM_PROT_READ | VM_PROT_WRITE, 0);
- PTE_SYNC(cdst_pte);
- cpu_tlb_flushD_SE(csrcp);
- cpu_tlb_flushD_SE(cdstp);
+ pmap_set_prot(czp->dstptep, VM_PROT_READ | VM_PROT_WRITE, 0);
+ PTE_SYNC(czp->dstptep);
+ cpu_tlb_flushD_SE(czp->dstva);
cpu_cpwait();
- bcopy((char *)csrcp + a_pg_offset, (char *)cdstp + b_pg_offset,
+ bcopy((char *)czp->srcva + a_pg_offset, (char *)czp->dstva + b_pg_offset,
cnt);
- cpu_idcache_wbinv_range(cdstp + b_pg_offset, cnt);
- pmap_l2cache_wbinv_range(cdstp + b_pg_offset,
+ cpu_idcache_wbinv_range(czp->dstva + b_pg_offset, cnt);
+ pmap_l2cache_wbinv_range(czp->dstva + b_pg_offset,
VM_PAGE_TO_PHYS(b_pg) + b_pg_offset, cnt);
xfersize -= cnt;
a_offset += cnt;
b_offset += cnt;
}
- mtx_unlock(&cmtx);
+
+ mtx_unlock(&czp->lock);
+ sched_unpin();
}
void
@@ -4922,6 +4958,7 @@ pmap_advise(pmap_t pmap, vm_offset_t sva
cpu_tlb_flushID_SE(sva);
else if (PTE_BEEN_REFD(opte))
cpu_tlb_flushD_SE(sva);
+ cpu_cpwait();
}
}
}
Modified: stable/10/sys/arm/include/vmparam.h
==============================================================================
--- stable/10/sys/arm/include/vmparam.h Sat May 17 20:52:10 2014 (r266352)
+++ stable/10/sys/arm/include/vmparam.h Sat May 17 21:07:54 2014 (r266353)
@@ -93,15 +93,10 @@
#define VM_FREEPOOL_DIRECT 0
/*
- * we support 2 free lists:
- *
- * - DEFAULT for all systems
- * - ISADMA for the ISA DMA range on Sharks only
+ * We need just one free list: DEFAULT.
*/
-
-#define VM_NFREELIST 2
+#define VM_NFREELIST 1
#define VM_FREELIST_DEFAULT 0
-#define VM_FREELIST_ISADMA 1
/*
* The largest allocation size is 1MB.