
MFC r362031, r362065, r362075:

amd64 pmap: reorder IPI send and local TLB flush in TLB invalidations.
Author: kib
Commit: 3870b9a8f5
Notes:  svn path=/stable/12/; revision=362572
5 changed files with 135 additions and 72 deletions:

  1. sys/amd64/amd64/pmap.c     (+54 -32)
  2. sys/i386/i386/pmap.c       (+18 -8)
  3. sys/i386/i386/vm_machdep.c (+8 -1)
  4. sys/x86/include/x86_smp.h  (+9 -4)
  5. sys/x86/x86/mp_x86.c       (+46 -27)
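
In outline: each pmap invalidation routine used to flush the local TLB first and only then send IPIs and spin waiting for the remote CPUs; after this change the IPIs go out first and the local flush runs as a curcpu_cb callback while the remote CPUs process theirs. A minimal userspace sketch of the two orderings (the helper names are illustrative stand-ins, not the kernel interfaces):

#include <stdio.h>

/* Illustrative stubs standing in for the real operations. */
static void local_flush(void)   { puts("local TLB flush"); }
static void send_ipis(void)     { puts("IPIs sent to remote CPUs"); }
static void wait_for_acks(void) { puts("remote CPUs acknowledged"); }

/* Old ordering: the local flush completes before any IPI goes out. */
static void
invalidate_old(void)
{
        local_flush();
        send_ipis();
        wait_for_acks();
}

/*
 * New ordering: IPIs go out first; the local flush (the curcpu_cb)
 * runs while remote CPUs process theirs; only then do we wait.
 */
static void
invalidate_new(void)
{
        send_ipis();
        local_flush();
        wait_for_acks();
}

int
main(void)
{
        invalidate_old();
        invalidate_new();
        return (0);
}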

sys/amd64/amd64/pmap.c (+54 -32)

@@ -2412,6 +2412,20 @@ DEFINE_IFUNC(static, void, pmap_invalidate_page_mode, (pmap_t, vm_offset_t),
 	return (pmap_invalidate_page_nopcid);
 }
 
+static void
+pmap_invalidate_page_curcpu_cb(pmap_t pmap, vm_offset_t va,
+    vm_offset_t addr2 __unused)
+{
+
+	if (pmap == kernel_pmap) {
+		invlpg(va);
+	} else {
+		if (pmap == PCPU_GET(curpmap))
+			invlpg(va);
+		pmap_invalidate_page_mode(pmap, va);
+	}
+}
+
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
@@ -2424,16 +2438,8 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_page: invalid type %d", pmap->pm_type));
 
-	sched_pin();
-	if (pmap == kernel_pmap) {
-		invlpg(va);
-	} else {
-		if (pmap == PCPU_GET(curpmap))
-			invlpg(va);
-		pmap_invalidate_page_mode(pmap, va);
-	}
-	smp_masked_invlpg(pmap_invalidate_cpu_mask(pmap), va, pmap);
-	sched_unpin();
+	smp_masked_invlpg(pmap_invalidate_cpu_mask(pmap), va, pmap,
+	    pmap_invalidate_page_curcpu_cb);
 }
 
 /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */
@@ -2509,10 +2515,26 @@ DEFINE_IFUNC(static, void, pmap_invalidate_range_mode, (pmap_t, vm_offset_t,
 	return (pmap_invalidate_range_nopcid);
 }
 
+static void
+pmap_invalidate_range_curcpu_cb(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	vm_offset_t addr;
+
+	if (pmap == kernel_pmap) {
+		for (addr = sva; addr < eva; addr += PAGE_SIZE)
+			invlpg(addr);
+	} else {
+		if (pmap == PCPU_GET(curpmap)) {
+			for (addr = sva; addr < eva; addr += PAGE_SIZE)
+				invlpg(addr);
+		}
+		pmap_invalidate_range_mode(pmap, sva, eva);
+	}
+}
+
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-	vm_offset_t addr;
 
 	if (eva - sva >= PMAP_INVLPG_THRESHOLD) {
 		pmap_invalidate_all(pmap);
@@ -2527,19 +2549,8 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_range: invalid type %d", pmap->pm_type));
 
-	sched_pin();
-	if (pmap == kernel_pmap) {
-		for (addr = sva; addr < eva; addr += PAGE_SIZE)
-			invlpg(addr);
-	} else {
-		if (pmap == PCPU_GET(curpmap)) {
-			for (addr = sva; addr < eva; addr += PAGE_SIZE)
-				invlpg(addr);
-		}
-		pmap_invalidate_range_mode(pmap, sva, eva);
-	}
-	smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap);
-	sched_unpin();
+	smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap,
+	    pmap_invalidate_range_curcpu_cb);
 }
 
 static inline void
@@ -2626,6 +2637,14 @@ DEFINE_IFUNC(static, void, pmap_invalidate_all_mode, (pmap_t), static)
 	return (pmap_invalidate_all_nopcid);
 }
 
+static void
+pmap_invalidate_all_curcpu_cb(pmap_t pmap, vm_offset_t addr1 __unused,
+    vm_offset_t addr2 __unused)
+{
+
+	pmap_invalidate_all_mode(pmap);
+}
+
 void
 pmap_invalidate_all(pmap_t pmap)
 {
@@ -2638,20 +2657,23 @@ pmap_invalidate_all(pmap_t pmap)
 	KASSERT(pmap->pm_type == PT_X86,
 	    ("pmap_invalidate_all: invalid type %d", pmap->pm_type));
 
-	sched_pin();
-	pmap_invalidate_all_mode(pmap);
-	smp_masked_invltlb(pmap_invalidate_cpu_mask(pmap), pmap);
-	sched_unpin();
+	smp_masked_invltlb(pmap_invalidate_cpu_mask(pmap), pmap,
+	    pmap_invalidate_all_curcpu_cb);
 }
 
+static void
+pmap_invalidate_cache_curcpu_cb(pmap_t pmap __unused, vm_offset_t va __unused,
+    vm_offset_t addr2 __unused)
+{
+
+	wbinvd();
+}
+
 void
 pmap_invalidate_cache(void)
 {
 
-	sched_pin();
-	wbinvd();
-	smp_cache_flush();
-	sched_unpin();
+	smp_cache_flush(pmap_invalidate_cache_curcpu_cb);
 }
 
 struct pde_action {

sys/i386/i386/pmap.c (+18 -8)

@@ -1164,6 +1164,13 @@ invltlb_glob(void)
 
 
 #ifdef SMP
+
+static void
+pmap_curcpu_cb_dummy(pmap_t pmap __unused, vm_offset_t addr1 __unused,
+    vm_offset_t addr2 __unused)
+{
+}
+
 /*
  * For SMP, these functions have to use the IPI mechanism for coherence.
  *
@@ -1202,7 +1209,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		mask = &other_cpus;
 	}
-	smp_masked_invlpg(*mask, va, pmap);
+	smp_masked_invlpg(*mask, va, pmap, pmap_curcpu_cb_dummy);
 	sched_unpin();
 }
 
@@ -1235,7 +1242,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		mask = &other_cpus;
 	}
-	smp_masked_invlpg_range(*mask, sva, eva, pmap);
+	smp_masked_invlpg_range(*mask, sva, eva, pmap, pmap_curcpu_cb_dummy);
 	sched_unpin();
 }
 
@@ -1258,18 +1265,21 @@ pmap_invalidate_all(pmap_t pmap)
 		CPU_AND(&other_cpus, &pmap->pm_active);
 		mask = &other_cpus;
 	}
-	smp_masked_invltlb(*mask, pmap);
+	smp_masked_invltlb(*mask, pmap, pmap_curcpu_cb_dummy);
 	sched_unpin();
 }
 
+static void
+pmap_invalidate_cache_curcpu_cb(pmap_t pmap __unused,
+    vm_offset_t addr1 __unused, vm_offset_t addr2 __unused)
+{
+	wbinvd();
+}
+
 void
 pmap_invalidate_cache(void)
 {
-
-	sched_pin();
-	wbinvd();
-	smp_cache_flush();
-	sched_unpin();
+	smp_cache_flush(pmap_invalidate_cache_curcpu_cb);
 }
 
 struct pde_action {

sys/i386/i386/vm_machdep.c (+8 -1)

@@ -615,6 +615,12 @@ sf_buf_map(struct sf_buf *sf, int flags)
 }
 
 #ifdef SMP
+static void
+sf_buf_shootdown_curcpu_cb(pmap_t pmap __unused,
+    vm_offset_t addr1 __unused, vm_offset_t addr2 __unused)
+{
+}
+
 void
 sf_buf_shootdown(struct sf_buf *sf, int flags)
 {
@@ -633,7 +639,8 @@ sf_buf_shootdown(struct sf_buf *sf, int flags)
 		CPU_NAND(&other_cpus, &sf->cpumask);
 		if (!CPU_EMPTY(&other_cpus)) {
 			CPU_OR(&sf->cpumask, &other_cpus);
-			smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap);
+			smp_masked_invlpg(other_cpus, sf->kva, kernel_pmap,
+			    sf_buf_shootdown_curcpu_cb);
 		}
 	}
 	sched_unpin();

sys/x86/include/x86_smp.h (+9 -4)

@@ -80,6 +80,9 @@ inthand_t
 	IDTVEC(cpususpend),	/* CPU suspends & waits to be resumed */
 	IDTVEC(rendezvous);	/* handle CPU rendezvous */
 
+typedef void (*smp_invl_cb_t)(struct pmap *, vm_offset_t addr1,
+    vm_offset_t addr2);
+
 /* functions in x86_mp.c */
 void assign_cpu_ids(void);
 void cpu_add(u_int apic_id, char boot_cpu);
@@ -99,11 +102,13 @@ void ipi_cpu(int cpu, u_int ipi);
 int ipi_nmi_handler(void);
 void ipi_selected(cpuset_t cpus, u_int ipi);
 void set_interrupt_apic_ids(void);
-void smp_cache_flush(void);
-void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, struct pmap *pmap);
+void smp_cache_flush(smp_invl_cb_t curcpu_cb);
+void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, struct pmap *pmap,
+    smp_invl_cb_t curcpu_cb);
 void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
-    vm_offset_t endva, struct pmap *pmap);
-void smp_masked_invltlb(cpuset_t mask, struct pmap *pmap);
+    vm_offset_t endva, struct pmap *pmap, smp_invl_cb_t curcpu_cb);
+void smp_masked_invltlb(cpuset_t mask, struct pmap *pmap,
+    smp_invl_cb_t curcpu_cb);
 void mem_range_AP_init(void);
 void topo_probe(void);
 void ipi_send_cpu(int cpu, u_int ipi);
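
Any callback passed to these functions must match the smp_invl_cb_t shape; arguments a particular callback does not need are tagged __unused, as in the dummy callbacks added above. A standalone sketch of a conforming callback (the kernel types are stubbed out so the fragment compiles on its own; the call site in the comment is hypothetical):

/* Stand-ins for the kernel types so this compiles outside the tree. */
struct pmap;
typedef unsigned long vm_offset_t;
#define __unused __attribute__((__unused__))

/* Same shape as the smp_invl_cb_t typedef added to x86_smp.h. */
typedef void (*smp_invl_cb_t)(struct pmap *, vm_offset_t addr1,
    vm_offset_t addr2);

/* A no-op callback in the style of pmap_curcpu_cb_dummy. */
static void
dummy_cb(struct pmap *pmap __unused, vm_offset_t addr1 __unused,
    vm_offset_t addr2 __unused)
{
}

/*
 * Callers pass the callback as the new trailing argument, e.g.
 * smp_masked_invlpg(mask, va, pmap, dummy_cb);
 */
smp_invl_cb_t example_cb = dummy_cb;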

sys/x86/x86/mp_x86.c (+46 -27)

@@ -1613,29 +1613,48 @@ volatile uint32_t smp_tlb_generation;
 #define read_eflags() read_rflags()
 #endif
 
+/*
+ * Used by pmap to request invalidation of TLB or cache on local and
+ * remote processors.  Mask provides the set of remote CPUs which are
+ * to be signalled with the IPI specified by vector.  The curcpu_cb
+ * callback is invoked on the calling CPU while waiting for remote
+ * CPUs to complete the operation.
+ *
+ * The callback function is called unconditionally on the caller's
+ * underlying processor, even when this processor is not set in the
+ * mask.  So, the callback function must be prepared to handle such
+ * spurious invocations.
+ */
 static void
 smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
-    vm_offset_t addr1, vm_offset_t addr2)
+    vm_offset_t addr1, vm_offset_t addr2, smp_invl_cb_t curcpu_cb)
 {
 	cpuset_t other_cpus;
 	volatile uint32_t *p_cpudone;
 	uint32_t generation;
 	int cpu;
 
-	/* It is not necessary to signal other CPUs while in the debugger. */
-	if (kdb_active || panicstr != NULL)
+	/*
+	 * It is not necessary to signal other CPUs while booting or
+	 * when in the debugger.
+	 */
+	if (kdb_active || panicstr != NULL || !smp_started) {
+		curcpu_cb(pmap, addr1, addr2);
 		return;
+	}
+
+	sched_pin();
 
 	/*
 	 * Check for other cpus.  Return if none.
 	 */
 	if (CPU_ISFULLSET(&mask)) {
 		if (mp_ncpus <= 1)
-			return;
+			goto nospinexit;
 	} else {
 		CPU_CLR(PCPU_GET(cpuid), &mask);
 		if (CPU_EMPTY(&mask))
-			return;
+			goto nospinexit;
 	}
 
 	if (!(read_eflags() & PSL_I))
@@ -1659,6 +1678,7 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
 			ipi_send_cpu(cpu, vector);
 		}
 	}
+	curcpu_cb(pmap, addr1, addr2);
 	while ((cpu = CPU_FFS(&other_cpus)) != 0) {
 		cpu--;
 		CPU_CLR(cpu, &other_cpus);
@@ -1667,55 +1687,54 @@ smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, pmap_t pmap,
 			ia32_pause();
 	}
 	mtx_unlock_spin(&smp_ipi_mtx);
+	sched_unpin();
+	return;
+
+nospinexit:
+	curcpu_cb(pmap, addr1, addr2);
+	sched_unpin();
 }
 
 void
-smp_masked_invltlb(cpuset_t mask, pmap_t pmap)
+smp_masked_invltlb(cpuset_t mask, pmap_t pmap, smp_invl_cb_t curcpu_cb)
 {
 
-	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0);
+	smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, pmap, 0, 0, curcpu_cb);
 #ifdef COUNT_XINVLTLB_HITS
-		ipi_global++;
+	ipi_global++;
 #endif
-	}
 }
 
 void
-smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, pmap_t pmap,
+    smp_invl_cb_t curcpu_cb)
 {
 
-	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0);
+	smp_targeted_tlb_shootdown(mask, IPI_INVLPG, pmap, addr, 0, curcpu_cb);
 #ifdef COUNT_XINVLTLB_HITS
-		ipi_page++;
+	ipi_page++;
 #endif
-	}
 }
 
 void
 smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2,
-    pmap_t pmap)
+    pmap_t pmap, smp_invl_cb_t curcpu_cb)
 {
 
-	if (smp_started) {
-		smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap,
-		    addr1, addr2);
+	smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, pmap, addr1, addr2,
+	    curcpu_cb);
 #ifdef COUNT_XINVLTLB_HITS
-		ipi_range++;
-		ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+	ipi_range++;
+	ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
 #endif
-	}
 }
 
 void
-smp_cache_flush(void)
+smp_cache_flush(smp_invl_cb_t curcpu_cb)
 {
 
-	if (smp_started) {
-		smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL,
-		    0, 0);
-	}
+	smp_targeted_tlb_shootdown(all_cpus, IPI_INVLCACHE, NULL, 0, 0,
+	    curcpu_cb);
 }
 
 /*
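
The comment added above spells out the contract: the initiator publishes a new generation, sends the IPIs, runs its own flush through curcpu_cb, and only then spins on the per-CPU done words. A rough userspace model of that overlap, using threads and a generation counter in place of CPUs and IPIs (a deliberate simplification, not the kernel protocol):

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NWORKERS 3

static atomic_uint generation;     /* plays the role of smp_tlb_generation */
static atomic_uint done[NWORKERS]; /* plays the role of the per-CPU done words */

static void *
worker(void *arg)
{
        int id = (int)(intptr_t)arg;
        unsigned int gen;

        /* Wait for the initiator to publish a new generation (the "IPI"). */
        while ((gen = atomic_load(&generation)) == 0)
                ;
        printf("worker %d: flush for generation %u\n", id, gen);
        atomic_store(&done[id], gen);   /* acknowledge completion */
        return (NULL);
}

int
main(void)
{
        pthread_t tids[NWORKERS];
        int i;

        for (i = 0; i < NWORKERS; i++)
                pthread_create(&tids[i], NULL, worker, (void *)(intptr_t)i);

        atomic_store(&generation, 1);   /* "send" the IPIs */
        puts("initiator: local flush (curcpu_cb) overlaps with remote work");
        for (i = 0; i < NWORKERS; i++)  /* then wait, as the shootdown's */
                while (atomic_load(&done[i]) != 1)      /* spin loop does */
                        ;
        puts("initiator: all workers acknowledged");
        for (i = 0; i < NWORKERS; i++)
                pthread_join(tids[i], NULL);
        return (0);
}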
