
Merge branch 'freebsd/current/master' into hardened/current/master

* freebsd/current/master:
  zpool-features.7: Fix a typo
  Fix OOM handling of some corner cases.
Branch: hardened/current/master
Oliver Pinter committed 10 months ago
commit 044a6ca228
5 changed files with 65 additions and 10 deletions
  1. cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 (+2 -2)
  2. sys/vm/vm_fault.c (+28 -3)
  3. sys/vm/vm_page.c (+2 -2)
  4. sys/vm/vm_pageout.c (+30 -1)
  5. sys/vm/vm_pageout.h (+3 -2)

cddl/contrib/opensolaris/cmd/zpool/zpool-features.7 (+2 -2)

@@ -23,7 +23,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd June 7, 2017
+.Dd August 16, 2019
 .Dt ZPOOL-FEATURES 7
 .Os
 .Sh NAME
@@ -284,7 +284,7 @@ configuration.
 .It DEPENDENCIES Ta none
 .El
 .Pp
-This features allows ZFS to maintain more information about how free space
+This feature allows ZFS to maintain more information about how free space
 is organized within the pool.
 If this feature is
 .Sy enabled ,

sys/vm/vm_fault.c (+28 -3)

@@ -135,6 +135,18 @@ static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr,
 static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
     int backward, int forward, bool obj_locked);
 
+static int vm_pfault_oom_attempts = 3;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_attempts, CTLFLAG_RWTUN,
+    &vm_pfault_oom_attempts, 0,
+    "Number of page allocation attempts in page fault handler before it "
+    "triggers OOM handling");
+
+static int vm_pfault_oom_wait = 10;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_wait, CTLFLAG_RWTUN,
+    &vm_pfault_oom_wait, 0,
+    "Number of seconds to wait for free pages before retrying "
+    "the page fault handler");
+
 static inline void
 release_page(struct faultstate *fs)
 {
@@ -570,7 +582,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
	vm_pindex_t retry_pindex;
	vm_prot_t prot, retry_prot;
	int ahead, alloc_req, behind, cluster_offset, error, era, faultcount;
-	int locked, nera, result, rv;
+	int locked, nera, oom, result, rv;
	u_char behavior;
	boolean_t wired;	/* Passed by reference. */
	bool dead, hardfault, is_first_object_locked;
@@ -581,7 +593,9 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
	nera = -1;
	hardfault = false;
 
-RetryFault:;
+RetryFault:
+	oom = 0;
+RetryFault_oom:
 
	/*
	 * Find the backing store object and offset into it to begin the
@@ -827,7 +841,18 @@ RetryFault:;
		}
		if (fs.m == NULL) {
			unlock_and_deallocate(&fs);
-			vm_waitpfault(dset);
+			if (vm_pfault_oom_attempts < 0 ||
+			    oom < vm_pfault_oom_attempts) {
+				oom++;
+				vm_waitpfault(dset,
+				    vm_pfault_oom_wait * hz);
+				goto RetryFault_oom;
+			}
+			if (bootverbose)
+				printf(
+	"proc %d (%s) failed to alloc page on fault, starting OOM\n",
+				    curproc->p_pid, curproc->p_comm);
+			vm_pageout_oom(VM_OOM_MEM_PF);
			goto RetryFault;
		}
	}

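The heart of the fix is in the last hunk: instead of blocking in vm_waitpfault() until a wakeup that may never come, the fault handler now sleeps a bounded vm_pfault_oom_wait seconds per attempt, counts attempts in oom, and calls vm_pageout_oom(VM_OOM_MEM_PF) only after vm.pfault_oom_attempts tries have failed. Both knobs are CTLFLAG_RWTUN, so they can be set as loader tunables or at runtime, and a negative vm.pfault_oom_attempts restores the old wait-forever behavior. A minimal user-space sketch of that control flow, with try_alloc() and trigger_oom() as illustrative stand-ins for the kernel's page allocation and OOM paths (not kernel code):

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static int pfault_oom_attempts = 3;	/* kernel: vm.pfault_oom_attempts */
static int pfault_oom_wait = 1;		/* kernel default is 10 seconds */

static int failures_left = 5;

/* Stand-in for the page allocation: fail the first few times. */
static bool
try_alloc(void)
{
	return (failures_left-- <= 0);
}

/* Stand-in for vm_pageout_oom(VM_OOM_MEM_PF). */
static void
trigger_oom(void)
{
	puts("would start OOM scan now");
}

static void
fault(void)
{
	int oom;

retry_fault:			/* kernel: RetryFault, counter restarts */
	oom = 0;
retry_fault_oom:		/* kernel: RetryFault_oom, counter kept */
	if (!try_alloc()) {
		if (pfault_oom_attempts < 0 || oom < pfault_oom_attempts) {
			oom++;
			sleep(pfault_oom_wait);	/* bounded, not forever */
			goto retry_fault_oom;
		}
		trigger_oom();
		goto retry_fault;
	}
	puts("page allocated, fault serviced");
}

int
main(void)
{
	fault();
	return (0);
}

Note how the two labels mirror the kernel's: re-entering through retry_fault_oom preserves the attempt counter, while a full restart through retry_fault resets it, so a fault that eventually succeeds after an OOM scan starts counting from zero again.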
sys/vm/vm_page.c (+2 -2)

@@ -3032,7 +3032,7 @@ vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_t object, int req)
  * this balance without careful testing first.
  */
 void
-vm_waitpfault(struct domainset *dset)
+vm_waitpfault(struct domainset *dset, int timo)
 {
 
	/*
@@ -3044,7 +3044,7 @@ vm_waitpfault(struct domainset *dset)
	if (vm_page_count_min_set(&dset->ds_mask)) {
		vm_min_waiters++;
		msleep(&vm_min_domains, &vm_domainset_lock, PUSER | PDROP,
-		    "pfault", 0);
+		    "pfault", timo);
	} else
		mtx_unlock(&vm_domainset_lock);
 }

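The vm_page.c half of the change is small but load-bearing: msleep()'s final argument is a timeout in ticks, and the previous hard-coded 0 meant "sleep until a wakeup arrives", which is exactly the unbounded wait the fault handler could get stuck in. A rough user-space analogue of the old and new behavior, using POSIX condition variables rather than the kernel's sleep queues (illustrative only):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t pages_freed = PTHREAD_COND_INITIALIZER;

/*
 * timo_sec == 0: block until a wakeup arrives (the old hard-coded 0).
 * timo_sec  > 0: give up after timo_sec seconds, so the caller can
 * recheck free pages and eventually escalate to the OOM handler.
 */
static void
wait_for_free_pages(int timo_sec)
{
	pthread_mutex_lock(&lock);
	if (timo_sec == 0)
		pthread_cond_wait(&pages_freed, &lock);
	else {
		struct timespec deadline;

		clock_gettime(CLOCK_REALTIME, &deadline);
		deadline.tv_sec += timo_sec;
		if (pthread_cond_timedwait(&pages_freed, &lock,
		    &deadline) != 0)
			puts("timed out waiting; caller may retry");
	}
	pthread_mutex_unlock(&lock);
}

int
main(void)
{
	wait_for_free_pages(2);	/* kernel passes vm_pfault_oom_wait * hz */
	return (0);
}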
sys/vm/vm_pageout.c (+30 -1)

@@ -1720,6 +1720,12 @@ vm_pageout_oom_pagecount(struct vmspace *vmspace)
	return (res);
 }
 
+static int vm_oom_ratelim_last;
+static int vm_oom_pf_secs = 10;
+SYSCTL_INT(_vm, OID_AUTO, oom_pf_secs, CTLFLAG_RWTUN, &vm_oom_pf_secs, 0,
+    "");
+static struct mtx vm_oom_ratelim_mtx;
+
 void
 vm_pageout_oom(int shortage)
 {
@@ -1727,8 +1733,30 @@ vm_pageout_oom(int shortage)
	vm_offset_t size, bigsize;
	struct thread *td;
	struct vmspace *vm;
+	int now;
	bool breakout;
 
+	/*
+	 * For OOM requests originating from vm_fault(), there is a high
+	 * chance that a single large process faults simultaneously in
+	 * several threads.  Also, on an active system running many
+	 * processes of middle-size, like buildworld, all of them
+	 * could fault almost simultaneously as well.
+	 *
+	 * To avoid killing too many processes, rate-limit OOMs
+	 * initiated by vm_fault() time-outs on the waits for free
+	 * pages.
+	 */
+	mtx_lock(&vm_oom_ratelim_mtx);
+	now = ticks;
+	if (shortage == VM_OOM_MEM_PF &&
+	    (u_int)(now - vm_oom_ratelim_last) < hz * vm_oom_pf_secs) {
+		mtx_unlock(&vm_oom_ratelim_mtx);
+		return;
+	}
+	vm_oom_ratelim_last = now;
+	mtx_unlock(&vm_oom_ratelim_mtx);
+
	/*
	 * We keep the process bigproc locked once we find it to keep anyone
	 * from messing with it; however, there is a possibility of
@@ -1793,7 +1821,7 @@ vm_pageout_oom(int shortage)
			continue;
		}
		size = vmspace_swap_count(vm);
-		if (shortage == VM_OOM_MEM)
+		if (shortage == VM_OOM_MEM || shortage == VM_OOM_MEM_PF)
			size += vm_pageout_oom_pagecount(vm);
		vm_map_unlock_read(&vm->vm_map);
		vmspace_free(vm);
@@ -2048,6 +2076,7 @@ vm_pageout(void)
	p = curproc;
	td = curthread;
 
+	mtx_init(&vm_oom_ratelim_mtx, "vmoomr", NULL, MTX_DEF);
	swap_pager_swap_init();
	for (first = -1, i = 0; i < vm_ndomains; i++) {
		if (VM_DOMAIN_EMPTY(i)) {

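The rate limiter added above is the guard against over-killing: when one large multi-threaded process faults in many threads at once, each timed-out wait would otherwise start its own OOM scan and potentially kill several processes for a single shortage. PF-driven requests arriving within vm_oom_pf_secs of the previous one are therefore dropped, while daemon-driven shortages (VM_OOM_MEM) always proceed and also refresh the timestamp. A self-contained sketch of the check, where ticks_now() and hz are user-space stand-ins for the kernel globals and the unsigned subtraction keeps the comparison valid across counter wraparound:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define	VM_OOM_MEM	1
#define	VM_OOM_MEM_PF	2

static const int hz = 1000;		/* stand-in for the kernel's hz */
static int vm_oom_pf_secs = 10;		/* kernel: vm.oom_pf_secs */
static int vm_oom_ratelim_last;

/* Stand-in for the kernel's wrapping ticks counter. */
static int
ticks_now(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int)((unsigned)ts.tv_sec * (unsigned)hz +
	    (unsigned)(ts.tv_nsec / (1000000000 / hz)));
}

/*
 * Returns true when the OOM scan should actually run.  The kernel
 * holds vm_oom_ratelim_mtx around this check; this sketch is
 * single-threaded, so no lock is needed.
 */
static bool
oom_ratelimit_pass(int shortage)
{
	int now = ticks_now();

	if (shortage == VM_OOM_MEM_PF &&
	    (unsigned)(now - vm_oom_ratelim_last) <
	    (unsigned)(hz * vm_oom_pf_secs))
		return (false);	/* too soon after the last PF-driven OOM */
	vm_oom_ratelim_last = now;
	return (true);
}

int
main(void)
{
	/* On a system up longer than vm_oom_pf_secs: 1, then 0. */
	printf("PF #1: %d\n", oom_ratelimit_pass(VM_OOM_MEM_PF));
	printf("PF #2: %d\n", oom_ratelimit_pass(VM_OOM_MEM_PF));
	/* Daemon-driven shortages are never rate-limited. */
	printf("daemon: %d\n", oom_ratelimit_pass(VM_OOM_MEM));
	return (0);
}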
sys/vm/vm_pageout.h (+3 -2)

@@ -79,7 +79,8 @@ extern u_long vm_page_max_user_wired;
 extern int vm_pageout_page_count;
 
 #define	VM_OOM_MEM	1
-#define	VM_OOM_SWAPZ	2
+#define	VM_OOM_MEM_PF	2
+#define	VM_OOM_SWAPZ	3
 
 /*
  * vm_lowmem flags.
@@ -96,7 +97,7 @@ extern int vm_pageout_page_count;
  */
 
 void vm_wait(vm_object_t obj);
-void vm_waitpfault(struct domainset *);
+void vm_waitpfault(struct domainset *, int timo);
 void vm_wait_domain(int domain);
 void vm_wait_min(void);
 void vm_wait_severe(void);
