Browse Source

Merge remote-tracking branch 'origin/freebsd/current/master' into hardened/current/master

hardened/current/master
Shawn Webb 4 months ago
parent
commit
432e9176d5
Signed by: Shawn Webb <shawn.webb@hardenedbsd.org> GPG Key ID: FF2E67A277F8E1FA
55 changed files with 1874 additions and 878 deletions
  1. +0
    -18
      Makefile.inc1
  2. +2
    -1
      etc/Makefile
  3. +1
    -1
      include/unistd.h
  4. +5
    -1
      share/man/man4/cpufreq.4
  5. +89
    -0
      share/man/man4/hwpstate_intel.4
  6. +2
    -1
      share/termcap/Makefile
  7. +3
    -1
      stand/usb/usbcore.mk
  8. +2
    -1
      sys/conf/files.x86
  9. +2
    -1
      sys/dev/altera/atse/if_atse.c
  10. +1
    -1
      sys/dev/beri/virtio/network/if_vtbe.c
  11. +1
    -1
      sys/dev/dpaa/if_dtsec.c
  12. +2
    -1
      sys/dev/hyperv/netvsc/if_hn.c
  13. +2
    -1
      sys/dev/if_ndis/if_ndis.c
  14. +5
    -0
      sys/dev/netmap/netmap.c
  15. +2
    -1
      sys/dev/ntb/if_ntb/if_ntb.c
  16. +2
    -1
      sys/dev/sbni/if_sbni.c
  17. +3
    -2
      sys/dev/spibus/spigen.c
  18. +1
    -1
      sys/dev/virtio/scsi/virtio_scsi.c
  19. +0
    -5
      sys/dev/virtio/scsi/virtio_scsivar.h
  20. +74
    -26
      sys/dev/vmware/vmxnet3/if_vmx.c
  21. +2
    -0
      sys/dev/vmware/vmxnet3/if_vmxvar.h
  22. +1
    -1
      sys/dev/xen/netback/netback.c
  23. +178
    -119
      sys/kern/kern_cpu.c
  24. +26
    -13
      sys/kern/kern_intr.c
  25. +3
    -2
      sys/kern/kern_kcov.c
  26. +6
    -0
      sys/kern/kern_poll.c
  27. +1
    -1
      sys/kern/kern_sendfile.c
  28. +7
    -2
      sys/kern/sched_ule.c
  29. +2
    -1
      sys/mips/nlm/dev/net/xlpge.c
  30. +1
    -1
      sys/modules/cpufreq/Makefile
  31. +6
    -1
      sys/modules/usb/template/Makefile
  32. +1
    -1
      sys/modules/vmware/vmxnet3/Makefile
  33. +1
    -1
      sys/net/if.h
  34. +21
    -1
      sys/net/if_bridge.c
  35. +4
    -2
      sys/net/if_ethersubr.c
  36. +23
    -1
      sys/net/if_lagg.c
  37. +3
    -0
      sys/net/if_tuntap.c
  38. +23
    -1
      sys/net/if_vlan.c
  39. +8
    -0
      sys/net/iflib.c
  40. +1
    -4
      sys/net/netisr.c
  41. +6
    -13
      sys/net/pfil.c
  42. +176
    -136
      sys/netinet/ip_divert.c
  43. +6
    -0
      sys/netpfil/pf/if_pfsync.c
  44. +1
    -1
      sys/powerpc/pseries/phyp_llan.c
  45. +5
    -0
      sys/sys/cpu.h
  46. +2
    -1
      sys/sys/interrupt.h
  47. +5
    -4
      sys/vm/uma_core.c
  48. +577
    -496
      sys/vm/vm_fault.c
  49. +6
    -4
      sys/vm/vm_glue.c
  50. +9
    -6
      sys/vm/vm_kern.c
  51. +1
    -0
      sys/vm/vm_param.h
  52. +12
    -0
      sys/x86/cpufreq/est.c
  53. +0
    -0
      sys/x86/cpufreq/hwpstate_amd.c
  54. +516
    -0
      sys/x86/cpufreq/hwpstate_intel.c
  55. +35
    -0
      sys/x86/cpufreq/hwpstate_intel_internal.h

+ 0
- 18
Makefile.inc1 View File

@@ -180,24 +180,6 @@ MK_SYSTEM_LINKER= no
.if defined(CROSS_TOOLCHAIN_PREFIX)
CROSS_BINUTILS_PREFIX?=${CROSS_TOOLCHAIN_PREFIX}
.endif
# If we do not have a bootstrap binutils (because the in-tree one does not
# support the target architecture), provide a default cross-binutils prefix.
# This allows riscv64 builds, for example, to automatically use the
# riscv64-binutils port or package.
.if !make(showconfig) && !defined(_NO_INCLUDE_COMPILERMK)
.if !empty(BROKEN_OPTIONS:MBINUTILS_BOOTSTRAP) && \
${MK_LLD_BOOTSTRAP} == "no" && \
!defined(CROSS_BINUTILS_PREFIX)
CROSS_BINUTILS_PREFIX=/usr/local/${TARGET_TRIPLE}/bin/
.if !exists(${CROSS_BINUTILS_PREFIX})
.if !empty(BROKEN_OPTIONS:MGCC_BOOTSTRAP) && ${MK_CLANG_BOOTSTRAP} == "no"
.error In-tree toolchain does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-xtoolchain-gcc port or package or set CROSS_TOOLCHAIN_PREFIX.
.else
.error In-tree binutils does not support the ${TARGET_ARCH} architecture. Install the ${TARGET_ARCH}-binutils port or package or set CROSS_BINUTILS_PREFIX.
.endif
.endif
.endif
.endif
XBINUTILS= AS AR LD NM OBJCOPY RANLIB SIZE STRINGS
.for BINUTIL in ${XBINUTILS}
.if defined(CROSS_BINUTILS_PREFIX) && \

+ 2
- 1
etc/Makefile View File

@@ -164,7 +164,8 @@ distrib-dirs: ${MTREES:N/*} distrib-cleanup .PHONY
.endif
.if ${MK_NLS} != "no"
.for alias nls in ${NLS_ALIASES}
${INSTALL_SYMLINK} "${nls}" "${DESTDIR}${SHAREDIR}/nls/${alias}"
${INSTALL_SYMLINK} -T "package=utilities" \
"${nls}" "${DESTDIR}${SHAREDIR}/nls/${alias}"
.endfor
.endif


+ 1
- 1
include/unistd.h View File

@@ -552,7 +552,7 @@ char *re_comp(const char *);
int re_exec(const char *);
int reboot(int);
int revoke(const char *);
pid_t rfork(int);
pid_t rfork(int) __returns_twice;
pid_t rfork_thread(int, void *, int (*)(void *), void *);
int rresvport(int *);
int rresvport_af(int *, int);

+ 5
- 1
share/man/man4/cpufreq.4 View File

@@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd March 3, 2006
.Dd January 22, 2020
.Dt CPUFREQ 4
.Os
.Sh NAME
@@ -85,6 +85,10 @@ sysctl entry.
.Bl -tag -width indent
.It Va dev.cpu.%d.freq
Current active CPU frequency in MHz.
.It Va dev.cpu.%d.freq_driver
The specific
.Nm
driver used by this cpu.
.It Va dev.cpu.%d.freq_levels
Currently available levels for the CPU (frequency/power usage).
Values are in units of MHz and milliwatts.

+ 89
- 0
share/man/man4/hwpstate_intel.4 View File

@@ -0,0 +1,89 @@
.\"
.\" Copyright (c) 2019 Intel Corporation
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd January 22, 2020
.Dt HWPSTATE_INTEL 4
.Os
.Sh NAME
.Nm hwpstate_intel
.Nd Intel Speed Shift Technology driver
.Sh SYNOPSIS
To compile this driver into your kernel
place the following line in your kernel
configuration file:
.Bd -ragged -offset indent
.Cd "device cpufreq"
.Ed
.Sh DESCRIPTION
The
.Nm
driver provides support for hardware-controlled performance states on Intel
platforms, also known as Intel Speed Shift Technology.
.Sh LOADER TUNABLES
.Bl -tag -width indent
.It Va hint.hwpstate_intel.0.disabled
Can be used to disable
.Nm ,
allowing other compatible drivers to manage performance states, like
.Xr est 4 .
.Pq default 0
.El
.Sh SYSCTL VARIABLES
The following
.Xr sysctl 8
values are available:
.Bl -tag -width indent
.It Va dev.hwpstate_intel.%d.\%desc
Describes the attached driver.
.It dev.hwpstate_intel.0.%desc: Intel Speed Shift
.It Va dev.hwpstate_intel.%d.\%driver
Driver in use, always hwpstate_intel.
.It dev.hwpstate_intel.0.%driver: hwpstate_intel
.It Va dev.hwpstate_intel.%d.\%parent
.It dev.hwpstate_intel.0.%parent: cpu0
The cpu that is exposing these frequencies.
For example
.Va cpu0 .
.It Va dev.hwpstate_intel.%d.epp
Energy/Performance Preference.
Valid values range from 0 to 100.
Setting this field conveys a hint to the hardware regarding a preference towards
performance (at value 0), energy efficiency (at value 100), or somewhere in
between.
.It dev.hwpstate_intel.0.epp: 0
.El
.Sh COMPATIBILITY
.Nm
is only found on supported Intel CPUs.
.Sh SEE ALSO
.Xr cpufreq 4
.Rs
.%T "Intel 64 and IA-32 Architectures Software Developer Manuals"
.%U "http://www.intel.com/content/www/us/en/processors/architectures-software-developer-manuals.html"
.Re
.Sh AUTHORS
This manual page was written by
.An D Scott Phillips Aq Mt scottph@FreeBSD.org .

+ 2
- 1
share/termcap/Makefile View File

@@ -19,6 +19,7 @@ termcap.db: termcap
cap_mkdb ${CAP_MKDB_ENDIAN} -f ${.TARGET:R} ${.ALLSRC}

etc-termcap:
${INSTALL_SYMLINK} ${BINDIR}/misc/termcap ${DESTDIR}/etc/termcap
${INSTALL_SYMLINK} -T "package=runtime" \
${BINDIR}/misc/termcap ${DESTDIR}/etc/termcap

.include <bsd.prog.mk>

+ 3
- 1
stand/usb/usbcore.mk View File

@@ -1,7 +1,7 @@
#
# $FreeBSD$
#
# Copyright (c) 2013 Hans Petter Selasky.
# Copyright (c) 2013-2020 Hans Petter Selasky.
# Copyright (c) 2014 SRI International
# All rights reserved.
#
@@ -162,6 +162,8 @@ KSRCS+= usb_template_audio.c
KSRCS+= usb_template_phone.c
KSRCS+= usb_template_serialnet.c
KSRCS+= usb_template_midi.c
KSRCS+= usb_template_multi.c
KSRCS+= usb_template_cdceem.c

#
# USB mass storage support

+ 2
- 1
sys/conf/files.x86 View File

@@ -290,7 +290,8 @@ x86/acpica/srat.c optional acpi
x86/bios/smbios.c optional smbios
x86/bios/vpd.c optional vpd
x86/cpufreq/est.c optional cpufreq
x86/cpufreq/hwpstate.c optional cpufreq
x86/cpufreq/hwpstate_amd.c optional cpufreq
x86/cpufreq/hwpstate_intel.c optional cpufreq
x86/cpufreq/p4tcc.c optional cpufreq
x86/cpufreq/powernow.c optional cpufreq
x86/iommu/busdma_dmar.c optional acpi acpi_dmar pci

+ 2
- 1
sys/dev/altera/atse/if_atse.c View File

@@ -1381,7 +1381,8 @@ atse_attach(device_t dev)
}
ifp->if_softc = sc;
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
IFF_NEEDSEPOCH;
ifp->if_ioctl = atse_ioctl;
ifp->if_transmit = atse_transmit;
ifp->if_qflush = atse_qflush;

+ 1
- 1
sys/dev/beri/virtio/network/if_vtbe.c View File

@@ -613,7 +613,7 @@ vtbe_attach(device_t dev)
ifp->if_softc = sc;
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX |
IFF_MULTICAST | IFF_PROMISC);
IFF_MULTICAST | IFF_PROMISC | IFF_NEEDSEPOCH);
ifp->if_capabilities = IFCAP_VLAN_MTU;
ifp->if_capenable = ifp->if_capabilities;
ifp->if_start = vtbe_txstart;

+ 1
- 1
sys/dev/dpaa/if_dtsec.c View File

@@ -688,7 +688,7 @@ dtsec_attach(device_t dev)

ifp->if_softc = sc;
ifp->if_mtu = ETHERMTU; /* TODO: Configure */
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST;
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_NEEDSEPOCH;
ifp->if_init = dtsec_if_init;
ifp->if_start = dtsec_if_start;
ifp->if_ioctl = dtsec_if_ioctl;

+ 2
- 1
sys/dev/hyperv/netvsc/if_hn.c View File

@@ -2362,7 +2362,8 @@ hn_attach(device_t dev)
*/

ifp->if_baudrate = IF_Gbps(10);
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
IFF_NEEDSEPOCH;
ifp->if_ioctl = hn_ioctl;
ifp->if_init = hn_init;
#ifdef HN_IFSTART_SUPPORT

+ 2
- 1
sys/dev/if_ndis/if_ndis.c View File

@@ -967,7 +967,8 @@ ndis_ifattach(struct ndis_softc *sc)

if_initname(ifp, device_get_name(sc->ndis_dev),
device_get_unit(sc->ndis_dev));
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
IFF_NEEDSEPOCH;
ifp->if_ioctl = ndis_ifioctl;
ifp->if_start = ndis_ifstart;
ifp->if_init = ndis_init;

+ 5
- 0
sys/dev/netmap/netmap.c View File

@@ -437,11 +437,13 @@ ports attached to the switch)
#include <sys/socketvar.h> /* struct socket */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <sys/epoch.h>
#include <net/vnet.h>
#include <net/if.h>
#include <net/if_var.h>
@@ -1146,9 +1148,11 @@ netmap_dtor(void *data)
static void
netmap_send_up(struct ifnet *dst, struct mbq *q)
{
struct epoch_tracker et;
struct mbuf *m;
struct mbuf *head = NULL, *prev = NULL;

NET_EPOCH_ENTER(et);
/* Send packets up, outside the lock; head/prev machinery
* is only useful for Windows. */
while ((m = mbq_dequeue(q)) != NULL) {
@@ -1160,6 +1164,7 @@ netmap_send_up(struct ifnet *dst, struct mbq *q)
}
if (head)
nm_os_send_up(dst, NULL, head);
NET_EPOCH_EXIT(et);
mbq_fini(q);
}


+ 2
- 1
sys/dev/ntb/if_ntb/if_ntb.c View File

@@ -172,7 +172,8 @@ ntb_net_attach(device_t dev)

if_setinitfn(ifp, ntb_net_init);
if_setsoftc(ifp, sc);
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
IFF_NEEDSEPOCH);
if_setioctlfn(ifp, ntb_ioctl);
if_settransmitfn(ifp, ntb_transmit);
if_setqflushfn(ifp, ntb_qflush);

+ 2
- 1
sys/dev/sbni/if_sbni.c View File

@@ -243,7 +243,8 @@ sbni_attach(struct sbni_softc *sc, int unit, struct sbni_flags flags)
ifp->if_baudrate =
(csr0 & 0x01 ? 500000 : 2000000) / (1 << flags.rate);

ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
IFF_NEEDSEPOCH;

mtx_init(&sc->lock, ifp->if_xname, MTX_NETWORK_LOCK, MTX_DEF);
callout_init_mtx(&sc->wch, &sc->lock, 0);

+ 3
- 2
sys/dev/spibus/spigen.c View File

@@ -325,8 +325,9 @@ spigen_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
vm_object_reference_locked(mmap->bufobj); // kernel and userland both
for (n = 0; n < pages; n++) {
m[n] = vm_page_grab(mmap->bufobj, n,
VM_ALLOC_NOBUSY | VM_ALLOC_ZERO | VM_ALLOC_WIRED);
m[n]->valid = VM_PAGE_BITS_ALL;
VM_ALLOC_ZERO | VM_ALLOC_WIRED);
vm_page_valid(m[n]);
vm_page_xunbusy(m[n]);
}
VM_OBJECT_WUNLOCK(mmap->bufobj);
pmap_qenter(mmap->kvaddr, m, pages);

+ 1
- 1
sys/dev/virtio/scsi/virtio_scsi.c View File

@@ -937,7 +937,7 @@ vtscsi_cam_path_inquiry(struct vtscsi_softc *sc, struct cam_sim *sim,

cpi->max_target = sc->vtscsi_max_target;
cpi->max_lun = sc->vtscsi_max_lun;
cpi->initiator_id = VTSCSI_INITIATOR_ID;
cpi->initiator_id = cpi->max_target + 1;

strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
strlcpy(cpi->hba_vid, "VirtIO", HBA_IDLEN);

+ 0
- 5
sys/dev/virtio/scsi/virtio_scsivar.h View File

@@ -204,11 +204,6 @@ struct vtscsi_request {
*/
#define VTSCSI_RESERVED_REQUESTS 10

/*
* Specification doesn't say, use traditional SCSI default.
*/
#define VTSCSI_INITIATOR_ID 7

/*
* How to wait (or not) for request completion.
*/

+ 74
- 26
sys/dev/vmware/vmxnet3/if_vmx.c View File

@@ -23,6 +23,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_rss.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@@ -46,6 +48,9 @@ __FBSDID("$FreeBSD$");
#include <net/if_media.h>
#include <net/if_vlan_var.h>
#include <net/iflib.h>
#ifdef RSS
#include <net/rss_config.h>
#endif

#include <netinet/in_systm.h>
#include <netinet/in.h>
@@ -1140,8 +1145,11 @@ vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
struct vmxnet3_driver_shared *ds;
if_softc_ctx_t scctx;
struct vmxnet3_rss_shared *rss;
#ifdef RSS
uint8_t rss_algo;
#endif
int i;
ds = sc->vmx_ds;
scctx = sc->vmx_scctx;
rss = sc->vmx_rss;
@@ -1152,10 +1160,29 @@ vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);

for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
rss->ind_table[i] = i % scctx->isc_nrxqsets;
#ifdef RSS
/*
* If the software RSS is configured to anything else other than
* Toeplitz, then just do Toeplitz in "hardware" for the sake of
* the packet distribution, but report the hash as opaque to
* disengage from the software RSS.
*/
rss_algo = rss_gethashalgo();
if (rss_algo == RSS_HASH_TOEPLITZ) {
rss_getkey(rss->hash_key);
for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
scctx->isc_nrxqsets;
}
sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
} else
#endif
{
memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
rss->ind_table[i] = i % scctx->isc_nrxqsets;
sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
}
}

static void
@@ -1499,29 +1526,50 @@ vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
KASSERT(rxcd->sop, ("%s: expected sop", __func__));

/*
* RSS and flow ID
* RSS and flow ID.
* Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
* be used only if the software RSS is enabled and it uses the same
* algorithm and the hash key as the "hardware". If the software RSS
* is not enabled, then it's simply pointless to use those types.
* If it's enabled but with different parameters, then hash values will
* not match.
*/
ri->iri_flowid = rxcd->rss_hash;
switch (rxcd->rss_type) {
case VMXNET3_RCD_RSS_TYPE_NONE:
ri->iri_flowid = ri->iri_qsidx;
ri->iri_rsstype = M_HASHTYPE_NONE;
break;
case VMXNET3_RCD_RSS_TYPE_IPV4:
ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
break;
case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
break;
case VMXNET3_RCD_RSS_TYPE_IPV6:
ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
break;
case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
break;
default:
ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
break;
#ifdef RSS
if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
switch (rxcd->rss_type) {
case VMXNET3_RCD_RSS_TYPE_NONE:
ri->iri_flowid = ri->iri_qsidx;
ri->iri_rsstype = M_HASHTYPE_NONE;
break;
case VMXNET3_RCD_RSS_TYPE_IPV4:
ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
break;
case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
break;
case VMXNET3_RCD_RSS_TYPE_IPV6:
ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
break;
case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
break;
default:
ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
break;
}
} else
#endif
{
switch (rxcd->rss_type) {
case VMXNET3_RCD_RSS_TYPE_NONE:
ri->iri_flowid = ri->iri_qsidx;
ri->iri_rsstype = M_HASHTYPE_NONE;
break;
default:
ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
break;
}
}

/* VLAN */

+ 2
- 0
sys/dev/vmware/vmxnet3/if_vmxvar.h View File

@@ -113,6 +113,8 @@ struct vmxnet3_softc {
struct vmxnet3_driver_shared *vmx_ds;
uint32_t vmx_flags;
#define VMXNET3_FLAG_RSS 0x0002
#define VMXNET3_FLAG_SOFT_RSS 0x0004 /* Software RSS is enabled with
compatible algorithm. */

struct vmxnet3_rxqueue *vmx_rxq;
struct vmxnet3_txqueue *vmx_txq;

+ 1
- 1
sys/dev/xen/netback/netback.c View File

@@ -780,7 +780,7 @@ xnb_connect_comms(struct xnb_softc *xnb)
xnb->evtchn,
/*filter*/NULL,
xnb_intr, /*arg*/xnb,
INTR_TYPE_BIO | INTR_MPSAFE,
INTR_TYPE_NET | INTR_MPSAFE,
&xnb->xen_intr_handle);
if (error != 0) {
(void)xnb_disconnect(xnb);

+ 178
- 119
sys/kern/kern_cpu.c View File

@@ -76,6 +76,7 @@ struct cpufreq_softc {
int all_count;
int max_mhz;
device_t dev;
device_t cf_drv_dev;
struct sysctl_ctx_list sysctl_ctx;
struct task startup_task;
struct cf_level *levels_buf;
@@ -142,6 +143,11 @@ SYSCTL_INT(_debug_cpufreq, OID_AUTO, lowest, CTLFLAG_RWTUN, &cf_lowest_freq, 1,
SYSCTL_INT(_debug_cpufreq, OID_AUTO, verbose, CTLFLAG_RWTUN, &cf_verbose, 1,
"Print verbose debugging messages");

/*
* This is called as the result of a hardware specific frequency control driver
* calling cpufreq_register. It provides a general interface for system wide
* frequency controls and operates on a per cpu basis.
*/
static int
cpufreq_attach(device_t dev)
{
@@ -149,7 +155,6 @@ cpufreq_attach(device_t dev)
struct pcpu *pc;
device_t parent;
uint64_t rate;
int numdevs;

CF_DEBUG("initializing %s\n", device_get_nameunit(dev));
sc = device_get_softc(dev);
@@ -164,6 +169,7 @@ cpufreq_attach(device_t dev)
sc->max_mhz = cpu_get_nominal_mhz(dev);
/* If that fails, try to measure the current rate */
if (sc->max_mhz <= 0) {
CF_DEBUG("Unable to obtain nominal frequency.\n");
pc = cpu_get_pcpu(dev);
if (cpu_est_clockrate(pc->pc_cpuid, &rate) == 0)
sc->max_mhz = rate / 1000000;
@@ -171,15 +177,6 @@ cpufreq_attach(device_t dev)
sc->max_mhz = CPUFREQ_VAL_UNKNOWN;
}

/*
* Only initialize one set of sysctls for all CPUs. In the future,
* if multiple CPUs can have different settings, we can move these
* sysctls to be under every CPU instead of just the first one.
*/
numdevs = devclass_get_count(cpufreq_dc);
if (numdevs > 1)
return (0);

CF_DEBUG("initializing one-time data for %s\n",
device_get_nameunit(dev));
sc->levels_buf = malloc(CF_MAX_LEVELS * sizeof(*sc->levels_buf),
@@ -216,7 +213,6 @@ cpufreq_detach(device_t dev)
{
struct cpufreq_softc *sc;
struct cf_saved_freq *saved_freq;
int numdevs;

CF_DEBUG("shutdown %s\n", device_get_nameunit(dev));
sc = device_get_softc(dev);
@@ -227,12 +223,7 @@ cpufreq_detach(device_t dev)
free(saved_freq, M_TEMP);
}

/* Only clean up these resources when the last device is detaching. */
numdevs = devclass_get_count(cpufreq_dc);
if (numdevs == 1) {
CF_DEBUG("final shutdown for %s\n", device_get_nameunit(dev));
free(sc->levels_buf, M_DEVBUF);
}
free(sc->levels_buf, M_DEVBUF);

return (0);
}
@@ -421,26 +412,75 @@ out:
return (error);
}

/*
 * Query the hardware cpufreq driver for its current setting and return
 * the frequency in MHz, or -1 when the driver query fails.
 */
static int
cpufreq_get_frequency(device_t dev)
{
	struct cf_setting set;
	int rv;

	rv = CPUFREQ_DRV_GET(dev, &set);
	return (rv != 0 ? -1 : set.freq);
}

/*
 * Look up the driver's current frequency in *levels and return the index
 * of the matching entry, or -1 when the driver cannot be queried or no
 * level matches.
 */
static int
cpufreq_get_level(device_t dev, struct cf_level *levels, int count)
{
	int freq, idx;

	freq = cpufreq_get_frequency(dev);
	if (freq < 0)
		return (-1);
	for (idx = 0; idx < count; idx++) {
		if (levels[idx].total_set.freq == freq)
			return (idx);
	}

	return (-1);
}

/*
* Used by the cpufreq core, this function will populate *level with the current
* frequency as either determined by a cached value sc->curr_level, or in the
* case the lower level driver has set the CPUFREQ_FLAG_UNCACHED flag, it will
* obtain the frequency from the driver itself.
*/
static int
cf_get_method(device_t dev, struct cf_level *level)
{
struct cpufreq_softc *sc;
struct cf_level *levels;
struct cf_setting *curr_set, set;
struct cf_setting *curr_set;
struct pcpu *pc;
device_t *devs;
int bdiff, count, diff, error, i, n, numdevs;
int bdiff, count, diff, error, i, type;
uint64_t rate;

sc = device_get_softc(dev);
error = 0;
levels = NULL;

/* If we already know the current frequency, we're done. */
/*
* If we already know the current frequency, and the driver didn't ask
* for uncached usage, we're done.
*/
CF_MTX_LOCK(&sc->lock);
curr_set = &sc->curr_level.total_set;
if (curr_set->freq != CPUFREQ_VAL_UNKNOWN) {
error = CPUFREQ_DRV_TYPE(sc->cf_drv_dev, &type);
if (error == 0 && (type & CPUFREQ_FLAG_UNCACHED)) {
struct cf_setting set;

/*
* If the driver wants to always report back the real frequency,
* first try the driver and if that fails, fall back to
* estimating.
*/
if (CPUFREQ_DRV_GET(sc->cf_drv_dev, &set) != 0)
goto estimate;
sc->curr_level.total_set = set;
CF_DEBUG("get returning immediate freq %d\n", curr_set->freq);
goto out;
} else if (curr_set->freq != CPUFREQ_VAL_UNKNOWN) {
CF_DEBUG("get returning known freq %d\n", curr_set->freq);
error = 0;
goto out;
}
CF_MTX_UNLOCK(&sc->lock);
@@ -461,11 +501,6 @@ cf_get_method(device_t dev, struct cf_level *level)
free(levels, M_TEMP);
return (error);
}
error = device_get_children(device_get_parent(dev), &devs, &numdevs);
if (error) {
free(levels, M_TEMP);
return (error);
}

/*
* Reacquire the lock and search for the given level.
@@ -476,24 +511,21 @@ cf_get_method(device_t dev, struct cf_level *level)
* The estimation code below catches this case though.
*/
CF_MTX_LOCK(&sc->lock);
for (n = 0; n < numdevs && curr_set->freq == CPUFREQ_VAL_UNKNOWN; n++) {
if (!device_is_attached(devs[n]))
continue;
if (CPUFREQ_DRV_GET(devs[n], &set) != 0)
continue;
for (i = 0; i < count; i++) {
if (set.freq == levels[i].total_set.freq) {
sc->curr_level = levels[i];
break;
}
}
}
free(devs, M_TEMP);
i = cpufreq_get_level(sc->cf_drv_dev, levels, count);
if (i >= 0)
sc->curr_level = levels[i];
else
CF_DEBUG("Couldn't find supported level for %s\n",
device_get_nameunit(sc->cf_drv_dev));

if (curr_set->freq != CPUFREQ_VAL_UNKNOWN) {
CF_DEBUG("get matched freq %d from drivers\n", curr_set->freq);
goto out;
}

estimate:
CF_MTX_ASSERT(&sc->lock);

/*
* We couldn't find an exact match, so attempt to estimate and then
* match against a level.
@@ -525,6 +557,73 @@ out:
return (error);
}

/*
 * Either directly obtain settings from the cpufreq driver, or build a list of
 * relative settings to be integrated later against an absolute max.
 *
 * Absolute settings are inserted into the softc's level list immediately;
 * relative settings are queued on *rel_sets for the caller to apply.
 * Returns 0 on success (or when the device is simply skipped) and an
 * errno value on failure.
 */
static int
cpufreq_add_levels(device_t cf_dev, struct cf_setting_lst *rel_sets)
{
struct cf_setting_array *set_arr;
struct cf_setting *sets;
device_t dev;
struct cpufreq_softc *sc;
int type, set_count, error;

/* The hardware driver device is cached on the cpufreq softc. */
sc = device_get_softc(cf_dev);
dev = sc->cf_drv_dev;

/* Skip devices that aren't ready. */
if (!device_is_attached(cf_dev))
return (0);

/*
 * Get settings, skipping drivers that offer no settings or
 * provide settings for informational purposes only.
 */
error = CPUFREQ_DRV_TYPE(dev, &type);
if (error != 0 || (type & CPUFREQ_FLAG_INFO_ONLY)) {
if (error == 0) {
/* error == 0 here means info-only: skipped, not a failure. */
CF_DEBUG("skipping info-only driver %s\n",
device_get_nameunit(cf_dev));
}
return (error);
}

/* Temporary buffer for the driver's settings array. */
sets = malloc(MAX_SETTINGS * sizeof(*sets), M_TEMP, M_NOWAIT);
if (sets == NULL)
return (ENOMEM);

set_count = MAX_SETTINGS;
error = CPUFREQ_DRV_SETTINGS(dev, sets, &set_count);
if (error != 0 || set_count == 0)
goto out;

/* Add the settings to our absolute/relative lists. */
switch (type & CPUFREQ_TYPE_MASK) {
case CPUFREQ_TYPE_ABSOLUTE:
error = cpufreq_insert_abs(sc, sets, set_count);
break;
case CPUFREQ_TYPE_RELATIVE:
CF_DEBUG("adding %d relative settings\n", set_count);
set_arr = malloc(sizeof(*set_arr), M_TEMP, M_NOWAIT);
if (set_arr == NULL) {
error = ENOMEM;
goto out;
}
/* Copy into a queue node; ownership passes to rel_sets. */
bcopy(sets, set_arr->sets, set_count * sizeof(*sets));
set_arr->count = set_count;
TAILQ_INSERT_TAIL(rel_sets, set_arr, link);
break;
default:
error = EINVAL;
}

out:
/* The scratch settings buffer is always freed here. */
free(sets, M_TEMP);
return (error);
}

static int
cf_levels_method(device_t dev, struct cf_level *levels, int *count)
{
@@ -532,10 +631,8 @@ cf_levels_method(device_t dev, struct cf_level *levels, int *count)
struct cf_setting_lst rel_sets;
struct cpufreq_softc *sc;
struct cf_level *lev;
struct cf_setting *sets;
struct pcpu *pc;
device_t *devs;
int error, i, numdevs, set_count, type;
int error, i;
uint64_t rate;

if (levels == NULL || count == NULL)
@@ -543,67 +640,21 @@ cf_levels_method(device_t dev, struct cf_level *levels, int *count)

TAILQ_INIT(&rel_sets);
sc = device_get_softc(dev);
error = device_get_children(device_get_parent(dev), &devs, &numdevs);
if (error)
return (error);
sets = malloc(MAX_SETTINGS * sizeof(*sets), M_TEMP, M_NOWAIT);
if (sets == NULL) {
free(devs, M_TEMP);
return (ENOMEM);
}

/* Get settings from all cpufreq drivers. */
CF_MTX_LOCK(&sc->lock);
for (i = 0; i < numdevs; i++) {
/* Skip devices that aren't ready. */
if (!device_is_attached(devs[i]))
continue;

/*
* Get settings, skipping drivers that offer no settings or
* provide settings for informational purposes only.
*/
error = CPUFREQ_DRV_TYPE(devs[i], &type);
if (error || (type & CPUFREQ_FLAG_INFO_ONLY)) {
if (error == 0) {
CF_DEBUG("skipping info-only driver %s\n",
device_get_nameunit(devs[i]));
}
continue;
}
set_count = MAX_SETTINGS;
error = CPUFREQ_DRV_SETTINGS(devs[i], sets, &set_count);
if (error || set_count == 0)
continue;

/* Add the settings to our absolute/relative lists. */
switch (type & CPUFREQ_TYPE_MASK) {
case CPUFREQ_TYPE_ABSOLUTE:
error = cpufreq_insert_abs(sc, sets, set_count);
break;
case CPUFREQ_TYPE_RELATIVE:
CF_DEBUG("adding %d relative settings\n", set_count);
set_arr = malloc(sizeof(*set_arr), M_TEMP, M_NOWAIT);
if (set_arr == NULL) {
error = ENOMEM;
goto out;
}
bcopy(sets, set_arr->sets, set_count * sizeof(*sets));
set_arr->count = set_count;
TAILQ_INSERT_TAIL(&rel_sets, set_arr, link);
break;
default:
error = EINVAL;
}
if (error)
goto out;
}
error = cpufreq_add_levels(sc->dev, &rel_sets);
if (error)
goto out;

/*
* If there are no absolute levels, create a fake one at 100%. We
* then cache the clockrate for later use as our base frequency.
*/
if (TAILQ_EMPTY(&sc->all_levels)) {
struct cf_setting set;

CF_DEBUG("No absolute levels returned by driver\n");

if (sc->max_mhz == CPUFREQ_VAL_UNKNOWN) {
sc->max_mhz = cpu_get_nominal_mhz(dev);
/*
@@ -617,10 +668,10 @@ cf_levels_method(device_t dev, struct cf_level *levels, int *count)
sc->max_mhz = rate / 1000000;
}
}
memset(&sets[0], CPUFREQ_VAL_UNKNOWN, sizeof(*sets));
sets[0].freq = sc->max_mhz;
sets[0].dev = NULL;
error = cpufreq_insert_abs(sc, sets, 1);
memset(&set, CPUFREQ_VAL_UNKNOWN, sizeof(set));
set.freq = sc->max_mhz;
set.dev = NULL;
error = cpufreq_insert_abs(sc, &set, 1);
if (error)
goto out;
}
@@ -665,8 +716,6 @@ out:
TAILQ_REMOVE(&rel_sets, set_arr, link);
free(set_arr, M_TEMP);
}
free(devs, M_TEMP);
free(sets, M_TEMP);
return (error);
}

@@ -1011,11 +1060,24 @@ out:
return (error);
}

/*
 * Publish a read-only "freq_driver" sysctl under the cpufreq device node,
 * identifying the hardware driver that backs this cpu's frequency control.
 */
static void
cpufreq_add_freq_driver_sysctl(device_t cf_dev)
{
	struct cpufreq_softc *softc;

	softc = device_get_softc(cf_dev);
	SYSCTL_ADD_CONST_STRING(&softc->sysctl_ctx,
	    SYSCTL_CHILDREN(device_get_sysctl_tree(cf_dev)), OID_AUTO,
	    "freq_driver", CTLFLAG_RD, device_get_nameunit(softc->cf_drv_dev),
	    "cpufreq driver used by this cpu");
}

int
cpufreq_register(device_t dev)
{
struct cpufreq_softc *sc;
device_t cf_dev, cpu_dev;
int error;

/* Add a sysctl to get each driver's settings separately. */
SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
@@ -1031,6 +1093,7 @@ cpufreq_register(device_t dev)
if ((cf_dev = device_find_child(cpu_dev, "cpufreq", -1))) {
sc = device_get_softc(cf_dev);
sc->max_mhz = CPUFREQ_VAL_UNKNOWN;
MPASS(sc->cf_drv_dev != NULL);
return (0);
}

@@ -1040,40 +1103,36 @@ cpufreq_register(device_t dev)
return (ENOMEM);
device_quiet(cf_dev);

return (device_probe_and_attach(cf_dev));
error = device_probe_and_attach(cf_dev);
if (error)
return (error);

sc = device_get_softc(cf_dev);
sc->cf_drv_dev = dev;
cpufreq_add_freq_driver_sysctl(cf_dev);
return (error);
}

int
cpufreq_unregister(device_t dev)
{
device_t cf_dev, *devs;
int cfcount, devcount, error, i, type;
device_t cf_dev;
struct cpufreq_softc *sc;

/*
* If this is the last cpufreq child device, remove the control
* device as well. We identify cpufreq children by calling a method
* they support.
*/
error = device_get_children(device_get_parent(dev), &devs, &devcount);
if (error)
return (error);
cf_dev = device_find_child(device_get_parent(dev), "cpufreq", -1);
if (cf_dev == NULL) {
device_printf(dev,
"warning: cpufreq_unregister called with no cpufreq device active\n");
free(devs, M_TEMP);
return (0);
}
cfcount = 0;
for (i = 0; i < devcount; i++) {
if (!device_is_attached(devs[i]))
continue;
if (CPUFREQ_DRV_TYPE(devs[i], &type) == 0)
cfcount++;
}
if (cfcount <= 1)
device_delete_child(device_get_parent(cf_dev), cf_dev);
free(devs, M_TEMP);
sc = device_get_softc(cf_dev);
MPASS(sc->cf_drv_dev == dev);
device_delete_child(device_get_parent(cf_dev), cf_dev);

return (0);
}

+ 26
- 13
sys/kern/kern_intr.c View File

@@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/epoch.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
@@ -94,6 +95,9 @@ static int intr_storm_threshold = 0;
SYSCTL_INT(_hw, OID_AUTO, intr_storm_threshold, CTLFLAG_RWTUN,
&intr_storm_threshold, 0,
"Number of consecutive interrupts before storm protection is enabled");
static int intr_epoch_batch = 1000;
SYSCTL_INT(_hw, OID_AUTO, intr_epoch_batch, CTLFLAG_RWTUN, &intr_epoch_batch,
0, "Maximum interrupt handler executions without re-entering epoch(9)");
static TAILQ_HEAD(, intr_event) event_list =
TAILQ_HEAD_INITIALIZER(event_list);
static struct mtx event_lock;
@@ -190,7 +194,7 @@ intr_event_update(struct intr_event *ie)
/* Start off with no entropy and just the name of the event. */
mtx_assert(&ie->ie_lock, MA_OWNED);
strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname));
ie->ie_flags &= ~IE_ENTROPY;
ie->ie_hflags = 0;
missed = 0;
space = 1;

@@ -203,8 +207,7 @@ intr_event_update(struct intr_event *ie)
space = 0;
} else
missed++;
if (ih->ih_flags & IH_ENTROPY)
ie->ie_flags |= IE_ENTROPY;
ie->ie_hflags |= ih->ih_flags;
}

/*
@@ -588,6 +591,8 @@ intr_event_add_handler(struct intr_event *ie, const char *name,
ih->ih_flags |= IH_MPSAFE;
if (flags & INTR_ENTROPY)
ih->ih_flags |= IH_ENTROPY;
if (flags & INTR_TYPE_NET)
ih->ih_flags |= IH_NET;

/* We can only have one exclusive handler in a event. */
mtx_lock(&ie->ie_lock);
@@ -958,7 +963,7 @@ intr_event_schedule_thread(struct intr_event *ie)
* If any of the handlers for this ithread claim to be good
* sources of entropy, then gather some.
*/
if (ie->ie_flags & IE_ENTROPY) {
if (ie->ie_hflags & IH_ENTROPY) {
entropy.event = (uintptr_t)ie;
entropy.td = ctd;
random_harvest_queue(&entropy, sizeof(entropy), RANDOM_INTERRUPT);
@@ -1197,11 +1202,12 @@ ithread_execute_handlers(struct proc *p, struct intr_event *ie)
static void
ithread_loop(void *arg)
{
struct epoch_tracker et;
struct intr_thread *ithd;
struct intr_event *ie;
struct thread *td;
struct proc *p;
int wake;
int wake, epoch_count;

td = curthread;
p = td->td_proc;
@@ -1236,8 +1242,21 @@ ithread_loop(void *arg)
* that the load of ih_need in ithread_execute_handlers()
* is ordered after the load of it_need here.
*/
while (atomic_cmpset_acq_int(&ithd->it_need, 1, 0) != 0)
if (ie->ie_hflags & IH_NET) {
epoch_count = 0;
NET_EPOCH_ENTER(et);
}
while (atomic_cmpset_acq_int(&ithd->it_need, 1, 0) != 0) {
ithread_execute_handlers(p, ie);
if ((ie->ie_hflags & IH_NET) &&
++epoch_count >= intr_epoch_batch) {
NET_EPOCH_EXIT(et);
epoch_count = 0;
NET_EPOCH_ENTER(et);
}
}
if (ie->ie_hflags & IH_NET)
NET_EPOCH_EXIT(et);
WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread");
mtx_assert(&Giant, MA_NOTOWNED);

@@ -1492,7 +1511,7 @@ db_dump_intr_event(struct intr_event *ie, int handlers)
db_printf("(pid %d)", it->it_thread->td_proc->p_pid);
else
db_printf("(no thread)");
if ((ie->ie_flags & (IE_SOFT | IE_ENTROPY | IE_ADDING_THREAD)) != 0 ||
if ((ie->ie_flags & (IE_SOFT | IE_ADDING_THREAD)) != 0 ||
(it != NULL && it->it_need)) {
db_printf(" {");
comma = 0;
@@ -1500,12 +1519,6 @@ db_dump_intr_event(struct intr_event *ie, int handlers)
db_printf("SOFT");
comma = 1;
}
if (ie->ie_flags & IE_ENTROPY) {
if (comma)
db_printf(", ");
db_printf("ENTROPY");
comma = 1;
}
if (ie->ie_flags & IE_ADDING_THREAD) {
if (comma)
db_printf(", ");

+ 3
- 2
sys/kern/kern_kcov.c View File

@@ -383,8 +383,9 @@ kcov_alloc(struct kcov_info *info, size_t entries)
VM_OBJECT_WLOCK(info->bufobj);
for (n = 0; n < pages; n++) {
m = vm_page_grab(info->bufobj, n,
VM_ALLOC_NOBUSY | VM_ALLOC_ZERO | VM_ALLOC_WIRED);
m->valid = VM_PAGE_BITS_ALL;
VM_ALLOC_ZERO | VM_ALLOC_WIRED);
vm_page_valid(m);
vm_page_xunbusy(m);
pmap_qenter(info->kvaddr + n * PAGE_SIZE, &m, 1);
}
VM_OBJECT_WUNLOCK(info->bufobj);

+ 6
- 0
sys/kern/kern_poll.c View File

@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/epoch.h>
#include <sys/eventhandler.h>
#include <sys/resourcevar.h>
#include <sys/socket.h> /* needed by net/if.h */
@@ -332,6 +333,7 @@ hardclock_device_poll(void)
static void
ether_poll(int count)
{
struct epoch_tracker et;
int i;

mtx_lock(&poll_mtx);
@@ -339,8 +341,10 @@ ether_poll(int count)
if (count > poll_each_burst)
count = poll_each_burst;

NET_EPOCH_ENTER(et);
for (i = 0 ; i < poll_handlers ; i++)
pr[i].handler(pr[i].ifp, POLL_ONLY, count);
NET_EPOCH_EXIT(et);

mtx_unlock(&poll_mtx);
}
@@ -429,6 +433,8 @@ netisr_poll(void)
int i, cycles;
enum poll_cmd arg = POLL_ONLY;

NET_EPOCH_ASSERT();

if (poll_handlers == 0)
return;


+ 1
- 1
sys/kern/kern_sendfile.c View File

@@ -388,7 +388,7 @@ sendfile_swapin(vm_object_t obj, struct sf_io *sfio, int *nios, off_t off,
if (!vm_pager_has_page(obj, OFF_TO_IDX(vmoff(i, off)), NULL,
&a)) {
pmap_zero_page(pa[i]);
pa[i]->valid = VM_PAGE_BITS_ALL;
vm_page_valid(pa[i]);
MPASS(pa[i]->dirty == 0);
vm_page_xunbusy(pa[i]);
i++;

+ 7
- 2
sys/kern/sched_ule.c View File

@@ -2894,7 +2894,7 @@ sched_throw(struct thread *td)
struct thread *newtd;
struct tdq *tdq;

if (td == NULL) {
if (__predict_false(td == NULL)) {
#ifdef SMP
PCPU_SET(sched, DPCPU_PTR(tdq));
#endif
@@ -2912,13 +2912,18 @@ sched_throw(struct thread *td)
tdq_load_rem(tdq, td);
td->td_lastcpu = td->td_oncpu;
td->td_oncpu = NOCPU;
thread_lock_block(td);
}
newtd = choosethread();
spinlock_enter();
TDQ_UNLOCK(tdq);
KASSERT(curthread->td_md.md_spinlock_count == 1,
("invalid count %d", curthread->td_md.md_spinlock_count));
cpu_throw(td, newtd); /* doesn't return */
/* doesn't return */
if (__predict_false(td == NULL))
cpu_throw(td, newtd); /* doesn't return */
else
cpu_switch(td, newtd, TDQ_LOCKPTR(tdq));
}

/*

+ 2
- 1
sys/mips/nlm/dev/net/xlpge.c View File

@@ -1052,7 +1052,8 @@ nlm_xlpge_ifinit(struct nlm_xlpge_softc *sc)
}
ifp->if_softc = sc;
if_initname(ifp, device_get_name(dev), device_get_unit(dev));
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
IFF_NEEDSEPOCH;
sc->if_flags = ifp->if_flags;
/*ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_VLAN_HWTAGGING;*/
ifp->if_capabilities = 0;

+ 1
- 1
sys/modules/cpufreq/Makefile View File

@@ -11,7 +11,7 @@ SRCS+= bus_if.h cpufreq_if.h device_if.h pci_if.h
.PATH: ${SRCTOP}/sys/x86/cpufreq

SRCS+= acpi_if.h opt_acpi.h
SRCS+= est.c hwpstate.c p4tcc.c powernow.c
SRCS+= est.c hwpstate_amd.c p4tcc.c powernow.c hwpstate_intel.c
.endif

.if ${MACHINE} == "i386"

+ 6
- 1
sys/modules/usb/template/Makefile View File

@@ -1,7 +1,7 @@
#
# $FreeBSD$
#
# Copyright (c) 2008 Hans Petter Selasky. All rights reserved.
# Copyright (c) 2008-2020 Hans Petter Selasky. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -45,4 +45,9 @@ SRCS= opt_bus.h opt_usb.h device_if.h bus_if.h usb_if.h vnode_if.h usbdevs.h \
usb_template_multi.c \
usb_template_cdceem.c

#
# NOTE:
# Keep source list above in sync with stand/usb/usbcore.mk
#

.include <bsd.kmod.mk>

+ 1
- 1
sys/modules/vmware/vmxnet3/Makefile View File

@@ -28,6 +28,6 @@
KMOD= if_vmx
SRCS= if_vmx.c
SRCS+= bus_if.h device_if.h pci_if.h ifdi_if.h
SRCS+= opt_inet.h opt_inet6.h
SRCS+= opt_inet.h opt_inet6.h opt_rss.h

.include <bsd.kmod.mk>

+ 1
- 1
sys/net/if.h View File

@@ -144,7 +144,7 @@ struct if_data {
#define IFF_DEBUG 0x4 /* (n) turn on debugging */
#define IFF_LOOPBACK 0x8 /* (i) is a loopback net */
#define IFF_POINTOPOINT 0x10 /* (i) is a point-to-point link */
/* 0x20 was IFF_SMART */
#define IFF_NEEDSEPOCH 0x20 /* (i) calls if_input w/o epoch */
#define IFF_DRV_RUNNING 0x40 /* (d) resources allocated */
#define IFF_NOARP 0x80 /* (n) no address resolution protocol */
#define IFF_PROMISC 0x100 /* (n) receive all packets */

+ 21
- 1
sys/net/if_bridge.c View File

@@ -135,6 +135,14 @@ __FBSDID("$FreeBSD$");

#include <net/route.h>

#ifdef INET6
/*
* XXX: declare here to avoid to include many inet6 related files..
* should be more generalized?
*/
extern void nd6_setmtu(struct ifnet *);
#endif

/*
* Size of the route hash table. Must be a power of two.
*/
@@ -772,7 +780,7 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
} args;
struct ifdrv *ifd = (struct ifdrv *) data;
const struct bridge_control *bc;
int error = 0;
int error = 0, oldmtu;

switch (cmd) {

@@ -818,12 +826,24 @@ bridge_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
break;
}

oldmtu = ifp->if_mtu;
BRIDGE_LOCK(sc);
error = (*bc->bc_func)(sc, &args);
BRIDGE_UNLOCK(sc);
if (error)
break;

/*
* Bridge MTU may change during addition of the first port.
* If it did, do network layer specific procedure.
*/
if (ifp->if_mtu != oldmtu) {
#ifdef INET6
nd6_setmtu(ifp);
#endif
rt_updatemtu(ifp);
}

if (bc->bc_flags & BC_F_COPYOUT)
error = copyout(&args, ifd->ifd_data, ifd->ifd_len);


+ 4
- 2
sys/net/if_ethersubr.c View File

@@ -809,7 +809,8 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
* them up. This allows the drivers to amortize the receive lock.
*/
CURVNET_SET_QUIET(ifp->if_vnet);
NET_EPOCH_ENTER(et);
if (__predict_false(ifp->if_flags & IFF_NEEDSEPOCH))
NET_EPOCH_ENTER(et);
while (m) {
mn = m->m_nextpkt;
m->m_nextpkt = NULL;
@@ -824,7 +825,8 @@ ether_input(struct ifnet *ifp, struct mbuf *m)
netisr_dispatch(NETISR_ETHER, m);
m = mn;
}
NET_EPOCH_EXIT(et);
if (__predict_false(ifp->if_flags & IFF_NEEDSEPOCH))
NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
}


+ 23
- 1
sys/net/if_lagg.c View File

@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>
#include <net/route.h>
#include <net/vnet.h>

#if defined(INET) || defined(INET6)
@@ -74,6 +75,14 @@ __FBSDID("$FreeBSD$");
#include <net/if_lagg.h>
#include <net/ieee8023ad_lacp.h>

#ifdef INET6
/*
* XXX: declare here to avoid to include many inet6 related files..
* should be more generalized?
*/
extern void nd6_setmtu(struct ifnet *);
#endif

#define LAGG_RLOCK() struct epoch_tracker lagg_et; epoch_enter_preempt(net_epoch_preempt, &lagg_et)
#define LAGG_RUNLOCK() epoch_exit_preempt(net_epoch_preempt, &lagg_et)
#define LAGG_RLOCK_ASSERT() NET_EPOCH_ASSERT()
@@ -1178,7 +1187,7 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct ifnet *tpif;
struct thread *td = curthread;
char *buf, *outbuf;
int count, buflen, len, error = 0;
int count, buflen, len, error = 0, oldmtu;

bzero(&rpbuf, sizeof(rpbuf));

@@ -1453,10 +1462,23 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
tpif->if_xname);
}
#endif
oldmtu = ifp->if_mtu;
LAGG_XLOCK(sc);
error = lagg_port_create(sc, tpif);
LAGG_XUNLOCK(sc);
if_rele(tpif);

/*
* LAGG MTU may change during addition of the first port.
* If it did, do network layer specific procedure.
*/
if (ifp->if_mtu != oldmtu) {
#ifdef INET6
nd6_setmtu(ifp);
#endif
rt_updatemtu(ifp);
}

VLAN_CAPABILITIES(ifp);
break;
case SIOCSLAGGDELPORT:

+ 3
- 0
sys/net/if_tuntap.c View File

@@ -1778,6 +1778,7 @@ static int
tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m,
struct virtio_net_hdr_mrg_rxbuf *vhdr)
{
struct epoch_tracker et;
struct ether_header *eh;
struct ifnet *ifp;

@@ -1808,7 +1809,9 @@ tunwrite_l2(struct tuntap_softc *tp, struct mbuf *m,

/* Pass packet up to parent. */
CURVNET_SET(ifp->if_vnet);
NET_EPOCH_ENTER(et);
(*ifp->if_input)(ifp, m);
NET_EPOCH_EXIT(et);
CURVNET_RESTORE();
/* ibytes are counted in parent */
if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);

+ 23
- 1
sys/net/if_vlan.c View File

@@ -46,6 +46,7 @@
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_kern_tls.h"
#include "opt_vlan.h"
#include "opt_ratelimit.h"
@@ -75,6 +76,7 @@ __FBSDID("$FreeBSD$");
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <net/vnet.h>

#ifdef INET
@@ -82,6 +84,14 @@ __FBSDID("$FreeBSD$");
#include <netinet/if_ether.h>
#endif

#ifdef INET6
/*
* XXX: declare here to avoid to include many inet6 related files..
* should be more generalized?
*/
extern void nd6_setmtu(struct ifnet *);
#endif

#define VLAN_DEF_HWIDTH 4
#define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST)

@@ -1807,7 +1817,7 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct ifvlan *ifv;
struct ifvlantrunk *trunk;
struct vlanreq vlr;
int error = 0;
int error = 0, oldmtu;

ifr = (struct ifreq *)data;
ifa = (struct ifaddr *) data;
@@ -1901,8 +1911,20 @@ vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
error = ENOENT;
break;
}
oldmtu = ifp->if_mtu;
error = vlan_config(ifv, p, vlr.vlr_tag);
if_rele(p);

/*
* VLAN MTU may change during addition of the vlandev.
* If it did, do network layer specific procedure.
*/
if (ifp->if_mtu != oldmtu) {
#ifdef INET6
nd6_setmtu(ifp);
#endif
rt_updatemtu(ifp);
}
break;

case SIOCGETVLAN:

+ 8
- 0
sys/net/iflib.c View File

@@ -2759,6 +2759,8 @@ iflib_rxeof(iflib_rxq_t rxq, qidx_t budget)
*/
struct mbuf *m, *mh, *mt, *mf;

NET_EPOCH_ASSERT();

lro_possible = v4_forwarding = v6_forwarding = false;
ifp = ctx->ifc_ifp;
mh = mt = NULL;
@@ -3779,6 +3781,7 @@ _task_fn_tx(void *context)
static void
_task_fn_rx(void *context)
{
struct epoch_tracker et;
iflib_rxq_t rxq = context;
if_ctx_t ctx = rxq->ifr_ctx;
bool more;
@@ -3802,6 +3805,7 @@ _task_fn_rx(void *context)
budget = ctx->ifc_sysctl_rx_budget;
if (budget == 0)
budget = 16; /* XXX */
NET_EPOCH_ENTER(et);
if (more == false || (more = iflib_rxeof(rxq, budget)) == false) {
if (ctx->ifc_flags & IFC_LEGACY)
IFDI_INTR_ENABLE(ctx);
@@ -3809,6 +3813,7 @@ _task_fn_rx(void *context)
IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
DBG_COUNTER_INC(rx_intr_enables);
}
NET_EPOCH_EXIT(et);
if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
return;
if (more)
@@ -6811,6 +6816,7 @@ iflib_debugnet_transmit(if_t ifp, struct mbuf *m)
static int
iflib_debugnet_poll(if_t ifp, int count)
{
struct epoch_tracker et;
if_ctx_t ctx;
if_softc_ctx_t scctx;
iflib_txq_t txq;
@@ -6826,8 +6832,10 @@ iflib_debugnet_poll(if_t ifp, int count)
txq = &ctx->ifc_txqs[0];
(void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx));

NET_EPOCH_ENTER(et);
for (i = 0; i < scctx->isc_nrxqsets; i++)
(void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */);
NET_EPOCH_EXIT(et);
return (0);
}
#endif /* DEBUGNET */

+ 1
- 4
sys/net/netisr.c View File

@@ -861,7 +861,6 @@ static u_int
netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
{
struct netisr_work local_npw, *npwp;
struct epoch_tracker et;
u_int handled;
struct mbuf *m;

@@ -891,7 +890,6 @@ netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
npwp->nw_len = 0;
nwsp->nws_pendingbits &= ~(1 << proto);
NWS_UNLOCK(nwsp);
NET_EPOCH_ENTER(et);
while ((m = local_npw.nw_head) != NULL) {
local_npw.nw_head = m->m_nextpkt;
m->m_nextpkt = NULL;
@@ -904,7 +902,6 @@ netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
netisr_proto[proto].np_handler(m);
CURVNET_RESTORE();
}
NET_EPOCH_EXIT(et);
KASSERT(local_npw.nw_len == 0,
("%s(%u): len %u", __func__, proto, local_npw.nw_len));
if (netisr_proto[proto].np_drainedcpu)
@@ -1248,7 +1245,7 @@ netisr_start_swi(u_int cpuid, struct pcpu *pc)
nwsp->nws_cpu = cpuid;
snprintf(swiname, sizeof(swiname), "netisr %u", cpuid);
error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp,
SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie);
SWI_NET, INTR_TYPE_NET | INTR_MPSAFE, &nwsp->nws_swi_cookie);
if (error)
panic("%s: swi_add %d", __func__, error);
pc->pc_netisr = nwsp->nws_intr_event;

+ 6
- 13
sys/net/pfil.c View File

@@ -69,10 +69,6 @@ MTX_SYSINIT(pfil_mtxinit, &pfil_lock, "pfil(9) lock", MTX_DEF);
#define PFIL_UNLOCK() mtx_unlock(&pfil_lock)
#define PFIL_LOCK_ASSERT() mtx_assert(&pfil_lock, MA_OWNED)

#define PFIL_EPOCH net_epoch_preempt
#define PFIL_EPOCH_ENTER(et) epoch_enter_preempt(net_epoch_preempt, &(et))
#define PFIL_EPOCH_EXIT(et) epoch_exit_preempt(net_epoch_preempt, &(et))

struct pfil_hook {
pfil_func_t hook_func;
void *hook_ruleset;
@@ -168,12 +164,13 @@ int
pfil_run_hooks(struct pfil_head *head, pfil_packet_t p, struct ifnet *ifp,
int flags, struct inpcb *inp)
{
struct epoch_tracker et;
pfil_chain_t *pch;
struct pfil_link *link;
pfil_return_t rv;
bool realloc = false;

NET_EPOCH_ASSERT();

if (PFIL_DIR(flags) == PFIL_IN)
pch = &head->head_in;
else if (__predict_true(PFIL_DIR(flags) == PFIL_OUT))
@@ -182,7 +179,6 @@ pfil_run_hooks(struct pfil_head *head, pfil_packet_t p, struct ifnet *ifp,
panic("%s: bogus flags %d", __func__, flags);

rv = PFIL_PASS;
PFIL_EPOCH_ENTER(et);
CK_STAILQ_FOREACH(link, pch, link_chain) {
if ((flags & PFIL_MEMPTR) && !(link->link_flags & PFIL_MEMPTR))
rv = pfil_fake_mbuf(link->link_func, &p, ifp, flags,
@@ -197,7 +193,6 @@ pfil_run_hooks(struct pfil_head *head, pfil_packet_t p, struct ifnet *ifp,
realloc = true;
}
}
PFIL_EPOCH_EXIT(et);
if (realloc && rv == PFIL_PASS)
rv = PFIL_REALLOCED;
return (rv);
@@ -313,9 +308,9 @@ pfil_unlink(struct pfil_link_args *pa, pfil_head_t head, pfil_hook_t hook)
PFIL_UNLOCK();

if (in != NULL)
epoch_call(PFIL_EPOCH, pfil_link_free, &in->link_epoch_ctx);
NET_EPOCH_CALL(pfil_link_free, &in->link_epoch_ctx);
if (out != NULL)
epoch_call(PFIL_EPOCH, pfil_link_free, &out->link_epoch_ctx);
NET_EPOCH_CALL(pfil_link_free, &out->link_epoch_ctx);

if (in == NULL && out == NULL)
return (ENOENT);
@@ -443,15 +438,13 @@ retry:
if (in != NULL) {
head->head_nhooksin--;
hook->hook_links--;
epoch_call(PFIL_EPOCH, pfil_link_free,
&in->link_epoch_ctx);
NET_EPOCH_CALL(pfil_link_free, &in->link_epoch_ctx);
}
out = pfil_link_remove(&head->head_out, hook);
if (out != NULL) {
head->head_nhooksout--;
hook->hook_links--;
epoch_call(PFIL_EPOCH, pfil_link_free,
&out->link_epoch_ctx);
NET_EPOCH_CALL(pfil_link_free, &out->link_epoch_ctx);
}
if (in != NULL || out != NULL)
/* What if some stupid admin put same filter twice? */

+ 176
- 136
sys/netinet/ip_divert.c View File

@@ -122,6 +122,10 @@ static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */

static eventhandler_tag ip_divert_event_tag;

static int div_output_inbound(int fmaily, struct socket *so, struct mbuf *m,
struct sockaddr_in *sin);
static int div_output_outbound(int family, struct socket *so, struct mbuf *m);

/*
* Initialize divert connection block queue.
*/
@@ -308,10 +312,10 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
struct mbuf *control)
{
struct epoch_tracker et;
struct ip *const ip = mtod(m, struct ip *);
const struct ip *ip;
struct m_tag *mtag;
struct ipfw_rule_ref *dt;
int error = 0;
int error, family;

/*
* An mbuf may hasn't come from userland, but we pretend
@@ -330,8 +334,8 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
mtag = m_tag_alloc(MTAG_IPFW_RULE, 0,
sizeof(struct ipfw_rule_ref), M_NOWAIT | M_ZERO);
if (mtag == NULL) {
error = ENOBUFS;
goto cantsend;
m_freem(m);
return (ENOBUFS);
}
m_tag_prepend(m, mtag);
}
@@ -349,6 +353,7 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
dt->chain_id = 0;
dt->rulenum = sin->sin_port+1; /* host format ? */
dt->rule_id = 0;
/* XXX: broken for IPv6 */
/*
* Find receive interface with the given name, stuffed
* (if it exists) in the sin_zero[] field.
@@ -361,157 +366,192 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin,
m->m_pkthdr.rcvif = ifunit(sin->sin_zero);
}

ip = mtod(m, struct ip *);
switch (ip->ip_v) {
case IPVERSION:
family = AF_INET;
break;
case IPV6_VERSION >> 4:
family = AF_INET6;
break;
default:
m_freem(m);
return (EAFNOSUPPORT);
}

/* Reinject packet into the system as incoming or outgoing */
NET_EPOCH_ENTER(et);
if (!sin || sin->sin_addr.s_addr == 0) {
struct mbuf *options = NULL;
struct inpcb *inp;

dt->info |= IPFW_IS_DIVERT | IPFW_INFO_OUT;
inp = sotoinpcb(so);
INP_RLOCK(inp);
switch (ip->ip_v) {
case IPVERSION:
/*
* Don't allow both user specified and setsockopt
* options, and don't allow packet length sizes that
* will crash.
*/
if ((((ip->ip_hl << 2) != sizeof(struct ip)) &&
inp->inp_options != NULL) ||
((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) {
error = EINVAL;
INP_RUNLOCK(inp);
goto cantsend;
}
break;
error = div_output_outbound(family, so, m);
} else {
dt->info |= IPFW_IS_DIVERT | IPFW_INFO_IN;
error = div_output_inbound(family, so, m, sin);
}
NET_EPOCH_EXIT(et);

if (error != 0)
m_freem(m);

return (error);
}

/*
* Sends mbuf @m to the wire via ip[6]_output().
*
* Returns 0 on success, @m is consumed.
* On failure, returns error code. It is caller responsibility to free @m.
*/
static int
div_output_outbound(int family, struct socket *so, struct mbuf *m)
{
struct ip *const ip = mtod(m, struct ip *);
struct mbuf *options;
struct inpcb *inp;
int error;

inp = sotoinpcb(so);
INP_RLOCK(inp);
switch (family) {
case AF_INET:
/*
* Don't allow both user specified and setsockopt
* options, and don't allow packet length sizes that
* will crash.
*/
if ((((ip->ip_hl << 2) != sizeof(struct ip)) &&
inp->inp_options != NULL) ||
((u_short)ntohs(ip->ip_len) > m->m_pkthdr.len)) {
INP_RUNLOCK(inp);
return (EINVAL);
}
break;
#ifdef INET6
case IPV6_VERSION >> 4:
{
struct ip6_hdr *const ip6 = mtod(m, struct ip6_hdr *);

/* Don't allow packet length sizes that will crash */
if (((u_short)ntohs(ip6->ip6_plen) > m->m_pkthdr.len)) {
error = EINVAL;
INP_RUNLOCK(inp);
goto cantsend;
}
break;
}
#endif
default:
error = EINVAL;
case AF_INET6:
{
struct ip6_hdr *const ip6 = mtod(m, struct ip6_hdr *);

/* Don't allow packet length sizes that will crash */
if (((u_short)ntohs(ip6->ip6_plen) > m->m_pkthdr.len)) {
INP_RUNLOCK(inp);
goto cantsend;
return (EINVAL);
}
break;
}
#endif
}

/* Send packet to output processing */
KMOD_IPSTAT_INC(ips_rawout); /* XXX */
/* Send packet to output processing */
KMOD_IPSTAT_INC(ips_rawout); /* XXX */

#ifdef MAC
mac_inpcb_create_mbuf(inp, m);
mac_inpcb_create_mbuf(inp, m);
#endif
/*
* Get ready to inject the packet into ip_output().
* Just in case socket options were specified on the
* divert socket, we duplicate them. This is done
* to avoid having to hold the PCB locks over the call
* to ip_output(), as doing this results in a number of
* lock ordering complexities.
*
* Note that we set the multicast options argument for
* ip_output() to NULL since it should be invariant that
* they are not present.
*/
KASSERT(inp->inp_moptions == NULL,
("multicast options set on a divert socket"));
/*
* XXXCSJP: It is unclear to me whether or not it makes
* sense for divert sockets to have options. However,
* for now we will duplicate them with the INP locks
* held so we can use them in ip_output() without
* requring a reference to the pcb.
*/
if (inp->inp_options != NULL) {
options = m_dup(inp->inp_options, M_NOWAIT);
if (options == NULL) {
INP_RUNLOCK(inp);
error = ENOBUFS;
goto cantsend;
}
/*
* Get ready to inject the packet into ip_output().
* Just in case socket options were specified on the
* divert socket, we duplicate them. This is done
* to avoid having to hold the PCB locks over the call
* to ip_output(), as doing this results in a number of
* lock ordering complexities.
*
* Note that we set the multicast options argument for
* ip_output() to NULL since it should be invariant that
* they are not present.
*/
KASSERT(inp->inp_moptions == NULL,
("multicast options set on a divert socket"));
/*
* XXXCSJP: It is unclear to me whether or not it makes
* sense for divert sockets to have options. However,
* for now we will duplicate them with the INP locks
* held so we can use them in ip_output() without
* requring a reference to the pcb.
*/
options = NULL;
if (inp->inp_options != NULL) {
options = m_dup(inp->inp_options, M_NOWAIT);
if (options == NULL) {
INP_RUNLOCK(inp);
return (ENOBUFS);
}
INP_RUNLOCK(inp);

NET_EPOCH_ENTER(et);
switch (ip->ip_v) {
case IPVERSION:
error = ip_output(m, options, NULL,
((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0)
| IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL);
break;
}
INP_RUNLOCK(inp);

error = 0;
switch (family) {
case AF_INET:
error = ip_output(m, options, NULL,
((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0)
| IP_ALLOWBROADCAST | IP_RAWOUTPUT, NULL, NULL);
break;
#ifdef INET6
case IPV6_VERSION >> 4:
error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
break;
case AF_INET6:
error = ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL);
break;
#endif
}
NET_EPOCH_EXIT(et);
if (options != NULL)
m_freem(options);
} else {
dt->info |= IPFW_IS_DIVERT | IPFW_INFO_IN;
if (m->m_pkthdr.rcvif == NULL) {
/*
* No luck with the name, check by IP address.
* Clear the port and the ifname to make sure
* there are no distractions for ifa_ifwithaddr.
*/
struct epoch_tracker et;
struct ifaddr *ifa;

bzero(sin->sin_zero, sizeof(sin->sin_zero));
sin->sin_port = 0;
NET_EPOCH_ENTER(et);
ifa = ifa_ifwithaddr((struct sockaddr *) sin);
if (ifa == NULL) {
error = EADDRNOTAVAIL;
NET_EPOCH_EXIT(et);
goto cantsend;
}
m->m_pkthdr.rcvif = ifa->ifa_ifp;
NET_EPOCH_EXIT(et);
}
}
if (options != NULL)
m_freem(options);

return (error);
}

/*
* Schedules mbuf @m for local processing via IPv4/IPv6 netisr queue.
*
* Returns 0 on success, @m is consumed.
* Returns error code on failure. It is caller responsibility to free @m.
*/
static int
div_output_inbound(int family, struct socket *so, struct mbuf *m,
struct sockaddr_in *sin)
{
const struct ip *ip;
struct ifaddr *ifa;

if (m->m_pkthdr.rcvif == NULL) {
/*
* No luck with the name, check by IP address.
* Clear the port and the ifname to make sure
* there are no distractions for ifa_ifwithaddr.
*/

/* XXX: broken for IPv6 */
bzero(sin->sin_zero, sizeof(sin->sin_zero));
sin->sin_port = 0;
ifa = ifa_ifwithaddr((struct sockaddr *) sin);
if (ifa == NULL)
return (EADDRNOTAVAIL);
m->m_pkthdr.rcvif = ifa->ifa_ifp;
}
#ifdef MAC
mac_socket_create_mbuf(so, m);
mac_socket_create_mbuf(so, m);
#endif
/* Send packet to input processing via netisr */
switch (ip->ip_v) {
case IPVERSION:
/*
* Restore M_BCAST flag when destination address is
* broadcast. It is expected by ip_tryforward().
*/
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)))
m->m_flags |= M_MCAST;
else if (in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
m->m_flags |= M_BCAST;
netisr_queue_src(NETISR_IP, (uintptr_t)so, m);
break;
/* Send packet to input processing via netisr */
switch (family) {
case AF_INET:
ip = mtod(m, struct ip *);
/*
* Restore M_BCAST flag when destination address is
* broadcast. It is expected by ip_tryforward().
*/
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)))
m->m_flags |= M_MCAST;
else if (in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif))
m->m_flags |= M_BCAST;
netisr_queue_src(NETISR_IP, (uintptr_t)so, m);
break;
#ifdef INET6
case IPV6_VERSION >> 4:
netisr_queue_src(NETISR_IPV6, (uintptr_t)so, m);
break;
case AF_INET6:
netisr_queue_src(NETISR_IPV6, (uintptr_t)so, m);
break;
#endif
default:
error = EINVAL;
goto cantsend;
}
default:
return (EINVAL);
}

return (error);

cantsend:
m_freem(m);
return (error);
return (0);
}

static int

+ 6
- 0
sys/netpfil/pf/if_pfsync.c View File

@@ -1806,6 +1806,7 @@ pfsync_undefer(struct pfsync_deferral *pd, int drop)
static void
pfsync_defer_tmo(void *arg)
{
struct epoch_tracker et;
struct pfsync_deferral *pd = arg;
struct pfsync_softc *sc = pd->pd_sc;
struct mbuf *m = pd->pd_m;
@@ -1814,6 +1815,7 @@ pfsync_defer_tmo(void *arg)

PFSYNC_BUCKET_LOCK_ASSERT(b);

NET_EPOCH_ENTER(et);
CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);

TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry);
@@ -1828,6 +1830,7 @@ pfsync_defer_tmo(void *arg)
pf_release_state(st);

CURVNET_RESTORE();
NET_EPOCH_EXIT(et);
}

static void
@@ -2307,11 +2310,13 @@ pfsync_push_all(struct pfsync_softc *sc)
static void
pfsyncintr(void *arg)
{
struct epoch_tracker et;
struct pfsync_softc *sc = arg;
struct pfsync_bucket *b;
struct mbuf *m, *n;
int c;

NET_EPOCH_ENTER(et);
CURVNET_SET(sc->sc_ifp->if_vnet);

for (c = 0; c < pfsync_buckets; c++) {
@@ -2345,6 +2350,7 @@ pfsyncintr(void *arg)
}
}
CURVNET_RESTORE();
NET_EPOCH_EXIT(et);
}

static int

+ 1
- 1
sys/powerpc/pseries/phyp_llan.c View File

@@ -189,7 +189,7 @@ llan_attach(device_t dev)
return (ENXIO);
}

bus_setup_intr(dev, sc->irq, INTR_TYPE_MISC | INTR_MPSAFE |
bus_setup_intr(dev, sc->irq, INTR_TYPE_NET | INTR_MPSAFE |
INTR_ENTROPY, NULL, llan_intr, sc, &sc->irq_cookie);

/* Setup DMA */

+ 5
- 0
sys/sys/cpu.h View File

@@ -120,11 +120,16 @@ TAILQ_HEAD(cf_level_lst, cf_level);
* information about settings but rely on another machine-dependent driver
* for actually performing the frequency transition (e.g., ACPI performance
* states of type "functional fixed hardware.")
*
* The "unc