Browse Source

MFC

hardened/current/userlandenhanced
attilio 7 years ago
parent
commit
d883da7ba4
27 changed files with 551 additions and 592 deletions
  1. +5
    -4
      contrib/binutils/opcodes/i386-opc.h
  2. +6
    -3
      sbin/devd/devd.conf.5
  3. +2
    -2
      share/man/man4/carp.4
  4. +0
    -1
      sys/conf/files
  5. +2
    -0
      sys/dev/ath/if_ath.c
  6. +2
    -0
      sys/dev/ath/if_ath_tx.c
  7. +1
    -0
      sys/dev/cxgbe/common/t4_hw.h
  8. +229
    -0
      sys/dev/cxgbe/t4_main.c
  9. +1
    -3
      sys/dev/firewire/sbp.c
  10. +147
    -142
      sys/dev/mxge/if_mxge.c
  11. +1
    -31
      sys/dev/mxge/if_mxge_var.h
  12. +0
    -357
      sys/dev/mxge/mxge_lro.c
  13. +0
    -2
      sys/fs/nfs/nfs_commonkrpc.c
  14. +1
    -1
      sys/fs/nfsclient/nfs_clvfsops.c
  15. +14
    -8
      sys/kern/kern_sig.c
  16. +2
    -0
      sys/kern/subr_trap.c
  17. +1
    -0
      sys/kern/vfs_export.c
  18. +1
    -1
      sys/kern/vfs_lookup.c
  19. +1
    -1
      sys/modules/mxge/mxge/Makefile
  20. +1
    -1
      sys/netinet/tcp_lro.c
  21. +0
    -2
      sys/nfsclient/nfs_krpc.c
  22. +1
    -1
      sys/nfsclient/nfs_vfsops.c
  23. +116
    -24
      sys/sys/mount.h
  24. +2
    -2
      sys/sys/signalvar.h
  25. +0
    -2
      sys/sys/systm.h
  26. +3
    -0
      sys/tools/vnode_if.awk
  27. +12
    -4
      sys/x86/isa/atrtc.c

+ 5
- 4
contrib/binutils/opcodes/i386-opc.h View File

@@ -73,15 +73,16 @@ typedef struct template
#define CpuSSE4_2 0x800000 /* SSE4.2 Instructions required */
#define CpuXSAVE 0x1000000 /* XSAVE Instructions required */
#define CpuAES 0x2000000 /* AES Instructions required */
#define CpuPCLMUL 0x4000000 /* Carry-less Multiplication extensions */

/* SSE4.1/4.2 Instructions required */
#define CpuSSE4 (CpuSSE4_1|CpuSSE4_2)

/* These flags are set by gas depending on the flag_code. */
#define Cpu64 0x4000000 /* 64bit support required */
#define CpuNo64 0x8000000 /* Not supported in the 64bit mode */

#define CpuPCLMUL 0x10000000 /* Carry-less Multiplication extensions */

/* SSE4.1/4.2 Instructions required */
#define CpuSSE4 (CpuSSE4_1|CpuSSE4_2)

/* The default value for unknown CPUs - enable all features to avoid problems. */
#define CpuUnknownFlags (Cpu186|Cpu286|Cpu386|Cpu486|Cpu586|Cpu686 \
|CpuP4|CpuSledgehammer|CpuMMX|CpuMMX2|CpuSSE|CpuSSE2|CpuSSE3|CpuVMX \


+ 6
- 3
sbin/devd/devd.conf.5 View File

@@ -41,7 +41,7 @@
.\" ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
.\" SOFTWARE.
.\"
.Dd December 16, 2011
.Dd February 22, 2013
.Dt DEVD.CONF 5
.Os
.Sh NAME
@@ -181,9 +181,8 @@ Valid media types are:
.Dq Li Tokenring ,
.Dq Li FDDI ,
.Dq Li 802.11 ,
.Dq Li ATM ,
and
.Dq Li CARP .
.Dq Li ATM .
.It Ic subdevice Qq Ar string ;
This is shorthand for
.Dq Ic match Qo Li subdevice Qc Qq Ar string .
@@ -350,6 +349,7 @@ The network interface is attached to the system.
The network interface is detached from the system.
.El
.El
.Pp
.It Li DEVFS
Events related to the
.Xr devfs 5
@@ -369,6 +369,7 @@ The
node is destroyed.
.El
.El
.Pp
.It Li USB
Events related to the USB subsystem.
.Bl -tag -width ".Sy Subsystem" -compact
@@ -390,6 +391,7 @@ USB interface is attached to a device.
USB interface is detached from a device.
.El
.El
.Pp
.It Li coretemp
Events related to the
.Xr coretemp 4
@@ -404,6 +406,7 @@ Notification that the CPU core has reached critical temperature.
String containing the temperature of the core that has become too hot.
.El
.El
.Pp
.It Li kern
Events related to the kernel.
.Bl -tag -width ".Sy Subsystem" -compact


+ 2
- 2
share/man/man4/carp.4 View File

@@ -26,7 +26,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd December 25, 2012
.Dd February 21, 2013
.Dt CARP 4
.Os
.Sh NAME
@@ -281,7 +281,7 @@ status change events can be set up by using the following devd.conf rule:
.Bd -literal -offset indent
notify 0 {
match "system" "CARP";
match "subsystem" "[0-9]+@";
match "subsystem" "[0-9]+@[0-9a-z]+";
match "type" "(MASTER|BACKUP)";
action "/root/carpcontrol.sh $subsystem $type";
};


+ 0
- 1
sys/conf/files View File

@@ -1743,7 +1743,6 @@ mwlboot.fw optional mwlfw \
no-obj no-implicit-rule \
clean "mwlboot.fw"
dev/mxge/if_mxge.c optional mxge pci
dev/mxge/mxge_lro.c optional mxge pci
dev/mxge/mxge_eth_z8e.c optional mxge pci
dev/mxge/mxge_ethp_z8e.c optional mxge pci
dev/mxge/mxge_rss_eth_z8e.c optional mxge pci


+ 2
- 0
sys/dev/ath/if_ath.c View File

@@ -3631,12 +3631,14 @@ ath_tx_default_comp(struct ath_softc *sc, struct ath_buf *bf, int fail)
st = ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0) ?
ts->ts_status : HAL_TXERR_XRETRY;

#if 0
if (bf->bf_state.bfs_dobaw)
device_printf(sc->sc_dev,
"%s: bf %p: seqno %d: dobaw should've been cleared!\n",
__func__,
bf,
SEQNO(bf->bf_state.bfs_seqno));
#endif
if (bf->bf_next != NULL)
device_printf(sc->sc_dev,
"%s: bf %p: seqno %d: bf_next not NULL!\n",


+ 2
- 0
sys/dev/ath/if_ath_tx.c View File

@@ -3373,6 +3373,7 @@ ath_tx_tid_drain_pkt(struct ath_softc *sc, struct ath_node *an,
ath_tx_update_baw(sc, an, tid, bf);
bf->bf_state.bfs_dobaw = 0;
}
#if 0
/*
* This has become a non-fatal error now
*/
@@ -3380,6 +3381,7 @@ ath_tx_tid_drain_pkt(struct ath_softc *sc, struct ath_node *an,
device_printf(sc->sc_dev,
"%s: wasn't added: seqno %d\n",
__func__, SEQNO(bf->bf_state.bfs_seqno));
#endif
}
TAILQ_INSERT_TAIL(bf_cq, bf, bf_list);
}


+ 1
- 0
sys/dev/cxgbe/common/t4_hw.h View File

@@ -58,6 +58,7 @@ enum {
CIM_PIFLA_SIZE = 64, /* # of 192-bit words in CIM PIF LA */
CIM_MALA_SIZE = 64, /* # of 160-bit words in CIM MA LA */
CIM_IBQ_SIZE = 128, /* # of 128-bit words in a CIM IBQ */
CIM_OBQ_SIZE = 128, /* # of 128-bit words in a CIM OBQ */
TPLA_SIZE = 128, /* # of 64-bit words in TP LA */
ULPRX_LA_SIZE = 512, /* # of 256-bit words in ULP_RX LA */
};


+ 229
- 0
sys/dev/cxgbe/t4_main.c View File

@@ -317,6 +317,9 @@ static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS);
static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS);
#ifdef SBUF_DRAIN
static int sysctl_cctrl(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_la(SYSCTL_HANDLER_ARGS);
static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS);
static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS);
static int sysctl_devlog(SYSCTL_HANDLER_ARGS);
@@ -3171,6 +3174,62 @@ t4_sysctls(struct adapter *sc)
CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
sysctl_cctrl, "A", "congestion control");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0",
CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1",
CTLTYPE_STRING | CTLFLAG_RD, sc, 1,
sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp",
CTLTYPE_STRING | CTLFLAG_RD, sc, 2,
sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0",
CTLTYPE_STRING | CTLFLAG_RD, sc, 3,
sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1",
CTLTYPE_STRING | CTLFLAG_RD, sc, 4,
sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi",
CTLTYPE_STRING | CTLFLAG_RD, sc, 5,
sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la",
CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
sysctl_cim_la, "A", "CIM logic analyzer");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0",
CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ,
sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1",
CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ,
sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2",
CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ,
sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3",
CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ,
sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge",
CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ,
sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi",
CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ,
sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg",
CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
sysctl_cim_qcfg, "A", "CIM queue configuration");

SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats",
CTLTYPE_STRING | CTLFLAG_RD, sc, 0,
sysctl_cpl_stats, "A", "CPL statistics");
@@ -3694,6 +3753,176 @@ sysctl_cctrl(SYSCTL_HANDLER_ARGS)
return (rc);
}

/*
 * Display names for the CIM queues: the CIM_NUM_IBQ inbound queues come
 * first, followed by the CIM_NUM_OBQ outbound queues.  The sysctl handlers
 * below index this table with the combined queue id, i.e. an outbound
 * queue's slot is its OBQ number plus CIM_NUM_IBQ.
 */
static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ] = {
"TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI", /* inbound queues (IBQs) */
"ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI" /* outbound queues (OBQs) */
};

/*
 * Sysctl handler that dumps the contents of one CIM inbound or outbound
 * queue as text.
 *
 * arg1 is the adapter and arg2 the combined queue id: values below
 * CIM_NUM_IBQ select an inbound queue, the rest select outbound queue
 * (arg2 - CIM_NUM_IBQ).  The output is a header line naming the queue
 * followed by one line per four 32-bit words, each prefixed with its byte
 * offset.
 *
 * Returns 0 on success.  A negative result from the queue read routine is
 * negated before being returned; ENOMEM is returned if the sbuf cannot be
 * created.
 */
static int
sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	uint32_t *buf, *row;
	char *qtype;
	int qid = arg2;
	int nwords, nbytes, off, rc;

	KASSERT(qid >= 0 && qid < nitems(qname),
	    ("%s: bad qid %d\n", __func__, qid));

	if (qid >= CIM_NUM_IBQ) {
		/* Outbound queue: rebase the id so it counts from OBQ 0. */
		qtype = "OBQ";
		qid -= CIM_NUM_IBQ;
		nwords = 4 * 6 * CIM_OBQ_SIZE;
		buf = malloc(nwords * sizeof(uint32_t), M_CXGBE,
		    M_ZERO | M_WAITOK);
		rc = t4_read_cim_obq(sc, qid, buf, nwords);
	} else {
		/* Inbound queue. */
		qtype = "IBQ";
		nwords = 4 * CIM_IBQ_SIZE;
		buf = malloc(nwords * sizeof(uint32_t), M_CXGBE,
		    M_ZERO | M_WAITOK);
		rc = t4_read_cim_ibq(sc, qid, buf, nwords);
	}

	if (rc < 0) {
		rc = -rc;
		goto done;
	}

	/* A non-negative rc is the number of words actually read. */
	nbytes = rc * sizeof(uint32_t);

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		goto done;

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL) {
		rc = ENOMEM;
		goto done;
	}

	/* qname is indexed by the original (combined) queue id. */
	sbuf_printf(sb, "%s%d %s", qtype, qid, qname[arg2]);

	/* Four words (16 bytes) per output line. */
	for (off = 0; off < nbytes; off += 16) {
		row = &buf[off / 4];
		sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", off, row[0],
		    row[1], row[2], row[3]);
	}

	rc = sbuf_finish(sb);
	sbuf_delete(sb);
done:
	free(buf, M_CXGBE);
	return (rc);
}

/*
 * Sysctl handler that dumps the CIM logic analyzer (LA) capture as text.
 *
 * arg1 is the adapter.  The LA configuration word is read first to learn
 * whether F_UPDBGLACAPTPCONLY is set, which selects between the two output
 * layouts below.  The LA buffer (sc->params.cim_la_size 32-bit words) is
 * then read with t4_cim_read_la() and printed eight words at a time.
 *
 * Returns 0 on success.  Results of the t4_cim_* routines are negated
 * before being returned; ENOMEM is returned if the sbuf cannot be created.
 */
static int
sysctl_cim_la(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	u_int cfg;
	struct sbuf *sb;
	uint32_t *buf, *p;
	int rc;

	rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg);
	if (rc != 0)
		return (rc);

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE,
	    M_ZERO | M_WAITOK);

	/*
	 * From here on both sb and buf are live; every exit goes through
	 * "done" so that neither is leaked.
	 */
	rc = -t4_cim_read_la(sc, buf, NULL);
	if (rc != 0)
		goto done;

	sbuf_printf(sb, "Status Data PC%s",
	    cfg & F_UPDBGLACAPTPCONLY ? "" :
	    " LS0Stat LS0Addr LS0Data");

	/* The loop below consumes the buffer in groups of eight words. */
	KASSERT((sc->params.cim_la_size & 7) == 0,
	    ("%s: p will walk off the end of buf", __func__));

	for (p = buf; p < &buf[sc->params.cim_la_size]; p += 8) {
		if (cfg & F_UPDBGLACAPTPCONLY) {
			/* PC-only capture: three lines per eight words. */
			sbuf_printf(sb, "\n %02x %08x %08x", p[5] & 0xff,
			    p[6], p[7]);
			sbuf_printf(sb, "\n %02x %02x%06x %02x%06x",
			    (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8,
			    p[4] & 0xff, p[5] >> 8);
			sbuf_printf(sb, "\n %02x %x%07x %x%07x",
			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
			    p[1] & 0xf, p[2] >> 4);
		} else {
			/* Full capture: one line per eight words. */
			sbuf_printf(sb,
			    "\n %02x %x%07x %x%07x %08x %08x "
			    "%08x%08x%08x%08x",
			    (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4,
			    p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5],
			    p[6], p[7]);
		}
	}

	rc = sbuf_finish(sb);
done:
	/*
	 * Release the sbuf on the error path too; previously a failed
	 * t4_cim_read_la() jumped past sbuf_delete() and leaked it.
	 */
	sbuf_delete(sb);
	free(buf, M_CXGBE);
	return (rc);
}

/*
 * Sysctl handler that prints a table describing every CIM queue.
 *
 * arg1 is the adapter.  Queue status words are read starting at
 * A_UP_IBQ_0_RDADDR (four words per queue, inbound queues first) and the
 * outbound queue write words starting at A_UP_OBQ_0_REALADDR (two words
 * per outbound queue).  Base, size and threshold values come from
 * t4_read_cimq_cfg().  Inbound rows carry a threshold column; outbound
 * rows do not.
 *
 * Returns 0 on success.  Results of t4_cim_read() are negated before being
 * returned; ENOMEM is returned if the sbuf cannot be created.
 */
static int
sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ];
	uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ];
	uint16_t thres[CIM_NUM_IBQ];
	uint32_t obq_wr[2 * CIM_NUM_OBQ];
	uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ)];
	uint32_t *q, *w;
	int rc, i;

	rc = -t4_cim_read(sc, A_UP_IBQ_0_RDADDR, nitems(stat), stat);
	if (rc != 0)
		return (rc);
	rc = -t4_cim_read(sc, A_UP_OBQ_0_REALADDR, nitems(obq_wr), obq_wr);
	if (rc != 0)
		return (rc);

	t4_read_cimq_cfg(sc, base, size, thres);

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	sbuf_printf(sb, "Queue Base Size Thres RdPtr WrPtr SOP EOP Avail");

	for (i = 0; i < CIM_NUM_IBQ + CIM_NUM_OBQ; i++) {
		/* Four status words per queue. */
		q = &stat[4 * i];

		if (i < CIM_NUM_IBQ) {
			sbuf_printf(sb,
			    "\n%5s %5x %5u %4u %6x %4x %4u %4u %5u",
			    qname[i], base[i], size[i], thres[i],
			    G_IBQRDADDR(q[0]), G_IBQWRADDR(q[1]),
			    G_QUESOPCNT(q[3]), G_QUEEOPCNT(q[3]),
			    G_QUEREMFLITS(q[2]) * 16);
		} else {
			/* Two write words per outbound queue. */
			w = &obq_wr[2 * (i - CIM_NUM_IBQ)];
			sbuf_printf(sb, "\n%5s %5x %5u %11x %4x %4u %4u %5u",
			    qname[i], base[i], size[i],
			    G_QUERDADDR(q[0]) & 0x3fff, w[0] - base[i],
			    G_QUESOPCNT(q[3]), G_QUEEOPCNT(q[3]),
			    G_QUEREMFLITS(q[2]) * 16);
		}
	}

	rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}

static int
sysctl_cpl_stats(SYSCTL_HANDLER_ARGS)
{


+ 1
- 3
sys/dev/firewire/sbp.c View File

@@ -2488,16 +2488,14 @@ printf("ORB %08x %08x %08x %08x\n", ntohl(ocb->orb[0]), ntohl(ocb->orb[1]), ntoh
printf("ORB %08x %08x %08x %08x\n", ntohl(ocb->orb[4]), ntohl(ocb->orb[5]), ntohl(ocb->orb[6]), ntohl(ocb->orb[7]));
*/
if (ccb->csio.dxfer_len > 0) {
int s, error;
int error;

s = splsoftvm();
error = bus_dmamap_load_ccb(/*dma tag*/sbp->dmat,
/*dma map*/ocb->dmamap,
ccb,
sbp_execute_ocb,
ocb,
/*flags*/0);
splx(s);
if (error)
printf("sbp: bus_dmamap_load error %d\n", error);
} else


+ 147
- 142
sys/dev/mxge/if_mxge.c View File

@@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet6/ip6_var.h>

#include <machine/bus.h>
@@ -102,7 +103,6 @@ static int mxge_intr_coal_delay = 30;
static int mxge_deassert_wait = 1;
static int mxge_flow_control = 1;
static int mxge_verbose = 0;
static int mxge_lro_cnt = 8;
static int mxge_ticks;
static int mxge_max_slices = 1;
static int mxge_rss_hash_type = MXGEFW_RSS_HASH_TYPE_SRC_DST_PORT;
@@ -1311,9 +1311,9 @@ mxge_reset(mxge_softc_t *sc, int interrupts_setup)
ss->tx.stall = 0;
ss->rx_big.cnt = 0;
ss->rx_small.cnt = 0;
ss->lro_bad_csum = 0;
ss->lro_queued = 0;
ss->lro_flushed = 0;
ss->lc.lro_bad_csum = 0;
ss->lc.lro_queued = 0;
ss->lc.lro_flushed = 0;
if (ss->fw_stats != NULL) {
bzero(ss->fw_stats, sizeof *ss->fw_stats);
}
@@ -1413,50 +1413,6 @@ mxge_change_flow_control(SYSCTL_HANDLER_ARGS)
return err;
}

/*
 * Record a new LRO queue count and mirror it in the interface's enabled
 * capabilities: IFCAP_LRO is set when lro_cnt is non-zero and cleared when
 * it is zero.  If the interface is running it is closed and reopened.
 *
 * Returns 0, or the result of mxge_open() when the interface was restarted.
 * NOTE(review): the sysctl caller in this file takes sc->driver_mtx around
 * this call; presumably every caller must hold it.
 */
static int
mxge_change_lro_locked(mxge_softc_t *sc, int lro_cnt)
{
	struct ifnet *ifp = sc->ifp;

	if (lro_cnt != 0)
		ifp->if_capenable |= IFCAP_LRO;
	else
		ifp->if_capenable &= ~IFCAP_LRO;
	sc->lro_cnt = lro_cnt;

	/* Nothing more to do unless the interface is up and running. */
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return 0;

	mxge_close(sc, 0);
	return mxge_open(sc);
}

/*
 * Sysctl handler for the "lro_cnt" knob.  arg1 is the softc.
 *
 * The current count is handed to sysctl_handle_int(); if the value comes
 * back unchanged nothing is done.  Values above 128 are rejected with
 * EINVAL.  Otherwise the new count is applied under sc->driver_mtx.
 */
static int
mxge_change_lro(SYSCTL_HANDLER_ARGS)
{
	mxge_softc_t *sc = arg1;
	unsigned int requested = sc->lro_cnt;
	int err;

	err = sysctl_handle_int(oidp, &requested, arg2, req);
	if (err != 0)
		return err;

	/* Unchanged value: succeed without touching the interface. */
	if (requested == sc->lro_cnt)
		return 0;

	if (requested > 128)
		return EINVAL;

	mtx_lock(&sc->driver_mtx);
	err = mxge_change_lro_locked(sc, requested);
	mtx_unlock(&sc->driver_mtx);

	return err;
}

static int
mxge_handle_be32(SYSCTL_HANDLER_ARGS)
{
@@ -1653,14 +1609,6 @@ mxge_add_sysctls(mxge_softc_t *sc)
CTLFLAG_RW, &mxge_verbose,
0, "verbose printing");

/* lro */
SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
"lro_cnt",
CTLTYPE_INT|CTLFLAG_RW, sc,
0, mxge_change_lro,
"I", "number of lro merge queues");


/* add counters exported for debugging from all slices */
sysctl_ctx_init(&sc->slice_sysctl_ctx);
sc->slice_sysctl_tree =
@@ -1686,11 +1634,15 @@ mxge_add_sysctls(mxge_softc_t *sc)
CTLFLAG_RD, &ss->rx_big.cnt,
0, "rx_small_cnt");
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
"lro_flushed", CTLFLAG_RD, &ss->lro_flushed,
"lro_flushed", CTLFLAG_RD, &ss->lc.lro_flushed,
0, "number of lro merge queues flushed");

SYSCTL_ADD_INT(ctx, children, OID_AUTO,
"lro_queued", CTLFLAG_RD, &ss->lro_queued,
"lro_bad_csum", CTLFLAG_RD, &ss->lc.lro_bad_csum,
0, "number of bad csums preventing LRO");

SYSCTL_ADD_INT(ctx, children, OID_AUTO,
"lro_queued", CTLFLAG_RD, &ss->lc.lro_queued,
0, "number of frames appended to lro merge"
"queues");

@@ -2534,6 +2486,64 @@ done:
return err;
}

#ifdef INET6

/*
 * Sum the 16-bit words at raw, consuming len bytes two at a time, and fold
 * the 32-bit total down to 16 bits with end-around carry.  The result is
 * the plain (not complemented) sum.  A non-positive len yields 0.
 */
static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	uint32_t sum = 0;
	int remaining;

	for (remaining = len; remaining > 0; remaining -= 2)
		sum += *raw++;

	/* Two folds are enough to absorb the carry of the first one. */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/*
 * Check the checksum of a received IPv6 TCP or UDP frame.
 *
 * p points at the IPv6 header, m is the mbuf holding the frame (Ethernet
 * header included) and csum is the partial checksum for the frame.
 *
 * Returns 1 when the upper-layer protocol is neither TCP nor UDP.
 * Otherwise returns the pseudo-header checksum result XORed with 0xffff;
 * the caller in this file treats 0 as "checksum good".
 */
static inline uint16_t
mxge_rx_csum6(void *p, struct mbuf *m, uint32_t csum)
{
	struct ip6_hdr *ip6 = p;
	uint32_t hdr_sum, neg_hdr_sum;
	uint16_t result;
	int nxt, l4_off;

	/* Assume no extension headers until proven otherwise. */
	nxt = ip6->ip6_nxt;
	l4_off = sizeof (*ip6) + ETHER_HDR_LEN;
	if (!(nxt == IPPROTO_TCP || nxt == IPPROTO_UDP)) {
		l4_off = ip6_lasthdr(m, ETHER_HDR_LEN, IPPROTO_IPV6, &nxt);
		if (!(nxt == IPPROTO_TCP || nxt == IPPROTO_UDP))
			return (1);
	}

	/*
	 * IPv6 headers do not contain a checksum, and hence do not
	 * checksum to zero, so they don't "fall out" of the partial
	 * checksum calculation like IPv4 headers do.  Fix the partial
	 * checksum by subtracting the checksum of the IPv6 header(s):
	 * add the complement, then the end-around carry, then fold.
	 */
	hdr_sum = mxge_csum_generic((uint16_t *)ip6, l4_off - ETHER_HDR_LEN);
	neg_hdr_sum = ~hdr_sum;
	csum += neg_hdr_sum;
	csum += (csum < neg_hdr_sum);
	csum = (csum & 0xFFFF) + (csum >> 16);
	csum = (csum & 0xFFFF) + (csum >> 16);

	result = in6_cksum_pseudo(ip6, m->m_pkthdr.len - l4_off, nxt, csum);
	result ^= 0xffff;
	return (result);
}
#endif /* INET6 */
/*
* Myri10GE hardware checksums are not valid if the sender
* padded the frame with non-zero padding. This is because
@@ -2547,26 +2557,39 @@ static inline uint16_t
mxge_rx_csum(struct mbuf *m, int csum)
{
struct ether_header *eh;
#ifdef INET
struct ip *ip;
uint16_t c;
#endif
int cap = m->m_pkthdr.rcvif->if_capenable;
uint16_t c, etype;

eh = mtod(m, struct ether_header *);

/* only deal with IPv4 TCP & UDP for now */
if (__predict_false(eh->ether_type != htons(ETHERTYPE_IP)))
return 1;
ip = (struct ip *)(eh + 1);
if (__predict_false(ip->ip_p != IPPROTO_TCP &&
ip->ip_p != IPPROTO_UDP))
return 1;
eh = mtod(m, struct ether_header *);
etype = ntohs(eh->ether_type);
switch (etype) {
#ifdef INET
c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htonl(ntohs(csum) + ntohs(ip->ip_len) +
- (ip->ip_hl << 2) + ip->ip_p));
#else
c = 1;
case ETHERTYPE_IP:
if ((cap & IFCAP_RXCSUM) == 0)
return (1);
ip = (struct ip *)(eh + 1);
if (ip->ip_p != IPPROTO_TCP && ip->ip_p != IPPROTO_UDP)
return (1);
c = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
htonl(ntohs(csum) + ntohs(ip->ip_len) -
(ip->ip_hl << 2) + ip->ip_p));
c ^= 0xffff;
break;
#endif
c ^= 0xffff;
#ifdef INET6
case ETHERTYPE_IPV6:
if ((cap & IFCAP_RXCSUM_IPV6) == 0)
return (1);
c = mxge_rx_csum6((eh + 1), m, csum);
break;
#endif
default:
c = 1;
}
return (c);
}

@@ -2628,7 +2651,8 @@ mxge_vlan_tag_remove(struct mbuf *m, uint32_t *csum)


static inline void
mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len,
uint32_t csum, int lro)
{
mxge_softc_t *sc;
struct ifnet *ifp;
@@ -2637,7 +2661,6 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_rx_ring_t *rx;
bus_dmamap_t old_map;
int idx;
uint16_t tcpudp_csum;

sc = ss->sc;
ifp = sc->ifp;
@@ -2674,14 +2697,18 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_vlan_tag_remove(m, &csum);
}
/* if the checksum is valid, mark it in the mbuf header */
if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
return;
/* otherwise, it was a UDP frame, or a TCP frame which
we could not do LRO on. Tell the stack that the
checksum is good */
if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
(0 == mxge_rx_csum(m, csum))) {
/* Tell the stack that the checksum is good */
m->m_pkthdr.csum_data = 0xffff;
m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
CSUM_DATA_VALID;

#if defined(INET) || defined (INET6)
if (lro && (0 == tcp_lro_rx(&ss->lc, m, 0)))
return;
#endif
}
/* flowid only valid if RSS hashing is enabled */
if (sc->num_slices > 1) {
@@ -2693,7 +2720,8 @@ mxge_rx_done_big(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
}

static inline void
mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len,
uint32_t csum, int lro)
{
mxge_softc_t *sc;
struct ifnet *ifp;
@@ -2702,7 +2730,6 @@ mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_rx_ring_t *rx;
bus_dmamap_t old_map;
int idx;
uint16_t tcpudp_csum;

sc = ss->sc;
ifp = sc->ifp;
@@ -2739,14 +2766,17 @@ mxge_rx_done_small(struct mxge_slice_state *ss, uint32_t len, uint32_t csum)
mxge_vlan_tag_remove(m, &csum);
}
/* if the checksum is valid, mark it in the mbuf header */
if (sc->csum_flag && (0 == (tcpudp_csum = mxge_rx_csum(m, csum)))) {
if (sc->lro_cnt && (0 == mxge_lro_rx(ss, m, csum)))
return;
/* otherwise, it was a UDP frame, or a TCP frame which
we could not do LRO on. Tell the stack that the
checksum is good */
if ((ifp->if_capenable & (IFCAP_RXCSUM_IPV6 | IFCAP_RXCSUM)) &&
(0 == mxge_rx_csum(m, csum))) {
/* Tell the stack that the checksum is good */
m->m_pkthdr.csum_data = 0xffff;
m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR | CSUM_DATA_VALID;
m->m_pkthdr.csum_flags = CSUM_PSEUDO_HDR |
CSUM_DATA_VALID;

#if defined(INET) || defined (INET6)
if (lro && (0 == tcp_lro_rx(&ss->lc, m, csum)))
return;
#endif
}
/* flowid only valid if RSS hashing is enabled */
if (sc->num_slices > 1) {
@@ -2764,16 +2794,17 @@ mxge_clean_rx_done(struct mxge_slice_state *ss)
int limit = 0;
uint16_t length;
uint16_t checksum;
int lro;

lro = ss->sc->ifp->if_capenable & IFCAP_LRO;
while (rx_done->entry[rx_done->idx].length != 0) {
length = ntohs(rx_done->entry[rx_done->idx].length);
rx_done->entry[rx_done->idx].length = 0;
checksum = rx_done->entry[rx_done->idx].checksum;
if (length <= (MHLEN - MXGEFW_PAD))
mxge_rx_done_small(ss, length, checksum);
mxge_rx_done_small(ss, length, checksum, lro);
else
mxge_rx_done_big(ss, length, checksum);
mxge_rx_done_big(ss, length, checksum, lro);
rx_done->cnt++;
rx_done->idx = rx_done->cnt & rx_done->mask;

@@ -2781,11 +2812,11 @@ mxge_clean_rx_done(struct mxge_slice_state *ss)
if (__predict_false(++limit > rx_done->mask / 2))
break;
}
#ifdef INET
while (!SLIST_EMPTY(&ss->lro_active)) {
struct lro_entry *lro = SLIST_FIRST(&ss->lro_active);
SLIST_REMOVE_HEAD(&ss->lro_active, next);
mxge_lro_flush(ss, lro);
#if defined(INET) || defined (INET6)
while (!SLIST_EMPTY(&ss->lc.lro_active)) {
struct lro_entry *lro = SLIST_FIRST(&ss->lc.lro_active);
SLIST_REMOVE_HEAD(&ss->lc.lro_active, next);
tcp_lro_flush(&ss->lc, lro);
}
#endif
}
@@ -3153,15 +3184,11 @@ mxge_init(void *arg)
static void
mxge_free_slice_mbufs(struct mxge_slice_state *ss)
{
struct lro_entry *lro_entry;
int i;

while (!SLIST_EMPTY(&ss->lro_free)) {
lro_entry = SLIST_FIRST(&ss->lro_free);
SLIST_REMOVE_HEAD(&ss->lro_free, next);
free(lro_entry, M_DEVBUF);
}

#if defined(INET) || defined(INET6)
tcp_lro_free(&ss->lc);
#endif
for (i = 0; i <= ss->rx_big.mask; i++) {
if (ss->rx_big.info[i].m == NULL)
continue;
@@ -3545,26 +3572,17 @@ mxge_slice_open(struct mxge_slice_state *ss, int nbufs, int cl_size)
mxge_softc_t *sc;
mxge_cmd_t cmd;
bus_dmamap_t map;
struct lro_entry *lro_entry;
int err, i, slice;


sc = ss->sc;
slice = ss - sc->ss;

SLIST_INIT(&ss->lro_free);
SLIST_INIT(&ss->lro_active);

for (i = 0; i < sc->lro_cnt; i++) {
lro_entry = (struct lro_entry *)
malloc(sizeof (*lro_entry), M_DEVBUF,
M_NOWAIT | M_ZERO);
if (lro_entry == NULL) {
sc->lro_cnt = i;
break;
}
SLIST_INSERT_HEAD(&ss->lro_free, lro_entry, next);
}
#if defined(INET) || defined(INET6)
(void)tcp_lro_init(&ss->lc);
#endif
ss->lc.ifp = sc->ifp;
/* get the lanai pointers to the send and receive rings */

err = 0;
@@ -4219,10 +4237,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
} else if (mask & IFCAP_RXCSUM) {
if (IFCAP_RXCSUM & ifp->if_capenable) {
ifp->if_capenable &= ~IFCAP_RXCSUM;
sc->csum_flag = 0;
} else {
ifp->if_capenable |= IFCAP_RXCSUM;
sc->csum_flag = 1;
}
}
if (mask & IFCAP_TSO4) {
@@ -4249,16 +4265,12 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
ifp->if_hwassist |= (CSUM_TCP_IPV6
| CSUM_UDP_IPV6);
}
#ifdef NOTYET
} else if (mask & IFCAP_RXCSUM6) {
if (IFCAP_RXCSUM6 & ifp->if_capenable) {
ifp->if_capenable &= ~IFCAP_RXCSUM6;
sc->csum_flag = 0;
} else if (mask & IFCAP_RXCSUM_IPV6) {
if (IFCAP_RXCSUM_IPV6 & ifp->if_capenable) {
ifp->if_capenable &= ~IFCAP_RXCSUM_IPV6;
} else {
ifp->if_capenable |= IFCAP_RXCSUM6;
sc->csum_flag = 1;
ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
}
#endif
}
if (mask & IFCAP_TSO6) {
if (IFCAP_TSO6 & ifp->if_capenable) {
@@ -4274,12 +4286,8 @@ mxge_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
}
#endif /*IFCAP_TSO6 */

if (mask & IFCAP_LRO) {
if (IFCAP_LRO & ifp->if_capenable)
err = mxge_change_lro_locked(sc, 0);
else
err = mxge_change_lro_locked(sc, mxge_lro_cnt);
}
if (mask & IFCAP_LRO)
ifp->if_capenable ^= IFCAP_LRO;
if (mask & IFCAP_VLAN_HWTAGGING)
ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
if (mask & IFCAP_VLAN_HWTSO)
@@ -4326,14 +4334,11 @@ mxge_fetch_tunables(mxge_softc_t *sc)
TUNABLE_INT_FETCH("hw.mxge.verbose",
&mxge_verbose);
TUNABLE_INT_FETCH("hw.mxge.ticks", &mxge_ticks);
TUNABLE_INT_FETCH("hw.mxge.lro_cnt", &sc->lro_cnt);
TUNABLE_INT_FETCH("hw.mxge.always_promisc", &mxge_always_promisc);
TUNABLE_INT_FETCH("hw.mxge.rss_hash_type", &mxge_rss_hash_type);
TUNABLE_INT_FETCH("hw.mxge.rss_hashtype", &mxge_rss_hash_type);
TUNABLE_INT_FETCH("hw.mxge.initial_mtu", &mxge_initial_mtu);
TUNABLE_INT_FETCH("hw.mxge.throttle", &mxge_throttle);
if (sc->lro_cnt != 0)
mxge_lro_cnt = sc->lro_cnt;

if (bootverbose)
mxge_verbose = 1;
@@ -4897,8 +4902,9 @@ mxge_attach(device_t dev)

if_initbaudrate(ifp, IF_Gbps(10));
ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 |
IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6;
#ifdef INET
IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6 |
IFCAP_RXCSUM_IPV6;
#if defined(INET) || defined(INET6)
ifp->if_capabilities |= IFCAP_LRO;
#endif

@@ -4929,7 +4935,6 @@ mxge_attach(device_t dev)
ifp->if_capenable = ifp->if_capabilities;
if (sc->lro_cnt == 0)
ifp->if_capenable &= ~IFCAP_LRO;
sc->csum_flag = 1;
ifp->if_init = mxge_init;
ifp->if_softc = sc;
ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;


+ 1
- 31
sys/dev/mxge/if_mxge_var.h View File

@@ -194,31 +194,6 @@ typedef struct
char mtx_name[16];
} mxge_tx_ring_t;

struct lro_entry;
struct lro_entry
{
SLIST_ENTRY(lro_entry) next;
struct mbuf *m_head;
struct mbuf *m_tail;
int timestamp;
struct ip *ip;
uint32_t tsval;
uint32_t tsecr;
uint32_t source_ip;
uint32_t dest_ip;
uint32_t next_seq;
uint32_t ack_seq;
uint32_t len;
uint32_t data_csum;
uint16_t window;
uint16_t source_port;
uint16_t dest_port;
uint16_t append_cnt;
uint16_t mss;
};
SLIST_HEAD(lro_head, lro_entry);

struct mxge_softc;
typedef struct mxge_softc mxge_softc_t;

@@ -236,11 +211,7 @@ struct mxge_slice_state {
u_long omcasts;
u_long oerrors;
int if_drv_flags;
struct lro_head lro_active;
struct lro_head lro_free;
int lro_queued;
int lro_flushed;
int lro_bad_csum;
struct lro_ctrl lc;
mxge_dma_t fw_stats_dma;
struct sysctl_oid *sysctl_tree;
struct sysctl_ctx_list sysctl_ctx;
@@ -250,7 +221,6 @@ struct mxge_slice_state {
struct mxge_softc {
struct ifnet* ifp;
struct mxge_slice_state *ss;
int csum_flag; /* rx_csums? */
int tx_boundary; /* boundary transmits cannot cross*/
int lro_cnt;
bus_dma_tag_t parent_dmat;


+ 0
- 357
sys/dev/mxge/mxge_lro.c View File

@@ -1,357 +0,0 @@
/******************************************************************************

Copyright (c) 2007-2008, Myricom Inc.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

2. Neither the name of the Myricom Inc, nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

***************************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/endian.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/bus.h>

#include <net/if.h>
#include <net/ethernet.h>
#include <net/if_media.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/tcp.h>

#include <machine/bus.h>
#include <machine/in_cksum.h>

#include <dev/mxge/mxge_mcp.h>
#include <dev/mxge/if_mxge_var.h>

#include "opt_inet.h"

#ifdef INET

/*
 * Sum the 16-bit words at raw and fold the 32-bit total down to 16 bits
 * with end-around carry.  The result is the plain (not complemented) sum.
 *
 * Two words are consumed per iteration, so len (in bytes) is assumed to be
 * a multiple of 4.  A non-positive len yields 0.
 */
static uint16_t
mxge_csum_generic(uint16_t *raw, int len)
{
	uint32_t sum = 0;
	int remaining;

	for (remaining = len; remaining > 0; remaining -= 4) {
		sum += raw[0];
		sum += raw[1];
		raw += 2;
	}

	/* Two folds are enough to absorb the carry of the first one. */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}


/*
 * Rewrite the IP and TCP headers of a merged LRO chain so that they
 * describe the aggregate frame: new IP length and IP header checksum, mbuf
 * checksum flags and packet length, latest ACK / window / timestamp
 * values, and a recomputed TCP checksum.
 */
static void
mxge_lro_fixup_headers(struct lro_entry *lro)
{
	struct ip *iph = lro->ip;
	struct tcphdr *th;
	uint32_t *opt;
	uint32_t tcp_len, sum;

	/* incorporate the new len into the ip header and
	 * re-calculate the checksum */
	iph->ip_len = htons(lro->len - ETHER_HDR_LEN);
	iph->ip_sum = 0;
	iph->ip_sum = 0xffff ^
	    mxge_csum_generic((uint16_t*)iph, sizeof (*iph));

	lro->m_head->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
	    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
	lro->m_head->m_pkthdr.csum_data = 0xffff;
	lro->m_head->m_pkthdr.len = lro->len;

	/* incorporate the latest ack into the tcp header */
	th = (struct tcphdr *) (iph + 1);
	th->th_ack = lro->ack_seq;
	th->th_win = lro->window;

	/* incorporate latest timestamp into the tcp header */
	if (lro->timestamp) {
		opt = (uint32_t *)(th + 1);
		opt[1] = htonl(lro->tsval);
		opt[2] = lro->tsecr;
	}

	/*
	 * update checksum in tcp header by re-calculating the
	 * tcp pseudoheader checksum, and adding it to the checksum
	 * of the tcp payload data
	 */
	th->th_sum = 0;
	tcp_len = lro->len - sizeof(*iph) - ETHER_HDR_LEN;
	sum = lro->data_csum;
	sum += in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr,
	    htons(tcp_len + IPPROTO_TCP));
	sum += mxge_csum_generic((uint16_t*)th, th->th_off << 2);
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	th->th_sum = 0xffff ^ sum;
}

/*
 * Flush one LRO entry: if any segments were appended, fix up the headers of
 * the merged chain, then hand the chain to the interface's if_input routine,
 * update the slice's lro_queued / lro_flushed counters, reset the entry and
 * put it back on the slice's free list.
 */
void
mxge_lro_flush(struct mxge_slice_state *ss, struct lro_entry *lro)
{
	struct ifnet *ifp = ss->sc->ifp;

	if (lro->append_cnt)
		mxge_lro_fixup_headers(lro);

	(*ifp->if_input)(ifp, lro->m_head);

	/* Count the head frame plus everything appended to it. */
	ss->lro_queued += lro->append_cnt + 1;
	ss->lro_flushed++;

	lro->m_head = NULL;
	lro->timestamp = 0;
	lro->append_cnt = 0;
	SLIST_INSERT_HEAD(&ss->lro_free, lro, next);
}

/*
 * Try to merge a received TCP/IPv4 segment into one of the slice's LRO
 * chains.
 *
 * ss      slice whose lro_active / lro_free lists are searched and updated.
 * m_head  received frame; its data is read as an Ethernet header that is
 *         immediately followed by the IP and TCP headers.
 * csum    checksum supplied by the caller for this frame (the hardware
 *         checksum); the TCP header's contribution is removed from it so
 *         that only the payload checksum is accumulated in lro->data_csum.
 *
 * Returns:
 *    0  the frame was consumed here: appended to an existing chain, used
 *       to start a new chain, or freed (a pure ACK folded into a chain).
 *    1  the frame is not IPv4 or not TCP.
 *   -1  the frame is TCP/IPv4 but could not be merged.  m_head is not
 *       freed on this path, although trailing link-layer padding may
 *       already have been trimmed from it.
 */
int
mxge_lro_rx(struct mxge_slice_state *ss, struct mbuf *m_head, uint32_t csum)
{
	struct ether_header *eh;
	struct ip *ip;
	struct tcphdr *tcp;
	uint32_t *ts_ptr;
	struct mbuf *m_nxt, *m_tail;
	struct lro_entry *lro;
	int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len;
	int opt_bytes, trim;
	uint32_t seq, tmp_csum, device_mtu;

	eh = mtod(m_head, struct ether_header *);
	if (eh->ether_type != htons(ETHERTYPE_IP))
		return 1;
	ip = (struct ip *) (eh + 1);
	if (ip->ip_p != IPPROTO_TCP)
		return 1;
	/* ensure there are no options */
	if ((ip->ip_hl << 2) != sizeof (*ip))
		return -1;

	/* .. and the packet is not fragmented */
	if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
		return -1;

	/* verify that the IP header checksum is correct */
	tmp_csum = mxge_csum_generic((uint16_t *)ip, sizeof (*ip));
	if (__predict_false((tmp_csum ^ 0xffff) != 0)) {
		ss->lro_bad_csum++;
		return -1;
	}

	/* find the TCP header */
	tcp = (struct tcphdr *) (ip + 1);

	/* ensure no bits set besides ack or psh */
	if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
		return -1;

	/* check for timestamps.  Since the only option we handle are
	   timestamps, we only have to handle the simple case of
	   aligned timestamps */

	opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
	tcp_hdr_len = sizeof (*tcp) + opt_bytes;
	ts_ptr = (uint32_t *)(tcp + 1);
	if (opt_bytes != 0) {
		if (__predict_false(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
		    (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
			return -1;
	}

	ip_len = ntohs(ip->ip_len);
	/*
	 * Keep the arithmetic in int: mixing in the size_t result of
	 * sizeof would route a too-small ip_len through unsigned wraparound.
	 */
	tcp_data_len = ip_len - (tcp->th_off << 2) - (int)sizeof (*ip);
	if (__predict_false(tcp_data_len < 0)) {
		/*
		 * ip_len comes straight off the wire and claims less than
		 * the IP + TCP headers we just parsed.  Refuse to merge
		 * before the negative length can reach m_adj(), next_seq
		 * or lro->len.
		 */
		return -1;
	}

	/*
	 * If frame is padded beyond the end of the IP packet,
	 * then we must trim the extra bytes off the end.
	 */
	tot_len = m_head->m_pkthdr.len;
	trim = tot_len - (ip_len + ETHER_HDR_LEN);
	if (trim != 0) {
		if (trim < 0) {
			/* truncated packet */
			return -1;
		}
		m_adj(m_head, -trim);
		tot_len = m_head->m_pkthdr.len;
	}

	/* walk to the last mbuf of this frame so chains can be linked */
	m_nxt = m_head;
	m_tail = NULL; /* -Wuninitialized */
	while (m_nxt != NULL) {
		m_tail = m_nxt;
		m_nxt = m_tail->m_next;
	}

	/* bytes of Ethernet + IP + TCP header in front of the payload */
	hlen = ip_len + ETHER_HDR_LEN - tcp_data_len;
	seq = ntohl(tcp->th_seq);

	SLIST_FOREACH(lro, &ss->lro_active, next) {
		if (lro->source_port == tcp->th_sport &&
		    lro->dest_port == tcp->th_dport &&
		    lro->source_ip == ip->ip_src.s_addr &&
		    lro->dest_ip == ip->ip_dst.s_addr) {
			/* Try to append it */

			if (__predict_false(seq != lro->next_seq ||
				    (tcp_data_len == 0 &&
				    lro->ack_seq == tcp->th_ack))) {
				/* out of order packet or dup ack */
				SLIST_REMOVE(&ss->lro_active, lro,
					     lro_entry, next);
				mxge_lro_flush(ss, lro);
				return -1;
			}

			if (opt_bytes) {
				uint32_t tsval = ntohl(*(ts_ptr + 1));
				/* make sure timestamp values are increasing */
				if (__predict_false(lro->tsval > tsval ||
					    *(ts_ptr + 2) == 0)) {
					return -1;
				}
				lro->tsval = tsval;
				lro->tsecr = *(ts_ptr + 2);
			}

			lro->next_seq += tcp_data_len;
			lro->ack_seq = tcp->th_ack;
			lro->window = tcp->th_win;
			lro->append_cnt++;
			if (tcp_data_len == 0) {
				/* pure ACK: its state is recorded, drop the mbuf */
				m_freem(m_head);
				return 0;
			}
			/* subtract off the checksum of the tcp header
			 * from the hardware checksum, and add it to the
			 * stored tcp data checksum.  Byteswap the checksum
			 * if the total length so far is odd
			 */
			tmp_csum = mxge_csum_generic((uint16_t*)tcp,
						     tcp_hdr_len);
			csum = csum + (tmp_csum ^ 0xffff);
			csum = (csum & 0xffff) + (csum >> 16);
			csum = (csum & 0xffff) + (csum >> 16);
			if (lro->len & 0x1) {
				/* Odd number of bytes so far, flip bytes */
				csum = ((csum << 8) | (csum >> 8)) & 0xffff;
			}
			csum = csum + lro->data_csum;
			csum = (csum & 0xffff) + (csum >> 16);
			csum = (csum & 0xffff) + (csum >> 16);
			lro->data_csum = csum;

			lro->len += tcp_data_len;

			/* adjust mbuf so that m->m_data points to
			   the first byte of the payload */
			m_adj(m_head, hlen);
			/* append mbuf chain */
			lro->m_tail->m_next = m_head;
			/* advance the last pointer */
			lro->m_tail = m_tail;
			/* flush packet if required */
			device_mtu = ss->sc->ifp->if_mtu;
			if (lro->len > (65535 - device_mtu)) {
				SLIST_REMOVE(&ss->lro_active, lro,
					     lro_entry, next);
				mxge_lro_flush(ss, lro);
			}
			return 0;
		}
	}

	/* no matching chain; give up if there is no free entry to start one */
	if (SLIST_EMPTY(&ss->lro_free))
		return -1;

	/* start a new chain */
	lro = SLIST_FIRST(&ss->lro_free);
	SLIST_REMOVE_HEAD(&ss->lro_free, next);
	SLIST_INSERT_HEAD(&ss->lro_active, lro, next);
	lro->source_port = tcp->th_sport;
	lro->dest_port = tcp->th_dport;
	lro->source_ip = ip->ip_src.s_addr;
	lro->dest_ip = ip->ip_dst.s_addr;
	lro->next_seq = seq + tcp_data_len;
	lro->mss = tcp_data_len;
	lro->ack_seq = tcp->th_ack;
	lro->window = tcp->th_win;

	/* save the checksum of just the TCP payload by
	 * subtracting off the checksum of the TCP header from
	 * the entire hardware checksum
	 * Since IP header checksum is correct, checksum over
	 * the IP header is -0.  Subtracting -0 is unnecessary.
	 */
	tmp_csum = mxge_csum_generic((uint16_t*)tcp, tcp_hdr_len);
	csum = csum + (tmp_csum ^ 0xffff);
	csum = (csum & 0xffff) + (csum >> 16);
	csum = (csum & 0xffff) + (csum >> 16);
	lro->data_csum = csum;
	lro->ip = ip;
	/* record timestamp if it is present */
	if (opt_bytes) {
		lro->timestamp = 1;
		lro->tsval = ntohl(*(ts_ptr + 1));
		lro->tsecr = *(ts_ptr + 2);
	}
	lro->len = tot_len;
	lro->m_head = m_head;
	lro->m_tail = m_tail;
	return 0;
}

#endif /* INET */
/*
This file uses Myri10GE driver indentation.

Local Variables:
c-file-style:"linux"
tab-width:8
End:
*/

+ 0
- 2
sys/fs/nfs/nfs_commonkrpc.c View File

@@ -1080,7 +1080,6 @@ newnfs_set_sigmask(struct thread *td, sigset_t *oldset)
SIGDELSET(newset, newnfs_sig_set[i]);
}
mtx_unlock(&p->p_sigacts->ps_mtx);
sigdeferstop(td);
kern_sigprocmask(td, SIG_SETMASK, &newset, oldset,
SIGPROCMASK_PROC_LOCKED);
PROC_UNLOCK(p);
@@ -1092,7 +1091,6 @@ newnfs_restore_sigmask(struct thread *td, sigset_t *set)
if (td == NULL)
td = curthread; /* XXX */
kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0);
sigallowstop(td);
}

/*


+ 1
- 1
sys/fs/nfsclient/nfs_clvfsops.c View File

@@ -132,7 +132,7 @@ static struct vfsops nfs_vfsops = {
.vfs_unmount = nfs_unmount,
.vfs_sysctl = nfs_sysctl,
};
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK);
VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK | VFCF_SBDRY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(nfs, 1);


+ 14
- 8
sys/kern/kern_sig.c View File

@@ -2537,16 +2537,22 @@ tdsigcleanup(struct thread *td)

}

/* Defer the delivery of SIGSTOP for the current thread. */
void
sigdeferstop(struct thread *td)
/*
* Defer the delivery of SIGSTOP for the current thread. Returns true
* if stops were deferred and false if they were already deferred.
*/
int
sigdeferstop(void)
{
struct thread *td;

KASSERT(!(td->td_flags & TDF_SBDRY),
("attempt to set TDF_SBDRY recursively"));
td = curthread;
if (td->td_flags & TDF_SBDRY)
return (0);
thread_lock(td);
td->td_flags |= TDF_SBDRY;
thread_unlock(td);
return (1);
}

/*
@@ -2555,11 +2561,11 @@ sigdeferstop(struct thread *td)
* will suspend either via ast() or a subsequent interruptible sleep.
*/
void
sigallowstop(struct thread *td)
sigallowstop()
{
struct thread *td;

KASSERT(td->td_flags & TDF_SBDRY,
("attempt to clear already-cleared TDF_SBDRY"));
td = curthread;
thread_lock(td);
td->td_flags &= ~TDF_SBDRY;
thread_unlock(td);


+ 2
- 0
sys/kern/subr_trap.c View File

@@ -164,6 +164,8 @@ userret(struct thread *td, struct trapframe *frame)
("userret: Returning with with pinned thread"));
KASSERT(td->td_vp_reserv == 0,
("userret: Returning while holding vnode reservation"));
KASSERT((td->td_flags & TDF_SBDRY) == 0,
("userret: Returning with stop signals deferred"));
#ifdef VIMAGE
/* Unfortunately td_vnet_lpush needs VNET_DEBUG. */
VNET_ASSERT(curvnet == NULL,


+ 1
- 0
sys/kern/vfs_export.c View File

@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/refcount.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/vnode.h>


+ 1
- 1
sys/kern/vfs_lookup.c View File

@@ -339,7 +339,7 @@ namei(struct nameidata *ndp)
auio.uio_offset = 0;
auio.uio_rw = UIO_READ;
auio.uio_segflg = UIO_SYSSPACE;
auio.uio_td = (struct thread *)0;
auio.uio_td = td;
auio.uio_resid = MAXPATHLEN;
error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred);
if (error) {


+ 1
- 1
sys/modules/mxge/mxge/Makefile View File

@@ -3,6 +3,6 @@
.PATH: ${.CURDIR}/../../../dev/mxge

KMOD= if_mxge
SRCS= if_mxge.c mxge_lro.c device_if.h bus_if.h pci_if.h opt_inet.h opt_inet6.h
SRCS= if_mxge.c device_if.h bus_if.h pci_if.h opt_inet.h opt_inet6.h

.include <bsd.kmod.mk>

+ 1
- 1
sys/netinet/tcp_lro.c View File

@@ -333,7 +333,7 @@ tcp_lro_rx_ipv4(struct lro_ctrl *lc, struct mbuf *m, struct ip *ip4,
}
} else {
csum = in_cksum_hdr(ip4);
if (__predict_false((csum ^ 0xffff) != 0)) {
if (__predict_false((csum) != 0)) {
lc->lro_bad_csum++;
return (TCP_LRO_CANNOT);
}


+ 0
- 2
sys/nfsclient/nfs_krpc.c View File

@@ -748,7 +748,6 @@ nfs_set_sigmask(struct thread *td, sigset_t *oldset)
SIGDELSET(newset, nfs_sig_set[i]);
}
mtx_unlock(&p->p_sigacts->ps_mtx);
sigdeferstop(td);
kern_sigprocmask(td, SIG_SETMASK, &newset, oldset,
SIGPROCMASK_PROC_LOCKED);
PROC_UNLOCK(p);
@@ -760,7 +759,6 @@ nfs_restore_sigmask(struct thread *td, sigset_t *set)
if (td == NULL)
td = curthread; /* XXX */
kern_sigprocmask(td, SIG_SETMASK, set, NULL, 0);
sigallowstop(td);
}

/*


+ 1
- 1
sys/nfsclient/nfs_vfsops.c View File

@@ -146,7 +146,7 @@ static struct vfsops nfs_vfsops = {
.vfs_unmount = nfs_unmount,
.vfs_sysctl = nfs_sysctl,
};
VFS_SET(nfs_vfsops, oldnfs, VFCF_NETWORK);
VFS_SET(nfs_vfsops, oldnfs, VFCF_NETWORK | VFCF_SBDRY);

/* So that loader and kldload(2) can find us, wherever we are.. */
MODULE_VERSION(oldnfs, 1);


+ 116
- 24
sys/sys/mount.h View File

@@ -493,6 +493,7 @@ struct ovfsconf {
#define VFCF_UNICODE 0x00200000 /* stores file names as Unicode */
#define VFCF_JAIL 0x00400000 /* can be mounted from within a jail */
#define VFCF_DELEGADMIN 0x00800000 /* supports delegated administration */
#define VFCF_SBDRY 0x01000000 /* defer stop requests */

typedef uint32_t fsctlop_t;

@@ -629,30 +630,121 @@ struct vfsops {

vfs_statfs_t __vfs_statfs;

#define VFS_MOUNT(MP) (*(MP)->mnt_op->vfs_mount)(MP)
#define VFS_UNMOUNT(MP, FORCE) (*(MP)->mnt_op->vfs_unmount)(MP, FORCE)
#define VFS_ROOT(MP, FLAGS, VPP) \
(*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP)
#define VFS_QUOTACTL(MP, C, U, A) \
(*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A)
#define VFS_STATFS(MP, SBP) __vfs_statfs((MP), (SBP))
#define VFS_SYNC(MP, WAIT) (*(MP)->mnt_op->vfs_sync)(MP, WAIT)
#define VFS_VGET(MP, INO, FLAGS, VPP) \
(*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP)
#define VFS_FHTOVP(MP, FIDP, FLAGS, VPP) \
(*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, FLAGS, VPP)
#define VFS_CHECKEXP(MP, NAM, EXFLG, CRED, NUMSEC, SEC) \
(*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED, NUMSEC, SEC)
#define VFS_EXTATTRCTL(MP, C, FN, NS, N) \
(*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N)
#define VFS_SYSCTL(MP, OP, REQ) \
(*(MP)->mnt_op->vfs_sysctl)(MP, OP, REQ)
#define VFS_SUSP_CLEAN(MP) \
({if (*(MP)->mnt_op->vfs_susp_clean != NULL) \
(*(MP)->mnt_op->vfs_susp_clean)(MP); })
#define VFS_RECLAIM_LOWERVP(MP, VP) \
({if (*(MP)->mnt_op->vfs_reclaim_lowervp != NULL) \
(*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP)); })
/*
 * VFS_PROLOGUE / VFS_EPILOGUE bracket a call into a filesystem and must
 * always be used as a pair in the same scope: the prologue opens a
 * "do {" block that only the epilogue closes.
 *
 * The prologue declares _enable_stops and sets it non-zero only when MP
 * is non-NULL, the mount's vfsconf has VFCF_SBDRY set, and sigdeferstop()
 * returns non-zero.  Because of && short-circuiting, sigdeferstop() is
 * not called at all unless the first two conditions hold.  The expansion
 * ends without a semicolon; the one written after the invocation
 * terminates the assignment.
 */
#define VFS_PROLOGUE(MP) do { \
int _enable_stops; \
\
_enable_stops = ((MP) != NULL && \
((MP)->mnt_vfc->vfc_flags & VFCF_SBDRY) && sigdeferstop())

/*
 * Calls sigallowstop() only if the matching prologue set _enable_stops,
 * then closes the block the prologue opened.  MP is accepted for symmetry
 * but is not used in the expansion.
 */
#define VFS_EPILOGUE(MP) \
if (_enable_stops) \
sigallowstop(); \
} while (0)

/*
 * Wrappers for the per-mount vfsops entry points.  Each one is a GNU C
 * statement expression that runs the operation between VFS_PROLOGUE and
 * VFS_EPILOGUE and yields the operation's int result (_rc).
 *
 * MP appears several times in every expansion (in the prologue and again
 * in the call), so an argument with side effects is evaluated more than
 * once.
 */
#define VFS_MOUNT(MP) ({ \
int _rc; \
\
VFS_PROLOGUE(MP); \
_rc = (*(MP)->mnt_op->vfs_mount)(MP); \
VFS_EPILOGUE(MP); \
_rc; })

#define VFS_UNMOUNT(MP, FORCE) ({ \
int _rc; \
\
VFS_PROLOGUE(MP); \
_rc = (*(MP)->mnt_op->vfs_unmount)(MP, FORCE); \
VFS_EPILOGUE(MP); \
_rc; })

#define VFS_ROOT(MP, FLAGS, VPP) ({ \
int _rc; \
\
VFS_PROLOGUE(MP); \
_rc = (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP); \
VFS_EPILOGUE(MP); \
_rc; })

#define VFS_QUOTACTL(MP, C, U, A) ({ \
int _rc; \
\
VFS_PROLOGUE(MP); \
_rc = (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A); \
VFS_EPILOGUE(MP); \
_rc; })

/* Unlike its siblings, this goes through __vfs_statfs() rather than mnt_op. */
#define VFS_STATFS(MP, SBP) ({ \
int _rc; \
\
VFS_PROLOGUE(MP); \
_rc = __vfs_statfs((MP), (SBP)); \
VFS_EPILOGUE(MP); \
_rc; })

#define VFS_SYNC(MP, WAIT) ({ \
int _rc; \
\
VFS_PROLOGUE(MP); \
_rc = (*(MP)->mnt_op->vfs_sync)(MP, WAIT); \
VFS_EPILOGUE(MP); \
_rc; })

#define VFS_VGET(MP, INO, FLAGS, VPP) ({ \
int _rc; \
\
VFS_PROLOGUE(MP); \
_rc = (*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP); \
VFS_EPILOGUE(MP); \
_rc; })

#define VFS_FHTOVP(MP, FIDP, FLAGS, VPP) ({ \
int _rc; \
\
VFS_PROLOGUE(MP); \
_rc = (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, FLAGS, VPP); \
VFS_EPILOGUE(MP); \
_rc; })

#define VFS_CHECKEXP(MP, NAM, EXFLG, CRED, NUMSEC, SEC) ({ \
int _rc; \
\
VFS_PROLOGUE(MP); \
_rc = (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED, NUMSEC,\
SEC); \
VFS_EPILOGUE(MP); \
_rc; })

#define VFS_EXTATTRCTL(MP, C, FN, NS, N) ({ \
int _rc; \
\
VFS_PROLOGUE(MP); \
_rc = (*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N); \
VFS_EPILOGUE(MP); \
_rc; })

#define VFS_SYSCTL(MP, OP, REQ) ({ \
int _rc; \
\
VFS_PROLOGUE(MP); \
_rc = (*(MP)->mnt_op->vfs_sysctl)(MP, OP, REQ); \
VFS_EPILOGUE(MP); \
_rc; })

/*
 * The two macros below wrap optional operations: they are statements, not
 * expressions, produce no value, and do nothing (including no
 * prologue/epilogue) when the mount's vfsops leaves the hook NULL.
 */
#define VFS_SUSP_CLEAN(MP) do { \
if (*(MP)->mnt_op->vfs_susp_clean != NULL) { \
VFS_PROLOGUE(MP); \
(*(MP)->mnt_op->vfs_susp_clean)(MP); \
VFS_EPILOGUE(MP); \
} \
} while (0)

#define VFS_RECLAIM_LOWERVP(MP, VP) do { \
if (*(MP)->mnt_op->vfs_reclaim_lowervp != NULL) { \
VFS_PROLOGUE(MP); \
(*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP)); \
VFS_EPILOGUE(MP); \
} \
} while (0)

#define VFS_KNOTE_LOCKED(vp, hint) do \
{ \


+ 2
- 2
sys/sys/signalvar.h View File

@@ -328,8 +328,8 @@ extern struct mtx sigio_lock;
#define SIGPROCMASK_PS_LOCKED 0x0004

int cursig(struct thread *td, int stop_allowed);
void sigdeferstop(struct thread *td);
void sigallowstop(struct thread *td);
int sigdeferstop(void);
void sigallowstop(void);
void execsigs(struct proc *p);
void gsignal(int pgid, int sig, ksiginfo_t *ksi);
void killproc(struct proc *p, char *why);


+ 0
- 2
sys/sys/systm.h View File

@@ -330,8 +330,6 @@ static __inline intrmask_t splclock(void) { return 0; }
static __inline intrmask_t splhigh(void) { return 0; }
static __inline intrmask_t splimp(void) { return 0; }
static __inline intrmask_t splnet(void) { return 0; }
static __inline intrmask_t splsoftclock(void) { return 0; }
static __inline intrmask_t splsoftvm(void) { return 0; }
static __inline intrmask_t spltty(void) { return 0; }
static __inline intrmask_t splvm(void) { return 0; }
static __inline void splx(intrmask_t ipl __unused) { return; }


+ 3
- 0
sys/tools/vnode_if.awk View File

@@ -172,6 +172,7 @@ if (cfile) {
"#include <sys/kernel.h>\n" \
"#include <sys/mount.h>\n" \
"#include <sys/sdt.h>\n" \
"#include <sys/signalvar.h>\n" \
"#include <sys/systm.h>\n" \
"#include <sys/vnode.h>\n" \
"\n" \
@@ -365,10 +366,12 @@ while ((getline < srcfile) > 0) {
add_debug_code(name, args[i], "Entry", "\t");
printc("\tKTR_START" ctrstr);
add_pre(name);
printc("\tVFS_PROLOGUE(a->a_" args[0]"->v_mount);")
printc("\tif (vop->"name" != NULL)")
printc("\t\trc = vop->"name"(a);")
printc("\telse")
printc("\t\trc = vop->vop_bypass(&a->a_gen);")
printc("\tVFS_EPILOGUE(a->a_" args[0]"->v_mount);")
printc("\tSDT_PROBE(vfs, vop, " name ", return, a->a_" args[0] ", a, rc, 0, 0);\n");
printc("\tif (rc == 0) {");
for (i = 0; i < numargs; ++i)


+ 12
- 4
sys/x86/isa/atrtc.c View File

@@ -38,6 +38,7 @@ __FBSDID("$FreeBSD$");
#include <sys/clock.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/proc.h>
@@ -52,8 +53,8 @@ __FBSDID("$FreeBSD$");
#include <machine/intr_machdep.h>
#include "clock_if.h"

#define RTC_LOCK mtx_lock_spin(&clock_lock)
#define RTC_UNLOCK mtx_unlock_spin(&clock_lock)
#define RTC_LOCK do { if (!kdb_active) mtx_lock_spin(&clock_lock); } while (0)
#define RTC_UNLOCK do { if (!kdb_active) mtx_unlock_spin(&clock_lock); } while (0)

int atrtcclock_disable = 0;

@@ -335,10 +336,16 @@ atrtc_gettime(device_t dev, struct timespec *ts)
return (EINVAL);
}

/* wait for time update to complete */
/* If RTCSA_TUP is zero, we have at least 244us before next update */
/*
* wait for time update to complete
* If RTCSA_TUP is zero, we have at least 244us before next update.
* This is fast enough on most hardware, but a refinement would be
* to make sure that no more than 240us pass after we start reading,
* and try again if so.
*/
while (rtcin(RTC_STATUSA) & RTCSA_TUP)
continue;
critical_enter();
ct.nsec = 0;
ct.sec = readrtc(RTC_SEC);
ct.min = readrtc(RTC_MIN);
@@ -352,6 +359,7 @@ atrtc_gettime(device_t dev, struct timespec *ts)
#else
ct.year += 2000;
#endif
critical_exit();
/* Set dow = -1 because some clocks don't set it correctly. */
ct.dow = -1;
return (clock_ct_to_ts(&ct, ts));


Loading…
Cancel
Save