HardenedBSD src tree https://hardenedbsd.org/
  1. /*-
  2. * Copyright (c) 2014-2018, Matthew Macy <mmacy@mattmacy.io>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice,
  9. * this list of conditions and the following disclaimer.
  10. *
  11. * 2. Neither the name of Matthew Macy nor the names of its
  12. * contributors may be used to endorse or promote products derived from
  13. * this software without specific prior written permission.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  19. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  20. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  21. * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  22. * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  23. * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  24. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  25. * POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include <sys/cdefs.h>
  28. __FBSDID("$FreeBSD$");
  29. #include "opt_inet.h"
  30. #include "opt_inet6.h"
  31. #include "opt_acpi.h"
  32. #include "opt_sched.h"
  33. #include <sys/param.h>
  34. #include <sys/types.h>
  35. #include <sys/bus.h>
  36. #include <sys/eventhandler.h>
  37. #include <sys/jail.h>
  38. #include <sys/kernel.h>
  39. #include <sys/lock.h>
  40. #include <sys/md5.h>
  41. #include <sys/mutex.h>
  42. #include <sys/module.h>
  43. #include <sys/kobj.h>
  44. #include <sys/rman.h>
  45. #include <sys/proc.h>
  46. #include <sys/sbuf.h>
  47. #include <sys/smp.h>
  48. #include <sys/socket.h>
  49. #include <sys/sockio.h>
  50. #include <sys/sysctl.h>
  51. #include <sys/syslog.h>
  52. #include <sys/taskqueue.h>
  53. #include <sys/limits.h>
  54. #include <net/if.h>
  55. #include <net/if_var.h>
  56. #include <net/if_types.h>
  57. #include <net/if_media.h>
  58. #include <net/bpf.h>
  59. #include <net/ethernet.h>
  60. #include <net/mp_ring.h>
  61. #include <net/vnet.h>
  62. #include <netinet/in.h>
  63. #include <netinet/in_pcb.h>
  64. #include <netinet/tcp_lro.h>
  65. #include <netinet/in_systm.h>
  66. #include <netinet/if_ether.h>
  67. #include <netinet/ip.h>
  68. #include <netinet/ip6.h>
  69. #include <netinet/tcp.h>
  70. #include <netinet/ip_var.h>
  71. #include <netinet/netdump/netdump.h>
  72. #include <netinet6/ip6_var.h>
  73. #include <machine/bus.h>
  74. #include <machine/in_cksum.h>
  75. #include <vm/vm.h>
  76. #include <vm/pmap.h>
  77. #include <dev/led/led.h>
  78. #include <dev/pci/pcireg.h>
  79. #include <dev/pci/pcivar.h>
  80. #include <dev/pci/pci_private.h>
  81. #include <net/iflib.h>
  82. #include <net/iflib_private.h>
  83. #include "ifdi_if.h"
  84. #ifdef PCI_IOV
  85. #include <dev/pci/pci_iov.h>
  86. #endif
  87. #include <sys/bitstring.h>
  88. /*
  89. * Enable accounting of every mbuf as it comes into and goes out of
  90. * iflib's software descriptor references
  91. */
  92. #define MEMORY_LOGGING 0
  93. /*
  94. * Enable mbuf vectors for compressing long mbuf chains
  95. */
  96. /*
  97. * NB:
  98. * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead
  99. * we prefetch needs to be determined by the time spent in m_free vis a vis
  100. * the cost of a prefetch. This will of course vary based on the workload:
  101. * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which
  102. * is quite expensive, thus suggesting very little prefetch.
  103. * - small packet forwarding which is just returning a single mbuf to
  104. * UMA will typically be very fast vis a vis the cost of a memory
  105. * access.
  106. */
  107. /*
  108. * File organization:
  109. * - private structures
  110. * - iflib private utility functions
  111. * - ifnet functions
  112. * - vlan registry and other exported functions
  113. * - iflib public core functions
  114. *
  115. *
  116. */
  117. MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library");
  118. #define IFLIB_RXEOF_MORE (1U << 0)
  119. #define IFLIB_RXEOF_EMPTY (2U << 0)
  120. struct iflib_txq;
  121. typedef struct iflib_txq *iflib_txq_t;
  122. struct iflib_rxq;
  123. typedef struct iflib_rxq *iflib_rxq_t;
  124. struct iflib_fl;
  125. typedef struct iflib_fl *iflib_fl_t;
  126. struct iflib_ctx;
  127. static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid);
  128. static void iflib_timer(void *arg);
  129. typedef struct iflib_filter_info {
  130. driver_filter_t *ifi_filter;
  131. void *ifi_filter_arg;
  132. struct grouptask *ifi_task;
  133. void *ifi_ctx;
  134. } *iflib_filter_info_t;
  135. struct iflib_ctx {
  136. KOBJ_FIELDS;
  137. /*
  138. * Pointer to hardware driver's softc
  139. */
  140. void *ifc_softc;
  141. device_t ifc_dev;
  142. if_t ifc_ifp;
  143. cpuset_t ifc_cpus;
  144. if_shared_ctx_t ifc_sctx;
  145. struct if_softc_ctx ifc_softc_ctx;
  146. struct sx ifc_ctx_sx;
  147. struct mtx ifc_state_mtx;
  148. iflib_txq_t ifc_txqs;
  149. iflib_rxq_t ifc_rxqs;
  150. uint32_t ifc_if_flags;
  151. uint32_t ifc_flags;
  152. uint32_t ifc_max_fl_buf_size;
  153. uint32_t ifc_rx_mbuf_sz;
  154. int ifc_link_state;
  155. int ifc_watchdog_events;
  156. struct cdev *ifc_led_dev;
  157. struct resource *ifc_msix_mem;
  158. struct if_irq ifc_legacy_irq;
  159. struct grouptask ifc_admin_task;
  160. struct grouptask ifc_vflr_task;
  161. struct iflib_filter_info ifc_filter_info;
  162. struct ifmedia ifc_media;
  163. struct sysctl_oid *ifc_sysctl_node;
  164. uint16_t ifc_sysctl_ntxqs;
  165. uint16_t ifc_sysctl_nrxqs;
  166. uint16_t ifc_sysctl_qs_eq_override;
  167. uint16_t ifc_sysctl_rx_budget;
  168. uint16_t ifc_sysctl_tx_abdicate;
  169. uint16_t ifc_sysctl_core_offset;
  170. #define CORE_OFFSET_UNSPECIFIED 0xffff
  171. uint8_t ifc_sysctl_separate_txrx;
  172. qidx_t ifc_sysctl_ntxds[8];
  173. qidx_t ifc_sysctl_nrxds[8];
  174. struct if_txrx ifc_txrx;
  175. #define isc_txd_encap ifc_txrx.ift_txd_encap
  176. #define isc_txd_flush ifc_txrx.ift_txd_flush
  177. #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update
  178. #define isc_rxd_available ifc_txrx.ift_rxd_available
  179. #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get
  180. #define isc_rxd_refill ifc_txrx.ift_rxd_refill
  181. #define isc_rxd_flush ifc_txrx.ift_rxd_flush
  184. #define isc_legacy_intr ifc_txrx.ift_legacy_intr
  185. eventhandler_tag ifc_vlan_attach_event;
  186. eventhandler_tag ifc_vlan_detach_event;
  187. uint8_t ifc_mac[ETHER_ADDR_LEN];
  188. };
  189. void *
  190. iflib_get_softc(if_ctx_t ctx)
  191. {
  192. return (ctx->ifc_softc);
  193. }
  194. device_t
  195. iflib_get_dev(if_ctx_t ctx)
  196. {
  197. return (ctx->ifc_dev);
  198. }
  199. if_t
  200. iflib_get_ifp(if_ctx_t ctx)
  201. {
  202. return (ctx->ifc_ifp);
  203. }
  204. struct ifmedia *
  205. iflib_get_media(if_ctx_t ctx)
  206. {
  207. return (&ctx->ifc_media);
  208. }
  209. uint32_t
  210. iflib_get_flags(if_ctx_t ctx)
  211. {
  212. return (ctx->ifc_flags);
  213. }
  214. void
  215. iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN])
  216. {
  217. bcopy(mac, ctx->ifc_mac, ETHER_ADDR_LEN);
  218. }
  219. if_softc_ctx_t
  220. iflib_get_softc_ctx(if_ctx_t ctx)
  221. {
  222. return (&ctx->ifc_softc_ctx);
  223. }
  224. if_shared_ctx_t
  225. iflib_get_sctx(if_ctx_t ctx)
  226. {
  227. return (ctx->ifc_sctx);
  228. }
  229. #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2)
  230. #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*))
  231. #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & ~(CACHE_LINE_SIZE-1)))
  232. #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP)
  233. #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF)
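/*
 * Note on the macros above: IP_ALIGNED() is true when m_data sits two
 * bytes into a 4-byte word, i.e. when the 14-byte Ethernet header leaves
 * the following IP header on a 4-byte boundary.  CACHE_PTR_NEXT() rounds
 * a pointer up to the next cache line boundary.
 */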
  234. typedef struct iflib_sw_rx_desc_array {
  235. bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */
  236. struct mbuf **ifsd_m; /* pkthdr mbufs */
  237. caddr_t *ifsd_cl; /* direct cluster pointer for rx */
  238. bus_addr_t *ifsd_ba; /* bus addr of cluster for rx */
  239. } iflib_rxsd_array_t;
  240. typedef struct iflib_sw_tx_desc_array {
  241. bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */
  242. bus_dmamap_t *ifsd_tso_map; /* bus_dma maps for TSO packet */
  243. struct mbuf **ifsd_m; /* pkthdr mbufs */
  244. } if_txsd_vec_t;
  245. /* magic number that should be high enough for any hardware */
  246. #define IFLIB_MAX_TX_SEGS 128
  247. #define IFLIB_RX_COPY_THRESH 128
  248. #define IFLIB_MAX_RX_REFRESH 32
  249. /* The minimum descriptors per second before we start coalescing */
  250. #define IFLIB_MIN_DESC_SEC 16384
  251. #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16
  252. #define IFLIB_QUEUE_IDLE 0
  253. #define IFLIB_QUEUE_HUNG 1
  254. #define IFLIB_QUEUE_WORKING 2
  255. /* maximum number of txqs that can share an rx interrupt */
  256. #define IFLIB_MAX_TX_SHARED_INTR 4
  257. /* this should really scale with ring size - this is a fairly arbitrary value */
  258. #define TX_BATCH_SIZE 32
  259. #define IFLIB_RESTART_BUDGET 8
  260. #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \
  261. CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \
  262. CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP)
  263. struct iflib_txq {
  264. qidx_t ift_in_use;
  265. qidx_t ift_cidx;
  266. qidx_t ift_cidx_processed;
  267. qidx_t ift_pidx;
  268. uint8_t ift_gen;
  269. uint8_t ift_br_offset;
  270. uint16_t ift_npending;
  271. uint16_t ift_db_pending;
  272. uint16_t ift_rs_pending;
  273. /* implicit pad */
  274. uint8_t ift_txd_size[8];
  275. uint64_t ift_processed;
  276. uint64_t ift_cleaned;
  277. uint64_t ift_cleaned_prev;
  278. #if MEMORY_LOGGING
  279. uint64_t ift_enqueued;
  280. uint64_t ift_dequeued;
  281. #endif
  282. uint64_t ift_no_tx_dma_setup;
  283. uint64_t ift_no_desc_avail;
  284. uint64_t ift_mbuf_defrag_failed;
  285. uint64_t ift_mbuf_defrag;
  286. uint64_t ift_map_failed;
  287. uint64_t ift_txd_encap_efbig;
  288. uint64_t ift_pullups;
  289. uint64_t ift_last_timer_tick;
  290. struct mtx ift_mtx;
  291. struct mtx ift_db_mtx;
  292. /* constant values */
  293. if_ctx_t ift_ctx;
  294. struct ifmp_ring *ift_br;
  295. struct grouptask ift_task;
  296. qidx_t ift_size;
  297. uint16_t ift_id;
  298. struct callout ift_timer;
  299. if_txsd_vec_t ift_sds;
  300. uint8_t ift_qstatus;
  301. uint8_t ift_closed;
  302. uint8_t ift_update_freq;
  303. struct iflib_filter_info ift_filter_info;
  304. bus_dma_tag_t ift_buf_tag;
  305. bus_dma_tag_t ift_tso_buf_tag;
  306. iflib_dma_info_t ift_ifdi;
  307. #define MTX_NAME_LEN 32
  308. char ift_mtx_name[MTX_NAME_LEN];
  309. bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE);
  310. #ifdef IFLIB_DIAGNOSTICS
  311. uint64_t ift_cpu_exec_count[256];
  312. #endif
  313. } __aligned(CACHE_LINE_SIZE);
  314. struct iflib_fl {
  315. qidx_t ifl_cidx;
  316. qidx_t ifl_pidx;
  317. qidx_t ifl_credits;
  318. uint8_t ifl_gen;
  319. uint8_t ifl_rxd_size;
  320. #if MEMORY_LOGGING
  321. uint64_t ifl_m_enqueued;
  322. uint64_t ifl_m_dequeued;
  323. uint64_t ifl_cl_enqueued;
  324. uint64_t ifl_cl_dequeued;
  325. #endif
  326. /* implicit pad */
  327. bitstr_t *ifl_rx_bitmap;
  328. qidx_t ifl_fragidx;
  329. /* constant */
  330. qidx_t ifl_size;
  331. uint16_t ifl_buf_size;
  332. uint16_t ifl_cltype;
  333. uma_zone_t ifl_zone;
  334. iflib_rxsd_array_t ifl_sds;
  335. iflib_rxq_t ifl_rxq;
  336. uint8_t ifl_id;
  337. bus_dma_tag_t ifl_buf_tag;
  338. iflib_dma_info_t ifl_ifdi;
  339. uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE);
  340. caddr_t ifl_vm_addrs[IFLIB_MAX_RX_REFRESH];
  341. qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH];
  342. } __aligned(CACHE_LINE_SIZE);
  343. static inline qidx_t
  344. get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen)
  345. {
  346. qidx_t used;
  347. if (pidx > cidx)
  348. used = pidx - cidx;
  349. else if (pidx < cidx)
  350. used = size - cidx + pidx;
  351. else if (gen == 0 && pidx == cidx)
  352. used = 0;
  353. else if (gen == 1 && pidx == cidx)
  354. used = size;
  355. else
  356. panic("bad state");
  357. return (used);
  358. }
  359. #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen))
  360. #define IDXDIFF(head, tail, wrap) \
  361. ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head))
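/*
 * Worked example of the ring accounting above: with ift_size = 1024,
 * cidx = 1000 and pidx = 8 after the producer has wrapped, get_inuse()
 * returns 1024 - 1000 + 8 = 32 descriptors in use, so TXQ_AVAIL()
 * reports 1024 - 32 = 992 free slots.  IDXDIFF() computes the same kind
 * of wrap-aware distance for an arbitrary head/tail pair.
 */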
  362. struct iflib_rxq {
  363. if_ctx_t ifr_ctx;
  364. iflib_fl_t ifr_fl;
  365. uint64_t ifr_rx_irq;
  366. /*
  367. * If there is a separate completion queue (IFLIB_HAS_RXCQ), this is
  368. * the command queue consumer index. Otherwise it's unused.
  369. */
  370. qidx_t ifr_cq_cidx;
  371. uint16_t ifr_id;
  372. uint8_t ifr_nfl;
  373. uint8_t ifr_ntxqirq;
  374. uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR];
  375. uint8_t ifr_fl_offset;
  376. struct lro_ctrl ifr_lc;
  377. struct grouptask ifr_task;
  378. struct callout ifr_watchdog;
  379. struct iflib_filter_info ifr_filter_info;
  380. iflib_dma_info_t ifr_ifdi;
  381. /* dynamically allocate if any drivers need a value substantially larger than this */
  382. struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE);
  383. #ifdef IFLIB_DIAGNOSTICS
  384. uint64_t ifr_cpu_exec_count[256];
  385. #endif
  386. } __aligned(CACHE_LINE_SIZE);
  387. typedef struct if_rxsd {
  388. caddr_t *ifsd_cl;
  389. struct mbuf **ifsd_m;
  390. iflib_fl_t ifsd_fl;
  391. qidx_t ifsd_cidx;
  392. } *if_rxsd_t;
  393. /* multiple of word size */
  394. #ifdef __LP64__
  395. #define PKT_INFO_SIZE 6
  396. #define RXD_INFO_SIZE 5
  397. #define PKT_TYPE uint64_t
  398. #else
  399. #define PKT_INFO_SIZE 11
  400. #define RXD_INFO_SIZE 8
  401. #define PKT_TYPE uint32_t
  402. #endif
  403. #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3)
  404. #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4)
  405. typedef struct if_pkt_info_pad {
  406. PKT_TYPE pkt_val[PKT_INFO_SIZE];
  407. } *if_pkt_info_pad_t;
  408. typedef struct if_rxd_info_pad {
  409. PKT_TYPE rxd_val[RXD_INFO_SIZE];
  410. } *if_rxd_info_pad_t;
  411. CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info));
  412. CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info));
  413. static inline void
  414. pkt_info_zero(if_pkt_info_t pi)
  415. {
  416. if_pkt_info_pad_t pi_pad;
  417. pi_pad = (if_pkt_info_pad_t)pi;
  418. pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0;
  419. pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0;
  420. #ifndef __LP64__
  421. pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0;
  422. pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0;
  423. #endif
  424. }
  425. static device_method_t iflib_pseudo_methods[] = {
  426. DEVMETHOD(device_attach, noop_attach),
  427. DEVMETHOD(device_detach, iflib_pseudo_detach),
  428. DEVMETHOD_END
  429. };
  430. driver_t iflib_pseudodriver = {
  431. "iflib_pseudo", iflib_pseudo_methods, sizeof(struct iflib_ctx),
  432. };
  433. static inline void
  434. rxd_info_zero(if_rxd_info_t ri)
  435. {
  436. if_rxd_info_pad_t ri_pad;
  437. int i;
  438. ri_pad = (if_rxd_info_pad_t)ri;
  439. for (i = 0; i < RXD_LOOP_BOUND; i += 4) {
  440. ri_pad->rxd_val[i] = 0;
  441. ri_pad->rxd_val[i+1] = 0;
  442. ri_pad->rxd_val[i+2] = 0;
  443. ri_pad->rxd_val[i+3] = 0;
  444. }
  445. #ifdef __LP64__
  446. ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0;
  447. #endif
  448. }
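/*
 * pkt_info_zero() and rxd_info_zero() clear their structures one machine
 * word at a time through the *_pad overlays rather than with a bzero()
 * call; the CTASSERTs above guarantee that the overlays cover
 * struct if_pkt_info and struct if_rxd_info exactly, so no field is left
 * uninitialized.
 */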
  449. /*
  450. * Only allow a single packet to take up at most 1/nth of the tx ring
  451. */
  452. #define MAX_SINGLE_PACKET_FRACTION 12
  453. #define IF_BAD_DMA (bus_addr_t)-1
  454. #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING))
  455. #define CTX_LOCK_INIT(_sc) sx_init(&(_sc)->ifc_ctx_sx, "iflib ctx lock")
  456. #define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_ctx_sx)
  457. #define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_ctx_sx)
  458. #define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_ctx_sx)
  459. #define STATE_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_state_mtx, _name, "iflib state lock", MTX_DEF)
  460. #define STATE_LOCK(ctx) mtx_lock(&(ctx)->ifc_state_mtx)
  461. #define STATE_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_state_mtx)
  462. #define STATE_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_state_mtx)
  463. #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx)
  464. #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx)
  465. void
  466. iflib_set_detach(if_ctx_t ctx)
  467. {
  468. STATE_LOCK(ctx);
  469. ctx->ifc_flags |= IFC_IN_DETACH;
  470. STATE_UNLOCK(ctx);
  471. }
  472. /* Our boot-time initialization hook */
  473. static int iflib_module_event_handler(module_t, int, void *);
  474. static moduledata_t iflib_moduledata = {
  475. "iflib",
  476. iflib_module_event_handler,
  477. NULL
  478. };
  479. DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY);
  480. MODULE_VERSION(iflib, 1);
  481. MODULE_DEPEND(iflib, pci, 1, 1, 1);
  482. MODULE_DEPEND(iflib, ether, 1, 1, 1);
  483. TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1);
  484. TASKQGROUP_DEFINE(if_config_tqg, 1, 1);
  485. #ifndef IFLIB_DEBUG_COUNTERS
  486. #ifdef INVARIANTS
  487. #define IFLIB_DEBUG_COUNTERS 1
  488. #else
  489. #define IFLIB_DEBUG_COUNTERS 0
  490. #endif /* !INVARIANTS */
  491. #endif
  492. static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0,
  493. "iflib driver parameters");
  494. /*
  495. * XXX need to ensure that this can't accidentally cause the head to be moved backwards
  496. */
  497. static int iflib_min_tx_latency = 0;
  498. SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW,
  499. &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput");
  500. static int iflib_no_tx_batch = 0;
  501. SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW,
  502. &iflib_no_tx_batch, 0, "process transmits immediately instead of batching them, at the possible expense of throughput");
  503. #if IFLIB_DEBUG_COUNTERS
  504. static int iflib_tx_seen;
  505. static int iflib_tx_sent;
  506. static int iflib_tx_encap;
  507. static int iflib_rx_allocs;
  508. static int iflib_fl_refills;
  509. static int iflib_fl_refills_large;
  510. static int iflib_tx_frees;
  511. SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD,
  512. &iflib_tx_seen, 0, "# TX mbufs seen");
  513. SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD,
  514. &iflib_tx_sent, 0, "# TX mbufs sent");
  515. SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD,
  516. &iflib_tx_encap, 0, "# TX mbufs encapped");
  517. SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD,
  518. &iflib_tx_frees, 0, "# TX frees");
  519. SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD,
  520. &iflib_rx_allocs, 0, "# RX allocations");
  521. SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD,
  522. &iflib_fl_refills, 0, "# refills");
  523. SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD,
  524. &iflib_fl_refills_large, 0, "# large refills");
  525. static int iflib_txq_drain_flushing;
  526. static int iflib_txq_drain_oactive;
  527. static int iflib_txq_drain_notready;
  528. SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD,
  529. &iflib_txq_drain_flushing, 0, "# drain flushes");
  530. SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD,
  531. &iflib_txq_drain_oactive, 0, "# drain oactives");
  532. SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD,
  533. &iflib_txq_drain_notready, 0, "# drain notready");
  534. static int iflib_encap_load_mbuf_fail;
  535. static int iflib_encap_pad_mbuf_fail;
  536. static int iflib_encap_txq_avail_fail;
  537. static int iflib_encap_txd_encap_fail;
  538. SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD,
  539. &iflib_encap_load_mbuf_fail, 0, "# busdma load failures");
  540. SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD,
  541. &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures");
  542. SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD,
  543. &iflib_encap_txq_avail_fail, 0, "# txq avail failures");
  544. SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD,
  545. &iflib_encap_txd_encap_fail, 0, "# driver encap failures");
  546. static int iflib_task_fn_rxs;
  547. static int iflib_rx_intr_enables;
  548. static int iflib_fast_intrs;
  549. static int iflib_rx_unavail;
  550. static int iflib_rx_ctx_inactive;
  551. static int iflib_rx_if_input;
  552. static int iflib_rx_mbuf_null;
  553. static int iflib_rxd_flush;
  554. static int iflib_verbose_debug;
  555. SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD,
  556. &iflib_task_fn_rxs, 0, "# task_fn_rx calls");
  557. SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD,
  558. &iflib_rx_intr_enables, 0, "# RX intr enables");
  559. SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD,
  560. &iflib_fast_intrs, 0, "# fast_intr calls");
  561. SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD,
  562. &iflib_rx_unavail, 0, "# times rxeof called with no available data");
  563. SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD,
  564. &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context");
  565. SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD,
  566. &iflib_rx_if_input, 0, "# times rxeof called if_input");
  567. SYSCTL_INT(_net_iflib, OID_AUTO, rx_mbuf_null, CTLFLAG_RD,
  568. &iflib_rx_mbuf_null, 0, "# times rxeof got null mbuf");
  569. SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD,
  570. &iflib_rxd_flush, 0, "# times rxd_flush called");
  571. SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW,
  572. &iflib_verbose_debug, 0, "enable verbose debugging");
  573. #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1)
  574. static void
  575. iflib_debug_reset(void)
  576. {
  577. iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs =
  578. iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees =
  579. iflib_txq_drain_flushing = iflib_txq_drain_oactive =
  580. iflib_txq_drain_notready =
  581. iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail =
  582. iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail =
  583. iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs =
  584. iflib_rx_unavail =
  585. iflib_rx_ctx_inactive = iflib_rx_if_input =
  586. iflib_rx_mbuf_null = iflib_rxd_flush = 0;
  587. }
  588. #else
  589. #define DBG_COUNTER_INC(name)
  590. static void iflib_debug_reset(void) {}
  591. #endif
  592. #define IFLIB_DEBUG 0
  593. static void iflib_tx_structures_free(if_ctx_t ctx);
  594. static void iflib_rx_structures_free(if_ctx_t ctx);
  595. static int iflib_queues_alloc(if_ctx_t ctx);
  596. static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq);
  597. static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget);
  598. static int iflib_qset_structures_setup(if_ctx_t ctx);
  599. static int iflib_msix_init(if_ctx_t ctx);
  600. static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, const char *str);
  601. static void iflib_txq_check_drain(iflib_txq_t txq, int budget);
  602. static uint32_t iflib_txq_can_drain(struct ifmp_ring *);
  603. #ifdef ALTQ
  604. static void iflib_altq_if_start(if_t ifp);
  605. static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m);
  606. #endif
  607. static int iflib_register(if_ctx_t);
  608. static void iflib_deregister(if_ctx_t);
  609. static void iflib_unregister_vlan_handlers(if_ctx_t ctx);
  610. static void iflib_init_locked(if_ctx_t ctx);
  611. static void iflib_add_device_sysctl_pre(if_ctx_t ctx);
  612. static void iflib_add_device_sysctl_post(if_ctx_t ctx);
  613. static void iflib_ifmp_purge(iflib_txq_t txq);
  614. static void _iflib_pre_assert(if_softc_ctx_t scctx);
  615. static void iflib_if_init_locked(if_ctx_t ctx);
  616. static void iflib_free_intr_mem(if_ctx_t ctx);
  617. #ifndef __NO_STRICT_ALIGNMENT
  618. static struct mbuf * iflib_fixup_rx(struct mbuf *m);
  619. #endif
  620. static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets =
  621. SLIST_HEAD_INITIALIZER(cpu_offsets);
  622. struct cpu_offset {
  623. SLIST_ENTRY(cpu_offset) entries;
  624. cpuset_t set;
  625. unsigned int refcount;
  626. uint16_t offset;
  627. };
  628. static struct mtx cpu_offset_mtx;
  629. MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock",
  630. MTX_DEF);
  631. NETDUMP_DEFINE(iflib);
  632. #ifdef DEV_NETMAP
  633. #include <sys/selinfo.h>
  634. #include <net/netmap.h>
  635. #include <dev/netmap/netmap_kern.h>
  636. MODULE_DEPEND(iflib, netmap, 1, 1, 1);
  637. static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init);
  638. /*
  639. * device-specific sysctl variables:
  640. *
  641. * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
  642. * During regular operations the CRC is stripped, but on some
  643. * hardware reception of frames not multiple of 64 is slower,
  644. * so using crcstrip=0 helps in benchmarks.
  645. *
  646. * iflib_rx_miss, iflib_rx_miss_bufs:
  647. * count packets that might be missed due to lost interrupts.
  648. */
  649. SYSCTL_DECL(_dev_netmap);
  650. /*
  651. * The xl driver by default strips CRCs and we do not override it.
  652. */
  653. int iflib_crcstrip = 1;
  654. SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip,
  655. CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on RX frames");
  656. int iflib_rx_miss, iflib_rx_miss_bufs;
  657. SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss,
  658. CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed RX intr");
  659. SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs,
  660. CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed RX intr bufs");
  661. /*
  662. * Register/unregister. We are already under netmap lock.
  663. * Only called on the first register or the last unregister.
  664. */
  665. static int
  666. iflib_netmap_register(struct netmap_adapter *na, int onoff)
  667. {
  668. if_t ifp = na->ifp;
  669. if_ctx_t ctx = ifp->if_softc;
  670. int status;
  671. CTX_LOCK(ctx);
  672. IFDI_INTR_DISABLE(ctx);
  673. /* Tell the stack that the interface is no longer active */
  674. ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
  675. if (!CTX_IS_VF(ctx))
  676. IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip);
  677. iflib_stop(ctx);
  678. /*
  679. * Enable (or disable) netmap flags, and intercept (or restore)
  680. * ifp->if_transmit. This is done once the device has been stopped
  681. * to prevent race conditions.
  682. */
  683. if (onoff) {
  684. nm_set_native_flags(na);
  685. } else {
  686. nm_clear_native_flags(na);
  687. }
  688. iflib_init_locked(ctx);
  689. IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ?
  690. status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1;
  691. if (status)
  692. nm_clear_native_flags(na);
  693. CTX_UNLOCK(ctx);
  694. return (status);
  695. }
  696. static int
  697. netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init)
  698. {
  699. struct netmap_adapter *na = kring->na;
  700. u_int const lim = kring->nkr_num_slots - 1;
  701. u_int head = kring->rhead;
  702. struct netmap_ring *ring = kring->ring;
  703. bus_dmamap_t *map;
  704. struct if_rxd_update iru;
  705. if_ctx_t ctx = rxq->ifr_ctx;
  706. iflib_fl_t fl = &rxq->ifr_fl[0];
  707. uint32_t refill_pidx, nic_i;
  708. #if IFLIB_DEBUG_COUNTERS
  709. int rf_count = 0;
  710. #endif
  711. if (nm_i == head && __predict_true(!init))
  712. return 0;
  713. iru_init(&iru, rxq, 0 /* flid */);
  714. map = fl->ifl_sds.ifsd_map;
  715. refill_pidx = netmap_idx_k2n(kring, nm_i);
  716. /*
  717. * IMPORTANT: we must leave one free slot in the ring,
  718. * so move head back by one unit
  719. */
  720. head = nm_prev(head, lim);
  721. nic_i = UINT_MAX;
  722. DBG_COUNTER_INC(fl_refills);
  723. while (nm_i != head) {
  724. #if IFLIB_DEBUG_COUNTERS
  725. if (++rf_count == 9)
  726. DBG_COUNTER_INC(fl_refills_large);
  727. #endif
  728. for (int tmp_pidx = 0; tmp_pidx < IFLIB_MAX_RX_REFRESH && nm_i != head; tmp_pidx++) {
  729. struct netmap_slot *slot = &ring->slot[nm_i];
  730. void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]);
  731. uint32_t nic_i_dma = refill_pidx;
  732. nic_i = netmap_idx_k2n(kring, nm_i);
  733. MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH);
  734. if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
  735. return netmap_ring_reinit(kring);
  736. fl->ifl_vm_addrs[tmp_pidx] = addr;
  737. if (__predict_false(init)) {
  738. netmap_load_map(na, fl->ifl_buf_tag,
  739. map[nic_i], addr);
  740. } else if (slot->flags & NS_BUF_CHANGED) {
  741. /* buffer has changed, reload map */
  742. netmap_reload_map(na, fl->ifl_buf_tag,
  743. map[nic_i], addr);
  744. }
  745. slot->flags &= ~NS_BUF_CHANGED;
  746. nm_i = nm_next(nm_i, lim);
  747. fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim);
  748. if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1)
  749. continue;
  750. iru.iru_pidx = refill_pidx;
  751. iru.iru_count = tmp_pidx+1;
  752. ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
  753. refill_pidx = nic_i;
  754. for (int n = 0; n < iru.iru_count; n++) {
  755. bus_dmamap_sync(fl->ifl_buf_tag, map[nic_i_dma],
  756. BUS_DMASYNC_PREREAD);
  757. /* XXX - change this to not use the netmap func */
  758. nic_i_dma = nm_next(nic_i_dma, lim);
  759. }
  760. }
  761. }
  762. kring->nr_hwcur = head;
  763. bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
  764. BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
  765. if (__predict_true(nic_i != UINT_MAX)) {
  766. ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i);
  767. DBG_COUNTER_INC(rxd_flush);
  768. }
  769. return (0);
  770. }
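/*
 * netmap_fl_refill() above hands buffers to the driver in batches of at
 * most IFLIB_MAX_RX_REFRESH slots via isc_rxd_refill(), and writes the
 * hardware producer index only once at the end via isc_rxd_flush().
 */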
  771. /*
  772. * Reconcile kernel and user view of the transmit ring.
  773. *
  774. * All information is in the kring.
  775. * Userspace wants to send packets up to the one before kring->rhead,
  776. * kernel knows kring->nr_hwcur is the first unsent packet.
  777. *
  778. * Here we push packets out (as many as possible), and possibly
  779. * reclaim buffers from previously completed transmission.
  780. *
  781. * The caller (netmap) guarantees that there is only one instance
  782. * running at any time. Any interference with other driver
  783. * methods should be handled by the individual drivers.
  784. */
  785. static int
  786. iflib_netmap_txsync(struct netmap_kring *kring, int flags)
  787. {
  788. struct netmap_adapter *na = kring->na;
  789. if_t ifp = na->ifp;
  790. struct netmap_ring *ring = kring->ring;
  791. u_int nm_i; /* index into the netmap kring */
  792. u_int nic_i; /* index into the NIC ring */
  793. u_int n;
  794. u_int const lim = kring->nkr_num_slots - 1;
  795. u_int const head = kring->rhead;
  796. struct if_pkt_info pi;
  797. /*
  798. * interrupts on every tx packet are expensive so request
  799. * them every half ring, or where NS_REPORT is set
  800. */
  801. u_int report_frequency = kring->nkr_num_slots >> 1;
  802. /* device-specific */
  803. if_ctx_t ctx = ifp->if_softc;
  804. iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id];
  805. bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
  806. BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
  807. /*
  808. * First part: process new packets to send.
  809. * nm_i is the current index in the netmap kring,
  810. * nic_i is the corresponding index in the NIC ring.
  811. *
  812. * If we have packets to send (nm_i != head)
  813. * iterate over the netmap ring, fetch length and update
  814. * the corresponding slot in the NIC ring. Some drivers also
  815. * need to update the buffer's physical address in the NIC slot
  816. * even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
  817. *
  818. * The netmap_reload_map() call is especially expensive,
  819. * even when (as in this case) the tag is 0, so only do it
  820. * when the buffer has actually changed.
  821. *
  822. * If possible do not set the report/intr bit on all slots,
  823. * but only a few times per ring or when NS_REPORT is set.
  824. *
  825. * Finally, on 10G and faster drivers, it might be useful
  826. * to prefetch the next slot and txr entry.
  827. */
  828. nm_i = kring->nr_hwcur;
  829. if (nm_i != head) { /* we have new packets to send */
  830. pkt_info_zero(&pi);
  831. pi.ipi_segs = txq->ift_segs;
  832. pi.ipi_qsidx = kring->ring_id;
  833. nic_i = netmap_idx_k2n(kring, nm_i);
  834. __builtin_prefetch(&ring->slot[nm_i]);
  835. __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]);
  836. __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]);
  837. for (n = 0; nm_i != head; n++) {
  838. struct netmap_slot *slot = &ring->slot[nm_i];
  839. u_int len = slot->len;
  840. uint64_t paddr;
  841. void *addr = PNMB(na, slot, &paddr);
  842. int flags = (slot->flags & NS_REPORT ||
  843. nic_i == 0 || nic_i == report_frequency) ?
  844. IPI_TX_INTR : 0;
  845. /* device-specific */
  846. pi.ipi_len = len;
  847. pi.ipi_segs[0].ds_addr = paddr;
  848. pi.ipi_segs[0].ds_len = len;
  849. pi.ipi_nsegs = 1;
  850. pi.ipi_ndescs = 0;
  851. pi.ipi_pidx = nic_i;
  852. pi.ipi_flags = flags;
  853. /* Fill the slot in the NIC ring. */
  854. ctx->isc_txd_encap(ctx->ifc_softc, &pi);
  855. DBG_COUNTER_INC(tx_encap);
  856. /* prefetch for next round */
  857. __builtin_prefetch(&ring->slot[nm_i + 1]);
  858. __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]);
  859. __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]);
  860. NM_CHECK_ADDR_LEN(na, addr, len);
  861. if (slot->flags & NS_BUF_CHANGED) {
  862. /* buffer has changed, reload map */
  863. netmap_reload_map(na, txq->ift_buf_tag,
  864. txq->ift_sds.ifsd_map[nic_i], addr);
  865. }
  866. /* make sure changes to the buffer are synced */
  867. bus_dmamap_sync(txq->ift_buf_tag,
  868. txq->ift_sds.ifsd_map[nic_i],
  869. BUS_DMASYNC_PREWRITE);
  870. slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
  871. nm_i = nm_next(nm_i, lim);
  872. nic_i = nm_next(nic_i, lim);
  873. }
  874. kring->nr_hwcur = nm_i;
  875. /* synchronize the NIC ring */
  876. bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
  877. BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
  878. /* (re)start the tx unit up to slot nic_i (excluded) */
  879. ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i);
  880. }
  881. /*
  882. * Second part: reclaim buffers for completed transmissions.
  883. *
  884. * If there are unclaimed buffers, attempt to reclaim them.
  885. * If none are reclaimed, and TX IRQs are not in use, do an initial
  886. * minimal delay, then trigger the tx handler which will spin in the
  887. * group task queue.
  888. */
  889. if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) {
  890. if (iflib_tx_credits_update(ctx, txq)) {
  891. /* some tx completed, increment avail */
  892. nic_i = txq->ift_cidx_processed;
  893. kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
  894. }
  895. }
  896. if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ))
  897. if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) {
  898. callout_reset_on(&txq->ift_timer, hz < 2000 ? 1 : hz / 1000,
  899. iflib_timer, txq, txq->ift_timer.c_cpu);
  900. }
  901. return (0);
  902. }
  903. /*
  904. * Reconcile kernel and user view of the receive ring.
  905. * Same as for the txsync, this routine must be efficient.
  906. * The caller guarantees a single invocation, but races against
  907. * the rest of the driver should be handled here.
  908. *
  909. * On call, kring->rhead is the first packet that userspace wants
  910. * to keep, and kring->rcur is the wakeup point.
  911. * The kernel has previously reported packets up to kring->rtail.
  912. *
  913. * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
  914. * of whether or not we received an interrupt.
  915. */
  916. static int
  917. iflib_netmap_rxsync(struct netmap_kring *kring, int flags)
  918. {
  919. struct netmap_adapter *na = kring->na;
  920. struct netmap_ring *ring = kring->ring;
  921. if_t ifp = na->ifp;
  922. iflib_fl_t fl;
  923. uint32_t nm_i; /* index into the netmap ring */
  924. uint32_t nic_i; /* index into the NIC ring */
  925. u_int i, n;
  926. u_int const lim = kring->nkr_num_slots - 1;
  927. u_int const head = kring->rhead;
  928. int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
  929. struct if_rxd_info ri;
  930. if_ctx_t ctx = ifp->if_softc;
  931. iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id];
  932. if (head > lim)
  933. return netmap_ring_reinit(kring);
  934. /*
  935. * XXX netmap_fl_refill() only ever (re)fills free list 0 so far.
  936. */
  937. for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) {
  938. bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
  939. BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
  940. }
  941. /*
  942. * First part: import newly received packets.
  943. *
  944. * nm_i is the index of the next free slot in the netmap ring,
  945. * nic_i is the index of the next received packet in the NIC ring,
  946. * and they may differ in case if_init() has been called while
  947. * in netmap mode. For the receive ring we have
  948. *
  949. * nic_i = rxr->next_check;
  950. * nm_i = kring->nr_hwtail (previous)
  951. * and
  952. * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
  953. *
  954. * rxr->next_check is set to 0 on a ring reinit
  955. */
  956. if (netmap_no_pendintr || force_update) {
  957. int crclen = iflib_crcstrip ? 0 : 4;
  958. int error, avail;
  959. for (i = 0; i < rxq->ifr_nfl; i++) {
  960. fl = &rxq->ifr_fl[i];
  961. nic_i = fl->ifl_cidx;
  962. nm_i = netmap_idx_n2k(kring, nic_i);
  963. avail = ctx->isc_rxd_available(ctx->ifc_softc,
  964. rxq->ifr_id, nic_i, USHRT_MAX);
  965. for (n = 0; avail > 0; n++, avail--) {
  966. rxd_info_zero(&ri);
  967. ri.iri_frags = rxq->ifr_frags;
  968. ri.iri_qsidx = kring->ring_id;
  969. ri.iri_ifp = ctx->ifc_ifp;
  970. ri.iri_cidx = nic_i;
  971. error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri);
  972. ring->slot[nm_i].len = error ? 0 : ri.iri_len - crclen;
  973. ring->slot[nm_i].flags = 0;
  974. bus_dmamap_sync(fl->ifl_buf_tag,
  975. fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD);
  976. nm_i = nm_next(nm_i, lim);
  977. nic_i = nm_next(nic_i, lim);
  978. }
  979. if (n) { /* update the state variables */
  980. if (netmap_no_pendintr && !force_update) {
  981. /* diagnostics */
  982. iflib_rx_miss ++;
  983. iflib_rx_miss_bufs += n;
  984. }
  985. fl->ifl_cidx = nic_i;
  986. kring->nr_hwtail = nm_i;
  987. }
  988. kring->nr_kflags &= ~NKR_PENDINTR;
  989. }
  990. }
  991. /*
  992. * Second part: skip past packets that userspace has released.
  993. * (kring->nr_hwcur to head excluded),
  994. * and make the buffers available for reception.
  995. * As usual nm_i is the index in the netmap ring,
  996. * nic_i is the index in the NIC ring, and
  997. * nm_i == (nic_i + kring->nkr_hwofs) % ring_size
  998. */
  999. /* XXX not sure how this will work with multiple free lists */
  1000. nm_i = kring->nr_hwcur;
  1001. return (netmap_fl_refill(rxq, kring, nm_i, false));
  1002. }
  1003. static void
  1004. iflib_netmap_intr(struct netmap_adapter *na, int onoff)
  1005. {
  1006. if_ctx_t ctx = na->ifp->if_softc;
  1007. CTX_LOCK(ctx);
  1008. if (onoff) {
  1009. IFDI_INTR_ENABLE(ctx);
  1010. } else {
  1011. IFDI_INTR_DISABLE(ctx);
  1012. }
  1013. CTX_UNLOCK(ctx);
  1014. }
  1015. static int
  1016. iflib_netmap_attach(if_ctx_t ctx)
  1017. {
  1018. struct netmap_adapter na;
  1019. if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  1020. bzero(&na, sizeof(na));
  1021. na.ifp = ctx->ifc_ifp;
  1022. na.na_flags = NAF_BDG_MAYSLEEP;
  1023. MPASS(ctx->ifc_softc_ctx.isc_ntxqsets);
  1024. MPASS(ctx->ifc_softc_ctx.isc_nrxqsets);
  1025. na.num_tx_desc = scctx->isc_ntxd[0];
  1026. na.num_rx_desc = scctx->isc_nrxd[0];
  1027. na.nm_txsync = iflib_netmap_txsync;
  1028. na.nm_rxsync = iflib_netmap_rxsync;
  1029. na.nm_register = iflib_netmap_register;
  1030. na.nm_intr = iflib_netmap_intr;
  1031. na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets;
  1032. na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets;
  1033. return (netmap_attach(&na));
  1034. }
  1035. static void
  1036. iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq)
  1037. {
  1038. struct netmap_adapter *na = NA(ctx->ifc_ifp);
  1039. struct netmap_slot *slot;
  1040. slot = netmap_reset(na, NR_TX, txq->ift_id, 0);
  1041. if (slot == NULL)
  1042. return;
  1043. for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) {
  1044. /*
  1045. * In netmap mode, set the map for the packet buffer.
  1046. * NOTE: Some drivers (not this one) also need to set
  1047. * the physical buffer address in the NIC ring.
  1048. * netmap_idx_n2k() maps a nic index, i, into the corresponding
  1049. * netmap slot index, si
  1050. */
  1051. int si = netmap_idx_n2k(na->tx_rings[txq->ift_id], i);
  1052. netmap_load_map(na, txq->ift_buf_tag, txq->ift_sds.ifsd_map[i],
  1053. NMB(na, slot + si));
  1054. }
  1055. }
  1056. static void
  1057. iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq)
  1058. {
  1059. struct netmap_adapter *na = NA(ctx->ifc_ifp);
  1060. struct netmap_kring *kring = na->rx_rings[rxq->ifr_id];
  1061. struct netmap_slot *slot;
  1062. uint32_t nm_i;
  1063. slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0);
  1064. if (slot == NULL)
  1065. return;
  1066. nm_i = netmap_idx_n2k(kring, 0);
  1067. netmap_fl_refill(rxq, kring, nm_i, true);
  1068. }
  1069. static void
  1070. iflib_netmap_timer_adjust(if_ctx_t ctx, iflib_txq_t txq, uint32_t *reset_on)
  1071. {
  1072. struct netmap_kring *kring;
  1073. uint16_t txqid;
  1074. txqid = txq->ift_id;
  1075. kring = NA(ctx->ifc_ifp)->tx_rings[txqid];
  1076. if (kring->nr_hwcur != nm_next(kring->nr_hwtail, kring->nkr_num_slots - 1)) {
  1077. bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
  1078. BUS_DMASYNC_POSTREAD);
  1079. if (ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false))
  1080. netmap_tx_irq(ctx->ifc_ifp, txqid);
  1081. if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ)) {
  1082. if (hz < 2000)
  1083. *reset_on = 1;
  1084. else
  1085. *reset_on = hz / 1000;
  1086. }
  1087. }
  1088. }
  1089. #define iflib_netmap_detach(ifp) netmap_detach(ifp)
  1090. #else
  1091. #define iflib_netmap_txq_init(ctx, txq)
  1092. #define iflib_netmap_rxq_init(ctx, rxq)
  1093. #define iflib_netmap_detach(ifp)
  1094. #define iflib_netmap_attach(ctx) (0)
  1095. #define netmap_rx_irq(ifp, qid, budget) (0)
  1096. #define netmap_tx_irq(ifp, qid) do {} while (0)
  1097. #define iflib_netmap_timer_adjust(ctx, txq, reset_on)
  1098. #endif
  1099. #if defined(__i386__) || defined(__amd64__)
  1100. static __inline void
  1101. prefetch(void *x)
  1102. {
  1103. __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
  1104. }
  1105. static __inline void
  1106. prefetch2cachelines(void *x)
  1107. {
  1108. __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
  1109. #if (CACHE_LINE_SIZE < 128)
  1110. __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long)))));
  1111. #endif
  1112. }
  1113. #else
  1114. #define prefetch(x)
  1115. #define prefetch2cachelines(x)
  1116. #endif
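/*
 * prefetch() touches the cache line containing the given address;
 * prefetch2cachelines() also touches the following line (on
 * configurations where CACHE_LINE_SIZE < 128), for structures that may
 * straddle a cache line boundary.
 */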
  1117. static void
  1118. iflib_gen_mac(if_ctx_t ctx)
  1119. {
  1120. struct thread *td;
  1121. MD5_CTX mdctx;
  1122. char uuid[HOSTUUIDLEN+1];
  1123. char buf[HOSTUUIDLEN+16];
  1124. uint8_t *mac;
  1125. unsigned char digest[16];
  1126. td = curthread;
  1127. mac = ctx->ifc_mac;
  1128. uuid[HOSTUUIDLEN] = 0;
  1129. bcopy(td->td_ucred->cr_prison->pr_hostuuid, uuid, HOSTUUIDLEN);
  1130. snprintf(buf, HOSTUUIDLEN+16, "%s-%s", uuid, device_get_nameunit(ctx->ifc_dev));
  1131. /*
  1132. * Generate a pseudo-random, deterministic MAC
  1133. * address based on the UUID and unit number.
  1134. * The FreeBSD Foundation OUI of 58-9C-FC is used.
  1135. */
  1136. MD5Init(&mdctx);
  1137. MD5Update(&mdctx, buf, strlen(buf));
  1138. MD5Final(digest, &mdctx);
  1139. mac[0] = 0x58;
  1140. mac[1] = 0x9C;
  1141. mac[2] = 0xFC;
  1142. mac[3] = digest[0];
  1143. mac[4] = digest[1];
  1144. mac[5] = digest[2];
  1145. }
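/*
 * The generated address is thus 58:9c:fc followed by the first three
 * bytes of the MD5 digest of "<host uuid>-<device name>": stable across
 * reboots for a given host UUID and device, but distinct per device.
 */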
  1146. static void
  1147. iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid)
  1148. {
  1149. iflib_fl_t fl;
  1150. fl = &rxq->ifr_fl[flid];
  1151. iru->iru_paddrs = fl->ifl_bus_addrs;
  1152. iru->iru_vaddrs = &fl->ifl_vm_addrs[0];
  1153. iru->iru_idxs = fl->ifl_rxd_idxs;
  1154. iru->iru_qsidx = rxq->ifr_id;
  1155. iru->iru_buf_size = fl->ifl_buf_size;
  1156. iru->iru_flidx = fl->ifl_id;
  1157. }
  1158. static void
  1159. _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err)
  1160. {
  1161. if (err)
  1162. return;
  1163. *(bus_addr_t *) arg = segs[0].ds_addr;
  1164. }
  1165. int
  1166. iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags)
  1167. {
  1168. int err;
  1169. device_t dev = ctx->ifc_dev;
  1170. err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
  1171. align, 0, /* alignment, bounds */
  1172. BUS_SPACE_MAXADDR, /* lowaddr */
  1173. BUS_SPACE_MAXADDR, /* highaddr */
  1174. NULL, NULL, /* filter, filterarg */
  1175. size, /* maxsize */
  1176. 1, /* nsegments */
  1177. size, /* maxsegsize */
  1178. BUS_DMA_ALLOCNOW, /* flags */
  1179. NULL, /* lockfunc */
  1180. NULL, /* lockarg */
  1181. &dma->idi_tag);
  1182. if (err) {
  1183. device_printf(dev,
  1184. "%s: bus_dma_tag_create failed: %d\n",
  1185. __func__, err);
  1186. goto fail_0;
  1187. }
  1188. err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr,
  1189. BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map);
  1190. if (err) {
  1191. device_printf(dev,
  1192. "%s: bus_dmamem_alloc(%ju) failed: %d\n",
  1193. __func__, (uintmax_t)size, err);
  1194. goto fail_1;
  1195. }
  1196. dma->idi_paddr = IF_BAD_DMA;
  1197. err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr,
  1198. size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT);
  1199. if (err || dma->idi_paddr == IF_BAD_DMA) {
  1200. device_printf(dev,
  1201. "%s: bus_dmamap_load failed: %d\n",
  1202. __func__, err);
  1203. goto fail_2;
  1204. }
  1205. dma->idi_size = size;
  1206. return (0);
  1207. fail_2:
  1208. bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map);
  1209. fail_1:
  1210. bus_dma_tag_destroy(dma->idi_tag);
  1211. fail_0:
  1212. dma->idi_tag = NULL;
  1213. return (err);
  1214. }
  1215. int
  1216. iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags)
  1217. {
  1218. if_shared_ctx_t sctx = ctx->ifc_sctx;
  1219. KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized"));
  1220. return (iflib_dma_alloc_align(ctx, size, sctx->isc_q_align, dma, mapflags));
  1221. }
  1222. int
  1223. iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count)
  1224. {
  1225. int i, err = 0;
  1226. iflib_dma_info_t *dmaiter;
  1227. dmaiter = dmalist;
  1228. for (i = 0; i < count; i++, dmaiter++) {
  1229. if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0)
  1230. break;
  1231. }
  1232. if (err)
  1233. iflib_dma_free_multi(dmalist, i);
  1234. return (err);
  1235. }
  1236. void
  1237. iflib_dma_free(iflib_dma_info_t dma)
  1238. {
  1239. if (dma->idi_tag == NULL)
  1240. return;
  1241. if (dma->idi_paddr != IF_BAD_DMA) {
  1242. bus_dmamap_sync(dma->idi_tag, dma->idi_map,
  1243. BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
  1244. bus_dmamap_unload(dma->idi_tag, dma->idi_map);
  1245. dma->idi_paddr = IF_BAD_DMA;
  1246. }
  1247. if (dma->idi_vaddr != NULL) {
  1248. bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map);
  1249. dma->idi_vaddr = NULL;
  1250. }
  1251. bus_dma_tag_destroy(dma->idi_tag);
  1252. dma->idi_tag = NULL;
  1253. }
  1254. void
  1255. iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count)
  1256. {
  1257. int i;
  1258. iflib_dma_info_t *dmaiter = dmalist;
  1259. for (i = 0; i < count; i++, dmaiter++)
  1260. iflib_dma_free(*dmaiter);
  1261. }
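/*
 * Sketch of typical driver usage of the DMA helpers above (error
 * handling omitted; nqd and qsize are placeholder values):
 *
 *	struct iflib_dma_info di;
 *
 *	iflib_dma_alloc(ctx, nqd * qsize, &di, BUS_DMA_NOWAIT);
 *	... use di.idi_vaddr / di.idi_paddr for the descriptor ring ...
 *	iflib_dma_free(&di);
 */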
  1262. #ifdef EARLY_AP_STARTUP
  1263. static const int iflib_started = 1;
  1264. #else
  1265. /*
  1266. * We used to abuse the smp_started flag to decide if the queues have been
  1267. * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()).
  1268. * That gave bad races, since the SYSINIT() runs strictly after smp_started
  1269. * is set. Run a SYSINIT() strictly after that to just set a usable
  1270. * completion flag.
  1271. */
  1272. static int iflib_started;
  1273. static void
  1274. iflib_record_started(void *arg)
  1275. {
  1276. iflib_started = 1;
  1277. }
  1278. SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST,
  1279. iflib_record_started, NULL);
  1280. #endif
  1281. static int
  1282. iflib_fast_intr(void *arg)
  1283. {
  1284. iflib_filter_info_t info = arg;
  1285. struct grouptask *gtask = info->ifi_task;
  1286. int result;
  1287. if (!iflib_started)
  1288. return (FILTER_STRAY);
  1289. DBG_COUNTER_INC(fast_intrs);
  1290. if (info->ifi_filter != NULL) {
  1291. result = info->ifi_filter(info->ifi_filter_arg);
  1292. if ((result & FILTER_SCHEDULE_THREAD) == 0)
  1293. return (result);
  1294. }
  1295. GROUPTASK_ENQUEUE(gtask);
  1296. return (FILTER_HANDLED);
  1297. }
  1298. static int
  1299. iflib_fast_intr_rxtx(void *arg)
  1300. {
  1301. iflib_filter_info_t info = arg;
  1302. struct grouptask *gtask = info->ifi_task;
  1303. if_ctx_t ctx;
  1304. iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx;
  1305. iflib_txq_t txq;
  1306. void *sc;
  1307. int i, cidx, result;
  1308. qidx_t txqid;
  1309. bool intr_enable, intr_legacy;
  1310. if (!iflib_started)
  1311. return (FILTER_STRAY);
  1312. DBG_COUNTER_INC(fast_intrs);
  1313. if (info->ifi_filter != NULL) {
  1314. result = info->ifi_filter(info->ifi_filter_arg);
  1315. if ((result & FILTER_SCHEDULE_THREAD) == 0)
  1316. return (result);
  1317. }
  1318. ctx = rxq->ifr_ctx;
  1319. sc = ctx->ifc_softc;
  1320. intr_enable = false;
  1321. intr_legacy = !!(ctx->ifc_flags & IFC_LEGACY);
  1322. MPASS(rxq->ifr_ntxqirq);
  1323. for (i = 0; i < rxq->ifr_ntxqirq; i++) {
  1324. txqid = rxq->ifr_txqid[i];
  1325. txq = &ctx->ifc_txqs[txqid];
  1326. bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
  1327. BUS_DMASYNC_POSTREAD);
  1328. if (!ctx->isc_txd_credits_update(sc, txqid, false)) {
  1329. if (intr_legacy)
  1330. intr_enable = true;
  1331. else
  1332. IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid);
  1333. continue;
  1334. }
  1335. GROUPTASK_ENQUEUE(&txq->ift_task);
  1336. }
  1337. if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ)
  1338. cidx = rxq->ifr_cq_cidx;
  1339. else
  1340. cidx = rxq->ifr_fl[0].ifl_cidx;
  1341. if (iflib_rxd_avail(ctx, rxq, cidx, 1))
  1342. GROUPTASK_ENQUEUE(gtask);
  1343. else {
  1344. if (intr_legacy)
  1345. intr_enable = true;
  1346. else
  1347. IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
  1348. DBG_COUNTER_INC(rx_intr_enables);
  1349. }
  1350. if (intr_enable)
  1351. IFDI_INTR_ENABLE(ctx);
  1352. return (FILTER_HANDLED);
  1353. }
  1354. static int
  1355. iflib_fast_intr_ctx(void *arg)
  1356. {
  1357. iflib_filter_info_t info = arg;
  1358. struct grouptask *gtask = info->ifi_task;
  1359. int result;
  1360. if (!iflib_started)
  1361. return (FILTER_STRAY);
  1362. DBG_COUNTER_INC(fast_intrs);
  1363. if (info->ifi_filter != NULL) {
  1364. result = info->ifi_filter(info->ifi_filter_arg);
  1365. if ((result & FILTER_SCHEDULE_THREAD) == 0)
  1366. return (result);
  1367. }
  1368. GROUPTASK_ENQUEUE(gtask);
  1369. return (FILTER_HANDLED);
  1370. }
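/*
 * All three fast interrupt handlers above share the same shape: run the
 * driver's filter (if any) directly in interrupt context, and only when
 * it asks for FILTER_SCHEDULE_THREAD hand the remaining work to a
 * grouptask, so the heavy RX/TX processing runs in taskqueue threads
 * instead of the interrupt handler.
 */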
  1371. static int
  1372. _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid,
  1373. driver_filter_t filter, driver_intr_t handler, void *arg,
  1374. const char *name)
  1375. {
  1376. int rc, flags;
  1377. struct resource *res;
  1378. void *tag = NULL;
  1379. device_t dev = ctx->ifc_dev;
  1380. flags = RF_ACTIVE;
  1381. if (ctx->ifc_flags & IFC_LEGACY)
  1382. flags |= RF_SHAREABLE;
  1383. MPASS(rid < 512);
  1384. irq->ii_rid = rid;
  1385. res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, flags);
  1386. if (res == NULL) {
  1387. device_printf(dev,
  1388. "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
  1389. return (ENOMEM);
  1390. }
  1391. irq->ii_res = res;
  1392. KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL"));
  1393. rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET,
  1394. filter, handler, arg, &tag);
  1395. if (rc != 0) {
  1396. device_printf(dev,
  1397. "failed to setup interrupt for rid %d, name %s: %d\n",
  1398. rid, name ? name : "unknown", rc);
  1399. return (rc);
  1400. } else if (name)
  1401. bus_describe_intr(dev, res, tag, "%s", name);
  1402. irq->ii_tag = tag;
  1403. return (0);
  1404. }
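/*
 * _iflib_irq_alloc() serves both fast (filter-only) and threaded
 * (handler) interrupts; the KASSERT above ensures callers never pass
 * both a filter and a handler for the same vector.
 */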
  1405. /*********************************************************************
  1406. *
  1407. * Allocate DMA resources for TX buffers as well as memory for the TX
  1408. * mbuf map. TX DMA maps (non-TSO/TSO) and the TX mbuf map are kept in an
  1409. * iflib_sw_tx_desc_array structure, storing all the information that
  1410. * is needed to transmit a packet on the wire. This is called only
  1411. * once at attach; setup is done on every reset.
  1412. *
  1413. **********************************************************************/
  1414. static int
  1415. iflib_txsd_alloc(iflib_txq_t txq)
  1416. {
  1417. if_ctx_t ctx = txq->ift_ctx;
  1418. if_shared_ctx_t sctx = ctx->ifc_sctx;
  1419. if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  1420. device_t dev = ctx->ifc_dev;
  1421. bus_size_t tsomaxsize;
  1422. int err, nsegments, ntsosegments;
  1423. bool tso;
  1424. nsegments = scctx->isc_tx_nsegments;
  1425. ntsosegments = scctx->isc_tx_tso_segments_max;
  1426. tsomaxsize = scctx->isc_tx_tso_size_max;
  1427. if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_VLAN_MTU)
  1428. tsomaxsize += sizeof(struct ether_vlan_header);
  1429. MPASS(scctx->isc_ntxd[0] > 0);
  1430. MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0);
  1431. MPASS(nsegments > 0);
  1432. if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) {
  1433. MPASS(ntsosegments > 0);
  1434. MPASS(sctx->isc_tso_maxsize >= tsomaxsize);
  1435. }
  1436. /*
  1437. * Set up DMA tags for TX buffers.
  1438. */
  1439. if ((err = bus_dma_tag_create(bus_get_dma_tag(dev),
  1440. 1, 0, /* alignment, bounds */
  1441. BUS_SPACE_MAXADDR, /* lowaddr */
  1442. BUS_SPACE_MAXADDR, /* highaddr */
  1443. NULL, NULL, /* filter, filterarg */
  1444. sctx->isc_tx_maxsize, /* maxsize */
  1445. nsegments, /* nsegments */
  1446. sctx->isc_tx_maxsegsize, /* maxsegsize */
  1447. 0, /* flags */
  1448. NULL, /* lockfunc */
  1449. NULL, /* lockfuncarg */
  1450. &txq->ift_buf_tag))) {
  1451. device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err);
  1452. device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n",
  1453. (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize);
  1454. goto fail;
  1455. }
  1456. tso = (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) != 0;
  1457. if (tso && (err = bus_dma_tag_create(bus_get_dma_tag(dev),
  1458. 1, 0, /* alignment, bounds */
  1459. BUS_SPACE_MAXADDR, /* lowaddr */
  1460. BUS_SPACE_MAXADDR, /* highaddr */
  1461. NULL, NULL, /* filter, filterarg */
  1462. tsomaxsize, /* maxsize */
  1463. ntsosegments, /* nsegments */
  1464. sctx->isc_tso_maxsegsize,/* maxsegsize */
  1465. 0, /* flags */
  1466. NULL, /* lockfunc */
  1467. NULL, /* lockfuncarg */
  1468. &txq->ift_tso_buf_tag))) {
  1469. device_printf(dev, "Unable to allocate TSO TX DMA tag: %d\n",
  1470. err);
  1471. goto fail;
  1472. }
  1473. /* Allocate memory for the TX mbuf map. */
  1474. if (!(txq->ift_sds.ifsd_m =
  1475. (struct mbuf **) malloc(sizeof(struct mbuf *) *
  1476. scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
  1477. device_printf(dev, "Unable to allocate TX mbuf map memory\n");
  1478. err = ENOMEM;
  1479. goto fail;
  1480. }
  1481. /*
  1482. * Create the DMA maps for TX buffers.
  1483. */
  1484. if ((txq->ift_sds.ifsd_map = (bus_dmamap_t *)malloc(
  1485. sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset],
  1486. M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) {
  1487. device_printf(dev,
  1488. "Unable to allocate TX buffer DMA map memory\n");
  1489. err = ENOMEM;
  1490. goto fail;
  1491. }
  1492. if (tso && (txq->ift_sds.ifsd_tso_map = (bus_dmamap_t *)malloc(
  1493. sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset],
  1494. M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) {
  1495. device_printf(dev,
  1496. "Unable to allocate TSO TX buffer map memory\n");
  1497. err = ENOMEM;
  1498. goto fail;
  1499. }
  1500. for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) {
  1501. err = bus_dmamap_create(txq->ift_buf_tag, 0,
  1502. &txq->ift_sds.ifsd_map[i]);
  1503. if (err != 0) {
  1504. device_printf(dev, "Unable to create TX DMA map\n");
  1505. goto fail;
  1506. }
  1507. if (!tso)
  1508. continue;
  1509. err = bus_dmamap_create(txq->ift_tso_buf_tag, 0,
  1510. &txq->ift_sds.ifsd_tso_map[i]);
  1511. if (err != 0) {
  1512. device_printf(dev, "Unable to create TSO TX DMA map\n");
  1513. goto fail;
  1514. }
  1515. }
  1516. return (0);
  1517. fail:
  1518. /* Free everything; this handles the case where we failed partway through. */
  1519. iflib_tx_structures_free(ctx);
  1520. return (err);
  1521. }
  1522. static void
  1523. iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i)
  1524. {
  1525. bus_dmamap_t map;
  1526. if (txq->ift_sds.ifsd_map != NULL) {
  1527. map = txq->ift_sds.ifsd_map[i];
  1528. bus_dmamap_sync(txq->ift_buf_tag, map, BUS_DMASYNC_POSTWRITE);
  1529. bus_dmamap_unload(txq->ift_buf_tag, map);
  1530. bus_dmamap_destroy(txq->ift_buf_tag, map);
  1531. txq->ift_sds.ifsd_map[i] = NULL;
  1532. }
  1533. if (txq->ift_sds.ifsd_tso_map != NULL) {
  1534. map = txq->ift_sds.ifsd_tso_map[i];
  1535. bus_dmamap_sync(txq->ift_tso_buf_tag, map,
  1536. BUS_DMASYNC_POSTWRITE);
  1537. bus_dmamap_unload(txq->ift_tso_buf_tag, map);
  1538. bus_dmamap_destroy(txq->ift_tso_buf_tag, map);
  1539. txq->ift_sds.ifsd_tso_map[i] = NULL;
  1540. }
  1541. }
  1542. static void
  1543. iflib_txq_destroy(iflib_txq_t txq)
  1544. {
  1545. if_ctx_t ctx = txq->ift_ctx;
  1546. for (int i = 0; i < txq->ift_size; i++)
  1547. iflib_txsd_destroy(ctx, txq, i);
  1548. if (txq->ift_br != NULL) {
  1549. ifmp_ring_free(txq->ift_br);
  1550. txq->ift_br = NULL;
  1551. }
  1552. mtx_destroy(&txq->ift_mtx);
  1553. if (txq->ift_sds.ifsd_map != NULL) {
  1554. free(txq->ift_sds.ifsd_map, M_IFLIB);
  1555. txq->ift_sds.ifsd_map = NULL;
  1556. }
  1557. if (txq->ift_sds.ifsd_tso_map != NULL) {
  1558. free(txq->ift_sds.ifsd_tso_map, M_IFLIB);
  1559. txq->ift_sds.ifsd_tso_map = NULL;
  1560. }
  1561. if (txq->ift_sds.ifsd_m != NULL) {
  1562. free(txq->ift_sds.ifsd_m, M_IFLIB);
  1563. txq->ift_sds.ifsd_m = NULL;
  1564. }
  1565. if (txq->ift_buf_tag != NULL) {
  1566. bus_dma_tag_destroy(txq->ift_buf_tag);
  1567. txq->ift_buf_tag = NULL;
  1568. }
  1569. if (txq->ift_tso_buf_tag != NULL) {
  1570. bus_dma_tag_destroy(txq->ift_tso_buf_tag);
  1571. txq->ift_tso_buf_tag = NULL;
  1572. }
  1573. if (txq->ift_ifdi != NULL) {
  1574. free(txq->ift_ifdi, M_IFLIB);
  1575. }
  1576. }
  1577. static void
  1578. iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i)
  1579. {
  1580. struct mbuf **mp;
  1581. mp = &txq->ift_sds.ifsd_m[i];
  1582. if (*mp == NULL)
  1583. return;
  1584. if (txq->ift_sds.ifsd_map != NULL) {
  1585. bus_dmamap_sync(txq->ift_buf_tag,
  1586. txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE);
  1587. bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[i]);
  1588. }
  1589. if (txq->ift_sds.ifsd_tso_map != NULL) {
  1590. bus_dmamap_sync(txq->ift_tso_buf_tag,
  1591. txq->ift_sds.ifsd_tso_map[i], BUS_DMASYNC_POSTWRITE);
  1592. bus_dmamap_unload(txq->ift_tso_buf_tag,
  1593. txq->ift_sds.ifsd_tso_map[i]);
  1594. }
  1595. m_free(*mp);
  1596. DBG_COUNTER_INC(tx_frees);
  1597. *mp = NULL;
  1598. }
  1599. static int
  1600. iflib_txq_setup(iflib_txq_t txq)
  1601. {
  1602. if_ctx_t ctx = txq->ift_ctx;
  1603. if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  1604. if_shared_ctx_t sctx = ctx->ifc_sctx;
  1605. iflib_dma_info_t di;
  1606. int i;
  1607. /* Set number of descriptors available */
  1608. txq->ift_qstatus = IFLIB_QUEUE_IDLE;
  1609. /* XXX make configurable */
  1610. txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ;
  1611. /* Reset indices */
  1612. txq->ift_cidx_processed = 0;
  1613. txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0;
  1614. txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset];
  1615. for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++)
  1616. bzero((void *)di->idi_vaddr, di->idi_size);
  1617. IFDI_TXQ_SETUP(ctx, txq->ift_id);
  1618. for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++)
  1619. bus_dmamap_sync(di->idi_tag, di->idi_map,
  1620. BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
  1621. return (0);
  1622. }
  1623. /*********************************************************************
  1624. *
  1625. * Allocate DMA resources for RX buffers as well as memory for the RX
  1626. * mbuf map, direct RX cluster pointer map and RX cluster bus address
  1627. * map. RX DMA map, RX mbuf map, direct RX cluster pointer map and
  1628. * RX cluster map are kept in an iflib_sw_rx_desc_array structure.
  1629. * Since we use one entry in iflib_sw_rx_desc_array per received
  1630. * packet, the maximum number of entries we'll need is equal to the
  1631. * number of hardware receive descriptors that we've allocated.
  1632. *
  1633. **********************************************************************/
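/*
 * Rough sketch of the resulting layout (illustrative only): for a free
 * list with N = scctx->isc_nrxd[rxq->ifr_fl_offset] entries, slot i of
 * each array allocated below describes the same receive buffer, so code
 * elsewhere indexes them in parallel:
 *
 *	struct mbuf *m   = fl->ifl_sds.ifsd_m[i];	(mbuf header)
 *	caddr_t cl       = fl->ifl_sds.ifsd_cl[i];	(cluster virtual address)
 *	bus_addr_t ba    = fl->ifl_sds.ifsd_ba[i];	(cluster bus address)
 *	bus_dmamap_t map = fl->ifl_sds.ifsd_map[i];	(busdma map for the cluster)
 */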
  1634. static int
  1635. iflib_rxsd_alloc(iflib_rxq_t rxq)
  1636. {
  1637. if_ctx_t ctx = rxq->ifr_ctx;
  1638. if_shared_ctx_t sctx = ctx->ifc_sctx;
  1639. if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  1640. device_t dev = ctx->ifc_dev;
  1641. iflib_fl_t fl;
  1642. int err;
  1643. MPASS(scctx->isc_nrxd[0] > 0);
  1644. MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0);
  1645. fl = rxq->ifr_fl;
  1646. for (int i = 0; i < rxq->ifr_nfl; i++, fl++) {
  1647. fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* not necessarily the same as isc_nrxd[0] */
  1648. /* Set up DMA tag for RX buffers. */
  1649. err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
  1650. 1, 0, /* alignment, bounds */
  1651. BUS_SPACE_MAXADDR, /* lowaddr */
  1652. BUS_SPACE_MAXADDR, /* highaddr */
  1653. NULL, NULL, /* filter, filterarg */
  1654. sctx->isc_rx_maxsize, /* maxsize */
  1655. sctx->isc_rx_nsegments, /* nsegments */
  1656. sctx->isc_rx_maxsegsize, /* maxsegsize */
  1657. 0, /* flags */
  1658. NULL, /* lockfunc */
  1659. NULL, /* lockarg */
  1660. &fl->ifl_buf_tag);
  1661. if (err) {
  1662. device_printf(dev,
  1663. "Unable to allocate RX DMA tag: %d\n", err);
  1664. goto fail;
  1665. }
  1666. /* Allocate memory for the RX mbuf map. */
  1667. if (!(fl->ifl_sds.ifsd_m =
  1668. (struct mbuf **) malloc(sizeof(struct mbuf *) *
  1669. scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
  1670. device_printf(dev,
  1671. "Unable to allocate RX mbuf map memory\n");
  1672. err = ENOMEM;
  1673. goto fail;
  1674. }
  1675. /* Allocate memory for the direct RX cluster pointer map. */
  1676. if (!(fl->ifl_sds.ifsd_cl =
  1677. (caddr_t *) malloc(sizeof(caddr_t) *
  1678. scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
  1679. device_printf(dev,
  1680. "Unable to allocate RX cluster map memory\n");
  1681. err = ENOMEM;
  1682. goto fail;
  1683. }
  1684. /* Allocate memory for the RX cluster bus address map. */
  1685. if (!(fl->ifl_sds.ifsd_ba =
  1686. (bus_addr_t *) malloc(sizeof(bus_addr_t) *
  1687. scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
  1688. device_printf(dev,
  1689. "Unable to allocate RX bus address map memory\n");
  1690. err = ENOMEM;
  1691. goto fail;
  1692. }
  1693. /*
  1694. * Create the DMA maps for RX buffers.
  1695. */
  1696. if (!(fl->ifl_sds.ifsd_map =
  1697. (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
  1698. device_printf(dev,
  1699. "Unable to allocate RX buffer DMA map memory\n");
  1700. err = ENOMEM;
  1701. goto fail;
  1702. }
  1703. for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) {
  1704. err = bus_dmamap_create(fl->ifl_buf_tag, 0,
  1705. &fl->ifl_sds.ifsd_map[i]);
  1706. if (err != 0) {
  1707. device_printf(dev, "Unable to create RX buffer DMA map\n");
  1708. goto fail;
  1709. }
  1710. }
  1711. }
  1712. return (0);
  1713. fail:
  1714. iflib_rx_structures_free(ctx);
  1715. return (err);
  1716. }
  1717. /*
  1718. * Internal service routines
  1719. */
  1720. struct rxq_refill_cb_arg {
  1721. int error;
  1722. bus_dma_segment_t seg;
  1723. int nseg;
  1724. };
  1725. static void
  1726. _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
  1727. {
  1728. struct rxq_refill_cb_arg *cb_arg = arg;
  1729. cb_arg->error = error;
  1730. cb_arg->seg = segs[0];
  1731. cb_arg->nseg = nseg;
  1732. }
  1733. /**
  1734. * _iflib_fl_refill - refill an rxq free-buffer list
  1735. * @ctx: the iflib context
  1736. * @fl: the free list to refill
  1737. * @count: the number of new buffers to allocate
  1738. *
  1739. * (Re)populate an rxq free-buffer list with up to @count new packet buffers.
  1740. * The caller must ensure that @count does not exceed the queue's capacity.
  1741. */
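/*
 * Illustrative sketch of the driver half of this path, using hypothetical
 * "example_softc"/"example_rxd" types that stand in for real hardware
 * state: the batched refill below hands buffer addresses to the driver
 * through struct if_rxd_update, and the driver writes them into its
 * descriptor ring at the indicated slots (field names should be checked
 * against iflib.h):
 *
 *	static void
 *	example_rxd_refill(void *arg, if_rxd_update_t iru)
 *	{
 *		struct example_softc *sc = arg;
 *		struct example_rxd *ring = sc->rx_ring[iru->iru_qsidx];
 *		int i;
 *
 *		for (i = 0; i < iru->iru_count; i++)
 *			ring[iru->iru_idxs[i]].paddr =
 *			    htole64(iru->iru_paddrs[i]);
 *	}
 */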
  1742. static uint8_t
  1743. _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
  1744. {
  1745. struct if_rxd_update iru;
  1746. struct rxq_refill_cb_arg cb_arg;
  1747. struct mbuf *m;
  1748. caddr_t cl, *sd_cl;
  1749. struct mbuf **sd_m;
  1750. bus_dmamap_t *sd_map;
  1751. bus_addr_t bus_addr, *sd_ba;
  1752. int err, frag_idx, i, idx, n, pidx;
  1753. qidx_t credits;
  1754. sd_m = fl->ifl_sds.ifsd_m;
  1755. sd_map = fl->ifl_sds.ifsd_map;
  1756. sd_cl = fl->ifl_sds.ifsd_cl;
  1757. sd_ba = fl->ifl_sds.ifsd_ba;
  1758. pidx = fl->ifl_pidx;
  1759. idx = pidx;
  1760. frag_idx = fl->ifl_fragidx;
  1761. credits = fl->ifl_credits;
  1762. i = 0;
  1763. n = count;
  1764. MPASS(n > 0);
  1765. MPASS(credits + n <= fl->ifl_size);
  1766. if (pidx < fl->ifl_cidx)
  1767. MPASS(pidx + n <= fl->ifl_cidx);
  1768. if (pidx == fl->ifl_cidx && (credits < fl->ifl_size))
  1769. MPASS(fl->ifl_gen == 0);
  1770. if (pidx > fl->ifl_cidx)
  1771. MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx);
  1772. DBG_COUNTER_INC(fl_refills);
  1773. if (n > 8)
  1774. DBG_COUNTER_INC(fl_refills_large);
  1775. iru_init(&iru, fl->ifl_rxq, fl->ifl_id);
  1776. while (n--) {
  1777. /*
  1778. * We allocate an uninitialized mbuf + cluster; the mbuf is
  1779. * initialized after rx.
  1780. *
  1781. * If the cluster is still set then we know a minimum-sized packet was received
  1782. */
  1783. bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size,
  1784. &frag_idx);
  1785. if (frag_idx < 0)
  1786. bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx);
  1787. MPASS(frag_idx >= 0);
  1788. if ((cl = sd_cl[frag_idx]) == NULL) {
  1789. if ((cl = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL)
  1790. break;
  1791. cb_arg.error = 0;
  1792. MPASS(sd_map != NULL);
  1793. err = bus_dmamap_load(fl->ifl_buf_tag, sd_map[frag_idx],
  1794. cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg,
  1795. BUS_DMA_NOWAIT);
  1796. if (err != 0 || cb_arg.error) {
  1797. /*
  1798. * !zone_pack ?
  1799. */
  1800. if (fl->ifl_zone == zone_pack)
  1801. uma_zfree(fl->ifl_zone, cl);
  1802. break;
  1803. }
  1804. sd_ba[frag_idx] = bus_addr = cb_arg.seg.ds_addr;
  1805. sd_cl[frag_idx] = cl;
  1806. #if MEMORY_LOGGING
  1807. fl->ifl_cl_enqueued++;
  1808. #endif
  1809. } else {
  1810. bus_addr = sd_ba[frag_idx];
  1811. }
  1812. bus_dmamap_sync(fl->ifl_buf_tag, sd_map[frag_idx],
  1813. BUS_DMASYNC_PREREAD);
  1814. MPASS(sd_m[frag_idx] == NULL);
  1815. if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
  1816. break;
  1817. }
  1818. sd_m[frag_idx] = m;
  1819. bit_set(fl->ifl_rx_bitmap, frag_idx);
  1820. #if MEMORY_LOGGING
  1821. fl->ifl_m_enqueued++;
  1822. #endif
  1823. DBG_COUNTER_INC(rx_allocs);
  1824. fl->ifl_rxd_idxs[i] = frag_idx;
  1825. fl->ifl_bus_addrs[i] = bus_addr;
  1826. fl->ifl_vm_addrs[i] = cl;
  1827. credits++;
  1828. i++;
  1829. MPASS(credits <= fl->ifl_size);
  1830. if (++idx == fl->ifl_size) {
  1831. fl->ifl_gen = 1;
  1832. idx = 0;
  1833. }
  1834. if (n == 0 || i == IFLIB_MAX_RX_REFRESH) {
  1835. iru.iru_pidx = pidx;
  1836. iru.iru_count = i;
  1837. ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
  1838. i = 0;
  1839. pidx = idx;
  1840. fl->ifl_pidx = idx;
  1841. fl->ifl_credits = credits;
  1842. }
  1843. }
  1844. if (i) {
  1845. iru.iru_pidx = pidx;
  1846. iru.iru_count = i;
  1847. ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
  1848. fl->ifl_pidx = idx;
  1849. fl->ifl_credits = credits;
  1850. }
  1851. DBG_COUNTER_INC(rxd_flush);
  1852. if (fl->ifl_pidx == 0)
  1853. pidx = fl->ifl_size - 1;
  1854. else
  1855. pidx = fl->ifl_pidx - 1;
  1856. bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
  1857. BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
  1858. ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx);
  1859. fl->ifl_fragidx = frag_idx;
  1860. return (n == -1 ? 0 : IFLIB_RXEOF_EMPTY);
  1861. }
  1862. static __inline uint8_t
  1863. __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max)
  1864. {
  1865. /* we avoid allowing pidx to catch up with cidx as it confuses ixl */
  1866. int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1;
  1867. #ifdef INVARIANTS
  1868. int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1;
  1869. #endif
  1870. MPASS(fl->ifl_credits <= fl->ifl_size);
  1871. MPASS(reclaimable == delta);
  1872. if (reclaimable > 0)
  1873. return (_iflib_fl_refill(ctx, fl, min(max, reclaimable)));
  1874. return (0);
  1875. }
  1876. uint8_t
  1877. iflib_in_detach(if_ctx_t ctx)
  1878. {
  1879. bool in_detach;
  1880. STATE_LOCK(ctx);
  1881. in_detach = !!(ctx->ifc_flags & IFC_IN_DETACH);
  1882. STATE_UNLOCK(ctx);
  1883. return (in_detach);
  1884. }
  1885. static void
  1886. iflib_fl_bufs_free(iflib_fl_t fl)
  1887. {
  1888. iflib_dma_info_t idi = fl->ifl_ifdi;
  1889. bus_dmamap_t sd_map;
  1890. uint32_t i;
  1891. for (i = 0; i < fl->ifl_size; i++) {
  1892. struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i];
  1893. caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i];
  1894. if (*sd_cl != NULL) {
  1895. sd_map = fl->ifl_sds.ifsd_map[i];
  1896. bus_dmamap_sync(fl->ifl_buf_tag, sd_map,
  1897. BUS_DMASYNC_POSTREAD);
  1898. bus_dmamap_unload(fl->ifl_buf_tag, sd_map);
  1899. if (*sd_cl != NULL)
  1900. uma_zfree(fl->ifl_zone, *sd_cl);
  1901. if (*sd_m != NULL) {
  1902. m_init(*sd_m, M_NOWAIT, MT_DATA, 0);
  1903. uma_zfree(zone_mbuf, *sd_m);
  1904. }
  1905. } else {
  1906. MPASS(*sd_cl == NULL);
  1907. MPASS(*sd_m == NULL);
  1908. }
  1909. #if MEMORY_LOGGING
  1910. fl->ifl_m_dequeued++;
  1911. fl->ifl_cl_dequeued++;
  1912. #endif
  1913. *sd_cl = NULL;
  1914. *sd_m = NULL;
  1915. }
  1916. #ifdef INVARIANTS
  1917. for (i = 0; i < fl->ifl_size; i++) {
  1918. MPASS(fl->ifl_sds.ifsd_cl[i] == NULL);
  1919. MPASS(fl->ifl_sds.ifsd_m[i] == NULL);
  1920. }
  1921. #endif
  1922. /*
  1923. * Reset free list values
  1924. */
  1925. fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0;
  1926. bzero(idi->idi_vaddr, idi->idi_size);
  1927. }
  1928. /*********************************************************************
  1929. *
  1930. * Initialize a free list and its buffers.
  1931. *
  1932. **********************************************************************/
  1933. static int
  1934. iflib_fl_setup(iflib_fl_t fl)
  1935. {
  1936. iflib_rxq_t rxq = fl->ifl_rxq;
  1937. if_ctx_t ctx = rxq->ifr_ctx;
  1938. bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1);
  1939. /*
  1940. ** Free current RX buffer structs and their mbufs
  1941. */
  1942. iflib_fl_bufs_free(fl);
  1943. /* Now replenish the mbufs */
  1944. MPASS(fl->ifl_credits == 0);
  1945. fl->ifl_buf_size = ctx->ifc_rx_mbuf_sz;
  1946. if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size)
  1947. ctx->ifc_max_fl_buf_size = fl->ifl_buf_size;
  1948. fl->ifl_cltype = m_gettype(fl->ifl_buf_size);
  1949. fl->ifl_zone = m_getzone(fl->ifl_buf_size);
  1950. /* avoid pre-allocating zillions of clusters to an idle card
  1951. * potentially speeding up attach
  1952. */
  1953. (void) _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size));
  1954. MPASS(min(128, fl->ifl_size) == fl->ifl_credits);
  1955. if (min(128, fl->ifl_size) != fl->ifl_credits)
  1956. return (ENOBUFS);
  1957. /*
  1958. * A partial refill is treated as a failure and was handled above.
  1959. */
  1960. MPASS(rxq != NULL);
  1961. MPASS(fl->ifl_ifdi != NULL);
  1962. bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
  1963. BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
  1964. return (0);
  1965. }
  1966. /*********************************************************************
  1967. *
  1968. * Free receive ring data structures
  1969. *
  1970. **********************************************************************/
  1971. static void
  1972. iflib_rx_sds_free(iflib_rxq_t rxq)
  1973. {
  1974. iflib_fl_t fl;
  1975. int i, j;
  1976. if (rxq->ifr_fl != NULL) {
  1977. for (i = 0; i < rxq->ifr_nfl; i++) {
  1978. fl = &rxq->ifr_fl[i];
  1979. if (fl->ifl_buf_tag != NULL) {
  1980. if (fl->ifl_sds.ifsd_map != NULL) {
  1981. for (j = 0; j < fl->ifl_size; j++) {
  1982. bus_dmamap_sync(
  1983. fl->ifl_buf_tag,
  1984. fl->ifl_sds.ifsd_map[j],
  1985. BUS_DMASYNC_POSTREAD);
  1986. bus_dmamap_unload(
  1987. fl->ifl_buf_tag,
  1988. fl->ifl_sds.ifsd_map[j]);
  1989. bus_dmamap_destroy(
  1990. fl->ifl_buf_tag,
  1991. fl->ifl_sds.ifsd_map[j]);
  1992. }
  1993. }
  1994. bus_dma_tag_destroy(fl->ifl_buf_tag);
  1995. fl->ifl_buf_tag = NULL;
  1996. }
  1997. free(fl->ifl_sds.ifsd_m, M_IFLIB);
  1998. free(fl->ifl_sds.ifsd_cl, M_IFLIB);
  1999. free(fl->ifl_sds.ifsd_ba, M_IFLIB);
  2000. free(fl->ifl_sds.ifsd_map, M_IFLIB);
  2001. fl->ifl_sds.ifsd_m = NULL;
  2002. fl->ifl_sds.ifsd_cl = NULL;
  2003. fl->ifl_sds.ifsd_ba = NULL;
  2004. fl->ifl_sds.ifsd_map = NULL;
  2005. }
  2006. free(rxq->ifr_fl, M_IFLIB);
  2007. rxq->ifr_fl = NULL;
  2008. free(rxq->ifr_ifdi, M_IFLIB);
  2009. rxq->ifr_ifdi = NULL;
  2010. rxq->ifr_cq_cidx = 0;
  2011. }
  2012. }
  2013. /*
  2014. * Timer routine
  2015. */
  2016. static void
  2017. iflib_timer(void *arg)
  2018. {
  2019. iflib_txq_t txq = arg;
  2020. if_ctx_t ctx = txq->ift_ctx;
  2021. if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
  2022. uint64_t this_tick = ticks;
  2023. uint32_t reset_on = hz / 2;
  2024. if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
  2025. return;
  2026. /*
  2027. ** Check on the state of the TX queue(s); this
  2028. ** can be done without the lock because it's RO
  2029. ** and the HUNG state will be static if set.
  2030. */
  2031. if (this_tick - txq->ift_last_timer_tick >= hz / 2) {
  2032. txq->ift_last_timer_tick = this_tick;
  2033. IFDI_TIMER(ctx, txq->ift_id);
  2034. if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) &&
  2035. ((txq->ift_cleaned_prev == txq->ift_cleaned) ||
  2036. (sctx->isc_pause_frames == 0)))
  2037. goto hung;
  2038. if (txq->ift_qstatus != IFLIB_QUEUE_IDLE &&
  2039. ifmp_ring_is_stalled(txq->ift_br)) {
  2040. KASSERT(ctx->ifc_link_state == LINK_STATE_UP, ("queue can't be marked as hung if interface is down"));
  2041. txq->ift_qstatus = IFLIB_QUEUE_HUNG;
  2042. }
  2043. txq->ift_cleaned_prev = txq->ift_cleaned;
  2044. }
  2045. #ifdef DEV_NETMAP
  2046. if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP)
  2047. iflib_netmap_timer_adjust(ctx, txq, &reset_on);
  2048. #endif
  2049. /* handle any laggards */
  2050. if (txq->ift_db_pending)
  2051. GROUPTASK_ENQUEUE(&txq->ift_task);
  2052. sctx->isc_pause_frames = 0;
  2053. if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)
  2054. callout_reset_on(&txq->ift_timer, reset_on, iflib_timer, txq, txq->ift_timer.c_cpu);
  2055. return;
  2056. hung:
  2057. device_printf(ctx->ifc_dev,
  2058. "Watchdog timeout (TX: %d desc avail: %d pidx: %d) -- resetting\n",
  2059. txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx);
  2060. STATE_LOCK(ctx);
  2061. if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
  2062. ctx->ifc_flags |= (IFC_DO_WATCHDOG|IFC_DO_RESET);
  2063. iflib_admin_intr_deferred(ctx);
  2064. STATE_UNLOCK(ctx);
  2065. }
  2066. static void
  2067. iflib_calc_rx_mbuf_sz(if_ctx_t ctx)
  2068. {
  2069. if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
  2070. /*
  2071. * XXX don't set the max_frame_size to larger
  2072. * than the hardware can handle
  2073. */
  2074. if (sctx->isc_max_frame_size <= MCLBYTES)
  2075. ctx->ifc_rx_mbuf_sz = MCLBYTES;
  2076. else
  2077. ctx->ifc_rx_mbuf_sz = MJUMPAGESIZE;
  2078. }
  2079. uint32_t
  2080. iflib_get_rx_mbuf_sz(if_ctx_t ctx)
  2081. {
  2082. return (ctx->ifc_rx_mbuf_sz);
  2083. }
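/*
 * Illustrative use only (hypothetical example_softc/example_if_init): a
 * driver that needs the cluster size selected by iflib_calc_rx_mbuf_sz()
 * when programming its receive buffer length can query it from its
 * IFDI_INIT path, which runs after the size has been calculated:
 *
 *	static void
 *	example_if_init(if_ctx_t ctx)
 *	{
 *		struct example_softc *sc = iflib_get_softc(ctx);
 *
 *		sc->rx_buf_len = iflib_get_rx_mbuf_sz(ctx);
 *		(program sc->rx_buf_len into the RX ring registers here)
 *	}
 */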
  2084. static void
  2085. iflib_init_locked(if_ctx_t ctx)
  2086. {
  2087. if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
  2088. if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  2089. if_t ifp = ctx->ifc_ifp;
  2090. iflib_fl_t fl;
  2091. iflib_txq_t txq;
  2092. iflib_rxq_t rxq;
  2093. int i, j, tx_ip_csum_flags, tx_ip6_csum_flags;
  2094. if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
  2095. IFDI_INTR_DISABLE(ctx);
  2096. tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP);
  2097. tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP);
  2098. /* Set hardware offload abilities */
  2099. if_clearhwassist(ifp);
  2100. if (if_getcapenable(ifp) & IFCAP_TXCSUM)
  2101. if_sethwassistbits(ifp, tx_ip_csum_flags, 0);
  2102. if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
  2103. if_sethwassistbits(ifp, tx_ip6_csum_flags, 0);
  2104. if (if_getcapenable(ifp) & IFCAP_TSO4)
  2105. if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
  2106. if (if_getcapenable(ifp) & IFCAP_TSO6)
  2107. if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
  2108. for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) {
  2109. CALLOUT_LOCK(txq);
  2110. callout_stop(&txq->ift_timer);
  2111. CALLOUT_UNLOCK(txq);
  2112. iflib_netmap_txq_init(ctx, txq);
  2113. }
  2114. /*
  2115. * Calculate a suitable Rx mbuf size prior to calling IFDI_INIT, so
  2116. * that drivers can use the value when setting up the hardware receive
  2117. * buffers.
  2118. */
  2119. iflib_calc_rx_mbuf_sz(ctx);
  2120. #ifdef INVARIANTS
  2121. i = if_getdrvflags(ifp);
  2122. #endif
  2123. IFDI_INIT(ctx);
  2124. MPASS(if_getdrvflags(ifp) == i);
  2125. for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) {
  2126. /* XXX this should really be done on a per-queue basis */
  2127. if (if_getcapenable(ifp) & IFCAP_NETMAP) {
  2128. MPASS(rxq->ifr_id == i);
  2129. iflib_netmap_rxq_init(ctx, rxq);
  2130. continue;
  2131. }
  2132. for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
  2133. if (iflib_fl_setup(fl)) {
  2134. device_printf(ctx->ifc_dev,
  2135. "setting up free list %d failed - "
  2136. "check cluster settings\n", j);
  2137. goto done;
  2138. }
  2139. }
  2140. }
  2141. done:
  2142. if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
  2143. IFDI_INTR_ENABLE(ctx);
  2144. txq = ctx->ifc_txqs;
  2145. for (i = 0; i < sctx->isc_ntxqsets; i++, txq++)
  2146. callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq,
  2147. txq->ift_timer.c_cpu);
  2148. }
  2149. static int
  2150. iflib_media_change(if_t ifp)
  2151. {
  2152. if_ctx_t ctx = if_getsoftc(ifp);
  2153. int err;
  2154. CTX_LOCK(ctx);
  2155. if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0)
  2156. iflib_init_locked(ctx);
  2157. CTX_UNLOCK(ctx);
  2158. return (err);
  2159. }
  2160. static void
  2161. iflib_media_status(if_t ifp, struct ifmediareq *ifmr)
  2162. {
  2163. if_ctx_t ctx = if_getsoftc(ifp);
  2164. CTX_LOCK(ctx);
  2165. IFDI_UPDATE_ADMIN_STATUS(ctx);
  2166. IFDI_MEDIA_STATUS(ctx, ifmr);
  2167. CTX_UNLOCK(ctx);
  2168. }
  2169. void
  2170. iflib_stop(if_ctx_t ctx)
  2171. {
  2172. iflib_txq_t txq = ctx->ifc_txqs;
  2173. iflib_rxq_t rxq = ctx->ifc_rxqs;
  2174. if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  2175. if_shared_ctx_t sctx = ctx->ifc_sctx;
  2176. iflib_dma_info_t di;
  2177. iflib_fl_t fl;
  2178. int i, j;
  2179. /* Tell the stack that the interface is no longer active */
  2180. if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
  2181. IFDI_INTR_DISABLE(ctx);
  2182. DELAY(1000);
  2183. IFDI_STOP(ctx);
  2184. DELAY(1000);
  2185. iflib_debug_reset();
  2186. /* Wait for current tx queue users to exit to disarm watchdog timer. */
  2187. for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) {
  2188. /* make sure all transmitters have completed before proceeding XXX */
  2189. CALLOUT_LOCK(txq);
  2190. callout_stop(&txq->ift_timer);
  2191. CALLOUT_UNLOCK(txq);
  2192. /* clean any enqueued buffers */
  2193. iflib_ifmp_purge(txq);
  2194. /* Free any existing tx buffers. */
  2195. for (j = 0; j < txq->ift_size; j++) {
  2196. iflib_txsd_free(ctx, txq, j);
  2197. }
  2198. txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0;
  2199. txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0;
  2200. txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0;
  2201. txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0;
  2202. txq->ift_pullups = 0;
  2203. ifmp_ring_reset_stats(txq->ift_br);
  2204. for (j = 0, di = txq->ift_ifdi; j < sctx->isc_ntxqs; j++, di++)
  2205. bzero((void *)di->idi_vaddr, di->idi_size);
  2206. }
  2207. for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) {
  2208. /* make sure all transmitters have completed before proceeding XXX */
  2209. rxq->ifr_cq_cidx = 0;
  2210. for (j = 0, di = rxq->ifr_ifdi; j < sctx->isc_nrxqs; j++, di++)
  2211. bzero((void *)di->idi_vaddr, di->idi_size);
  2212. /* also resets the free lists pidx/cidx */
  2213. for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
  2214. iflib_fl_bufs_free(fl);
  2215. }
  2216. }
  2217. static inline caddr_t
  2218. calc_next_rxd(iflib_fl_t fl, int cidx)
  2219. {
  2220. qidx_t size;
  2221. int nrxd;
  2222. caddr_t start, end, cur, next;
  2223. nrxd = fl->ifl_size;
  2224. size = fl->ifl_rxd_size;
  2225. start = fl->ifl_ifdi->idi_vaddr;
  2226. if (__predict_false(size == 0))
  2227. return (start);
  2228. cur = start + size*cidx;
  2229. end = start + size*nrxd;
  2230. next = CACHE_PTR_NEXT(cur);
  2231. return (next < end ? next : start);
  2232. }
  2233. static inline void
  2234. prefetch_pkts(iflib_fl_t fl, int cidx)
  2235. {
  2236. int nextptr;
  2237. int nrxd = fl->ifl_size;
  2238. caddr_t next_rxd;
  2239. nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1);
  2240. prefetch(&fl->ifl_sds.ifsd_m[nextptr]);
  2241. prefetch(&fl->ifl_sds.ifsd_cl[nextptr]);
  2242. next_rxd = calc_next_rxd(fl, cidx);
  2243. prefetch(next_rxd);
  2244. prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]);
  2245. prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]);
  2246. prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]);
  2247. prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]);
  2248. prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]);
  2249. prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]);
  2250. prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]);
  2251. prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]);
  2252. }
  2253. static void
  2254. rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd)
  2255. {
  2256. int flid, cidx;
  2257. bus_dmamap_t map;
  2258. iflib_fl_t fl;
  2259. int next;
  2260. map = NULL;
  2261. flid = irf->irf_flid;
  2262. cidx = irf->irf_idx;
  2263. fl = &rxq->ifr_fl[flid];
  2264. sd->ifsd_fl = fl;
  2265. sd->ifsd_cidx = cidx;
  2266. sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx];
  2267. sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx];
  2268. fl->ifl_credits--;
  2269. #if MEMORY_LOGGING
  2270. fl->ifl_m_dequeued++;
  2271. #endif
  2272. if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH)
  2273. prefetch_pkts(fl, cidx);
  2274. next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1);
  2275. prefetch(&fl->ifl_sds.ifsd_map[next]);
  2276. map = fl->ifl_sds.ifsd_map[cidx];
  2277. next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1);
  2278. /* not valid assert if bxe really does SGE from non-contiguous elements */
  2279. MPASS(fl->ifl_cidx == cidx);
  2280. bus_dmamap_sync(fl->ifl_buf_tag, map, BUS_DMASYNC_POSTREAD);
  2281. if (unload)
  2282. bus_dmamap_unload(fl->ifl_buf_tag, map);
  2283. fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1);
  2284. if (__predict_false(fl->ifl_cidx == 0))
  2285. fl->ifl_gen = 0;
  2286. bit_clear(fl->ifl_rx_bitmap, cidx);
  2287. }
  2288. static struct mbuf *
  2289. assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd)
  2290. {
  2291. int i, padlen, flags;
  2292. struct mbuf *m, *mh, *mt;
  2293. caddr_t cl;
  2294. i = 0;
  2295. mh = NULL;
  2296. do {
  2297. rxd_frag_to_sd(rxq, &ri->iri_frags[i], TRUE, sd);
  2298. MPASS(*sd->ifsd_cl != NULL);
  2299. MPASS(*sd->ifsd_m != NULL);
  2300. /* Don't include zero-length frags */
  2301. if (ri->iri_frags[i].irf_len == 0) {
  2302. /* XXX we can save the cluster here, but not the mbuf */
  2303. m_init(*sd->ifsd_m, M_NOWAIT, MT_DATA, 0);
  2304. m_free(*sd->ifsd_m);
  2305. *sd->ifsd_m = NULL;
  2306. continue;
  2307. }
  2308. m = *sd->ifsd_m;
  2309. *sd->ifsd_m = NULL;
  2310. if (mh == NULL) {
  2311. flags = M_PKTHDR|M_EXT;
  2312. mh = mt = m;
  2313. padlen = ri->iri_pad;
  2314. } else {
  2315. flags = M_EXT;
  2316. mt->m_next = m;
  2317. mt = m;
  2318. /* assuming padding is only on the first fragment */
  2319. padlen = 0;
  2320. }
  2321. cl = *sd->ifsd_cl;
  2322. *sd->ifsd_cl = NULL;
  2323. /* Can these two be made one? */
  2324. m_init(m, M_NOWAIT, MT_DATA, flags);
  2325. m_cljset(m, cl, sd->ifsd_fl->ifl_cltype);
  2326. /*
  2327. * These must follow m_init and m_cljset
  2328. */
  2329. m->m_data += padlen;
  2330. ri->iri_len -= padlen;
  2331. m->m_len = ri->iri_frags[i].irf_len;
  2332. } while (++i < ri->iri_nfrags);
  2333. return (mh);
  2334. }
  2335. /*
  2336. * Process one software descriptor
  2337. */
  2338. static struct mbuf *
  2339. iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri)
  2340. {
  2341. struct if_rxsd sd;
  2342. struct mbuf *m;
  2343. /* should I merge this back in now that the two paths are basically duplicated? */
  2344. if (ri->iri_nfrags == 1 &&
  2345. ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) {
  2346. rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd);
  2347. m = *sd.ifsd_m;
  2348. *sd.ifsd_m = NULL;
  2349. m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR);
  2350. #ifndef __NO_STRICT_ALIGNMENT
  2351. if (!IP_ALIGNED(m))
  2352. m->m_data += 2;
  2353. #endif
  2354. memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len);
  2355. m->m_len = ri->iri_frags[0].irf_len;
  2356. } else {
  2357. m = assemble_segments(rxq, ri, &sd);
  2358. }
  2359. m->m_pkthdr.len = ri->iri_len;
  2360. m->m_pkthdr.rcvif = ri->iri_ifp;
  2361. m->m_flags |= ri->iri_flags;
  2362. m->m_pkthdr.ether_vtag = ri->iri_vtag;
  2363. m->m_pkthdr.flowid = ri->iri_flowid;
  2364. M_HASHTYPE_SET(m, ri->iri_rsstype);
  2365. m->m_pkthdr.csum_flags = ri->iri_csum_flags;
  2366. m->m_pkthdr.csum_data = ri->iri_csum_data;
  2367. return (m);
  2368. }
  2369. #if defined(INET6) || defined(INET)
  2370. static void
  2371. iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6)
  2372. {
  2373. CURVNET_SET(lc->ifp->if_vnet);
  2374. #if defined(INET6)
  2375. *v6 = VNET(ip6_forwarding);
  2376. #endif
  2377. #if defined(INET)
  2378. *v4 = VNET(ipforwarding);
  2379. #endif
  2380. CURVNET_RESTORE();
  2381. }
  2382. /*
  2383. * Returns true if it's possible this packet could be LROed.
  2384. * If it returns false, it is guaranteed that tcp_lro_rx()
  2385. * would not return zero.
  2386. */
  2387. static bool
  2388. iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding)
  2389. {
  2390. struct ether_header *eh;
  2391. uint16_t eh_type;
  2392. eh = mtod(m, struct ether_header *);
  2393. eh_type = ntohs(eh->ether_type);
  2394. switch (eh_type) {
  2395. #if defined(INET6)
  2396. case ETHERTYPE_IPV6:
  2397. return !v6_forwarding;
  2398. #endif
  2399. #if defined (INET)
  2400. case ETHERTYPE_IP:
  2401. return !v4_forwarding;
  2402. #endif
  2403. }
  2404. return false;
  2405. }
  2406. #else
  2407. static void
  2408. iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused)
  2409. {
  2410. }
  2411. #endif
  2412. static void
  2413. _task_fn_rx_watchdog(void *context)
  2414. {
  2415. iflib_rxq_t rxq = context;
  2416. GROUPTASK_ENQUEUE(&rxq->ifr_task);
  2417. }
  2418. static uint8_t
  2419. iflib_rxeof(iflib_rxq_t rxq, qidx_t budget)
  2420. {
  2421. if_t ifp;
  2422. if_ctx_t ctx = rxq->ifr_ctx;
  2423. if_shared_ctx_t sctx = ctx->ifc_sctx;
  2424. if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  2425. int avail, i;
  2426. qidx_t *cidxp;
  2427. struct if_rxd_info ri;
  2428. int err, budget_left, rx_bytes, rx_pkts;
  2429. iflib_fl_t fl;
  2430. int lro_enabled;
  2431. bool v4_forwarding, v6_forwarding, lro_possible;
  2432. uint8_t retval = 0;
  2433. /*
  2434. * XXX early demux data packets so that if_input processing only handles
  2435. * acks in interrupt context
  2436. */
  2437. struct mbuf *m, *mh, *mt, *mf;
  2438. lro_possible = v4_forwarding = v6_forwarding = false;
  2439. ifp = ctx->ifc_ifp;
  2440. mh = mt = NULL;
  2441. MPASS(budget > 0);
  2442. rx_pkts = rx_bytes = 0;
  2443. if (sctx->isc_flags & IFLIB_HAS_RXCQ)
  2444. cidxp = &rxq->ifr_cq_cidx;
  2445. else
  2446. cidxp = &rxq->ifr_fl[0].ifl_cidx;
  2447. if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) {
  2448. for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++)
  2449. retval |= __iflib_fl_refill_lt(ctx, fl, budget + 8);
  2450. DBG_COUNTER_INC(rx_unavail);
  2451. return (retval);
  2452. }
  2453. for (budget_left = budget; budget_left > 0 && avail > 0;) {
  2454. if (__predict_false(!CTX_ACTIVE(ctx))) {
  2455. DBG_COUNTER_INC(rx_ctx_inactive);
  2456. break;
  2457. }
  2458. /*
  2459. * Reset client set fields to their default values
  2460. */
  2461. rxd_info_zero(&ri);
  2462. ri.iri_qsidx = rxq->ifr_id;
  2463. ri.iri_cidx = *cidxp;
  2464. ri.iri_ifp = ifp;
  2465. ri.iri_frags = rxq->ifr_frags;
  2466. err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri);
  2467. if (err)
  2468. goto err;
  2469. if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
  2470. *cidxp = ri.iri_cidx;
  2471. /* Update our consumer index */
  2472. /* XXX NB: shurd - check if this is still safe */
  2473. while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0])
  2474. rxq->ifr_cq_cidx -= scctx->isc_nrxd[0];
  2475. /* was this only a completion queue message? */
  2476. if (__predict_false(ri.iri_nfrags == 0))
  2477. continue;
  2478. }
  2479. MPASS(ri.iri_nfrags != 0);
  2480. MPASS(ri.iri_len != 0);
  2481. /* will advance the cidx on the corresponding free lists */
  2482. m = iflib_rxd_pkt_get(rxq, &ri);
  2483. avail--;
  2484. budget_left--;
  2485. if (avail == 0 && budget_left)
  2486. avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left);
  2487. if (__predict_false(m == NULL)) {
  2488. DBG_COUNTER_INC(rx_mbuf_null);
  2489. continue;
  2490. }
  2491. /* imm_pkt: -- cxgb */
  2492. if (mh == NULL)
  2493. mh = mt = m;
  2494. else {
  2495. mt->m_nextpkt = m;
  2496. mt = m;
  2497. }
  2498. }
  2499. /* make sure that we can refill faster than drain */
  2500. for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++)
  2501. retval |= __iflib_fl_refill_lt(ctx, fl, budget + 8);
  2502. lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO);
  2503. if (lro_enabled)
  2504. iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding);
  2505. mt = mf = NULL;
  2506. while (mh != NULL) {
  2507. m = mh;
  2508. mh = mh->m_nextpkt;
  2509. m->m_nextpkt = NULL;
  2510. #ifndef __NO_STRICT_ALIGNMENT
  2511. if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL)
  2512. continue;
  2513. #endif
  2514. rx_bytes += m->m_pkthdr.len;
  2515. rx_pkts++;
  2516. #if defined(INET6) || defined(INET)
  2517. if (lro_enabled) {
  2518. if (!lro_possible) {
  2519. lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding);
  2520. if (lro_possible && mf != NULL) {
  2521. ifp->if_input(ifp, mf);
  2522. DBG_COUNTER_INC(rx_if_input);
  2523. mt = mf = NULL;
  2524. }
  2525. }
  2526. if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) ==
  2527. (CSUM_L4_CALC|CSUM_L4_VALID)) {
  2528. if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0)
  2529. continue;
  2530. }
  2531. }
  2532. #endif
  2533. if (lro_possible) {
  2534. ifp->if_input(ifp, m);
  2535. DBG_COUNTER_INC(rx_if_input);
  2536. continue;
  2537. }
  2538. if (mf == NULL)
  2539. mf = m;
  2540. if (mt != NULL)
  2541. mt->m_nextpkt = m;
  2542. mt = m;
  2543. }
  2544. if (mf != NULL) {
  2545. ifp->if_input(ifp, mf);
  2546. DBG_COUNTER_INC(rx_if_input);
  2547. }
  2548. if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes);
  2549. if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts);
  2550. /*
  2551. * Flush any outstanding LRO work
  2552. */
  2553. #if defined(INET6) || defined(INET)
  2554. tcp_lro_flush_all(&rxq->ifr_lc);
  2555. #endif
  2556. if (avail != 0 || iflib_rxd_avail(ctx, rxq, *cidxp, 1) != 0)
  2557. retval |= IFLIB_RXEOF_MORE;
  2558. return (retval);
  2559. err:
  2560. STATE_LOCK(ctx);
  2561. ctx->ifc_flags |= IFC_DO_RESET;
  2562. iflib_admin_intr_deferred(ctx);
  2563. STATE_UNLOCK(ctx);
  2564. return (0);
  2565. }
  2566. #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1)
  2567. static inline qidx_t
  2568. txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use)
  2569. {
  2570. qidx_t notify_count = TXD_NOTIFY_COUNT(txq);
  2571. qidx_t minthresh = txq->ift_size / 8;
  2572. if (in_use > 4*minthresh)
  2573. return (notify_count);
  2574. if (in_use > 2*minthresh)
  2575. return (notify_count >> 1);
  2576. if (in_use > minthresh)
  2577. return (notify_count >> 3);
  2578. return (0);
  2579. }
  2580. static inline qidx_t
  2581. txq_max_rs_deferred(iflib_txq_t txq)
  2582. {
  2583. qidx_t notify_count = TXD_NOTIFY_COUNT(txq);
  2584. qidx_t minthresh = txq->ift_size / 8;
  2585. if (txq->ift_in_use > 4*minthresh)
  2586. return (notify_count);
  2587. if (txq->ift_in_use > 2*minthresh)
  2588. return (notify_count >> 1);
  2589. if (txq->ift_in_use > minthresh)
  2590. return (notify_count >> 2);
  2591. return (2);
  2592. }
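/*
 * Worked example with hypothetical ring parameters (ift_size = 1024,
 * ift_update_freq = 16): TXD_NOTIFY_COUNT() = 1024 / 16 - 1 = 63 and
 * minthresh = 1024 / 8 = 128.  txq_max_db_deferred() then allows 63
 * deferred doorbells when more than 512 descriptors are in use, 31 above
 * 256, 7 above 128, and 0 otherwise, while txq_max_rs_deferred() yields
 * 63 / 31 / 15 at the same thresholds and never less than 2.
 */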
  2593. #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags)
  2594. #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG)
  2595. #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use))
  2596. #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq)
  2597. #define TXQ_MAX_DB_CONSUMED(size) (size >> 4)
  2598. /* forward compatibility for cxgb */
  2599. #define FIRST_QSET(ctx) 0
  2600. #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets)
  2601. #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets)
  2602. #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx))
  2603. #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments))
  2604. /* XXX we should be setting this to something other than zero */
  2605. #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh)
  2606. #define MAX_TX_DESC(ctx) max((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max, \
  2607. (ctx)->ifc_softc_ctx.isc_tx_nsegments)
  2608. static inline bool
  2609. iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use)
  2610. {
  2611. qidx_t dbval, max;
  2612. bool rang;
  2613. rang = false;
  2614. max = TXQ_MAX_DB_DEFERRED(txq, in_use);
  2615. if (ring || txq->ift_db_pending >= max) {
  2616. dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx;
  2617. bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
  2618. BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
  2619. ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval);
  2620. txq->ift_db_pending = txq->ift_npending = 0;
  2621. rang = true;
  2622. }
  2623. return (rang);
  2624. }
  2625. #ifdef PKT_DEBUG
  2626. static void
  2627. print_pkt(if_pkt_info_t pi)
  2628. {
  2629. printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n",
  2630. pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx);
  2631. printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n",
  2632. pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag);
  2633. printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n",
  2634. pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto);
  2635. }
  2636. #endif
  2637. #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO)
  2638. #define IS_TX_OFFLOAD4(pi) ((pi)->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP_TSO))
  2639. #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO)
  2640. #define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO))
  2641. static int
  2642. iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp)
  2643. {
  2644. if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx;
  2645. struct ether_vlan_header *eh;
  2646. struct mbuf *m;
  2647. m = *mp;
  2648. if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) &&
  2649. M_WRITABLE(m) == 0) {
  2650. if ((m = m_dup(m, M_NOWAIT)) == NULL) {
  2651. return (ENOMEM);
  2652. } else {
  2653. m_freem(*mp);
  2654. DBG_COUNTER_INC(tx_frees);
  2655. *mp = m;
  2656. }
  2657. }
  2658. /*
  2659. * Determine where frame payload starts.
  2660. * Jump over vlan headers if already present,
  2661. * helpful for QinQ too.
  2662. */
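/*
 * For example, an 802.1Q-tagged frame takes the first branch below and
 * gets ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN (14 + 4 = 18),
 * while an untagged frame gets ipi_ehdrlen = ETHER_HDR_LEN (14).
 */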
  2663. if (__predict_false(m->m_len < sizeof(*eh))) {
  2664. txq->ift_pullups++;
  2665. if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL))
  2666. return (ENOMEM);
  2667. }
  2668. eh = mtod(m, struct ether_vlan_header *);
  2669. if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
  2670. pi->ipi_etype = ntohs(eh->evl_proto);
  2671. pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
  2672. } else {
  2673. pi->ipi_etype = ntohs(eh->evl_encap_proto);
  2674. pi->ipi_ehdrlen = ETHER_HDR_LEN;
  2675. }
  2676. switch (pi->ipi_etype) {
  2677. #ifdef INET
  2678. case ETHERTYPE_IP:
  2679. {
  2680. struct mbuf *n;
  2681. struct ip *ip = NULL;
  2682. struct tcphdr *th = NULL;
  2683. int minthlen;
  2684. minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th));
  2685. if (__predict_false(m->m_len < minthlen)) {
  2686. /*
  2687. * If this code bloat is causing too much of a hit,
  2688. * move it to a separate function and mark it noinline.
  2689. */
  2690. if (m->m_len == pi->ipi_ehdrlen) {
  2691. n = m->m_next;
  2692. MPASS(n);
  2693. if (n->m_len >= sizeof(*ip)) {
  2694. ip = (struct ip *)n->m_data;
  2695. if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th))
  2696. th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
  2697. } else {
  2698. txq->ift_pullups++;
  2699. if (__predict_false((m = m_pullup(m, minthlen)) == NULL))
  2700. return (ENOMEM);
  2701. ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
  2702. }
  2703. } else {
  2704. txq->ift_pullups++;
  2705. if (__predict_false((m = m_pullup(m, minthlen)) == NULL))
  2706. return (ENOMEM);
  2707. ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
  2708. if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th))
  2709. th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
  2710. }
  2711. } else {
  2712. ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen);
  2713. if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th))
  2714. th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
  2715. }
  2716. pi->ipi_ip_hlen = ip->ip_hl << 2;
  2717. pi->ipi_ipproto = ip->ip_p;
  2718. pi->ipi_flags |= IPI_TX_IPV4;
  2719. /* TCP checksum offload may require TCP header length */
  2720. if (IS_TX_OFFLOAD4(pi)) {
  2721. if (__predict_true(pi->ipi_ipproto == IPPROTO_TCP)) {
  2722. if (__predict_false(th == NULL)) {
  2723. txq->ift_pullups++;
  2724. if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL))
  2725. return (ENOMEM);
  2726. th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen);
  2727. }
  2728. pi->ipi_tcp_hflags = th->th_flags;
  2729. pi->ipi_tcp_hlen = th->th_off << 2;
  2730. pi->ipi_tcp_seq = th->th_seq;
  2731. }
  2732. if (IS_TSO4(pi)) {
  2733. if (__predict_false(ip->ip_p != IPPROTO_TCP))
  2734. return (ENXIO);
  2735. /*
  2736. * TSO always requires hardware checksum offload.
  2737. */
  2738. pi->ipi_csum_flags |= (CSUM_IP_TCP | CSUM_IP);
  2739. th->th_sum = in_pseudo(ip->ip_src.s_addr,
  2740. ip->ip_dst.s_addr, htons(IPPROTO_TCP));
  2741. pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz;
  2742. if (sctx->isc_flags & IFLIB_TSO_INIT_IP) {
  2743. ip->ip_sum = 0;
  2744. ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz);
  2745. }
  2746. }
  2747. }
  2748. if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP))
  2749. ip->ip_sum = 0;
  2750. break;
  2751. }
  2752. #endif
  2753. #ifdef INET6
  2754. case ETHERTYPE_IPV6:
  2755. {
  2756. struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen);
  2757. struct tcphdr *th;
  2758. pi->ipi_ip_hlen = sizeof(struct ip6_hdr);
  2759. if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) {
  2760. txq->ift_pullups++;
  2761. if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL))
  2762. return (ENOMEM);
  2763. }
  2764. th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen);
  2765. /* XXX-BZ this will go badly in case of ext hdrs. */
  2766. pi->ipi_ipproto = ip6->ip6_nxt;
  2767. pi->ipi_flags |= IPI_TX_IPV6;
  2768. /* TCP checksum offload may require TCP header length */
  2769. if (IS_TX_OFFLOAD6(pi)) {
  2770. if (pi->ipi_ipproto == IPPROTO_TCP) {
  2771. if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) {
  2772. txq->ift_pullups++;
  2773. if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL))
  2774. return (ENOMEM);
  2775. }
  2776. pi->ipi_tcp_hflags = th->th_flags;
  2777. pi->ipi_tcp_hlen = th->th_off << 2;
  2778. pi->ipi_tcp_seq = th->th_seq;
  2779. }
  2780. if (IS_TSO6(pi)) {
  2781. if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP))
  2782. return (ENXIO);
  2783. /*
  2784. * TSO always requires hardware checksum offload.
  2785. */
  2786. pi->ipi_csum_flags |= CSUM_IP6_TCP;
  2787. th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
  2788. pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz;
  2789. }
  2790. }
  2791. break;
  2792. }
  2793. #endif
  2794. default:
  2795. pi->ipi_csum_flags &= ~CSUM_OFFLOAD;
  2796. pi->ipi_ip_hlen = 0;
  2797. break;
  2798. }
  2799. *mp = m;
  2800. return (0);
  2801. }
  2802. /*
  2803. * If dodgy hardware rejects the scatter/gather chain we've handed it,
  2804. * we'll need to remove the mbuf chain from ifsd_m[] before we can add the
  2805. * m_defrag'd mbufs.
  2806. */
  2807. static __noinline struct mbuf *
  2808. iflib_remove_mbuf(iflib_txq_t txq)
  2809. {
  2810. int ntxd, pidx;
  2811. struct mbuf *m, **ifsd_m;
  2812. ifsd_m = txq->ift_sds.ifsd_m;
  2813. ntxd = txq->ift_size;
  2814. pidx = txq->ift_pidx & (ntxd - 1);
  2816. m = ifsd_m[pidx];
  2817. ifsd_m[pidx] = NULL;
  2818. bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[pidx]);
  2819. if (txq->ift_sds.ifsd_tso_map != NULL)
  2820. bus_dmamap_unload(txq->ift_tso_buf_tag,
  2821. txq->ift_sds.ifsd_tso_map[pidx]);
  2822. #if MEMORY_LOGGING
  2823. txq->ift_dequeued++;
  2824. #endif
  2825. return (m);
  2826. }
  2827. static inline caddr_t
  2828. calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid)
  2829. {
  2830. qidx_t size;
  2831. int ntxd;
  2832. caddr_t start, end, cur, next;
  2833. ntxd = txq->ift_size;
  2834. size = txq->ift_txd_size[qid];
  2835. start = txq->ift_ifdi[qid].idi_vaddr;
  2836. if (__predict_false(size == 0))
  2837. return (start);
  2838. cur = start + size*cidx;
  2839. end = start + size*ntxd;
  2840. next = CACHE_PTR_NEXT(cur);
  2841. return (next < end ? next : start);
  2842. }
  2843. /*
  2844. * Pad an mbuf to ensure a minimum ethernet frame size.
  2845. * min_frame_size is the frame size (less CRC) to pad the mbuf to
  2846. */
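/*
 * Example of the arithmetic involved (standard Ethernet sizing): the
 * minimum frame is 64 bytes including the 4-byte CRC, i.e. 60 bytes of
 * frame for the driver to hand to the hardware.  An ARP request is a
 * 14-byte Ethernet header plus a 28-byte ARP payload = 42 bytes, leaving
 * 60 - 42 = 18 bytes to append, which matches the 18-byte pad buffer
 * (and the 46-byte minimum payload) mentioned below.
 */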
  2847. static __noinline int
  2848. iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size)
  2849. {
  2850. /*
  2851. * 18 is enough bytes to pad an ARP packet to 46 bytes, and
  2852. * an ARP message is the smallest common payload I can think of.
  2853. */
  2854. static char pad[18]; /* just zeros */
  2855. int n;
  2856. struct mbuf *new_head;
  2857. if (!M_WRITABLE(*m_head)) {
  2858. new_head = m_dup(*m_head, M_NOWAIT);
  2859. if (new_head == NULL) {
  2860. m_freem(*m_head);
  2861. device_printf(dev, "cannot pad short frame, m_dup() failed\n");
  2862. DBG_COUNTER_INC(encap_pad_mbuf_fail);
  2863. DBG_COUNTER_INC(tx_frees);
  2864. return ENOMEM;
  2865. }
  2866. m_freem(*m_head);
  2867. *m_head = new_head;
  2868. }
  2869. for (n = min_frame_size - (*m_head)->m_pkthdr.len;
  2870. n > 0; n -= sizeof(pad))
  2871. if (!m_append(*m_head, min(n, sizeof(pad)), pad))
  2872. break;
  2873. if (n > 0) {
  2874. m_freem(*m_head);
  2875. device_printf(dev, "cannot pad short frame\n");
  2876. DBG_COUNTER_INC(encap_pad_mbuf_fail);
  2877. DBG_COUNTER_INC(tx_frees);
  2878. return (ENOBUFS);
  2879. }
  2880. return 0;
  2881. }
  2882. static int
  2883. iflib_encap(iflib_txq_t txq, struct mbuf **m_headp)
  2884. {
  2885. if_ctx_t ctx;
  2886. if_shared_ctx_t sctx;
  2887. if_softc_ctx_t scctx;
  2888. bus_dma_tag_t buf_tag;
  2889. bus_dma_segment_t *segs;
  2890. struct mbuf *m_head, **ifsd_m;
  2891. void *next_txd;
  2892. bus_dmamap_t map;
  2893. struct if_pkt_info pi;
  2894. int remap = 0;
  2895. int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd;
  2896. ctx = txq->ift_ctx;
  2897. sctx = ctx->ifc_sctx;
  2898. scctx = &ctx->ifc_softc_ctx;
  2899. segs = txq->ift_segs;
  2900. ntxd = txq->ift_size;
  2901. m_head = *m_headp;
  2902. map = NULL;
  2903. /*
  2904. * If we're doing TSO the next descriptor to clean may be quite far ahead
  2905. */
  2906. cidx = txq->ift_cidx;
  2907. pidx = txq->ift_pidx;
  2908. if (ctx->ifc_flags & IFC_PREFETCH) {
  2909. next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1);
  2910. if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) {
  2911. next_txd = calc_next_txd(txq, cidx, 0);
  2912. prefetch(next_txd);
  2913. }
  2914. /* prefetch the next cache line of mbuf pointers and flags */
  2915. prefetch(&txq->ift_sds.ifsd_m[next]);
  2916. prefetch(&txq->ift_sds.ifsd_map[next]);
  2917. next = (cidx + CACHE_LINE_SIZE) & (ntxd-1);
  2918. }
  2919. map = txq->ift_sds.ifsd_map[pidx];
  2920. ifsd_m = txq->ift_sds.ifsd_m;
  2921. if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
  2922. buf_tag = txq->ift_tso_buf_tag;
  2923. max_segs = scctx->isc_tx_tso_segments_max;
  2924. map = txq->ift_sds.ifsd_tso_map[pidx];
  2925. MPASS(buf_tag != NULL);
  2926. MPASS(max_segs > 0);
  2927. } else {
  2928. buf_tag = txq->ift_buf_tag;
  2929. max_segs = scctx->isc_tx_nsegments;
  2930. map = txq->ift_sds.ifsd_map[pidx];
  2931. }
  2932. if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) &&
  2933. __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) {
  2934. err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size);
  2935. if (err) {
  2936. DBG_COUNTER_INC(encap_txd_encap_fail);
  2937. return err;
  2938. }
  2939. }
  2940. m_head = *m_headp;
  2941. pkt_info_zero(&pi);
  2942. pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST));
  2943. pi.ipi_pidx = pidx;
  2944. pi.ipi_qsidx = txq->ift_id;
  2945. pi.ipi_len = m_head->m_pkthdr.len;
  2946. pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags;
  2947. pi.ipi_vtag = M_HAS_VLANTAG(m_head) ? m_head->m_pkthdr.ether_vtag : 0;
  2948. /* deliberate bitwise OR to make one condition */
  2949. if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) {
  2950. if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) {
  2951. DBG_COUNTER_INC(encap_txd_encap_fail);
  2952. return (err);
  2953. }
  2954. m_head = *m_headp;
  2955. }
  2956. retry:
  2957. err = bus_dmamap_load_mbuf_sg(buf_tag, map, m_head, segs, &nsegs,
  2958. BUS_DMA_NOWAIT);
  2959. defrag:
  2960. if (__predict_false(err)) {
  2961. switch (err) {
  2962. case EFBIG:
  2963. /* try collapse once and defrag once */
  2964. if (remap == 0) {
  2965. m_head = m_collapse(*m_headp, M_NOWAIT, max_segs);
  2966. /* try defrag if collapsing fails */
  2967. if (m_head == NULL)
  2968. remap++;
  2969. }
  2970. if (remap == 1) {
  2971. txq->ift_mbuf_defrag++;
  2972. m_head = m_defrag(*m_headp, M_NOWAIT);
  2973. }
  2974. /*
  2975. * remap should never be >1 unless bus_dmamap_load_mbuf_sg
  2976. * failed to map an mbuf that was run through m_defrag
  2977. */
  2978. MPASS(remap <= 1);
  2979. if (__predict_false(m_head == NULL || remap > 1))
  2980. goto defrag_failed;
  2981. remap++;
  2982. *m_headp = m_head;
  2983. goto retry;
  2984. break;
  2985. case ENOMEM:
  2986. txq->ift_no_tx_dma_setup++;
  2987. break;
  2988. default:
  2989. txq->ift_no_tx_dma_setup++;
  2990. m_freem(*m_headp);
  2991. DBG_COUNTER_INC(tx_frees);
  2992. *m_headp = NULL;
  2993. break;
  2994. }
  2995. txq->ift_map_failed++;
  2996. DBG_COUNTER_INC(encap_load_mbuf_fail);
  2997. DBG_COUNTER_INC(encap_txd_encap_fail);
  2998. return (err);
  2999. }
  3000. ifsd_m[pidx] = m_head;
  3001. /*
  3002. * XXX assumes a 1 to 1 relationship between segments and
  3003. * descriptors - this does not hold true on all drivers, e.g.
  3004. * cxgb
  3005. */
  3006. if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) {
  3007. txq->ift_no_desc_avail++;
  3008. bus_dmamap_unload(buf_tag, map);
  3009. DBG_COUNTER_INC(encap_txq_avail_fail);
  3010. DBG_COUNTER_INC(encap_txd_encap_fail);
  3011. if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0)
  3012. GROUPTASK_ENQUEUE(&txq->ift_task);
  3013. return (ENOBUFS);
  3014. }
  3015. /*
  3016. * On Intel cards we can greatly reduce the number of TX interrupts
  3017. * we see by only setting report status on every Nth descriptor.
  3018. * However, this also means that the driver will need to keep track
  3019. * of the descriptors that RS was set on to check them for the DD bit.
  3020. */
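/*
 * Rough illustration with hypothetical numbers: if TXQ_MAX_RS_DEFERRED()
 * currently evaluates to 63 and each packet maps to 4 segments,
 * ift_rs_pending grows by 5 per packet and IPI_TX_INTR is requested
 * roughly every 13th packet, or sooner if the ring is nearly full or TX
 * batching is disabled.
 */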
  3021. txq->ift_rs_pending += nsegs + 1;
  3022. if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) ||
  3023. iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs) <= MAX_TX_DESC(ctx) + 2) {
  3024. pi.ipi_flags |= IPI_TX_INTR;
  3025. txq->ift_rs_pending = 0;
  3026. }
  3027. pi.ipi_segs = segs;
  3028. pi.ipi_nsegs = nsegs;
  3029. MPASS(pidx >= 0 && pidx < txq->ift_size);
  3030. #ifdef PKT_DEBUG
  3031. print_pkt(&pi);
  3032. #endif
  3033. if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) {
  3034. bus_dmamap_sync(buf_tag, map, BUS_DMASYNC_PREWRITE);
  3035. DBG_COUNTER_INC(tx_encap);
  3036. MPASS(pi.ipi_new_pidx < txq->ift_size);
  3037. ndesc = pi.ipi_new_pidx - pi.ipi_pidx;
  3038. if (pi.ipi_new_pidx < pi.ipi_pidx) {
  3039. ndesc += txq->ift_size;
  3040. txq->ift_gen = 1;
  3041. }
  3042. /*
  3043. * drivers can need as many as
  3044. * two sentinels
  3045. */
  3046. MPASS(ndesc <= pi.ipi_nsegs + 2);
  3047. MPASS(pi.ipi_new_pidx != pidx);
  3048. MPASS(ndesc > 0);
  3049. txq->ift_in_use += ndesc;
  3050. /*
  3051. * We update the last software descriptor again here because there may
  3052. * be a sentinel and/or there may be more mbufs than segments
  3053. */
  3054. txq->ift_pidx = pi.ipi_new_pidx;
  3055. txq->ift_npending += pi.ipi_ndescs;
  3056. } else {
  3057. *m_headp = m_head = iflib_remove_mbuf(txq);
  3058. if (err == EFBIG) {
  3059. txq->ift_txd_encap_efbig++;
  3060. if (remap < 2) {
  3061. remap = 1;
  3062. goto defrag;
  3063. }
  3064. }
  3065. goto defrag_failed;
  3066. }
  3067. /*
3068. * err can't possibly be non-zero here, so we don't need to test it
  3069. * to see if we need to DBG_COUNTER_INC(encap_txd_encap_fail).
  3070. */
  3071. return (err);
  3072. defrag_failed:
  3073. txq->ift_mbuf_defrag_failed++;
  3074. txq->ift_map_failed++;
  3075. m_freem(*m_headp);
  3076. DBG_COUNTER_INC(tx_frees);
  3077. *m_headp = NULL;
  3078. DBG_COUNTER_INC(encap_txd_encap_fail);
  3079. return (ENOMEM);
  3080. }
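/*
* Free the mbufs and unload the DMA maps for up to n completed transmit
* descriptors, starting at the queue's current consumer index.
*/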
  3081. static void
  3082. iflib_tx_desc_free(iflib_txq_t txq, int n)
  3083. {
  3084. uint32_t qsize, cidx, mask, gen;
  3085. struct mbuf *m, **ifsd_m;
  3086. bool do_prefetch;
  3087. cidx = txq->ift_cidx;
  3088. gen = txq->ift_gen;
  3089. qsize = txq->ift_size;
  3090. mask = qsize-1;
  3091. ifsd_m = txq->ift_sds.ifsd_m;
  3092. do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH);
  3093. while (n-- > 0) {
  3094. if (do_prefetch) {
  3095. prefetch(ifsd_m[(cidx + 3) & mask]);
  3096. prefetch(ifsd_m[(cidx + 4) & mask]);
  3097. }
  3098. if ((m = ifsd_m[cidx]) != NULL) {
  3099. prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]);
  3100. if (m->m_pkthdr.csum_flags & CSUM_TSO) {
  3101. bus_dmamap_sync(txq->ift_tso_buf_tag,
  3102. txq->ift_sds.ifsd_tso_map[cidx],
  3103. BUS_DMASYNC_POSTWRITE);
  3104. bus_dmamap_unload(txq->ift_tso_buf_tag,
  3105. txq->ift_sds.ifsd_tso_map[cidx]);
  3106. } else {
  3107. bus_dmamap_sync(txq->ift_buf_tag,
  3108. txq->ift_sds.ifsd_map[cidx],
  3109. BUS_DMASYNC_POSTWRITE);
  3110. bus_dmamap_unload(txq->ift_buf_tag,
  3111. txq->ift_sds.ifsd_map[cidx]);
  3112. }
  3113. /* XXX we don't support any drivers that batch packets yet */
  3114. MPASS(m->m_nextpkt == NULL);
  3115. m_freem(m);
  3116. ifsd_m[cidx] = NULL;
  3117. #if MEMORY_LOGGING
  3118. txq->ift_dequeued++;
  3119. #endif
  3120. DBG_COUNTER_INC(tx_frees);
  3121. }
  3122. if (__predict_false(++cidx == qsize)) {
  3123. cidx = 0;
  3124. gen = 0;
  3125. }
  3126. }
  3127. txq->ift_cidx = cidx;
  3128. txq->ift_gen = gen;
  3129. }
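/*
* Update transmit credits from the driver and, once more than thresh
* descriptors are reclaimable, free their mbufs and return the number
* reclaimed.
*/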
  3130. static __inline int
  3131. iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh)
  3132. {
  3133. int reclaim;
  3134. if_ctx_t ctx = txq->ift_ctx;
  3135. KASSERT(thresh >= 0, ("invalid threshold to reclaim"));
  3136. MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size);
  3137. /*
  3138. * Need a rate-limiting check so that this isn't called every time
  3139. */
  3140. iflib_tx_credits_update(ctx, txq);
  3141. reclaim = DESC_RECLAIMABLE(txq);
  3142. if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) {
  3143. #ifdef INVARIANTS
  3144. if (iflib_verbose_debug) {
  3145. printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__,
  3146. txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments,
  3147. reclaim, thresh);
  3148. }
  3149. #endif
  3150. return (0);
  3151. }
  3152. iflib_tx_desc_free(txq, reclaim);
  3153. txq->ift_cleaned += reclaim;
  3154. txq->ift_in_use -= reclaim;
  3155. return (reclaim);
  3156. }
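/*
* Return a pointer to the ring slot at cidx + offset, prefetching the
* following entries when more work remains.
*/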
  3157. static struct mbuf **
  3158. _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining)
  3159. {
  3160. int next, size;
  3161. struct mbuf **items;
  3162. size = r->size;
  3163. next = (cidx + CACHE_PTR_INCREMENT) & (size-1);
  3164. items = __DEVOLATILE(struct mbuf **, &r->items[0]);
  3165. prefetch(items[(cidx + offset) & (size-1)]);
  3166. if (remaining > 1) {
  3167. prefetch2cachelines(&items[next]);
  3168. prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]);
  3169. prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]);
  3170. prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]);
  3171. }
  3172. return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)]));
  3173. }
  3174. static void
  3175. iflib_txq_check_drain(iflib_txq_t txq, int budget)
  3176. {
  3177. ifmp_ring_check_drainage(txq->ift_br, budget);
  3178. }
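/*
* Report whether the drain routine can make progress: either descriptor
* space is available or the driver has completed descriptors to credit.
*/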
  3179. static uint32_t
  3180. iflib_txq_can_drain(struct ifmp_ring *r)
  3181. {
  3182. iflib_txq_t txq = r->cookie;
  3183. if_ctx_t ctx = txq->ift_ctx;
  3184. if (TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2)
  3185. return (1);
  3186. bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
  3187. BUS_DMASYNC_POSTREAD);
  3188. return (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id,
  3189. false));
  3190. }
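/*
* mp_ring drain callback: reclaim completed descriptors, then encapsulate
* queued mbufs (up to TX_BATCH_SIZE) and hand them to the driver, ringing
* the doorbell as needed. When IFC_QFLUSH is set the ring is simply
* emptied. Returns the number of ring entries consumed.
*/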
  3191. static uint32_t
  3192. iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
  3193. {
  3194. iflib_txq_t txq = r->cookie;
  3195. if_ctx_t ctx = txq->ift_ctx;
  3196. if_t ifp = ctx->ifc_ifp;
  3197. struct mbuf **mp, *m;
  3198. int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail;
  3199. int reclaimed, err, in_use_prev, desc_used;
  3200. bool do_prefetch, ring, rang;
  3201. if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) ||
  3202. !LINK_ACTIVE(ctx))) {
  3203. DBG_COUNTER_INC(txq_drain_notready);
  3204. return (0);
  3205. }
  3206. reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx));
  3207. rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use);
  3208. avail = IDXDIFF(pidx, cidx, r->size);
  3209. if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) {
  3210. DBG_COUNTER_INC(txq_drain_flushing);
  3211. for (i = 0; i < avail; i++) {
  3212. if (__predict_true(r->items[(cidx + i) & (r->size-1)] != (void *)txq))
  3213. m_free(r->items[(cidx + i) & (r->size-1)]);
  3214. r->items[(cidx + i) & (r->size-1)] = NULL;
  3215. }
  3216. return (avail);
  3217. }
  3218. if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) {
  3219. txq->ift_qstatus = IFLIB_QUEUE_IDLE;
  3220. CALLOUT_LOCK(txq);
  3221. callout_stop(&txq->ift_timer);
  3222. CALLOUT_UNLOCK(txq);
  3223. DBG_COUNTER_INC(txq_drain_oactive);
  3224. return (0);
  3225. }
  3226. if (reclaimed)
  3227. txq->ift_qstatus = IFLIB_QUEUE_IDLE;
  3228. consumed = mcast_sent = bytes_sent = pkt_sent = 0;
  3229. count = MIN(avail, TX_BATCH_SIZE);
  3230. #ifdef INVARIANTS
  3231. if (iflib_verbose_debug)
  3232. printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__,
  3233. avail, ctx->ifc_flags, TXQ_AVAIL(txq));
  3234. #endif
  3235. do_prefetch = (ctx->ifc_flags & IFC_PREFETCH);
  3236. avail = TXQ_AVAIL(txq);
  3237. err = 0;
  3238. for (desc_used = i = 0; i < count && avail > MAX_TX_DESC(ctx) + 2; i++) {
  3239. int rem = do_prefetch ? count - i : 0;
  3240. mp = _ring_peek_one(r, cidx, i, rem);
  3241. MPASS(mp != NULL && *mp != NULL);
  3242. if (__predict_false(*mp == (struct mbuf *)txq)) {
  3243. consumed++;
  3244. reclaimed++;
  3245. continue;
  3246. }
  3247. in_use_prev = txq->ift_in_use;
  3248. err = iflib_encap(txq, mp);
  3249. if (__predict_false(err)) {
  3250. /* no room - bail out */
  3251. if (err == ENOBUFS)
  3252. break;
  3253. consumed++;
  3254. /* we can't send this packet - skip it */
  3255. continue;
  3256. }
  3257. consumed++;
  3258. pkt_sent++;
  3259. m = *mp;
  3260. DBG_COUNTER_INC(tx_sent);
  3261. bytes_sent += m->m_pkthdr.len;
  3262. mcast_sent += !!(m->m_flags & M_MCAST);
  3263. avail = TXQ_AVAIL(txq);
  3264. txq->ift_db_pending += (txq->ift_in_use - in_use_prev);
  3265. desc_used += (txq->ift_in_use - in_use_prev);
  3266. ETHER_BPF_MTAP(ifp, m);
  3267. if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING)))
  3268. break;
  3269. rang = iflib_txd_db_check(ctx, txq, false, in_use_prev);
  3270. }
  3271. /* deliberate use of bitwise or to avoid gratuitous short-circuit */
  3272. ring = rang ? false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx));
  3273. iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use);
  3274. if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent);
  3275. if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent);
  3276. if (mcast_sent)
  3277. if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent);
  3278. #ifdef INVARIANTS
  3279. if (iflib_verbose_debug)
  3280. printf("consumed=%d\n", consumed);
  3281. #endif
  3282. return (consumed);
  3283. }
  3284. static uint32_t
  3285. iflib_txq_drain_always(struct ifmp_ring *r)
  3286. {
  3287. return (1);
  3288. }
  3289. static uint32_t
  3290. iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx)
  3291. {
  3292. int i, avail;
  3293. struct mbuf **mp;
  3294. iflib_txq_t txq;
  3295. txq = r->cookie;
  3296. txq->ift_qstatus = IFLIB_QUEUE_IDLE;
  3297. CALLOUT_LOCK(txq);
  3298. callout_stop(&txq->ift_timer);
  3299. CALLOUT_UNLOCK(txq);
  3300. avail = IDXDIFF(pidx, cidx, r->size);
  3301. for (i = 0; i < avail; i++) {
  3302. mp = _ring_peek_one(r, cidx, i, avail - i);
  3303. if (__predict_false(*mp == (struct mbuf *)txq))
  3304. continue;
  3305. m_freem(*mp);
  3306. DBG_COUNTER_INC(tx_frees);
  3307. }
  3308. MPASS(ifmp_ring_is_stalled(r) == 0);
  3309. return (avail);
  3310. }
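/*
* Temporarily point the ring at the free-everything drain callbacks so
* that anything still queued is released, then restore the normal
* callbacks.
*/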
  3311. static void
  3312. iflib_ifmp_purge(iflib_txq_t txq)
  3313. {
  3314. struct ifmp_ring *r;
  3315. r = txq->ift_br;
  3316. r->drain = iflib_txq_drain_free;
  3317. r->can_drain = iflib_txq_drain_always;
  3318. ifmp_ring_check_drainage(r, r->size);
  3319. r->drain = iflib_txq_drain;
  3320. r->can_drain = iflib_txq_can_drain;
  3321. }
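/*
* Deferred transmit interrupt task: hand netmap and ALTQ their work when
* enabled, otherwise kick the queue's mp_ring so that pending mbufs are
* drained, then re-enable the queue interrupt.
*/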
  3322. static void
  3323. _task_fn_tx(void *context)
  3324. {
  3325. iflib_txq_t txq = context;
  3326. if_ctx_t ctx = txq->ift_ctx;
  3327. #if defined(ALTQ) || defined(DEV_NETMAP)
  3328. if_t ifp = ctx->ifc_ifp;
  3329. #endif
  3330. int abdicate = ctx->ifc_sysctl_tx_abdicate;
  3331. #ifdef IFLIB_DIAGNOSTICS
  3332. txq->ift_cpu_exec_count[curcpu]++;
  3333. #endif
  3334. if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))
  3335. return;
  3336. #ifdef DEV_NETMAP
  3337. if (if_getcapenable(ifp) & IFCAP_NETMAP) {
  3338. bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
  3339. BUS_DMASYNC_POSTREAD);
  3340. if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false))
  3341. netmap_tx_irq(ifp, txq->ift_id);
  3342. if (ctx->ifc_flags & IFC_LEGACY)
  3343. IFDI_INTR_ENABLE(ctx);
  3344. else
  3345. IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
  3346. return;
  3347. }
  3348. #endif
  3349. #ifdef ALTQ
  3350. if (ALTQ_IS_ENABLED(&ifp->if_snd))
  3351. iflib_altq_if_start(ifp);
  3352. #endif
  3353. if (txq->ift_db_pending)
  3354. ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE, abdicate);
  3355. else if (!abdicate)
  3356. ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
  3357. /*
  3358. * When abdicating, we always need to check drainage, not just when we don't enqueue
  3359. */
  3360. if (abdicate)
  3361. ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
  3362. if (ctx->ifc_flags & IFC_LEGACY)
  3363. IFDI_INTR_ENABLE(ctx);
  3364. else
  3365. IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id);
  3366. }
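/*
* Deferred receive interrupt task: let netmap claim the queue if it owns
* it, otherwise run iflib_rxeof() within the configured budget.
* Re-enable the interrupt when no further work is pending; otherwise
* reschedule the task or arm the watchdog callout.
*/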
  3367. static void
  3368. _task_fn_rx(void *context)
  3369. {
  3370. iflib_rxq_t rxq = context;
  3371. if_ctx_t ctx = rxq->ifr_ctx;
  3372. uint8_t more;
  3373. uint16_t budget;
  3374. #ifdef DEV_NETMAP
  3375. u_int work = 0;
  3376. int nmirq;
  3377. #endif
  3378. #ifdef IFLIB_DIAGNOSTICS
  3379. rxq->ifr_cpu_exec_count[curcpu]++;
  3380. #endif
  3381. DBG_COUNTER_INC(task_fn_rxs);
  3382. if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
  3383. return;
  3384. #ifdef DEV_NETMAP
  3385. nmirq = netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work);
  3386. if (nmirq != NM_IRQ_PASS) {
  3387. more = (nmirq == NM_IRQ_RESCHED) ? IFLIB_RXEOF_MORE : 0;
  3388. goto skip_rxeof;
  3389. }
  3390. #endif
  3391. budget = ctx->ifc_sysctl_rx_budget;
  3392. if (budget == 0)
  3393. budget = 16; /* XXX */
  3394. more = iflib_rxeof(rxq, budget);
  3395. #ifdef DEV_NETMAP
  3396. skip_rxeof:
  3397. #endif
  3398. if ((more & IFLIB_RXEOF_MORE) == 0) {
  3399. if (ctx->ifc_flags & IFC_LEGACY)
  3400. IFDI_INTR_ENABLE(ctx);
  3401. else
  3402. IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
  3403. DBG_COUNTER_INC(rx_intr_enables);
  3404. }
  3405. if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)))
  3406. return;
  3407. if (more & IFLIB_RXEOF_MORE)
  3408. GROUPTASK_ENQUEUE(&rxq->ifr_task);
  3409. else if (more & IFLIB_RXEOF_EMPTY)
  3410. callout_reset_curcpu(&rxq->ifr_watchdog, 1, &_task_fn_rx_watchdog, rxq);
  3411. }
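/*
* Admin task: service deferred watchdog and reset requests, let the
* driver refresh its admin/link status, and restart the per-queue timers.
*/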
  3412. static void
  3413. _task_fn_admin(void *context)
  3414. {
  3415. if_ctx_t ctx = context;
  3416. if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
  3417. iflib_txq_t txq;
  3418. int i;
  3419. bool oactive, running, do_reset, do_watchdog, in_detach;
  3420. uint32_t reset_on = hz / 2;
  3421. STATE_LOCK(ctx);
  3422. running = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING);
  3423. oactive = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE);
  3424. do_reset = (ctx->ifc_flags & IFC_DO_RESET);
  3425. do_watchdog = (ctx->ifc_flags & IFC_DO_WATCHDOG);
  3426. in_detach = (ctx->ifc_flags & IFC_IN_DETACH);
  3427. ctx->ifc_flags &= ~(IFC_DO_RESET|IFC_DO_WATCHDOG);
  3428. STATE_UNLOCK(ctx);
  3429. if ((!running && !oactive) && !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN))
  3430. return;
  3431. if (in_detach)
  3432. return;
  3433. CTX_LOCK(ctx);
  3434. for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) {
  3435. CALLOUT_LOCK(txq);
  3436. callout_stop(&txq->ift_timer);
  3437. CALLOUT_UNLOCK(txq);
  3438. }
  3439. if (do_watchdog) {
  3440. ctx->ifc_watchdog_events++;
  3441. IFDI_WATCHDOG_RESET(ctx);
  3442. }
  3443. IFDI_UPDATE_ADMIN_STATUS(ctx);
  3444. for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) {
  3445. #ifdef DEV_NETMAP
  3446. reset_on = hz / 2;
  3447. if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP)
  3448. iflib_netmap_timer_adjust(ctx, txq, &reset_on);
  3449. #endif
  3450. callout_reset_on(&txq->ift_timer, reset_on, iflib_timer, txq, txq->ift_timer.c_cpu);
  3451. }
  3452. IFDI_LINK_INTR_ENABLE(ctx);
  3453. if (do_reset)
  3454. iflib_if_init_locked(ctx);
  3455. CTX_UNLOCK(ctx);
  3456. if (LINK_ACTIVE(ctx) == 0)
  3457. return;
  3458. for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++)
  3459. iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET);
  3460. }
  3461. static void
  3462. _task_fn_iov(void *context)
  3463. {
  3464. if_ctx_t ctx = context;
  3465. if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) &&
  3466. !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN))
  3467. return;
  3468. CTX_LOCK(ctx);
  3469. IFDI_VFLR_HANDLE(ctx);
  3470. CTX_UNLOCK(ctx);
  3471. }
  3472. static int
  3473. iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
  3474. {
  3475. int err;
  3476. if_int_delay_info_t info;
  3477. if_ctx_t ctx;
  3478. info = (if_int_delay_info_t)arg1;
  3479. ctx = info->iidi_ctx;
  3480. info->iidi_req = req;
  3481. info->iidi_oidp = oidp;
  3482. CTX_LOCK(ctx);
  3483. err = IFDI_SYSCTL_INT_DELAY(ctx, info);
  3484. CTX_UNLOCK(ctx);
  3485. return (err);
  3486. }
  3487. /*********************************************************************
  3488. *
  3489. * IFNET FUNCTIONS
  3490. *
  3491. **********************************************************************/
  3492. static void
  3493. iflib_if_init_locked(if_ctx_t ctx)
  3494. {
  3495. iflib_stop(ctx);
  3496. iflib_init_locked(ctx);
  3497. }
  3498. static void
  3499. iflib_if_init(void *arg)
  3500. {
  3501. if_ctx_t ctx = arg;
  3502. CTX_LOCK(ctx);
  3503. iflib_if_init_locked(ctx);
  3504. CTX_UNLOCK(ctx);
  3505. }
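/*
* if_transmit entry point: pick a transmit queue from the mbuf's flow
* hash (ALTQ-enabled interfaces stay on queue 0) and enqueue on that
* queue's mp_ring, deferring the drain to the queue's group task when
* tx_abdicate is set.
*/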
  3506. static int
  3507. iflib_if_transmit(if_t ifp, struct mbuf *m)
  3508. {
  3509. if_ctx_t ctx = if_getsoftc(ifp);
  3510. iflib_txq_t txq;
  3511. int err, qidx;
  3512. int abdicate = ctx->ifc_sysctl_tx_abdicate;
  3513. if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) {
  3514. DBG_COUNTER_INC(tx_frees);
  3515. m_freem(m);
  3516. return (ENETDOWN);
  3517. }
  3518. MPASS(m->m_nextpkt == NULL);
  3519. /* ALTQ-enabled interfaces always use queue 0. */
  3520. qidx = 0;
  3521. if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m) && !ALTQ_IS_ENABLED(&ifp->if_snd))
  3522. qidx = QIDX(ctx, m);
  3523. /*
  3524. * XXX calculate buf_ring based on flowid (divvy up bits?)
  3525. */
  3526. txq = &ctx->ifc_txqs[qidx];
  3527. #ifdef DRIVER_BACKPRESSURE
  3528. if (txq->ift_closed) {
  3529. while (m != NULL) {
  3530. next = m->m_nextpkt;
  3531. m->m_nextpkt = NULL;
  3532. m_freem(m);
  3533. DBG_COUNTER_INC(tx_frees);
  3534. m = next;
  3535. }
  3536. return (ENOBUFS);
  3537. }
  3538. #endif
  3539. #ifdef notyet
  3540. qidx = count = 0;
  3541. mp = marr;
  3542. next = m;
  3543. do {
  3544. count++;
  3545. next = next->m_nextpkt;
  3546. } while (next != NULL);
  3547. if (count > nitems(marr))
  3548. if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) {
  3549. /* XXX check nextpkt */
  3550. m_freem(m);
  3551. /* XXX simplify for now */
  3552. DBG_COUNTER_INC(tx_frees);
  3553. return (ENOBUFS);
  3554. }
  3555. for (next = m, i = 0; next != NULL; i++) {
  3556. mp[i] = next;
  3557. next = next->m_nextpkt;
  3558. mp[i]->m_nextpkt = NULL;
  3559. }
  3560. #endif
  3561. DBG_COUNTER_INC(tx_seen);
  3562. err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE, abdicate);
  3563. if (abdicate)
  3564. GROUPTASK_ENQUEUE(&txq->ift_task);
  3565. if (err) {
  3566. if (!abdicate)
  3567. GROUPTASK_ENQUEUE(&txq->ift_task);
3568. /* support forthcoming */
  3569. #ifdef DRIVER_BACKPRESSURE
  3570. txq->ift_closed = TRUE;
  3571. #endif
  3572. ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
  3573. m_freem(m);
  3574. DBG_COUNTER_INC(tx_frees);
  3575. }
  3576. return (err);
  3577. }
  3578. #ifdef ALTQ
  3579. /*
  3580. * The overall approach to integrating iflib with ALTQ is to continue to use
  3581. * the iflib mp_ring machinery between the ALTQ queue(s) and the hardware
  3582. * ring. Technically, when using ALTQ, queueing to an intermediate mp_ring
  3583. * is redundant/unnecessary, but doing so minimizes the amount of
  3584. * ALTQ-specific code required in iflib. It is assumed that the overhead of
  3585. * redundantly queueing to an intermediate mp_ring is swamped by the
  3586. * performance limitations inherent in using ALTQ.
  3587. *
  3588. * When ALTQ support is compiled in, all iflib drivers will use a transmit
  3589. * routine, iflib_altq_if_transmit(), that checks if ALTQ is enabled for the
  3590. * given interface. If ALTQ is enabled for an interface, then all
  3591. * transmitted packets for that interface will be submitted to the ALTQ
  3592. * subsystem via IFQ_ENQUEUE(). We don't use the legacy if_transmit()
  3593. * implementation because it uses IFQ_HANDOFF(), which will duplicatively
3594. * update stats that the iflib machinery handles, and which is sensitive to
  3595. * the disused IFF_DRV_OACTIVE flag. Additionally, iflib_altq_if_start()
  3596. * will be installed as the start routine for use by ALTQ facilities that
  3597. * need to trigger queue drains on a scheduled basis.
  3598. *
  3599. */
  3600. static void
  3601. iflib_altq_if_start(if_t ifp)
  3602. {
  3603. struct ifaltq *ifq = &ifp->if_snd;
  3604. struct mbuf *m;
  3605. IFQ_LOCK(ifq);
  3606. IFQ_DEQUEUE_NOLOCK(ifq, m);
  3607. while (m != NULL) {
  3608. iflib_if_transmit(ifp, m);
  3609. IFQ_DEQUEUE_NOLOCK(ifq, m);
  3610. }
  3611. IFQ_UNLOCK(ifq);
  3612. }
  3613. static int
  3614. iflib_altq_if_transmit(if_t ifp, struct mbuf *m)
  3615. {
  3616. int err;
  3617. if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
  3618. IFQ_ENQUEUE(&ifp->if_snd, m, err);
  3619. if (err == 0)
  3620. iflib_altq_if_start(ifp);
  3621. } else
  3622. err = iflib_if_transmit(ifp, m);
  3623. return (err);
  3624. }
  3625. #endif /* ALTQ */
  3626. static void
  3627. iflib_if_qflush(if_t ifp)
  3628. {
  3629. if_ctx_t ctx = if_getsoftc(ifp);
  3630. iflib_txq_t txq = ctx->ifc_txqs;
  3631. int i;
  3632. STATE_LOCK(ctx);
  3633. ctx->ifc_flags |= IFC_QFLUSH;
  3634. STATE_UNLOCK(ctx);
  3635. for (i = 0; i < NTXQSETS(ctx); i++, txq++)
  3636. while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br)))
  3637. iflib_txq_check_drain(txq, 0);
  3638. STATE_LOCK(ctx);
  3639. ctx->ifc_flags &= ~IFC_QFLUSH;
  3640. STATE_UNLOCK(ctx);
  3641. /*
  3642. * When ALTQ is enabled, this will also take care of purging the
  3643. * ALTQ queue(s).
  3644. */
  3645. if_qflush(ifp);
  3646. }
  3647. #define IFCAP_FLAGS (IFCAP_HWCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \
  3648. IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \
  3649. IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | \
  3650. IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM)
  3651. static int
  3652. iflib_if_ioctl(if_t ifp, u_long command, caddr_t data)
  3653. {
  3654. if_ctx_t ctx = if_getsoftc(ifp);
  3655. struct ifreq *ifr = (struct ifreq *)data;
  3656. #if defined(INET) || defined(INET6)
  3657. struct ifaddr *ifa = (struct ifaddr *)data;
  3658. #endif
  3659. bool avoid_reset = false;
  3660. int err = 0, reinit = 0, bits;
  3661. switch (command) {
  3662. case SIOCSIFADDR:
  3663. #ifdef INET
  3664. if (ifa->ifa_addr->sa_family == AF_INET)
  3665. avoid_reset = true;
  3666. #endif
  3667. #ifdef INET6
  3668. if (ifa->ifa_addr->sa_family == AF_INET6)
  3669. avoid_reset = true;
  3670. #endif
  3671. /*
  3672. ** Calling init results in link renegotiation,
  3673. ** so we avoid doing it when possible.
  3674. */
  3675. if (avoid_reset) {
3676. if_setflagbits(ifp, IFF_UP, 0);
  3677. if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
  3678. reinit = 1;
  3679. #ifdef INET
  3680. if (!(if_getflags(ifp) & IFF_NOARP))
  3681. arp_ifinit(ifp, ifa);
  3682. #endif
  3683. } else
  3684. err = ether_ioctl(ifp, command, data);
  3685. break;
  3686. case SIOCSIFMTU:
  3687. CTX_LOCK(ctx);
  3688. if (ifr->ifr_mtu == if_getmtu(ifp)) {
  3689. CTX_UNLOCK(ctx);
  3690. break;
  3691. }
  3692. bits = if_getdrvflags(ifp);
  3693. /* stop the driver and free any clusters before proceeding */
  3694. iflib_stop(ctx);
  3695. if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) {
  3696. STATE_LOCK(ctx);
  3697. if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size)
  3698. ctx->ifc_flags |= IFC_MULTISEG;
  3699. else
  3700. ctx->ifc_flags &= ~IFC_MULTISEG;
  3701. STATE_UNLOCK(ctx);
  3702. err = if_setmtu(ifp, ifr->ifr_mtu);
  3703. }
  3704. iflib_init_locked(ctx);
  3705. STATE_LOCK(ctx);
  3706. if_setdrvflags(ifp, bits);
  3707. STATE_UNLOCK(ctx);
  3708. CTX_UNLOCK(ctx);
  3709. break;
  3710. case SIOCSIFFLAGS:
  3711. CTX_LOCK(ctx);
  3712. if (if_getflags(ifp) & IFF_UP) {
  3713. if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
  3714. if ((if_getflags(ifp) ^ ctx->ifc_if_flags) &
  3715. (IFF_PROMISC | IFF_ALLMULTI)) {
  3716. err = IFDI_PROMISC_SET(ctx, if_getflags(ifp));
  3717. }
  3718. } else
  3719. reinit = 1;
  3720. } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
  3721. iflib_stop(ctx);
  3722. }
  3723. ctx->ifc_if_flags = if_getflags(ifp);
  3724. CTX_UNLOCK(ctx);
  3725. break;
  3726. case SIOCADDMULTI:
  3727. case SIOCDELMULTI:
  3728. if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
  3729. CTX_LOCK(ctx);
  3730. IFDI_INTR_DISABLE(ctx);
  3731. IFDI_MULTI_SET(ctx);
  3732. IFDI_INTR_ENABLE(ctx);
  3733. CTX_UNLOCK(ctx);
  3734. }
  3735. break;
  3736. case SIOCSIFMEDIA:
  3737. CTX_LOCK(ctx);
  3738. IFDI_MEDIA_SET(ctx);
  3739. CTX_UNLOCK(ctx);
  3740. /* FALLTHROUGH */
  3741. case SIOCGIFMEDIA:
  3742. case SIOCGIFXMEDIA:
  3743. err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command);
  3744. break;
  3745. case SIOCGI2C:
  3746. {
  3747. struct ifi2creq i2c;
  3748. err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
  3749. if (err != 0)
  3750. break;
  3751. if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) {
  3752. err = EINVAL;
  3753. break;
  3754. }
  3755. if (i2c.len > sizeof(i2c.data)) {
  3756. err = EINVAL;
  3757. break;
  3758. }
  3759. if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0)
  3760. err = copyout(&i2c, ifr_data_get_ptr(ifr),
  3761. sizeof(i2c));
  3762. break;
  3763. }
  3764. case SIOCSIFCAP:
  3765. {
  3766. int mask, setmask, oldmask;
  3767. oldmask = if_getcapenable(ifp);
  3768. mask = ifr->ifr_reqcap ^ oldmask;
  3769. mask &= ctx->ifc_softc_ctx.isc_capabilities;
  3770. setmask = 0;
  3771. #ifdef TCP_OFFLOAD
  3772. setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6);
  3773. #endif
  3774. setmask |= (mask & IFCAP_FLAGS);
  3775. setmask |= (mask & IFCAP_WOL);
  3776. /*
  3777. * If any RX csum has changed, change all the ones that
  3778. * are supported by the driver.
  3779. */
  3780. if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
  3781. setmask |= ctx->ifc_softc_ctx.isc_capabilities &
  3782. (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6);
  3783. }
  3784. /*
3785. * We want to ensure that traffic has stopped before we change any of the flags.
  3786. */
  3787. if (setmask) {
  3788. CTX_LOCK(ctx);
  3789. bits = if_getdrvflags(ifp);
  3790. if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL)
  3791. iflib_stop(ctx);
  3792. STATE_LOCK(ctx);
  3793. if_togglecapenable(ifp, setmask);
  3794. STATE_UNLOCK(ctx);
  3795. if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL)
  3796. iflib_init_locked(ctx);
  3797. STATE_LOCK(ctx);
  3798. if_setdrvflags(ifp, bits);
  3799. STATE_UNLOCK(ctx);
  3800. CTX_UNLOCK(ctx);
  3801. }
  3802. if_vlancap(ifp);
  3803. break;
  3804. }
  3805. case SIOCGPRIVATE_0:
  3806. case SIOCSDRVSPEC:
  3807. case SIOCGDRVSPEC:
  3808. CTX_LOCK(ctx);
  3809. err = IFDI_PRIV_IOCTL(ctx, command, data);
  3810. CTX_UNLOCK(ctx);
  3811. break;
  3812. default:
  3813. err = ether_ioctl(ifp, command, data);
  3814. break;
  3815. }
  3816. if (reinit)
  3817. iflib_if_init(ctx);
  3818. return (err);
  3819. }
  3820. static uint64_t
  3821. iflib_if_get_counter(if_t ifp, ift_counter cnt)
  3822. {
  3823. if_ctx_t ctx = if_getsoftc(ifp);
  3824. return (IFDI_GET_COUNTER(ctx, cnt));
  3825. }
  3826. /*********************************************************************
  3827. *
  3828. * OTHER FUNCTIONS EXPORTED TO THE STACK
  3829. *
  3830. **********************************************************************/
  3831. static void
  3832. iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag)
  3833. {
  3834. if_ctx_t ctx = if_getsoftc(ifp);
  3835. if ((void *)ctx != arg)
  3836. return;
  3837. if ((vtag == 0) || (vtag > 4095))
  3838. return;
  3839. if (iflib_in_detach(ctx))
  3840. return;
  3841. CTX_LOCK(ctx);
  3842. /* Driver may need all untagged packets to be flushed */
  3843. if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG))
  3844. iflib_stop(ctx);
  3845. IFDI_VLAN_REGISTER(ctx, vtag);
  3846. /* Re-init to load the changes, if required */
  3847. if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG))
  3848. iflib_init_locked(ctx);
  3849. CTX_UNLOCK(ctx);
  3850. }
  3851. static void
  3852. iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag)
  3853. {
  3854. if_ctx_t ctx = if_getsoftc(ifp);
  3855. if ((void *)ctx != arg)
  3856. return;
  3857. if ((vtag == 0) || (vtag > 4095))
  3858. return;
  3859. CTX_LOCK(ctx);
  3860. /* Driver may need all tagged packets to be flushed */
  3861. if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG))
  3862. iflib_stop(ctx);
  3863. IFDI_VLAN_UNREGISTER(ctx, vtag);
  3864. /* Re-init to load the changes, if required */
  3865. if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG))
  3866. iflib_init_locked(ctx);
  3867. CTX_UNLOCK(ctx);
  3868. }
  3869. static void
  3870. iflib_led_func(void *arg, int onoff)
  3871. {
  3872. if_ctx_t ctx = arg;
  3873. CTX_LOCK(ctx);
  3874. IFDI_LED_FUNC(ctx, onoff);
  3875. CTX_UNLOCK(ctx);
  3876. }
  3877. /*********************************************************************
  3878. *
  3879. * BUS FUNCTION DEFINITIONS
  3880. *
  3881. **********************************************************************/
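/*
* Match the device against the PCI vendor table supplied by the driver's
* DEVICE_REGISTER method, setting the device description on a hit.
*/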
  3882. int
  3883. iflib_device_probe(device_t dev)
  3884. {
  3885. pci_vendor_info_t *ent;
  3886. uint16_t pci_vendor_id, pci_device_id;
  3887. uint16_t pci_subvendor_id, pci_subdevice_id;
  3888. uint16_t pci_rev_id;
  3889. if_shared_ctx_t sctx;
  3890. if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC)
  3891. return (ENOTSUP);
  3892. pci_vendor_id = pci_get_vendor(dev);
  3893. pci_device_id = pci_get_device(dev);
  3894. pci_subvendor_id = pci_get_subvendor(dev);
  3895. pci_subdevice_id = pci_get_subdevice(dev);
  3896. pci_rev_id = pci_get_revid(dev);
  3897. if (sctx->isc_parse_devinfo != NULL)
  3898. sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id);
  3899. ent = sctx->isc_vendor_info;
  3900. while (ent->pvi_vendor_id != 0) {
  3901. if (pci_vendor_id != ent->pvi_vendor_id) {
  3902. ent++;
  3903. continue;
  3904. }
  3905. if ((pci_device_id == ent->pvi_device_id) &&
  3906. ((pci_subvendor_id == ent->pvi_subvendor_id) ||
  3907. (ent->pvi_subvendor_id == 0)) &&
  3908. ((pci_subdevice_id == ent->pvi_subdevice_id) ||
  3909. (ent->pvi_subdevice_id == 0)) &&
  3910. ((pci_rev_id == ent->pvi_rev_id) ||
  3911. (ent->pvi_rev_id == 0))) {
  3912. device_set_desc_copy(dev, ent->pvi_name);
  3913. /* this needs to be changed to zero if the bus probing code
  3914. * ever stops re-probing on best match because the sctx
3915. * may have its values overwritten by register calls
  3916. * in subsequent probes
  3917. */
  3918. return (BUS_PROBE_DEFAULT);
  3919. }
  3920. ent++;
  3921. }
  3922. return (ENXIO);
  3923. }
  3924. int
  3925. iflib_device_probe_vendor(device_t dev)
  3926. {
  3927. int probe;
  3928. probe = iflib_device_probe(dev);
  3929. if (probe == BUS_PROBE_DEFAULT)
  3930. return (BUS_PROBE_VENDOR);
  3931. else
  3932. return (probe);
  3933. }
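/*
* Seed the softc context from the sysctl-tunable queue and descriptor
* counts, clamping each descriptor count to the driver's advertised
* minimum and maximum and falling back to the default when the requested
* value is not a power of two.
*/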
  3934. static void
  3935. iflib_reset_qvalues(if_ctx_t ctx)
  3936. {
  3937. if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  3938. if_shared_ctx_t sctx = ctx->ifc_sctx;
  3939. device_t dev = ctx->ifc_dev;
  3940. int i;
  3941. scctx->isc_txrx_budget_bytes_max = IFLIB_MAX_TX_BYTES;
  3942. scctx->isc_tx_qdepth = IFLIB_DEFAULT_TX_QDEPTH;
  3943. if (ctx->ifc_sysctl_ntxqs != 0)
  3944. scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs;
  3945. if (ctx->ifc_sysctl_nrxqs != 0)
  3946. scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs;
  3947. for (i = 0; i < sctx->isc_ntxqs; i++) {
  3948. if (ctx->ifc_sysctl_ntxds[i] != 0)
  3949. scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i];
  3950. else
  3951. scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i];
  3952. }
  3953. for (i = 0; i < sctx->isc_nrxqs; i++) {
  3954. if (ctx->ifc_sysctl_nrxds[i] != 0)
  3955. scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i];
  3956. else
  3957. scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i];
  3958. }
  3959. for (i = 0; i < sctx->isc_nrxqs; i++) {
  3960. if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) {
  3961. device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n",
  3962. i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]);
  3963. scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i];
  3964. }
  3965. if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) {
  3966. device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n",
  3967. i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]);
  3968. scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i];
  3969. }
  3970. if (!powerof2(scctx->isc_nrxd[i])) {
  3971. device_printf(dev, "nrxd%d: %d is not a power of 2 - using default value of %d\n",
  3972. i, scctx->isc_nrxd[i], sctx->isc_nrxd_default[i]);
  3973. scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i];
  3974. }
  3975. }
  3976. for (i = 0; i < sctx->isc_ntxqs; i++) {
  3977. if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) {
  3978. device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n",
  3979. i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]);
  3980. scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i];
  3981. }
  3982. if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) {
  3983. device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n",
  3984. i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]);
  3985. scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i];
  3986. }
  3987. if (!powerof2(scctx->isc_ntxd[i])) {
  3988. device_printf(dev, "ntxd%d: %d is not a power of 2 - using default value of %d\n",
  3989. i, scctx->isc_ntxd[i], sctx->isc_ntxd_default[i]);
  3990. scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i];
  3991. }
  3992. }
  3993. }
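/*
* Pick the starting CPU offset for this context's queues. Contexts that
* share the same CPU set draw from a common, advancing offset so that
* their queues land on different cores.
*/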
  3994. static uint16_t
  3995. get_ctx_core_offset(if_ctx_t ctx)
  3996. {
  3997. if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  3998. struct cpu_offset *op;
  3999. uint16_t qc;
  4000. uint16_t ret = ctx->ifc_sysctl_core_offset;
  4001. if (ret != CORE_OFFSET_UNSPECIFIED)
  4002. return (ret);
  4003. if (ctx->ifc_sysctl_separate_txrx)
  4004. qc = scctx->isc_ntxqsets + scctx->isc_nrxqsets;
  4005. else
  4006. qc = max(scctx->isc_ntxqsets, scctx->isc_nrxqsets);
  4007. mtx_lock(&cpu_offset_mtx);
  4008. SLIST_FOREACH(op, &cpu_offsets, entries) {
  4009. if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) {
  4010. ret = op->offset;
  4011. op->offset += qc;
  4012. MPASS(op->refcount < UINT_MAX);
  4013. op->refcount++;
  4014. break;
  4015. }
  4016. }
  4017. if (ret == CORE_OFFSET_UNSPECIFIED) {
  4018. ret = 0;
  4019. op = malloc(sizeof(struct cpu_offset), M_IFLIB,
  4020. M_NOWAIT | M_ZERO);
  4021. if (op == NULL) {
  4022. device_printf(ctx->ifc_dev,
  4023. "allocation for cpu offset failed.\n");
  4024. } else {
  4025. op->offset = qc;
  4026. op->refcount = 1;
  4027. CPU_COPY(&ctx->ifc_cpus, &op->set);
  4028. SLIST_INSERT_HEAD(&cpu_offsets, op, entries);
  4029. }
  4030. }
  4031. mtx_unlock(&cpu_offset_mtx);
  4032. return (ret);
  4033. }
  4034. static void
  4035. unref_ctx_core_offset(if_ctx_t ctx)
  4036. {
  4037. struct cpu_offset *op, *top;
  4038. mtx_lock(&cpu_offset_mtx);
  4039. SLIST_FOREACH_SAFE(op, &cpu_offsets, entries, top) {
  4040. if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) {
  4041. MPASS(op->refcount > 0);
  4042. op->refcount--;
  4043. if (op->refcount == 0) {
  4044. SLIST_REMOVE(&cpu_offsets, op, cpu_offset, entries);
  4045. free(op, M_IFLIB);
  4046. }
  4047. break;
  4048. }
  4049. }
  4050. mtx_unlock(&cpu_offset_mtx);
  4051. }
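/*
* Core attach path shared by all iflib drivers: allocate the context,
* run IFDI_ATTACH_PRE, size the queues, set up MSI-X/MSI/legacy
* interrupts, attach the ifnet, and finish with IFDI_ATTACH_POST.
*/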
  4052. int
  4053. iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp)
  4054. {
  4055. if_ctx_t ctx;
  4056. if_t ifp;
  4057. if_softc_ctx_t scctx;
  4058. kobjop_desc_t kobj_desc;
  4059. kobj_method_t *kobj_method;
  4060. int err, msix, rid;
  4061. uint16_t main_rxq, main_txq;
  4062. ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO);
  4063. if (sc == NULL) {
  4064. sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO);
  4065. device_set_softc(dev, ctx);
  4066. ctx->ifc_flags |= IFC_SC_ALLOCATED;
  4067. }
  4068. ctx->ifc_sctx = sctx;
  4069. ctx->ifc_dev = dev;
  4070. ctx->ifc_softc = sc;
  4071. if ((err = iflib_register(ctx)) != 0) {
  4072. device_printf(dev, "iflib_register failed %d\n", err);
  4073. goto fail_ctx_free;
  4074. }
  4075. iflib_add_device_sysctl_pre(ctx);
  4076. scctx = &ctx->ifc_softc_ctx;
  4077. ifp = ctx->ifc_ifp;
  4078. iflib_reset_qvalues(ctx);
  4079. CTX_LOCK(ctx);
  4080. if ((err = IFDI_ATTACH_PRE(ctx)) != 0) {
  4081. device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err);
  4082. goto fail_unlock;
  4083. }
  4084. _iflib_pre_assert(scctx);
  4085. ctx->ifc_txrx = *scctx->isc_txrx;
  4086. #ifdef INVARIANTS
  4087. if (scctx->isc_capabilities & IFCAP_TXCSUM)
  4088. MPASS(scctx->isc_tx_csum_flags);
  4089. #endif
  4090. if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS);
  4091. if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS);
  4092. if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets))
  4093. scctx->isc_ntxqsets = scctx->isc_ntxqsets_max;
  4094. if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets))
  4095. scctx->isc_nrxqsets = scctx->isc_nrxqsets_max;
  4096. main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0;
  4097. main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0;
  4098. /* XXX change for per-queue sizes */
  4099. device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n",
  4100. scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]);
  4101. if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] /
  4102. MAX_SINGLE_PACKET_FRACTION)
  4103. scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] /
  4104. MAX_SINGLE_PACKET_FRACTION);
  4105. if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] /
  4106. MAX_SINGLE_PACKET_FRACTION)
  4107. scctx->isc_tx_tso_segments_max = max(1,
  4108. scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION);
  4109. /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */
  4110. if (if_getcapabilities(ifp) & IFCAP_TSO) {
  4111. /*
  4112. * The stack can't handle a TSO size larger than IP_MAXPACKET,
  4113. * but some MACs do.
  4114. */
  4115. if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max,
  4116. IP_MAXPACKET));
  4117. /*
  4118. * Take maximum number of m_pullup(9)'s in iflib_parse_header()
  4119. * into account. In the worst case, each of these calls will
  4120. * add another mbuf and, thus, the requirement for another DMA
  4121. * segment. So for best performance, it doesn't make sense to
4122. * advertise a maximum of TSO segments that typically will
  4123. * require defragmentation in iflib_encap().
  4124. */
  4125. if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3);
  4126. if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max);
  4127. }
  4128. if (scctx->isc_rss_table_size == 0)
  4129. scctx->isc_rss_table_size = 64;
  4130. scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;
  4131. GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx);
  4132. /* XXX format name */
  4133. taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx,
  4134. -1, "admin");
  4135. /* Set up cpu set. If it fails, use the set of all CPUs. */
  4136. if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) {
  4137. device_printf(dev, "Unable to fetch CPU list\n");
  4138. CPU_COPY(&all_cpus, &ctx->ifc_cpus);
  4139. }
  4140. MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0);
  4141. /*
4142. ** Now set up MSI or MSI-X; this should return the number of supported
4143. ** vectors (1 for a legacy interrupt or MSI).
  4144. */
  4145. if (sctx->isc_flags & IFLIB_SKIP_MSIX) {
  4146. msix = scctx->isc_vectors;
  4147. } else if (scctx->isc_msix_bar != 0)
  4148. /*
4149. * The simple fact that isc_msix_bar is not 0 does not mean
4150. * we have a good value there that is known to work.
  4151. */
  4152. msix = iflib_msix_init(ctx);
  4153. else {
  4154. scctx->isc_vectors = 1;
  4155. scctx->isc_ntxqsets = 1;
  4156. scctx->isc_nrxqsets = 1;
  4157. scctx->isc_intr = IFLIB_INTR_LEGACY;
  4158. msix = 0;
  4159. }
  4160. /* Get memory for the station queues */
  4161. if ((err = iflib_queues_alloc(ctx))) {
  4162. device_printf(dev, "Unable to allocate queue memory\n");
  4163. goto fail_intr_free;
  4164. }
  4165. if ((err = iflib_qset_structures_setup(ctx)))
  4166. goto fail_queues;
  4167. /*
  4168. * Now that we know how many queues there are, get the core offset.
  4169. */
  4170. ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx);
  4171. /*
  4172. * Group taskqueues aren't properly set up until SMP is started,
  4173. * so we disable interrupts until we can handle them post
  4174. * SI_SUB_SMP.
  4175. *
  4176. * XXX: disabling interrupts doesn't actually work, at least for
  4177. * the non-MSI case. When they occur before SI_SUB_SMP completes,
  4178. * we do null handling and depend on this not causing too large an
  4179. * interrupt storm.
  4180. */
  4181. IFDI_INTR_DISABLE(ctx);
  4182. if (msix > 1) {
  4183. /*
  4184. * When using MSI-X, ensure that ifdi_{r,t}x_queue_intr_enable
  4185. * aren't the default NULL implementation.
  4186. */
  4187. kobj_desc = &ifdi_rx_queue_intr_enable_desc;
  4188. kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL,
  4189. kobj_desc);
  4190. if (kobj_method == &kobj_desc->deflt) {
  4191. device_printf(dev,
  4192. "MSI-X requires ifdi_rx_queue_intr_enable method");
  4193. err = EOPNOTSUPP;
  4194. goto fail_queues;
  4195. }
  4196. kobj_desc = &ifdi_tx_queue_intr_enable_desc;
  4197. kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL,
  4198. kobj_desc);
  4199. if (kobj_method == &kobj_desc->deflt) {
  4200. device_printf(dev,
  4201. "MSI-X requires ifdi_tx_queue_intr_enable method");
  4202. err = EOPNOTSUPP;
  4203. goto fail_queues;
  4204. }
  4205. /*
  4206. * Assign the MSI-X vectors.
  4207. * Note that the default NULL ifdi_msix_intr_assign method will
  4208. * fail here, too.
  4209. */
  4210. err = IFDI_MSIX_INTR_ASSIGN(ctx, msix);
  4211. if (err != 0) {
  4212. device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n",
  4213. err);
  4214. goto fail_queues;
  4215. }
  4216. } else if (scctx->isc_intr != IFLIB_INTR_MSIX) {
  4217. rid = 0;
  4218. if (scctx->isc_intr == IFLIB_INTR_MSI) {
  4219. MPASS(msix == 1);
  4220. rid = 1;
  4221. }
  4222. if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) {
  4223. device_printf(dev, "iflib_legacy_setup failed %d\n", err);
  4224. goto fail_queues;
  4225. }
  4226. } else {
  4227. device_printf(dev,
  4228. "Cannot use iflib with only 1 MSI-X interrupt!\n");
  4229. err = ENODEV;
  4230. goto fail_intr_free;
  4231. }
  4232. ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac);
  4233. if ((err = IFDI_ATTACH_POST(ctx)) != 0) {
  4234. device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err);
  4235. goto fail_detach;
  4236. }
  4237. /*
  4238. * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported.
  4239. * This must appear after the call to ether_ifattach() because
  4240. * ether_ifattach() sets if_hdrlen to the default value.
  4241. */
  4242. if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU)
  4243. if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
  4244. if ((err = iflib_netmap_attach(ctx))) {
  4245. device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err);
  4246. goto fail_detach;
  4247. }
  4248. *ctxp = ctx;
  4249. NETDUMP_SET(ctx->ifc_ifp, iflib);
  4250. if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter);
  4251. iflib_add_device_sysctl_post(ctx);
  4252. ctx->ifc_flags |= IFC_INIT_DONE;
  4253. CTX_UNLOCK(ctx);
  4254. return (0);
  4255. fail_detach:
  4256. ether_ifdetach(ctx->ifc_ifp);
  4257. fail_intr_free:
  4258. iflib_free_intr_mem(ctx);
  4259. fail_queues:
  4260. iflib_tx_structures_free(ctx);
  4261. iflib_rx_structures_free(ctx);
  4262. taskqgroup_detach(qgroup_if_config_tqg, &ctx->ifc_admin_task);
  4263. IFDI_DETACH(ctx);
  4264. fail_unlock:
  4265. CTX_UNLOCK(ctx);
  4266. iflib_deregister(ctx);
  4267. fail_ctx_free:
  4268. device_set_softc(ctx->ifc_dev, NULL);
  4269. if (ctx->ifc_flags & IFC_SC_ALLOCATED)
  4270. free(ctx->ifc_softc, M_IFLIB);
  4271. free(ctx, M_IFLIB);
  4272. return (err);
  4273. }
  4274. int
  4275. iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp,
  4276. struct iflib_cloneattach_ctx *clctx)
  4277. {
  4278. int err;
  4279. if_ctx_t ctx;
  4280. if_t ifp;
  4281. if_softc_ctx_t scctx;
  4282. int i;
  4283. void *sc;
  4284. uint16_t main_txq;
  4285. uint16_t main_rxq;
  4286. ctx = malloc(sizeof(*ctx), M_IFLIB, M_WAITOK|M_ZERO);
  4287. sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO);
  4288. ctx->ifc_flags |= IFC_SC_ALLOCATED;
  4289. if (sctx->isc_flags & (IFLIB_PSEUDO|IFLIB_VIRTUAL))
  4290. ctx->ifc_flags |= IFC_PSEUDO;
  4291. ctx->ifc_sctx = sctx;
  4292. ctx->ifc_softc = sc;
  4293. ctx->ifc_dev = dev;
  4294. if ((err = iflib_register(ctx)) != 0) {
  4295. device_printf(dev, "%s: iflib_register failed %d\n", __func__, err);
  4296. goto fail_ctx_free;
  4297. }
  4298. iflib_add_device_sysctl_pre(ctx);
  4299. scctx = &ctx->ifc_softc_ctx;
  4300. ifp = ctx->ifc_ifp;
  4301. iflib_reset_qvalues(ctx);
  4302. CTX_LOCK(ctx);
  4303. if ((err = IFDI_ATTACH_PRE(ctx)) != 0) {
  4304. device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err);
  4305. goto fail_unlock;
  4306. }
  4307. if (sctx->isc_flags & IFLIB_GEN_MAC)
  4308. iflib_gen_mac(ctx);
  4309. if ((err = IFDI_CLONEATTACH(ctx, clctx->cc_ifc, clctx->cc_name,
  4310. clctx->cc_params)) != 0) {
  4311. device_printf(dev, "IFDI_CLONEATTACH failed %d\n", err);
  4312. goto fail_ctx_free;
  4313. }
  4314. ifmedia_add(&ctx->ifc_media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
  4315. ifmedia_add(&ctx->ifc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
  4316. ifmedia_set(&ctx->ifc_media, IFM_ETHER | IFM_AUTO);
  4317. #ifdef INVARIANTS
  4318. if (scctx->isc_capabilities & IFCAP_TXCSUM)
  4319. MPASS(scctx->isc_tx_csum_flags);
  4320. #endif
  4321. if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_LINKSTATE);
  4322. if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_LINKSTATE);
  4323. ifp->if_flags |= IFF_NOGROUP;
  4324. if (sctx->isc_flags & IFLIB_PSEUDO) {
  4325. ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac);
  4326. if ((err = IFDI_ATTACH_POST(ctx)) != 0) {
  4327. device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err);
  4328. goto fail_detach;
  4329. }
  4330. *ctxp = ctx;
  4331. /*
  4332. * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported.
  4333. * This must appear after the call to ether_ifattach() because
  4334. * ether_ifattach() sets if_hdrlen to the default value.
  4335. */
  4336. if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU)
  4337. if_setifheaderlen(ifp,
  4338. sizeof(struct ether_vlan_header));
  4339. if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter);
  4340. iflib_add_device_sysctl_post(ctx);
  4341. ctx->ifc_flags |= IFC_INIT_DONE;
  4342. return (0);
  4343. }
  4344. _iflib_pre_assert(scctx);
  4345. ctx->ifc_txrx = *scctx->isc_txrx;
  4346. if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets))
  4347. scctx->isc_ntxqsets = scctx->isc_ntxqsets_max;
  4348. if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets))
  4349. scctx->isc_nrxqsets = scctx->isc_nrxqsets_max;
  4350. main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0;
  4351. main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0;
  4352. /* XXX change for per-queue sizes */
  4353. device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n",
  4354. scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]);
  4355. if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] /
  4356. MAX_SINGLE_PACKET_FRACTION)
  4357. scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] /
  4358. MAX_SINGLE_PACKET_FRACTION);
  4359. if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] /
  4360. MAX_SINGLE_PACKET_FRACTION)
  4361. scctx->isc_tx_tso_segments_max = max(1,
  4362. scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION);
  4363. /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */
  4364. if (if_getcapabilities(ifp) & IFCAP_TSO) {
  4365. /*
  4366. * The stack can't handle a TSO size larger than IP_MAXPACKET,
  4367. * but some MACs do.
  4368. */
  4369. if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max,
  4370. IP_MAXPACKET));
  4371. /*
  4372. * Take maximum number of m_pullup(9)'s in iflib_parse_header()
  4373. * into account. In the worst case, each of these calls will
  4374. * add another mbuf and, thus, the requirement for another DMA
  4375. * segment. So for best performance, it doesn't make sense to
4376. * advertise a maximum of TSO segments that typically will
  4377. * require defragmentation in iflib_encap().
  4378. */
  4379. if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3);
  4380. if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max);
  4381. }
  4382. if (scctx->isc_rss_table_size == 0)
  4383. scctx->isc_rss_table_size = 64;
  4384. scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1;
  4385. GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx);
  4386. /* XXX format name */
  4387. taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx,
  4388. -1, "admin");
  4389. /* XXX --- can support > 1 -- but keep it simple for now */
  4390. scctx->isc_intr = IFLIB_INTR_LEGACY;
  4391. /* Get memory for the station queues */
  4392. if ((err = iflib_queues_alloc(ctx))) {
  4393. device_printf(dev, "Unable to allocate queue memory\n");
  4394. goto fail_iflib_detach;
  4395. }
  4396. if ((err = iflib_qset_structures_setup(ctx))) {
  4397. device_printf(dev, "qset structure setup failed %d\n", err);
  4398. goto fail_queues;
  4399. }
  4400. /*
4401. * XXX What, if anything, do we want to do about interrupts?
  4402. */
  4403. ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac);
  4404. if ((err = IFDI_ATTACH_POST(ctx)) != 0) {
  4405. device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err);
  4406. goto fail_detach;
  4407. }
  4408. /*
  4409. * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported.
  4410. * This must appear after the call to ether_ifattach() because
  4411. * ether_ifattach() sets if_hdrlen to the default value.
  4412. */
  4413. if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU)
  4414. if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
  4415. /* XXX handle more than one queue */
  4416. for (i = 0; i < scctx->isc_nrxqsets; i++)
  4417. IFDI_RX_CLSET(ctx, 0, i, ctx->ifc_rxqs[i].ifr_fl[0].ifl_sds.ifsd_cl);
  4418. *ctxp = ctx;
  4419. if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter);
  4420. iflib_add_device_sysctl_post(ctx);
  4421. ctx->ifc_flags |= IFC_INIT_DONE;
  4422. CTX_UNLOCK(ctx);
  4423. return (0);
  4424. fail_detach:
  4425. ether_ifdetach(ctx->ifc_ifp);
  4426. fail_queues:
  4427. iflib_tx_structures_free(ctx);
  4428. iflib_rx_structures_free(ctx);
  4429. fail_iflib_detach:
  4430. IFDI_DETACH(ctx);
  4431. fail_unlock:
  4432. CTX_UNLOCK(ctx);
  4433. iflib_deregister(ctx);
  4434. fail_ctx_free:
  4435. free(ctx->ifc_softc, M_IFLIB);
  4436. free(ctx, M_IFLIB);
  4437. return (err);
  4438. }
  4439. int
  4440. iflib_pseudo_deregister(if_ctx_t ctx)
  4441. {
  4442. if_t ifp = ctx->ifc_ifp;
  4443. iflib_txq_t txq;
  4444. iflib_rxq_t rxq;
  4445. int i, j;
  4446. struct taskqgroup *tqg;
  4447. iflib_fl_t fl;
  4448. /* Unregister VLAN event handlers early */
  4449. iflib_unregister_vlan_handlers(ctx);
  4450. ether_ifdetach(ifp);
  4451. /* XXX drain any dependent tasks */
  4452. tqg = qgroup_if_io_tqg;
  4453. for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) {
  4454. callout_drain(&txq->ift_timer);
  4455. if (txq->ift_task.gt_uniq != NULL)
  4456. taskqgroup_detach(tqg, &txq->ift_task);
  4457. }
  4458. for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) {
  4459. callout_drain(&rxq->ifr_watchdog);
  4460. if (rxq->ifr_task.gt_uniq != NULL)
  4461. taskqgroup_detach(tqg, &rxq->ifr_task);
  4462. for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
  4463. free(fl->ifl_rx_bitmap, M_IFLIB);
  4464. }
  4465. tqg = qgroup_if_config_tqg;
  4466. if (ctx->ifc_admin_task.gt_uniq != NULL)
  4467. taskqgroup_detach(tqg, &ctx->ifc_admin_task);
  4468. if (ctx->ifc_vflr_task.gt_uniq != NULL)
  4469. taskqgroup_detach(tqg, &ctx->ifc_vflr_task);
  4470. iflib_tx_structures_free(ctx);
  4471. iflib_rx_structures_free(ctx);
  4472. iflib_deregister(ctx);
  4473. if (ctx->ifc_flags & IFC_SC_ALLOCATED)
  4474. free(ctx->ifc_softc, M_IFLIB);
  4475. free(ctx, M_IFLIB);
  4476. return (0);
  4477. }
  4478. int
  4479. iflib_device_attach(device_t dev)
  4480. {
  4481. if_ctx_t ctx;
  4482. if_shared_ctx_t sctx;
  4483. if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC)
  4484. return (ENOTSUP);
  4485. pci_enable_busmaster(dev);
  4486. return (iflib_device_register(dev, NULL, sctx, &ctx));
  4487. }
  4488. int
  4489. iflib_device_deregister(if_ctx_t ctx)
  4490. {
  4491. if_t ifp = ctx->ifc_ifp;
  4492. iflib_txq_t txq;
  4493. iflib_rxq_t rxq;
  4494. device_t dev = ctx->ifc_dev;
  4495. int i, j;
  4496. struct taskqgroup *tqg;
  4497. iflib_fl_t fl;
4498. /* Make sure VLANs are not using the driver */
  4499. if (if_vlantrunkinuse(ifp)) {
  4500. device_printf(dev, "Vlan in use, detach first\n");
  4501. return (EBUSY);
  4502. }
  4503. #ifdef PCI_IOV
  4504. if (!CTX_IS_VF(ctx) && pci_iov_detach(dev) != 0) {
  4505. device_printf(dev, "SR-IOV in use; detach first.\n");
  4506. return (EBUSY);
  4507. }
  4508. #endif
  4509. STATE_LOCK(ctx);
  4510. ctx->ifc_flags |= IFC_IN_DETACH;
  4511. STATE_UNLOCK(ctx);
  4512. /* Unregister VLAN handlers before calling iflib_stop() */
  4513. iflib_unregister_vlan_handlers(ctx);
  4514. iflib_netmap_detach(ifp);
  4515. ether_ifdetach(ifp);
  4516. CTX_LOCK(ctx);
  4517. iflib_stop(ctx);
  4518. CTX_UNLOCK(ctx);
  4519. if (ctx->ifc_led_dev != NULL)
  4520. led_destroy(ctx->ifc_led_dev);
  4521. /* XXX drain any dependent tasks */
  4522. tqg = qgroup_if_io_tqg;
  4523. for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) {
  4524. callout_drain(&txq->ift_timer);
  4525. if (txq->ift_task.gt_uniq != NULL)
  4526. taskqgroup_detach(tqg, &txq->ift_task);
  4527. }
  4528. for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) {
  4529. if (rxq->ifr_task.gt_uniq != NULL)
  4530. taskqgroup_detach(tqg, &rxq->ifr_task);
  4531. for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
  4532. free(fl->ifl_rx_bitmap, M_IFLIB);
  4533. }
  4534. tqg = qgroup_if_config_tqg;
  4535. if (ctx->ifc_admin_task.gt_uniq != NULL)
  4536. taskqgroup_detach(tqg, &ctx->ifc_admin_task);
  4537. if (ctx->ifc_vflr_task.gt_uniq != NULL)
  4538. taskqgroup_detach(tqg, &ctx->ifc_vflr_task);
  4539. CTX_LOCK(ctx);
  4540. IFDI_DETACH(ctx);
  4541. CTX_UNLOCK(ctx);
4542. /* ether_ifdetach calls if_qflush - lock must be destroyed afterwards */
  4543. iflib_free_intr_mem(ctx);
  4544. bus_generic_detach(dev);
  4545. iflib_tx_structures_free(ctx);
  4546. iflib_rx_structures_free(ctx);
  4547. iflib_deregister(ctx);
  4548. device_set_softc(ctx->ifc_dev, NULL);
  4549. if (ctx->ifc_flags & IFC_SC_ALLOCATED)
  4550. free(ctx->ifc_softc, M_IFLIB);
  4551. unref_ctx_core_offset(ctx);
  4552. free(ctx, M_IFLIB);
  4553. return (0);
  4554. }
  4555. static void
  4556. iflib_free_intr_mem(if_ctx_t ctx)
  4557. {
  4558. if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) {
  4559. iflib_irq_free(ctx, &ctx->ifc_legacy_irq);
  4560. }
  4561. if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) {
  4562. pci_release_msi(ctx->ifc_dev);
  4563. }
  4564. if (ctx->ifc_msix_mem != NULL) {
  4565. bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY,
  4566. rman_get_rid(ctx->ifc_msix_mem), ctx->ifc_msix_mem);
  4567. ctx->ifc_msix_mem = NULL;
  4568. }
  4569. }
  4570. int
  4571. iflib_device_detach(device_t dev)
  4572. {
  4573. if_ctx_t ctx = device_get_softc(dev);
  4574. return (iflib_device_deregister(ctx));
  4575. }
  4576. int
  4577. iflib_device_suspend(device_t dev)
  4578. {
  4579. if_ctx_t ctx = device_get_softc(dev);
  4580. CTX_LOCK(ctx);
  4581. IFDI_SUSPEND(ctx);
  4582. CTX_UNLOCK(ctx);
  4583. return bus_generic_suspend(dev);
  4584. }
  4585. int
  4586. iflib_device_shutdown(device_t dev)
  4587. {
  4588. if_ctx_t ctx = device_get_softc(dev);
  4589. CTX_LOCK(ctx);
  4590. IFDI_SHUTDOWN(ctx);
  4591. CTX_UNLOCK(ctx);
  4592. return bus_generic_suspend(dev);
  4593. }
  4594. int
  4595. iflib_device_resume(device_t dev)
  4596. {
  4597. if_ctx_t ctx = device_get_softc(dev);
  4598. iflib_txq_t txq = ctx->ifc_txqs;
  4599. CTX_LOCK(ctx);
  4600. IFDI_RESUME(ctx);
  4601. iflib_if_init_locked(ctx);
  4602. CTX_UNLOCK(ctx);
  4603. for (int i = 0; i < NTXQSETS(ctx); i++, txq++)
  4604. iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET);
  4605. return (bus_generic_resume(dev));
  4606. }
  4607. int
  4608. iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params)
  4609. {
  4610. int error;
  4611. if_ctx_t ctx = device_get_softc(dev);
  4612. CTX_LOCK(ctx);
  4613. error = IFDI_IOV_INIT(ctx, num_vfs, params);
  4614. CTX_UNLOCK(ctx);
  4615. return (error);
  4616. }
  4617. void
  4618. iflib_device_iov_uninit(device_t dev)
  4619. {
  4620. if_ctx_t ctx = device_get_softc(dev);
  4621. CTX_LOCK(ctx);
  4622. IFDI_IOV_UNINIT(ctx);
  4623. CTX_UNLOCK(ctx);
  4624. }
  4625. int
  4626. iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params)
  4627. {
  4628. int error;
  4629. if_ctx_t ctx = device_get_softc(dev);
  4630. CTX_LOCK(ctx);
  4631. error = IFDI_IOV_VF_ADD(ctx, vfnum, params);
  4632. CTX_UNLOCK(ctx);
  4633. return (error);
  4634. }
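/*
 * Usage sketch (illustrative only, not part of iflib): a hypothetical
 * "foo" driver points its bus methods straight at the wrappers above;
 * the register/probe/attach wrappers live elsewhere in iflib, and the
 * SR-IOV methods only apply when built with PCI_IOV.
 *
 *	static device_method_t foo_methods[] = {
 *		DEVMETHOD(device_register, foo_register),
 *		DEVMETHOD(device_probe, iflib_device_probe),
 *		DEVMETHOD(device_attach, iflib_device_attach),
 *		DEVMETHOD(device_detach, iflib_device_detach),
 *		DEVMETHOD(device_suspend, iflib_device_suspend),
 *		DEVMETHOD(device_resume, iflib_device_resume),
 *		DEVMETHOD(device_shutdown, iflib_device_shutdown),
 *		DEVMETHOD(pci_iov_init, iflib_device_iov_init),
 *		DEVMETHOD(pci_iov_uninit, iflib_device_iov_uninit),
 *		DEVMETHOD(pci_iov_add_vf, iflib_device_iov_add_vf),
 *		DEVMETHOD_END
 *	};
 */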
  4635. /*********************************************************************
  4636. *
  4637. * MODULE FUNCTION DEFINITIONS
  4638. *
  4639. **********************************************************************/
  4640. /*
  4641. * - Start a fast taskqueue thread for each core
  4642. * - Start a taskqueue for control operations
  4643. */
  4644. static int
  4645. iflib_module_init(void)
  4646. {
  4647. return (0);
  4648. }
  4649. static int
  4650. iflib_module_event_handler(module_t mod, int what, void *arg)
  4651. {
  4652. int err;
  4653. switch (what) {
  4654. case MOD_LOAD:
  4655. if ((err = iflib_module_init()) != 0)
  4656. return (err);
  4657. break;
  4658. case MOD_UNLOAD:
  4659. return (EBUSY);
  4660. default:
  4661. return (EOPNOTSUPP);
  4662. }
  4663. return (0);
  4664. }
  4665. /*********************************************************************
  4666. *
  4667. * PUBLIC FUNCTION DEFINITIONS
  4668. * ordered as in iflib.h
  4669. *
  4670. **********************************************************************/
  4671. static void
  4672. _iflib_assert(if_shared_ctx_t sctx)
  4673. {
  4674. int i;
  4675. MPASS(sctx->isc_tx_maxsize);
  4676. MPASS(sctx->isc_tx_maxsegsize);
  4677. MPASS(sctx->isc_rx_maxsize);
  4678. MPASS(sctx->isc_rx_nsegments);
  4679. MPASS(sctx->isc_rx_maxsegsize);
  4680. MPASS(sctx->isc_nrxqs >= 1 && sctx->isc_nrxqs <= 8);
  4681. for (i = 0; i < sctx->isc_nrxqs; i++) {
  4682. MPASS(sctx->isc_nrxd_min[i]);
  4683. MPASS(powerof2(sctx->isc_nrxd_min[i]));
  4684. MPASS(sctx->isc_nrxd_max[i]);
  4685. MPASS(powerof2(sctx->isc_nrxd_max[i]));
  4686. MPASS(sctx->isc_nrxd_default[i]);
  4687. MPASS(powerof2(sctx->isc_nrxd_default[i]));
  4688. }
  4689. MPASS(sctx->isc_ntxqs >= 1 && sctx->isc_ntxqs <= 8);
  4690. for (i = 0; i < sctx->isc_ntxqs; i++) {
  4691. MPASS(sctx->isc_ntxd_min[i]);
  4692. MPASS(powerof2(sctx->isc_ntxd_min[i]));
  4693. MPASS(sctx->isc_ntxd_max[i]);
  4694. MPASS(powerof2(sctx->isc_ntxd_max[i]));
  4695. MPASS(sctx->isc_ntxd_default[i]);
  4696. MPASS(powerof2(sctx->isc_ntxd_default[i]));
  4697. }
  4698. }
  4699. static void
  4700. _iflib_pre_assert(if_softc_ctx_t scctx)
  4701. {
  4702. MPASS(scctx->isc_txrx->ift_txd_encap);
  4703. MPASS(scctx->isc_txrx->ift_txd_flush);
  4704. MPASS(scctx->isc_txrx->ift_txd_credits_update);
  4705. MPASS(scctx->isc_txrx->ift_rxd_available);
  4706. MPASS(scctx->isc_txrx->ift_rxd_pkt_get);
  4707. MPASS(scctx->isc_txrx->ift_rxd_refill);
  4708. MPASS(scctx->isc_txrx->ift_rxd_flush);
  4709. }
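/*
 * Sketch of what the assertions above expect from a driver (hedged; the
 * "foo_*" names and the numeric values are hypothetical, only the field
 * and method names come from iflib): every descriptor count must be a
 * non-zero power of two and every if_txrx method must be provided.
 *
 *	static struct if_txrx foo_txrx = {
 *		.ift_txd_encap = foo_isc_txd_encap,
 *		.ift_txd_flush = foo_isc_txd_flush,
 *		.ift_txd_credits_update = foo_isc_txd_credits_update,
 *		.ift_rxd_available = foo_isc_rxd_available,
 *		.ift_rxd_pkt_get = foo_isc_rxd_pkt_get,
 *		.ift_rxd_refill = foo_isc_rxd_refill,
 *		.ift_rxd_flush = foo_isc_rxd_flush,
 *	};
 *
 *	static struct if_shared_ctx foo_sctx_init = {
 *		.isc_ntxqs = 1,
 *		.isc_nrxqs = 1,
 *		.isc_ntxd_min = {64},
 *		.isc_ntxd_max = {4096},
 *		.isc_ntxd_default = {1024},
 *		.isc_nrxd_min = {64},
 *		.isc_nrxd_max = {4096},
 *		.isc_nrxd_default = {1024},
 *		...
 *	};
 */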
  4710. static int
  4711. iflib_register(if_ctx_t ctx)
  4712. {
  4713. if_shared_ctx_t sctx = ctx->ifc_sctx;
  4714. driver_t *driver = sctx->isc_driver;
  4715. device_t dev = ctx->ifc_dev;
  4716. if_t ifp;
  4717. _iflib_assert(sctx);
  4718. CTX_LOCK_INIT(ctx);
  4719. STATE_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev));
  4720. ifp = ctx->ifc_ifp = if_alloc(IFT_ETHER);
  4721. if (ifp == NULL) {
4722. device_printf(dev, "cannot allocate ifnet structure\n");
  4723. return (ENOMEM);
  4724. }
  4725. /*
  4726. * Initialize our context's device specific methods
  4727. */
  4728. kobj_init((kobj_t) ctx, (kobj_class_t) driver);
  4729. kobj_class_compile((kobj_class_t) driver);
  4730. if_initname(ifp, device_get_name(dev), device_get_unit(dev));
  4731. if_setsoftc(ifp, ctx);
  4732. if_setdev(ifp, dev);
  4733. if_setinitfn(ifp, iflib_if_init);
  4734. if_setioctlfn(ifp, iflib_if_ioctl);
  4735. #ifdef ALTQ
  4736. if_setstartfn(ifp, iflib_altq_if_start);
  4737. if_settransmitfn(ifp, iflib_altq_if_transmit);
  4738. if_setsendqready(ifp);
  4739. #else
  4740. if_settransmitfn(ifp, iflib_if_transmit);
  4741. #endif
  4742. if_setqflushfn(ifp, iflib_if_qflush);
  4743. if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
  4744. ctx->ifc_vlan_attach_event =
  4745. EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx,
  4746. EVENTHANDLER_PRI_FIRST);
  4747. ctx->ifc_vlan_detach_event =
  4748. EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx,
  4749. EVENTHANDLER_PRI_FIRST);
  4750. ifmedia_init(&ctx->ifc_media, IFM_IMASK,
  4751. iflib_media_change, iflib_media_status);
  4752. return (0);
  4753. }
  4754. static void
  4755. iflib_unregister_vlan_handlers(if_ctx_t ctx)
  4756. {
  4757. /* Unregister VLAN events */
  4758. if (ctx->ifc_vlan_attach_event != NULL) {
  4759. EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event);
  4760. ctx->ifc_vlan_attach_event = NULL;
  4761. }
  4762. if (ctx->ifc_vlan_detach_event != NULL) {
  4763. EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event);
  4764. ctx->ifc_vlan_detach_event = NULL;
  4765. }
  4766. }
  4767. static void
  4768. iflib_deregister(if_ctx_t ctx)
  4769. {
  4770. if_t ifp = ctx->ifc_ifp;
  4771. /* Remove all media */
  4772. ifmedia_removeall(&ctx->ifc_media);
  4773. /* Ensure that VLAN event handlers are unregistered */
  4774. iflib_unregister_vlan_handlers(ctx);
  4775. /* Release kobject reference */
  4776. kobj_delete((kobj_t) ctx, NULL);
  4777. /* Free the ifnet structure */
  4778. if_free(ifp);
  4779. STATE_LOCK_DESTROY(ctx);
4780. /* ether_ifdetach calls if_qflush - lock must be destroyed afterwards */
  4781. CTX_LOCK_DESTROY(ctx);
  4782. }
  4783. static int
  4784. iflib_queues_alloc(if_ctx_t ctx)
  4785. {
  4786. if_shared_ctx_t sctx = ctx->ifc_sctx;
  4787. if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  4788. device_t dev = ctx->ifc_dev;
  4789. int nrxqsets = scctx->isc_nrxqsets;
  4790. int ntxqsets = scctx->isc_ntxqsets;
  4791. iflib_txq_t txq;
  4792. iflib_rxq_t rxq;
  4793. iflib_fl_t fl = NULL;
  4794. int i, j, cpu, err, txconf, rxconf;
  4795. iflib_dma_info_t ifdip;
  4796. uint32_t *rxqsizes = scctx->isc_rxqsizes;
  4797. uint32_t *txqsizes = scctx->isc_txqsizes;
  4798. uint8_t nrxqs = sctx->isc_nrxqs;
  4799. uint8_t ntxqs = sctx->isc_ntxqs;
  4800. int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1;
  4801. caddr_t *vaddrs;
  4802. uint64_t *paddrs;
  4803. KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1"));
  4804. KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1"));
  4805. /* Allocate the TX ring struct memory */
  4806. if (!(ctx->ifc_txqs =
  4807. (iflib_txq_t) malloc(sizeof(struct iflib_txq) *
  4808. ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) {
  4809. device_printf(dev, "Unable to allocate TX ring memory\n");
  4810. err = ENOMEM;
  4811. goto fail;
  4812. }
  4813. /* Now allocate the RX */
  4814. if (!(ctx->ifc_rxqs =
  4815. (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) *
  4816. nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) {
  4817. device_printf(dev, "Unable to allocate RX ring memory\n");
  4818. err = ENOMEM;
  4819. goto rx_fail;
  4820. }
  4821. txq = ctx->ifc_txqs;
  4822. rxq = ctx->ifc_rxqs;
  4823. /*
  4824. * XXX handle allocation failure
  4825. */
  4826. for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) {
  4827. /* Set up some basics */
  4828. if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs,
  4829. M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) {
  4830. device_printf(dev,
  4831. "Unable to allocate TX DMA info memory\n");
  4832. err = ENOMEM;
  4833. goto err_tx_desc;
  4834. }
  4835. txq->ift_ifdi = ifdip;
  4836. for (j = 0; j < ntxqs; j++, ifdip++) {
  4837. if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, 0)) {
  4838. device_printf(dev,
  4839. "Unable to allocate TX descriptors\n");
  4840. err = ENOMEM;
  4841. goto err_tx_desc;
  4842. }
  4843. txq->ift_txd_size[j] = scctx->isc_txd_size[j];
  4844. bzero((void *)ifdip->idi_vaddr, txqsizes[j]);
  4845. }
  4846. txq->ift_ctx = ctx;
  4847. txq->ift_id = i;
  4848. if (sctx->isc_flags & IFLIB_HAS_TXCQ) {
  4849. txq->ift_br_offset = 1;
  4850. } else {
  4851. txq->ift_br_offset = 0;
  4852. }
  4853. /* XXX fix this */
  4854. txq->ift_timer.c_cpu = cpu;
  4855. if (iflib_txsd_alloc(txq)) {
  4856. device_printf(dev, "Critical Failure setting up TX buffers\n");
  4857. err = ENOMEM;
  4858. goto err_tx_desc;
  4859. }
  4860. /* Initialize the TX lock */
  4861. snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:TX(%d):callout",
  4862. device_get_nameunit(dev), txq->ift_id);
  4863. mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF);
  4864. callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0);
  4865. err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain,
  4866. iflib_txq_can_drain, M_IFLIB, M_WAITOK);
  4867. if (err) {
  4868. /* XXX free any allocated rings */
  4869. device_printf(dev, "Unable to allocate buf_ring\n");
  4870. goto err_tx_desc;
  4871. }
  4872. }
  4873. for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) {
  4874. /* Set up some basics */
  4875. callout_init(&rxq->ifr_watchdog, 1);
  4876. if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs,
  4877. M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) {
  4878. device_printf(dev,
  4879. "Unable to allocate RX DMA info memory\n");
  4880. err = ENOMEM;
  4881. goto err_tx_desc;
  4882. }
  4883. rxq->ifr_ifdi = ifdip;
  4884. /* XXX this needs to be changed if #rx queues != #tx queues */
  4885. rxq->ifr_ntxqirq = 1;
  4886. rxq->ifr_txqid[0] = i;
  4887. for (j = 0; j < nrxqs; j++, ifdip++) {
  4888. if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, 0)) {
  4889. device_printf(dev,
  4890. "Unable to allocate RX descriptors\n");
  4891. err = ENOMEM;
  4892. goto err_tx_desc;
  4893. }
  4894. bzero((void *)ifdip->idi_vaddr, rxqsizes[j]);
  4895. }
  4896. rxq->ifr_ctx = ctx;
  4897. rxq->ifr_id = i;
  4898. if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
  4899. rxq->ifr_fl_offset = 1;
  4900. } else {
  4901. rxq->ifr_fl_offset = 0;
  4902. }
  4903. rxq->ifr_nfl = nfree_lists;
  4904. if (!(fl =
  4905. (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) {
  4906. device_printf(dev, "Unable to allocate free list memory\n");
  4907. err = ENOMEM;
  4908. goto err_tx_desc;
  4909. }
  4910. rxq->ifr_fl = fl;
  4911. for (j = 0; j < nfree_lists; j++) {
  4912. fl[j].ifl_rxq = rxq;
  4913. fl[j].ifl_id = j;
  4914. fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset];
  4915. fl[j].ifl_rxd_size = scctx->isc_rxd_size[j];
  4916. }
  4917. /* Allocate receive buffers for the ring */
  4918. if (iflib_rxsd_alloc(rxq)) {
  4919. device_printf(dev,
  4920. "Critical Failure setting up receive buffers\n");
  4921. err = ENOMEM;
  4922. goto err_rx_desc;
  4923. }
  4924. for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
  4925. fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB,
  4926. M_WAITOK);
  4927. }
  4928. /* TXQs */
  4929. vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK);
  4930. paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK);
  4931. for (i = 0; i < ntxqsets; i++) {
  4932. iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi;
  4933. for (j = 0; j < ntxqs; j++, di++) {
  4934. vaddrs[i*ntxqs + j] = di->idi_vaddr;
  4935. paddrs[i*ntxqs + j] = di->idi_paddr;
  4936. }
  4937. }
  4938. if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) {
  4939. device_printf(ctx->ifc_dev,
  4940. "Unable to allocate device TX queue\n");
  4941. iflib_tx_structures_free(ctx);
  4942. free(vaddrs, M_IFLIB);
  4943. free(paddrs, M_IFLIB);
  4944. goto err_rx_desc;
  4945. }
  4946. free(vaddrs, M_IFLIB);
  4947. free(paddrs, M_IFLIB);
  4948. /* RXQs */
  4949. vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK);
  4950. paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK);
  4951. for (i = 0; i < nrxqsets; i++) {
  4952. iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi;
  4953. for (j = 0; j < nrxqs; j++, di++) {
  4954. vaddrs[i*nrxqs + j] = di->idi_vaddr;
  4955. paddrs[i*nrxqs + j] = di->idi_paddr;
  4956. }
  4957. }
  4958. if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) {
  4959. device_printf(ctx->ifc_dev,
  4960. "Unable to allocate device RX queue\n");
  4961. iflib_tx_structures_free(ctx);
  4962. free(vaddrs, M_IFLIB);
  4963. free(paddrs, M_IFLIB);
  4964. goto err_rx_desc;
  4965. }
  4966. free(vaddrs, M_IFLIB);
  4967. free(paddrs, M_IFLIB);
  4968. return (0);
  4969. /* XXX handle allocation failure changes */
  4970. err_rx_desc:
  4971. err_tx_desc:
  4972. rx_fail:
  4973. if (ctx->ifc_rxqs != NULL)
  4974. free(ctx->ifc_rxqs, M_IFLIB);
  4975. ctx->ifc_rxqs = NULL;
  4976. if (ctx->ifc_txqs != NULL)
  4977. free(ctx->ifc_txqs, M_IFLIB);
  4978. ctx->ifc_txqs = NULL;
  4979. fail:
  4980. return (err);
  4981. }
  4982. static int
  4983. iflib_tx_structures_setup(if_ctx_t ctx)
  4984. {
  4985. iflib_txq_t txq = ctx->ifc_txqs;
  4986. int i;
  4987. for (i = 0; i < NTXQSETS(ctx); i++, txq++)
  4988. iflib_txq_setup(txq);
  4989. return (0);
  4990. }
  4991. static void
  4992. iflib_tx_structures_free(if_ctx_t ctx)
  4993. {
  4994. iflib_txq_t txq = ctx->ifc_txqs;
  4995. if_shared_ctx_t sctx = ctx->ifc_sctx;
  4996. int i, j;
  4997. for (i = 0; i < NTXQSETS(ctx); i++, txq++) {
  4998. for (j = 0; j < sctx->isc_ntxqs; j++)
  4999. iflib_dma_free(&txq->ift_ifdi[j]);
  5000. iflib_txq_destroy(txq);
  5001. }
  5002. free(ctx->ifc_txqs, M_IFLIB);
  5003. ctx->ifc_txqs = NULL;
  5004. IFDI_QUEUES_FREE(ctx);
  5005. }
  5006. /*********************************************************************
  5007. *
  5008. * Initialize all receive rings.
  5009. *
  5010. **********************************************************************/
  5011. static int
  5012. iflib_rx_structures_setup(if_ctx_t ctx)
  5013. {
  5014. iflib_rxq_t rxq = ctx->ifc_rxqs;
  5015. int q;
  5016. #if defined(INET6) || defined(INET)
  5017. int err, i;
  5018. #endif
  5019. for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) {
  5020. #if defined(INET6) || defined(INET)
  5021. if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) {
  5022. err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp,
  5023. TCP_LRO_ENTRIES, min(1024,
  5024. ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]));
  5025. if (err != 0) {
  5026. device_printf(ctx->ifc_dev,
  5027. "LRO Initialization failed!\n");
  5028. goto fail;
  5029. }
  5030. }
  5031. #endif
  5032. IFDI_RXQ_SETUP(ctx, rxq->ifr_id);
  5033. }
  5034. return (0);
  5035. #if defined(INET6) || defined(INET)
  5036. fail:
  5037. /*
5038. * Free the LRO resources allocated so far; we only need to handle
5039. * the rings that completed, since the failing case will have
5040. * cleaned up after itself. 'q' failed, so it's the terminus.
  5041. */
  5042. rxq = ctx->ifc_rxqs;
  5043. for (i = 0; i < q; ++i, rxq++) {
  5044. if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO)
  5045. tcp_lro_free(&rxq->ifr_lc);
  5046. }
  5047. return (err);
  5048. #endif
  5049. }
  5050. /*********************************************************************
  5051. *
  5052. * Free all receive rings.
  5053. *
  5054. **********************************************************************/
  5055. static void
  5056. iflib_rx_structures_free(if_ctx_t ctx)
  5057. {
  5058. iflib_rxq_t rxq = ctx->ifc_rxqs;
  5059. if_shared_ctx_t sctx = ctx->ifc_sctx;
  5060. int i, j;
  5061. for (i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) {
  5062. for (j = 0; j < sctx->isc_nrxqs; j++)
  5063. iflib_dma_free(&rxq->ifr_ifdi[j]);
  5064. iflib_rx_sds_free(rxq);
  5065. #if defined(INET6) || defined(INET)
  5066. if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO)
  5067. tcp_lro_free(&rxq->ifr_lc);
  5068. #endif
  5069. }
  5070. free(ctx->ifc_rxqs, M_IFLIB);
  5071. ctx->ifc_rxqs = NULL;
  5072. }
  5073. static int
  5074. iflib_qset_structures_setup(if_ctx_t ctx)
  5075. {
  5076. int err;
  5077. /*
  5078. * It is expected that the caller takes care of freeing queues if this
  5079. * fails.
  5080. */
  5081. if ((err = iflib_tx_structures_setup(ctx)) != 0) {
  5082. device_printf(ctx->ifc_dev, "iflib_tx_structures_setup failed: %d\n", err);
  5083. return (err);
  5084. }
  5085. if ((err = iflib_rx_structures_setup(ctx)) != 0)
  5086. device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err);
  5087. return (err);
  5088. }
  5089. int
  5090. iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid,
  5091. driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, const char *name)
  5092. {
  5093. return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name));
  5094. }
  5095. #ifdef SMP
  5096. static int
  5097. find_nth(if_ctx_t ctx, int qid)
  5098. {
  5099. cpuset_t cpus;
  5100. int i, cpuid, eqid, count;
  5101. CPU_COPY(&ctx->ifc_cpus, &cpus);
  5102. count = CPU_COUNT(&cpus);
  5103. eqid = qid % count;
  5104. /* clear up to the qid'th bit */
  5105. for (i = 0; i < eqid; i++) {
  5106. cpuid = CPU_FFS(&cpus);
  5107. MPASS(cpuid != 0);
  5108. CPU_CLR(cpuid-1, &cpus);
  5109. }
  5110. cpuid = CPU_FFS(&cpus);
  5111. MPASS(cpuid != 0);
  5112. return (cpuid-1);
  5113. }
  5114. #ifdef SCHED_ULE
  5115. extern struct cpu_group *cpu_top; /* CPU topology */
  5116. static int
  5117. find_child_with_core(int cpu, struct cpu_group *grp)
  5118. {
  5119. int i;
  5120. if (grp->cg_children == 0)
  5121. return -1;
  5122. MPASS(grp->cg_child);
  5123. for (i = 0; i < grp->cg_children; i++) {
  5124. if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask))
  5125. return i;
  5126. }
  5127. return -1;
  5128. }
  5129. /*
5130. * Find the nth "close" core to the specified core.
5131. * "Close" is defined as the deepest level that shares
5132. * at least an L2 cache. With threads, this will be
5133. * threads on the same core. If the shared cache is L3
5134. * or higher, simply return the same core.
  5135. */
  5136. static int
  5137. find_close_core(int cpu, int core_offset)
  5138. {
  5139. struct cpu_group *grp;
  5140. int i;
  5141. int fcpu;
  5142. cpuset_t cs;
  5143. grp = cpu_top;
  5144. if (grp == NULL)
  5145. return cpu;
  5146. i = 0;
  5147. while ((i = find_child_with_core(cpu, grp)) != -1) {
  5148. /* If the child only has one cpu, don't descend */
  5149. if (grp->cg_child[i].cg_count <= 1)
  5150. break;
  5151. grp = &grp->cg_child[i];
  5152. }
  5153. /* If they don't share at least an L2 cache, use the same CPU */
  5154. if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE)
  5155. return cpu;
  5156. /* Now pick one */
  5157. CPU_COPY(&grp->cg_mask, &cs);
  5158. /* Add the selected CPU offset to core offset. */
  5159. for (i = 0; (fcpu = CPU_FFS(&cs)) != 0; i++) {
  5160. if (fcpu - 1 == cpu)
  5161. break;
  5162. CPU_CLR(fcpu - 1, &cs);
  5163. }
  5164. MPASS(fcpu);
  5165. core_offset += i;
  5166. CPU_COPY(&grp->cg_mask, &cs);
  5167. for (i = core_offset % grp->cg_count; i > 0; i--) {
  5168. MPASS(CPU_FFS(&cs));
  5169. CPU_CLR(CPU_FFS(&cs) - 1, &cs);
  5170. }
  5171. MPASS(CPU_FFS(&cs));
  5172. return CPU_FFS(&cs) - 1;
  5173. }
  5174. #else
  5175. static int
  5176. find_close_core(int cpu, int core_offset __unused)
  5177. {
  5178. return cpu;
  5179. }
  5180. #endif
  5181. static int
  5182. get_core_offset(if_ctx_t ctx, iflib_intr_type_t type, int qid)
  5183. {
  5184. switch (type) {
  5185. case IFLIB_INTR_TX:
  5186. /* TX queues get cores which share at least an L2 cache with the corresponding RX queue */
5187. /* XXX handle multiple RX threads per core and more than two cores per L2 group */
  5188. return qid / CPU_COUNT(&ctx->ifc_cpus) + 1;
  5189. case IFLIB_INTR_RX:
  5190. case IFLIB_INTR_RXTX:
  5191. /* RX queues get the specified core */
  5192. return qid / CPU_COUNT(&ctx->ifc_cpus);
  5193. default:
  5194. return -1;
  5195. }
  5196. }
  5197. #else
  5198. #define get_core_offset(ctx, type, qid) CPU_FIRST()
  5199. #define find_close_core(cpuid, tid) CPU_FIRST()
  5200. #define find_nth(ctx, gid) CPU_FIRST()
  5201. #endif
  5202. /* Just to avoid copy/paste */
  5203. static inline int
  5204. iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type,
  5205. int qid, struct grouptask *gtask, struct taskqgroup *tqg, void *uniq,
  5206. const char *name)
  5207. {
  5208. device_t dev;
  5209. int co, cpuid, err, tid;
  5210. dev = ctx->ifc_dev;
  5211. co = ctx->ifc_sysctl_core_offset;
  5212. if (ctx->ifc_sysctl_separate_txrx && type == IFLIB_INTR_TX)
  5213. co += ctx->ifc_softc_ctx.isc_nrxqsets;
  5214. cpuid = find_nth(ctx, qid + co);
  5215. tid = get_core_offset(ctx, type, qid);
  5216. if (tid < 0) {
  5217. device_printf(dev, "get_core_offset failed\n");
  5218. return (EOPNOTSUPP);
  5219. }
  5220. cpuid = find_close_core(cpuid, tid);
  5221. err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid,
  5222. rman_get_start(irq->ii_res), name);
  5223. if (err) {
  5224. device_printf(dev, "taskqgroup_attach_cpu failed %d\n", err);
  5225. return (err);
  5226. }
  5227. #ifdef notyet
  5228. if (cpuid > ctx->ifc_cpuid_highest)
  5229. ctx->ifc_cpuid_highest = cpuid;
  5230. #endif
  5231. return (0);
  5232. }
  5233. int
  5234. iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
  5235. iflib_intr_type_t type, driver_filter_t *filter,
  5236. void *filter_arg, int qid, const char *name)
  5237. {
  5238. device_t dev;
  5239. struct grouptask *gtask;
  5240. struct taskqgroup *tqg;
  5241. iflib_filter_info_t info;
  5242. gtask_fn_t *fn;
  5243. int tqrid, err;
  5244. driver_filter_t *intr_fast;
  5245. void *q;
  5246. info = &ctx->ifc_filter_info;
  5247. tqrid = rid;
  5248. switch (type) {
  5249. /* XXX merge tx/rx for netmap? */
  5250. case IFLIB_INTR_TX:
  5251. q = &ctx->ifc_txqs[qid];
  5252. info = &ctx->ifc_txqs[qid].ift_filter_info;
  5253. gtask = &ctx->ifc_txqs[qid].ift_task;
  5254. tqg = qgroup_if_io_tqg;
  5255. fn = _task_fn_tx;
  5256. intr_fast = iflib_fast_intr;
  5257. GROUPTASK_INIT(gtask, 0, fn, q);
  5258. ctx->ifc_flags |= IFC_NETMAP_TX_IRQ;
  5259. break;
  5260. case IFLIB_INTR_RX:
  5261. q = &ctx->ifc_rxqs[qid];
  5262. info = &ctx->ifc_rxqs[qid].ifr_filter_info;
  5263. gtask = &ctx->ifc_rxqs[qid].ifr_task;
  5264. tqg = qgroup_if_io_tqg;
  5265. fn = _task_fn_rx;
  5266. intr_fast = iflib_fast_intr;
  5267. GROUPTASK_INIT(gtask, 0, fn, q);
  5268. break;
  5269. case IFLIB_INTR_RXTX:
  5270. q = &ctx->ifc_rxqs[qid];
  5271. info = &ctx->ifc_rxqs[qid].ifr_filter_info;
  5272. gtask = &ctx->ifc_rxqs[qid].ifr_task;
  5273. tqg = qgroup_if_io_tqg;
  5274. fn = _task_fn_rx;
  5275. intr_fast = iflib_fast_intr_rxtx;
  5276. GROUPTASK_INIT(gtask, 0, fn, q);
  5277. break;
  5278. case IFLIB_INTR_ADMIN:
  5279. q = ctx;
  5280. tqrid = -1;
  5281. info = &ctx->ifc_filter_info;
  5282. gtask = &ctx->ifc_admin_task;
  5283. tqg = qgroup_if_config_tqg;
  5284. fn = _task_fn_admin;
  5285. intr_fast = iflib_fast_intr_ctx;
  5286. break;
  5287. default:
  5288. device_printf(ctx->ifc_dev, "%s: unknown net intr type\n",
  5289. __func__);
  5290. return (EINVAL);
  5291. }
  5292. info->ifi_filter = filter;
  5293. info->ifi_filter_arg = filter_arg;
  5294. info->ifi_task = gtask;
  5295. info->ifi_ctx = q;
  5296. dev = ctx->ifc_dev;
  5297. err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name);
  5298. if (err != 0) {
  5299. device_printf(dev, "_iflib_irq_alloc failed %d\n", err);
  5300. return (err);
  5301. }
  5302. if (type == IFLIB_INTR_ADMIN)
  5303. return (0);
  5304. if (tqrid != -1) {
  5305. err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg,
  5306. q, name);
  5307. if (err)
  5308. return (err);
  5309. } else {
  5310. taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res),
  5311. name);
  5312. }
  5313. return (0);
  5314. }
  5315. void
  5316. iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, const char *name)
  5317. {
  5318. struct grouptask *gtask;
  5319. struct taskqgroup *tqg;
  5320. gtask_fn_t *fn;
  5321. void *q;
  5322. int err;
  5323. switch (type) {
  5324. case IFLIB_INTR_TX:
  5325. q = &ctx->ifc_txqs[qid];
  5326. gtask = &ctx->ifc_txqs[qid].ift_task;
  5327. tqg = qgroup_if_io_tqg;
  5328. fn = _task_fn_tx;
  5329. break;
  5330. case IFLIB_INTR_RX:
  5331. q = &ctx->ifc_rxqs[qid];
  5332. gtask = &ctx->ifc_rxqs[qid].ifr_task;
  5333. tqg = qgroup_if_io_tqg;
  5334. fn = _task_fn_rx;
  5335. break;
  5336. case IFLIB_INTR_IOV:
  5337. q = ctx;
  5338. gtask = &ctx->ifc_vflr_task;
  5339. tqg = qgroup_if_config_tqg;
  5340. fn = _task_fn_iov;
  5341. break;
  5342. default:
  5343. panic("unknown net intr type");
  5344. }
  5345. GROUPTASK_INIT(gtask, 0, fn, q);
  5346. if (irq != NULL) {
  5347. err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg,
  5348. q, name);
  5349. if (err)
  5350. taskqgroup_attach(tqg, gtask, q,
  5351. rman_get_start(irq->ii_res), name);
  5352. } else {
  5353. taskqgroup_attach(tqg, gtask, q, -1, name);
  5354. }
  5355. }
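/*
 * Usage sketch (hypothetical "foo" driver, illustrative only): a typical
 * IFDI_MSIX_INTR_ASSIGN implementation allocates one RX/TX vector per
 * queue pair with iflib_irq_alloc_generic(), hangs the TX soft interrupt
 * off the same vector with iflib_softirq_alloc_generic(), and adds one
 * admin/link vector at the end.  foo_msix_que()/foo_msix_admin() are the
 * driver's fast interrupt filters.
 *
 *	static int
 *	foo_if_msix_intr_assign(if_ctx_t ctx, int msix)
 *	{
 *		struct foo_softc *sc = iflib_get_softc(ctx);
 *		char name[16];
 *		int i, rid, err;
 *
 *		for (i = 0; i < sc->nqueues; i++) {
 *			rid = i + 1;
 *			snprintf(name, sizeof(name), "rxq%d", i);
 *			err = iflib_irq_alloc_generic(ctx, &sc->queues[i].irq,
 *			    rid, IFLIB_INTR_RXTX, foo_msix_que,
 *			    &sc->queues[i], i, name);
 *			if (err != 0)
 *				return (err);
 *			snprintf(name, sizeof(name), "txq%d", i);
 *			iflib_softirq_alloc_generic(ctx, &sc->queues[i].irq,
 *			    IFLIB_INTR_TX, &sc->queues[i], i, name);
 *		}
 *		rid = sc->nqueues + 1;
 *		return (iflib_irq_alloc_generic(ctx, &sc->admin_irq, rid,
 *		    IFLIB_INTR_ADMIN, foo_msix_admin, sc, 0, "admin"));
 *	}
 */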
  5356. void
  5357. iflib_irq_free(if_ctx_t ctx, if_irq_t irq)
  5358. {
  5359. if (irq->ii_tag)
  5360. bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag);
  5361. if (irq->ii_res)
  5362. bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ,
  5363. rman_get_rid(irq->ii_res), irq->ii_res);
  5364. }
  5365. static int
  5366. iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, const char *name)
  5367. {
  5368. iflib_txq_t txq = ctx->ifc_txqs;
  5369. iflib_rxq_t rxq = ctx->ifc_rxqs;
  5370. if_irq_t irq = &ctx->ifc_legacy_irq;
  5371. iflib_filter_info_t info;
  5372. struct grouptask *gtask;
  5373. struct taskqgroup *tqg;
  5374. gtask_fn_t *fn;
  5375. int tqrid;
  5376. void *q;
  5377. int err;
  5378. bool rx_only;
  5379. q = &ctx->ifc_rxqs[0];
  5380. info = &rxq[0].ifr_filter_info;
  5381. gtask = &rxq[0].ifr_task;
  5382. tqg = qgroup_if_io_tqg;
  5383. tqrid = irq->ii_rid = *rid;
  5384. fn = _task_fn_rx;
  5385. rx_only = (ctx->ifc_sctx->isc_flags & IFLIB_SINGLE_IRQ_RX_ONLY) != 0;
  5386. ctx->ifc_flags |= IFC_LEGACY;
  5387. info->ifi_filter = filter;
  5388. info->ifi_filter_arg = filter_arg;
  5389. info->ifi_task = gtask;
  5390. info->ifi_ctx = rx_only ? ctx : q;
  5391. /* We allocate a single interrupt resource */
  5392. err = _iflib_irq_alloc(ctx, irq, tqrid, rx_only ? iflib_fast_intr_ctx :
  5393. iflib_fast_intr_rxtx, NULL, info, name);
  5394. if (err != 0)
  5395. return (err);
  5396. GROUPTASK_INIT(gtask, 0, fn, q);
  5397. taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name);
  5398. GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq);
  5399. taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq,
  5400. rman_get_start(irq->ii_res), "tx");
  5401. return (0);
  5402. }
  5403. void
  5404. iflib_led_create(if_ctx_t ctx)
  5405. {
  5406. ctx->ifc_led_dev = led_create(iflib_led_func, ctx,
  5407. device_get_nameunit(ctx->ifc_dev));
  5408. }
  5409. void
  5410. iflib_tx_intr_deferred(if_ctx_t ctx, int txqid)
  5411. {
  5412. GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task);
  5413. }
  5414. void
  5415. iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid)
  5416. {
  5417. GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task);
  5418. }
  5419. void
  5420. iflib_admin_intr_deferred(if_ctx_t ctx)
  5421. {
  5422. #ifdef INVARIANTS
  5423. struct grouptask *gtask;
  5424. gtask = &ctx->ifc_admin_task;
  5425. MPASS(gtask != NULL && gtask->gt_taskqueue != NULL);
  5426. #endif
  5427. GROUPTASK_ENQUEUE(&ctx->ifc_admin_task);
  5428. }
  5429. void
  5430. iflib_iov_intr_deferred(if_ctx_t ctx)
  5431. {
  5432. GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task);
  5433. }
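/*
 * Usage sketch: the deferred-enqueue helpers above are what a driver's
 * fast interrupt filter calls after reading and masking the hardware
 * cause; the real work then runs from the taskqgroup.  Hypothetical
 * example for the admin/link vector:
 *
 *	static int
 *	foo_msix_admin(void *arg)
 *	{
 *		struct foo_softc *sc = arg;
 *
 *		(read and acknowledge the interrupt cause here)
 *		iflib_admin_intr_deferred(sc->ctx);
 *		return (FILTER_HANDLED);
 *	}
 */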
  5434. void
  5435. iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name)
  5436. {
  5437. taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name);
  5438. }
  5439. void
  5440. iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
  5441. const char *name)
  5442. {
  5443. GROUPTASK_INIT(gtask, 0, fn, ctx);
  5444. taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name);
  5445. }
  5446. void
  5447. iflib_config_gtask_deinit(struct grouptask *gtask)
  5448. {
  5449. taskqgroup_detach(qgroup_if_config_tqg, gtask);
  5450. }
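/*
 * Usage sketch (hypothetical "foo" driver): a driver with its own
 * slow-path work, e.g. SFP module hot-plug handling, can run it from
 * the config taskqgroup:
 *
 *	iflib_config_gtask_init(sc, &sc->mod_task, foo_handle_mod, "mod_task");
 *	...
 *	GROUPTASK_ENQUEUE(&sc->mod_task);
 *	...
 *	iflib_config_gtask_deinit(&sc->mod_task);
 */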
  5451. void
  5452. iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate)
  5453. {
  5454. if_t ifp = ctx->ifc_ifp;
  5455. iflib_txq_t txq = ctx->ifc_txqs;
  5456. if_setbaudrate(ifp, baudrate);
  5457. if (baudrate >= IF_Gbps(10)) {
  5458. STATE_LOCK(ctx);
  5459. ctx->ifc_flags |= IFC_PREFETCH;
  5460. STATE_UNLOCK(ctx);
  5461. }
  5462. /* If link down, disable watchdog */
  5463. if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) {
  5464. for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++)
  5465. txq->ift_qstatus = IFLIB_QUEUE_IDLE;
  5466. }
  5467. ctx->ifc_link_state = link_state;
  5468. if_link_state_change(ifp, link_state);
  5469. }
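/*
 * Usage sketch: drivers call this from their IFDI_UPDATE_ADMIN_STATUS
 * method (or link interrupt path) once the MAC/PHY state has been read;
 * the 10 Gb/s value below is just an example:
 *
 *	if (sc->link_up)
 *		iflib_link_state_change(ctx, LINK_STATE_UP, IF_Gbps(10));
 *	else
 *		iflib_link_state_change(ctx, LINK_STATE_DOWN, 0);
 */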
  5470. static int
  5471. iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq)
  5472. {
  5473. int credits;
  5474. #ifdef INVARIANTS
  5475. int credits_pre = txq->ift_cidx_processed;
  5476. #endif
  5477. if (ctx->isc_txd_credits_update == NULL)
  5478. return (0);
  5479. bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
  5480. BUS_DMASYNC_POSTREAD);
  5481. if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0)
  5482. return (0);
  5483. txq->ift_processed += credits;
  5484. txq->ift_cidx_processed += credits;
  5485. MPASS(credits_pre + credits == txq->ift_cidx_processed);
  5486. if (txq->ift_cidx_processed >= txq->ift_size)
  5487. txq->ift_cidx_processed -= txq->ift_size;
  5488. return (credits);
  5489. }
  5490. static int
  5491. iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget)
  5492. {
  5493. iflib_fl_t fl;
  5494. u_int i;
  5495. for (i = 0, fl = &rxq->ifr_fl[0]; i < rxq->ifr_nfl; i++, fl++)
  5496. bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
  5497. BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
  5498. return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx,
  5499. budget));
  5500. }
  5501. void
  5502. iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name,
  5503. const char *description, if_int_delay_info_t info,
  5504. int offset, int value)
  5505. {
  5506. info->iidi_ctx = ctx;
  5507. info->iidi_offset = offset;
  5508. info->iidi_value = value;
  5509. SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev),
  5510. SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)),
  5511. OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
  5512. info, 0, iflib_sysctl_int_delay, "I", description);
  5513. }
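/*
 * Usage sketch (hypothetical register offset and default value): an
 * e1000-style driver exposes a writable interrupt-delay knob backed by
 * a device register like so:
 *
 *	iflib_add_int_delay_sysctl(ctx, "rx_int_delay",
 *	    "receive interrupt delay in usecs", &sc->rx_int_delay,
 *	    FOO_RDTR_REG, foo_rx_int_delay_dflt);
 */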
  5514. struct sx *
  5515. iflib_ctx_lock_get(if_ctx_t ctx)
  5516. {
  5517. return (&ctx->ifc_ctx_sx);
  5518. }
  5519. static int
  5520. iflib_msix_init(if_ctx_t ctx)
  5521. {
  5522. device_t dev = ctx->ifc_dev;
  5523. if_shared_ctx_t sctx = ctx->ifc_sctx;
  5524. if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  5525. int admincnt, bar, err, iflib_num_rx_queues, iflib_num_tx_queues;
  5526. int msgs, queuemsgs, queues, rx_queues, tx_queues, vectors;
  5527. iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs;
  5528. iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs;
  5529. if (bootverbose)
  5530. device_printf(dev, "msix_init qsets capped at %d\n",
  5531. imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets));
  5532. /* Override by tuneable */
  5533. if (scctx->isc_disable_msix)
  5534. goto msi;
  5535. /* First try MSI-X */
  5536. if ((msgs = pci_msix_count(dev)) == 0) {
  5537. if (bootverbose)
  5538. device_printf(dev, "MSI-X not supported or disabled\n");
  5539. goto msi;
  5540. }
  5541. bar = ctx->ifc_softc_ctx.isc_msix_bar;
  5542. /*
  5543. * bar == -1 => "trust me I know what I'm doing"
  5544. * Some drivers are for hardware that is so shoddily
5545. * documented that no one knows which BARs are which,
5546. * so the developer has to map all BARs. This hack
  5547. * allows shoddy garbage to use MSI-X in this framework.
  5548. */
  5549. if (bar != -1) {
  5550. ctx->ifc_msix_mem = bus_alloc_resource_any(dev,
  5551. SYS_RES_MEMORY, &bar, RF_ACTIVE);
  5552. if (ctx->ifc_msix_mem == NULL) {
  5553. device_printf(dev, "Unable to map MSI-X table\n");
  5554. goto msi;
  5555. }
  5556. }
  5557. admincnt = sctx->isc_admin_intrcnt;
  5558. #if IFLIB_DEBUG
  5559. /* use only 1 qset in debug mode */
  5560. queuemsgs = min(msgs - admincnt, 1);
  5561. #else
  5562. queuemsgs = msgs - admincnt;
  5563. #endif
  5564. #ifdef RSS
  5565. queues = imin(queuemsgs, rss_getnumbuckets());
  5566. #else
  5567. queues = queuemsgs;
  5568. #endif
  5569. queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues);
  5570. if (bootverbose)
  5571. device_printf(dev,
  5572. "intr CPUs: %d queue msgs: %d admincnt: %d\n",
  5573. CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt);
  5574. #ifdef RSS
  5575. /* If we're doing RSS, clamp at the number of RSS buckets */
  5576. if (queues > rss_getnumbuckets())
  5577. queues = rss_getnumbuckets();
  5578. #endif
  5579. if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt)
  5580. rx_queues = iflib_num_rx_queues;
  5581. else
  5582. rx_queues = queues;
  5583. if (rx_queues > scctx->isc_nrxqsets)
  5584. rx_queues = scctx->isc_nrxqsets;
  5585. /*
  5586. * We want this to be all logical CPUs by default
  5587. */
  5588. if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues)
  5589. tx_queues = iflib_num_tx_queues;
  5590. else
  5591. tx_queues = mp_ncpus;
  5592. if (tx_queues > scctx->isc_ntxqsets)
  5593. tx_queues = scctx->isc_ntxqsets;
  5594. if (ctx->ifc_sysctl_qs_eq_override == 0) {
  5595. #ifdef INVARIANTS
  5596. if (tx_queues != rx_queues)
  5597. device_printf(dev,
  5598. "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n",
  5599. min(rx_queues, tx_queues), min(rx_queues, tx_queues));
  5600. #endif
  5601. tx_queues = min(rx_queues, tx_queues);
  5602. rx_queues = min(rx_queues, tx_queues);
  5603. }
  5604. vectors = rx_queues + admincnt;
  5605. if (msgs < vectors) {
  5606. device_printf(dev,
  5607. "insufficient number of MSI-X vectors "
  5608. "(supported %d, need %d)\n", msgs, vectors);
  5609. goto msi;
  5610. }
  5611. device_printf(dev, "Using %d RX queues %d TX queues\n", rx_queues,
  5612. tx_queues);
  5613. msgs = vectors;
  5614. if ((err = pci_alloc_msix(dev, &vectors)) == 0) {
  5615. if (vectors != msgs) {
  5616. device_printf(dev,
  5617. "Unable to allocate sufficient MSI-X vectors "
  5618. "(got %d, need %d)\n", vectors, msgs);
  5619. pci_release_msi(dev);
  5620. if (bar != -1) {
  5621. bus_release_resource(dev, SYS_RES_MEMORY, bar,
  5622. ctx->ifc_msix_mem);
  5623. ctx->ifc_msix_mem = NULL;
  5624. }
  5625. goto msi;
  5626. }
  5627. device_printf(dev, "Using MSI-X interrupts with %d vectors\n",
  5628. vectors);
  5629. scctx->isc_vectors = vectors;
  5630. scctx->isc_nrxqsets = rx_queues;
  5631. scctx->isc_ntxqsets = tx_queues;
  5632. scctx->isc_intr = IFLIB_INTR_MSIX;
  5633. return (vectors);
  5634. } else {
  5635. device_printf(dev,
  5636. "failed to allocate %d MSI-X vectors, err: %d\n", vectors,
  5637. err);
  5638. if (bar != -1) {
  5639. bus_release_resource(dev, SYS_RES_MEMORY, bar,
  5640. ctx->ifc_msix_mem);
  5641. ctx->ifc_msix_mem = NULL;
  5642. }
  5643. }
  5644. msi:
  5645. vectors = pci_msi_count(dev);
  5646. scctx->isc_nrxqsets = 1;
  5647. scctx->isc_ntxqsets = 1;
  5648. scctx->isc_vectors = vectors;
  5649. if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) {
5650. device_printf(dev, "Using an MSI interrupt\n");
  5651. scctx->isc_intr = IFLIB_INTR_MSI;
  5652. } else {
  5653. scctx->isc_vectors = 1;
5654. device_printf(dev, "Using a Legacy interrupt\n");
  5655. scctx->isc_intr = IFLIB_INTR_LEGACY;
  5656. }
  5657. return (vectors);
  5658. }
  5659. static const char *ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" };
  5660. static int
  5661. mp_ring_state_handler(SYSCTL_HANDLER_ARGS)
  5662. {
  5663. int rc;
  5664. uint16_t *state = ((uint16_t *)oidp->oid_arg1);
  5665. struct sbuf *sb;
  5666. const char *ring_state = "UNKNOWN";
  5667. /* XXX needed ? */
  5668. rc = sysctl_wire_old_buffer(req, 0);
  5669. MPASS(rc == 0);
  5670. if (rc != 0)
  5671. return (rc);
  5672. sb = sbuf_new_for_sysctl(NULL, NULL, 80, req);
  5673. MPASS(sb != NULL);
  5674. if (sb == NULL)
  5675. return (ENOMEM);
  5676. if (state[3] <= 3)
  5677. ring_state = ring_states[state[3]];
  5678. sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s",
  5679. state[0], state[1], state[2], ring_state);
  5680. rc = sbuf_finish(sb);
  5681. sbuf_delete(sb);
  5682. return(rc);
  5683. }
  5684. enum iflib_ndesc_handler {
  5685. IFLIB_NTXD_HANDLER,
  5686. IFLIB_NRXD_HANDLER,
  5687. };
  5688. static int
  5689. mp_ndesc_handler(SYSCTL_HANDLER_ARGS)
  5690. {
  5691. if_ctx_t ctx = (void *)arg1;
  5692. enum iflib_ndesc_handler type = arg2;
  5693. char buf[256] = {0};
  5694. qidx_t *ndesc;
  5695. char *p, *next;
  5696. int nqs, rc, i;
  5697. nqs = 8;
5698. switch (type) {
  5699. case IFLIB_NTXD_HANDLER:
  5700. ndesc = ctx->ifc_sysctl_ntxds;
  5701. if (ctx->ifc_sctx)
  5702. nqs = ctx->ifc_sctx->isc_ntxqs;
  5703. break;
  5704. case IFLIB_NRXD_HANDLER:
  5705. ndesc = ctx->ifc_sysctl_nrxds;
  5706. if (ctx->ifc_sctx)
  5707. nqs = ctx->ifc_sctx->isc_nrxqs;
  5708. break;
  5709. default:
  5710. printf("%s: unhandled type\n", __func__);
  5711. return (EINVAL);
  5712. }
  5713. if (nqs == 0)
  5714. nqs = 8;
5715. for (i = 0; i < 8; i++) {
  5716. if (i >= nqs)
  5717. break;
  5718. if (i)
  5719. strcat(buf, ",");
  5720. sprintf(strchr(buf, 0), "%d", ndesc[i]);
  5721. }
  5722. rc = sysctl_handle_string(oidp, buf, sizeof(buf), req);
  5723. if (rc || req->newptr == NULL)
  5724. return rc;
  5725. for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p;
  5726. i++, p = strsep(&next, " ,")) {
  5727. ndesc[i] = strtoul(p, NULL, 10);
  5728. }
  5729. return(rc);
  5730. }
  5731. #define NAME_BUFLEN 32
  5732. static void
  5733. iflib_add_device_sysctl_pre(if_ctx_t ctx)
  5734. {
  5735. device_t dev = iflib_get_dev(ctx);
  5736. struct sysctl_oid_list *child, *oid_list;
  5737. struct sysctl_ctx_list *ctx_list;
  5738. struct sysctl_oid *node;
  5739. ctx_list = device_get_sysctl_ctx(dev);
  5740. child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
  5741. ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib",
  5742. CTLFLAG_RD, NULL, "IFLIB fields");
  5743. oid_list = SYSCTL_CHILDREN(node);
  5744. SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version",
  5745. CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version,
  5746. "driver version");
  5747. SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs",
  5748. CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0,
  5749. "# of txqs to use, 0 => use default #");
  5750. SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs",
  5751. CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0,
  5752. "# of rxqs to use, 0 => use default #");
  5753. SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable",
  5754. CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0,
  5755. "permit #txq != #rxq");
  5756. SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix",
  5757. CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0,
  5758. "disable MSI-X (default 0)");
  5759. SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget",
  5760. CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0,
  5761. "set the RX budget");
  5762. SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate",
  5763. CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0,
  5764. "cause TX to abdicate instead of running to completion");
  5765. ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED;
  5766. SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset",
  5767. CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0,
  5768. "offset to start using cores at");
  5769. SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx",
  5770. CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0,
  5771. "use separate cores for TX and RX");
  5772. /* XXX change for per-queue sizes */
  5773. SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds",
  5774. CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER,
  5775. mp_ndesc_handler, "A",
  5776. "list of # of TX descriptors to use, 0 = use default #");
  5777. SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds",
  5778. CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER,
  5779. mp_ndesc_handler, "A",
  5780. "list of # of RX descriptors to use, 0 = use default #");
  5781. }
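/*
 * The knobs above appear under dev.<driver>.<unit>.iflib and, being
 * RWTUN/RDTUN, can also be preset as tunables, e.g. in loader.conf for
 * a hypothetical foo(4) instance:
 *
 *	dev.foo.0.iflib.override_nrxqs="4"
 *	dev.foo.0.iflib.override_nrxds="2048"
 *	dev.foo.0.iflib.core_offset="8"
 */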
  5782. static void
  5783. iflib_add_device_sysctl_post(if_ctx_t ctx)
  5784. {
  5785. if_shared_ctx_t sctx = ctx->ifc_sctx;
  5786. if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
  5787. device_t dev = iflib_get_dev(ctx);
  5788. struct sysctl_oid_list *child;
  5789. struct sysctl_ctx_list *ctx_list;
  5790. iflib_fl_t fl;
  5791. iflib_txq_t txq;
  5792. iflib_rxq_t rxq;
  5793. int i, j;
  5794. char namebuf[NAME_BUFLEN];
  5795. char *qfmt;
  5796. struct sysctl_oid *queue_node, *fl_node, *node;
  5797. struct sysctl_oid_list *queue_list, *fl_list;
  5798. ctx_list = device_get_sysctl_ctx(dev);
  5799. node = ctx->ifc_sysctl_node;
  5800. child = SYSCTL_CHILDREN(node);
  5801. if (scctx->isc_ntxqsets > 100)
  5802. qfmt = "txq%03d";
  5803. else if (scctx->isc_ntxqsets > 10)
  5804. qfmt = "txq%02d";
  5805. else
  5806. qfmt = "txq%d";
  5807. for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) {
  5808. snprintf(namebuf, NAME_BUFLEN, qfmt, i);
  5809. queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf,
  5810. CTLFLAG_RD, NULL, "Queue Name");
  5811. queue_list = SYSCTL_CHILDREN(queue_node);
  5812. #if MEMORY_LOGGING
  5813. SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued",
  5814. CTLFLAG_RD,
  5815. &txq->ift_dequeued, "total mbufs freed");
  5816. SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued",
  5817. CTLFLAG_RD,
  5818. &txq->ift_enqueued, "total mbufs enqueued");
  5819. #endif
  5820. SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag",
  5821. CTLFLAG_RD,
  5822. &txq->ift_mbuf_defrag, "# of times m_defrag was called");
  5823. SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups",
  5824. CTLFLAG_RD,
  5825. &txq->ift_pullups, "# of times m_pullup was called");
  5826. SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed",
  5827. CTLFLAG_RD,
  5828. &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed");
  5829. SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail",
  5830. CTLFLAG_RD,
  5831. &txq->ift_no_desc_avail, "# of times no descriptors were available");
  5832. SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed",
  5833. CTLFLAG_RD,
  5834. &txq->ift_map_failed, "# of times DMA map failed");
  5835. SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig",
  5836. CTLFLAG_RD,
  5837. &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG");
  5838. SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup",
  5839. CTLFLAG_RD,
5840. &txq->ift_no_tx_dma_setup, "# of times DMA map setup failed for reasons other than EFBIG");
  5841. SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx",
  5842. CTLFLAG_RD,
  5843. &txq->ift_pidx, 1, "Producer Index");
  5844. SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx",
  5845. CTLFLAG_RD,
  5846. &txq->ift_cidx, 1, "Consumer Index");
  5847. SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed",
  5848. CTLFLAG_RD,
  5849. &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update");
  5850. SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use",
  5851. CTLFLAG_RD,
  5852. &txq->ift_in_use, 1, "descriptors in use");
  5853. SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed",
  5854. CTLFLAG_RD,
5855. &txq->ift_processed, "descriptors processed for clean");
  5856. SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned",
  5857. CTLFLAG_RD,
  5858. &txq->ift_cleaned, "total cleaned");
  5859. SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state",
  5860. CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br->state),
  5861. 0, mp_ring_state_handler, "A", "soft ring state");
  5862. SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues",
  5863. CTLFLAG_RD, &txq->ift_br->enqueues,
  5864. "# of enqueues to the mp_ring for this queue");
  5865. SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops",
  5866. CTLFLAG_RD, &txq->ift_br->drops,
  5867. "# of drops in the mp_ring for this queue");
  5868. SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts",
  5869. CTLFLAG_RD, &txq->ift_br->starts,
  5870. "# of normal consumer starts in the mp_ring for this queue");
  5871. SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls",
  5872. CTLFLAG_RD, &txq->ift_br->stalls,
  5873. "# of consumer stalls in the mp_ring for this queue");
  5874. SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts",
  5875. CTLFLAG_RD, &txq->ift_br->restarts,
  5876. "# of consumer restarts in the mp_ring for this queue");
  5877. SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications",
  5878. CTLFLAG_RD, &txq->ift_br->abdications,
  5879. "# of consumer abdications in the mp_ring for this queue");
  5880. }
  5881. if (scctx->isc_nrxqsets > 100)
  5882. qfmt = "rxq%03d";
  5883. else if (scctx->isc_nrxqsets > 10)
  5884. qfmt = "rxq%02d";
  5885. else
  5886. qfmt = "rxq%d";
  5887. for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) {
  5888. snprintf(namebuf, NAME_BUFLEN, qfmt, i);
  5889. queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf,
  5890. CTLFLAG_RD, NULL, "Queue Name");
  5891. queue_list = SYSCTL_CHILDREN(queue_node);
  5892. if (sctx->isc_flags & IFLIB_HAS_RXCQ) {
  5893. SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx",
  5894. CTLFLAG_RD,
  5895. &rxq->ifr_cq_cidx, 1, "Consumer Index");
  5896. }
  5897. for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) {
  5898. snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j);
  5899. fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf,
  5900. CTLFLAG_RD, NULL, "freelist Name");
  5901. fl_list = SYSCTL_CHILDREN(fl_node);
  5902. SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx",
  5903. CTLFLAG_RD,
  5904. &fl->ifl_pidx, 1, "Producer Index");
  5905. SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx",
  5906. CTLFLAG_RD,
  5907. &fl->ifl_cidx, 1, "Consumer Index");
  5908. SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits",
  5909. CTLFLAG_RD,
  5910. &fl->ifl_credits, 1, "credits available");
  5911. #if MEMORY_LOGGING
  5912. SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued",
  5913. CTLFLAG_RD,
  5914. &fl->ifl_m_enqueued, "mbufs allocated");
  5915. SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued",
  5916. CTLFLAG_RD,
  5917. &fl->ifl_m_dequeued, "mbufs freed");
  5918. SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued",
  5919. CTLFLAG_RD,
  5920. &fl->ifl_cl_enqueued, "clusters allocated");
  5921. SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued",
  5922. CTLFLAG_RD,
  5923. &fl->ifl_cl_dequeued, "clusters freed");
  5924. #endif
  5925. }
  5926. }
  5927. }
  5928. void
  5929. iflib_request_reset(if_ctx_t ctx)
  5930. {
  5931. STATE_LOCK(ctx);
  5932. ctx->ifc_flags |= IFC_DO_RESET;
  5933. STATE_UNLOCK(ctx);
  5934. }
  5935. #ifndef __NO_STRICT_ALIGNMENT
  5936. static struct mbuf *
  5937. iflib_fixup_rx(struct mbuf *m)
  5938. {
  5939. struct mbuf *n;
  5940. if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
  5941. bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
  5942. m->m_data += ETHER_HDR_LEN;
  5943. n = m;
  5944. } else {
  5945. MGETHDR(n, M_NOWAIT, MT_DATA);
  5946. if (n == NULL) {
  5947. m_freem(m);
  5948. return (NULL);
  5949. }
  5950. bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
  5951. m->m_data += ETHER_HDR_LEN;
  5952. m->m_len -= ETHER_HDR_LEN;
  5953. n->m_len = ETHER_HDR_LEN;
  5954. M_MOVE_PKTHDR(n, m);
  5955. n->m_next = m;
  5956. }
  5957. return (n);
  5958. }
  5959. #endif
  5960. #ifdef NETDUMP
  5961. static void
  5962. iflib_netdump_init(if_t ifp, int *nrxr, int *ncl, int *clsize)
  5963. {
  5964. if_ctx_t ctx;
  5965. ctx = if_getsoftc(ifp);
  5966. CTX_LOCK(ctx);
  5967. *nrxr = NRXQSETS(ctx);
  5968. *ncl = ctx->ifc_rxqs[0].ifr_fl->ifl_size;
  5969. *clsize = ctx->ifc_rxqs[0].ifr_fl->ifl_buf_size;
  5970. CTX_UNLOCK(ctx);
  5971. }
  5972. static void
  5973. iflib_netdump_event(if_t ifp, enum netdump_ev event)
  5974. {
  5975. if_ctx_t ctx;
  5976. if_softc_ctx_t scctx;
  5977. iflib_fl_t fl;
  5978. iflib_rxq_t rxq;
  5979. int i, j;
  5980. ctx = if_getsoftc(ifp);
  5981. scctx = &ctx->ifc_softc_ctx;
  5982. switch (event) {
  5983. case NETDUMP_START:
  5984. for (i = 0; i < scctx->isc_nrxqsets; i++) {
  5985. rxq = &ctx->ifc_rxqs[i];
  5986. for (j = 0; j < rxq->ifr_nfl; j++) {
  5987. fl = rxq->ifr_fl;
  5988. fl->ifl_zone = m_getzone(fl->ifl_buf_size);
  5989. }
  5990. }
  5991. iflib_no_tx_batch = 1;
  5992. break;
  5993. default:
  5994. break;
  5995. }
  5996. }
  5997. static int
  5998. iflib_netdump_transmit(if_t ifp, struct mbuf *m)
  5999. {
  6000. if_ctx_t ctx;
  6001. iflib_txq_t txq;
  6002. int error;
  6003. ctx = if_getsoftc(ifp);
  6004. if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
  6005. IFF_DRV_RUNNING)
  6006. return (EBUSY);
  6007. txq = &ctx->ifc_txqs[0];
  6008. error = iflib_encap(txq, &m);
  6009. if (error == 0)
  6010. (void)iflib_txd_db_check(ctx, txq, true, txq->ift_in_use);
  6011. return (error);
  6012. }
  6013. static int
  6014. iflib_netdump_poll(if_t ifp, int count)
  6015. {
  6016. if_ctx_t ctx;
  6017. if_softc_ctx_t scctx;
  6018. iflib_txq_t txq;
  6019. int i;
  6020. ctx = if_getsoftc(ifp);
  6021. scctx = &ctx->ifc_softc_ctx;
  6022. if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
  6023. IFF_DRV_RUNNING)
  6024. return (EBUSY);
  6025. txq = &ctx->ifc_txqs[0];
  6026. (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx));
  6027. for (i = 0; i < scctx->isc_nrxqsets; i++)
  6028. (void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */);
  6029. return (0);
  6030. }
  6031. #endif /* NETDUMP */