HardenedBSD src tree https://hardenedbsd.org/
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

4088 lines
99 KiB

  1. /*-
  2. * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  3. *
  4. * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
  5. * All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice unmodified, this list of conditions, and the following
  12. * disclaimer.
  13. * 2. Redistributions in binary form must reproduce the above copyright
  14. * notice, this list of conditions and the following disclaimer in the
  15. * documentation and/or other materials provided with the distribution.
  16. *
  17. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18. * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  21. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  22. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26. * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. */
  28. /* Driver for VirtIO network devices. */
  29. #include <sys/cdefs.h>
  30. __FBSDID("$FreeBSD$");
  31. #include <sys/param.h>
  32. #include <sys/eventhandler.h>
  33. #include <sys/systm.h>
  34. #include <sys/kernel.h>
  35. #include <sys/sockio.h>
  36. #include <sys/mbuf.h>
  37. #include <sys/malloc.h>
  38. #include <sys/module.h>
  39. #include <sys/socket.h>
  40. #include <sys/sysctl.h>
  41. #include <sys/random.h>
  42. #include <sys/sglist.h>
  43. #include <sys/lock.h>
  44. #include <sys/mutex.h>
  45. #include <sys/taskqueue.h>
  46. #include <sys/smp.h>
  47. #include <machine/smp.h>
  48. #include <vm/uma.h>
  49. #include <net/ethernet.h>
  50. #include <net/if.h>
  51. #include <net/if_var.h>
  52. #include <net/if_arp.h>
  53. #include <net/if_dl.h>
  54. #include <net/if_types.h>
  55. #include <net/if_media.h>
  56. #include <net/if_vlan_var.h>
  57. #include <net/bpf.h>
  58. #include <netinet/in_systm.h>
  59. #include <netinet/in.h>
  60. #include <netinet/ip.h>
  61. #include <netinet/ip6.h>
  62. #include <netinet6/ip6_var.h>
  63. #include <netinet/udp.h>
  64. #include <netinet/tcp.h>
  65. #include <netinet/netdump/netdump.h>
  66. #include <machine/bus.h>
  67. #include <machine/resource.h>
  68. #include <sys/bus.h>
  69. #include <sys/rman.h>
  70. #include <dev/virtio/virtio.h>
  71. #include <dev/virtio/virtqueue.h>
  72. #include <dev/virtio/network/virtio_net.h>
  73. #include <dev/virtio/network/if_vtnetvar.h>
  74. #include "virtio_if.h"
  75. #include "opt_inet.h"
  76. #include "opt_inet6.h"
  77. static int vtnet_modevent(module_t, int, void *);
  78. static int vtnet_probe(device_t);
  79. static int vtnet_attach(device_t);
  80. static int vtnet_detach(device_t);
  81. static int vtnet_suspend(device_t);
  82. static int vtnet_resume(device_t);
  83. static int vtnet_shutdown(device_t);
  84. static int vtnet_attach_completed(device_t);
  85. static int vtnet_config_change(device_t);
  86. static void vtnet_negotiate_features(struct vtnet_softc *);
  87. static void vtnet_setup_features(struct vtnet_softc *);
  88. static int vtnet_init_rxq(struct vtnet_softc *, int);
  89. static int vtnet_init_txq(struct vtnet_softc *, int);
  90. static int vtnet_alloc_rxtx_queues(struct vtnet_softc *);
  91. static void vtnet_free_rxtx_queues(struct vtnet_softc *);
  92. static int vtnet_alloc_rx_filters(struct vtnet_softc *);
  93. static void vtnet_free_rx_filters(struct vtnet_softc *);
  94. static int vtnet_alloc_virtqueues(struct vtnet_softc *);
  95. static int vtnet_setup_interface(struct vtnet_softc *);
  96. static int vtnet_change_mtu(struct vtnet_softc *, int);
  97. static int vtnet_ioctl(struct ifnet *, u_long, caddr_t);
  98. static uint64_t vtnet_get_counter(struct ifnet *, ift_counter);
  99. static int vtnet_rxq_populate(struct vtnet_rxq *);
  100. static void vtnet_rxq_free_mbufs(struct vtnet_rxq *);
  101. static struct mbuf *
  102. vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
  103. static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
  104. struct mbuf *, int);
  105. static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
  106. static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
  107. static int vtnet_rxq_new_buf(struct vtnet_rxq *);
  108. static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
  109. struct virtio_net_hdr *);
  110. static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
  111. static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
  112. static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
  113. static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
  114. struct virtio_net_hdr *);
  115. static int vtnet_rxq_eof(struct vtnet_rxq *);
  116. static void vtnet_rx_vq_intr(void *);
  117. static void vtnet_rxq_tq_intr(void *, int);
  118. static int vtnet_txq_below_threshold(struct vtnet_txq *);
  119. static int vtnet_txq_notify(struct vtnet_txq *);
  120. static void vtnet_txq_free_mbufs(struct vtnet_txq *);
  121. static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
  122. int *, int *, int *);
  123. static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
  124. int, struct virtio_net_hdr *);
  125. static struct mbuf *
  126. vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
  127. struct virtio_net_hdr *);
  128. static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
  129. struct vtnet_tx_header *);
  130. static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
  131. #ifdef VTNET_LEGACY_TX
  132. static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
  133. static void vtnet_start(struct ifnet *);
  134. #else
  135. static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
  136. static int vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
  137. static void vtnet_txq_tq_deferred(void *, int);
  138. #endif
  139. static void vtnet_txq_start(struct vtnet_txq *);
  140. static void vtnet_txq_tq_intr(void *, int);
  141. static int vtnet_txq_eof(struct vtnet_txq *);
  142. static void vtnet_tx_vq_intr(void *);
  143. static void vtnet_tx_start_all(struct vtnet_softc *);
  144. #ifndef VTNET_LEGACY_TX
  145. static void vtnet_qflush(struct ifnet *);
  146. #endif
  147. static int vtnet_watchdog(struct vtnet_txq *);
  148. static void vtnet_accum_stats(struct vtnet_softc *,
  149. struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
  150. static void vtnet_tick(void *);
  151. static void vtnet_start_taskqueues(struct vtnet_softc *);
  152. static void vtnet_free_taskqueues(struct vtnet_softc *);
  153. static void vtnet_drain_taskqueues(struct vtnet_softc *);
  154. static void vtnet_drain_rxtx_queues(struct vtnet_softc *);
  155. static void vtnet_stop_rendezvous(struct vtnet_softc *);
  156. static void vtnet_stop(struct vtnet_softc *);
  157. static int vtnet_virtio_reinit(struct vtnet_softc *);
  158. static void vtnet_init_rx_filters(struct vtnet_softc *);
  159. static int vtnet_init_rx_queues(struct vtnet_softc *);
  160. static int vtnet_init_tx_queues(struct vtnet_softc *);
  161. static int vtnet_init_rxtx_queues(struct vtnet_softc *);
  162. static void vtnet_set_active_vq_pairs(struct vtnet_softc *);
  163. static int vtnet_reinit(struct vtnet_softc *);
  164. static void vtnet_init_locked(struct vtnet_softc *, int);
  165. static void vtnet_init(void *);
  166. static void vtnet_free_ctrl_vq(struct vtnet_softc *);
  167. static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
  168. struct sglist *, int, int);
  169. static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
  170. static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
  171. static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
  172. static int vtnet_set_promisc(struct vtnet_softc *, int);
  173. static int vtnet_set_allmulti(struct vtnet_softc *, int);
  174. static void vtnet_attach_disable_promisc(struct vtnet_softc *);
  175. static void vtnet_rx_filter(struct vtnet_softc *);
  176. static void vtnet_rx_filter_mac(struct vtnet_softc *);
  177. static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
  178. static void vtnet_rx_filter_vlan(struct vtnet_softc *);
  179. static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
  180. static void vtnet_register_vlan(void *, struct ifnet *, uint16_t);
  181. static void vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
  182. static int vtnet_is_link_up(struct vtnet_softc *);
  183. static void vtnet_update_link_status(struct vtnet_softc *);
  184. static int vtnet_ifmedia_upd(struct ifnet *);
  185. static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
  186. static void vtnet_get_hwaddr(struct vtnet_softc *);
  187. static void vtnet_set_hwaddr(struct vtnet_softc *);
  188. static void vtnet_vlan_tag_remove(struct mbuf *);
  189. static void vtnet_set_rx_process_limit(struct vtnet_softc *);
  190. static void vtnet_set_tx_intr_threshold(struct vtnet_softc *);
  191. static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
  192. struct sysctl_oid_list *, struct vtnet_rxq *);
  193. static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
  194. struct sysctl_oid_list *, struct vtnet_txq *);
  195. static void vtnet_setup_queue_sysctl(struct vtnet_softc *);
  196. static void vtnet_setup_sysctl(struct vtnet_softc *);
  197. static int vtnet_rxq_enable_intr(struct vtnet_rxq *);
  198. static void vtnet_rxq_disable_intr(struct vtnet_rxq *);
  199. static int vtnet_txq_enable_intr(struct vtnet_txq *);
  200. static void vtnet_txq_disable_intr(struct vtnet_txq *);
  201. static void vtnet_enable_rx_interrupts(struct vtnet_softc *);
  202. static void vtnet_enable_tx_interrupts(struct vtnet_softc *);
  203. static void vtnet_enable_interrupts(struct vtnet_softc *);
  204. static void vtnet_disable_rx_interrupts(struct vtnet_softc *);
  205. static void vtnet_disable_tx_interrupts(struct vtnet_softc *);
  206. static void vtnet_disable_interrupts(struct vtnet_softc *);
  207. static int vtnet_tunable_int(struct vtnet_softc *, const char *, int);
  208. NETDUMP_DEFINE(vtnet);
  209. /* Tunables. */
  210. static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters");
  211. static int vtnet_csum_disable = 0;
  212. TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
  213. SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
  214. &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
  215. static int vtnet_tso_disable = 0;
  216. TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
  217. SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable,
  218. 0, "Disables TCP Segmentation Offload");
  219. static int vtnet_lro_disable = 0;
  220. TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
  221. SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable,
  222. 0, "Disables TCP Large Receive Offload");
  223. static int vtnet_mq_disable = 0;
  224. TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
  225. SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable,
  226. 0, "Disables Multi Queue support");
  227. static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
  228. TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
  229. SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
  230. &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs");
  231. static int vtnet_rx_process_limit = 512;
  232. TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
  233. SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
  234. &vtnet_rx_process_limit, 0,
  235. "Limits the number RX segments processed in a single pass");
  236. static uma_zone_t vtnet_tx_header_zone;
  237. static struct virtio_feature_desc vtnet_feature_desc[] = {
  238. { VIRTIO_NET_F_CSUM, "TxChecksum" },
  239. { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" },
  240. { VIRTIO_NET_F_MAC, "MacAddress" },
  241. { VIRTIO_NET_F_GSO, "TxAllGSO" },
  242. { VIRTIO_NET_F_GUEST_TSO4, "RxTSOv4" },
  243. { VIRTIO_NET_F_GUEST_TSO6, "RxTSOv6" },
  244. { VIRTIO_NET_F_GUEST_ECN, "RxECN" },
  245. { VIRTIO_NET_F_GUEST_UFO, "RxUFO" },
  246. { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" },
  247. { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" },
  248. { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" },
  249. { VIRTIO_NET_F_HOST_UFO, "TxUFO" },
  250. { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" },
  251. { VIRTIO_NET_F_STATUS, "Status" },
  252. { VIRTIO_NET_F_CTRL_VQ, "ControlVq" },
  253. { VIRTIO_NET_F_CTRL_RX, "RxMode" },
  254. { VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" },
  255. { VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" },
  256. { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" },
  257. { VIRTIO_NET_F_MQ, "Multiqueue" },
  258. { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" },
  259. { 0, NULL }
  260. };
  261. static device_method_t vtnet_methods[] = {
  262. /* Device methods. */
  263. DEVMETHOD(device_probe, vtnet_probe),
  264. DEVMETHOD(device_attach, vtnet_attach),
  265. DEVMETHOD(device_detach, vtnet_detach),
  266. DEVMETHOD(device_suspend, vtnet_suspend),
  267. DEVMETHOD(device_resume, vtnet_resume),
  268. DEVMETHOD(device_shutdown, vtnet_shutdown),
  269. /* VirtIO methods. */
  270. DEVMETHOD(virtio_attach_completed, vtnet_attach_completed),
  271. DEVMETHOD(virtio_config_change, vtnet_config_change),
  272. DEVMETHOD_END
  273. };
  274. #ifdef DEV_NETMAP
  275. #include <dev/netmap/if_vtnet_netmap.h>
  276. #endif /* DEV_NETMAP */
  277. static driver_t vtnet_driver = {
  278. "vtnet",
  279. vtnet_methods,
  280. sizeof(struct vtnet_softc)
  281. };
  282. static devclass_t vtnet_devclass;
  283. DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass,
  284. vtnet_modevent, 0);
  285. DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
  286. vtnet_modevent, 0);
  287. MODULE_VERSION(vtnet, 1);
  288. MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
  289. #ifdef DEV_NETMAP
  290. MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
  291. #endif /* DEV_NETMAP */
  292. static int
  293. vtnet_modevent(module_t mod, int type, void *unused)
  294. {
  295. int error = 0;
  296. static int loaded = 0;
  297. switch (type) {
  298. case MOD_LOAD:
  299. if (loaded++ == 0)
  300. vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
  301. sizeof(struct vtnet_tx_header),
  302. NULL, NULL, NULL, NULL, 0, 0);
  303. break;
  304. case MOD_QUIESCE:
  305. if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
  306. error = EBUSY;
  307. break;
  308. case MOD_UNLOAD:
  309. if (--loaded == 0) {
  310. uma_zdestroy(vtnet_tx_header_zone);
  311. vtnet_tx_header_zone = NULL;
  312. }
  313. break;
  314. case MOD_SHUTDOWN:
  315. break;
  316. default:
  317. error = EOPNOTSUPP;
  318. break;
  319. }
  320. return (error);
  321. }
  322. static int
  323. vtnet_probe(device_t dev)
  324. {
  325. if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
  326. return (ENXIO);
  327. device_set_desc(dev, "VirtIO Networking Adapter");
  328. return (BUS_PROBE_DEFAULT);
  329. }
  330. static int
  331. vtnet_attach(device_t dev)
  332. {
  333. struct vtnet_softc *sc;
  334. int error;
  335. sc = device_get_softc(dev);
  336. sc->vtnet_dev = dev;
  337. /* Register our feature descriptions. */
  338. virtio_set_feature_desc(dev, vtnet_feature_desc);
  339. VTNET_CORE_LOCK_INIT(sc);
  340. callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
  341. vtnet_setup_sysctl(sc);
  342. vtnet_setup_features(sc);
  343. error = vtnet_alloc_rx_filters(sc);
  344. if (error) {
  345. device_printf(dev, "cannot allocate Rx filters\n");
  346. goto fail;
  347. }
  348. error = vtnet_alloc_rxtx_queues(sc);
  349. if (error) {
  350. device_printf(dev, "cannot allocate queues\n");
  351. goto fail;
  352. }
  353. error = vtnet_alloc_virtqueues(sc);
  354. if (error) {
  355. device_printf(dev, "cannot allocate virtqueues\n");
  356. goto fail;
  357. }
  358. error = vtnet_setup_interface(sc);
  359. if (error) {
  360. device_printf(dev, "cannot setup interface\n");
  361. goto fail;
  362. }
  363. error = virtio_setup_intr(dev, INTR_TYPE_NET);
  364. if (error) {
  365. device_printf(dev, "cannot setup virtqueue interrupts\n");
  366. /* BMV: This will crash if during boot! */
  367. ether_ifdetach(sc->vtnet_ifp);
  368. goto fail;
  369. }
  370. #ifdef DEV_NETMAP
  371. vtnet_netmap_attach(sc);
  372. #endif /* DEV_NETMAP */
  373. vtnet_start_taskqueues(sc);
  374. fail:
  375. if (error)
  376. vtnet_detach(dev);
  377. return (error);
  378. }
  379. static int
  380. vtnet_detach(device_t dev)
  381. {
  382. struct vtnet_softc *sc;
  383. struct ifnet *ifp;
  384. sc = device_get_softc(dev);
  385. ifp = sc->vtnet_ifp;
  386. if (device_is_attached(dev)) {
  387. VTNET_CORE_LOCK(sc);
  388. vtnet_stop(sc);
  389. VTNET_CORE_UNLOCK(sc);
  390. callout_drain(&sc->vtnet_tick_ch);
  391. vtnet_drain_taskqueues(sc);
  392. ether_ifdetach(ifp);
  393. }
  394. #ifdef DEV_NETMAP
  395. netmap_detach(ifp);
  396. #endif /* DEV_NETMAP */
  397. vtnet_free_taskqueues(sc);
  398. if (sc->vtnet_vlan_attach != NULL) {
  399. EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
  400. sc->vtnet_vlan_attach = NULL;
  401. }
  402. if (sc->vtnet_vlan_detach != NULL) {
  403. EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
  404. sc->vtnet_vlan_detach = NULL;
  405. }
  406. ifmedia_removeall(&sc->vtnet_media);
  407. if (ifp != NULL) {
  408. if_free(ifp);
  409. sc->vtnet_ifp = NULL;
  410. }
  411. vtnet_free_rxtx_queues(sc);
  412. vtnet_free_rx_filters(sc);
  413. if (sc->vtnet_ctrl_vq != NULL)
  414. vtnet_free_ctrl_vq(sc);
  415. VTNET_CORE_LOCK_DESTROY(sc);
  416. return (0);
  417. }
  418. static int
  419. vtnet_suspend(device_t dev)
  420. {
  421. struct vtnet_softc *sc;
  422. sc = device_get_softc(dev);
  423. VTNET_CORE_LOCK(sc);
  424. vtnet_stop(sc);
  425. sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
  426. VTNET_CORE_UNLOCK(sc);
  427. return (0);
  428. }
  429. static int
  430. vtnet_resume(device_t dev)
  431. {
  432. struct vtnet_softc *sc;
  433. struct ifnet *ifp;
  434. sc = device_get_softc(dev);
  435. ifp = sc->vtnet_ifp;
  436. VTNET_CORE_LOCK(sc);
  437. if (ifp->if_flags & IFF_UP)
  438. vtnet_init_locked(sc, 0);
  439. sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
  440. VTNET_CORE_UNLOCK(sc);
  441. return (0);
  442. }
  443. static int
  444. vtnet_shutdown(device_t dev)
  445. {
  446. /*
  447. * Suspend already does all of what we need to
  448. * do here; we just never expect to be resumed.
  449. */
  450. return (vtnet_suspend(dev));
  451. }
  452. static int
  453. vtnet_attach_completed(device_t dev)
  454. {
  455. vtnet_attach_disable_promisc(device_get_softc(dev));
  456. return (0);
  457. }
  458. static int
  459. vtnet_config_change(device_t dev)
  460. {
  461. struct vtnet_softc *sc;
  462. sc = device_get_softc(dev);
  463. VTNET_CORE_LOCK(sc);
  464. vtnet_update_link_status(sc);
  465. if (sc->vtnet_link_active != 0)
  466. vtnet_tx_start_all(sc);
  467. VTNET_CORE_UNLOCK(sc);
  468. return (0);
  469. }
  470. static void
  471. vtnet_negotiate_features(struct vtnet_softc *sc)
  472. {
  473. device_t dev;
  474. uint64_t mask, features;
  475. dev = sc->vtnet_dev;
  476. mask = 0;
  477. /*
  478. * TSO and LRO are only available when their corresponding checksum
  479. * offload feature is also negotiated.
  480. */
  481. if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
  482. mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
  483. mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
  484. }
  485. if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
  486. mask |= VTNET_TSO_FEATURES;
  487. if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
  488. mask |= VTNET_LRO_FEATURES;
  489. #ifndef VTNET_LEGACY_TX
  490. if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
  491. mask |= VIRTIO_NET_F_MQ;
  492. #else
  493. mask |= VIRTIO_NET_F_MQ;
  494. #endif
  495. features = VTNET_FEATURES & ~mask;
  496. sc->vtnet_features = virtio_negotiate_features(dev, features);
  497. if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
  498. virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
  499. /*
  500. * LRO without mergeable buffers requires special care. This
  501. * is not ideal because every receive buffer must be large
  502. * enough to hold the maximum TCP packet, the Ethernet header,
  503. * and the header. This requires up to 34 descriptors with
  504. * MCLBYTES clusters. If we do not have indirect descriptors,
  505. * LRO is disabled since the virtqueue will not contain very
  506. * many receive buffers.
  507. */
  508. if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
  509. device_printf(dev,
  510. "LRO disabled due to both mergeable buffers and "
  511. "indirect descriptors not negotiated\n");
  512. features &= ~VTNET_LRO_FEATURES;
  513. sc->vtnet_features =
  514. virtio_negotiate_features(dev, features);
  515. } else
  516. sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
  517. }
  518. }
  519. static void
  520. vtnet_setup_features(struct vtnet_softc *sc)
  521. {
  522. device_t dev;
  523. dev = sc->vtnet_dev;
  524. vtnet_negotiate_features(sc);
  525. if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
  526. sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
  527. if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
  528. sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
  529. if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
  530. /* This feature should always be negotiated. */
  531. sc->vtnet_flags |= VTNET_FLAG_MAC;
  532. }
  533. if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
  534. sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
  535. sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
  536. } else
  537. sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
  538. if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
  539. sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
  540. else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
  541. sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS;
  542. else
  543. sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS;
  544. if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
  545. virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
  546. virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
  547. sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
  548. else
  549. sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;
  550. if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
  551. sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
  552. if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
  553. sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
  554. if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
  555. sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
  556. if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
  557. sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
  558. }
  559. if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
  560. sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
  561. sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
  562. offsetof(struct virtio_net_config, max_virtqueue_pairs));
  563. } else
  564. sc->vtnet_max_vq_pairs = 1;
  565. if (sc->vtnet_max_vq_pairs > 1) {
  566. /*
  567. * Limit the maximum number of queue pairs to the lower of
  568. * the number of CPUs and the configured maximum.
  569. * The actual number of queues that get used may be less.
  570. */
  571. int max;
  572. max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
  573. if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) {
  574. if (max > mp_ncpus)
  575. max = mp_ncpus;
  576. if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
  577. max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX;
  578. if (max > 1) {
  579. sc->vtnet_requested_vq_pairs = max;
  580. sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
  581. }
  582. }
  583. }
  584. }
  585. static int
  586. vtnet_init_rxq(struct vtnet_softc *sc, int id)
  587. {
  588. struct vtnet_rxq *rxq;
  589. rxq = &sc->vtnet_rxqs[id];
  590. snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
  591. device_get_nameunit(sc->vtnet_dev), id);
  592. mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
  593. rxq->vtnrx_sc = sc;
  594. rxq->vtnrx_id = id;
  595. rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
  596. if (rxq->vtnrx_sg == NULL)
  597. return (ENOMEM);
  598. TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
  599. rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
  600. taskqueue_thread_enqueue, &rxq->vtnrx_tq);
  601. return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
  602. }
  603. static int
  604. vtnet_init_txq(struct vtnet_softc *sc, int id)
  605. {
  606. struct vtnet_txq *txq;
  607. txq = &sc->vtnet_txqs[id];
  608. snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
  609. device_get_nameunit(sc->vtnet_dev), id);
  610. mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
  611. txq->vtntx_sc = sc;
  612. txq->vtntx_id = id;
  613. txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
  614. if (txq->vtntx_sg == NULL)
  615. return (ENOMEM);
  616. #ifndef VTNET_LEGACY_TX
  617. txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
  618. M_NOWAIT, &txq->vtntx_mtx);
  619. if (txq->vtntx_br == NULL)
  620. return (ENOMEM);
  621. TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
  622. #endif
  623. TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
  624. txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
  625. taskqueue_thread_enqueue, &txq->vtntx_tq);
  626. if (txq->vtntx_tq == NULL)
  627. return (ENOMEM);
  628. return (0);
  629. }
  630. static int
  631. vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
  632. {
  633. int i, npairs, error;
  634. npairs = sc->vtnet_max_vq_pairs;
  635. sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
  636. M_NOWAIT | M_ZERO);
  637. sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
  638. M_NOWAIT | M_ZERO);
  639. if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
  640. return (ENOMEM);
  641. for (i = 0; i < npairs; i++) {
  642. error = vtnet_init_rxq(sc, i);
  643. if (error)
  644. return (error);
  645. error = vtnet_init_txq(sc, i);
  646. if (error)
  647. return (error);
  648. }
  649. vtnet_setup_queue_sysctl(sc);
  650. return (0);
  651. }
  652. static void
  653. vtnet_destroy_rxq(struct vtnet_rxq *rxq)
  654. {
  655. rxq->vtnrx_sc = NULL;
  656. rxq->vtnrx_id = -1;
  657. if (rxq->vtnrx_sg != NULL) {
  658. sglist_free(rxq->vtnrx_sg);
  659. rxq->vtnrx_sg = NULL;
  660. }
  661. if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
  662. mtx_destroy(&rxq->vtnrx_mtx);
  663. }
  664. static void
  665. vtnet_destroy_txq(struct vtnet_txq *txq)
  666. {
  667. txq->vtntx_sc = NULL;
  668. txq->vtntx_id = -1;
  669. if (txq->vtntx_sg != NULL) {
  670. sglist_free(txq->vtntx_sg);
  671. txq->vtntx_sg = NULL;
  672. }
  673. #ifndef VTNET_LEGACY_TX
  674. if (txq->vtntx_br != NULL) {
  675. buf_ring_free(txq->vtntx_br, M_DEVBUF);
  676. txq->vtntx_br = NULL;
  677. }
  678. #endif
  679. if (mtx_initialized(&txq->vtntx_mtx) != 0)
  680. mtx_destroy(&txq->vtntx_mtx);
  681. }
  682. static void
  683. vtnet_free_rxtx_queues(struct vtnet_softc *sc)
  684. {
  685. int i;
  686. if (sc->vtnet_rxqs != NULL) {
  687. for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
  688. vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
  689. free(sc->vtnet_rxqs, M_DEVBUF);
  690. sc->vtnet_rxqs = NULL;
  691. }
  692. if (sc->vtnet_txqs != NULL) {
  693. for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
  694. vtnet_destroy_txq(&sc->vtnet_txqs[i]);
  695. free(sc->vtnet_txqs, M_DEVBUF);
  696. sc->vtnet_txqs = NULL;
  697. }
  698. }
  699. static int
  700. vtnet_alloc_rx_filters(struct vtnet_softc *sc)
  701. {
  702. if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
  703. sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
  704. M_DEVBUF, M_NOWAIT | M_ZERO);
  705. if (sc->vtnet_mac_filter == NULL)
  706. return (ENOMEM);
  707. }
  708. if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
  709. sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
  710. VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
  711. if (sc->vtnet_vlan_filter == NULL)
  712. return (ENOMEM);
  713. }
  714. return (0);
  715. }
  716. static void
  717. vtnet_free_rx_filters(struct vtnet_softc *sc)
  718. {
  719. if (sc->vtnet_mac_filter != NULL) {
  720. free(sc->vtnet_mac_filter, M_DEVBUF);
  721. sc->vtnet_mac_filter = NULL;
  722. }
  723. if (sc->vtnet_vlan_filter != NULL) {
  724. free(sc->vtnet_vlan_filter, M_DEVBUF);
  725. sc->vtnet_vlan_filter = NULL;
  726. }
  727. }
  728. static int
  729. vtnet_alloc_virtqueues(struct vtnet_softc *sc)
  730. {
  731. device_t dev;
  732. struct vq_alloc_info *info;
  733. struct vtnet_rxq *rxq;
  734. struct vtnet_txq *txq;
  735. int i, idx, flags, nvqs, error;
  736. dev = sc->vtnet_dev;
  737. flags = 0;
  738. nvqs = sc->vtnet_max_vq_pairs * 2;
  739. if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
  740. nvqs++;
  741. info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
  742. if (info == NULL)
  743. return (ENOMEM);
  744. for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) {
  745. rxq = &sc->vtnet_rxqs[i];
  746. VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
  747. vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
  748. "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);
  749. txq = &sc->vtnet_txqs[i];
  750. VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
  751. vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
  752. "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
  753. }
  754. if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
  755. VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
  756. &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
  757. }
  758. /*
  759. * Enable interrupt binding if this is multiqueue. This only matters
  760. * when per-vq MSIX is available.
  761. */
  762. if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
  763. flags |= 0;
  764. error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
  765. free(info, M_TEMP);
  766. return (error);
  767. }
  768. static int
  769. vtnet_setup_interface(struct vtnet_softc *sc)
  770. {
  771. device_t dev;
  772. struct ifnet *ifp;
  773. dev = sc->vtnet_dev;
  774. ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
  775. if (ifp == NULL) {
  776. device_printf(dev, "cannot allocate ifnet structure\n");
  777. return (ENOSPC);
  778. }
  779. if_initname(ifp, device_get_name(dev), device_get_unit(dev));
  780. ifp->if_baudrate = IF_Gbps(10); /* Approx. */
  781. ifp->if_softc = sc;
  782. ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
  783. ifp->if_init = vtnet_init;
  784. ifp->if_ioctl = vtnet_ioctl;
  785. ifp->if_get_counter = vtnet_get_counter;
  786. #ifndef VTNET_LEGACY_TX
  787. ifp->if_transmit = vtnet_txq_mq_start;
  788. ifp->if_qflush = vtnet_qflush;
  789. #else
  790. struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
  791. ifp->if_start = vtnet_start;
  792. IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
  793. ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
  794. IFQ_SET_READY(&ifp->if_snd);
  795. #endif
  796. ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
  797. vtnet_ifmedia_sts);
  798. ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
  799. ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
  800. /* Read (or generate) the MAC address for the adapter. */
  801. vtnet_get_hwaddr(sc);
  802. ether_ifattach(ifp, sc->vtnet_hwaddr);
  803. if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
  804. ifp->if_capabilities |= IFCAP_LINKSTATE;
  805. /* Tell the upper layer(s) we support long frames. */
  806. ifp->if_hdrlen = sizeof(struct ether_vlan_header);
  807. ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
  808. if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
  809. ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
  810. if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
  811. ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
  812. sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
  813. } else {
  814. if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
  815. ifp->if_capabilities |= IFCAP_TSO4;
  816. if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
  817. ifp->if_capabilities |= IFCAP_TSO6;
  818. if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
  819. sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
  820. }
  821. if (ifp->if_capabilities & IFCAP_TSO)
  822. ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
  823. }
  824. if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
  825. ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
  826. if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
  827. virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
  828. ifp->if_capabilities |= IFCAP_LRO;
  829. }
  830. if (ifp->if_capabilities & IFCAP_HWCSUM) {
  831. /*
  832. * VirtIO does not support VLAN tagging, but we can fake
  833. * it by inserting and removing the 802.1Q header during
  834. * transmit and receive. We are then able to do checksum
  835. * offloading of VLAN frames.
  836. */
  837. ifp->if_capabilities |=
  838. IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
  839. }
  840. ifp->if_capenable = ifp->if_capabilities;
  841. /*
  842. * Capabilities after here are not enabled by default.
  843. */
  844. if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
  845. ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
  846. sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
  847. vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
  848. sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
  849. vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
  850. }
  851. vtnet_set_rx_process_limit(sc);
  852. vtnet_set_tx_intr_threshold(sc);
  853. NETDUMP_SET(ifp, vtnet);
  854. return (0);
  855. }
  856. static int
  857. vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
  858. {
  859. struct ifnet *ifp;
  860. int frame_size, clsize;
  861. ifp = sc->vtnet_ifp;
  862. if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
  863. return (EINVAL);
  864. frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
  865. new_mtu;
  866. /*
  867. * Based on the new MTU (and hence frame size) determine which
  868. * cluster size is most appropriate for the receive queues.
  869. */
  870. if (frame_size <= MCLBYTES) {
  871. clsize = MCLBYTES;
  872. } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
  873. /* Avoid going past 9K jumbos. */
  874. if (frame_size > MJUM9BYTES)
  875. return (EINVAL);
  876. clsize = MJUM9BYTES;
  877. } else
  878. clsize = MJUMPAGESIZE;
  879. ifp->if_mtu = new_mtu;
  880. sc->vtnet_rx_new_clsize = clsize;
  881. if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
  882. ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
  883. vtnet_init_locked(sc, 0);
  884. }
  885. return (0);
  886. }
  887. static int
  888. vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
  889. {
  890. struct vtnet_softc *sc;
  891. struct ifreq *ifr;
  892. int reinit, mask, error;
  893. sc = ifp->if_softc;
  894. ifr = (struct ifreq *) data;
  895. error = 0;
  896. switch (cmd) {
  897. case SIOCSIFMTU:
  898. if (ifp->if_mtu != ifr->ifr_mtu) {
  899. VTNET_CORE_LOCK(sc);
  900. error = vtnet_change_mtu(sc, ifr->ifr_mtu);
  901. VTNET_CORE_UNLOCK(sc);
  902. }
  903. break;
  904. case SIOCSIFFLAGS:
  905. VTNET_CORE_LOCK(sc);
  906. if ((ifp->if_flags & IFF_UP) == 0) {
  907. if (ifp->if_drv_flags & IFF_DRV_RUNNING)
  908. vtnet_stop(sc);
  909. } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
  910. if ((ifp->if_flags ^ sc->vtnet_if_flags) &
  911. (IFF_PROMISC | IFF_ALLMULTI)) {
  912. if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
  913. vtnet_rx_filter(sc);
  914. else {
  915. ifp->if_flags |= IFF_PROMISC;
  916. if ((ifp->if_flags ^ sc->vtnet_if_flags)
  917. & IFF_ALLMULTI)
  918. error = ENOTSUP;
  919. }
  920. }
  921. } else
  922. vtnet_init_locked(sc, 0);
  923. if (error == 0)
  924. sc->vtnet_if_flags = ifp->if_flags;
  925. VTNET_CORE_UNLOCK(sc);
  926. break;
  927. case SIOCADDMULTI:
  928. case SIOCDELMULTI:
  929. if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
  930. break;
  931. VTNET_CORE_LOCK(sc);
  932. if (ifp->if_drv_flags & IFF_DRV_RUNNING)
  933. vtnet_rx_filter_mac(sc);
  934. VTNET_CORE_UNLOCK(sc);
  935. break;
  936. case SIOCSIFMEDIA:
  937. case SIOCGIFMEDIA:
  938. error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
  939. break;
  940. case SIOCSIFCAP:
  941. VTNET_CORE_LOCK(sc);
  942. mask = ifr->ifr_reqcap ^ ifp->if_capenable;
  943. if (mask & IFCAP_TXCSUM)
  944. ifp->if_capenable ^= IFCAP_TXCSUM;
  945. if (mask & IFCAP_TXCSUM_IPV6)
  946. ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
  947. if (mask & IFCAP_TSO4)
  948. ifp->if_capenable ^= IFCAP_TSO4;
  949. if (mask & IFCAP_TSO6)
  950. ifp->if_capenable ^= IFCAP_TSO6;
  951. if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
  952. IFCAP_VLAN_HWFILTER)) {
  953. /* These Rx features require us to renegotiate. */
  954. reinit = 1;
  955. if (mask & IFCAP_RXCSUM)
  956. ifp->if_capenable ^= IFCAP_RXCSUM;
  957. if (mask & IFCAP_RXCSUM_IPV6)
  958. ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
  959. if (mask & IFCAP_LRO)
  960. ifp->if_capenable ^= IFCAP_LRO;
  961. if (mask & IFCAP_VLAN_HWFILTER)
  962. ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
  963. } else
  964. reinit = 0;
  965. if (mask & IFCAP_VLAN_HWTSO)
  966. ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
  967. if (mask & IFCAP_VLAN_HWTAGGING)
  968. ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
  969. if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
  970. ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
  971. vtnet_init_locked(sc, 0);
  972. }
  973. VTNET_CORE_UNLOCK(sc);
  974. VLAN_CAPABILITIES(ifp);
  975. break;
  976. default:
  977. error = ether_ioctl(ifp, cmd, data);
  978. break;
  979. }
  980. VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
  981. return (error);
  982. }
  983. static int
  984. vtnet_rxq_populate(struct vtnet_rxq *rxq)
  985. {
  986. struct virtqueue *vq;
  987. int nbufs, error;
  988. #ifdef DEV_NETMAP
  989. error = vtnet_netmap_rxq_populate(rxq);
  990. if (error >= 0)
  991. return (error);
  992. #endif /* DEV_NETMAP */
  993. vq = rxq->vtnrx_vq;
  994. error = ENOSPC;
  995. for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
  996. error = vtnet_rxq_new_buf(rxq);
  997. if (error)
  998. break;
  999. }
  1000. if (nbufs > 0) {
  1001. virtqueue_notify(vq);
  1002. /*
  1003. * EMSGSIZE signifies the virtqueue did not have enough
  1004. * entries available to hold the last mbuf. This is not
  1005. * an error.
  1006. */
  1007. if (error == EMSGSIZE)
  1008. error = 0;
  1009. }
  1010. return (error);
  1011. }
  1012. static void
  1013. vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
  1014. {
  1015. struct virtqueue *vq;
  1016. struct mbuf *m;
  1017. int last;
  1018. #ifdef DEV_NETMAP
  1019. struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
  1020. rxq->vtnrx_id, NR_RX);
  1021. #else /* !DEV_NETMAP */
  1022. void *kring = NULL;
  1023. #endif /* !DEV_NETMAP */
  1024. vq = rxq->vtnrx_vq;
  1025. last = 0;
  1026. while ((m = virtqueue_drain(vq, &last)) != NULL) {
  1027. if (kring == NULL)
  1028. m_freem(m);
  1029. }
  1030. KASSERT(virtqueue_empty(vq),
  1031. ("%s: mbufs remaining in rx queue %p", __func__, rxq));
  1032. }
  1033. static struct mbuf *
  1034. vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
  1035. {
  1036. struct mbuf *m_head, *m_tail, *m;
  1037. int i, clsize;
  1038. clsize = sc->vtnet_rx_clsize;
  1039. KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
  1040. ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
  1041. m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
  1042. if (m_head == NULL)
  1043. goto fail;
  1044. m_head->m_len = clsize;
  1045. m_tail = m_head;
  1046. /* Allocate the rest of the chain. */
  1047. for (i = 1; i < nbufs; i++) {
  1048. m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
  1049. if (m == NULL)
  1050. goto fail;
  1051. m->m_len = clsize;
  1052. m_tail->m_next = m;
  1053. m_tail = m;
  1054. }
  1055. if (m_tailp != NULL)
  1056. *m_tailp = m_tail;
  1057. return (m_head);
  1058. fail:
  1059. sc->vtnet_stats.mbuf_alloc_failed++;
  1060. m_freem(m_head);
  1061. return (NULL);
  1062. }
  1063. /*
  1064. * Slow path for when LRO without mergeable buffers is negotiated.
  1065. */
  1066. static int
  1067. vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
  1068. int len0)
  1069. {
  1070. struct vtnet_softc *sc;
  1071. struct mbuf *m, *m_prev;
  1072. struct mbuf *m_new, *m_tail;
  1073. int len, clsize, nreplace, error;
  1074. sc = rxq->vtnrx_sc;
  1075. clsize = sc->vtnet_rx_clsize;
  1076. m_prev = NULL;
  1077. m_tail = NULL;
  1078. nreplace = 0;
  1079. m = m0;
  1080. len = len0;
  1081. /*
  1082. * Since these mbuf chains are so large, we avoid allocating an
  1083. * entire replacement chain if possible. When the received frame
  1084. * did not consume the entire chain, the unused mbufs are moved
  1085. * to the replacement chain.
  1086. */
  1087. while (len > 0) {
  1088. /*
  1089. * Something is seriously wrong if we received a frame
  1090. * larger than the chain. Drop it.
  1091. */
  1092. if (m == NULL) {
  1093. sc->vtnet_stats.rx_frame_too_large++;
  1094. return (EMSGSIZE);
  1095. }
  1096. /* We always allocate the same cluster size. */
  1097. KASSERT(m->m_len == clsize,
  1098. ("%s: mbuf size %d is not the cluster size %d",
  1099. __func__, m->m_len, clsize));
  1100. m->m_len = MIN(m->m_len, len);
  1101. len -= m->m_len;
  1102. m_prev = m;
  1103. m = m->m_next;
  1104. nreplace++;
  1105. }
  1106. KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
  1107. ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
  1108. sc->vtnet_rx_nmbufs));
  1109. m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
  1110. if (m_new == NULL) {
  1111. m_prev->m_len = clsize;
  1112. return (ENOBUFS);
  1113. }
  1114. /*
  1115. * Move any unused mbufs from the received chain onto the end
  1116. * of the new chain.
  1117. */
  1118. if (m_prev->m_next != NULL) {
  1119. m_tail->m_next = m_prev->m_next;
  1120. m_prev->m_next = NULL;
  1121. }
  1122. error = vtnet_rxq_enqueue_buf(rxq, m_new);
  1123. if (error) {
  1124. /*
  1125. * BAD! We could not enqueue the replacement mbuf chain. We
  1126. * must restore the m0 chain to the original state if it was
  1127. * modified so we can subsequently discard it.
  1128. *
  1129. * NOTE: The replacement is suppose to be an identical copy
  1130. * to the one just dequeued so this is an unexpected error.
  1131. */
  1132. sc->vtnet_stats.rx_enq_replacement_failed++;
  1133. if (m_tail->m_next != NULL) {
  1134. m_prev->m_next = m_tail->m_next;
  1135. m_tail->m_next = NULL;
  1136. }
  1137. m_prev->m_len = clsize;
  1138. m_freem(m_new);
  1139. }
  1140. return (error);
  1141. }
  1142. static int
  1143. vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
  1144. {
  1145. struct vtnet_softc *sc;
  1146. struct mbuf *m_new;
  1147. int error;
  1148. sc = rxq->vtnrx_sc;
  1149. KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
  1150. ("%s: chained mbuf without LRO_NOMRG", __func__));
  1151. if (m->m_next == NULL) {
  1152. /* Fast-path for the common case of just one mbuf. */
  1153. if (m->m_len < len)
  1154. return (EINVAL);
  1155. m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
  1156. if (m_new == NULL)
  1157. return (ENOBUFS);
  1158. error = vtnet_rxq_enqueue_buf(rxq, m_new);
  1159. if (error) {
  1160. /*
  1161. * The new mbuf is suppose to be an identical
  1162. * copy of the one just dequeued so this is an
  1163. * unexpected error.
  1164. */
  1165. m_freem(m_new);
  1166. sc->vtnet_stats.rx_enq_replacement_failed++;
  1167. } else
  1168. m->m_len = len;
  1169. } else
  1170. error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
  1171. return (error);
  1172. }
  1173. static int
  1174. vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
  1175. {
  1176. struct vtnet_softc *sc;
  1177. struct sglist *sg;
  1178. struct vtnet_rx_header *rxhdr;
  1179. uint8_t *mdata;
  1180. int offset, error;
  1181. sc = rxq->vtnrx_sc;
  1182. sg = rxq->vtnrx_sg;
  1183. mdata = mtod(m, uint8_t *);
  1184. VTNET_RXQ_LOCK_ASSERT(rxq);
  1185. KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
  1186. ("%s: chained mbuf without LRO_NOMRG", __func__));
  1187. KASSERT(m->m_len == sc->vtnet_rx_clsize,
  1188. ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
  1189. sc->vtnet_rx_clsize));
  1190. sglist_reset(sg);
  1191. if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
  1192. MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
  1193. rxhdr = (struct vtnet_rx_header *) mdata;
  1194. sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
  1195. offset = sizeof(struct vtnet_rx_header);
  1196. } else
  1197. offset = 0;
  1198. sglist_append(sg, mdata + offset, m->m_len - offset);
  1199. if (m->m_next != NULL) {
  1200. error = sglist_append_mbuf(sg, m->m_next);
  1201. MPASS(error == 0);
  1202. }
  1203. error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg);
  1204. return (error);
  1205. }
  1206. static int
  1207. vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
  1208. {
  1209. struct vtnet_softc *sc;
  1210. struct mbuf *m;
  1211. int error;
  1212. sc = rxq->vtnrx_sc;
  1213. m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
  1214. if (m == NULL)
  1215. return (ENOBUFS);
  1216. error = vtnet_rxq_enqueue_buf(rxq, m);
  1217. if (error)
  1218. m_freem(m);
  1219. return (error);
  1220. }
  1221. /*
  1222. * Use the checksum offset in the VirtIO header to set the
  1223. * correct CSUM_* flags.
  1224. */
  1225. static int
  1226. vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
  1227. uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
  1228. {
  1229. struct vtnet_softc *sc;
  1230. #if defined(INET) || defined(INET6)
  1231. int offset = hdr->csum_start + hdr->csum_offset;
  1232. #endif
  1233. sc = rxq->vtnrx_sc;
  1234. /* Only do a basic sanity check on the offset. */
  1235. switch (eth_type) {
  1236. #if defined(INET)
  1237. case ETHERTYPE_IP:
  1238. if (__predict_false(offset < ip_start + sizeof(struct ip)))
  1239. return (1);
  1240. break;
  1241. #endif
  1242. #if defined(INET6)
  1243. case ETHERTYPE_IPV6:
  1244. if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
  1245. return (1);
  1246. break;
  1247. #endif
  1248. default:
  1249. sc->vtnet_stats.rx_csum_bad_ethtype++;
  1250. return (1);
  1251. }
  1252. /*
  1253. * Use the offset to determine the appropriate CSUM_* flags. This is
  1254. * a bit dirty, but we can get by with it since the checksum offsets
  1255. * happen to be different. We assume the host host does not do IPv4
  1256. * header checksum offloading.
  1257. */
  1258. switch (hdr->csum_offset) {
  1259. case offsetof(struct udphdr, uh_sum):
  1260. case offsetof(struct tcphdr, th_sum):
  1261. m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
  1262. m->m_pkthdr.csum_data = 0xFFFF;
  1263. break;
  1264. default:
  1265. sc->vtnet_stats.rx_csum_bad_offset++;
  1266. return (1);
  1267. }
  1268. return (0);
  1269. }
  1270. static int
  1271. vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
  1272. uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
  1273. {
  1274. struct vtnet_softc *sc;
  1275. int offset, proto;
  1276. sc = rxq->vtnrx_sc;
  1277. switch (eth_type) {
  1278. #if defined(INET)
  1279. case ETHERTYPE_IP: {
  1280. struct ip *ip;
  1281. if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
  1282. return (1);
  1283. ip = (struct ip *)(m->m_data + ip_start);
  1284. proto = ip->ip_p;
  1285. offset = ip_start + (ip->ip_hl << 2);
  1286. break;
  1287. }
  1288. #endif
  1289. #if defined(INET6)
  1290. case ETHERTYPE_IPV6:
  1291. if (__predict_false(m->m_len < ip_start +
  1292. sizeof(struct ip6_hdr)))
  1293. return (1);
  1294. offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
  1295. if (__predict_false(offset < 0))
  1296. return (1);
  1297. break;
  1298. #endif
  1299. default:
  1300. sc->vtnet_stats.rx_csum_bad_ethtype++;
  1301. return (1);
  1302. }
  1303. switch (proto) {
  1304. case IPPROTO_TCP:
  1305. if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
  1306. return (1);
  1307. m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
  1308. m->m_pkthdr.csum_data = 0xFFFF;
  1309. break;
  1310. case IPPROTO_UDP:
  1311. if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
  1312. return (1);
  1313. m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
  1314. m->m_pkthdr.csum_data = 0xFFFF;
  1315. break;
  1316. default:
  1317. /*
  1318. * For the remaining protocols, FreeBSD does not support
  1319. * checksum offloading, so the checksum will be recomputed.
  1320. */
  1321. #if 0
  1322. if_printf(sc->vtnet_ifp, "cksum offload of unsupported "
  1323. "protocol eth_type=%#x proto=%d csum_start=%d "
  1324. "csum_offset=%d\n", __func__, eth_type, proto,
  1325. hdr->csum_start, hdr->csum_offset);
  1326. #endif
  1327. break;
  1328. }
  1329. return (0);
  1330. }
  1331. /*
  1332. * Set the appropriate CSUM_* flags. Unfortunately, the information
  1333. * provided is not directly useful to us. The VirtIO header gives the
  1334. * offset of the checksum, which is all Linux needs, but this is not
  1335. * how FreeBSD does things. We are forced to peek inside the packet
  1336. * a bit.
  1337. *
  1338. * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
  1339. * could accept the offsets and let the stack figure it out.
  1340. */
  1341. static int
  1342. vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
  1343. struct virtio_net_hdr *hdr)
  1344. {
  1345. struct ether_header *eh;
  1346. struct ether_vlan_header *evh;
  1347. uint16_t eth_type;
  1348. int offset, error;
  1349. eh = mtod(m, struct ether_header *);
  1350. eth_type = ntohs(eh->ether_type);
  1351. if (eth_type == ETHERTYPE_VLAN) {
  1352. /* BMV: We should handle nested VLAN tags too. */
  1353. evh = mtod(m, struct ether_vlan_header *);
  1354. eth_type = ntohs(evh->evl_proto);
  1355. offset = sizeof(struct ether_vlan_header);
  1356. } else
  1357. offset = sizeof(struct ether_header);
  1358. if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
  1359. error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
  1360. else
  1361. error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
  1362. return (error);
  1363. }
  1364. static void
  1365. vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
  1366. {
  1367. struct mbuf *m;
  1368. while (--nbufs > 0) {
  1369. m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
  1370. if (m == NULL)
  1371. break;
  1372. vtnet_rxq_discard_buf(rxq, m);
  1373. }
  1374. }
  1375. static void
  1376. vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
  1377. {
  1378. int error;
  1379. /*
  1380. * Requeue the discarded mbuf. This should always be successful
  1381. * since it was just dequeued.
  1382. */
  1383. error = vtnet_rxq_enqueue_buf(rxq, m);
  1384. KASSERT(error == 0,
  1385. ("%s: cannot requeue discarded mbuf %d", __func__, error));
  1386. }
  1387. static int
  1388. vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
  1389. {
  1390. struct vtnet_softc *sc;
  1391. struct virtqueue *vq;
  1392. struct mbuf *m, *m_tail;
  1393. int len;
  1394. sc = rxq->vtnrx_sc;
  1395. vq = rxq->vtnrx_vq;
  1396. m_tail = m_head;
  1397. while (--nbufs > 0) {
  1398. m = virtqueue_dequeue(vq, &len);
  1399. if (m == NULL) {
  1400. rxq->vtnrx_stats.vrxs_ierrors++;
  1401. goto fail;
  1402. }
  1403. if (vtnet_rxq_new_buf(rxq) != 0) {
  1404. rxq->vtnrx_stats.vrxs_iqdrops++;
  1405. vtnet_rxq_discard_buf(rxq, m);
  1406. if (nbufs > 1)
  1407. vtnet_rxq_discard_merged_bufs(rxq, nbufs);
  1408. goto fail;
  1409. }
  1410. if (m->m_len < len)
  1411. len = m->m_len;
  1412. m->m_len = len;
  1413. m->m_flags &= ~M_PKTHDR;
  1414. m_head->m_pkthdr.len += len;
  1415. m_tail->m_next = m;
  1416. m_tail = m;
  1417. }
  1418. return (0);
  1419. fail:
  1420. sc->vtnet_stats.rx_mergeable_failed++;
  1421. m_freem(m_head);
  1422. return (1);
  1423. }
  1424. static void
  1425. vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
  1426. struct virtio_net_hdr *hdr)
  1427. {
  1428. struct vtnet_softc *sc;
  1429. struct ifnet *ifp;
  1430. struct ether_header *eh;
  1431. sc = rxq->vtnrx_sc;
  1432. ifp = sc->vtnet_ifp;
  1433. if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
  1434. eh = mtod(m, struct ether_header *);
  1435. if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
  1436. vtnet_vlan_tag_remove(m);
  1437. /*
  1438. * With the 802.1Q header removed, update the
  1439. * checksum starting location accordingly.
  1440. */
  1441. if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
  1442. hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
  1443. }
  1444. }
  1445. m->m_pkthdr.flowid = rxq->vtnrx_id;
  1446. M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
  1447. /*
  1448. * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
  1449. * distinction that Linux does. Need to reevaluate if performing
  1450. * offloading for the NEEDS_CSUM case is really appropriate.
  1451. */
  1452. if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
  1453. VIRTIO_NET_HDR_F_DATA_VALID)) {
  1454. if (vtnet_rxq_csum(rxq, m, hdr) == 0)
  1455. rxq->vtnrx_stats.vrxs_csum++;
  1456. else
  1457. rxq->vtnrx_stats.vrxs_csum_failed++;
  1458. }
  1459. rxq->vtnrx_stats.vrxs_ipackets++;
  1460. rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
  1461. VTNET_RXQ_UNLOCK(rxq);
  1462. (*ifp->if_input)(ifp, m);
  1463. VTNET_RXQ_LOCK(rxq);
  1464. }
  1465. static int
  1466. vtnet_rxq_eof(struct vtnet_rxq *rxq)
  1467. {
  1468. struct virtio_net_hdr lhdr, *hdr;
  1469. struct vtnet_softc *sc;
  1470. struct ifnet *ifp;
  1471. struct virtqueue *vq;
  1472. struct mbuf *m;
  1473. struct virtio_net_hdr_mrg_rxbuf *mhdr;
  1474. int len, deq, nbufs, adjsz, count;
  1475. sc = rxq->vtnrx_sc;
  1476. vq = rxq->vtnrx_vq;
  1477. ifp = sc->vtnet_ifp;
  1478. hdr = &lhdr;
  1479. deq = 0;
  1480. count = sc->vtnet_rx_process_limit;
  1481. VTNET_RXQ_LOCK_ASSERT(rxq);
  1482. while (count-- > 0) {
  1483. m = virtqueue_dequeue(vq, &len);
  1484. if (m == NULL)
  1485. break;
  1486. deq++;
  1487. if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
  1488. rxq->vtnrx_stats.vrxs_ierrors++;
  1489. vtnet_rxq_discard_buf(rxq, m);
  1490. continue;
  1491. }
  1492. if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
  1493. nbufs = 1;
  1494. adjsz = sizeof(struct vtnet_rx_header);
  1495. /*
  1496. * Account for our pad inserted between the header
  1497. * and the actual start of the frame.
  1498. */
  1499. len += VTNET_RX_HEADER_PAD;
  1500. } else {
  1501. mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
  1502. nbufs = mhdr->num_buffers;
  1503. adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
  1504. }
  1505. if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
  1506. rxq->vtnrx_stats.vrxs_iqdrops++;
  1507. vtnet_rxq_discard_buf(rxq, m);
  1508. if (nbufs > 1)
  1509. vtnet_rxq_discard_merged_bufs(rxq, nbufs);
  1510. continue;
  1511. }
  1512. m->m_pkthdr.len = len;
  1513. m->m_pkthdr.rcvif = ifp;
  1514. m->m_pkthdr.csum_flags = 0;
  1515. if (nbufs > 1) {
  1516. /* Dequeue the rest of chain. */
  1517. if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
  1518. continue;
  1519. }
  1520. /*
1521. * Save a copy of the header before we strip it. For both mergeable
  1522. * and non-mergeable, the header is at the beginning of the
  1523. * mbuf data. We no longer need num_buffers, so always use a
  1524. * regular header.
  1525. *
  1526. * BMV: Is this memcpy() expensive? We know the mbuf data is
  1527. * still valid even after the m_adj().
  1528. */
  1529. memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
  1530. m_adj(m, adjsz);
  1531. vtnet_rxq_input(rxq, m, hdr);
  1532. /* Must recheck after dropping the Rx lock. */
  1533. if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
  1534. break;
  1535. }
  1536. if (deq > 0)
  1537. virtqueue_notify(vq);
  1538. return (count > 0 ? 0 : EAGAIN);
  1539. }
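/*
 * Rx virtqueue interrupt handler: drain the queue under the Rx lock,
 * retrying a few times when the interrupt cannot be re-enabled
 * race-free, before deferring any remaining work to the per-queue
 * taskqueue.
 */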
  1540. static void
  1541. vtnet_rx_vq_intr(void *xrxq)
  1542. {
  1543. struct vtnet_softc *sc;
  1544. struct vtnet_rxq *rxq;
  1545. struct ifnet *ifp;
  1546. int tries, more;
  1547. #ifdef DEV_NETMAP
  1548. int nmirq;
  1549. #endif /* DEV_NETMAP */
  1550. rxq = xrxq;
  1551. sc = rxq->vtnrx_sc;
  1552. ifp = sc->vtnet_ifp;
  1553. tries = 0;
  1554. if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
  1555. /*
  1556. * Ignore this interrupt. Either this is a spurious interrupt
  1557. * or multiqueue without per-VQ MSIX so every queue needs to
  1558. * be polled (a brain dead configuration we could try harder
  1559. * to avoid).
  1560. */
  1561. vtnet_rxq_disable_intr(rxq);
  1562. return;
  1563. }
  1564. VTNET_RXQ_LOCK(rxq);
  1565. #ifdef DEV_NETMAP
  1566. /*
  1567. * We call netmap_rx_irq() under lock to prevent concurrent calls.
  1568. * This is not necessary to serialize the access to the RX vq, but
  1569. * rather to avoid races that may happen if this interface is
  1570. * attached to a VALE switch, which would cause received packets
  1571. * to stall in the RX queue (nm_kr_tryget() could find the kring
  1572. * busy when called from netmap_bwrap_intr_notify()).
  1573. */
  1574. nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
  1575. if (nmirq != NM_IRQ_PASS) {
  1576. VTNET_RXQ_UNLOCK(rxq);
  1577. if (nmirq == NM_IRQ_RESCHED) {
  1578. taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
  1579. }
  1580. return;
  1581. }
  1582. #endif /* DEV_NETMAP */
  1583. again:
  1584. if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
  1585. VTNET_RXQ_UNLOCK(rxq);
  1586. return;
  1587. }
  1588. more = vtnet_rxq_eof(rxq);
  1589. if (more || vtnet_rxq_enable_intr(rxq) != 0) {
  1590. if (!more)
  1591. vtnet_rxq_disable_intr(rxq);
  1592. /*
  1593. * This is an occasional condition or race (when !more),
  1594. * so retry a few times before scheduling the taskqueue.
  1595. */
  1596. if (tries++ < VTNET_INTR_DISABLE_RETRIES)
  1597. goto again;
  1598. VTNET_RXQ_UNLOCK(rxq);
  1599. rxq->vtnrx_stats.vrxs_rescheduled++;
  1600. taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
  1601. } else
  1602. VTNET_RXQ_UNLOCK(rxq);
  1603. }
  1604. static void
  1605. vtnet_rxq_tq_intr(void *xrxq, int pending)
  1606. {
  1607. struct vtnet_softc *sc;
  1608. struct vtnet_rxq *rxq;
  1609. struct ifnet *ifp;
  1610. int more;
  1611. #ifdef DEV_NETMAP
  1612. int nmirq;
  1613. #endif /* DEV_NETMAP */
  1614. rxq = xrxq;
  1615. sc = rxq->vtnrx_sc;
  1616. ifp = sc->vtnet_ifp;
  1617. VTNET_RXQ_LOCK(rxq);
  1618. #ifdef DEV_NETMAP
  1619. nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
  1620. if (nmirq != NM_IRQ_PASS) {
  1621. VTNET_RXQ_UNLOCK(rxq);
  1622. if (nmirq == NM_IRQ_RESCHED) {
  1623. taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
  1624. }
  1625. return;
  1626. }
  1627. #endif /* DEV_NETMAP */
  1628. if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
  1629. VTNET_RXQ_UNLOCK(rxq);
  1630. return;
  1631. }
  1632. more = vtnet_rxq_eof(rxq);
  1633. if (more || vtnet_rxq_enable_intr(rxq) != 0) {
  1634. if (!more)
  1635. vtnet_rxq_disable_intr(rxq);
  1636. rxq->vtnrx_stats.vrxs_rescheduled++;
  1637. taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
  1638. }
  1639. VTNET_RXQ_UNLOCK(rxq);
  1640. }
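/*
 * Return nonzero when the number of free descriptors in the Tx
 * virtqueue has dropped to or below the interrupt threshold computed
 * in vtnet_set_tx_intr_threshold().
 */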
  1641. static int
  1642. vtnet_txq_below_threshold(struct vtnet_txq *txq)
  1643. {
  1644. struct vtnet_softc *sc;
  1645. struct virtqueue *vq;
  1646. sc = txq->vtntx_sc;
  1647. vq = txq->vtntx_vq;
  1648. return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
  1649. }
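/*
 * Kick the host after enqueuing frames: rearm the watchdog, notify the
 * virtqueue and attempt to re-enable the Tx interrupt. Returns nonzero
 * when completed frames were reclaimed and the queue is no longer below
 * the threshold, in which case the caller should continue transmitting.
 */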
  1650. static int
  1651. vtnet_txq_notify(struct vtnet_txq *txq)
  1652. {
  1653. struct virtqueue *vq;
  1654. vq = txq->vtntx_vq;
  1655. txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
  1656. virtqueue_notify(vq);
  1657. if (vtnet_txq_enable_intr(txq) == 0)
  1658. return (0);
  1659. /*
  1660. * Drain frames that were completed since last checked. If this
  1661. * causes the queue to go above the threshold, the caller should
  1662. * continue transmitting.
  1663. */
  1664. if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
  1665. virtqueue_disable_intr(vq);
  1666. return (1);
  1667. }
  1668. return (0);
  1669. }
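/*
 * Drain any entries still attached to the Tx virtqueue. When the queue
 * is in netmap mode the dequeued cookies are not driver-allocated Tx
 * headers, so they are intentionally not freed here.
 */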
  1670. static void
  1671. vtnet_txq_free_mbufs(struct vtnet_txq *txq)
  1672. {
  1673. struct virtqueue *vq;
  1674. struct vtnet_tx_header *txhdr;
  1675. int last;
  1676. #ifdef DEV_NETMAP
  1677. struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp),
  1678. txq->vtntx_id, NR_TX);
  1679. #else /* !DEV_NETMAP */
  1680. void *kring = NULL;
  1681. #endif /* !DEV_NETMAP */
  1682. vq = txq->vtntx_vq;
  1683. last = 0;
  1684. while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
  1685. if (kring == NULL) {
  1686. m_freem(txhdr->vth_mbuf);
  1687. uma_zfree(vtnet_tx_header_zone, txhdr);
  1688. }
  1689. }
  1690. KASSERT(virtqueue_empty(vq),
  1691. ("%s: mbufs remaining in tx queue %p", __func__, txq));
  1692. }
  1693. /*
  1694. * BMV: Much of this can go away once we finally have offsets in
  1695. * the mbuf packet header. Bug andre@.
  1696. */
  1697. static int
  1698. vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
  1699. int *etype, int *proto, int *start)
  1700. {
  1701. struct vtnet_softc *sc;
  1702. struct ether_vlan_header *evh;
  1703. int offset;
  1704. sc = txq->vtntx_sc;
  1705. evh = mtod(m, struct ether_vlan_header *);
  1706. if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
  1707. /* BMV: We should handle nested VLAN tags too. */
  1708. *etype = ntohs(evh->evl_proto);
  1709. offset = sizeof(struct ether_vlan_header);
  1710. } else {
  1711. *etype = ntohs(evh->evl_encap_proto);
  1712. offset = sizeof(struct ether_header);
  1713. }
  1714. switch (*etype) {
  1715. #if defined(INET)
  1716. case ETHERTYPE_IP: {
  1717. struct ip *ip, iphdr;
  1718. if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
  1719. m_copydata(m, offset, sizeof(struct ip),
  1720. (caddr_t) &iphdr);
  1721. ip = &iphdr;
  1722. } else
  1723. ip = (struct ip *)(m->m_data + offset);
  1724. *proto = ip->ip_p;
  1725. *start = offset + (ip->ip_hl << 2);
  1726. break;
  1727. }
  1728. #endif
  1729. #if defined(INET6)
  1730. case ETHERTYPE_IPV6:
  1731. *proto = -1;
  1732. *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
  1733. /* Assert the network stack sent us a valid packet. */
  1734. KASSERT(*start > offset,
  1735. ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
  1736. *start, offset, *proto));
  1737. break;
  1738. #endif
  1739. default:
  1740. sc->vtnet_stats.tx_csum_bad_ethtype++;
  1741. return (EINVAL);
  1742. }
  1743. return (0);
  1744. }
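/*
 * Fill in the TSO fields of the virtio header: hdr_len covers the
 * Ethernet, IP and TCP headers, gso_size is the TCP MSS, and gso_type
 * selects TCPv4 or TCPv6 segmentation. A segment with CWR set is only
 * allowed when the host negotiated VIRTIO_NET_F_HOST_ECN.
 */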
  1745. static int
  1746. vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
  1747. int offset, struct virtio_net_hdr *hdr)
  1748. {
  1749. static struct timeval lastecn;
  1750. static int curecn;
  1751. struct vtnet_softc *sc;
  1752. struct tcphdr *tcp, tcphdr;
  1753. sc = txq->vtntx_sc;
  1754. if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
  1755. m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
  1756. tcp = &tcphdr;
  1757. } else
  1758. tcp = (struct tcphdr *)(m->m_data + offset);
  1759. hdr->hdr_len = offset + (tcp->th_off << 2);
  1760. hdr->gso_size = m->m_pkthdr.tso_segsz;
  1761. hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
  1762. VIRTIO_NET_HDR_GSO_TCPV6;
  1763. if (tcp->th_flags & TH_CWR) {
  1764. /*
  1765. * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
  1766. * ECN support is not on a per-interface basis, but globally via
  1767. * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
  1768. */
  1769. if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
  1770. if (ppsratecheck(&lastecn, &curecn, 1))
  1771. if_printf(sc->vtnet_ifp,
  1772. "TSO with ECN not negotiated with host\n");
  1773. return (ENOTSUP);
  1774. }
  1775. hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
  1776. }
  1777. txq->vtntx_stats.vtxs_tso++;
  1778. return (0);
  1779. }
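/*
 * Translate the mbuf csum_flags into the virtio header checksum and TSO
 * fields. On failure the mbuf is freed and NULL is returned, so the
 * caller must not touch it afterwards.
 */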
  1780. static struct mbuf *
  1781. vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
  1782. struct virtio_net_hdr *hdr)
  1783. {
  1784. struct vtnet_softc *sc;
  1785. int flags, etype, csum_start, proto, error;
  1786. sc = txq->vtntx_sc;
  1787. flags = m->m_pkthdr.csum_flags;
  1788. error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
  1789. if (error)
  1790. goto drop;
  1791. if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
  1792. (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
  1793. /*
  1794. * We could compare the IP protocol vs the CSUM_ flag too,
  1795. * but that really should not be necessary.
  1796. */
  1797. hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
  1798. hdr->csum_start = csum_start;
  1799. hdr->csum_offset = m->m_pkthdr.csum_data;
  1800. txq->vtntx_stats.vtxs_csum++;
  1801. }
  1802. if (flags & CSUM_TSO) {
  1803. if (__predict_false(proto != IPPROTO_TCP)) {
  1804. /* Likely failed to correctly parse the mbuf. */
  1805. sc->vtnet_stats.tx_tso_not_tcp++;
  1806. goto drop;
  1807. }
  1808. KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
  1809. ("%s: mbuf %p TSO without checksum offload %#x",
  1810. __func__, m, flags));
  1811. error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
  1812. if (error)
  1813. goto drop;
  1814. }
  1815. return (m);
  1816. drop:
  1817. m_freem(m);
  1818. return (NULL);
  1819. }
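/*
 * Build the scatter/gather list for a frame (virtio header first,
 * followed by the mbuf chain) and enqueue it on the Tx virtqueue. If
 * the chain has too many segments, defragment it once with m_defrag()
 * before giving up with ENOBUFS.
 */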
  1820. static int
  1821. vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
  1822. struct vtnet_tx_header *txhdr)
  1823. {
  1824. struct vtnet_softc *sc;
  1825. struct virtqueue *vq;
  1826. struct sglist *sg;
  1827. struct mbuf *m;
  1828. int error;
  1829. sc = txq->vtntx_sc;
  1830. vq = txq->vtntx_vq;
  1831. sg = txq->vtntx_sg;
  1832. m = *m_head;
  1833. sglist_reset(sg);
  1834. error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
  1835. KASSERT(error == 0 && sg->sg_nseg == 1,
  1836. ("%s: error %d adding header to sglist", __func__, error));
  1837. error = sglist_append_mbuf(sg, m);
  1838. if (error) {
  1839. m = m_defrag(m, M_NOWAIT);
  1840. if (m == NULL)
  1841. goto fail;
  1842. *m_head = m;
  1843. sc->vtnet_stats.tx_defragged++;
  1844. error = sglist_append_mbuf(sg, m);
  1845. if (error)
  1846. goto fail;
  1847. }
  1848. txhdr->vth_mbuf = m;
  1849. error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
  1850. return (error);
  1851. fail:
  1852. sc->vtnet_stats.tx_defrag_failed++;
  1853. m_freem(*m_head);
  1854. *m_head = NULL;
  1855. return (ENOBUFS);
  1856. }
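/*
 * Encapsulate one frame for transmit: allocate the Tx header, perform
 * software VLAN encapsulation if needed, set up any checksum/TSO
 * offload fields and enqueue the result.
 */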
  1857. static int
  1858. vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
  1859. {
  1860. struct vtnet_tx_header *txhdr;
  1861. struct virtio_net_hdr *hdr;
  1862. struct mbuf *m;
  1863. int error;
  1864. m = *m_head;
  1865. M_ASSERTPKTHDR(m);
  1866. txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
  1867. if (txhdr == NULL) {
  1868. m_freem(m);
  1869. *m_head = NULL;
  1870. return (ENOMEM);
  1871. }
  1872. /*
1873. * Always use the non-mergeable header, regardless of whether the feature
  1874. * was negotiated. For transmit, num_buffers is always zero. The
  1875. * vtnet_hdr_size is used to enqueue the correct header size.
  1876. */
  1877. hdr = &txhdr->vth_uhdr.hdr;
  1878. if (m->m_flags & M_VLANTAG) {
  1879. m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
  1880. if ((*m_head = m) == NULL) {
  1881. error = ENOBUFS;
  1882. goto fail;
  1883. }
  1884. m->m_flags &= ~M_VLANTAG;
  1885. }
  1886. if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
  1887. m = vtnet_txq_offload(txq, m, hdr);
  1888. if ((*m_head = m) == NULL) {
  1889. error = ENOBUFS;
  1890. goto fail;
  1891. }
  1892. }
  1893. error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
  1894. if (error == 0)
  1895. return (0);
  1896. fail:
  1897. uma_zfree(vtnet_tx_header_zone, txhdr);
  1898. return (error);
  1899. }
  1900. #ifdef VTNET_LEGACY_TX
  1901. static void
  1902. vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
  1903. {
  1904. struct vtnet_softc *sc;
  1905. struct virtqueue *vq;
  1906. struct mbuf *m0;
  1907. int tries, enq;
  1908. sc = txq->vtntx_sc;
  1909. vq = txq->vtntx_vq;
  1910. tries = 0;
  1911. VTNET_TXQ_LOCK_ASSERT(txq);
  1912. if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
  1913. sc->vtnet_link_active == 0)
  1914. return;
  1915. vtnet_txq_eof(txq);
  1916. again:
  1917. enq = 0;
  1918. while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
  1919. if (virtqueue_full(vq))
  1920. break;
  1921. IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
  1922. if (m0 == NULL)
  1923. break;
  1924. if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
  1925. if (m0 != NULL)
  1926. IFQ_DRV_PREPEND(&ifp->if_snd, m0);
  1927. break;
  1928. }
  1929. enq++;
  1930. ETHER_BPF_MTAP(ifp, m0);
  1931. }
  1932. if (enq > 0 && vtnet_txq_notify(txq) != 0) {
  1933. if (tries++ < VTNET_NOTIFY_RETRIES)
  1934. goto again;
  1935. txq->vtntx_stats.vtxs_rescheduled++;
  1936. taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
  1937. }
  1938. }
  1939. static void
  1940. vtnet_start(struct ifnet *ifp)
  1941. {
  1942. struct vtnet_softc *sc;
  1943. struct vtnet_txq *txq;
  1944. sc = ifp->if_softc;
  1945. txq = &sc->vtnet_txqs[0];
  1946. VTNET_TXQ_LOCK(txq);
  1947. vtnet_start_locked(txq, ifp);
  1948. VTNET_TXQ_UNLOCK(txq);
  1949. }
  1950. #else /* !VTNET_LEGACY_TX */
  1951. static int
  1952. vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
  1953. {
  1954. struct vtnet_softc *sc;
  1955. struct virtqueue *vq;
  1956. struct buf_ring *br;
  1957. struct ifnet *ifp;
  1958. int enq, tries, error;
  1959. sc = txq->vtntx_sc;
  1960. vq = txq->vtntx_vq;
  1961. br = txq->vtntx_br;
  1962. ifp = sc->vtnet_ifp;
  1963. tries = 0;
  1964. error = 0;
  1965. VTNET_TXQ_LOCK_ASSERT(txq);
  1966. if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
  1967. sc->vtnet_link_active == 0) {
  1968. if (m != NULL)
  1969. error = drbr_enqueue(ifp, br, m);
  1970. return (error);
  1971. }
  1972. if (m != NULL) {
  1973. error = drbr_enqueue(ifp, br, m);
  1974. if (error)
  1975. return (error);
  1976. }
  1977. vtnet_txq_eof(txq);
  1978. again:
  1979. enq = 0;
  1980. while ((m = drbr_peek(ifp, br)) != NULL) {
  1981. if (virtqueue_full(vq)) {
  1982. drbr_putback(ifp, br, m);
  1983. break;
  1984. }
  1985. if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
  1986. if (m != NULL)
  1987. drbr_putback(ifp, br, m);
  1988. else
  1989. drbr_advance(ifp, br);
  1990. break;
  1991. }
  1992. drbr_advance(ifp, br);
  1993. enq++;
  1994. ETHER_BPF_MTAP(ifp, m);
  1995. }
  1996. if (enq > 0 && vtnet_txq_notify(txq) != 0) {
  1997. if (tries++ < VTNET_NOTIFY_RETRIES)
  1998. goto again;
  1999. txq->vtntx_stats.vtxs_rescheduled++;
  2000. taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
  2001. }
  2002. return (0);
  2003. }
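/*
 * Multiqueue if_transmit method. Frames with a valid flowid are hashed
 * onto a queue pair (e.g. with 4 active pairs, flowid 10 selects txq 2);
 * otherwise the current CPU is used. If the queue lock is contended the
 * frame is left on the buf_ring and the deferred task is scheduled.
 */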
  2004. static int
  2005. vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
  2006. {
  2007. struct vtnet_softc *sc;
  2008. struct vtnet_txq *txq;
  2009. int i, npairs, error;
  2010. sc = ifp->if_softc;
  2011. npairs = sc->vtnet_act_vq_pairs;
2012. /* Check if the flowid is set. */
  2013. if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
  2014. i = m->m_pkthdr.flowid % npairs;
  2015. else
  2016. i = curcpu % npairs;
  2017. txq = &sc->vtnet_txqs[i];
  2018. if (VTNET_TXQ_TRYLOCK(txq) != 0) {
  2019. error = vtnet_txq_mq_start_locked(txq, m);
  2020. VTNET_TXQ_UNLOCK(txq);
  2021. } else {
  2022. error = drbr_enqueue(ifp, txq->vtntx_br, m);
  2023. taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
  2024. }
  2025. return (error);
  2026. }
  2027. static void
  2028. vtnet_txq_tq_deferred(void *xtxq, int pending)
  2029. {
  2030. struct vtnet_softc *sc;
  2031. struct vtnet_txq *txq;
  2032. txq = xtxq;
  2033. sc = txq->vtntx_sc;
  2034. VTNET_TXQ_LOCK(txq);
  2035. if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
  2036. vtnet_txq_mq_start_locked(txq, NULL);
  2037. VTNET_TXQ_UNLOCK(txq);
  2038. }
  2039. #endif /* VTNET_LEGACY_TX */
  2040. static void
  2041. vtnet_txq_start(struct vtnet_txq *txq)
  2042. {
  2043. struct vtnet_softc *sc;
  2044. struct ifnet *ifp;
  2045. sc = txq->vtntx_sc;
  2046. ifp = sc->vtnet_ifp;
  2047. #ifdef VTNET_LEGACY_TX
  2048. if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
  2049. vtnet_start_locked(txq, ifp);
  2050. #else
  2051. if (!drbr_empty(ifp, txq->vtntx_br))
  2052. vtnet_txq_mq_start_locked(txq, NULL);
  2053. #endif
  2054. }
  2055. static void
  2056. vtnet_txq_tq_intr(void *xtxq, int pending)
  2057. {
  2058. struct vtnet_softc *sc;
  2059. struct vtnet_txq *txq;
  2060. struct ifnet *ifp;
  2061. txq = xtxq;
  2062. sc = txq->vtntx_sc;
  2063. ifp = sc->vtnet_ifp;
  2064. VTNET_TXQ_LOCK(txq);
  2065. if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
  2066. VTNET_TXQ_UNLOCK(txq);
  2067. return;
  2068. }
  2069. vtnet_txq_eof(txq);
  2070. vtnet_txq_start(txq);
  2071. VTNET_TXQ_UNLOCK(txq);
  2072. }
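/*
 * Reclaim completed transmit descriptors: update the per-queue packet,
 * byte and multicast counters, free the mbufs and Tx headers, and clear
 * the watchdog once the virtqueue is empty. Returns the number of
 * descriptors reclaimed.
 */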
  2073. static int
  2074. vtnet_txq_eof(struct vtnet_txq *txq)
  2075. {
  2076. struct virtqueue *vq;
  2077. struct vtnet_tx_header *txhdr;
  2078. struct mbuf *m;
  2079. int deq;
  2080. vq = txq->vtntx_vq;
  2081. deq = 0;
  2082. VTNET_TXQ_LOCK_ASSERT(txq);
  2083. while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
  2084. m = txhdr->vth_mbuf;
  2085. deq++;
  2086. txq->vtntx_stats.vtxs_opackets++;
  2087. txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
  2088. if (m->m_flags & M_MCAST)
  2089. txq->vtntx_stats.vtxs_omcasts++;
  2090. m_freem(m);
  2091. uma_zfree(vtnet_tx_header_zone, txhdr);
  2092. }
  2093. if (virtqueue_empty(vq))
  2094. txq->vtntx_watchdog = 0;
  2095. return (deq);
  2096. }
  2097. static void
  2098. vtnet_tx_vq_intr(void *xtxq)
  2099. {
  2100. struct vtnet_softc *sc;
  2101. struct vtnet_txq *txq;
  2102. struct ifnet *ifp;
  2103. txq = xtxq;
  2104. sc = txq->vtntx_sc;
  2105. ifp = sc->vtnet_ifp;
  2106. if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
  2107. /*
  2108. * Ignore this interrupt. Either this is a spurious interrupt
  2109. * or multiqueue without per-VQ MSIX so every queue needs to
  2110. * be polled (a brain dead configuration we could try harder
  2111. * to avoid).
  2112. */
  2113. vtnet_txq_disable_intr(txq);
  2114. return;
  2115. }
  2116. #ifdef DEV_NETMAP
  2117. if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
  2118. return;
  2119. #endif /* DEV_NETMAP */
  2120. VTNET_TXQ_LOCK(txq);
  2121. if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
  2122. VTNET_TXQ_UNLOCK(txq);
  2123. return;
  2124. }
  2125. vtnet_txq_eof(txq);
  2126. vtnet_txq_start(txq);
  2127. VTNET_TXQ_UNLOCK(txq);
  2128. }
  2129. static void
  2130. vtnet_tx_start_all(struct vtnet_softc *sc)
  2131. {
  2132. struct vtnet_txq *txq;
  2133. int i;
  2134. VTNET_CORE_LOCK_ASSERT(sc);
  2135. for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
  2136. txq = &sc->vtnet_txqs[i];
  2137. VTNET_TXQ_LOCK(txq);
  2138. vtnet_txq_start(txq);
  2139. VTNET_TXQ_UNLOCK(txq);
  2140. }
  2141. }
  2142. #ifndef VTNET_LEGACY_TX
  2143. static void
  2144. vtnet_qflush(struct ifnet *ifp)
  2145. {
  2146. struct vtnet_softc *sc;
  2147. struct vtnet_txq *txq;
  2148. struct mbuf *m;
  2149. int i;
  2150. sc = ifp->if_softc;
  2151. for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
  2152. txq = &sc->vtnet_txqs[i];
  2153. VTNET_TXQ_LOCK(txq);
  2154. while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
  2155. m_freem(m);
  2156. VTNET_TXQ_UNLOCK(txq);
  2157. }
  2158. if_qflush(ifp);
  2159. }
  2160. #endif
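/*
 * Per-queue transmit watchdog, run from vtnet_tick(). The counter is
 * armed in vtnet_txq_notify() and cleared when the queue drains; if it
 * expires without progress, report the timeout so the caller can
 * reinitialize the interface. Returns nonzero on timeout.
 */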
  2161. static int
  2162. vtnet_watchdog(struct vtnet_txq *txq)
  2163. {
  2164. struct ifnet *ifp;
  2165. ifp = txq->vtntx_sc->vtnet_ifp;
  2166. VTNET_TXQ_LOCK(txq);
  2167. if (txq->vtntx_watchdog == 1) {
  2168. /*
  2169. * Only drain completed frames if the watchdog is about to
  2170. * expire. If any frames were drained, there may be enough
  2171. * free descriptors now available to transmit queued frames.
  2172. * In that case, the timer will immediately be decremented
  2173. * below, but the timeout is generous enough that should not
  2174. * be a problem.
  2175. */
  2176. if (vtnet_txq_eof(txq) != 0)
  2177. vtnet_txq_start(txq);
  2178. }
  2179. if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
  2180. VTNET_TXQ_UNLOCK(txq);
  2181. return (0);
  2182. }
  2183. VTNET_TXQ_UNLOCK(txq);
  2184. if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
  2185. return (1);
  2186. }
  2187. static void
  2188. vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
  2189. struct vtnet_txq_stats *txacc)
  2190. {
  2191. bzero(rxacc, sizeof(struct vtnet_rxq_stats));
  2192. bzero(txacc, sizeof(struct vtnet_txq_stats));
  2193. for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
  2194. struct vtnet_rxq_stats *rxst;
  2195. struct vtnet_txq_stats *txst;
  2196. rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
  2197. rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
  2198. rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
  2199. rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
  2200. rxacc->vrxs_csum += rxst->vrxs_csum;
  2201. rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
  2202. rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
  2203. txst = &sc->vtnet_txqs[i].vtntx_stats;
  2204. txacc->vtxs_opackets += txst->vtxs_opackets;
  2205. txacc->vtxs_obytes += txst->vtxs_obytes;
  2206. txacc->vtxs_csum += txst->vtxs_csum;
  2207. txacc->vtxs_tso += txst->vtxs_tso;
  2208. txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
  2209. }
  2210. }
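/*
 * if_get_counter method: fold the per-queue Rx/Tx statistics into the
 * interface-level counters, deferring to the generic defaults for
 * anything not tracked per queue.
 */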
  2211. static uint64_t
  2212. vtnet_get_counter(if_t ifp, ift_counter cnt)
  2213. {
  2214. struct vtnet_softc *sc;
  2215. struct vtnet_rxq_stats rxaccum;
  2216. struct vtnet_txq_stats txaccum;
  2217. sc = if_getsoftc(ifp);
  2218. vtnet_accum_stats(sc, &rxaccum, &txaccum);
  2219. switch (cnt) {
  2220. case IFCOUNTER_IPACKETS:
  2221. return (rxaccum.vrxs_ipackets);
  2222. case IFCOUNTER_IQDROPS:
  2223. return (rxaccum.vrxs_iqdrops);
  2224. case IFCOUNTER_IERRORS:
  2225. return (rxaccum.vrxs_ierrors);
  2226. case IFCOUNTER_OPACKETS:
  2227. return (txaccum.vtxs_opackets);
  2228. #ifndef VTNET_LEGACY_TX
  2229. case IFCOUNTER_OBYTES:
  2230. return (txaccum.vtxs_obytes);
  2231. case IFCOUNTER_OMCASTS:
  2232. return (txaccum.vtxs_omcasts);
  2233. #endif
  2234. default:
  2235. return (if_get_counter_default(ifp, cnt));
  2236. }
  2237. }
  2238. static void
  2239. vtnet_tick(void *xsc)
  2240. {
  2241. struct vtnet_softc *sc;
  2242. struct ifnet *ifp;
  2243. int i, timedout;
  2244. sc = xsc;
  2245. ifp = sc->vtnet_ifp;
  2246. timedout = 0;
  2247. VTNET_CORE_LOCK_ASSERT(sc);
  2248. for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
  2249. timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
  2250. if (timedout != 0) {
  2251. ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
  2252. vtnet_init_locked(sc, 0);
  2253. } else
  2254. callout_schedule(&sc->vtnet_tick_ch, hz);
  2255. }
  2256. static void
  2257. vtnet_start_taskqueues(struct vtnet_softc *sc)
  2258. {
  2259. device_t dev;
  2260. struct vtnet_rxq *rxq;
  2261. struct vtnet_txq *txq;
  2262. int i, error;
  2263. dev = sc->vtnet_dev;
  2264. /*
  2265. * Errors here are very difficult to recover from - we cannot
  2266. * easily fail because, if this is during boot, we will hang
  2267. * when freeing any successfully started taskqueues because
  2268. * the scheduler isn't up yet.
  2269. *
  2270. * Most drivers just ignore the return value - it only fails
  2271. * with ENOMEM so an error is not likely.
  2272. */
  2273. for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
  2274. rxq = &sc->vtnet_rxqs[i];
  2275. error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
  2276. "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
  2277. if (error) {
  2278. device_printf(dev, "failed to start rx taskq %d\n",
  2279. rxq->vtnrx_id);
  2280. }
  2281. txq = &sc->vtnet_txqs[i];
  2282. error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
  2283. "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
  2284. if (error) {
  2285. device_printf(dev, "failed to start tx taskq %d\n",
  2286. txq->vtntx_id);
  2287. }
  2288. }
  2289. }
  2290. static void
  2291. vtnet_free_taskqueues(struct vtnet_softc *sc)
  2292. {
  2293. struct vtnet_rxq *rxq;
  2294. struct vtnet_txq *txq;
  2295. int i;
  2296. for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
  2297. rxq = &sc->vtnet_rxqs[i];
  2298. if (rxq->vtnrx_tq != NULL) {
  2299. taskqueue_free(rxq->vtnrx_tq);
  2300. rxq->vtnrx_tq = NULL;
  2301. }
  2302. txq = &sc->vtnet_txqs[i];
  2303. if (txq->vtntx_tq != NULL) {
  2304. taskqueue_free(txq->vtntx_tq);
  2305. txq->vtntx_tq = NULL;
  2306. }
  2307. }
  2308. }
  2309. static void
  2310. vtnet_drain_taskqueues(struct vtnet_softc *sc)
  2311. {
  2312. struct vtnet_rxq *rxq;
  2313. struct vtnet_txq *txq;
  2314. int i;
  2315. for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
  2316. rxq = &sc->vtnet_rxqs[i];
  2317. if (rxq->vtnrx_tq != NULL)
  2318. taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
  2319. txq = &sc->vtnet_txqs[i];
  2320. if (txq->vtntx_tq != NULL) {
  2321. taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
  2322. #ifndef VTNET_LEGACY_TX
  2323. taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
  2324. #endif
  2325. }
  2326. }
  2327. }
  2328. static void
  2329. vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
  2330. {
  2331. struct vtnet_rxq *rxq;
  2332. struct vtnet_txq *txq;
  2333. int i;
  2334. for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
  2335. rxq = &sc->vtnet_rxqs[i];
  2336. vtnet_rxq_free_mbufs(rxq);
  2337. txq = &sc->vtnet_txqs[i];
  2338. vtnet_txq_free_mbufs(txq);
  2339. }
  2340. }
  2341. static void
  2342. vtnet_stop_rendezvous(struct vtnet_softc *sc)
  2343. {
  2344. struct vtnet_rxq *rxq;
  2345. struct vtnet_txq *txq;
  2346. int i;
  2347. /*
2348. * Lock and unlock the per-queue mutex so we know the stop
  2349. * state is visible. Doing only the active queues should be
  2350. * sufficient, but it does not cost much extra to do all the
  2351. * queues. Note we hold the core mutex here too.
  2352. */
  2353. for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
  2354. rxq = &sc->vtnet_rxqs[i];
  2355. VTNET_RXQ_LOCK(rxq);
  2356. VTNET_RXQ_UNLOCK(rxq);
  2357. txq = &sc->vtnet_txqs[i];
  2358. VTNET_TXQ_LOCK(txq);
  2359. VTNET_TXQ_UNLOCK(txq);
  2360. }
  2361. }
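/*
 * Bring the interface down: mark it not running, stop the tick callout,
 * disable interrupts, reset the host device and free any mbufs still
 * sitting in the virtqueues. Called with the core lock held.
 */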
  2362. static void
  2363. vtnet_stop(struct vtnet_softc *sc)
  2364. {
  2365. device_t dev;
  2366. struct ifnet *ifp;
  2367. dev = sc->vtnet_dev;
  2368. ifp = sc->vtnet_ifp;
  2369. VTNET_CORE_LOCK_ASSERT(sc);
  2370. ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
  2371. sc->vtnet_link_active = 0;
  2372. callout_stop(&sc->vtnet_tick_ch);
  2373. /* Only advisory. */
  2374. vtnet_disable_interrupts(sc);
  2375. /*
  2376. * Stop the host adapter. This resets it to the pre-initialized
  2377. * state. It will not generate any interrupts until after it is
  2378. * reinitialized.
  2379. */
  2380. virtio_stop(dev);
  2381. vtnet_stop_rendezvous(sc);
  2382. /* Free any mbufs left in the virtqueues. */
  2383. vtnet_drain_rxtx_queues(sc);
  2384. }
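/*
 * Renegotiate features with the host after a stop/init cycle, masking
 * out any receive offloads (checksum, LRO, VLAN filtering) that have
 * been disabled through if_capenable.
 */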
  2385. static int
  2386. vtnet_virtio_reinit(struct vtnet_softc *sc)
  2387. {
  2388. device_t dev;
  2389. struct ifnet *ifp;
  2390. uint64_t features;
  2391. int mask, error;
  2392. dev = sc->vtnet_dev;
  2393. ifp = sc->vtnet_ifp;
  2394. features = sc->vtnet_features;
  2395. mask = 0;
  2396. #if defined(INET)
  2397. mask |= IFCAP_RXCSUM;
  2398. #endif
2399. #if defined(INET6)
  2400. mask |= IFCAP_RXCSUM_IPV6;
  2401. #endif
  2402. /*
  2403. * Re-negotiate with the host, removing any disabled receive
  2404. * features. Transmit features are disabled only on our side
  2405. * via if_capenable and if_hwassist.
  2406. */
  2407. if (ifp->if_capabilities & mask) {
  2408. /*
  2409. * We require both IPv4 and IPv6 offloading to be enabled
2410. * in order to negotiate it: VirtIO does not distinguish
  2411. * between the two.
  2412. */
  2413. if ((ifp->if_capenable & mask) != mask)
  2414. features &= ~VIRTIO_NET_F_GUEST_CSUM;
  2415. }
  2416. if (ifp->if_capabilities & IFCAP_LRO) {
  2417. if ((ifp->if_capenable & IFCAP_LRO) == 0)
  2418. features &= ~VTNET_LRO_FEATURES;
  2419. }
  2420. if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
  2421. if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
  2422. features &= ~VIRTIO_NET_F_CTRL_VLAN;
  2423. }
  2424. error = virtio_reinit(dev, features);
  2425. if (error)
  2426. device_printf(dev, "virtio reinit error %d\n", error);
  2427. return (error);
  2428. }
  2429. static void
  2430. vtnet_init_rx_filters(struct vtnet_softc *sc)
  2431. {
  2432. struct ifnet *ifp;
  2433. ifp = sc->vtnet_ifp;
  2434. if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
  2435. /* Restore promiscuous and all-multicast modes. */
  2436. vtnet_rx_filter(sc);
  2437. /* Restore filtered MAC addresses. */
  2438. vtnet_rx_filter_mac(sc);
  2439. }
  2440. if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
  2441. vtnet_rx_filter_vlan(sc);
  2442. }
  2443. static int
  2444. vtnet_init_rx_queues(struct vtnet_softc *sc)
  2445. {
  2446. device_t dev;
  2447. struct vtnet_rxq *rxq;
  2448. int i, clsize, error;
  2449. dev = sc->vtnet_dev;
  2450. /*
2451. * Use the new cluster size if one has been set (via an MTU
  2452. * change). Otherwise, use the standard 2K clusters.
  2453. *
  2454. * BMV: It might make sense to use page sized clusters as
  2455. * the default (depending on the features negotiated).
  2456. */
  2457. if (sc->vtnet_rx_new_clsize != 0) {
  2458. clsize = sc->vtnet_rx_new_clsize;
  2459. sc->vtnet_rx_new_clsize = 0;
  2460. } else
  2461. clsize = MCLBYTES;
  2462. sc->vtnet_rx_clsize = clsize;
  2463. sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);
  2464. KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS ||
  2465. sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
  2466. ("%s: too many rx mbufs %d for %d segments", __func__,
  2467. sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
  2468. for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
  2469. rxq = &sc->vtnet_rxqs[i];
  2470. /* Hold the lock to satisfy asserts. */
  2471. VTNET_RXQ_LOCK(rxq);
  2472. error = vtnet_rxq_populate(rxq);
  2473. VTNET_RXQ_UNLOCK(rxq);
  2474. if (error) {
  2475. device_printf(dev,
  2476. "cannot allocate mbufs for Rx queue %d\n", i);
  2477. return (error);
  2478. }
  2479. }
  2480. return (0);
  2481. }
  2482. static int
  2483. vtnet_init_tx_queues(struct vtnet_softc *sc)
  2484. {
  2485. struct vtnet_txq *txq;
  2486. int i;
  2487. for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
  2488. txq = &sc->vtnet_txqs[i];
  2489. txq->vtntx_watchdog = 0;
  2490. #ifdef DEV_NETMAP
  2491. netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0);
  2492. #endif /* DEV_NETMAP */
  2493. }
  2494. return (0);
  2495. }
  2496. static int
  2497. vtnet_init_rxtx_queues(struct vtnet_softc *sc)
  2498. {
  2499. int error;
  2500. error = vtnet_init_rx_queues(sc);
  2501. if (error)
  2502. return (error);
  2503. error = vtnet_init_tx_queues(sc);
  2504. if (error)
  2505. return (error);
  2506. return (0);
  2507. }
  2508. static void
  2509. vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
  2510. {
  2511. device_t dev;
  2512. int npairs;
  2513. dev = sc->vtnet_dev;
  2514. if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) {
  2515. sc->vtnet_act_vq_pairs = 1;
  2516. return;
  2517. }
  2518. npairs = sc->vtnet_requested_vq_pairs;
  2519. if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
  2520. device_printf(dev,
  2521. "cannot set active queue pairs to %d\n", npairs);
  2522. npairs = 1;
  2523. }
  2524. sc->vtnet_act_vq_pairs = npairs;
  2525. }
  2526. static int
  2527. vtnet_reinit(struct vtnet_softc *sc)
  2528. {
  2529. struct ifnet *ifp;
  2530. int error;
  2531. ifp = sc->vtnet_ifp;
  2532. /* Use the current MAC address. */
  2533. bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
  2534. vtnet_set_hwaddr(sc);
  2535. vtnet_set_active_vq_pairs(sc);
  2536. ifp->if_hwassist = 0;
  2537. if (ifp->if_capenable & IFCAP_TXCSUM)
  2538. ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
  2539. if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
  2540. ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
  2541. if (ifp->if_capenable & IFCAP_TSO4)
  2542. ifp->if_hwassist |= CSUM_IP_TSO;
  2543. if (ifp->if_capenable & IFCAP_TSO6)
  2544. ifp->if_hwassist |= CSUM_IP6_TSO;
  2545. if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
  2546. vtnet_init_rx_filters(sc);
  2547. error = vtnet_init_rxtx_queues(sc);
  2548. if (error)
  2549. return (error);
  2550. vtnet_enable_interrupts(sc);
  2551. ifp->if_drv_flags |= IFF_DRV_RUNNING;
  2552. return (0);
  2553. }
  2554. static void
  2555. vtnet_init_locked(struct vtnet_softc *sc, int init_mode)
  2556. {
  2557. device_t dev;
  2558. struct ifnet *ifp;
  2559. dev = sc->vtnet_dev;
  2560. ifp = sc->vtnet_ifp;
  2561. VTNET_CORE_LOCK_ASSERT(sc);
  2562. if (ifp->if_drv_flags & IFF_DRV_RUNNING)
  2563. return;
  2564. vtnet_stop(sc);
  2565. #ifdef DEV_NETMAP
  2566. /* Once stopped we can update the netmap flags, if necessary. */
  2567. switch (init_mode) {
  2568. case VTNET_INIT_NETMAP_ENTER:
  2569. nm_set_native_flags(NA(ifp));
  2570. break;
  2571. case VTNET_INIT_NETMAP_EXIT:
  2572. nm_clear_native_flags(NA(ifp));
  2573. break;
  2574. }
  2575. #endif /* DEV_NETMAP */
  2576. /* Reinitialize with the host. */
  2577. if (vtnet_virtio_reinit(sc) != 0)
  2578. goto fail;
  2579. if (vtnet_reinit(sc) != 0)
  2580. goto fail;
  2581. virtio_reinit_complete(dev);
  2582. vtnet_update_link_status(sc);
  2583. callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
  2584. return;
  2585. fail:
  2586. vtnet_stop(sc);
  2587. }
  2588. static void
  2589. vtnet_init(void *xsc)
  2590. {
  2591. struct vtnet_softc *sc;
  2592. sc = xsc;
  2593. VTNET_CORE_LOCK(sc);
  2594. vtnet_init_locked(sc, 0);
  2595. VTNET_CORE_UNLOCK(sc);
  2596. }
  2597. static void
  2598. vtnet_free_ctrl_vq(struct vtnet_softc *sc)
  2599. {
  2600. struct virtqueue *vq;
  2601. vq = sc->vtnet_ctrl_vq;
  2602. /*
  2603. * The control virtqueue is only polled and therefore it should
  2604. * already be empty.
  2605. */
  2606. KASSERT(virtqueue_empty(vq),
  2607. ("%s: ctrl vq %p not empty", __func__, vq));
  2608. }
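/*
 * Execute a command on the control virtqueue: enqueue the request and
 * poll synchronously for the host's response. The core lock must be
 * held by the caller.
 */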
  2609. static void
  2610. vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
  2611. struct sglist *sg, int readable, int writable)
  2612. {
  2613. struct virtqueue *vq;
  2614. vq = sc->vtnet_ctrl_vq;
  2615. VTNET_CORE_LOCK_ASSERT(sc);
  2616. KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
  2617. ("%s: CTRL_VQ feature not negotiated", __func__));
  2618. if (!virtqueue_empty(vq))
  2619. return;
  2620. if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
  2621. return;
  2622. /*
  2623. * Poll for the response, but the command is likely already
  2624. * done when we return from the notify.
  2625. */
  2626. virtqueue_notify(vq);
  2627. virtqueue_poll(vq, NULL);
  2628. }
  2629. static int
  2630. vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
  2631. {
  2632. struct virtio_net_ctrl_hdr hdr __aligned(2);
  2633. struct sglist_seg segs[3];
  2634. struct sglist sg;
  2635. uint8_t ack;
  2636. int error;
  2637. hdr.class = VIRTIO_NET_CTRL_MAC;
  2638. hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
  2639. ack = VIRTIO_NET_ERR;
  2640. sglist_init(&sg, 3, segs);
  2641. error = 0;
  2642. error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
  2643. error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN);
  2644. error |= sglist_append(&sg, &ack, sizeof(uint8_t));
  2645. KASSERT(error == 0 && sg.sg_nseg == 3,
  2646. ("%s: error %d adding set MAC msg to sglist", __func__, error));
  2647. vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
  2648. return (ack == VIRTIO_NET_OK ? 0 : EIO);
  2649. }
  2650. static int
  2651. vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
  2652. {
  2653. struct sglist_seg segs[3];
  2654. struct sglist sg;
  2655. struct {
  2656. struct virtio_net_ctrl_hdr hdr;
  2657. uint8_t pad1;
  2658. struct virtio_net_ctrl_mq mq;
  2659. uint8_t pad2;
  2660. uint8_t ack;
  2661. } s __aligned(2);
  2662. int error;
  2663. s.hdr.class = VIRTIO_NET_CTRL_MQ;
  2664. s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
  2665. s.mq.virtqueue_pairs = npairs;
  2666. s.ack = VIRTIO_NET_ERR;
  2667. sglist_init(&sg, 3, segs);
  2668. error = 0;
  2669. error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
  2670. error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
  2671. error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
  2672. KASSERT(error == 0 && sg.sg_nseg == 3,
  2673. ("%s: error %d adding MQ message to sglist", __func__, error));
  2674. vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
  2675. return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
  2676. }
  2677. static int
  2678. vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
  2679. {
  2680. struct sglist_seg segs[3];
  2681. struct sglist sg;
  2682. struct {
  2683. struct virtio_net_ctrl_hdr hdr;
  2684. uint8_t pad1;
  2685. uint8_t onoff;
  2686. uint8_t pad2;
  2687. uint8_t ack;
  2688. } s __aligned(2);
  2689. int error;
  2690. KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
  2691. ("%s: CTRL_RX feature not negotiated", __func__));
  2692. s.hdr.class = VIRTIO_NET_CTRL_RX;
  2693. s.hdr.cmd = cmd;
  2694. s.onoff = !!on;
  2695. s.ack = VIRTIO_NET_ERR;
  2696. sglist_init(&sg, 3, segs);
  2697. error = 0;
  2698. error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
  2699. error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
  2700. error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
  2701. KASSERT(error == 0 && sg.sg_nseg == 3,
  2702. ("%s: error %d adding Rx message to sglist", __func__, error));
  2703. vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
  2704. return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
  2705. }
  2706. static int
  2707. vtnet_set_promisc(struct vtnet_softc *sc, int on)
  2708. {
  2709. return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
  2710. }
  2711. static int
  2712. vtnet_set_allmulti(struct vtnet_softc *sc, int on)
  2713. {
  2714. return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
  2715. }
  2716. /*
  2717. * The device defaults to promiscuous mode for backwards compatibility.
  2718. * Turn it off at attach time if possible.
  2719. */
  2720. static void
  2721. vtnet_attach_disable_promisc(struct vtnet_softc *sc)
  2722. {
  2723. struct ifnet *ifp;
  2724. ifp = sc->vtnet_ifp;
  2725. VTNET_CORE_LOCK(sc);
  2726. if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) {
  2727. ifp->if_flags |= IFF_PROMISC;
  2728. } else if (vtnet_set_promisc(sc, 0) != 0) {
  2729. ifp->if_flags |= IFF_PROMISC;
  2730. device_printf(sc->vtnet_dev,
  2731. "cannot disable default promiscuous mode\n");
  2732. }
  2733. VTNET_CORE_UNLOCK(sc);
  2734. }
  2735. static void
  2736. vtnet_rx_filter(struct vtnet_softc *sc)
  2737. {
  2738. device_t dev;
  2739. struct ifnet *ifp;
  2740. dev = sc->vtnet_dev;
  2741. ifp = sc->vtnet_ifp;
  2742. VTNET_CORE_LOCK_ASSERT(sc);
  2743. if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
  2744. device_printf(dev, "cannot %s promiscuous mode\n",
  2745. ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
  2746. if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
  2747. device_printf(dev, "cannot %s all-multicast mode\n",
  2748. ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
  2749. }
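/*
 * Program the host's unicast and multicast MAC filter tables from the
 * interface address lists. If either table would exceed
 * VTNET_MAX_MAC_ENTRIES, fall back to promiscuous or all-multicast
 * mode instead.
 */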
  2750. static void
  2751. vtnet_rx_filter_mac(struct vtnet_softc *sc)
  2752. {
  2753. struct virtio_net_ctrl_hdr hdr __aligned(2);
  2754. struct vtnet_mac_filter *filter;
  2755. struct sglist_seg segs[4];
  2756. struct sglist sg;
  2757. struct ifnet *ifp;
  2758. struct ifaddr *ifa;
  2759. struct ifmultiaddr *ifma;
  2760. int ucnt, mcnt, promisc, allmulti, error;
  2761. uint8_t ack;
  2762. ifp = sc->vtnet_ifp;
  2763. filter = sc->vtnet_mac_filter;
  2764. ucnt = 0;
  2765. mcnt = 0;
  2766. promisc = 0;
  2767. allmulti = 0;
  2768. VTNET_CORE_LOCK_ASSERT(sc);
  2769. KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
  2770. ("%s: CTRL_RX feature not negotiated", __func__));
  2771. /* Unicast MAC addresses: */
  2772. if_addr_rlock(ifp);
  2773. CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
  2774. if (ifa->ifa_addr->sa_family != AF_LINK)
  2775. continue;
  2776. else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
  2777. sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
  2778. continue;
  2779. else if (ucnt == VTNET_MAX_MAC_ENTRIES) {
  2780. promisc = 1;
  2781. break;
  2782. }
  2783. bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr),
  2784. &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN);
  2785. ucnt++;
  2786. }
  2787. if_addr_runlock(ifp);
  2788. if (promisc != 0) {
  2789. filter->vmf_unicast.nentries = 0;
  2790. if_printf(ifp, "more than %d MAC addresses assigned, "
  2791. "falling back to promiscuous mode\n",
  2792. VTNET_MAX_MAC_ENTRIES);
  2793. } else
  2794. filter->vmf_unicast.nentries = ucnt;
  2795. /* Multicast MAC addresses: */
  2796. if_maddr_rlock(ifp);
  2797. CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
  2798. if (ifma->ifma_addr->sa_family != AF_LINK)
  2799. continue;
  2800. else if (mcnt == VTNET_MAX_MAC_ENTRIES) {
  2801. allmulti = 1;
  2802. break;
  2803. }
  2804. bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
  2805. &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN);
  2806. mcnt++;
  2807. }
  2808. if_maddr_runlock(ifp);
  2809. if (allmulti != 0) {
  2810. filter->vmf_multicast.nentries = 0;
  2811. if_printf(ifp, "more than %d multicast MAC addresses "
  2812. "assigned, falling back to all-multicast mode\n",
  2813. VTNET_MAX_MAC_ENTRIES);
  2814. } else
  2815. filter->vmf_multicast.nentries = mcnt;
  2816. if (promisc != 0 && allmulti != 0)
  2817. goto out;
  2818. hdr.class = VIRTIO_NET_CTRL_MAC;
  2819. hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
  2820. ack = VIRTIO_NET_ERR;
  2821. sglist_init(&sg, 4, segs);
  2822. error = 0;
  2823. error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
  2824. error |= sglist_append(&sg, &filter->vmf_unicast,
  2825. sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
  2826. error |= sglist_append(&sg, &filter->vmf_multicast,
  2827. sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
  2828. error |= sglist_append(&sg, &ack, sizeof(uint8_t));
  2829. KASSERT(error == 0 && sg.sg_nseg == 4,
  2830. ("%s: error %d adding MAC filter msg to sglist", __func__, error));
  2831. vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
  2832. if (ack != VIRTIO_NET_OK)
  2833. if_printf(ifp, "error setting host MAC filter table\n");
  2834. out:
  2835. if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
  2836. if_printf(ifp, "cannot enable promiscuous mode\n");
  2837. if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
  2838. if_printf(ifp, "cannot enable all-multicast mode\n");
  2839. }
  2840. static int
  2841. vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
  2842. {
  2843. struct sglist_seg segs[3];
  2844. struct sglist sg;
  2845. struct {
  2846. struct virtio_net_ctrl_hdr hdr;
  2847. uint8_t pad1;
  2848. uint16_t tag;
  2849. uint8_t pad2;
  2850. uint8_t ack;
  2851. } s __aligned(2);
  2852. int error;
  2853. s.hdr.class = VIRTIO_NET_CTRL_VLAN;
  2854. s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
  2855. s.tag = tag;
  2856. s.ack = VIRTIO_NET_ERR;
  2857. sglist_init(&sg, 3, segs);
  2858. error = 0;
  2859. error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
  2860. error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
  2861. error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
  2862. KASSERT(error == 0 && sg.sg_nseg == 3,
  2863. ("%s: error %d adding VLAN message to sglist", __func__, error));
  2864. vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
  2865. return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
  2866. }
  2867. static void
  2868. vtnet_rx_filter_vlan(struct vtnet_softc *sc)
  2869. {
  2870. uint32_t w;
  2871. uint16_t tag;
  2872. int i, bit;
  2873. VTNET_CORE_LOCK_ASSERT(sc);
  2874. KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
  2875. ("%s: VLAN_FILTER feature not negotiated", __func__));
  2876. /* Enable the filter for each configured VLAN. */
  2877. for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
  2878. w = sc->vtnet_vlan_filter[i];
  2879. while ((bit = ffs(w) - 1) != -1) {
  2880. w &= ~(1 << bit);
  2881. tag = sizeof(w) * CHAR_BIT * i + bit;
  2882. if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
  2883. device_printf(sc->vtnet_dev,
  2884. "cannot enable VLAN %d filter\n", tag);
  2885. }
  2886. }
  2887. }
  2888. }
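/*
 * Track a VLAN tag in the shadow bitmap and, when the interface is
 * running with hardware VLAN filtering, push the change to the host as
 * well. The bitmap stores the 4096 possible tags in 32-bit words: for
 * example, tag 100 lands in word 3 (100 >> 5), bit 4 (100 & 0x1f).
 */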
  2889. static void
  2890. vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
  2891. {
  2892. struct ifnet *ifp;
  2893. int idx, bit;
  2894. ifp = sc->vtnet_ifp;
  2895. idx = (tag >> 5) & 0x7F;
  2896. bit = tag & 0x1F;
  2897. if (tag == 0 || tag > 4095)
  2898. return;
  2899. VTNET_CORE_LOCK(sc);
  2900. if (add)
  2901. sc->vtnet_vlan_filter[idx] |= (1 << bit);
  2902. else
  2903. sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
  2904. if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
  2905. ifp->if_drv_flags & IFF_DRV_RUNNING &&
  2906. vtnet_exec_vlan_filter(sc, add, tag) != 0) {
  2907. device_printf(sc->vtnet_dev,
  2908. "cannot %s VLAN %d %s the host filter table\n",
  2909. add ? "add" : "remove", tag, add ? "to" : "from");
  2910. }
  2911. VTNET_CORE_UNLOCK(sc);
  2912. }
  2913. static void
  2914. vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
  2915. {
  2916. if (ifp->if_softc != arg)
  2917. return;
  2918. vtnet_update_vlan_filter(arg, 1, tag);
  2919. }
  2920. static void
  2921. vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
  2922. {
  2923. if (ifp->if_softc != arg)
  2924. return;
  2925. vtnet_update_vlan_filter(arg, 0, tag);
  2926. }
  2927. static int
  2928. vtnet_is_link_up(struct vtnet_softc *sc)
  2929. {
  2930. device_t dev;
  2931. struct ifnet *ifp;
  2932. uint16_t status;
  2933. dev = sc->vtnet_dev;
  2934. ifp = sc->vtnet_ifp;
  2935. if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0)
  2936. status = VIRTIO_NET_S_LINK_UP;
  2937. else
  2938. status = virtio_read_dev_config_2(dev,
  2939. offsetof(struct virtio_net_config, status));
  2940. return ((status & VIRTIO_NET_S_LINK_UP) != 0);
  2941. }
  2942. static void
  2943. vtnet_update_link_status(struct vtnet_softc *sc)
  2944. {
  2945. struct ifnet *ifp;
  2946. int link;
  2947. ifp = sc->vtnet_ifp;
  2948. VTNET_CORE_LOCK_ASSERT(sc);
  2949. link = vtnet_is_link_up(sc);
  2950. /* Notify if the link status has changed. */
  2951. if (link != 0 && sc->vtnet_link_active == 0) {
  2952. sc->vtnet_link_active = 1;
  2953. if_link_state_change(ifp, LINK_STATE_UP);
  2954. } else if (link == 0 && sc->vtnet_link_active != 0) {
  2955. sc->vtnet_link_active = 0;
  2956. if_link_state_change(ifp, LINK_STATE_DOWN);
  2957. }
  2958. }
  2959. static int
  2960. vtnet_ifmedia_upd(struct ifnet *ifp)
  2961. {
  2962. struct vtnet_softc *sc;
  2963. struct ifmedia *ifm;
  2964. sc = ifp->if_softc;
  2965. ifm = &sc->vtnet_media;
  2966. if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
  2967. return (EINVAL);
  2968. return (0);
  2969. }
  2970. static void
  2971. vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
  2972. {
  2973. struct vtnet_softc *sc;
  2974. sc = ifp->if_softc;
  2975. ifmr->ifm_status = IFM_AVALID;
  2976. ifmr->ifm_active = IFM_ETHER;
  2977. VTNET_CORE_LOCK(sc);
  2978. if (vtnet_is_link_up(sc) != 0) {
  2979. ifmr->ifm_status |= IFM_ACTIVE;
  2980. ifmr->ifm_active |= VTNET_MEDIATYPE;
  2981. } else
  2982. ifmr->ifm_active |= IFM_NONE;
  2983. VTNET_CORE_UNLOCK(sc);
  2984. }
  2985. static void
  2986. vtnet_set_hwaddr(struct vtnet_softc *sc)
  2987. {
  2988. device_t dev;
  2989. int i;
  2990. dev = sc->vtnet_dev;
  2991. if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
  2992. if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
  2993. device_printf(dev, "unable to set MAC address\n");
  2994. } else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
  2995. for (i = 0; i < ETHER_ADDR_LEN; i++) {
  2996. virtio_write_dev_config_1(dev,
  2997. offsetof(struct virtio_net_config, mac) + i,
  2998. sc->vtnet_hwaddr[i]);
  2999. }
  3000. }
  3001. }
  3002. static void
  3003. vtnet_get_hwaddr(struct vtnet_softc *sc)
  3004. {
  3005. device_t dev;
  3006. int i;
  3007. dev = sc->vtnet_dev;
  3008. if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
  3009. /*
  3010. * Generate a random locally administered unicast address.
  3011. *
  3012. * It would be nice to generate the same MAC address across
  3013. * reboots, but it seems all the hosts currently available
  3014. * support the MAC feature, so this isn't too important.
  3015. */
  3016. sc->vtnet_hwaddr[0] = 0xB2;
  3017. arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
  3018. vtnet_set_hwaddr(sc);
  3019. return;
  3020. }
  3021. for (i = 0; i < ETHER_ADDR_LEN; i++) {
  3022. sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev,
  3023. offsetof(struct virtio_net_config, mac) + i);
  3024. }
  3025. }
  3026. static void
  3027. vtnet_vlan_tag_remove(struct mbuf *m)
  3028. {
  3029. struct ether_vlan_header *evh;
  3030. evh = mtod(m, struct ether_vlan_header *);
  3031. m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
  3032. m->m_flags |= M_VLANTAG;
  3033. /* Strip the 802.1Q header. */
  3034. bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
  3035. ETHER_HDR_LEN - ETHER_TYPE_LEN);
  3036. m_adj(m, ETHER_VLAN_ENCAP_LEN);
  3037. }
  3038. static void
  3039. vtnet_set_rx_process_limit(struct vtnet_softc *sc)
  3040. {
  3041. int limit;
  3042. limit = vtnet_tunable_int(sc, "rx_process_limit",
  3043. vtnet_rx_process_limit);
  3044. if (limit < 0)
  3045. limit = INT_MAX;
  3046. sc->vtnet_rx_process_limit = limit;
  3047. }
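/*
 * Choose the free-descriptor count at which the Tx interrupt is
 * re-enabled: a quarter of the virtqueue size (e.g. 64 for a 256-entry
 * queue), but never less than the worst-case segment count when
 * indirect descriptors are unavailable.
 */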
  3048. static void
  3049. vtnet_set_tx_intr_threshold(struct vtnet_softc *sc)
  3050. {
  3051. int size, thresh;
  3052. size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq);
  3053. /*
  3054. * The Tx interrupt is disabled until the queue free count falls
  3055. * below our threshold. Completed frames are drained from the Tx
  3056. * virtqueue before transmitting new frames and in the watchdog
  3057. * callout, so the frequency of Tx interrupts is greatly reduced,
  3058. * at the cost of not freeing mbufs as quickly as they otherwise
  3059. * would be.
  3060. *
  3061. * N.B. We assume all the Tx queues are the same size.
  3062. */
  3063. thresh = size / 4;
  3064. /*
  3065. * Without indirect descriptors, leave enough room for the most
  3066. * segments we handle.
  3067. */
  3068. if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
  3069. thresh < sc->vtnet_tx_nsegs)
  3070. thresh = sc->vtnet_tx_nsegs;
  3071. sc->vtnet_tx_intr_thresh = thresh;
  3072. }
  3073. static void
  3074. vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
  3075. struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
  3076. {
  3077. struct sysctl_oid *node;
  3078. struct sysctl_oid_list *list;
  3079. struct vtnet_rxq_stats *stats;
  3080. char namebuf[16];
  3081. snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
  3082. node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
  3083. CTLFLAG_RD, NULL, "Receive Queue");
  3084. list = SYSCTL_CHILDREN(node);
  3085. stats = &rxq->vtnrx_stats;
  3086. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
  3087. &stats->vrxs_ipackets, "Receive packets");
  3088. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
  3089. &stats->vrxs_ibytes, "Receive bytes");
  3090. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
  3091. &stats->vrxs_iqdrops, "Receive drops");
  3092. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
  3093. &stats->vrxs_ierrors, "Receive errors");
  3094. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
  3095. &stats->vrxs_csum, "Receive checksum offloaded");
  3096. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
  3097. &stats->vrxs_csum_failed, "Receive checksum offload failed");
  3098. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
  3099. &stats->vrxs_rescheduled,
  3100. "Receive interrupt handler rescheduled");
  3101. }
  3102. static void
  3103. vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
  3104. struct sysctl_oid_list *child, struct vtnet_txq *txq)
  3105. {
  3106. struct sysctl_oid *node;
  3107. struct sysctl_oid_list *list;
  3108. struct vtnet_txq_stats *stats;
  3109. char namebuf[16];
  3110. snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
  3111. node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
  3112. CTLFLAG_RD, NULL, "Transmit Queue");
  3113. list = SYSCTL_CHILDREN(node);
  3114. stats = &txq->vtntx_stats;
  3115. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
  3116. &stats->vtxs_opackets, "Transmit packets");
  3117. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
  3118. &stats->vtxs_obytes, "Transmit bytes");
  3119. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
  3120. &stats->vtxs_omcasts, "Transmit multicasts");
  3121. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
  3122. &stats->vtxs_csum, "Transmit checksum offloaded");
  3123. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
  3124. &stats->vtxs_tso, "Transmit segmentation offloaded");
  3125. SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
  3126. &stats->vtxs_rescheduled,
  3127. "Transmit interrupt handler rescheduled");
  3128. }
  3129. static void
  3130. vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
  3131. {
  3132. device_t dev;
  3133. struct sysctl_ctx_list *ctx;
  3134. struct sysctl_oid *tree;
  3135. struct sysctl_oid_list *child;
  3136. int i;
  3137. dev = sc->vtnet_dev;
  3138. ctx = device_get_sysctl_ctx(dev);
  3139. tree = device_get_sysctl_tree(dev);
  3140. child = SYSCTL_CHILDREN(tree);
  3141. for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
  3142. vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
  3143. vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
  3144. }
  3145. }
static void
vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
    struct sysctl_oid_list *child, struct vtnet_softc *sc)
{
	struct vtnet_statistics *stats;
	struct vtnet_rxq_stats rxaccum;
	struct vtnet_txq_stats txaccum;

	vtnet_accum_stats(sc, &rxaccum, &txaccum);

	stats = &sc->vtnet_stats;
	stats->rx_csum_offloaded = rxaccum.vrxs_csum;
	stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
	stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
	stats->tx_csum_offloaded = txaccum.vtxs_csum;
	stats->tx_tso_offloaded = txaccum.vtxs_tso;
	stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;

	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
	    CTLFLAG_RD, &stats->mbuf_alloc_failed,
	    "Mbuf cluster allocation failures");

	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
	    CTLFLAG_RD, &stats->rx_frame_too_large,
	    "Received frame larger than the mbuf chain");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
	    CTLFLAG_RD, &stats->rx_enq_replacement_failed,
	    "Enqueuing the replacement receive mbuf failed");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
	    CTLFLAG_RD, &stats->rx_mergeable_failed,
	    "Mergeable buffers receive failures");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
	    CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
	    "Received checksum offloaded buffer with unsupported "
	    "Ethernet type");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
	    CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
	    "Received checksum offloaded buffer with incorrect IP protocol");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
	    CTLFLAG_RD, &stats->rx_csum_bad_offset,
	    "Received checksum offloaded buffer with incorrect offset");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
	    CTLFLAG_RD, &stats->rx_csum_bad_proto,
	    "Received checksum offloaded buffer with incorrect protocol");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
	    CTLFLAG_RD, &stats->rx_csum_failed,
	    "Received buffer checksum offload failed");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
	    CTLFLAG_RD, &stats->rx_csum_offloaded,
	    "Received buffer checksum offload succeeded");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
	    CTLFLAG_RD, &stats->rx_task_rescheduled,
	    "Times the receive interrupt task rescheduled itself");

	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
	    CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
	    "Aborted transmit of checksum offloaded buffer with unknown "
	    "Ethernet type");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
	    CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
	    "Aborted transmit of TSO buffer with unknown Ethernet type");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
	    "Aborted transmit of TSO buffer with non TCP protocol");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
	    CTLFLAG_RD, &stats->tx_defragged,
	    "Transmit mbufs defragged");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
	    CTLFLAG_RD, &stats->tx_defrag_failed,
	    "Aborted transmit of buffer because defrag failed");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
	    CTLFLAG_RD, &stats->tx_csum_offloaded,
	    "Offloaded checksum of transmitted buffer");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
	    CTLFLAG_RD, &stats->tx_tso_offloaded,
	    "Segmentation offload of transmitted buffer");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
	    CTLFLAG_RD, &stats->tx_task_rescheduled,
	    "Times the transmit interrupt task rescheduled itself");
}

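/*
 * Top-level sysctl setup: export the virtqueue pair limits under the
 * device's sysctl tree and hang the statistics nodes below it.
 */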
static void
vtnet_setup_sysctl(struct vtnet_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtnet_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
	    "Maximum number of supported virtqueue pairs");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs",
	    CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0,
	    "Requested number of virtqueue pairs");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
	    "Number of active virtqueue pairs");

	vtnet_setup_stat_sysctl(ctx, child, sc);
}

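/*
 * Per-queue wrappers around the virtqueue interrupt enable/disable
 * primitives.
 */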
static int
vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
{

	return (virtqueue_enable_intr(rxq->vtnrx_vq));
}

static void
vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
{

	virtqueue_disable_intr(rxq->vtnrx_vq);
}

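/*
 * Hold off Tx completion interrupts when they are not needed: if the queue
 * is already running low on free descriptors (vtnet_txq_below_threshold),
 * arm a postponed interrupt so completions are batched; otherwise leave the
 * interrupt disabled, as the comment in the function body explains.
 */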
static int
vtnet_txq_enable_intr(struct vtnet_txq *txq)
{
	struct virtqueue *vq;

	vq = txq->vtntx_vq;

	if (vtnet_txq_below_threshold(txq) != 0)
		return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));

	/*
	 * The free count is above our threshold. Keep the Tx interrupt
	 * disabled until the queue is fuller.
	 */
	return (0);
}

static void
vtnet_txq_disable_intr(struct vtnet_txq *txq)
{

	virtqueue_disable_intr(txq->vtntx_vq);
}

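/*
 * Enable or disable Rx and Tx interrupts across all active virtqueue pairs.
 */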
static void
vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
{
	int i;

	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
		vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]);
}

static void
vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
{
	int i;

	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
		vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
}

static void
vtnet_enable_interrupts(struct vtnet_softc *sc)
{

	vtnet_enable_rx_interrupts(sc);
	vtnet_enable_tx_interrupts(sc);
}

static void
vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
{
	int i;

	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
		vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
}

static void
vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
{
	int i;

	for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
		vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
}

static void
vtnet_disable_interrupts(struct vtnet_softc *sc)
{

	vtnet_disable_rx_interrupts(sc);
	vtnet_disable_tx_interrupts(sc);
}

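/*
 * Fetch a per-device loader tunable of the form "hw.vtnet.<unit>.<knob>",
 * falling back to the supplied default when it is not set.  For example
 * (knob name for illustration), a loader.conf(5) line such as
 *
 *	hw.vtnet.0.csum_disable=1
 *
 * would override the default for unit 0 only.
 */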
static int
vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
{
	char path[64];

	snprintf(path, sizeof(path),
	    "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}

#ifdef NETDUMP
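/*
 * netdump(4) methods.  vtnet_netdump_init reports the receive ring geometry
 * netdump should use and pre-reserves transmit header allocations, since the
 * dump transmit path allocates with M_NOWAIT | M_USE_RESERVE.
 */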
static void
vtnet_netdump_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
{
	struct vtnet_softc *sc;

	sc = if_getsoftc(ifp);

	VTNET_CORE_LOCK(sc);
	*nrxr = sc->vtnet_max_vq_pairs;
	*ncl = NETDUMP_MAX_IN_FLIGHT;
	*clsize = sc->vtnet_rx_clsize;
	VTNET_CORE_UNLOCK(sc);

	/*
	 * We need to allocate from this zone in the transmit path, so ensure
	 * that we have at least one item per header available.
	 * XXX add a separate zone like we do for mbufs? otherwise we may alloc
	 * buckets
	 */
	uma_zone_reserve(vtnet_tx_header_zone, NETDUMP_MAX_IN_FLIGHT * 2);
	uma_prealloc(vtnet_tx_header_zone, NETDUMP_MAX_IN_FLIGHT * 2);
}

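/* No additional handling is needed for netdump events in this driver. */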
static void
vtnet_netdump_event(struct ifnet *ifp __unused, enum netdump_ev event __unused)
{
}

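/*
 * Encapsulate a dump packet on transmit queue 0 without sleeping
 * (M_NOWAIT | M_USE_RESERVE) and notify the host on success.
 */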
static int
vtnet_netdump_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct vtnet_softc *sc;
	struct vtnet_txq *txq;
	int error;

	sc = if_getsoftc(ifp);
	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return (EBUSY);

	txq = &sc->vtnet_txqs[0];
	error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE);
	if (error == 0)
		(void)vtnet_txq_notify(txq);
	return (error);
}

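/*
 * Polled operation for netdump: reclaim completed transmits on queue 0 and
 * drain any received frames on every receive queue.
 */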
static int
vtnet_netdump_poll(struct ifnet *ifp, int count)
{
	struct vtnet_softc *sc;
	int i;

	sc = if_getsoftc(ifp);
	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return (EBUSY);

	(void)vtnet_txq_eof(&sc->vtnet_txqs[0]);
	for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
		(void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]);
	return (0);
}
#endif /* NETDUMP */