HardenedBSD src tree https://hardenedbsd.org/
/*
 * Copyright (C) 2014-2018 Vincenzo Maffione, Luigi Rizzo.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 */
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <vm/vm.h>
#include <vm/pmap.h>    /* vtophys ? */
#include <dev/netmap/netmap_kern.h>

/* Register and unregister. */
static int
vtnet_netmap_reg(struct netmap_adapter *na, int state)
{
        struct ifnet *ifp = na->ifp;
        struct vtnet_softc *sc = ifp->if_softc;

        /*
         * Trigger a device reinit, asking vtnet_init_locked() to
         * also enter or exit netmap mode.
         */
        VTNET_CORE_LOCK(sc);
        ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        vtnet_init_locked(sc, state ? VTNET_INIT_NETMAP_ENTER
            : VTNET_INIT_NETMAP_EXIT);
        VTNET_CORE_UNLOCK(sc);

        return 0;
}
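
/*
 * For reference: the netmap core invokes this nm_register callback with
 * state != 0 when an application opens the interface in netmap mode
 * (e.g. via nm_open()/NIOCREGIF) and with state == 0 when it leaves
 * netmap mode; the actual mode switch is delegated to vtnet_init_locked()
 * above.
 */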

/* Reconcile kernel and user view of the transmit ring. */
static int
vtnet_netmap_txsync(struct netmap_kring *kring, int flags)
{
        struct netmap_adapter *na = kring->na;
        struct ifnet *ifp = na->ifp;
        struct netmap_ring *ring = kring->ring;
        u_int ring_nr = kring->ring_id;
        u_int nm_i;     /* index into the netmap ring */
        u_int const lim = kring->nkr_num_slots - 1;
        u_int const head = kring->rhead;

        /* device-specific */
        struct vtnet_softc *sc = ifp->if_softc;
        struct vtnet_txq *txq = &sc->vtnet_txqs[ring_nr];
        struct virtqueue *vq = txq->vtntx_vq;
        int interrupts = !(kring->nr_kflags & NKR_NOINTR);
        u_int n;

        /*
         * First part: process new packets to send.
         */
        nm_i = kring->nr_hwcur;
        if (nm_i != head) {     /* we have new packets to send */
                struct sglist *sg = txq->vtntx_sg;

                for (; nm_i != head; nm_i = nm_next(nm_i, lim)) {
                        /* we use an empty header here */
                        struct netmap_slot *slot = &ring->slot[nm_i];
                        u_int len = slot->len;
                        uint64_t paddr;
                        void *addr = PNMB(na, slot, &paddr);
                        int err;

                        NM_CHECK_ADDR_LEN(na, addr, len);

                        slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
                        /* Initialize the scatterlist, expose it to the
                         * hypervisor, and kick the hypervisor (if necessary).
                         */
                        sglist_reset(sg); // cheap
                        err = sglist_append(sg, &txq->vtntx_shrhdr, sc->vtnet_hdr_size);
                        err |= sglist_append_phys(sg, paddr, len);
                        KASSERT(err == 0, ("%s: cannot append to sglist %d",
                            __func__, err));
                        err = virtqueue_enqueue(vq, /*cookie=*/txq, sg,
                            /*readable=*/sg->sg_nseg,
                            /*writeable=*/0);
                        if (unlikely(err)) {
                                if (err != ENOSPC)
                                        nm_prerr("virtqueue_enqueue(%s) failed: %d",
                                            kring->name, err);
                                break;
                        }
                }

                virtqueue_notify(vq);

                /* Update hwcur depending on where we stopped. */
                kring->nr_hwcur = nm_i; /* note we might break early */
        }

        /* Free used slots. We only consider our own used buffers, recognized
         * by the token we passed to virtqueue_enqueue.
         */
        n = 0;
        for (;;) {
                void *token = virtqueue_dequeue(vq, NULL);
                if (token == NULL)
                        break;
                if (unlikely(token != (void *)txq))
                        nm_prerr("BUG: TX token mismatch");
                else
                        n++;
        }
        if (n > 0) {
                kring->nr_hwtail += n;
                if (kring->nr_hwtail > lim)
                        kring->nr_hwtail -= lim + 1;
        }
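
        /*
         * Illustrative arithmetic for the wrap-around above: with
         * nkr_num_slots = 256 (lim = 255), nr_hwtail = 250 and n = 10,
         * the sum 260 exceeds lim, so the index wraps to 260 - 256 = 4.
         */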

        if (interrupts && virtqueue_nfree(vq) < 32)
                virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG);

        return 0;
}

/*
 * Publish 'num' netmap receive buffers to the host, starting
 * from the next available one (rxq->vtnrx_nm_refill).
 * Return a positive error code on error, and 0 on success.
 * If we could not publish all of the buffers, that's an error,
 * since the netmap ring and the virtqueue would go out of sync.
 */
static int
vtnet_netmap_kring_refill(struct netmap_kring *kring, u_int num)
{
        struct netmap_adapter *na = kring->na;
        struct ifnet *ifp = na->ifp;
        struct netmap_ring *ring = kring->ring;
        u_int ring_nr = kring->ring_id;
        u_int const lim = kring->nkr_num_slots - 1;
        u_int nm_i;

        /* device-specific */
        struct vtnet_softc *sc = ifp->if_softc;
        struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
        struct virtqueue *vq = rxq->vtnrx_vq;

        /* use a local sglist, default might be short */
        struct sglist_seg ss[2];
        struct sglist sg = { ss, 0, 0, 2 };

        for (nm_i = rxq->vtnrx_nm_refill; num > 0;
            nm_i = nm_next(nm_i, lim), num--) {
                struct netmap_slot *slot = &ring->slot[nm_i];
                uint64_t paddr;
                void *addr = PNMB(na, slot, &paddr);
                int err;

                if (addr == NETMAP_BUF_BASE(na)) { /* bad buf */
                        if (netmap_ring_reinit(kring))
                                return EFAULT;
                }

                slot->flags &= ~NS_BUF_CHANGED;
                sglist_reset(&sg);
                err = sglist_append(&sg, &rxq->vtnrx_shrhdr, sc->vtnet_hdr_size);
                err |= sglist_append_phys(&sg, paddr, NETMAP_BUF_SIZE(na));
                KASSERT(err == 0, ("%s: cannot append to sglist %d",
                    __func__, err));
                /* writable for the host */
                err = virtqueue_enqueue(vq, /*cookie=*/rxq, &sg,
                    /*readable=*/0, /*writeable=*/sg.sg_nseg);
                if (unlikely(err)) {
                        nm_prerr("virtqueue_enqueue(%s) failed: %d",
                            kring->name, err);
                        break;
                }
        }
        rxq->vtnrx_nm_refill = nm_i;

        return num == 0 ? 0 : ENOSPC;
}
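
/*
 * For reference, each sglist published above has two segments: seg[0]
 * covers the shared virtio-net header (vtnrx_shrhdr, vtnet_hdr_size
 * bytes) and seg[1] covers one netmap buffer (NETMAP_BUF_SIZE(na)
 * bytes); both segments are exposed as host-writable.
 */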

/*
 * Publish netmap buffers on a RX virtqueue.
 * Returns -1 if this virtqueue is not being opened in netmap mode.
 * If the virtqueue is being opened in netmap mode, return 0 on success and
 * a positive error code on failure.
 */
static int
vtnet_netmap_rxq_populate(struct vtnet_rxq *rxq)
{
        struct netmap_adapter *na = NA(rxq->vtnrx_sc->vtnet_ifp);
        struct netmap_kring *kring;
        struct netmap_slot *slot;
        int error;

        slot = netmap_reset(na, NR_RX, rxq->vtnrx_id, 0);
        if (slot == NULL)
                return -1;
        kring = na->rx_rings[rxq->vtnrx_id];

        /* Expose all the RX netmap buffers we can. In case of no indirect
         * buffers, the number of netmap slots in the RX ring matches the
         * maximum number of 2-element sglists that the RX virtqueue can
         * accommodate. We need to start from kring->nr_hwcur, which is 0
         * on netmap register and may be different from 0 if a virtio
         * re-init happens while the device is in use by netmap. */
        rxq->vtnrx_nm_refill = kring->nr_hwcur;
        error = vtnet_netmap_kring_refill(kring, na->num_rx_desc - 1);
        virtqueue_notify(rxq->vtnrx_vq);

        return error;
}

/* Reconcile kernel and user view of the receive ring. */
static int
vtnet_netmap_rxsync(struct netmap_kring *kring, int flags)
{
        struct netmap_adapter *na = kring->na;
        struct ifnet *ifp = na->ifp;
        struct netmap_ring *ring = kring->ring;
        u_int ring_nr = kring->ring_id;
        u_int nm_i;     /* index into the netmap ring */
        u_int const lim = kring->nkr_num_slots - 1;
        u_int const head = kring->rhead;
        int force_update = (flags & NAF_FORCE_READ) ||
            (kring->nr_kflags & NKR_PENDINTR);
        int interrupts = !(kring->nr_kflags & NKR_NOINTR);

        /* device-specific */
        struct vtnet_softc *sc = ifp->if_softc;
        struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
        struct virtqueue *vq = rxq->vtnrx_vq;

        /*
         * First part: import newly received packets.
         * Only accept our own buffers (matching the token). We should only get
         * matching buffers. The hwtail should never overrun hwcur, because
         * we publish only N-1 receive buffers (and not N).
         * In any case we must not leave this routine with the interrupts
         * disabled, pending packets in the VQ and hwtail == (hwcur - 1),
         * otherwise the pending packets could stall.
         */
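        /*
         * Illustrative reading of the N-1 invariant above: with
         * nkr_num_slots = 256 and nr_hwcur = 10, at most 255 buffers are
         * ever outstanding in the virtqueue, so dequeuing can advance
         * nr_hwtail at most up to slot 9 (hwtail_lim below) and never
         * past nr_hwcur.
         */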
        if (netmap_no_pendintr || force_update) {
                uint32_t hwtail_lim = nm_prev(kring->nr_hwcur, lim);
                void *token;

                vtnet_rxq_disable_intr(rxq);

                nm_i = kring->nr_hwtail;
                for (;;) {
                        int len;
                        token = virtqueue_dequeue(vq, &len);
                        if (token == NULL) {
                                /*
                                 * Enable the interrupts again and double-check
                                 * for more work. We can go on until we win the
                                 * race condition, since we are not replenishing
                                 * in the meanwhile, and thus we will process at
                                 * most N-1 slots.
                                 */
                                if (interrupts && vtnet_rxq_enable_intr(rxq)) {
                                        vtnet_rxq_disable_intr(rxq);
                                        continue;
                                }
                                break;
                        }
                        if (unlikely(token != (void *)rxq)) {
                                nm_prerr("BUG: RX token mismatch");
                        } else {
                                if (nm_i == hwtail_lim) {
                                        KASSERT(false, ("hwtail would "
                                            "overrun hwcur"));
                                }

                                /* Skip the virtio-net header. */
                                len -= sc->vtnet_hdr_size;
                                if (unlikely(len < 0)) {
                                        nm_prlim(1, "Truncated virtio-net-header, "
                                            "missing %d bytes", -len);
                                        len = 0;
                                }
                                ring->slot[nm_i].len = len;
                                ring->slot[nm_i].flags = 0;
                                nm_i = nm_next(nm_i, lim);
                        }
                }
                kring->nr_hwtail = nm_i;
                kring->nr_kflags &= ~NKR_PENDINTR;
        }

        /*
         * Second part: skip past packets that userspace has released.
         */
        nm_i = kring->nr_hwcur; /* netmap ring index */
        if (nm_i != head) {
                int released;
                int error;

                released = head - nm_i;
                if (released < 0)
                        released += kring->nkr_num_slots;
                error = vtnet_netmap_kring_refill(kring, released);
                if (error) {
                        nm_prerr("Failed to replenish RX VQ with %u sgs",
                            released);
                        return error;
                }
                kring->nr_hwcur = head;
                virtqueue_notify(vq);
        }

        nm_prdis("h %d c %d t %d hwcur %d hwtail %d", kring->rhead,
            kring->rcur, kring->rtail, kring->nr_hwcur, kring->nr_hwtail);

        return 0;
}

/* Enable/disable interrupts on all virtqueues. */
static void
vtnet_netmap_intr(struct netmap_adapter *na, int state)
{
        struct vtnet_softc *sc = na->ifp->if_softc;
        int i;

        for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
                struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i];
                struct vtnet_txq *txq = &sc->vtnet_txqs[i];
                struct virtqueue *txvq = txq->vtntx_vq;

                if (state) {
                        vtnet_rxq_enable_intr(rxq);
                        virtqueue_enable_intr(txvq);
                } else {
                        vtnet_rxq_disable_intr(rxq);
                        virtqueue_disable_intr(txvq);
                }
        }
}

static int
vtnet_netmap_tx_slots(struct vtnet_softc *sc)
{
        int div;

        /* We need to prepend a virtio-net header to each netmap buffer to be
         * transmitted, therefore calling virtqueue_enqueue() passing a sglist
         * with 2 elements.
         * TX virtqueues use indirect descriptors if the feature was negotiated
         * with the host, and if sc->vtnet_tx_nsegs > 1. With indirect
         * descriptors, a single virtio descriptor is sufficient to reference
         * each TX sglist. Without them, we need two separate virtio descriptors
         * for each TX sglist. We therefore compute the number of netmap TX
         * slots according to these assumptions.
         */
        if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) && sc->vtnet_tx_nsegs > 1)
                div = 1;
        else
                div = 2;

        return virtqueue_size(sc->vtnet_txqs[0].vtntx_vq) / div;
}
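
/*
 * Worked example for the computation above (illustrative numbers): a TX
 * virtqueue with 256 entries yields 256 netmap TX slots when indirect
 * descriptors are in use (div = 1), and 128 slots otherwise (div = 2),
 * since each 2-element sglist then consumes two virtio descriptors.
 * The same reasoning applies to the RX computation below.
 */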

static int
vtnet_netmap_rx_slots(struct vtnet_softc *sc)
{
        int div;

        /* We need to prepend a virtio-net header to each netmap buffer to be
         * received, therefore calling virtqueue_enqueue() passing a sglist
         * with 2 elements.
         * RX virtqueues use indirect descriptors if the feature was negotiated
         * with the host, and if sc->vtnet_rx_nsegs > 1. With indirect
         * descriptors, a single virtio descriptor is sufficient to reference
         * each RX sglist. Without them, we need two separate virtio descriptors
         * for each RX sglist. We therefore compute the number of netmap RX
         * slots according to these assumptions.
         */
        if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) && sc->vtnet_rx_nsegs > 1)
                div = 1;
        else
                div = 2;

        return virtqueue_size(sc->vtnet_rxqs[0].vtnrx_vq) / div;
}

static int
vtnet_netmap_config(struct netmap_adapter *na, struct nm_config_info *info)
{
        struct vtnet_softc *sc = na->ifp->if_softc;

        info->num_tx_rings = sc->vtnet_act_vq_pairs;
        info->num_rx_rings = sc->vtnet_act_vq_pairs;
        info->num_tx_descs = vtnet_netmap_tx_slots(sc);
        info->num_rx_descs = vtnet_netmap_rx_slots(sc);
        info->rx_buf_maxsize = NETMAP_BUF_SIZE(na);

        return 0;
}

static void
vtnet_netmap_attach(struct vtnet_softc *sc)
{
        struct netmap_adapter na;

        bzero(&na, sizeof(na));

        na.ifp = sc->vtnet_ifp;
        na.na_flags = 0;
        na.num_tx_desc = vtnet_netmap_tx_slots(sc);
        na.num_rx_desc = vtnet_netmap_rx_slots(sc);
        na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
        na.rx_buf_maxsize = 0;
        na.nm_register = vtnet_netmap_reg;
        na.nm_txsync = vtnet_netmap_txsync;
        na.nm_rxsync = vtnet_netmap_rxsync;
        na.nm_intr = vtnet_netmap_intr;
        na.nm_config = vtnet_netmap_config;

        netmap_attach(&na);

        nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d",
            na.num_tx_rings, na.num_tx_desc,
            na.num_rx_rings, na.num_rx_desc);
}
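
/*
 * For reference: this file is included by the vtnet driver, which calls
 * vtnet_netmap_attach() from its attach path when the kernel is built
 * with the DEV_NETMAP option. The na structure above is only a template;
 * netmap_attach() copies it into its own private netmap_adapter, so
 * keeping it on the stack is safe.
 */
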
/* end of file */