// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Devmem TCP
 *
 *	Authors:	Mina Almasry <[email protected]>
 *			Willem de Bruijn <[email protected]>
 *			Kaiyuan Zhang <[email protected]>
 */

#include <linux/dma-buf.h>
#include <linux/ethtool_netlink.h>
#include <linux/genalloc.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/types.h>
#include <net/netdev_queues.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <trace/events/page_pool.h>

#include "devmem.h"
#include "mp_dmabuf_devmem.h"
#include "page_pool_priv.h"

/* Device memory support */

/* Protected by rtnl_lock() */
static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1);

static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool,
					       struct gen_pool_chunk *chunk,
					       void *not_used)
{
	struct dmabuf_genpool_chunk_owner *owner = chunk->owner;

	kvfree(owner->area.niovs);
	kfree(owner);
}

static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov)
{
	struct dmabuf_genpool_chunk_owner *owner;

	owner = net_devmem_iov_to_chunk_owner(niov);
	return owner->base_dma_addr +
	       ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT);
}

void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding)
{
	size_t size, avail;

	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);

	size = gen_pool_size(binding->chunk_pool);
	avail = gen_pool_avail(binding->chunk_pool);

	if (!WARN(size != avail, "can't destroy genpool. size=%zu, avail=%zu",
		  size, avail))
		gen_pool_destroy(binding->chunk_pool);

	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  DMA_FROM_DEVICE);
	dma_buf_detach(binding->dmabuf, binding->attachment);
	dma_buf_put(binding->dmabuf);
	xa_destroy(&binding->bound_rxqs);
	kfree(binding);
}

struct net_iov *
net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct dmabuf_genpool_chunk_owner *owner;
	unsigned long dma_addr;
	struct net_iov *niov;
	ssize_t offset;
	ssize_t index;

	dma_addr = gen_pool_alloc_owner(binding->chunk_pool, PAGE_SIZE,
					(void **)&owner);
	if (!dma_addr)
		return NULL;

	offset = dma_addr - owner->base_dma_addr;
	index = offset / PAGE_SIZE;
	niov = &owner->area.niovs[index];

	niov->pp_magic = 0;
	niov->pp = NULL;
	atomic_long_set(&niov->pp_ref_count, 0);

	return niov;
}

void net_devmem_free_dmabuf(struct net_iov *niov)
{
	struct net_devmem_dmabuf_binding *binding = net_devmem_iov_binding(niov);
	unsigned long dma_addr = net_devmem_get_dma_addr(niov);

	if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr,
				       PAGE_SIZE)))
		return;

	gen_pool_free(binding->chunk_pool, dma_addr, PAGE_SIZE);
}

void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding)
{
	struct netdev_rx_queue *rxq;
	unsigned long xa_idx;
	unsigned int rxq_idx;

	if (binding->list.next)
		list_del(&binding->list);

	xa_for_each(&binding->bound_rxqs, xa_idx, rxq) {
		WARN_ON(rxq->mp_params.mp_priv != binding);

		rxq->mp_params.mp_priv = NULL;

		rxq_idx = get_netdev_rx_queue_index(rxq);

		WARN_ON(netdev_rx_queue_restart(binding->dev, rxq_idx));
	}

	xa_erase(&net_devmem_dmabuf_bindings, binding->id);

	net_devmem_dmabuf_binding_put(binding);
}

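/* Illustration (hypothetical helper, never called): a minimal sketch of the
 * intended pairing of the two helpers above. Each net_iov handed out by
 * net_devmem_alloc_dmabuf() covers one PAGE_SIZE slice of the dma-buf and
 * goes back to the binding's genpool through net_devmem_free_dmabuf() once
 * its last reference is dropped. The function name below is made up for
 * illustration only.
 */
static void __maybe_unused
net_devmem_example_alloc_free(struct net_devmem_dmabuf_binding *binding)
{
	struct net_iov *niov;

	niov = net_devmem_alloc_dmabuf(binding);
	if (!niov)
		return;		/* chunk pool exhausted */

	/* ... the net_iov would normally be handed to a page pool here ... */

	net_devmem_free_dmabuf(niov);
}
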
int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx,
				    struct net_devmem_dmabuf_binding *binding,
				    struct netlink_ext_ack *extack)
{
	struct netdev_rx_queue *rxq;
	u32 xa_idx;
	int err;

	if (rxq_idx >= dev->real_num_rx_queues) {
		NL_SET_ERR_MSG(extack, "rx queue index out of range");
		return -ERANGE;
	}

	if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
		NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
		return -EINVAL;
	}

	if (dev->cfg->hds_thresh) {
		NL_SET_ERR_MSG(extack, "hds-thresh is not zero");
		return -EINVAL;
	}

	rxq = __netif_get_rx_queue(dev, rxq_idx);
	if (rxq->mp_params.mp_priv) {
		NL_SET_ERR_MSG(extack, "designated queue already memory provider bound");
		return -EEXIST;
	}

#ifdef CONFIG_XDP_SOCKETS
	if (rxq->pool) {
		NL_SET_ERR_MSG(extack, "designated queue already in use by AF_XDP");
		return -EBUSY;
	}
#endif

	err = xa_alloc(&binding->bound_rxqs, &xa_idx, rxq, xa_limit_32b,
		       GFP_KERNEL);
	if (err)
		return err;

	rxq->mp_params.mp_priv = binding;

	err = netdev_rx_queue_restart(dev, rxq_idx);
	if (err)
		goto err_xa_erase;

	return 0;

err_xa_erase:
	rxq->mp_params.mp_priv = NULL;
	xa_erase(&binding->bound_rxqs, xa_idx);

	return err;
}

struct net_devmem_dmabuf_binding *
net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd,
		       struct netlink_ext_ack *extack)
{
	struct net_devmem_dmabuf_binding *binding;
	static u32 id_alloc_next;
	struct scatterlist *sg;
	struct dma_buf *dmabuf;
	unsigned int sg_idx, i;
	unsigned long virtual;
	int err;

	dmabuf = dma_buf_get(dmabuf_fd);
	if (IS_ERR(dmabuf))
		return ERR_CAST(dmabuf);

	binding = kzalloc_node(sizeof(*binding), GFP_KERNEL,
			       dev_to_node(&dev->dev));
	if (!binding) {
		err = -ENOMEM;
		goto err_put_dmabuf;
	}

	binding->dev = dev;

	err = xa_alloc_cyclic(&net_devmem_dmabuf_bindings, &binding->id,
			      binding, xa_limit_32b, &id_alloc_next,
			      GFP_KERNEL);
	if (err < 0)
		goto err_free_binding;

	xa_init_flags(&binding->bound_rxqs, XA_FLAGS_ALLOC);

	refcount_set(&binding->ref, 1);

	binding->dmabuf = dmabuf;

	binding->attachment = dma_buf_attach(binding->dmabuf, dev->dev.parent);
	if (IS_ERR(binding->attachment)) {
		err = PTR_ERR(binding->attachment);
		NL_SET_ERR_MSG(extack, "Failed to bind dmabuf to device");
		goto err_free_id;
	}

	binding->sgt = dma_buf_map_attachment_unlocked(binding->attachment,
						       DMA_FROM_DEVICE);
	if (IS_ERR(binding->sgt)) {
		err = PTR_ERR(binding->sgt);
		NL_SET_ERR_MSG(extack, "Failed to map dmabuf attachment");
		goto err_detach;
	}

	/* For simplicity we expect to make PAGE_SIZE allocations, but the
	 * binding can be much more flexible than that. We may be able to
	 * allocate MTU sized chunks here. Leave that for future work...
	 */
	binding->chunk_pool =
		gen_pool_create(PAGE_SHIFT, dev_to_node(&dev->dev));
	if (!binding->chunk_pool) {
		err = -ENOMEM;
		goto err_unmap;
	}

	virtual = 0;
	for_each_sgtable_dma_sg(binding->sgt, sg, sg_idx) {
		dma_addr_t dma_addr = sg_dma_address(sg);
		struct dmabuf_genpool_chunk_owner *owner;
		size_t len = sg_dma_len(sg);
		struct net_iov *niov;

		owner = kzalloc_node(sizeof(*owner), GFP_KERNEL,
				     dev_to_node(&dev->dev));
		if (!owner) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		owner->area.base_virtual = virtual;
		owner->base_dma_addr = dma_addr;
		owner->area.num_niovs = len / PAGE_SIZE;
		owner->binding = binding;

		err = gen_pool_add_owner(binding->chunk_pool, dma_addr,
					 dma_addr, len, dev_to_node(&dev->dev),
					 owner);
		if (err) {
			kfree(owner);
			err = -EINVAL;
			goto err_free_chunks;
		}

		owner->area.niovs = kvmalloc_array(owner->area.num_niovs,
						   sizeof(*owner->area.niovs),
						   GFP_KERNEL);
		if (!owner->area.niovs) {
			err = -ENOMEM;
			goto err_free_chunks;
		}

		for (i = 0; i < owner->area.num_niovs; i++) {
			niov = &owner->area.niovs[i];
			niov->owner = &owner->area;
			page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov),
						      net_devmem_get_dma_addr(niov));
		}

		virtual += len;
	}

	return binding;

err_free_chunks:
	gen_pool_for_each_chunk(binding->chunk_pool,
				net_devmem_dmabuf_free_chunk_owner, NULL);
	gen_pool_destroy(binding->chunk_pool);
err_unmap:
	dma_buf_unmap_attachment_unlocked(binding->attachment, binding->sgt,
					  DMA_FROM_DEVICE);
err_detach:
	dma_buf_detach(dmabuf, binding->attachment);
err_free_id:
	xa_erase(&net_devmem_dmabuf_bindings, binding->id);
err_free_binding:
	kfree(binding);
err_put_dmabuf:
	dma_buf_put(dmabuf);
	return ERR_PTR(err);
}

void dev_dmabuf_uninstall(struct net_device *dev)
{
	struct net_devmem_dmabuf_binding *binding;
	struct netdev_rx_queue *rxq;
	unsigned long xa_idx;
	unsigned int i;

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		binding = dev->_rx[i].mp_params.mp_priv;
		if (!binding)
			continue;

		xa_for_each(&binding->bound_rxqs, xa_idx, rxq)
			if (rxq == &dev->_rx[i]) {
				xa_erase(&binding->bound_rxqs, xa_idx);
				break;
			}
	}
}

/*** "Dmabuf devmem memory provider" ***/

int mp_dmabuf_devmem_init(struct page_pool *pool)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

	if (!binding)
		return -EINVAL;

	/* dma-buf dma addresses do not need and should not be used with
	 * dma_sync_for_cpu/device. Force disable dma_sync.
	 */
	pool->dma_sync = false;
	pool->dma_sync_for_cpu = false;

	if (pool->p.order != 0)
		return -E2BIG;

	net_devmem_dmabuf_binding_get(binding);
	return 0;
}

netmem_ref mp_dmabuf_devmem_alloc_netmems(struct page_pool *pool, gfp_t gfp)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;
	struct net_iov *niov;
	netmem_ref netmem;

	niov = net_devmem_alloc_dmabuf(binding);
	if (!niov)
		return 0;

	netmem = net_iov_to_netmem(niov);

	page_pool_set_pp_info(pool, netmem);

	pool->pages_state_hold_cnt++;
	trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt);
	return netmem;
}

void mp_dmabuf_devmem_destroy(struct page_pool *pool)
{
	struct net_devmem_dmabuf_binding *binding = pool->mp_priv;

	net_devmem_dmabuf_binding_put(binding);
}

bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem)
{
	long refcount = atomic_long_read(netmem_get_pp_ref_count_ref(netmem));

	if (WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
		return false;

	if (WARN_ON_ONCE(refcount != 1))
		return false;

	page_pool_clear_pp_info(netmem);

	net_devmem_free_dmabuf(netmem_to_net_iov(netmem));

	/* We don't want the page pool put_page()ing our net_iovs. */
	return false;
}
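
/* Illustration (hypothetical helper, never called): a minimal sketch of one
 * full provider cycle, assuming the page pool core drives the hooks above for
 * a pool whose pool->mp_priv already points at a devmem binding. The real
 * call sites live in the page pool core; the function name below is made up
 * for illustration only.
 */
static void __maybe_unused
mp_dmabuf_devmem_example_cycle(struct page_pool *pool, gfp_t gfp)
{
	netmem_ref netmem;

	/* Takes a reference on the binding for the lifetime of the pool. */
	if (mp_dmabuf_devmem_init(pool))
		return;

	/* Hands out one PAGE_SIZE net_iov backed by the dma-buf. */
	netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp);

	/* Always returns false so the pool never put_page()s a net_iov. */
	if (netmem)
		mp_dmabuf_devmem_release_page(pool, netmem);

	/* Drops the reference taken in mp_dmabuf_devmem_init(). */
	mp_dmabuf_devmem_destroy(pool);
}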