/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

SYSCTL_NODE(_kern, OID_AUTO, iser, CTLFLAG_RW, 0, "iSER module");
int iser_debug = 0;
SYSCTL_INT(_kern_iser, OID_AUTO, debug, CTLFLAG_RWTUN,
    &iser_debug, 0, "Enable iser debug messages");

static MALLOC_DEFINE(M_ICL_ISER, "icl_iser", "iSCSI iser backend");
static uma_zone_t icl_pdu_zone;

static volatile u_int icl_iser_ncons;
struct iser_global ig;

static void iser_conn_release(struct icl_conn *ic);

static icl_conn_new_pdu_t iser_conn_new_pdu;
static icl_conn_pdu_free_t iser_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t iser_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t iser_conn_pdu_append_data;
static icl_conn_pdu_queue_t iser_conn_pdu_queue;
static icl_conn_handoff_t iser_conn_handoff;
static icl_conn_free_t iser_conn_free;
static icl_conn_close_t iser_conn_close;
static icl_conn_connect_t iser_conn_connect;
static icl_conn_task_setup_t iser_conn_task_setup;
static icl_conn_task_done_t iser_conn_task_done;
static icl_conn_pdu_get_data_t iser_conn_pdu_get_data;

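/*
 * kobj method table that hooks the iser backend into the generic ICL
 * (iSCSI Common Layer) framework.
 */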
static kobj_method_t icl_iser_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, iser_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, iser_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length, iser_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, iser_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_queue, iser_conn_pdu_queue),
	KOBJMETHOD(icl_conn_handoff, iser_conn_handoff),
	KOBJMETHOD(icl_conn_free, iser_conn_free),
	KOBJMETHOD(icl_conn_close, iser_conn_close),
	KOBJMETHOD(icl_conn_connect, iser_conn_connect),
	KOBJMETHOD(icl_conn_task_setup, iser_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, iser_conn_task_done),
	KOBJMETHOD(icl_conn_pdu_get_data, iser_conn_pdu_get_data),
	{ 0, 0 }
};

DEFINE_CLASS(icl_iser, icl_iser_methods, sizeof(struct iser_conn));

/**
 * iser_initialize_headers() - Initialize task headers
 * @pdu: iser pdu
 * @iser_conn: iser connection
 *
 * Notes:
 * This routine may race with the iser teardown flow during SCSI
 * error-handling TMFs.  For TMFs, the state mutex should therefore be
 * held, to avoid dereferencing an IB device that the racing teardown
 * sequence may already have freed.
 */
int
iser_initialize_headers(struct icl_iser_pdu *pdu, struct iser_conn *iser_conn)
{
	struct iser_tx_desc *tx_desc = &pdu->desc;
	struct iser_device *device = iser_conn->ib_conn.device;
	u64 dma_addr;
	int ret = 0;

	dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
	    ISER_HEADERS_LEN, DMA_TO_DEVICE);
	if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
		ret = -ENOMEM;
		goto out;
	}

	tx_desc->mapped = true;
	tx_desc->dma_addr = dma_addr;
	tx_desc->tx_sg[0].addr = tx_desc->dma_addr;
	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
	tx_desc->tx_sg[0].lkey = device->mr->lkey;

out:
	return (ret);
}

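/*
 * Only login and text request PDUs carry their payload in the
 * pre-allocated login buffer; SCSI data is transferred through the RDMA
 * machinery instead, so anything else is ignored here.
 */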
int
iser_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
    const void *addr, size_t len, int flags)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	if (request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_LOGIN_REQUEST ||
	    request->ip_bhs->bhs_opcode & ISCSI_BHS_OPCODE_TEXT_REQUEST) {
		ISER_DBG("copy to login buff");
		memcpy(iser_conn->login_req_buf, addr, len);
		request->ip_data_len = len;
	}

	return (0);
}

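/*
 * With ic_unmapped set, ip_data_mbuf holds a plain receive buffer rather
 * than an mbuf chain, so the payload can be copied out with byte offsets.
 */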
void
iser_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{
	/* If we have received data, copy it to the upper layer buffer. */
	if (ip->ip_data_mbuf)
		memcpy(addr, (const char *)ip->ip_data_mbuf + off, len);
}

/*
 * Allocate an icl_pdu with an empty BHS for the caller to fill in.
 */
struct icl_pdu *
iser_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_iser_pdu *iser_pdu;
	struct icl_pdu *ip;
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	iser_pdu = uma_zalloc(icl_pdu_zone, flags | M_ZERO);
	if (iser_pdu == NULL) {
		ISER_WARN("failed to allocate %zu bytes", sizeof(*iser_pdu));
		return (NULL);
	}

	iser_pdu->iser_conn = iser_conn;
	ip = &iser_pdu->icl_pdu;
	ip->ip_conn = ic;
	ip->ip_bhs = &iser_pdu->desc.iscsi_header;

	return (ip);
}

struct icl_pdu *
iser_conn_new_pdu(struct icl_conn *ic, int flags)
{
	return (iser_new_pdu(ic, flags));
}

void
iser_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	uma_zfree(icl_pdu_zone, iser_pdu);
}

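/*
 * Decode the 24-bit big-endian DataSegmentLength field from the BHS.
 */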
size_t
iser_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{
	uint32_t len = 0;

	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
}

void
iser_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	iser_pdu_free(ic, ip);
}

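/*
 * Control-type PDUs (NOP-Out, login, logout and text requests) are sent
 * with iser_send_control(); SCSI commands go through iser_send_command().
 */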
static bool
is_control_opcode(uint8_t opcode)
{
	bool is_control = false;

	switch (opcode & ISCSI_OPCODE_MASK) {
	case ISCSI_BHS_OPCODE_NOP_OUT:
	case ISCSI_BHS_OPCODE_LOGIN_REQUEST:
	case ISCSI_BHS_OPCODE_LOGOUT_REQUEST:
	case ISCSI_BHS_OPCODE_TEXT_REQUEST:
		is_control = true;
		break;
	case ISCSI_BHS_OPCODE_SCSI_COMMAND:
		is_control = false;
		break;
	default:
		ISER_ERR("unknown opcode %d", opcode);
	}

	return (is_control);
}

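/*
 * Queueing a PDU maps its TX descriptor and posts the send immediately;
 * nothing is buffered, so PDUs queued once teardown has started are
 * silently dropped.
 */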
void
iser_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	int ret;

	if (iser_conn->state != ISER_CONN_UP)
		return;

	ret = iser_initialize_headers(iser_pdu, iser_conn);
	if (ret) {
		ISER_ERR("Failed to map TX descriptor pdu %p", iser_pdu);
		return;
	}

	if (is_control_opcode(ip->ip_bhs->bhs_opcode)) {
		ret = iser_send_control(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send control pdu %p", iser_pdu);
	} else {
		ret = iser_send_command(iser_conn, iser_pdu);
		if (unlikely(ret))
			ISER_ERR("Failed to send command pdu %p", iser_pdu);
	}
}

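/*
 * Allocate and initialize a new iser connection.  ic_iser and ic_unmapped
 * mark the connection for ICL consumers as an iser offload whose data
 * buffers are not mbuf-mapped.
 */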
static struct icl_conn *
iser_new_conn(const char *name, struct mtx *lock)
{
	struct iser_conn *iser_conn;
	struct icl_conn *ic;

	refcount_acquire(&icl_iser_ncons);

	iser_conn = (struct iser_conn *)kobj_create(&icl_iser_class,
	    M_ICL_ISER, M_WAITOK | M_ZERO);
	if (!iser_conn) {
		ISER_ERR("failed to allocate iser conn");
		refcount_release(&icl_iser_ncons);
		return (NULL);
	}

	cv_init(&iser_conn->up_cv, "iser_cv");
	sx_init(&iser_conn->state_mutex, "iser_conn_state_mutex");
	mtx_init(&iser_conn->ib_conn.beacon.flush_lock, "iser_flush_lock",
	    NULL, MTX_DEF);
	cv_init(&iser_conn->ib_conn.beacon.flush_cv, "flush_cv");
	mtx_init(&iser_conn->ib_conn.lock, "iser_lock", NULL, MTX_DEF);

	ic = &iser_conn->icl_conn;
	ic->ic_lock = lock;
	ic->ic_name = name;
	ic->ic_offload = strdup("iser", M_TEMP);
	ic->ic_iser = true;
	ic->ic_unmapped = true;

	return (ic);
}


void
iser_conn_free(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	iser_conn_release(ic);
	mtx_destroy(&iser_conn->ib_conn.lock);
	cv_destroy(&iser_conn->ib_conn.beacon.flush_cv);
	mtx_destroy(&iser_conn->ib_conn.beacon.flush_lock);
	sx_destroy(&iser_conn->state_mutex);
	cv_destroy(&iser_conn->up_cv);
	kobj_delete((struct kobj *)iser_conn, M_ICL_ISER);
	refcount_release(&icl_iser_ncons);
}

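/*
 * Unlike the TCP backend, iser has no socket to take over: handoff only
 * allocates the receive descriptors and posts the initial receive
 * buffers, so the fd argument is unused.
 */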
int
iser_conn_handoff(struct icl_conn *ic, int fd)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	int error = 0;

	sx_xlock(&iser_conn->state_mutex);
	if (iser_conn->state != ISER_CONN_UP) {
		error = EINVAL;
		ISER_ERR("iser_conn %p state is %d, teardown started",
		    iser_conn, iser_conn->state);
		goto out;
	}

	error = iser_alloc_rx_descriptors(iser_conn, ic->ic_maxtags);
	if (error)
		goto out;

	error = iser_post_recvm(iser_conn, iser_conn->min_posted_rx);
	if (error)
		goto post_error;

	iser_conn->handoff_done = true;

	sx_xunlock(&iser_conn->state_mutex);
	return (error);

post_error:
	iser_free_rx_descriptors(iser_conn);
out:
	sx_xunlock(&iser_conn->state_mutex);
	return (error);
}

323
324 /**
325 * Frees all conn objects
326 */
327 static void
iser_conn_release(struct icl_conn * ic)328 iser_conn_release(struct icl_conn *ic)
329 {
330 struct iser_conn *iser_conn = icl_to_iser_conn(ic);
331 struct ib_conn *ib_conn = &iser_conn->ib_conn;
332 struct iser_conn *curr, *tmp;
333
334 mtx_lock(&ig.connlist_mutex);
335 /*
336 * Search for iser connection in global list.
337 * It may not be there in case of failure in connection establishment
338 * stage.
339 */
340 list_for_each_entry_safe(curr, tmp, &ig.connlist, conn_list) {
341 if (iser_conn == curr) {
342 ISER_WARN("found iser_conn %p", iser_conn);
343 list_del(&iser_conn->conn_list);
344 }
345 }
346 mtx_unlock(&ig.connlist_mutex);
347
348 /*
349 * In case we reconnecting or removing session, we need to
350 * release IB resources (which is safe to call more than once).
351 */
352 sx_xlock(&iser_conn->state_mutex);
353 iser_free_ib_conn_res(iser_conn, true);
354 sx_xunlock(&iser_conn->state_mutex);
355
356 if (ib_conn->cma_id != NULL) {
357 rdma_destroy_id(ib_conn->cma_id);
358 ib_conn->cma_id = NULL;
359 }
360
361 }

void
iser_conn_close(struct icl_conn *ic)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);

	ISER_INFO("closing conn %p", iser_conn);

	sx_xlock(&iser_conn->state_mutex);
	/*
	 * In case the iser connection is waiting on the condition variable
	 * (state PENDING) and we try to close it before it is established,
	 * we need to signal it so the connection release can proceed
	 * properly.
	 */
	if (!iser_conn_terminate(iser_conn) &&
	    iser_conn->state == ISER_CONN_PENDING)
		cv_signal(&iser_conn->up_cv);
	sx_xunlock(&iser_conn->state_mutex);
}

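/*
 * Establish the RDMA connection: create an RDMA CM id, start address
 * resolution and sleep on up_cv until the CM event handler
 * (iser_cma_handler) either brings the connection up or fails it.
 */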
int
iser_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{
	struct iser_conn *iser_conn = icl_to_iser_conn(ic);
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	int err = 0;

	iser_conn_release(ic);

	sx_xlock(&iser_conn->state_mutex);
	/* the device is known only --after-- address resolution */
	ib_conn->device = NULL;
	iser_conn->handoff_done = false;

	iser_conn->state = ISER_CONN_PENDING;

	ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
	    (void *)iser_conn, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = -PTR_ERR(ib_conn->cma_id);
		ISER_ERR("rdma_create_id failed: %d", err);
		goto id_failure;
	}

	err = rdma_resolve_addr(ib_conn->cma_id, from_sa, to_sa, 1000);
	if (err) {
		ISER_ERR("rdma_resolve_addr failed: %d", err);
		if (err < 0)
			err = -err;
		goto addr_failure;
	}

	ISER_DBG("before cv_wait: %p", iser_conn);
	cv_wait(&iser_conn->up_cv, &iser_conn->state_mutex);
	ISER_DBG("after cv_wait: %p", iser_conn);

	if (iser_conn->state != ISER_CONN_UP) {
		err = EIO;
		goto addr_failure;
	}

	err = iser_alloc_login_buf(iser_conn);
	if (err)
		goto addr_failure;
	sx_xunlock(&iser_conn->state_mutex);

	mtx_lock(&ig.connlist_mutex);
	list_add(&iser_conn->conn_list, &ig.connlist);
	mtx_unlock(&ig.connlist_mutex);

	return (0);

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	sx_xunlock(&iser_conn->state_mutex);
	return (err);
}


int
iser_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
{
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);

	*prvp = ip;
	iser_pdu->csio = csio;

	return (0);
}

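/*
 * Tear down a completed task: unregister the RDMA memory and undo the
 * DMA mappings for both data directions, then unmap the header
 * descriptor and free the PDU.
 */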
void
iser_conn_task_done(struct icl_conn *ic, void *prv)
{
	struct icl_pdu *ip = prv;
	struct icl_iser_pdu *iser_pdu = icl_to_iser_pdu(ip);
	struct iser_device *device = iser_pdu->iser_conn->ib_conn.device;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;

	if (iser_pdu->dir[ISER_DIR_IN]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_IN);
		iser_dma_unmap_task_data(iser_pdu,
		    &iser_pdu->data[ISER_DIR_IN],
		    DMA_FROM_DEVICE);
	}

	if (iser_pdu->dir[ISER_DIR_OUT]) {
		iser_unreg_rdma_mem(iser_pdu, ISER_DIR_OUT);
		iser_dma_unmap_task_data(iser_pdu,
		    &iser_pdu->data[ISER_DIR_OUT],
		    DMA_TO_DEVICE);
	}

	if (likely(tx_desc->mapped)) {
		ib_dma_unmap_single(device->ib_device, tx_desc->dma_addr,
		    ISER_HEADERS_LEN, DMA_TO_DEVICE);
		tx_desc->mapped = false;
	}

	iser_pdu_free(ic, ip);
}

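/*
 * Limits advertised to the iSCSI layer for parameter negotiation.
 */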
static int
iser_limits(struct icl_drv_limits *idl)
{

	idl->idl_max_recv_data_segment_length = 128 * 1024;
	idl->idl_max_send_data_segment_length = 128 * 1024;
	idl->idl_max_burst_length = 262144;
	idl->idl_first_burst_length = 65536;

	return (0);
}

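/*
 * Module load: create the PDU zone, register the "iser" ICL offload and
 * initialize the global device and connection lists.
 */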
static int
icl_iser_load(void)
{
	int error;

	ISER_DBG("Starting iSER datamover...");

	icl_pdu_zone = uma_zcreate("icl_iser_pdu", sizeof(struct icl_iser_pdu),
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	/* FIXME: Check rc */

	refcount_init(&icl_iser_ncons, 0);

	error = icl_register("iser", true, 0, iser_limits, iser_new_conn);
	KASSERT(error == 0, ("failed to register iser"));

	memset(&ig, 0, sizeof(struct iser_global));

	/* device init is called only after the first addr resolution */
	sx_init(&ig.device_list_mutex, "global_device_lock");
	INIT_LIST_HEAD(&ig.device_list);
	mtx_init(&ig.connlist_mutex, "iser_global_conn_lock", NULL, MTX_DEF);
	INIT_LIST_HEAD(&ig.connlist);
	sx_init(&ig.close_conns_mutex, "global_close_conns_lock");

	return (error);
}


static int
icl_iser_unload(void)
{
	ISER_DBG("Removing iSER datamover...");

	if (icl_iser_ncons != 0)
		return (EBUSY);

	sx_destroy(&ig.close_conns_mutex);
	mtx_destroy(&ig.connlist_mutex);
	sx_destroy(&ig.device_list_mutex);

	icl_unregister("iser", true);

	uma_zdestroy(icl_pdu_zone);

	return (0);
}

static int
icl_iser_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (icl_iser_load());
	case MOD_UNLOAD:
		return (icl_iser_unload());
	default:
		return (EINVAL);
	}
}

moduledata_t icl_iser_data = {
	.name = "icl_iser",
	.evhand = icl_iser_modevent,
	.priv = 0
};

DECLARE_MODULE(icl_iser, icl_iser_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_iser, icl, 1, 1, 1);
MODULE_DEPEND(icl_iser, ibcore, 1, 1, 1);
MODULE_DEPEND(icl_iser, linuxkpi, 1, 1, 1);
MODULE_VERSION(icl_iser, 1);