xref: /mOS-networking-stack/core/src/mos_api.c (revision 3ae9e016)
1 #include <assert.h>
2 #include <ctype.h>
3 #include <string.h>
4 #ifdef ENABLE_DEBUG_EVENT
5 #include <stdarg.h>
6 #endif
7 
8 #include "mtcp.h"
9 #include "mos_api.h"
10 #include "debug.h"
11 #include "config.h"
12 #include "ip_in.h"
13 #include "tcp_out.h"
14 /*----------------------------------------------------------------------------*/
/* Two-operand helpers; note that each argument may be evaluated twice. */
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
/*
 * Cursor helpers used while scanning a filter rule word by word:
 * SKIP_SPACES advances x past whitespace, SKIP_CHAR advances x past the
 * current run of non-whitespace characters.  Both stop at the terminating
 * NUL.  The byte is converted to unsigned char before it reaches isspace(),
 * because passing a negative plain char to a <ctype.h> function is undefined.
 * Each macro expands to a complete statement (the trailing ';' is kept so
 * existing call sites keep compiling unchanged).
 */
#define SKIP_SPACES(x) while (*(x) && isspace((unsigned char)*(x))) (x)++;
#define SKIP_CHAR(x) while (*(x) && !isspace((unsigned char)*(x))) (x)++;

/*
 * Keywords recognised in flow filter rules.  Most of them carry a trailing
 * blank so that a prefix comparison only matches a whole word.
 */
#define KW_AND       "and "
#define KW_OR        "or "
#define KW_NOT       "not "
#define KW_TCP       "tcp"
#define KW_NOT_TCP   "!tcp"
#define KW_NOT_TCP2  "not tcp"
#define KW_SRC       "src "
#define KW_DST       "dst "
#define KW_HOST      "host "
#define KW_NET       "net "
#define KW_MASK      "mask "
#define KW_PORT      "port "
#define KW_PORTRANGE "portrange "
33 /*----------------------------------------------------------------------------*/
34 int
35 IsValidFlowRule(char *cf)
36 {
37 	char *word;
38 	int skip_word = 0;
39 
40 	/* '!tcp' or 'not tcp' are also not supported in TCP flow filter */
41 	if (strstr(cf, KW_NOT_TCP) || strstr(cf, KW_NOT_TCP2)) {
42 		TRACE_ERROR("'!tcp' or 'not tcp' is not a valid rule for TCP flow monitor.\n");
43 		return FALSE;
44 	}
45 
46 	/* verify that the rule contains flow-related keywords only */
47 	word = cf;
48 	SKIP_SPACES(word);
49 
50 	/* while (browse the rule by words) */
51 	while (*word) {
52 		if (skip_word) {
53 			skip_word = 0;
54 			SKIP_CHAR(word);
55 			SKIP_SPACES(word);
56 			continue;
57 		}
58 		/* parse the keyword */
59 		/* case "tcp" "src" "dst" "not' "and" "or" -> move to the next word */
60 		if (!strncmp(word, KW_TCP, sizeof(KW_TCP) - 1) ||
61 			!strncmp(word, KW_SRC, sizeof(KW_SRC) - 1) ||
62 			!strncmp(word, KW_DST, sizeof(KW_DST) - 1) ||
63 			!strncmp(word, KW_NOT, sizeof(KW_NOT) - 1) ||
64 			!strncmp(word, KW_AND, sizeof(KW_AND) - 1) ||
65 			!strncmp(word, KW_OR, sizeof(KW_OR) - 1)) {
66 			skip_word = 0;
67 		}
68 		/* case "net" "mask" "port" "portrange" -> skip a word (= param) */
69 		else if (!strncmp(word, KW_HOST, sizeof(KW_HOST) - 1) ||
70 				 !strncmp(word, KW_NET, sizeof(KW_NET) - 1) ||
71 				 !strncmp(word, KW_MASK, sizeof(KW_MASK) - 1) ||
72 				 !strncmp(word, KW_PORT, sizeof(KW_PORT) - 1) ||
73 				 !strncmp(word, KW_PORTRANGE, sizeof(KW_PORTRANGE) - 1)) {
74 			skip_word = 1;
75 		}
76 		/* default (rule has any invalid keyword) -> return error */
77 		else {
78 			TRACE_ERROR("Invalid keyword in filter (%s)\n", word);
79 			return FALSE;
80 		}
81 
82 		SKIP_CHAR(word);
83 		SKIP_SPACES(word);
84 	}
85 
86 	return TRUE;
87 }
88 /*----------------------------------------------------------------------------*/
89 /* Assign an address range (specified by ft) to monitor via sock */
90 int
91 mtcp_bind_monitor_filter(mctx_t mctx, int sockid, monitor_filter_t ft)
92 {
93 	socket_map_t sock;
94 	mtcp_manager_t mtcp;
95 
96 	mtcp = GetMTCPManager(mctx);
97 	if (!mtcp) {
98 		errno = EACCES;
99 		return -1;
100 	}
101 
102 	/* if filter is not set, do nothing and return */
103 	if (ft == NULL) {
104 		TRACE_ERROR("filter not set!\n");
105 		return 0;
106 	}
107 
108 	/* retrieve the socket */
109 	if (sockid < 0 || sockid >= g_config.mos->max_concurrency) {
110 		errno = EBADF;
111 		TRACE_ERROR("sockid is invalid!\n");
112 		return -1;
113 	}
114 	sock = &mtcp->msmap[sockid];
115 
116 	/* check socket type */
117 	switch (sock->socktype) {
118 	case MOS_SOCK_MONITOR_RAW:
119 		/* For MONITOR_RAW type, allow any bpf rule */
120 		if (!ft->raw_pkt_filter) {
121 			TRACE_ERROR("raw pkt filter is null");
122 			return 0;
123 		}
124 		if (SET_BPFFILTER(&sock->monitor_listener->raw_pkt_fcode,
125 						  ft->raw_pkt_filter) < 0) {
126 			TRACE_ERROR("Invalid filter expression!\n");
127 			errno = EINVAL;
128 			return -1;
129 		}
130 		break;
131 	case MOS_SOCK_MONITOR_STREAM:
132 		/* For MONITOR_STREAM_PASSIVE type, restrict to flow-level keywords */
133 		if (ft->stream_syn_filter) {
134 			if (!IsValidFlowRule(ft->stream_syn_filter)) {
135 				errno = EINVAL;
136 				return -1;
137 			}
138 			if (SET_BPFFILTER(&sock->monitor_listener->stream_syn_fcode,
139 							  ft->stream_syn_filter) < 0) {
140 				TRACE_ERROR("Invalid filter expression!\n");
141 				errno = EINVAL;
142 				return -1;
143 			}
144 		}
145 		if (ft->stream_orphan_filter) {
146 			if (!IsValidFlowRule(ft->stream_orphan_filter)) {
147 				errno = EINVAL;
148 				return -1;
149 			}
150 			if (SET_BPFFILTER(&sock->monitor_listener->stream_orphan_fcode,
151 							  ft->stream_orphan_filter) < 0) {
152 				TRACE_ERROR("Invalid filter expression!\n");
153 				errno = EINVAL;
154 				return -1;
155 			}
156 		}
157 		break;
158 	default:
159 		/* return error for other socket types */
160 		errno = ENOPROTOOPT;
161 		TRACE_ERROR("Invalid sock type!\n");
162 		return -1;
163 	}
164 
165 	return 0;
166 }
167 /*----------------------------------------------------------------------------*/
168 void
169 mtcp_app_join(mctx_t mctx)
170 {
171 	mtcp_manager_t mtcp = GetMTCPManager(mctx);
172 	if (!mtcp) return;
173 
174 	RunPassiveLoop(mtcp);
175 	return;
176 }
177 /*----------------------------------------------------------------------------*/
178 /* Callback only functions */
179 /*----------------------------------------------------------------------------*/
180 void
181 mtcp_set_uctx(mctx_t mctx, int msock, void *uctx)
182 {
183 	mtcp_manager_t mtcp;
184 
185 	mtcp = GetMTCPManager(mctx);
186 	if (!mtcp) {
187 		return;
188 	}
189 
190 	/* check if the calling thread is in MOS context */
191 	if (mtcp->ctx->thread != pthread_self())
192 		return;
193 
194 	if (msock < 0 || msock >= g_config.mos->max_concurrency) {
195 		TRACE_API("Socket id %d out of range.\n", msock);
196 		errno = EBADF;
197 		return;
198 	}
199 
200 	socket_map_t socket = &mtcp->msmap[msock];
201 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE)
202 		socket->monitor_stream->uctx = uctx;
203 	else if (socket->socktype == MOS_SOCK_MONITOR_STREAM ||
204 			 socket->socktype == MOS_SOCK_MONITOR_RAW)
205 		socket->monitor_listener->uctx = uctx;
206 }
207 /*----------------------------------------------------------------------------*/
208 void *
209 mtcp_get_uctx(mctx_t mctx, int msock)
210 {
211 	mtcp_manager_t mtcp;
212 
213 	mtcp = GetMTCPManager(mctx);
214 	if (!mtcp) {
215 		errno = EACCES;
216 		return NULL;
217 	}
218 
219 	/* check if the calling thread is in MOS context */
220 	if (mtcp->ctx->thread != pthread_self()) {
221 		errno = EPERM;
222 		return NULL;
223 	}
224 
225 	if (msock < 0 || msock >= g_config.mos->max_concurrency) {
226 		TRACE_API("Socket id %d out of range.\n", msock);
227 		errno = EBADF;
228 		return NULL;
229 	}
230 
231 	socket_map_t socket = &mtcp->msmap[msock];
232 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE)
233 		return socket->monitor_stream->uctx;
234 	else if (socket->socktype == MOS_SOCK_MONITOR_STREAM ||
235 			 socket->socktype == MOS_SOCK_MONITOR_RAW)
236 		return socket->monitor_listener->uctx;
237 	else
238 		return NULL;
239 }
240 /*----------------------------------------------------------------------------*/
241 ssize_t
242 mtcp_peek(mctx_t mctx, int msock, int side, char *buf, size_t len)
243 {
244 	int copylen, rc;
245 	struct tcp_stream *cur_stream;
246 	mtcp_manager_t mtcp;
247 	socket_map_t sock;
248 
249 	copylen = rc = 0;
250 	mtcp = GetMTCPManager(mctx);
251 	if (!mtcp) {
252 		errno = EACCES;
253 		return -1;
254 	}
255 
256 	/* check if the calling thread is in MOS context */
257 	if (mtcp->ctx->thread != pthread_self()) {
258 		errno = EPERM;
259 		return -1;
260 	}
261 
262 	/* check if the socket is monitor stream */
263 	sock = &mtcp->msmap[msock];
264 	if (sock->socktype != MOS_SOCK_MONITOR_STREAM_ACTIVE) {
265 		TRACE_DBG("Invalid socket type!\n");
266 		errno = EBADF;
267 		return -1;
268 	}
269 
270 	if (side != MOS_SIDE_CLI && side != MOS_SIDE_SVR) {
271 		TRACE_ERROR("Invalid side requested!\n");
272 		exit(EXIT_FAILURE);
273 		return -1;
274 	}
275 
276 	struct tcp_stream *mstrm = sock->monitor_stream->stream;
277 	cur_stream = (side == mstrm->side) ? mstrm : mstrm->pair_stream;
278 
279 	if (!cur_stream || !cur_stream->buffer_mgmt) {
280 		TRACE_DBG("Stream is NULL!! or buffer management is disabled\n");
281 		errno = EINVAL;
282 		return -1;
283 	}
284 
285 	/* Check if the read was not just due to syn-ack recv */
286 	if (cur_stream->rcvvar != NULL &&
287 	    cur_stream->rcvvar->rcvbuf != NULL) {
288 		tcprb_t *rcvbuf = cur_stream->rcvvar->rcvbuf;
289 		loff_t *poff = &sock->monitor_stream->peek_offset[cur_stream->side];
290 
291 		rc = tcprb_ppeek(rcvbuf, (uint8_t *)buf, len, *poff);
292 		if (rc < 0) {
293 			errno = ENODATA;
294 			return -1;
295 		}
296 
297 		*poff += rc;
298 		UNUSED(copylen);
299 
300 		return rc;
301 	} else {
302 		TRACE_DBG("Stream hasn't yet been initialized!\n");
303 		rc = 0;
304 	}
305 
306 	return rc;
307 }
308 /*----------------------------------------------------------------------------*/
309 /**
310  * Copies from the frags.. returns no. of bytes copied to buf
311  */
312 static inline int
313 ExtractPayloadFromFrags(struct tcp_ring_buffer *rcvbuf, char *buf,
314 						size_t count, off_t seq_num)
315 {
316 	int cpbytesleft;
317 	struct fragment_ctx *it;
318 
319 	it = rcvbuf->fctx;
320 	cpbytesleft = count;
321 	/* go through each frag */
322 	while (it) {
323 		/* first check whether sequent number matches */
324 		if (TCP_SEQ_BETWEEN(seq_num, it->seq, it->seq + it->len)) {
325 			/* copy buf starting from seq# seq_num */
326 			/* copy the MIN of seq-range and bytes to be copied */
327 			memcpy(buf + count - cpbytesleft,
328 			       rcvbuf->head + seq_num - rcvbuf->head_seq,
329 			       MIN(it->len - (seq_num - it->seq), cpbytesleft));
330 			/* update target seq num */
331 			seq_num += it->len - (seq_num - it->seq);
332 			/* update cpbytes left */
333 			cpbytesleft -= it->len - (seq_num - it->seq);
334 			if (cpbytesleft == 0)
335 				break;
336 		}
337 		it = it->next;
338 	}
339 
340 	count -= cpbytesleft;
341 
342 	/* return number of bytes copied */
343 	return count;
344 }
345 /*----------------------------------------------------------------------------*/
346 /* Please see in-code comments for description */
347 ssize_t
348 mtcp_ppeek(mctx_t mctx, int msock, int side,
349 			  char *buf, size_t count, uint64_t off)
350 {
351 	mtcp_manager_t mtcp;
352 	struct tcp_stream *cur_stream;
353 	int rc;
354 	socket_map_t sock;
355 
356 	mtcp = GetMTCPManager(mctx);
357 	if (!mtcp) {
358 		errno = EACCES;
359 		goto ppeek_error;
360 	}
361 
362 	/* check if the calling thread is in MOS context */
363 	if (mtcp->ctx->thread != pthread_self()) {
364 		errno = EPERM;
365 		goto ppeek_error;
366 	}
367 
368 	/* check if the socket is monitor stream */
369 	sock = &mtcp->msmap[msock];
370 	if (sock->socktype != MOS_SOCK_MONITOR_STREAM_ACTIVE) {
371 		TRACE_DBG("Invalid socket type!\n");
372 		errno = ESOCKTNOSUPPORT;
373 		goto ppeek_error;
374 	}
375 
376 	if (side != MOS_SIDE_CLI && side != MOS_SIDE_SVR) {
377 		TRACE_ERROR("Invalid side requested!\n");
378 		exit(EXIT_FAILURE);
379 		return -1;
380 	}
381 
382 	struct tcp_stream *mstrm = sock->monitor_stream->stream;
383 	cur_stream = (side == mstrm->side) ? mstrm : mstrm->pair_stream;
384 
385 	if (!cur_stream || !cur_stream->buffer_mgmt) {
386 		TRACE_DBG("Stream is either NULL or ring buffer is not managed!!\n");
387 		errno = EACCES;
388 		goto ppeek_error;
389 	}
390 
391 	rc = 0;
392 	/* Check if the read was not just due to syn-ack recv */
393 	if (cur_stream->rcvvar != NULL &&
394 	    cur_stream->rcvvar->rcvbuf != NULL) {
395 		tcprb_t *rcvbuf = cur_stream->rcvvar->rcvbuf;
396 		return tcprb_ppeek(rcvbuf, (uint8_t *)buf, count, off);
397 	} else {
398 		errno = EPERM;
399 		goto ppeek_error;
400 	}
401 
402 	return rc;
403 
404  ppeek_error:
405 	return -1;
406 }
407 /*----------------------------------------------------------------------------*/
408 #ifdef MTCP_CB_GETCURPKT_CREATE_COPY
409 static __thread unsigned char local_frame[ETHERNET_FRAME_LEN];
410 inline struct pkt_info *
411 ClonePacketCtx(struct pkt_info *to, unsigned char *frame, struct pkt_ctx *from)
412 {
413 	/* only memcpy till the last field before ethh */
414 	/* memcpy(to, from, PCTX_COPY_LEN); */
415 	memcpy(to, &(from->p), PKT_INFO_LEN);
416 	/* memcpy the entire ethernet frame */
417 	assert(from);
418 	assert(from->p.eth_len > 0);
419 	assert(from->p.eth_len <= ETHERNET_FRAME_LEN);
420 	memcpy(frame, from->p.ethh, from->p.eth_len);
421 	/* set iph */
422 	to->ethh = (struct ethhdr *)frame;
423 	/* set iph */
424 	to->iph = from->p.iph ?
425 		(struct iphdr *)((uint8_t *)(frame + ETHERNET_HEADER_LEN)) : NULL;
426 	/* set tcph */
427 	to->tcph = from->p.tcph ?
428 		(struct tcphdr *)(((uint8_t *)(to->iph)) + (to->iph->ihl<<2)) : NULL;
429 	/* set payload */
430 	to->payload = from->p.tcph ?
431 		((uint8_t *)(to->tcph) + (to->tcph->doff<<2)) : NULL;
432 	return to;
433 }
434 /*----------------------------------------------------------------------------*/
435 int
436 mtcp_getlastpkt(mctx_t mctx, int sock, int side, struct pkt_info *pkt)
437 {
438 	mtcp_manager_t mtcp;
439 	socket_map_t socket;
440 	struct tcp_stream *cur_stream;
441 	struct pkt_ctx *cur_pkt_ctx;
442 
443 	mtcp = GetMTCPManager(mctx);
444 	if (!mtcp) {
445 		errno = EACCES;
446 		return -1;
447 	}
448 
449 	/* check if the calling thread is in MOS context */
450 	if (mtcp->ctx->thread != pthread_self()) {
451 		errno = EPERM;
452 		return -1;
453 	}
454 
455 	/* check if the socket is monitor stream */
456 	socket = &mtcp->msmap[sock];
457 
458 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) {
459 		if (side != MOS_SIDE_CLI && side != MOS_SIDE_SVR) {
460 			TRACE_ERROR("Invalid side requested!\n");
461 			exit(EXIT_FAILURE);
462 			return -1;
463 		}
464 
465 		struct tcp_stream *mstrm = socket->monitor_stream->stream;
466 		cur_stream = (side == mstrm->side) ? mstrm : mstrm->pair_stream;
467 
468 		cur_pkt_ctx = &cur_stream->last_pctx;
469 		if (!cur_pkt_ctx->p.ethh) {
470 			errno = ENODATA;
471 			return -1;
472 		}
473 	} else if (socket->socktype == MOS_SOCK_MONITOR_RAW) {
474 		cur_pkt_ctx = mtcp->pctx;
475 	} else if (socket->socktype == MOS_SOCK_MONITOR_STREAM) {
476 		/*
477 		 * if it is a monitor socket, then this means that
478 		 * this is a request for an orphan tcp packet
479 		 */
480 		cur_pkt_ctx = mtcp->pctx;
481 	} else {
482 		TRACE_DBG("Invalid socket type!\n");
483 		errno = EBADF;
484 		return -1;
485 	}
486 
487 	ClonePacketCtx(pkt, local_frame, cur_pkt_ctx);
488 	return 0;
489 }
490 #else
491 /*----------------------------------------------------------------------------*/
492 int
493 mtcp_getlastpkt(mctx_t mctx, int sock, int side, struct pkt_ctx **pctx)
494 {
495 	mtcp_manager_t mtcp;
496 
497 	mtcp = GetMTCPManager(mctx);
498 	if (!mtcp) {
499 		errno = EACCES;
500 		return -1;
501 	}
502 
503 	/* check if the calling thread is in MOS context */
504 	if (mtcp->ctx->thread != pthread_self()) {
505 		errno = EPERM;
506 		return -1;
507 	}
508 	/* just pass direct pointer */
509 	*pctx = mtcp->pctx;
510 
511 	return 0;
512 }
513 #endif
514 /*----------------------------------------------------------------------------*/
515 /** Disable events from the monitor stream socket
516  * @param [in] mtcp: mtcp_manager
517  * @param [in] sock: socket
518  *
519  * returns 0 on success, -1 on failure
520  *
521  * This is used for flow management based monitoring sockets
522  */
523 int
524 RemoveMonitorEvents(mtcp_manager_t mtcp, socket_map_t socket, int side)
525 {
526 	struct mon_stream *mstream;
527 	struct mon_listener *mlistener;
528 
529 	if (mtcp == NULL) {
530 		TRACE_DBG("mtcp is not defined!!!\n");
531 		errno = EACCES;
532 		return -1;
533 	}
534 
535 	switch (socket->socktype) {
536 	case MOS_SOCK_MONITOR_STREAM_ACTIVE:
537 		mstream = socket->monitor_stream;
538 		if (mstream == NULL) {
539 			TRACE_ERROR("Mon Stream does not exist!\n");
540 			/* exit(-1); */
541 			errno = ENODATA;
542 			return -1;
543 		}
544 
545 		if (side == MOS_SIDE_SVR) mstream->server_mon = 0;
546 		else if (side == MOS_SIDE_CLI) mstream->client_mon = 0;
547 
548 		if (mstream->server_mon == 0 && mstream->client_mon == 0) {
549 #ifdef NEWEV
550 			/*
551 			 * if stree_dontcare is NULL, then we know that all
552 			 * events have already been disabled
553 			 */
554 			if (mstream->stree_pre_rcv != NULL) {
555 				stree_dec_ref(mtcp->ev_store, mstream->stree_dontcare);
556 				stree_dec_ref(mtcp->ev_store, mstream->stree_pre_rcv);
557 				stree_dec_ref(mtcp->ev_store, mstream->stree_post_snd);
558 
559 				mstream->stree_dontcare = NULL;
560 				mstream->stree_pre_rcv = NULL;
561 				mstream->stree_post_snd = NULL;
562 			}
563 #else
564 			/* no error checking over here..
565 			 * but its okay.. this code is
566 			 * deprecated
567 			 */
568 			CleanupEvP(&mstream->dontcare_evp);
569 			CleanupEvP(&mstream->pre_tcp_evp);
570 			CleanupEvP(&mstream->post_tcp_evp);
571 #endif
572 		}
573 		break;
574 	case MOS_SOCK_MONITOR_STREAM:
575 		mlistener = socket->monitor_listener;
576 		if (mlistener == NULL) {
577 			TRACE_ERROR("Mon listener does not exist!\n");
578 			errno = ENODATA;
579 			return -1;
580 		}
581 
582 		if (side == MOS_SIDE_SVR) mlistener->server_mon = 0;
583 		else if (side == MOS_SIDE_CLI) mlistener->client_mon = 0;
584 
585 		if (mlistener->server_mon == 0 && mlistener->client_mon == 0) {
586 #ifdef NEWEV
587 			/*
588 			 * if stree_dontcare is NULL, then we know that all
589 			 * events have already been disabled
590 			 */
591 			if (mlistener->stree_pre_rcv != NULL) {
592 				stree_dec_ref(mtcp->ev_store, mlistener->stree_dontcare);
593 				stree_dec_ref(mtcp->ev_store, mlistener->stree_pre_rcv);
594 				stree_dec_ref(mtcp->ev_store, mlistener->stree_post_snd);
595 
596 				mlistener->stree_dontcare = NULL;
597 				mlistener->stree_pre_rcv = NULL;
598 				mlistener->stree_post_snd = NULL;
599 			}
600 #else
601 			/* no error checking over here..
602 			 * but its okay.. this code is
603 			 * deprecated
604 			 */
605 			CleanupEvB(mtcp, &mlistener->dontcare_evb);
606 			CleanupEvB(mtcp, &mlistener->pre_tcp_evb);
607 			CleanupEvB(mtcp, &mlistener->post_tcp_evb);
608 #endif
609 		}
610 		break;
611 	default:
612 		TRACE_ERROR("Invalid socket type!\n");
613 	}
614 
615 	return 0;
616 }
617 /*----------------------------------------------------------------------------*/
618 /**
619  * Disable monitoring based on side variable.
620  */
621 int
622 mtcp_cb_stop(mctx_t mctx, int sock, int side)
623 {
624 	mtcp_manager_t mtcp;
625 	socket_map_t socket;
626 	struct tcp_stream *stream;
627 	struct socket_map *walk;
628 	uint8_t mgmt;
629 
630 	mtcp = GetMTCPManager(mctx);
631 	if (!mtcp) {
632 		errno = EACCES;
633 		return -1;
634 	}
635 
636 	socket = &mtcp->msmap[sock];
637 
638 	/* works for both monitor listener and stream sockets */
639 	RemoveMonitorEvents(mtcp, socket, side);
640 
641 	/* passive monitoring socket is not connected to any stream */
642 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM)
643 		return 0;
644 
645 	if (side == MOS_SIDE_CLI) {
646 		/* see if the associated stream requires monitoring any more */
647 		stream = (socket->monitor_stream->stream->side == MOS_SIDE_CLI) ?
648 			socket->monitor_stream->stream :
649 			socket->monitor_stream->stream->pair_stream;
650 
651 		mgmt = 0;
652 		SOCKQ_FOREACH_START(walk, &stream->msocks) {
653 			if (walk->monitor_stream->client_mon == 1) {
654 				mgmt = 1;
655 				break;
656 			}
657 		} SOCKQ_FOREACH_END;
658 		/* if all streams have mgmt off, then tag the stream for destruction */
659 		if (mgmt == 0) {
660 			stream = (socket->monitor_stream->stream->side == MOS_SIDE_CLI) ?
661 				socket->monitor_stream->stream :
662 				socket->monitor_stream->stream->pair_stream;
663 			stream->status_mgmt = 0;
664 		}
665 	}
666 
667 	if (side == MOS_SIDE_SVR) {
668 		/* see if the associated stream requires monitoring any more */
669 		stream = (socket->monitor_stream->stream->side == MOS_SIDE_SVR) ?
670 			socket->monitor_stream->stream :
671 			socket->monitor_stream->stream->pair_stream;
672 		mgmt = 0;
673 		SOCKQ_FOREACH_START(walk, &stream->msocks) {
674 			if (walk->monitor_stream->server_mon == 1) {
675 				mgmt = 1;
676 				break;
677 			}
678 		} SOCKQ_FOREACH_END;
679 		/* if all streams have mgmt off, then tag the stream for destruction */
680 		if (mgmt == 0) {
681 			stream = (socket->monitor_stream->stream->side == MOS_SIDE_SVR) ?
682 				socket->monitor_stream->stream :
683 				socket->monitor_stream->stream->pair_stream;
684 			stream->status_mgmt = 0;
685 		}
686 	}
687 
688 	return 0;
689 }
690 /*----------------------------------------------------------------------------*/
691 /**
692  * send a RST packet to the TCP stream (uni-directional)
693  */
694 static inline void
695 SendRSTPacketStandalone(mtcp_manager_t mtcp, struct tcp_stream *stream) {
696 	SendTCPPacketStandalone(mtcp,
697 				stream->saddr, stream->sport, stream->daddr, stream->dport,
698 				stream->snd_nxt, stream->rcv_nxt, 0, TCP_FLAG_RST | TCP_FLAG_ACK,
699 				NULL, 0, mtcp->cur_ts, 0);
700 }
701 /*----------------------------------------------------------------------------*/
702 /**
703  * Reset the connection (send RST packets to both sides)
704  */
705 int
706 mtcp_reset_conn(mctx_t mctx, int sock)
707 {
708 	mtcp_manager_t mtcp;
709 	socket_map_t socket;
710 
711 	mtcp = GetMTCPManager(mctx);
712 	if (!mtcp) {
713 		errno = EACCES;
714 		return -1;
715 	}
716 
717 	socket = &mtcp->msmap[sock];
718 
719 	/* passive monitoring socket is not connected to any stream */
720 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM) {
721 		errno = EINVAL;
722 		return -1;
723 	}
724 
725 	/* send RST packets to the both sides */
726 	SendRSTPacketStandalone(mtcp, socket->monitor_stream->stream);
727 	SendRSTPacketStandalone(mtcp, socket->monitor_stream->stream->pair_stream);
728 
729 	return 0;
730 }
731 /*----------------------------------------------------------------------------*/
732 uint32_t
733 mtcp_cb_get_ts(mctx_t mctx)
734 {
735 	mtcp_manager_t mtcp;
736 
737 	mtcp = GetMTCPManager(mctx);
738 	if (!mtcp) {
739 		TRACE_DBG("Can't access MTCP manager!\n");
740 		errno = EACCES;
741 		return 0;
742 	}
743 
744 	/* check if the calling thread is in MOS context */
745 	if (mtcp->ctx->thread != pthread_self()) {
746 		errno = EPERM;
747 		return 0;
748 	}
749 
750 	return TS_TO_USEC(mtcp->cur_ts);
751 }
752 /*----------------------------------------------------------------------------*/
/*
 * Macros related to getpeername.
 *
 * Each value is a byte count measured from the start of a two-element
 * sockaddr array: TILL_SVR* end inside the first element (after its family,
 * port or address field), TILL_CLI* end at the same field of the second
 * element.  The expansions are parenthesized so they stay correct inside
 * larger expressions.
 */
#define TILL_SVRADDR		(offsetof(struct sockaddr_in, sin_zero))
#define TILL_SVRPORT		(offsetof(struct sockaddr_in, sin_addr))
#define TILL_SVRFAMILY		(offsetof(struct sockaddr_in, sin_port))
#define TILL_CLIADDR		(sizeof(struct sockaddr) + TILL_SVRADDR)
#define TILL_CLIPORT		(sizeof(struct sockaddr) + TILL_SVRPORT)
#define TILL_CLIFAMILY		(sizeof(struct sockaddr) + TILL_SVRFAMILY)
760 
761 int
762 mtcp_getpeername(mctx_t mctx, int sockfd, struct sockaddr *saddr,
763 				 socklen_t *addrlen, int side)
764 {
765 	mtcp_manager_t mtcp;
766 	socket_map_t socket;
767 	struct tcp_stream *stream;
768 	struct sockaddr_in *sin;
769 	int rc;
770 
771 	mtcp = GetMTCPManager(mctx);
772 	if (!mtcp) {
773 		TRACE_DBG("Can't access MTCP manager!\n");
774 		errno = EACCES;
775 		return -1;
776 	}
777 
778 	/* check if sockfd is within limits */
779 	if (sockfd < 0 || sockfd >= g_config.mos->max_concurrency) {
780 		TRACE_API("Socket id %d out of range.\n", sockfd);
781 		errno = EBADF;
782 		return -1;
783 	}
784 
785 	/* check if the calling thread is in MOS context */
786 	if (mtcp->ctx->thread != pthread_self()) {
787 		errno = EPERM;
788 		return -1;
789 	}
790 
791 	socket = &mtcp->msmap[sockfd];
792 	sin = (struct sockaddr_in *)saddr;
793 	rc = 0;
794 
795 	/* retrieve both streams */
796 	stream = socket->monitor_stream->stream;
797 
798 	if (side != stream->side)
799 		stream = stream->pair_stream;
800 
801 	if (stream == NULL) {
802 		errno = ENOTCONN;
803 		return -1;
804 	}
805 
806 	/* reset to 2 * sizeof(struct sockaddr) if addrlen is too big */
807 	if (*addrlen > 2 * sizeof(struct sockaddr))
808 		*addrlen = 2 * sizeof(struct sockaddr);
809 
810 	/* according per manpage, address can be truncated */
811 	switch (*addrlen) {
812 	case (2 * sizeof(struct sockaddr)):
813 	case TILL_CLIADDR:
814 		sin[1].sin_addr.s_addr = stream->side == MOS_SIDE_SVR ?
815 								 stream->daddr : stream->saddr;
816 	case TILL_CLIPORT:
817 		sin[1].sin_port = stream->side == MOS_SIDE_SVR ?
818 						  stream->dport : stream->sport;
819 	case TILL_CLIFAMILY:
820 		sin[1].sin_family = AF_INET;
821 	case (sizeof(struct sockaddr)):
822 	case TILL_SVRADDR:
823 		sin->sin_addr.s_addr = stream->side == MOS_SIDE_SVR ?
824 							   stream->saddr : stream->daddr;
825 	case TILL_SVRPORT:
826 		sin->sin_port = stream->side == MOS_SIDE_SVR ?
827 						stream->sport : stream->dport;
828 	case TILL_SVRFAMILY:
829 		sin->sin_family = AF_INET;
830 		break;
831 	default:
832 		rc = -1;
833 		*addrlen = 0xFFFF;
834 		errno = EINVAL;
835 	}
836 
837 	return rc;
838 }
839 /*----------------------------------------------------------------------------*/
840 int
841 mtcp_setlastpkt(mctx_t mctx, int sock, int side, off_t offset,
842 		byte *data, uint16_t datalen, int option)
843 {
844 	mtcp_manager_t mtcp;
845 	struct pkt_ctx *cur_pkt_ctx;
846 	struct ethhdr *ethh;
847 	struct iphdr *iph;
848 	struct tcphdr *tcph;
849 	unsigned char *payload;
850 
851 #if 0
852 	socket_map_t socket;
853 	struct tcp_stream *cur_stream;
854 #endif
855 
856 	/* checking if mtcp is valid */
857 	mtcp = GetMTCPManager(mctx);
858 	if (!mtcp) {
859 		errno = EACCES;
860 		TRACE_ERROR("Invalid mtcp!\n");
861 		return -1;
862 	}
863 
864 	/* check if the calling thread is in MOS context */
865 	if (mtcp->ctx->thread != pthread_self()) {
866 		errno = EPERM;
867 		TRACE_ERROR("Invalid thread id!\n");
868 		return -1;
869 	}
870 
871 #if 0
872 	/* check if the socket is monitor stream */
873 	socket = &mtcp->msmap[sock];
874 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) {
875 		if (side != MOS_SIDE_CLI && side != MOS_SIDE_SVR) {
876 			TRACE_ERROR("Invalid side requested!\n");
877 			exit(EXIT_FAILURE);
878 			return -1;
879 		}
880 
881 		struct tcp_stream *mstrm = socket->monitor_stream->stream;
882 		cur_stream = (side == mstrm->side) ? mstrm : mstrm->pair_stream;
883 
884 		if (!cur_stream->allow_pkt_modification)
885 			return -1;
886 	} else if (socket->socktype != MOS_SOCK_MONITOR_RAW) {
887 		TRACE_ERROR("Invalid socket type!\n");
888 		exit(EXIT_FAILURE);
889 		return -1;
890 	}
891 #endif
892 
893 	/* see if cur_pkt_ctx is valid */
894 	cur_pkt_ctx = mtcp->pctx;
895 	if (cur_pkt_ctx == NULL) {
896 		TRACE_ERROR("pctx is NULL!\n");
897 		errno = ENODATA;
898 		return -1;
899 	}
900 
901 	/* check if offset is valid */
902 	if (offset < 0) {
903 		TRACE_ERROR("Invalid offset position!\n");
904 		errno = EINVAL;
905 		return -1;
906 	}
907 
908 	if (__builtin_popcount(option & (MOS_DROP | MOS_CHOMP |
909 					 MOS_INSERT | MOS_OVERWRITE)) != 1) {
910 		TRACE_ERROR("mtcp_setlastpkt() function only allows one of "
911 			    "(MOS_DROP | MOS_CHOMP | MOS_INSERT | MOS_OVERWRITE) "
912 			    "to be set at a time.\n");
913 		errno = EAGAIN;
914 		return -1;
915 	}
916 
917 	/* drop pkt has the highest priority */
918 	if (option & MOS_DROP) {
919 		mtcp->pctx->forward = 0;
920 		return 0;
921 	} else if (option & MOS_ETH_HDR) {
922 		/* validity test */
923 		if ((ethh=cur_pkt_ctx->p.ethh) == NULL ||
924 		    offset + datalen > sizeof(struct ethhdr)) {
925 			TRACE_ERROR("Ethernet setting has gone out of bounds "
926 				    "(offset: %ld, datalen: %d)\n",
927 				    offset, datalen);
928 			errno = EINVAL;
929 			return -1;
930 		}
931 		if (option & MOS_CHOMP) {
932 			TRACE_ERROR("Illegal call. "
933 				    "Ethernet header can't be chopped down!\n");
934 			errno = EACCES;
935 			return -1;
936 		} else if (option & MOS_INSERT) {
937 			TRACE_ERROR("Illegal call. "
938 				    "Ethernet header can't be extended!\n");
939 			errno = EACCES;
940 			return -1;
941 		} else /* if (option & MOS_OVERWRITE) */ {
942 			memcpy((uint8_t *)ethh + offset, data, datalen);
943 		}
944 		/* iph, tcph, and payload do not need to change */
945 	} else if (option & MOS_IP_HDR) {
946 		/* validity test */
947 		if (cur_pkt_ctx->p.ethh == NULL ||
948 		    cur_pkt_ctx->p.ethh->h_proto != ntohs(ETH_P_IP) ||
949 		    (iph=(struct iphdr *)(cur_pkt_ctx->p.ethh + 1)) == NULL) {
950 			TRACE_ERROR("ethh or iph are out of bounds\n");
951 			errno = EACCES;
952 			return -1;
953 		}
954 		if (option & MOS_OVERWRITE) {
955 			if (offset + datalen > (iph->ihl<<2)) {
956 				TRACE_ERROR("IP setting has gone out of bounds "
957 					    "(offset: %ld, datalen: %d)\n",
958 					    offset, datalen);
959 				errno = EINVAL;
960 				return -1;
961 			}
962 			memcpy((uint8_t *)iph + offset, data, datalen);
963 		}
964 		if (option & MOS_CHOMP) {
965 			memmove((uint8_t *)iph + offset,
966 				(uint8_t *)iph + offset + datalen,
967 				cur_pkt_ctx->p.ip_len - offset - datalen);
968 
969 			/* iph does not need to change */
970 			if (iph->protocol == IPPROTO_TCP) {
971 				cur_pkt_ctx->p.tcph = (struct tcphdr *)((uint8_t *)iph + (iph->ihl<<2));
972 				cur_pkt_ctx->p.payload = (uint8_t *)cur_pkt_ctx->p.tcph +
973 					(cur_pkt_ctx->p.tcph->doff<<2);
974 			} else {
975 				/* reset tcph if iph does not have tcp proto */
976 				cur_pkt_ctx->p.tcph = NULL;
977 			}
978 			/* update iph total length */
979 			cur_pkt_ctx->p.ip_len = ntohs(iph->tot_len);
980 			/* update eth frame length */
981 			cur_pkt_ctx->p.eth_len = cur_pkt_ctx->p.ip_len + sizeof(struct ethhdr);
982 		} else if (option & MOS_INSERT) {
983 			memmove((uint8_t *)iph + offset + datalen,
984 				(uint8_t *)iph + offset + 1,
985 				cur_pkt_ctx->p.ip_len - offset);
986 			memcpy((uint8_t *)iph + offset,
987 			       data, datalen);
988 
989 			/* iph does not need to change */
990 			if (iph->protocol == IPPROTO_TCP) {
991 				cur_pkt_ctx->p.tcph = (struct tcphdr *)((uint8_t *)iph + (iph->ihl<<2));
992 				cur_pkt_ctx->p.payload = (uint8_t *)cur_pkt_ctx->p.tcph +
993 					(cur_pkt_ctx->p.tcph->doff<<2);
994 			} else {
995 				/* reset tcph if iph does not have tcp proto */
996 				cur_pkt_ctx->p.tcph = NULL;
997 			}
998 			/* update iph total length */
999 			cur_pkt_ctx->p.ip_len = ntohs(iph->tot_len);
1000 			/* update eth frame length */
1001 			cur_pkt_ctx->p.eth_len = cur_pkt_ctx->p.ip_len + sizeof(struct ethhdr);
1002 		}
1003 		/* can't update payloadlen because we don't know tcph->doff */
1004 	} else if (option & MOS_TCP_HDR) {
1005 		/* validity test */
1006 		iph = (struct iphdr *)(cur_pkt_ctx->p.ethh + 1);
1007 		if (iph == NULL ||
1008 		    iph->protocol != IPPROTO_TCP ||
1009 		    (tcph=(struct tcphdr *)((uint8_t *)iph + (iph->ihl<<2))) == NULL) {
1010 			TRACE_ERROR("TCP setting has gone out of bounds "
1011 				    "(offset: %ld, datalen: %d)\n",
1012 				    offset, datalen);
1013 			errno = EINVAL;
1014 			return -1;
1015 		}
1016 		if (option & MOS_OVERWRITE) {
1017 			if (offset + datalen > (tcph->doff<<2)) {
1018 				TRACE_ERROR("TCP setting has gone out of bounds "
1019 					    "(offset: %ld, datalen: %d)\n",
1020 					    offset, datalen);
1021 				errno = EINVAL;
1022 				return -1;
1023 			}
1024 			memcpy((uint8_t *)tcph + offset, data, datalen);
1025 			/* update tcp seq # */
1026 			cur_pkt_ctx->p.seq = ntohl(tcph->seq);
1027 			/* update tcp ack_seq # */
1028 			cur_pkt_ctx->p.ack_seq = ntohl(tcph->ack_seq);
1029 			/* update tcp window */
1030 			cur_pkt_ctx->p.window = ntohs(tcph->window);
1031 
1032 			/* 150422 dhkim TODO: seq and offset are two different form of same
1033 			 * variable. We also need to update the offset. */
1034 		}
1035 		if (option & MOS_CHOMP) {
1036 			memmove((uint8_t *)tcph + offset,
1037 				(uint8_t *)tcph + offset + datalen,
1038 				cur_pkt_ctx->p.payloadlen + (tcph->doff<<2)
1039 				- offset - datalen);
1040 			/* update payload ptr */
1041 			cur_pkt_ctx->p.payload = (uint8_t *)tcph + (tcph->doff<<2);
1042 		} else if (option & MOS_INSERT) {
1043 			memmove((uint8_t *)tcph + offset + datalen,
1044 				(uint8_t *)tcph + offset + 1,
1045 				cur_pkt_ctx->p.payloadlen + (tcph->doff<<2)
1046 				- offset);
1047 			memcpy((uint8_t *)tcph + offset, data, datalen);
1048 			/* update payload ptr */
1049 			cur_pkt_ctx->p.payload = (uint8_t *)tcph + (tcph->doff<<2);
1050 		}
1051 	} else if (option & MOS_TCP_PAYLOAD) {
1052 		iph = (struct iphdr *)(cur_pkt_ctx->p.ethh + 1);
1053 		tcph = (struct tcphdr *)((uint8_t *)iph + (iph->ihl<<2));
1054 		payload = (uint8_t *)tcph + (tcph->doff<<2);
1055 		if (option & MOS_OVERWRITE) {
1056 			if (offset + datalen > ntohs(iph->tot_len) -
1057 			    (iph->ihl<<2) - (tcph->doff<<2)) {
1058 				TRACE_ERROR("Payload setting has gone out of bounds "
1059 					    "(offset: %ld, datalen: %d)\n",
1060 					    offset, datalen);
1061 				errno = EINVAL;
1062 				return -1;
1063 			}
1064 			memcpy(payload + offset, data, datalen);
1065 		}
1066 		if (option & MOS_CHOMP) {
1067 			memmove(payload + offset,
1068 				payload + offset + datalen,
1069 				(cur_pkt_ctx->p.payloadlen -
1070 				 offset - datalen));
1071 			/* update payload length */
1072 			cur_pkt_ctx->p.payloadlen = cur_pkt_ctx->p.ip_len -
1073 				(tcph->doff<<2) - (iph->ihl<<2);
1074 		} else if (option & MOS_INSERT) {
1075 			memmove(payload + offset + datalen,
1076 				payload + offset + 1,
1077 				cur_pkt_ctx->p.payloadlen - offset);
1078 			memcpy(payload + offset, data, datalen);
1079 			cur_pkt_ctx->p.payloadlen = cur_pkt_ctx->p.ip_len -
1080 				(tcph->doff<<2) - (iph->ihl<<2);
1081 		}
1082 	} else {
1083 		TRACE_ERROR("Invalid option!\n");
1084 		errno = EINVAL;
1085 		return -1;
1086 	}
1087 
1088 	/* update ip checksum */
1089 	if (option & MOS_UPDATE_IP_CHKSUM) {
1090 		iph = (struct iphdr *)(cur_pkt_ctx->p.ethh + 1);
1091 		iph->check = 0;
1092 		iph->check = ip_fast_csum(iph, iph->ihl);
1093 	}
1094 
1095 	/* update tcp checksum */
1096 	if (option & MOS_UPDATE_TCP_CHKSUM) {
1097 		iph = (struct iphdr *)(cur_pkt_ctx->p.ethh + 1);
1098 		tcph = (struct tcphdr *)((uint8_t *)iph + (iph->ihl<<2));
1099 		tcph->check = 0;
1100 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
1101 					      ntohs(iph->tot_len) - (iph->ihl<<2),
1102 					      iph->saddr, iph->daddr);
1103 	}
1104 	return 0;
1105 }
1106 /*----------------------------------------------------------------------------*/
#if 0
/*
 * Disabled code: everything up to the matching #endif is compiled out.
 *
 * Thin wrapper that forwards its arguments to mtcp_setlastpkt().
 * NOTE(review): the call below passes `sock` and `side`, which are not
 * among this function's parameters, so the wrapper would not compile as
 * written if the #if 0 guard were removed.
 */
inline int
mtcp_cb_updatecurpkt(mctx_t mctx, off_t offset, unsigned char *data,
		     uint16_t datalen, int option)
{
	return mtcp_setlastpkt(mctx, sock, side, offset, data, datalen, option);
}
#endif
1115 /*----------------------------------------------------------------------------*/
1116 /**
1117  * THIS IS A DEPRECETED FUNCTION...
1118  */
1119 int
1120 mtcp_cb_dropcurpkt(mctx_t mctx)
1121 {
1122 	mtcp_manager_t mtcp;
1123 
1124 	/* checking if mtcp is valid */
1125 	mtcp = GetMTCPManager(mctx);
1126 	if (!mtcp) {
1127 		TRACE_ERROR("Invalid mtcp!\n");
1128 		errno = EACCES;
1129 		return -1;
1130 	}
1131 
1132 	/* check if the calling thread is in MOS context */
1133 	if (mtcp->ctx->thread != pthread_self()) {
1134 		TRACE_ERROR("Invalid thread id!\n");
1135 		errno = EPERM;
1136 		return -1;
1137 	}
1138 
1139 	/* see if cur_pkt_ctx is valid */
1140 	if (mtcp->pctx == NULL) {
1141 		TRACE_ERROR("pctx is NULL!\n");
1142 		errno = ENODATA;
1143 		return -1;
1144 	}
1145 
1146 	mtcp->pctx->forward = 0;
1147 
1148 	return 0;
1149 }
1150 /*----------------------------------------------------------------------------*/
1151 int
1152 mtcp_set_debug_string(mtcp_manager_t mtcp, const char *fmt, ...)
1153 {
1154 #ifdef ENABLE_DEBUG_EVENT
1155 	va_list args;
1156 	int i;
1157 
1158 	assert(mtcp);
1159 
1160 	if (fmt == NULL) {
1161 		mtcp->dbg_buf[0] = '\0';
1162 		return 0;
1163 	}
1164 
1165 	va_start(args, fmt);
1166 	i = vsnprintf(mtcp->dbg_buf, DBG_BUF_LEN - 1, fmt, args);
1167 	va_end(args);
1168 
1169 	return i;
1170 #else
1171 	return -1;
1172 #endif /* ENABLE_DEBUG_EVENT */
1173 }
1174 /*----------------------------------------------------------------------------*/
1175 int
1176 mtcp_get_debug_string(mctx_t mctx, char *buf, int len)
1177 {
1178 #ifdef ENABLE_DEBUG_EVENT
1179 	mtcp_manager_t mtcp;
1180 	int copylen;
1181 
1182 	if (len < 0)
1183 		return -1;
1184 	else if (len == 0)
1185 		return 0;
1186 
1187 	if (!(mtcp = GetMTCPManager(mctx)))
1188 		return -1;
1189 
1190 	copylen = MIN(strlen(mtcp->dbg_buf), len);
1191 	strncpy(buf, mtcp->dbg_buf, copylen);
1192 
1193 	return copylen;
1194 #else
1195 	return -1;
1196 #endif /* ENABLE_DEBUG_EVENT */
1197 }
1198 /*----------------------------------------------------------------------------*/
1199