xref: /mOS-networking-stack/core/src/mos_api.c (revision 3b6b9ba6)
1 #include <assert.h>
2 #include <ctype.h>
3 #include <string.h>
4 #ifdef ENABLE_DEBUG_EVENT
5 #include <stdarg.h>
6 #endif
7 
8 #include "mtcp.h"
9 #include "mos_api.h"
10 #include "debug.h"
11 #include "config.h"
12 #include "ip_in.h"
13 #include "tcp_out.h"
14 /*----------------------------------------------------------------------------*/
/* Larger / smaller of two values. Each argument may be evaluated twice. */
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
/*
 * Cursor helpers for scanning a NUL-terminated string in place.
 * SKIP_SPACES advances x past whitespace; SKIP_CHAR advances x past
 * non-whitespace. Both stop at the terminating NUL.
 *
 * The argument is parenthesized so expressions such as *pp work, and the
 * character is converted to unsigned char because <ctype.h> functions are
 * only defined for values representable as unsigned char (or EOF).
 * The trailing ';' is part of the expansion, as it always has been, so call
 * sites written with or without their own ';' keep compiling.
 */
#define SKIP_SPACES(x) while (*(x) && isspace((unsigned char)*(x))) (x)++;
#define SKIP_CHAR(x) while (*(x) && !isspace((unsigned char)*(x))) (x)++;

/*
 * Keywords recognised in a flow filter rule. Entries ending in a space are
 * matched together with that separator.
 */
#define KW_AND       "and "
#define KW_OR        "or "
#define KW_NOT       "not "
#define KW_TCP       "tcp"
#define KW_NOT_TCP   "!tcp"
#define KW_NOT_TCP2  "not tcp"
#define KW_SRC       "src "
#define KW_DST       "dst "
#define KW_HOST      "host "
#define KW_NET       "net "
#define KW_MASK      "mask "
#define KW_PORT      "port "
#define KW_PORTRANGE "portrange "
33 /*----------------------------------------------------------------------------*/
34 int
35 IsValidFlowRule(char *cf)
36 {
37 	char *word;
38 	int skip_word = 0;
39 
40 	/* '!tcp' or 'not tcp' are also not supported in TCP flow filter */
41 	if (strstr(cf, KW_NOT_TCP) || strstr(cf, KW_NOT_TCP2)) {
42 		TRACE_ERROR("'!tcp' or 'not tcp' is not a valid rule for TCP flow monitor.\n");
43 		return FALSE;
44 	}
45 
46 	/* verify that the rule contains flow-related keywords only */
47 	word = cf;
48 	SKIP_SPACES(word);
49 
50 	/* while (browse the rule by words) */
51 	while (*word) {
52 		if (skip_word) {
53 			skip_word = 0;
54 			SKIP_CHAR(word);
55 			SKIP_SPACES(word);
56 			continue;
57 		}
58 		/* parse the keyword */
59 		/* case "tcp" "src" "dst" "not' "and" "or" -> move to the next word */
60 		if (!strncmp(word, KW_TCP, sizeof(KW_TCP) - 1) ||
61 			!strncmp(word, KW_SRC, sizeof(KW_SRC) - 1) ||
62 			!strncmp(word, KW_DST, sizeof(KW_DST) - 1) ||
63 			!strncmp(word, KW_NOT, sizeof(KW_NOT) - 1) ||
64 			!strncmp(word, KW_AND, sizeof(KW_AND) - 1) ||
65 			!strncmp(word, KW_OR, sizeof(KW_OR) - 1)) {
66 			skip_word = 0;
67 		}
68 		/* case "net" "mask" "port" "portrange" -> skip a word (= param) */
69 		else if (!strncmp(word, KW_HOST, sizeof(KW_HOST) - 1) ||
70 				 !strncmp(word, KW_NET, sizeof(KW_NET) - 1) ||
71 				 !strncmp(word, KW_MASK, sizeof(KW_MASK) - 1) ||
72 				 !strncmp(word, KW_PORT, sizeof(KW_PORT) - 1) ||
73 				 !strncmp(word, KW_PORTRANGE, sizeof(KW_PORTRANGE) - 1)) {
74 			skip_word = 1;
75 		}
76 		/* default (rule has any invalid keyword) -> return error */
77 		else {
78 			TRACE_ERROR("Invalid keyword in filter (%s)\n", word);
79 			return FALSE;
80 		}
81 
82 		SKIP_CHAR(word);
83 		SKIP_SPACES(word);
84 	}
85 
86 	return TRUE;
87 }
88 /*----------------------------------------------------------------------------*/
89 /* Assign an address range (specified by ft) to monitor via sock */
90 int
91 mtcp_bind_monitor_filter(mctx_t mctx, int sockid, monitor_filter_t ft)
92 {
93 	socket_map_t sock;
94 	mtcp_manager_t mtcp;
95 
96 	mtcp = GetMTCPManager(mctx);
97 	if (!mtcp) {
98 		errno = EACCES;
99 		return -1;
100 	}
101 
102 	/* if filter is not set, do nothing and return */
103 	if (ft == NULL) {
104 		TRACE_ERROR("filter not set!\n");
105 		return 0;
106 	}
107 
108 	/* retrieve the socket */
109 	if (sockid < 0 || sockid >= g_config.mos->max_concurrency) {
110 		errno = EBADF;
111 		TRACE_ERROR("sockid is invalid!\n");
112 		return -1;
113 	}
114 	sock = &mtcp->msmap[sockid];
115 
116 	/* check socket type */
117 	switch (sock->socktype) {
118 	case MOS_SOCK_MONITOR_RAW:
119 		/* For MONITOR_RAW type, allow any bpf rule */
120 		if (!ft->raw_pkt_filter) {
121 			TRACE_ERROR("raw pkt filter is null");
122 			return 0;
123 		}
124 		if (SET_BPFFILTER(&sock->monitor_listener->raw_pkt_fcode,
125 						  ft->raw_pkt_filter) < 0) {
126 			TRACE_ERROR("Invalid filter expression!\n");
127 			errno = EINVAL;
128 			return -1;
129 		}
130 		break;
131 	case MOS_SOCK_MONITOR_STREAM:
132 		/* For MONITOR_STREAM_PASSIVE type, restrict to flow-level keywords */
133 		if (ft->stream_syn_filter) {
134 			if (!IsValidFlowRule(ft->stream_syn_filter)) {
135 				errno = EINVAL;
136 				return -1;
137 			}
138 			if (SET_BPFFILTER(&sock->monitor_listener->stream_syn_fcode,
139 							  ft->stream_syn_filter) < 0) {
140 				TRACE_ERROR("Invalid filter expression!\n");
141 				errno = EINVAL;
142 				return -1;
143 			}
144 		}
145 		if (ft->stream_orphan_filter) {
146 			if (!IsValidFlowRule(ft->stream_orphan_filter)) {
147 				errno = EINVAL;
148 				return -1;
149 			}
150 			if (SET_BPFFILTER(&sock->monitor_listener->stream_orphan_fcode,
151 							  ft->stream_orphan_filter) < 0) {
152 				TRACE_ERROR("Invalid filter expression!\n");
153 				errno = EINVAL;
154 				return -1;
155 			}
156 		}
157 		break;
158 	default:
159 		/* return error for other socket types */
160 		errno = ENOPROTOOPT;
161 		TRACE_ERROR("Invalid sock type!\n");
162 		return -1;
163 	}
164 
165 	return 0;
166 }
167 /*----------------------------------------------------------------------------*/
168 void
169 mtcp_app_join(mctx_t mctx)
170 {
171 	mtcp_manager_t mtcp = GetMTCPManager(mctx);
172 	if (!mtcp) return;
173 
174 	RunPassiveLoop(mtcp);
175 	return;
176 }
177 /*----------------------------------------------------------------------------*/
178 /* Callback only functions */
179 /*----------------------------------------------------------------------------*/
180 void
181 mtcp_set_uctx(mctx_t mctx, int msock, void *uctx)
182 {
183 	mtcp_manager_t mtcp;
184 
185 	mtcp = GetMTCPManager(mctx);
186 	if (!mtcp) {
187 		return;
188 	}
189 
190 	/* check if the calling thread is in MOS context */
191 	if (mtcp->ctx->thread != pthread_self())
192 		return;
193 
194 	if (msock < 0 || msock >= g_config.mos->max_concurrency) {
195 		TRACE_API("Socket id %d out of range.\n", msock);
196 		errno = EBADF;
197 		return;
198 	}
199 
200 	socket_map_t socket = &mtcp->msmap[msock];
201 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE)
202 		socket->monitor_stream->uctx = uctx;
203 	else if (socket->socktype == MOS_SOCK_MONITOR_STREAM ||
204 			 socket->socktype == MOS_SOCK_MONITOR_RAW)
205 		socket->monitor_listener->uctx = uctx;
206 }
207 /*----------------------------------------------------------------------------*/
208 void *
209 mtcp_get_uctx(mctx_t mctx, int msock)
210 {
211 	mtcp_manager_t mtcp;
212 
213 	mtcp = GetMTCPManager(mctx);
214 	if (!mtcp) {
215 		errno = EACCES;
216 		return NULL;
217 	}
218 
219 	/* check if the calling thread is in MOS context */
220 	if (mtcp->ctx->thread != pthread_self()) {
221 		errno = EPERM;
222 		return NULL;
223 	}
224 
225 	if (msock < 0 || msock >= g_config.mos->max_concurrency) {
226 		TRACE_API("Socket id %d out of range.\n", msock);
227 		errno = EBADF;
228 		return NULL;
229 	}
230 
231 	socket_map_t socket = &mtcp->msmap[msock];
232 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE)
233 		return socket->monitor_stream->uctx;
234 	else if (socket->socktype == MOS_SOCK_MONITOR_STREAM ||
235 			 socket->socktype == MOS_SOCK_MONITOR_RAW)
236 		return socket->monitor_listener->uctx;
237 	else
238 		return NULL;
239 }
240 /*----------------------------------------------------------------------------*/
241 ssize_t
242 mtcp_peek(mctx_t mctx, int msock, int side, char *buf, size_t len)
243 {
244 	int copylen, rc;
245 	struct tcp_stream *cur_stream;
246 	mtcp_manager_t mtcp;
247 	socket_map_t sock;
248 
249 	copylen = rc = 0;
250 	mtcp = GetMTCPManager(mctx);
251 	if (!mtcp) {
252 		errno = EACCES;
253 		return -1;
254 	}
255 
256 	/* check if the calling thread is in MOS context */
257 	if (mtcp->ctx->thread != pthread_self()) {
258 		errno = EPERM;
259 		return -1;
260 	}
261 
262 	/* check if the socket is monitor stream */
263 	sock = &mtcp->msmap[msock];
264 	if (sock->socktype != MOS_SOCK_MONITOR_STREAM_ACTIVE) {
265 		TRACE_DBG("Invalid socket type!\n");
266 		errno = EBADF;
267 		return -1;
268 	}
269 
270 	if (side != MOS_SIDE_CLI && side != MOS_SIDE_SVR) {
271 		TRACE_ERROR("Invalid side requested!\n");
272 		exit(EXIT_FAILURE);
273 		return -1;
274 	}
275 
276 	struct tcp_stream *mstrm = sock->monitor_stream->stream;
277 	cur_stream = (side == mstrm->side) ? mstrm : mstrm->pair_stream;
278 
279 	if (!cur_stream || !cur_stream->buffer_mgmt) {
280 		TRACE_DBG("Stream is NULL!! or buffer management is disabled\n");
281 		errno = EINVAL;
282 		return -1;
283 	}
284 
285 	/* Check if the read was not just due to syn-ack recv */
286 	if (cur_stream->rcvvar != NULL &&
287 	    cur_stream->rcvvar->rcvbuf != NULL) {
288 		tcprb_t *rcvbuf = cur_stream->rcvvar->rcvbuf;
289 		loff_t *poff = &sock->monitor_stream->peek_offset[cur_stream->side];
290 
291 		rc = tcprb_ppeek(rcvbuf, (uint8_t *)buf, len, *poff);
292 		if (rc < 0) {
293 			errno = ENODATA;
294 			return -1;
295 		}
296 
297 		*poff += rc;
298 		UNUSED(copylen);
299 
300 		return rc;
301 	} else {
302 		TRACE_DBG("Stream hasn't yet been initialized!\n");
303 		rc = 0;
304 	}
305 
306 	return rc;
307 }
308 /*----------------------------------------------------------------------------*/
309 /**
310  * Copies from the frags.. returns no. of bytes copied to buf
311  */
312 static inline int
313 ExtractPayloadFromFrags(struct tcp_ring_buffer *rcvbuf, char *buf,
314 						size_t count, off_t seq_num)
315 {
316 	int cpbytesleft;
317 	struct fragment_ctx *it;
318 
319 	it = rcvbuf->fctx;
320 	cpbytesleft = count;
321 	/* go through each frag */
322 	while (it) {
323 		/* first check whether sequent number matches */
324 		if (TCP_SEQ_BETWEEN(seq_num, it->seq, it->seq + it->len)) {
325 			/* copy buf starting from seq# seq_num */
326 			/* copy the MIN of seq-range and bytes to be copied */
327 			memcpy(buf + count - cpbytesleft,
328 			       rcvbuf->head + seq_num - rcvbuf->head_seq,
329 			       MIN(it->len - (seq_num - it->seq), cpbytesleft));
330 			/* update target seq num */
331 			seq_num += it->len - (seq_num - it->seq);
332 			/* update cpbytes left */
333 			cpbytesleft -= it->len - (seq_num - it->seq);
334 			if (cpbytesleft == 0)
335 				break;
336 		}
337 		it = it->next;
338 	}
339 
340 	count -= cpbytesleft;
341 
342 	/* return number of bytes copied */
343 	return count;
344 }
345 /*----------------------------------------------------------------------------*/
346 /* Please see in-code comments for description */
347 ssize_t
348 mtcp_ppeek(mctx_t mctx, int msock, int side,
349 			  char *buf, size_t count, uint64_t off)
350 {
351 	mtcp_manager_t mtcp;
352 	struct tcp_stream *cur_stream;
353 	int rc;
354 	socket_map_t sock;
355 
356 	mtcp = GetMTCPManager(mctx);
357 	if (!mtcp) {
358 		errno = EACCES;
359 		goto ppeek_error;
360 	}
361 
362 	/* check if the calling thread is in MOS context */
363 	if (mtcp->ctx->thread != pthread_self()) {
364 		errno = EPERM;
365 		goto ppeek_error;
366 	}
367 
368 	/* check if the socket is monitor stream */
369 	sock = &mtcp->msmap[msock];
370 	if (sock->socktype != MOS_SOCK_MONITOR_STREAM_ACTIVE) {
371 		TRACE_DBG("Invalid socket type!\n");
372 		errno = ESOCKTNOSUPPORT;
373 		goto ppeek_error;
374 	}
375 
376 	if (side != MOS_SIDE_CLI && side != MOS_SIDE_SVR) {
377 		TRACE_ERROR("Invalid side requested!\n");
378 		exit(EXIT_FAILURE);
379 		return -1;
380 	}
381 
382 	struct tcp_stream *mstrm = sock->monitor_stream->stream;
383 	cur_stream = (side == mstrm->side) ? mstrm : mstrm->pair_stream;
384 
385 	if (!cur_stream || !cur_stream->buffer_mgmt) {
386 		TRACE_DBG("Stream is either NULL or ring buffer is not managed!!\n");
387 		errno = EACCES;
388 		goto ppeek_error;
389 	}
390 
391 	rc = 0;
392 	/* Check if the read was not just due to syn-ack recv */
393 	if (cur_stream->rcvvar != NULL &&
394 	    cur_stream->rcvvar->rcvbuf != NULL) {
395 		tcprb_t *rcvbuf = cur_stream->rcvvar->rcvbuf;
396 		return tcprb_ppeek(rcvbuf, (uint8_t *)buf, count, off);
397 	} else {
398 		errno = EPERM;
399 		goto ppeek_error;
400 	}
401 
402 	return rc;
403 
404  ppeek_error:
405 	return -1;
406 }
407 /*----------------------------------------------------------------------------*/
408 #ifdef MTCP_CB_GETCURPKT_CREATE_COPY
409 static __thread unsigned char local_frame[ETHERNET_FRAME_LEN];
410 inline struct pkt_info *
411 ClonePacketCtx(struct pkt_info *to, unsigned char *frame, struct pkt_ctx *from)
412 {
413 	/* only memcpy till the last field before ethh */
414 	/* memcpy(to, from, PCTX_COPY_LEN); */
415 	memcpy(to, &(from->p), PKT_INFO_LEN);
416 	/* memcpy the entire ethernet frame */
417 	assert(from);
418 	assert(from->p.eth_len > 0);
419 	assert(from->p.eth_len <= ETHERNET_FRAME_LEN);
420 	memcpy(frame, from->p.ethh, from->p.eth_len);
421 	/* set iph */
422 	to->ethh = (struct ethhdr *)frame;
423 	/* set iph */
424 	to->iph = from->p.iph ?
425 		(struct iphdr *)((uint8_t *)(frame + ETHERNET_HEADER_LEN)) : NULL;
426 	/* set tcph */
427 	to->tcph = from->p.tcph ?
428 		(struct tcphdr *)(((uint8_t *)(to->iph)) + (to->iph->ihl<<2)) : NULL;
429 	/* set payload */
430 	to->payload = from->p.tcph ?
431 		((uint8_t *)(to->tcph) + (to->tcph->doff<<2)) : NULL;
432 	return to;
433 }
434 /*----------------------------------------------------------------------------*/
435 int
436 mtcp_getlastpkt(mctx_t mctx, int sock, int side, struct pkt_info *pkt)
437 {
438 	mtcp_manager_t mtcp;
439 	socket_map_t socket;
440 	struct tcp_stream *cur_stream;
441 	struct pkt_ctx *cur_pkt_ctx;
442 
443 	mtcp = GetMTCPManager(mctx);
444 	if (!mtcp) {
445 		errno = EACCES;
446 		return -1;
447 	}
448 
449 	/* check if the calling thread is in MOS context */
450 	if (mtcp->ctx->thread != pthread_self()) {
451 		errno = EPERM;
452 		return -1;
453 	}
454 
455 	/* check if the socket is monitor stream */
456 	socket = &mtcp->msmap[sock];
457 
458 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) {
459 		if (side != MOS_SIDE_CLI && side != MOS_SIDE_SVR) {
460 			TRACE_ERROR("Invalid side requested!\n");
461 			exit(EXIT_FAILURE);
462 			return -1;
463 		}
464 
465 		struct tcp_stream *mstrm = socket->monitor_stream->stream;
466 		cur_stream = (side == mstrm->side) ? mstrm : mstrm->pair_stream;
467 
468 		cur_pkt_ctx = &cur_stream->last_pctx;
469 		if (!cur_pkt_ctx->p.ethh) {
470 			errno = ENODATA;
471 			return -1;
472 		}
473 	} else if (socket->socktype == MOS_SOCK_MONITOR_RAW) {
474 		cur_pkt_ctx = mtcp->pctx;
475 	} else if (socket->socktype == MOS_SOCK_MONITOR_STREAM) {
476 		/*
477 		 * if it is a monitor socket, then this means that
478 		 * this is a request for an orphan tcp packet
479 		 */
480 		cur_pkt_ctx = mtcp->pctx;
481 	} else {
482 		TRACE_DBG("Invalid socket type!\n");
483 		errno = EBADF;
484 		return -1;
485 	}
486 
487 	ClonePacketCtx(pkt, local_frame, cur_pkt_ctx);
488 	return 0;
489 }
490 #else
491 /*----------------------------------------------------------------------------*/
492 int
493 mtcp_getlastpkt(mctx_t mctx, int sock, int side, struct pkt_ctx **pctx)
494 {
495 	mtcp_manager_t mtcp;
496 
497 	mtcp = GetMTCPManager(mctx);
498 	if (!mtcp) {
499 		errno = EACCES;
500 		return -1;
501 	}
502 
503 	/* check if the calling thread is in MOS context */
504 	if (mtcp->ctx->thread != pthread_self()) {
505 		errno = EPERM;
506 		return -1;
507 	}
508 	/* just pass direct pointer */
509 	*pctx = mtcp->pctx;
510 
511 	return 0;
512 }
513 #endif
514 /*----------------------------------------------------------------------------*/
/** Disable events from a monitoring socket for one side of the flow
 * @param [in] mtcp: mtcp_manager
 * @param [in] socket: monitor socket (stream-active or passive stream)
 * @param [in] side: MOS_SIDE_SVR or MOS_SIDE_CLI; the matching
 *                   server_mon / client_mon flag is cleared. Any other
 *                   value clears neither flag.
 *
 * Once both flags are zero the socket's event state is released: under
 * NEWEV the three stree references are dropped and the pointers reset,
 * otherwise the CleanupEvP()/CleanupEvB() helpers are called.
 *
 * returns -1 with errno EACCES when mtcp is NULL, or with errno ENODATA
 * when the socket has no monitor_stream / monitor_listener attached.
 * returns 0 otherwise. Note that an unsupported socket type is only
 * logged; it still returns 0.
 *
 * This is used for flow management based monitoring sockets
 */
int
RemoveMonitorEvents(mtcp_manager_t mtcp, socket_map_t socket, int side)
{
	struct mon_stream *mstream;
	struct mon_listener *mlistener;

	if (mtcp == NULL) {
		TRACE_DBG("mtcp is not defined!!!\n");
		errno = EACCES;
		return -1;
	}

	switch (socket->socktype) {
	case MOS_SOCK_MONITOR_STREAM_ACTIVE:
		/* per-flow monitor socket: state lives in monitor_stream */
		mstream = socket->monitor_stream;
		if (mstream == NULL) {
			TRACE_ERROR("Mon Stream does not exist!\n");
			/* exit(-1); */
			errno = ENODATA;
			return -1;
		}

		/* stop monitoring the requested side only */
		if (side == MOS_SIDE_SVR) mstream->server_mon = 0;
		else if (side == MOS_SIDE_CLI) mstream->client_mon = 0;

		/* release event state once neither side is monitored */
		if (mstream->server_mon == 0 && mstream->client_mon == 0) {
#ifdef NEWEV
			/*
			 * stree_pre_rcv doubles as the "already released" marker:
			 * all three pointers are reset together below, so a NULL
			 * here means the references were dropped earlier
			 */
			if (mstream->stree_pre_rcv != NULL) {
				stree_dec_ref(mtcp->ev_store, mstream->stree_dontcare);
				stree_dec_ref(mtcp->ev_store, mstream->stree_pre_rcv);
				stree_dec_ref(mtcp->ev_store, mstream->stree_post_snd);

				mstream->stree_dontcare = NULL;
				mstream->stree_pre_rcv = NULL;
				mstream->stree_post_snd = NULL;
			}
#else
			/* no error checking over here..
			 * but its okay.. this code is
			 * deprecated
			 */
			CleanupEvP(&mstream->dontcare_evp);
			CleanupEvP(&mstream->pre_tcp_evp);
			CleanupEvP(&mstream->post_tcp_evp);
#endif
		}
		break;
	case MOS_SOCK_MONITOR_STREAM:
		/* passive monitor socket: state lives in monitor_listener */
		mlistener = socket->monitor_listener;
		if (mlistener == NULL) {
			TRACE_ERROR("Mon listener does not exist!\n");
			errno = ENODATA;
			return -1;
		}

		/* stop monitoring the requested side only */
		if (side == MOS_SIDE_SVR) mlistener->server_mon = 0;
		else if (side == MOS_SIDE_CLI) mlistener->client_mon = 0;

		/* release event state once neither side is monitored */
		if (mlistener->server_mon == 0 && mlistener->client_mon == 0) {
#ifdef NEWEV
			/*
			 * stree_pre_rcv doubles as the "already released" marker:
			 * all three pointers are reset together below, so a NULL
			 * here means the references were dropped earlier
			 */
			if (mlistener->stree_pre_rcv != NULL) {
				stree_dec_ref(mtcp->ev_store, mlistener->stree_dontcare);
				stree_dec_ref(mtcp->ev_store, mlistener->stree_pre_rcv);
				stree_dec_ref(mtcp->ev_store, mlistener->stree_post_snd);

				mlistener->stree_dontcare = NULL;
				mlistener->stree_pre_rcv = NULL;
				mlistener->stree_post_snd = NULL;
			}
#else
			/* no error checking over here..
			 * but its okay.. this code is
			 * deprecated
			 */
			CleanupEvB(mtcp, &mlistener->dontcare_evb);
			CleanupEvB(mtcp, &mlistener->pre_tcp_evb);
			CleanupEvB(mtcp, &mlistener->post_tcp_evb);
#endif
		}
		break;
	default:
		/* logged only: the function still reports success below */
		TRACE_ERROR("Invalid socket type!\n");
	}

	return 0;
}
617 /*----------------------------------------------------------------------------*/
618 /**
619  * Disable monitoring based on side variable.
620  */
621 int
622 mtcp_cb_stop(mctx_t mctx, int sock, int side)
623 {
624 	mtcp_manager_t mtcp;
625 	socket_map_t socket;
626 	struct tcp_stream *stream;
627 	struct socket_map *walk;
628 	uint8_t mgmt;
629 
630 	mtcp = GetMTCPManager(mctx);
631 	if (!mtcp) {
632 		errno = EACCES;
633 		return -1;
634 	}
635 
636 	socket = &mtcp->msmap[sock];
637 
638 	/* works for both monitor listener and stream sockets */
639 	RemoveMonitorEvents(mtcp, socket, side);
640 
641 	/* passive monitoring socket is not connected to any stream */
642 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM)
643 		return 0;
644 
645 	if (side == MOS_SIDE_CLI) {
646 		/* see if the associated stream requires monitoring any more */
647 		stream = (socket->monitor_stream->stream->side == MOS_SIDE_CLI) ?
648 			socket->monitor_stream->stream :
649 			socket->monitor_stream->stream->pair_stream;
650 
651 		mgmt = 0;
652 		SOCKQ_FOREACH_START(walk, &stream->msocks) {
653 			if (walk->monitor_stream->client_mon == 1) {
654 				mgmt = 1;
655 				break;
656 			}
657 		} SOCKQ_FOREACH_END;
658 		/* if all streams have mgmt off, then tag the stream for destruction */
659 		if (mgmt == 0) {
660 			stream = (socket->monitor_stream->stream->side == MOS_SIDE_CLI) ?
661 				socket->monitor_stream->stream :
662 				socket->monitor_stream->stream->pair_stream;
663 			stream->status_mgmt = 0;
664 		}
665 	}
666 
667 	if (side == MOS_SIDE_SVR) {
668 		/* see if the associated stream requires monitoring any more */
669 		stream = (socket->monitor_stream->stream->side == MOS_SIDE_SVR) ?
670 			socket->monitor_stream->stream :
671 			socket->monitor_stream->stream->pair_stream;
672 		mgmt = 0;
673 		SOCKQ_FOREACH_START(walk, &stream->msocks) {
674 			if (walk->monitor_stream->server_mon == 1) {
675 				mgmt = 1;
676 				break;
677 			}
678 		} SOCKQ_FOREACH_END;
679 		/* if all streams have mgmt off, then tag the stream for destruction */
680 		if (mgmt == 0) {
681 			stream = (socket->monitor_stream->stream->side == MOS_SIDE_SVR) ?
682 				socket->monitor_stream->stream :
683 				socket->monitor_stream->stream->pair_stream;
684 			stream->status_mgmt = 0;
685 		}
686 	}
687 
688 	return 0;
689 }
690 /*----------------------------------------------------------------------------*/
691 /**
692  * send a RST packet to the TCP stream (uni-directional)
693  */
694 static inline void
695 SendRSTPacketStandalone(mtcp_manager_t mtcp, struct tcp_stream *stream) {
696 	SendTCPPacketStandalone(mtcp,
697 				stream->saddr, stream->sport, stream->daddr, stream->dport,
698 				stream->snd_nxt, stream->rcv_nxt, 0, TCP_FLAG_RST | TCP_FLAG_ACK,
699 				NULL, 0, mtcp->cur_ts, 0);
700 }
701 /*----------------------------------------------------------------------------*/
702 /**
703  * Reset the connection (send RST packets to both sides)
704  */
705 int
706 mtcp_reset_conn(mctx_t mctx, int sock)
707 {
708 	mtcp_manager_t mtcp;
709 	socket_map_t socket;
710 
711 	mtcp = GetMTCPManager(mctx);
712 	if (!mtcp) {
713 		errno = EACCES;
714 		return -1;
715 	}
716 
717 	socket = &mtcp->msmap[sock];
718 
719 	/* passive monitoring socket is not connected to any stream */
720 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM) {
721 		errno = EINVAL;
722 		return -1;
723 	}
724 
725 	/* send RST packets to the both sides */
726 	SendRSTPacketStandalone(mtcp, socket->monitor_stream->stream);
727 	SendRSTPacketStandalone(mtcp, socket->monitor_stream->stream->pair_stream);
728 
729 	return 0;
730 }
731 /*----------------------------------------------------------------------------*/
732 uint32_t
733 mtcp_cb_get_ts(mctx_t mctx)
734 {
735 	mtcp_manager_t mtcp;
736 
737 	mtcp = GetMTCPManager(mctx);
738 	if (!mtcp) {
739 		TRACE_DBG("Can't access MTCP manager!\n");
740 		errno = EACCES;
741 		return 0;
742 	}
743 
744 	/* check if the calling thread is in MOS context */
745 	if (mtcp->ctx->thread != pthread_self()) {
746 		errno = EPERM;
747 		return 0;
748 	}
749 
750 	return TS_TO_USEC(mtcp->cur_ts);
751 }
752 /*----------------------------------------------------------------------------*/
/*
 * Macros related to getpeername
 *
 * Byte counts at which a caller-supplied buffer holding two consecutive
 * struct sockaddr slots may be truncated. TILL_SVR* are offsets inside the
 * first slot; TILL_CLI* are the same offsets inside the second slot.
 * Each expansion is fully parenthesized so it can be used inside larger
 * expressions.
 */
#define TILL_SVRADDR		(offsetof(struct sockaddr_in, sin_zero))
#define TILL_SVRPORT		(offsetof(struct sockaddr_in, sin_addr))
#define TILL_SVRFAMILY		(offsetof(struct sockaddr_in, sin_port))
#define TILL_CLIADDR		(sizeof(struct sockaddr) + TILL_SVRADDR)
#define TILL_CLIPORT		(sizeof(struct sockaddr) + TILL_SVRPORT)
#define TILL_CLIFAMILY		(sizeof(struct sockaddr) + TILL_SVRFAMILY)
760 
761 int
762 mtcp_getpeername(mctx_t mctx, int sockfd, struct sockaddr *saddr,
763 				 socklen_t *addrlen, int side)
764 {
765 	mtcp_manager_t mtcp;
766 	socket_map_t socket;
767 	struct tcp_stream *stream;
768 	struct sockaddr_in *sin;
769 	int rc;
770 
771 	mtcp = GetMTCPManager(mctx);
772 	if (!mtcp) {
773 		TRACE_DBG("Can't access MTCP manager!\n");
774 		errno = EACCES;
775 		return -1;
776 	}
777 
778 	/* check if the calling thread is in MOS context */
779 	if (mtcp->ctx->thread != pthread_self()) {
780 		errno = EPERM;
781 		return -1;
782 	}
783 
784 	socket = &mtcp->msmap[sockfd];
785 	sin = (struct sockaddr_in *)saddr;
786 	rc = 0;
787 
788 	/* retrieve both streams */
789 	stream = socket->monitor_stream->stream;
790 
791 	if (side != stream->side)
792 		stream = stream->pair_stream;
793 
794 	if (stream == NULL)
795 		return -1;
796 
797 	/* reset to 2 * sizeof(struct sockaddr) if addrlen is too big */
798 	if (*addrlen > 2 * sizeof(struct sockaddr))
799 		*addrlen = 2 * sizeof(struct sockaddr);
800 
801 	/* according per manpage, address can be truncated */
802 	switch (*addrlen) {
803 	case (2 * sizeof(struct sockaddr)):
804 	case TILL_CLIADDR:
805 		sin[1].sin_addr.s_addr = stream->side == MOS_SIDE_SVR ?
806 								 stream->daddr : stream->saddr;
807 	case TILL_CLIPORT:
808 		sin[1].sin_port = stream->side == MOS_SIDE_SVR ?
809 						  stream->dport : stream->sport;
810 	case TILL_CLIFAMILY:
811 		sin[1].sin_family = AF_INET;
812 	case (sizeof(struct sockaddr)):
813 	case TILL_SVRADDR:
814 		sin->sin_addr.s_addr = stream->side == MOS_SIDE_SVR ?
815 							   stream->saddr : stream->daddr;
816 	case TILL_SVRPORT:
817 		sin->sin_port = stream->side == MOS_SIDE_SVR ?
818 						stream->sport : stream->dport;
819 	case TILL_SVRFAMILY:
820 		sin->sin_family = AF_INET;
821 		break;
822 	default:
823 		rc = -1;
824 		*addrlen = 0xFFFF;
825 	}
826 
827 	return rc;
828 }
829 /*----------------------------------------------------------------------------*/
830 int
831 mtcp_setlastpkt(mctx_t mctx, int sock, int side, off_t offset,
832 		byte *data, uint16_t datalen, int option)
833 {
834 	mtcp_manager_t mtcp;
835 	struct pkt_ctx *cur_pkt_ctx;
836 	struct ethhdr *ethh;
837 	struct iphdr *iph;
838 	struct tcphdr *tcph;
839 	unsigned char *payload;
840 
841 #if 0
842 	socket_map_t socket;
843 	struct tcp_stream *cur_stream;
844 #endif
845 
846 	/* checking if mtcp is valid */
847 	mtcp = GetMTCPManager(mctx);
848 	if (!mtcp) {
849 		errno = EACCES;
850 		TRACE_ERROR("Invalid mtcp!\n");
851 		return -1;
852 	}
853 
854 	/* check if the calling thread is in MOS context */
855 	if (mtcp->ctx->thread != pthread_self()) {
856 		errno = EPERM;
857 		TRACE_ERROR("Invalid thread id!\n");
858 		return -1;
859 	}
860 
861 #if 0
862 	/* check if the socket is monitor stream */
863 	socket = &mtcp->msmap[sock];
864 	if (socket->socktype == MOS_SOCK_MONITOR_STREAM_ACTIVE) {
865 		if (side != MOS_SIDE_CLI && side != MOS_SIDE_SVR) {
866 			TRACE_ERROR("Invalid side requested!\n");
867 			exit(EXIT_FAILURE);
868 			return -1;
869 		}
870 
871 		struct tcp_stream *mstrm = socket->monitor_stream->stream;
872 		cur_stream = (side == mstrm->side) ? mstrm : mstrm->pair_stream;
873 
874 		if (!cur_stream->allow_pkt_modification)
875 			return -1;
876 	} else if (socket->socktype != MOS_SOCK_MONITOR_RAW) {
877 		TRACE_ERROR("Invalid socket type!\n");
878 		exit(EXIT_FAILURE);
879 		return -1;
880 	}
881 #endif
882 
883 	/* see if cur_pkt_ctx is valid */
884 	cur_pkt_ctx = mtcp->pctx;
885 	if (cur_pkt_ctx == NULL) {
886 		TRACE_ERROR("pctx is NULL!\n");
887 		errno = ENODATA;
888 		return -1;
889 	}
890 
891 	/* check if offset is valid */
892 	if (offset < 0) {
893 		TRACE_ERROR("Invalid offset position!\n");
894 		errno = EINVAL;
895 		return -1;
896 	}
897 
898 	if (__builtin_popcount(option & (MOS_DROP | MOS_CHOMP |
899 					 MOS_INSERT | MOS_OVERWRITE)) != 1) {
900 		TRACE_ERROR("mtcp_setlastpkt() function only allows one of "
901 			    "(MOS_DROP | MOS_CHOMP | MOS_INSERT | MOS_OVERWRITE) "
902 			    "to be set at a time.\n");
903 		errno = EAGAIN;
904 		return -1;
905 	}
906 
907 	/* drop pkt has the highest priority */
908 	if (option & MOS_DROP) {
909 		mtcp->pctx->forward = 0;
910 		return 0;
911 	} else if (option & MOS_ETH_HDR) {
912 		/* validity test */
913 		if ((ethh=cur_pkt_ctx->p.ethh) == NULL ||
914 		    offset + datalen > sizeof(struct ethhdr)) {
915 			TRACE_ERROR("Ethernet setting has gone out of bounds "
916 				    "(offset: %ld, datalen: %d)\n",
917 				    offset, datalen);
918 			errno = EINVAL;
919 			return -1;
920 		}
921 		if (option & MOS_CHOMP) {
922 			TRACE_ERROR("Illegal call. "
923 				    "Ethernet header can't be chopped down!\n");
924 			errno = EACCES;
925 			return -1;
926 		} else if (option & MOS_INSERT) {
927 			TRACE_ERROR("Illegal call. "
928 				    "Ethernet header can't be extended!\n");
929 			errno = EACCES;
930 			return -1;
931 		} else /* if (option & MOS_OVERWRITE) */ {
932 			memcpy((uint8_t *)ethh + offset, data, datalen);
933 		}
934 		/* iph, tcph, and payload do not need to change */
935 	} else if (option & MOS_IP_HDR) {
936 		/* validity test */
937 		if (cur_pkt_ctx->p.ethh == NULL ||
938 		    cur_pkt_ctx->p.ethh->h_proto != ntohs(ETH_P_IP) ||
939 		    (iph=(struct iphdr *)(cur_pkt_ctx->p.ethh + 1)) == NULL) {
940 			TRACE_ERROR("ethh or iph are out of bounds\n");
941 			errno = EACCES;
942 			return -1;
943 		}
944 		if (option & MOS_OVERWRITE) {
945 			if (offset + datalen > (iph->ihl<<2)) {
946 				TRACE_ERROR("IP setting has gone out of bounds "
947 					    "(offset: %ld, datalen: %d)\n",
948 					    offset, datalen);
949 				errno = EINVAL;
950 				return -1;
951 			}
952 			memcpy((uint8_t *)iph + offset, data, datalen);
953 		}
954 		if (option & MOS_CHOMP) {
955 			memmove((uint8_t *)iph + offset,
956 				(uint8_t *)iph + offset + datalen,
957 				cur_pkt_ctx->p.ip_len - offset - datalen);
958 
959 			/* iph does not need to change */
960 			if (iph->protocol == IPPROTO_TCP) {
961 				cur_pkt_ctx->p.tcph = (struct tcphdr *)((uint8_t *)iph + (iph->ihl<<2));
962 				cur_pkt_ctx->p.payload = (uint8_t *)cur_pkt_ctx->p.tcph +
963 					(cur_pkt_ctx->p.tcph->doff<<2);
964 			} else {
965 				/* reset tcph if iph does not have tcp proto */
966 				cur_pkt_ctx->p.tcph = NULL;
967 			}
968 			/* update iph total length */
969 			cur_pkt_ctx->p.ip_len = ntohs(iph->tot_len);
970 			/* update eth frame length */
971 			cur_pkt_ctx->p.eth_len = cur_pkt_ctx->p.ip_len + sizeof(struct ethhdr);
972 		} else if (option & MOS_INSERT) {
973 			memmove((uint8_t *)iph + offset + datalen,
974 				(uint8_t *)iph + offset + 1,
975 				cur_pkt_ctx->p.ip_len - offset);
976 			memcpy((uint8_t *)iph + offset,
977 			       data, datalen);
978 
979 			/* iph does not need to change */
980 			if (iph->protocol == IPPROTO_TCP) {
981 				cur_pkt_ctx->p.tcph = (struct tcphdr *)((uint8_t *)iph + (iph->ihl<<2));
982 				cur_pkt_ctx->p.payload = (uint8_t *)cur_pkt_ctx->p.tcph +
983 					(cur_pkt_ctx->p.tcph->doff<<2);
984 			} else {
985 				/* reset tcph if iph does not have tcp proto */
986 				cur_pkt_ctx->p.tcph = NULL;
987 			}
988 			/* update iph total length */
989 			cur_pkt_ctx->p.ip_len = ntohs(iph->tot_len);
990 			/* update eth frame length */
991 			cur_pkt_ctx->p.eth_len = cur_pkt_ctx->p.ip_len + sizeof(struct ethhdr);
992 		}
993 		/* can't update payloadlen because we don't know tcph->doff */
994 	} else if (option & MOS_TCP_HDR) {
995 		/* validity test */
996 		iph = (struct iphdr *)(cur_pkt_ctx->p.ethh + 1);
997 		if (iph == NULL ||
998 		    iph->protocol != IPPROTO_TCP ||
999 		    (tcph=(struct tcphdr *)((uint8_t *)iph + (iph->ihl<<2))) == NULL) {
1000 			TRACE_ERROR("TCP setting has gone out of bounds "
1001 				    "(offset: %ld, datalen: %d)\n",
1002 				    offset, datalen);
1003 			errno = EINVAL;
1004 			return -1;
1005 		}
1006 		if (option & MOS_OVERWRITE) {
1007 			if (offset + datalen > (tcph->doff<<2)) {
1008 				TRACE_ERROR("TCP setting has gone out of bounds "
1009 					    "(offset: %ld, datalen: %d)\n",
1010 					    offset, datalen);
1011 				errno = EINVAL;
1012 				return -1;
1013 			}
1014 			memcpy((uint8_t *)tcph + offset, data, datalen);
1015 			/* update tcp seq # */
1016 			cur_pkt_ctx->p.seq = ntohl(tcph->seq);
1017 			/* update tcp ack_seq # */
1018 			cur_pkt_ctx->p.ack_seq = ntohl(tcph->ack_seq);
1019 			/* update tcp window */
1020 			cur_pkt_ctx->p.window = ntohs(tcph->window);
1021 
1022 			/* 150422 dhkim TODO: seq and offset are two different form of same
1023 			 * variable. We also need to update the offset. */
1024 		}
1025 		if (option & MOS_CHOMP) {
1026 			memmove((uint8_t *)tcph + offset,
1027 				(uint8_t *)tcph + offset + datalen,
1028 				cur_pkt_ctx->p.payloadlen + (tcph->doff<<2)
1029 				- offset - datalen);
1030 			/* update payload ptr */
1031 			cur_pkt_ctx->p.payload = (uint8_t *)tcph + (tcph->doff<<2);
1032 		} else if (option & MOS_INSERT) {
1033 			memmove((uint8_t *)tcph + offset + datalen,
1034 				(uint8_t *)tcph + offset + 1,
1035 				cur_pkt_ctx->p.payloadlen + (tcph->doff<<2)
1036 				- offset);
1037 			memcpy((uint8_t *)tcph + offset, data, datalen);
1038 			/* update payload ptr */
1039 			cur_pkt_ctx->p.payload = (uint8_t *)tcph + (tcph->doff<<2);
1040 		}
1041 	} else if (option & MOS_TCP_PAYLOAD) {
1042 		iph = (struct iphdr *)(cur_pkt_ctx->p.ethh + 1);
1043 		tcph = (struct tcphdr *)((uint8_t *)iph + (iph->ihl<<2));
1044 		payload = (uint8_t *)tcph + (tcph->doff<<2);
1045 		if (option & MOS_OVERWRITE) {
1046 			if (offset + datalen > ntohs(iph->tot_len) -
1047 			    (iph->ihl<<2) - (tcph->doff<<2)) {
1048 				TRACE_ERROR("Payload setting has gone out of bounds "
1049 					    "(offset: %ld, datalen: %d)\n",
1050 					    offset, datalen);
1051 				errno = EINVAL;
1052 				return -1;
1053 			}
1054 			memcpy(payload + offset, data, datalen);
1055 		}
1056 		if (option & MOS_CHOMP) {
1057 			memmove(payload + offset,
1058 				payload + offset + datalen,
1059 				(cur_pkt_ctx->p.payloadlen -
1060 				 offset - datalen));
1061 			/* update payload length */
1062 			cur_pkt_ctx->p.payloadlen = cur_pkt_ctx->p.ip_len -
1063 				(tcph->doff<<2) - (iph->ihl<<2);
1064 		} else if (option & MOS_INSERT) {
1065 			memmove(payload + offset + datalen,
1066 				payload + offset + 1,
1067 				cur_pkt_ctx->p.payloadlen - offset);
1068 			memcpy(payload + offset, data, datalen);
1069 			cur_pkt_ctx->p.payloadlen = cur_pkt_ctx->p.ip_len -
1070 				(tcph->doff<<2) - (iph->ihl<<2);
1071 		}
1072 	} else {
1073 		TRACE_ERROR("Invalid option!\n");
1074 		errno = EINVAL;
1075 		return -1;
1076 	}
1077 
1078 	/* update ip checksum */
1079 	if (option & MOS_UPDATE_IP_CHKSUM) {
1080 		iph = (struct iphdr *)(cur_pkt_ctx->p.ethh + 1);
1081 		iph->check = 0;
1082 		iph->check = ip_fast_csum(iph, iph->ihl);
1083 	}
1084 
1085 	/* update tcp checksum */
1086 	if (option & MOS_UPDATE_TCP_CHKSUM) {
1087 		iph = (struct iphdr *)(cur_pkt_ctx->p.ethh + 1);
1088 		tcph = (struct tcphdr *)((uint8_t *)iph + (iph->ihl<<2));
1089 		tcph->check = 0;
1090 		tcph->check = TCPCalcChecksum((uint16_t *)tcph,
1091 					      ntohs(iph->tot_len) - (iph->ihl<<2),
1092 					      iph->saddr, iph->daddr);
1093 	}
1094 	return 0;
1095 }
1096 /*----------------------------------------------------------------------------*/
#if 0
/*
 * Disabled code: everything up to the matching #endif is compiled out.
 *
 * Thin wrapper that forwards its arguments to mtcp_setlastpkt().
 * NOTE(review): the call below passes `sock` and `side`, which are neither
 * parameters nor locals of this function, so the stub cannot be re-enabled
 * as written without first deciding where those two values come from.
 */
inline int
mtcp_cb_updatecurpkt(mctx_t mctx, off_t offset, unsigned char *data,
		     uint16_t datalen, int option)
{
	return mtcp_setlastpkt(mctx, sock, side, offset, data, datalen, option);
}
#endif
1105 /*----------------------------------------------------------------------------*/
1106 /**
1107  * THIS IS A DEPRECETED FUNCTION...
1108  */
1109 int
1110 mtcp_cb_dropcurpkt(mctx_t mctx)
1111 {
1112 	mtcp_manager_t mtcp;
1113 
1114 	/* checking if mtcp is valid */
1115 	mtcp = GetMTCPManager(mctx);
1116 	if (!mtcp) {
1117 		TRACE_ERROR("Invalid mtcp!\n");
1118 		errno = EACCES;
1119 		return -1;
1120 	}
1121 
1122 	/* check if the calling thread is in MOS context */
1123 	if (mtcp->ctx->thread != pthread_self()) {
1124 		TRACE_ERROR("Invalid thread id!\n");
1125 		errno = EPERM;
1126 		return -1;
1127 	}
1128 
1129 	/* see if cur_pkt_ctx is valid */
1130 	if (mtcp->pctx == NULL) {
1131 		TRACE_ERROR("pctx is NULL!\n");
1132 		errno = ENODATA;
1133 		return -1;
1134 	}
1135 
1136 	mtcp->pctx->forward = 0;
1137 
1138 	return 0;
1139 }
1140 /*----------------------------------------------------------------------------*/
1141 int
1142 mtcp_set_debug_string(mtcp_manager_t mtcp, const char *fmt, ...)
1143 {
1144 #ifdef ENABLE_DEBUG_EVENT
1145 	va_list args;
1146 	int i;
1147 
1148 	assert(mtcp);
1149 
1150 	if (fmt == NULL) {
1151 		mtcp->dbg_buf[0] = '\0';
1152 		return 0;
1153 	}
1154 
1155 	va_start(args, fmt);
1156 	i = vsnprintf(mtcp->dbg_buf, DBG_BUF_LEN - 1, fmt, args);
1157 	va_end(args);
1158 
1159 	return i;
1160 #else
1161 	return -1;
1162 #endif /* ENABLE_DEBUG_EVENT */
1163 }
1164 /*----------------------------------------------------------------------------*/
1165 int
1166 mtcp_get_debug_string(mctx_t mctx, char *buf, int len)
1167 {
1168 #ifdef ENABLE_DEBUG_EVENT
1169 	mtcp_manager_t mtcp;
1170 	int copylen;
1171 
1172 	if (len < 0)
1173 		return -1;
1174 	else if (len == 0)
1175 		return 0;
1176 
1177 	if (!(mtcp = GetMTCPManager(mctx)))
1178 		return -1;
1179 
1180 	copylen = MIN(strlen(mtcp->dbg_buf), len);
1181 	strncpy(buf, mtcp->dbg_buf, copylen);
1182 
1183 	return copylen;
1184 #else
1185 	return -1;
1186 #endif /* ENABLE_DEBUG_EVENT */
1187 }
1188 /*----------------------------------------------------------------------------*/
1189