1 #ifndef __MOS_API_H_
2 #define __MOS_API_H_
3 
4 #ifdef DARWIN
5 #include <netinet/tcp.h>
6 #include <netinet/if_ether.h>
7 #else
8 #include <linux/tcp.h>
9 #include <linux/if_ether.h>
10 #endif
11 #include <netinet/in.h>
12 #include <arpa/inet.h>
13 #include <netinet/ip.h>
14 #include <stddef.h> /* for offsetof */
15 #include "mtcp_epoll.h"
16 #include <stdbool.h>
17 
/* Pointer handle to struct mtcp_manager. The struct is only forward-declared
 * here; the guard lets another header provide the same typedef first. */
#ifndef __MTCP_MANAGER
#define __MTCP_MANAGER
typedef struct mtcp_manager *mtcp_manager_t;
#endif /* __MTCP_MANAGER */
/* Pointer handle to struct socket_map. The struct is only forward-declared
 * here; the guard lets another header provide the same typedef first. */
#ifndef __SOCKET_MAP
#define __SOCKET_MAP
typedef struct socket_map *socket_map_t;
#endif /* __SOCKET_MAP */
26 
/** Available hooking points.
 *
 * NOTE: The value of a hooking point must not overlap with any value of
 * enum mos_event_type, which is why only the top bits are used here.
 */
enum mtcp_hook_point
{
	/** Very first hooking point of an incoming packet, even before flow
	 * identification */
	MOS_NULL	= (1 << 29),
	/** Hooking point before TCP receiver */
	MOS_HK_RCV	= (1 << 30),
	/** Hooking point after TCP sender.
	 *
	 * The bit is built with an unsigned shift: `1 << 31` overflows a
	 * 32-bit signed int, which is undefined behavior. Converting the
	 * unsigned result back to int keeps the constant an int with only
	 * bit 31 set (i.e. the value `1 << 31` yields in practice on
	 * two's-complement targets). */
	MOS_HK_SND	= (int)(1U << 31),
};
41 
/** Built-in events provided by mOS (each event occupies one bit) */
enum mos_event_type
{
	/** Invalid event */
	MOS_NULL_EVENT		= 0,

	/* ---- mOS-defined built-in TCP events ---- */

	/** A packet is coming in. */
	MOS_ON_PKT_IN		= 1 << 0,

	/** A packet is going out.
	 * DEPRECATED: this event is now used only for debugging. */
	MOS_ON_PKT_OUT		= 1 << 1,

	/** SYN packet as seen by the monitor.
	 *  - client side: raised when the client state is set to SYN_SENT
	 *  - server side: raised when the server state is set to SYN_RCVD
	 *
	 *  Retransmitted SYN packets do not raise this event. */
	MOS_ON_CONN_START	= 1 << 2,

	/** The 3-way handshake is finished.
	 *  - server side: an ACK arrives as the response to the SYNACK
	 *  - client side: a SYNACK arrives as the response to the SYN
	 * DEPRECATED. */
	MOS_ON_CONN_SETUP	= 1 << 3,

	/** New data is now readable.
	 * Available only at the MOS_NULL hook point. mOS raises it only once
	 * while processing a batch of packets. */
	MOS_ON_CONN_NEW_DATA	= 1 << 4,

	/** Abnormal behavior is detected.
	 * NOTE: not fully implemented yet. */
	MOS_ON_ERROR		= 1 << 5,

	/** No packet has been seen for a long time.
	 * Implemented as mtcp_cb_settimer(). */
	MOS_ON_TIMEOUT		= 1 << 6,

	/** The TCP state is being changed. */
	MOS_ON_TCP_STATE_CHANGE	= 1 << 7,

	/** A packet is not a SYN and belongs to no identified flow. */
	MOS_ON_ORPHAN		= 1 << 8,

	/** A retransmission is detected. */
	MOS_ON_REXMIT		= 1 << 9,

	/** A flow is about to be destroyed.
	 * The cause may be the 4-way handshake, an RST packet or a timeout.
	 * NOTE: in the current implementation mOS raises this event while
	 * destroying `struct tcp_stream`, so false positives are possible,
	 * especially when mOS is running out of memory. */
	MOS_ON_CONN_END		= 1 << 10,

	/** Debugging event. It can easily be muted later. */
	MOS_ON_DEBUG_MESSAGE	= 1 << 11,
};
92 
93 #if 0
94 /* This may go away in future revisions */
95 typedef union event_data {
96 	uint32_t u32;
97 	uint64_t u64;
98 	void *ptr;
99 } event_data_t;
100 #endif
101 
/* Option flags for updating the packet context (one bit per flag) */
#define MOS_ETH_HDR		0x0001	/* bit 0 */
#define MOS_IP_HDR		0x0002	/* bit 1 */
#define MOS_TCP_HDR		0x0004	/* bit 2 */
#define MOS_TCP_PAYLOAD		0x0008	/* bit 3 */
#define MOS_UPDATE_IP_CHKSUM	0x0010	/* bit 4 */
#define MOS_UPDATE_TCP_CHKSUM	0x0020	/* bit 5 */
#define MOS_DROP		0x0040	/* bit 6 */
#define MOS_OVERWRITE		0x0080	/* bit 7 */
#define MOS_CHOMP		0x0100	/* bit 8 */
#define MOS_INSERT		0x0200	/* bit 9 */
113 
/**
 * struct pkt_info - the packet description that is actually exposed to the
 * monitor application.
 *
 * NOTE: When the packet information is retrieved with mtcp_getlastpkt()
 * through a MOS_SOCK_MONITOR_RAW socket, only the information up to L3 is
 * usable (cur_ts, eth_len, ip_len, ethh, iph).
 */
struct pkt_info {
	uint32_t	cur_ts;		/**< packet receiving time (read-only: ro) */
	int8_t		in_ifidx;	/**< input interface (ro) */

	/* Ethernet */
	uint16_t	eth_len;

	/* IP */
	uint16_t	ip_len;

	/* TCP */
	uint64_t	offset;		/**< TCP ring buffer offset */
	uint16_t	payloadlen;
	uint32_t	seq;
	uint32_t	ack_seq;
	uint16_t	window;

	/*
	 * NOTE(review): an earlier comment marked a "28 byte boundary" here.
	 * The real offset of ethh depends on the padding the ABI inserts
	 * above; use offsetof(struct pkt_info, ethh) rather than a constant.
	 */

	/*
	 * CAUTION!!!
	 * The pointer fields below (ethh .. payload) must always stay at the
	 * very end of this definition. mOS relies on this specific
	 * arrangement when it creates a new instance of pctx during an
	 * mtcp_getlastpkt() invocation.
	 */
	struct ethhdr	*ethh;
	struct iphdr	*iph;
	struct tcphdr	*tcph;
	uint8_t		*payload;
};
153 
154 /**
155  * PACKET CONTEXT is the packet structure that goes through
156  * the mOS core...
157  */
158 struct pkt_ctx {
159 	struct pkt_info  p;
160 
161 	int8_t        direction; /**< where does this packet originate from? (ro)*/
162 	uint8_t       forward;   /**< 0: drop, 1: forward to out_ifidx (rw) */
163 	int8_t        out_ifidx; /**< output interface (rw) */
164 	int8_t        batch_index; /**< index of packet in the rx batch */
165 	/* ~~ 64 byte boundary ~~ */
166 };
167 #define PKT_INFO_LEN		offsetof(struct pkt_info, ethh)
168 
/*
 * Describes a sequence number change.
 * Used for MOS_SEQ_REMAP.
 */
typedef struct {
	/* the amount of sequence number drift */
	int64_t		seq_off;
	/* which side this sequence number change applies to */
	int		side;
	/* seq # of the flow where the actual sequence # translation starts */
	uint32_t	base_seq;
} seq_remap_info;
178 
/* Argument handed to callback_t / filter_t functions.
 * NOTE(review): len is presumably the size in bytes of the data arg
 * points to -- confirm against the implementation. */
typedef struct filter_arg {
	void	*arg;
	size_t	len;
} filter_arg_t;
183 
/**
 * Option levels.
 * SOL_SOCKET is the level number the POSIX library makes available for
 * sockets; it is only defined here when no other header has done so.
 */
#ifndef SOL_SOCKET
/* Level number for (get/set)sockopt() to apply to the socket itself */
#define SOL_SOCKET		0xffff	/* options for socket level */
#endif /* SOL_SOCKET */
/* Level number for options of mOS monitor sockets */
#define SOL_MONSOCKET		0xfffe	/* MOS monitor socket level */
193 
/**
 * MOS monitor socket option names (and values).
 * These are the options that pertain to monitor stream sockets.
 * See mtcp_getsockopt() and mtcp_setsockopt() in the mtcp_api.h file.
 *
 * The enumerators are listed in numeric order; every value is explicit.
 */
enum mos_socket_opts {
	MOS_FRAGINFO_CLIBUF	= 0x01,
	MOS_FRAGINFO_SVRBUF	= 0x02,
	MOS_INFO_CLIBUF		= 0x03,
	MOS_INFO_SVRBUF		= 0x04,
	MOS_TCP_STATE_CLI	= 0x05,
	MOS_TCP_STATE_SVR	= 0x06,
	MOS_TIMESTAMP		= 0x07,	/* suppressed (not used) */
	MOS_CLIBUF		= 0x09,
	MOS_SVRBUF		= 0x0a,
	MOS_SEQ_REMAP		= 0x0b,	/* suppressed (not used) */
	MOS_STOP_MON		= 0x0c,
	MOS_FRAG_CLIBUF		= 0x0d,	/* suppressed (not used) */
	MOS_FRAG_SVRBUF		= 0x0e,	/* suppressed (not used) */
	MOS_CLIOVERLAP		= 0x0f,
	MOS_SVROVERLAP		= 0x10,
};
218 
/**
 * MOS TCP buffer info structure.
 * The monitor application uses it to retrieve tcp_stream-related
 * information, usually through the mtcp_getsockopt() function.
 */
struct tcp_buf_info {
	/** Initial TCP sequence number of the TCP ring buffer. */
	uint32_t	tcpbi_init_seq;

	/** TCP sequence number of the 'last byte of payload that has already
	 * been read by the end application' (applies to the embedded monitor
	 * setup). */
	uint32_t	tcpbi_last_byte_read;

	/** TCP sequence number of the 'last byte of the payload that is
	 * currently buffered and needs to be read by the end application'
	 * (applies to the embedded monitor setup).
	 *
	 * For standalone monitors, tcpbi_last_byte_read equals
	 * tcpbi_next_byte_expected. */
	uint32_t	tcpbi_next_byte_expected;

	/** TCP sequence number of the 'last byte of the payload that is
	 * currently stored' in the TCP ring buffer. It may be greater than
	 * tcpbi_next_byte_expected when packets arrive out of order. */
	uint32_t	tcpbi_last_byte_received;
};
248 
/** Exposes the fragment information of the TCP ring buffer:
 * one (offset, len) pair per fragment. */
struct tcp_ring_fragment {
	uint64_t	offset;
	uint32_t	len;
};
254 
/**
 * mOS TCP stream states.
 * The monitor application uses them to retrieve the state of a
 * tcp_stream, usually through the getsockopt() function.
 */
enum tcpstate
{
	TCP_CLOSED	=  0,
	TCP_LISTEN	=  1,
	TCP_SYN_SENT	=  2,
	TCP_SYN_RCVD	=  3,
	TCP_ESTABLISHED	=  4,
	TCP_FIN_WAIT_1	=  5,
	TCP_FIN_WAIT_2	=  6,
	TCP_CLOSE_WAIT	=  7,
	TCP_CLOSING	=  8,
	TCP_LAST_ACK	=  9,
	TCP_TIME_WAIT	= 10
};
275 
/** mOS segment overlapping policies */
enum {
	MOS_OVERLAP_POLICY_FIRST	= 0,
	MOS_OVERLAP_POLICY_LAST		= 1,
	/* number of policies defined above */
	MOS_OVERLAP_CNT			= 2
};
282 
/** Event identifier type */
typedef uint64_t event_t;

/** Monitor side selector */
enum {
	MOS_SIDE_CLI	= 0,
	MOS_SIDE_SVR	= 1,
	MOS_SIDE_BOTH	= 2
};
288 
289 /* mos callback/filter function type definition */
290 /** Prototype of callback function */
291 typedef void (*callback_t)(mctx_t mctx, int sock, int side,
292 			 event_t event, filter_arg_t *arg);
293 /** Prototype of UDE's filter function */
294 typedef bool (*filter_t)(mctx_t mctx, int sock, int side,
295 		       event_t event, filter_arg_t *arg);
296 
297 /*----------------------------------------------------------------------------*/
/* Filter description handed to mtcp_bind_monitor_filter().
 * Which members are meaningful depends on the type of the monitor socket. */
union monitor_filter {
	/** Used with a MOS_SOCK_MONITOR_RAW type socket */
	char	*raw_pkt_filter;

	/** Used with a MOS_SOCK_MONITOR_STREAM type socket */
	struct {
		char	*stream_syn_filter;
		char	*stream_orphan_filter;
	};
};

/* Pointer to a union monitor_filter */
typedef union monitor_filter *monitor_filter_t;
309 
310 /* Assign an address range (specified by ft) to monitor via sock
311  *
312  * (1) If sock is MOS_SOCK_MONITOR_RAW type, ft.raw_pkt_filter is applied to
313  *     every packet coming in.
314  * (2) If sock is MOS_SOCK_MONITOR_STREAM type,
315  *     ft.stream_syn_filter is applied to the first SYN pkt of the flow.
316  *     (The succeeding packets of that flow will bypass the filter operation.)
317  *     ft.stream_orphan_filter is applied to the pkts that don't belong to any
318  *     of the existing TCP streams which are being monitored.
319  *     (e.g., non-SYN pkt with no identified flow)
320  * [*] ft.stream_syn_filter and ft.stream_orphan_filter should be consisted
321  *     only of the following keywords:
322  *     - 'tcp, 'host', 'src', 'dst', 'net', 'mask', 'port', 'portrange'
323  *     - 'and', 'or', '&', '|'
324  *
325  * @param [in] mctx: mtcp context
326  * @param [in] sock: socket id (should be MOS_SOCK_MONITOR_RAW
327  *                   or MOS_SOCK_MONITOR_STREAM type)
328  * @param [in] cf: Describe a set of connections to accept
329  *                 in a BPF (Berkerley Packet Filter) format
330  *                 NULL if you want to monitor any packet
331  * @return zero on success, -1 on error
332  */
333 int
334 mtcp_bind_monitor_filter(mctx_t mctx, int sock, monitor_filter_t ft);
335 /*----------------------------------------------------------------------------*/
336 
337 /** Register a callback function in hook_point
338  * @param [in] mctx: mtcp context
339  * @param [in] sock: socket id
340  * @param [in] event: event id
341  * @param [in] hook_point: MOS_HK_RCV, MOS_HK_SND, MOS_DONTCARE
342  * @param [in] cb: callback fucntion
343  * @return zero on success, -1 on error
344  *
345  * (both for packet-level and flow-level) for events in hook_point
346  */
347 int
348 mtcp_register_callback(mctx_t mctx, int sock, event_t event,
349 		                       int hook_point, callback_t cb);
350 
351 /** Remove registered callback functions
352  * @param [in] mctx: mtcp context
353  * @param [in] sock: socket id
354  * @param [in] event: event id
355  * @param [in] hook_point: MOS_HK_RCV, MOS_HK_SND, MOS_NULL
356  * @return zero on success, -1 on error
357  *
358  * (both for packet-level and flow-level) for events in hook_point
359  */
360 //int
361 //mtcp_unregister_callback(mctx_t mctx, int sock, event_t event,
362 //		                       int hook_point);
363 
364 /** Allocate a child event
365  * @param [in] event: event id
366  * @return new event id on success, 0 on error
367  */
368 event_t
369 mtcp_alloc_event(event_t event);
370 
371 /** Define a user-defined event function
372  * @param [in] event: event id
373  * @param [in] filter: filter fucntion for new event
374  * @param [in] arg: a filter argument to be delivered to the filter
375  * @return new event id on success, 0 on error
376  *
377  * (both for packet-level and flow-level)
378  */
379 event_t
380 mtcp_define_event(event_t event, filter_t filter, struct filter_arg *arg);
381 
382 /** Raise a event
383  * @param [in] mctx: mtcp context
384  * @param [in] event: event id
385  * @return 0 on success, -1 on error
386  */
387 int
388 mtcp_raise_event(mctx_t mctx, event_t event);
389 
390 /*
391  * Callback only functions
392  */
393 
394 /** Set user-level context
395  * (e.g., to store any per-flow user-defined meatadata)
396  * @param [in] mctx: mtcp context
397  * @param [in] sock: the monitor socket id
398  * @param [in] uctx: user-level context
399  */
400 void
401 mtcp_set_uctx(mctx_t mctx, int sock, void *uctx);
402 
403 /** Get user-level context
404  * (e.g., to retrieve user-defined metadata stored in mtcp_set_uctx())
405  * @param [in] mctx: mtcp context
406  * @param [in] sock: the monitor socket id
407  * @return user-level context for input flow_ocntext
408  */
409 void *
410 mtcp_get_uctx(mctx_t mctx, int sock);
411 
412 /** Peeking bytestream from flow_context
413  * @param [in] mctx: mtcp context
414  * @param [in] sock: monitoring stream socket id
415  * @param [in] side: side of monitoring (client side, server side or both)
416  * @param [in] buf: buffer for read byte stream
417  * @param [in] len: requested length
418  *
419  * It will return the number of bytes actually read.
420  * It will return -1 if there is an error
421 */
422 ssize_t
423 mtcp_peek(mctx_t mctx, int sock, int side,
424 	     char *buf, size_t len);
425 
426 /**
427  * The mtcp_ppeek() function reads up to count bytes from the TCP ring
428  * buffer of the monitor socket sock in mctx into buf, starting from
429  * the TCP sequence number seq_num.
430  * Note that seq_num can point the data in the fragmented buffer list
431  * of the TCP ring buffer. If there is no received byte with TCP sequence
432  * number seq_num in the TCP ring buffer, it returns error. If there are
433  * received bytes starting from seq_num, count is set to be the number
434  * of bytes read from the buffer. After mtcp_ppeek(), the data in the
435  * TCP ring buffer will not be flushed, and the monitor offset used by
436  * mtcp_peek() is not changed.
437  *
438  * @param [in] mctx: mtcp context
439  * @param [in] sock: monitoring stream socket id
440  * @param [in] side: side of monitoring (client side, server side or both)
441  * @param [in] buf: buffer for read byte stream
442  * @param [in] count: No. of bytes to be read
443  * @param [in] seq_num: byte offset of the TCP bytestream (absolute offset: offset 0 = init_seq_num)
444  * @return # of bytes actually read on success, -1 for error
445  */
446 ssize_t mtcp_ppeek(mctx_t mctx, int sock, int side,
447 			  char *buf, size_t count, uint64_t off);
448 
449 /* Use this macro to copy packets when mtcp_getlastpkt is called */
450 #define MTCP_CB_GETCURPKT_CREATE_COPY
451 
452 /** Get current packet of mtcp context
453  * @param [in] mctx: mTCP/mOS context
454  * @param [in] sock: monitoring stream socket id
455  * @param [in] side: side of monitoring
456  *                   (MOS_NULL for MOS_SOCK_MONITOR_RAW socket)
457  * @param [in] p: ptr to packet info ptr
458  * (only L2-L3 information is available for MOS_SOCK_MONITOR_RAW socket)
459  * @return 0 on success, -1 on failure
460  * This is useful for running callback-only applications
461  */
462 int
463 mtcp_getlastpkt(mctx_t mctx, int sock, int side, struct pkt_info *p);
464 
465 /** Register user's custom timer
466  * @param [in] mctx: mtcp context
467  * @param [in] id: timer id
468  * @param [in] timeout: timeout length
469  * @param [in] cb: callback function
470  */
471 int
472 mtcp_settimer(mctx_t mctx, int id, struct timeval *timeout, callback_t cb);
473 
474 /** A sibling function to mtcp_settimer that returns
475  * the current timestamp of the machine in microseconds.
476  * This avoids the monitor application to call current
477  * time getter functions (e.g. gettimeofday) that may
478  * incur overhead.
479  *
480  * @param [in] mctx: mtcp context
481  * Returns timestamp on success, 0 on failure.
482  */
483 uint32_t
484 mtcp_cb_get_ts(mctx_t mctx);
485 
486 /** Pause mtcp application context since it is not running anything
487  * @param [in] mctx: mtcp context
488  *
489  * This is useful for running callback-only applications
490  */
491 void
492 mtcp_app_join(mctx_t mctx);
493 
494 /** Get IP addrs/ports for both sides.
495  * (Server IP/port in 0th element) (Client IP/port in 1st element)
496  * Should only be called with MOS_SOCK_MONITOR_STREAM_ACTIVE socket
497  * _NOTE_: Code is currently not set for MOS_SOCK_STREAM!!!
498  * Returns 0 on success, -1 on failure
499  */
500 int
501 mtcp_getpeername(mctx_t mctx, int sock, struct sockaddr *saddr, socklen_t *addrlen, int side);
502 
503 /**
504  * Updates the Ethernet frame at a given offset across
505  * datalen bytes.
506  *
507  * @param [in] mctx: mtcp context
508  * @param [in] sock: monitoring socket
509  * @param [in] side: monitoring side
510  *                   (MOS_NULL for MOS_SOCK_MONITOR_RAW socket)
511  * @param [in] offset: the offset from where the data needs to be written
512  * @param [in] data: the data buffer that needs to be written
513  * @param [in] datalen: the length of data that needs to be written
514  * @param [in] option: disjunction of MOS_ETH_HDR, MOS_IP_HDR, MOS_TCP_HDR,
515  *			MOS_TCP_PAYLOAD, MOS_DROP_PKT, MOS_UPDATE_TCP_CHKSUM,
516  *			MOS_UPDATE_IP_CHKSUM
517  * @return Returns 0 on success, -1 on failure
518  *
519  * If you want to chomp/insert something in the payload:
520  * (i) first update the ip header to adjust iph->tot_len field; (MOS_OVERWRITE)
521  * (ii) then update the tcp payload accordingly (MOS_CHOMP or MOS_INSERT)
522  *
523  * MOS_DROP, MOS_OVERWRITE, MOS_CHOMP and MOS_INSERT are mutually
524  * exclusive operations
525  */
526 int
527 mtcp_setlastpkt(mctx_t mctx, int sock, int side, off_t offset,
528 		byte *data, uint16_t datalen, int option);
529 
530 /** Send a TCP packet of struct pkt_info
531  * @param [in] mctx: mTCP/mOS context
532  * @param [in] sock: monitoring stream socket id
533  * @param [in] pkt: ptr to packet info (e.g., captured by mtcp_getlastpkt)
534  * @return 0 on success, -1 on failure
535  * (NOTE: this function supports only TCP packet for now.
536  *  we will add the support for any ethernet packets when required)
537  */
538 int
539 mtcp_sendpkt(mctx_t mctx, int sock, const struct pkt_info *pkt);
540 
541 #endif /* __MOS_API_H_ */
542