1 #ifndef __TCP_STREAM_H_ 2 #define __TCP_STREAM_H_ 3 4 #ifdef DARWIN 5 #include <netinet/tcp.h> 6 #else 7 #include <linux/tcp.h> 8 #endif 9 #include <netinet/ip.h> 10 #include <sys/queue.h> 11 12 #include "mtcp.h" 13 #include "socket.h" 14 #include "memory_mgt.h" 15 #include "tcp_rb.h" 16 17 #define TCP_MAX_SEQ 4294967295 18 19 #define HAS_STREAM_TYPE(str, type) (str->stream_type & (1 << type)) 20 #define IS_STREAM_TYPE(str, type) (str->stream_type == (1 << type)) 21 #define STREAM_TYPE(type) (1 << type) 22 23 /*----------------------------------------------------------------------------*/ 24 /** 25 * routines for traversing stream + raw sockets 26 */ 27 struct sockent { 28 struct socket_map *sock; 29 TAILQ_ENTRY(sockent) link; 30 }; 31 #define SOCKQ_HEAD(name) TAILQ_HEAD(name, sockent) 32 #define SOCKQ_INIT(head) TAILQ_INIT(head) 33 #define SOCKQ_INSERT_TAIL(head, socket) \ 34 do { \ 35 struct sockent *__s = \ 36 (struct sockent *)MPAllocateChunk(mtcp->sockent_pool); \ 37 if (__s) { \ 38 __s->sock = (socket); \ 39 TAILQ_INSERT_TAIL(head, __s, link); \ 40 } \ 41 } while (0) 42 #define SOCKQ_REMOVE(head, socket) \ 43 do { \ 44 struct sockent *__walk, *__temp; \ 45 for (__walk = TAILQ_FIRST(head); __walk != NULL; __walk = __temp) { \ 46 __temp = TAILQ_NEXT(__walk, link); \ 47 if ((socket) == __walk->sock) break; \ 48 } \ 49 if (__walk) { \ 50 TAILQ_REMOVE(head, __walk, link); \ 51 MPFreeChunk(mtcp->sockent_pool, __walk); \ 52 } \ 53 } while (0) 54 #define SOCKQ_FOREACH_START(var, head) \ 55 do { \ 56 struct sockent *__walk, *__temp; \ 57 for (__walk = ((head)->tqh_first); \ 58 ((var) = __walk ? __walk->sock : NULL, __walk); \ 59 __walk = __temp) { \ 60 __temp = ((__walk)->link.tqe_next); 61 #define SOCKQ_FOREACH_REVERSE(var, head) \ 62 do { \ 63 struct sockent *__walk, *__temp; \ 64 for (__walk = (*(((struct mlist *)((head)->tqh_last))->tqh_last)); \ 65 ((var) = __walk ? __walk->sock : NULL, __walk); \ 66 __walk = __temp) { \ 67 __temp = (*(((struct mlist *)((__walk)->link.tqe_prev))->tqh_last)); 68 #define SOCKQ_FOREACH_END }} while (0) 69 /*----------------------------------------------------------------------------*/ 70 71 struct rtm_stat 72 { 73 uint32_t tdp_ack_cnt; 74 uint32_t tdp_ack_bytes; 75 uint32_t ack_upd_cnt; 76 uint32_t ack_upd_bytes; 77 #if TCP_OPT_SACK_ENABLED 78 uint32_t sack_cnt; 79 uint32_t sack_bytes; 80 uint32_t tdp_sack_cnt; 81 uint32_t tdp_sack_bytes; 82 #endif /* TCP_OPT_SACK_ENABLED */ 83 uint32_t rto_cnt; 84 uint32_t rto_bytes; 85 }; 86 87 typedef struct seq_remap_entry { 88 uint32_t seq_base; 89 uint32_t seq_off; 90 } seq_remap_entry; 91 92 #define SRE_MAX 8 93 94 #if TCP_OPT_SACK_ENABLED 95 struct sack_entry 96 { 97 uint32_t left_edge; 98 uint32_t right_edge; 99 uint32_t expire; 100 }; 101 #endif /* TCP_OPT_SACK_ENABLED */ 102 103 struct tcp_recv_vars 104 { 105 /* receiver variables */ 106 uint32_t rcv_wnd; /* receive window (unscaled) */ 107 //uint32_t rcv_up; /* receive urgent pointer */ 108 uint32_t irs; /* initial receiving sequence */ 109 uint32_t snd_wl1; /* segment seq number for last window update */ 110 uint32_t snd_wl2; /* segment ack number for last window update */ 111 112 /* variables for fast retransmission */ 113 uint32_t last_ack_seq; /* highest ackd seq */ 114 uint8_t dup_acks; /* number of duplicated acks */ 115 116 /* timestamps */ 117 uint32_t ts_recent; /* recent peer timestamp */ 118 uint32_t ts_lastack_rcvd; /* last ack rcvd time */ 119 uint32_t ts_last_ts_upd; /* last peer ts update time */ 120 uint32_t ts_tw_expire; // timestamp for timewait expire 121 122 /* RTT estimation variables */ 123 uint32_t srtt; /* smoothed round trip time << 3 (scaled) */ 124 uint32_t mdev; /* medium deviation */ 125 uint32_t mdev_max; /* maximal mdev ffor the last rtt period */ 126 uint32_t rttvar; /* smoothed mdev_max */ 127 uint32_t rtt_seq; /* sequence number to update rttvar */ 128 129 #if TCP_OPT_SACK_ENABLED /* currently not used */ 130 #define MAX_SACK_ENTRY 8 131 struct sack_entry sack_table[MAX_SACK_ENTRY]; 132 uint8_t sacks:3; 133 #endif /* TCP_OPT_SACK_ENABLED */ 134 135 tcprb_t *rcvbuf; 136 137 #if USE_SPIN_LOCK 138 pthread_spinlock_t read_lock; 139 #else 140 pthread_mutex_t read_lock; 141 #endif 142 struct hash_bucket_head *he_mybucket; 143 TAILQ_ENTRY(tcp_stream) he_link; /* hash table entry link */ 144 }; 145 146 struct tcp_send_vars 147 { 148 /* IP-level information */ 149 uint16_t ip_id; 150 151 uint16_t mss; /* maximum segment size */ 152 uint16_t eff_mss; /* effective segment size (excluding tcp option) */ 153 154 uint8_t wscale_mine; /* my window scale (advertising window) */ 155 uint8_t wscale_peer; /* peer's window scale (advertised window) */ 156 int8_t nif_out; /* cached output network interface */ 157 unsigned char *d_haddr; /* cached destination MAC address */ 158 159 /* send sequence variables */ 160 uint32_t snd_una; /* send unacknoledged */ 161 uint32_t snd_wnd; /* send window (unscaled) */ 162 uint32_t peer_wnd; /* client window size */ 163 //uint32_t snd_up; /* send urgent pointer (not used) */ 164 uint32_t iss; /* initial sending sequence */ 165 uint32_t fss; /* final sending sequence */ 166 167 /* retransmission timeout variables */ 168 uint8_t nrtx; /* number of retransmission */ 169 uint8_t max_nrtx; /* max number of retransmission */ 170 uint32_t rto; /* retransmission timeout */ 171 uint32_t ts_rto; /* timestamp for retransmission timeout */ 172 173 /* congestion control variables */ 174 uint32_t cwnd; /* congestion window */ 175 uint32_t ssthresh; /* slow start threshold */ 176 177 /* timestamp */ 178 uint32_t ts_lastack_sent; /* last ack sent time */ 179 180 uint8_t is_wack:1, /* is ack for window adertisement? */ 181 ack_cnt:6; /* number of acks to send. max 64 */ 182 183 uint8_t on_control_list; 184 uint8_t on_send_list; 185 uint8_t on_ack_list; 186 uint8_t on_sendq; 187 uint8_t on_ackq; 188 uint8_t on_closeq; 189 uint8_t on_resetq; 190 191 uint8_t on_closeq_int:1, 192 on_resetq_int:1, 193 is_fin_sent:1, 194 is_fin_ackd:1; 195 196 TAILQ_ENTRY(tcp_stream) control_link; 197 TAILQ_ENTRY(tcp_stream) send_link; 198 TAILQ_ENTRY(tcp_stream) ack_link; 199 200 TAILQ_ENTRY(tcp_stream) timer_link; /* timer link (rto list, tw list) */ 201 TAILQ_ENTRY(tcp_stream) timeout_link; /* connection timeout link */ 202 203 struct tcp_send_buffer *sndbuf; 204 struct seq_remap_entry sre[SRE_MAX]; /* seq # translation table */ 205 uint8_t sre_index; /* seq # translation index */ 206 207 #if USE_SPIN_LOCK 208 pthread_spinlock_t write_lock; 209 #else 210 pthread_mutex_t write_lock; 211 #endif 212 213 #if RTM_STAT 214 struct rtm_stat rstat; /* retransmission statistics */ 215 #endif 216 }; 217 218 typedef struct tcp_stream 219 { 220 /* 221 * This is a direct replacement for fctx... 222 * However this could be replaced by some 223 * more elaborate data structure that supports 224 * multiple monitors in the future... 225 * 226 * In case no monitor is attached, msock will be 227 * NULL. 228 * 229 * Support for standalone monitors will be patched 230 * in future revisions... 231 */ 232 233 SOCKQ_HEAD(mlist) msocks; /* in case monitoring is enabled */ 234 socket_map_t socket; /* relating to MOS_SOCK_STREAM */ 235 236 uint32_t id; 237 uint32_t stream_type; /* to identify sock_stream/mon_stream */ 238 239 uint32_t saddr; /* in network order */ 240 uint32_t daddr; /* in network order */ 241 uint16_t sport; /* in network order */ 242 uint16_t dport; /* in network order */ 243 244 uint32_t actions; 245 uint64_t cb_events; 246 247 uint8_t state; /* tcp state */ 248 uint8_t close_reason; /* close reason */ 249 uint8_t on_hash_table; 250 uint8_t on_timewait_list; 251 uint8_t ht_idx; 252 uint8_t closed; 253 uint8_t is_bound_addr; 254 uint8_t need_wnd_adv; 255 int16_t on_rto_idx; 256 257 uint16_t on_timeout_list:1, 258 on_rcv_br_list:1, 259 on_snd_br_list:1, 260 saw_timestamp:1, /* whether peer sends timestamp */ 261 sack_permit:1, /* whether peer permits SACK */ 262 control_list_waiting:1, 263 have_reset:1, 264 side:2, 265 buffer_mgmt:2, 266 status_mgmt:1, 267 allow_pkt_modification:1; 268 269 uint32_t snd_nxt; /* send next */ 270 uint32_t rcv_nxt; /* receive next */ 271 272 struct tcp_recv_vars *rcvvar; 273 struct tcp_send_vars *sndvar; 274 275 uint32_t last_active_ts; /* ts_last_ack_sent or ts_last_ts_upd */ 276 277 struct tcp_stream *pair_stream; /* pair stream in case of monitor / proxy socket */ 278 #ifdef RECORDPKT_PER_STREAM 279 struct pkt_ctx last_pctx; 280 unsigned char last_pkt_data[ETHERNET_FRAME_LEN]; 281 #endif 282 283 } tcp_stream; 284 285 extern inline char * 286 TCPStateToString(const tcp_stream *cur_stream); 287 288 extern inline int 289 AddEpollEvent(struct mtcp_epoll *ep, 290 int queue_type, socket_map_t socket, uint32_t event); 291 292 extern inline void 293 RaiseReadEvent(mtcp_manager_t mtcp, tcp_stream *stream); 294 295 extern inline void 296 RaiseWriteEvent(mtcp_manager_t mtcp, tcp_stream *stream); 297 298 extern inline void 299 RaiseCloseEvent(mtcp_manager_t mtcp, tcp_stream *stream); 300 301 extern inline int 302 RaiseErrorEvent(mtcp_manager_t mtcp, tcp_stream *stream); 303 304 tcp_stream * 305 CreateTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, 306 uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport, 307 unsigned int *hash); 308 309 extern inline tcp_stream * 310 CreateDualTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, uint32_t saddr, 311 uint16_t sport, uint32_t daddr, uint16_t dport, unsigned int *hash); 312 313 extern inline tcp_stream * 314 CreateClientTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, 315 uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport, unsigned int *hash); 316 317 extern inline tcp_stream * 318 AttachServerTCPStream(mtcp_manager_t mtcp, tcp_stream *cs, int type, 319 uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport); 320 321 void 322 DestroyTCPStream(mtcp_manager_t mtcp, tcp_stream *stream); 323 324 void 325 DumpStream(mtcp_manager_t mtcp, tcp_stream *stream); 326 327 int 328 GetFragInfo(socket_map_t sock, int side, void *optval, socklen_t *optlen); 329 330 int 331 GetBufInfo(socket_map_t sock, int side, void *optval, socklen_t *optlen); 332 333 int 334 GetTCPState(struct tcp_stream *stream, int side, 335 void *optval, socklen_t *optlen); 336 337 int 338 DisableBuf(socket_map_t sock, int side); 339 340 int 341 GetLastTimestamp(struct tcp_stream *stream, uint32_t *usecs, socklen_t *sz); 342 343 void 344 posix_seq_srand(unsigned seed); 345 346 #endif /* __TCP_STREAM_H_ */ 347