1 #ifndef __TCP_STREAM_H_ 2 #define __TCP_STREAM_H_ 3 4 #ifdef DARWIN 5 #include <netinet/tcp.h> 6 #else 7 #include <linux/tcp.h> 8 #endif 9 #include <netinet/ip.h> 10 #include <sys/queue.h> 11 12 #include "mtcp.h" 13 #include "socket.h" 14 #include "memory_mgt.h" 15 #include "tcp_rb.h" 16 17 #define TCP_MAX_SEQ 4294967295 18 19 #define HAS_STREAM_TYPE(str, type) (str->stream_type & (1 << type)) 20 #define IS_STREAM_TYPE(str, type) (str->stream_type == (1 << type)) 21 #define STREAM_TYPE(type) (1 << type) 22 23 /*----------------------------------------------------------------------------*/ 24 /** 25 * routines for traversing stream + raw sockets 26 */ 27 struct sockent { 28 struct socket_map *sock; 29 TAILQ_ENTRY(sockent) link; 30 }; 31 #define SOCKQ_HEAD(name) TAILQ_HEAD(name, sockent) 32 #define SOCKQ_INIT(head) TAILQ_INIT(head) 33 #define SOCKQ_INSERT_TAIL(head, socket) \ 34 do { \ 35 struct sockent *__s = \ 36 (struct sockent *)MPAllocateChunk(mtcp->sockent_pool); \ 37 if (__s) { \ 38 __s->sock = (socket); \ 39 TAILQ_INSERT_TAIL(head, __s, link); \ 40 } \ 41 } while (0) 42 #define SOCKQ_REMOVE(head, socket) \ 43 do { \ 44 struct sockent *__walk, *__temp; \ 45 for (__walk = TAILQ_FIRST(head); __walk != NULL; __walk = __temp) { \ 46 __temp = TAILQ_NEXT(__walk, link); \ 47 if ((socket) == __walk->sock) break; \ 48 } \ 49 if (__walk) { \ 50 TAILQ_REMOVE(head, __walk, link); \ 51 MPFreeChunk(mtcp->sockent_pool, __walk); \ 52 } \ 53 } while (0) 54 #define SOCKQ_FOREACH_START(var, head) \ 55 do { \ 56 struct sockent *__walk, *__temp; \ 57 for (__walk = ((head)->tqh_first); \ 58 ((var) = __walk ? __walk->sock : NULL, __walk); \ 59 __walk = __temp) { \ 60 __temp = ((__walk)->link.tqe_next); 61 #define SOCKQ_FOREACH_END }} while (0) 62 /*----------------------------------------------------------------------------*/ 63 64 struct rtm_stat 65 { 66 uint32_t tdp_ack_cnt; 67 uint32_t tdp_ack_bytes; 68 uint32_t ack_upd_cnt; 69 uint32_t ack_upd_bytes; 70 #if TCP_OPT_SACK_ENABLED 71 uint32_t sack_cnt; 72 uint32_t sack_bytes; 73 uint32_t tdp_sack_cnt; 74 uint32_t tdp_sack_bytes; 75 #endif /* TCP_OPT_SACK_ENABLED */ 76 uint32_t rto_cnt; 77 uint32_t rto_bytes; 78 }; 79 80 typedef struct seq_remap_entry { 81 uint32_t seq_base; 82 uint32_t seq_off; 83 } seq_remap_entry; 84 85 #define SRE_MAX 8 86 87 #if TCP_OPT_SACK_ENABLED 88 struct sack_entry 89 { 90 uint32_t left_edge; 91 uint32_t right_edge; 92 uint32_t expire; 93 }; 94 #endif /* TCP_OPT_SACK_ENABLED */ 95 96 struct tcp_recv_vars 97 { 98 /* receiver variables */ 99 uint32_t rcv_wnd; /* receive window (unscaled) */ 100 //uint32_t rcv_up; /* receive urgent pointer */ 101 uint32_t irs; /* initial receiving sequence */ 102 uint32_t snd_wl1; /* segment seq number for last window update */ 103 uint32_t snd_wl2; /* segment ack number for last window update */ 104 105 /* variables for fast retransmission */ 106 uint32_t last_ack_seq; /* highest ackd seq */ 107 uint8_t dup_acks; /* number of duplicated acks */ 108 109 /* timestamps */ 110 uint32_t ts_recent; /* recent peer timestamp */ 111 uint32_t ts_lastack_rcvd; /* last ack rcvd time */ 112 uint32_t ts_last_ts_upd; /* last peer ts update time */ 113 uint32_t ts_tw_expire; // timestamp for timewait expire 114 115 /* RTT estimation variables */ 116 uint32_t srtt; /* smoothed round trip time << 3 (scaled) */ 117 uint32_t mdev; /* medium deviation */ 118 uint32_t mdev_max; /* maximal mdev ffor the last rtt period */ 119 uint32_t rttvar; /* smoothed mdev_max */ 120 uint32_t rtt_seq; /* sequence number to update rttvar */ 121 122 #if TCP_OPT_SACK_ENABLED /* currently not used */ 123 #define MAX_SACK_ENTRY 8 124 struct sack_entry sack_table[MAX_SACK_ENTRY]; 125 uint8_t sacks:3; 126 #endif /* TCP_OPT_SACK_ENABLED */ 127 128 #ifdef NEWRB 129 tcprb_t *rcvbuf; 130 #else 131 struct tcp_ring_buffer *rcvbuf; 132 #endif 133 134 #if USE_SPIN_LOCK 135 pthread_spinlock_t read_lock; 136 #else 137 pthread_mutex_t read_lock; 138 #endif 139 struct hash_bucket_head *he_mybucket; 140 TAILQ_ENTRY(tcp_stream) he_link; /* hash table entry link */ 141 }; 142 143 struct tcp_send_vars 144 { 145 /* IP-level information */ 146 uint16_t ip_id; 147 148 uint16_t mss; /* maximum segment size */ 149 uint16_t eff_mss; /* effective segment size (excluding tcp option) */ 150 151 uint8_t wscale_mine; /* my window scale (advertising window) */ 152 uint8_t wscale_peer; /* peer's window scale (advertised window) */ 153 int8_t nif_out; /* cached output network interface */ 154 unsigned char *d_haddr; /* cached destination MAC address */ 155 156 /* send sequence variables */ 157 uint32_t snd_una; /* send unacknoledged */ 158 uint32_t snd_wnd; /* send window (unscaled) */ 159 uint32_t peer_wnd; /* client window size */ 160 //uint32_t snd_up; /* send urgent pointer (not used) */ 161 uint32_t iss; /* initial sending sequence */ 162 uint32_t fss; /* final sending sequence */ 163 164 /* retransmission timeout variables */ 165 uint8_t nrtx; /* number of retransmission */ 166 uint8_t max_nrtx; /* max number of retransmission */ 167 uint32_t rto; /* retransmission timeout */ 168 uint32_t ts_rto; /* timestamp for retransmission timeout */ 169 170 /* congestion control variables */ 171 uint32_t cwnd; /* congestion window */ 172 uint32_t ssthresh; /* slow start threshold */ 173 174 /* timestamp */ 175 uint32_t ts_lastack_sent; /* last ack sent time */ 176 177 uint8_t is_wack:1, /* is ack for window adertisement? */ 178 ack_cnt:6; /* number of acks to send. max 64 */ 179 180 uint8_t on_control_list; 181 uint8_t on_send_list; 182 uint8_t on_ack_list; 183 uint8_t on_sendq; 184 uint8_t on_ackq; 185 uint8_t on_closeq; 186 uint8_t on_resetq; 187 188 uint8_t on_closeq_int:1, 189 on_resetq_int:1, 190 is_fin_sent:1, 191 is_fin_ackd:1; 192 193 TAILQ_ENTRY(tcp_stream) control_link; 194 TAILQ_ENTRY(tcp_stream) send_link; 195 TAILQ_ENTRY(tcp_stream) ack_link; 196 197 TAILQ_ENTRY(tcp_stream) timer_link; /* timer link (rto list, tw list) */ 198 TAILQ_ENTRY(tcp_stream) timeout_link; /* connection timeout link */ 199 200 struct tcp_send_buffer *sndbuf; 201 struct seq_remap_entry sre[SRE_MAX]; /* seq # translation table */ 202 uint8_t sre_index; /* seq # translation index */ 203 204 #if USE_SPIN_LOCK 205 pthread_spinlock_t write_lock; 206 #else 207 pthread_mutex_t write_lock; 208 #endif 209 210 #if RTM_STAT 211 struct rtm_stat rstat; /* retransmission statistics */ 212 #endif 213 }; 214 215 typedef struct tcp_stream 216 { 217 /* 218 * This is a direct replacement for fctx... 219 * However this could be replaced by some 220 * more elaborate data structure that supports 221 * multiple monitors in the future... 222 * 223 * In case no monitor is attached, msock will be 224 * NULL. 225 * 226 * Support for standalone monitors will be patched 227 * in future revisions... 228 */ 229 230 SOCKQ_HEAD() msocks; /* in case monitoring is enabled */ 231 socket_map_t socket; /* relating to MOS_SOCK_STREAM */ 232 233 uint32_t id; 234 uint32_t stream_type; /* to identify sock_stream/mon_stream */ 235 236 uint32_t saddr; /* in network order */ 237 uint32_t daddr; /* in network order */ 238 uint16_t sport; /* in network order */ 239 uint16_t dport; /* in network order */ 240 241 uint32_t actions; 242 uint64_t cb_events; 243 244 uint8_t state; /* tcp state */ 245 uint8_t close_reason; /* close reason */ 246 uint8_t on_hash_table; 247 uint8_t on_timewait_list; 248 uint8_t ht_idx; 249 uint8_t closed; 250 uint8_t is_bound_addr; 251 uint8_t need_wnd_adv; 252 int16_t on_rto_idx; 253 254 uint16_t on_timeout_list:1, 255 on_rcv_br_list:1, 256 on_snd_br_list:1, 257 saw_timestamp:1, /* whether peer sends timestamp */ 258 sack_permit:1, /* whether peer permits SACK */ 259 control_list_waiting:1, 260 have_reset:1, 261 side:2, 262 buffer_mgmt:2, 263 status_mgmt:1, 264 allow_pkt_modification:1; 265 266 uint32_t snd_nxt; /* send next */ 267 uint32_t rcv_nxt; /* receive next */ 268 269 struct tcp_recv_vars *rcvvar; 270 struct tcp_send_vars *sndvar; 271 272 uint32_t last_active_ts; /* ts_last_ack_sent or ts_last_ts_upd */ 273 274 struct tcp_stream *pair_stream; /* pair stream in case of monitor / proxy socket */ 275 276 struct pkt_ctx last_pctx; 277 unsigned char last_pkt_data[ETHERNET_FRAME_LEN]; 278 279 } tcp_stream; 280 281 extern inline char * 282 TCPStateToString(const tcp_stream *cur_stream); 283 284 extern inline int 285 AddEpollEvent(struct mtcp_epoll *ep, 286 int queue_type, socket_map_t socket, uint32_t event); 287 288 extern inline void 289 RaiseReadEvent(mtcp_manager_t mtcp, tcp_stream *stream); 290 291 extern inline void 292 RaiseWriteEvent(mtcp_manager_t mtcp, tcp_stream *stream); 293 294 extern inline void 295 RaiseCloseEvent(mtcp_manager_t mtcp, tcp_stream *stream); 296 297 extern inline int 298 RaiseErrorEvent(mtcp_manager_t mtcp, tcp_stream *stream); 299 300 tcp_stream * 301 CreateTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, 302 uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport, 303 unsigned int *hash); 304 305 extern inline tcp_stream * 306 CreateDualTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, uint32_t saddr, 307 uint16_t sport, uint32_t daddr, uint16_t dport, unsigned int *hash); 308 309 extern inline tcp_stream * 310 CreateClientTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, 311 uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport, unsigned int *hash); 312 313 extern inline tcp_stream * 314 AttachServerTCPStream(mtcp_manager_t mtcp, tcp_stream *cs, int type, 315 uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport); 316 317 void 318 DestroyTCPStream(mtcp_manager_t mtcp, tcp_stream *stream); 319 320 void 321 DumpStream(mtcp_manager_t mtcp, tcp_stream *stream); 322 323 int 324 GetFragInfo(socket_map_t sock, int side, void *optval, socklen_t *optlen); 325 326 int 327 GetBufInfo(socket_map_t sock, int side, void *optval, socklen_t *optlen); 328 329 int 330 GetTCPState(struct tcp_stream *stream, int side, 331 void *optval, socklen_t *optlen); 332 333 int 334 DisableBuf(socket_map_t sock, int side); 335 336 int 337 GetLastTimestamp(struct tcp_stream *stream, uint32_t *usecs, socklen_t *sz); 338 339 int 340 TcpSeqChange(socket_map_t socket, uint32_t seq_drift, int side, uint32_t seqno); 341 342 uint32_t 343 FetchSeqDrift(struct tcp_stream *stream, uint32_t seqno); 344 345 void 346 posix_seq_srand(unsigned seed); 347 348 #endif /* __TCP_STREAM_H_ */ 349