1 #ifndef __TCP_STREAM_H_ 2 #define __TCP_STREAM_H_ 3 4 #ifdef DARWIN 5 #include <netinet/tcp.h> 6 #else 7 #include <linux/tcp.h> 8 #endif 9 #include <netinet/ip.h> 10 #include <sys/queue.h> 11 12 #include "mtcp.h" 13 #include "socket.h" 14 #include "memory_mgt.h" 15 #include "tcp_rb.h" 16 17 #define TCP_MAX_SEQ 4294967295 18 19 #define HAS_STREAM_TYPE(str, type) (str->stream_type & (1 << type)) 20 #define IS_STREAM_TYPE(str, type) (str->stream_type == (1 << type)) 21 #define STREAM_TYPE(type) (1 << type) 22 23 /*----------------------------------------------------------------------------*/ 24 /** 25 * routines for traversing stream + raw sockets 26 */ 27 struct sockent { 28 struct socket_map *sock; 29 TAILQ_ENTRY(sockent) link; 30 }; 31 #define SOCKQ_HEAD(name) TAILQ_HEAD(name, sockent) 32 #define SOCKQ_INIT(head) TAILQ_INIT(head) 33 #define SOCKQ_INSERT_TAIL(head, socket) \ 34 do { \ 35 struct sockent *__s = \ 36 (struct sockent *)MPAllocateChunk(mtcp->sockent_pool); \ 37 if (__s) { \ 38 __s->sock = (socket); \ 39 TAILQ_INSERT_TAIL(head, __s, link); \ 40 } \ 41 } while (0) 42 #define SOCKQ_REMOVE(head, socket) \ 43 do { \ 44 struct sockent *__walk, *__temp; \ 45 for (__walk = TAILQ_FIRST(head); __walk != NULL; __walk = __temp) { \ 46 __temp = TAILQ_NEXT(__walk, link); \ 47 if ((socket) == __walk->sock) break; \ 48 } \ 49 if (__walk) { \ 50 TAILQ_REMOVE(head, __walk, link); \ 51 MPFreeChunk(mtcp->sockent_pool, __walk); \ 52 } \ 53 } while (0) 54 #define SOCKQ_FOREACH_START(var, head) \ 55 do { \ 56 struct sockent *__walk, *__temp; \ 57 for (__walk = ((head)->tqh_first); \ 58 ((var) = __walk ? __walk->sock : NULL, __walk); \ 59 __walk = __temp) { \ 60 __temp = ((__walk)->link.tqe_next); 61 #define SOCKQ_FOREACH_END }} while (0) 62 /*----------------------------------------------------------------------------*/ 63 64 struct rtm_stat 65 { 66 uint32_t tdp_ack_cnt; 67 uint32_t tdp_ack_bytes; 68 uint32_t ack_upd_cnt; 69 uint32_t ack_upd_bytes; 70 #if TCP_OPT_SACK_ENABLED 71 uint32_t sack_cnt; 72 uint32_t sack_bytes; 73 uint32_t tdp_sack_cnt; 74 uint32_t tdp_sack_bytes; 75 #endif /* TCP_OPT_SACK_ENABLED */ 76 uint32_t rto_cnt; 77 uint32_t rto_bytes; 78 }; 79 80 typedef struct seq_remap_entry { 81 uint32_t seq_base; 82 uint32_t seq_off; 83 } seq_remap_entry; 84 85 #define SRE_MAX 8 86 87 #if TCP_OPT_SACK_ENABLED 88 struct sack_entry 89 { 90 uint32_t left_edge; 91 uint32_t right_edge; 92 uint32_t expire; 93 }; 94 #endif /* TCP_OPT_SACK_ENABLED */ 95 96 struct tcp_recv_vars 97 { 98 /* receiver variables */ 99 uint32_t rcv_wnd; /* receive window (unscaled) */ 100 //uint32_t rcv_up; /* receive urgent pointer */ 101 uint32_t irs; /* initial receiving sequence */ 102 uint32_t snd_wl1; /* segment seq number for last window update */ 103 uint32_t snd_wl2; /* segment ack number for last window update */ 104 105 /* variables for fast retransmission */ 106 uint32_t last_ack_seq; /* highest ackd seq */ 107 uint8_t dup_acks; /* number of duplicated acks */ 108 109 /* timestamps */ 110 uint32_t ts_recent; /* recent peer timestamp */ 111 uint32_t ts_lastack_rcvd; /* last ack rcvd time */ 112 uint32_t ts_last_ts_upd; /* last peer ts update time */ 113 uint32_t ts_tw_expire; // timestamp for timewait expire 114 115 /* RTT estimation variables */ 116 uint32_t srtt; /* smoothed round trip time << 3 (scaled) */ 117 uint32_t mdev; /* medium deviation */ 118 uint32_t mdev_max; /* maximal mdev ffor the last rtt period */ 119 uint32_t rttvar; /* smoothed mdev_max */ 120 uint32_t rtt_seq; /* sequence number to update rttvar */ 121 122 #if TCP_OPT_SACK_ENABLED /* currently not used */ 123 #define MAX_SACK_ENTRY 8 124 struct sack_entry sack_table[MAX_SACK_ENTRY]; 125 uint8_t sacks:3; 126 #endif /* TCP_OPT_SACK_ENABLED */ 127 128 tcprb_t *rcvbuf; 129 130 #if USE_SPIN_LOCK 131 pthread_spinlock_t read_lock; 132 #else 133 pthread_mutex_t read_lock; 134 #endif 135 struct hash_bucket_head *he_mybucket; 136 TAILQ_ENTRY(tcp_stream) he_link; /* hash table entry link */ 137 }; 138 139 struct tcp_send_vars 140 { 141 /* IP-level information */ 142 uint16_t ip_id; 143 144 uint16_t mss; /* maximum segment size */ 145 uint16_t eff_mss; /* effective segment size (excluding tcp option) */ 146 147 uint8_t wscale_mine; /* my window scale (advertising window) */ 148 uint8_t wscale_peer; /* peer's window scale (advertised window) */ 149 int8_t nif_out; /* cached output network interface */ 150 unsigned char *d_haddr; /* cached destination MAC address */ 151 152 /* send sequence variables */ 153 uint32_t snd_una; /* send unacknoledged */ 154 uint32_t snd_wnd; /* send window (unscaled) */ 155 uint32_t peer_wnd; /* client window size */ 156 //uint32_t snd_up; /* send urgent pointer (not used) */ 157 uint32_t iss; /* initial sending sequence */ 158 uint32_t fss; /* final sending sequence */ 159 160 /* retransmission timeout variables */ 161 uint8_t nrtx; /* number of retransmission */ 162 uint8_t max_nrtx; /* max number of retransmission */ 163 uint32_t rto; /* retransmission timeout */ 164 uint32_t ts_rto; /* timestamp for retransmission timeout */ 165 166 /* congestion control variables */ 167 uint32_t cwnd; /* congestion window */ 168 uint32_t ssthresh; /* slow start threshold */ 169 170 /* timestamp */ 171 uint32_t ts_lastack_sent; /* last ack sent time */ 172 173 uint8_t is_wack:1, /* is ack for window adertisement? */ 174 ack_cnt:6; /* number of acks to send. max 64 */ 175 176 uint8_t on_control_list; 177 uint8_t on_send_list; 178 uint8_t on_ack_list; 179 uint8_t on_sendq; 180 uint8_t on_ackq; 181 uint8_t on_closeq; 182 uint8_t on_resetq; 183 184 uint8_t on_closeq_int:1, 185 on_resetq_int:1, 186 is_fin_sent:1, 187 is_fin_ackd:1; 188 189 TAILQ_ENTRY(tcp_stream) control_link; 190 TAILQ_ENTRY(tcp_stream) send_link; 191 TAILQ_ENTRY(tcp_stream) ack_link; 192 193 TAILQ_ENTRY(tcp_stream) timer_link; /* timer link (rto list, tw list) */ 194 TAILQ_ENTRY(tcp_stream) timeout_link; /* connection timeout link */ 195 196 struct tcp_send_buffer *sndbuf; 197 struct seq_remap_entry sre[SRE_MAX]; /* seq # translation table */ 198 uint8_t sre_index; /* seq # translation index */ 199 200 #if USE_SPIN_LOCK 201 pthread_spinlock_t write_lock; 202 #else 203 pthread_mutex_t write_lock; 204 #endif 205 206 #if RTM_STAT 207 struct rtm_stat rstat; /* retransmission statistics */ 208 #endif 209 }; 210 211 typedef struct tcp_stream 212 { 213 /* 214 * This is a direct replacement for fctx... 215 * However this could be replaced by some 216 * more elaborate data structure that supports 217 * multiple monitors in the future... 218 * 219 * In case no monitor is attached, msock will be 220 * NULL. 221 * 222 * Support for standalone monitors will be patched 223 * in future revisions... 224 */ 225 226 SOCKQ_HEAD() msocks; /* in case monitoring is enabled */ 227 socket_map_t socket; /* relating to MOS_SOCK_STREAM */ 228 229 uint32_t id; 230 uint32_t stream_type; /* to identify sock_stream/mon_stream */ 231 232 uint32_t saddr; /* in network order */ 233 uint32_t daddr; /* in network order */ 234 uint16_t sport; /* in network order */ 235 uint16_t dport; /* in network order */ 236 237 uint32_t actions; 238 uint64_t cb_events; 239 240 uint8_t state; /* tcp state */ 241 uint8_t close_reason; /* close reason */ 242 uint8_t on_hash_table; 243 uint8_t on_timewait_list; 244 uint8_t ht_idx; 245 uint8_t closed; 246 uint8_t is_bound_addr; 247 uint8_t need_wnd_adv; 248 int16_t on_rto_idx; 249 250 uint16_t on_timeout_list:1, 251 on_rcv_br_list:1, 252 on_snd_br_list:1, 253 saw_timestamp:1, /* whether peer sends timestamp */ 254 sack_permit:1, /* whether peer permits SACK */ 255 control_list_waiting:1, 256 have_reset:1, 257 side:2, 258 buffer_mgmt:2, 259 status_mgmt:1, 260 allow_pkt_modification:1; 261 262 uint32_t snd_nxt; /* send next */ 263 uint32_t rcv_nxt; /* receive next */ 264 265 struct tcp_recv_vars *rcvvar; 266 struct tcp_send_vars *sndvar; 267 268 uint32_t last_active_ts; /* ts_last_ack_sent or ts_last_ts_upd */ 269 270 struct tcp_stream *pair_stream; /* pair stream in case of monitor / proxy socket */ 271 272 struct pkt_ctx last_pctx; 273 unsigned char last_pkt_data[ETHERNET_FRAME_LEN]; 274 275 } tcp_stream; 276 277 extern inline char * 278 TCPStateToString(const tcp_stream *cur_stream); 279 280 extern inline int 281 AddEpollEvent(struct mtcp_epoll *ep, 282 int queue_type, socket_map_t socket, uint32_t event); 283 284 extern inline void 285 RaiseReadEvent(mtcp_manager_t mtcp, tcp_stream *stream); 286 287 extern inline void 288 RaiseWriteEvent(mtcp_manager_t mtcp, tcp_stream *stream); 289 290 extern inline void 291 RaiseCloseEvent(mtcp_manager_t mtcp, tcp_stream *stream); 292 293 extern inline int 294 RaiseErrorEvent(mtcp_manager_t mtcp, tcp_stream *stream); 295 296 tcp_stream * 297 CreateTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, 298 uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport, 299 unsigned int *hash); 300 301 extern inline tcp_stream * 302 CreateDualTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, uint32_t saddr, 303 uint16_t sport, uint32_t daddr, uint16_t dport, unsigned int *hash); 304 305 extern inline tcp_stream * 306 CreateClientTCPStream(mtcp_manager_t mtcp, socket_map_t socket, int type, 307 uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport, unsigned int *hash); 308 309 extern inline tcp_stream * 310 AttachServerTCPStream(mtcp_manager_t mtcp, tcp_stream *cs, int type, 311 uint32_t saddr, uint16_t sport, uint32_t daddr, uint16_t dport); 312 313 void 314 DestroyTCPStream(mtcp_manager_t mtcp, tcp_stream *stream); 315 316 void 317 DumpStream(mtcp_manager_t mtcp, tcp_stream *stream); 318 319 int 320 GetFragInfo(socket_map_t sock, int side, void *optval, socklen_t *optlen); 321 322 int 323 GetBufInfo(socket_map_t sock, int side, void *optval, socklen_t *optlen); 324 325 int 326 GetTCPState(struct tcp_stream *stream, int side, 327 void *optval, socklen_t *optlen); 328 329 int 330 DisableBuf(socket_map_t sock, int side); 331 332 int 333 GetLastTimestamp(struct tcp_stream *stream, uint32_t *usecs, socklen_t *sz); 334 335 int 336 TcpSeqChange(socket_map_t socket, uint32_t seq_drift, int side, uint32_t seqno); 337 338 uint32_t 339 FetchSeqDrift(struct tcp_stream *stream, uint32_t seqno); 340 341 void 342 posix_seq_srand(unsigned seed); 343 344 #endif /* __TCP_STREAM_H_ */ 345