1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2017 Intel Corporation 3 */ 4 5 #ifndef _GRO_TCP4_H_ 6 #define _GRO_TCP4_H_ 7 8 #include <rte_tcp.h> 9 10 #define INVALID_ARRAY_INDEX 0xffffffffUL 11 #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) 12 13 /* 14 * The max length of a IPv4 packet, which includes the length of the L3 15 * header, the L4 header and the data payload. 16 */ 17 #define MAX_IPV4_PKT_LENGTH UINT16_MAX 18 19 /* The maximum TCP header length */ 20 #define MAX_TCP_HLEN 60 21 #define INVALID_TCP_HDRLEN(len) \ 22 (((len) < sizeof(struct rte_tcp_hdr)) || ((len) > MAX_TCP_HLEN)) 23 24 /* Header fields representing a TCP/IPv4 flow */ 25 struct tcp4_flow_key { 26 struct rte_ether_addr eth_saddr; 27 struct rte_ether_addr eth_daddr; 28 uint32_t ip_src_addr; 29 uint32_t ip_dst_addr; 30 31 uint32_t recv_ack; 32 uint16_t src_port; 33 uint16_t dst_port; 34 }; 35 36 struct gro_tcp4_flow { 37 struct tcp4_flow_key key; 38 /* 39 * The index of the first packet in the flow. 40 * INVALID_ARRAY_INDEX indicates an empty flow. 41 */ 42 uint32_t start_index; 43 }; 44 45 struct gro_tcp4_item { 46 /* 47 * The first MBUF segment of the packet. If the value 48 * is NULL, it means the item is empty. 49 */ 50 struct rte_mbuf *firstseg; 51 /* The last MBUF segment of the packet */ 52 struct rte_mbuf *lastseg; 53 /* 54 * The time when the first packet is inserted into the table. 55 * This value won't be updated, even if the packet is merged 56 * with other packets. 57 */ 58 uint64_t start_time; 59 /* 60 * next_pkt_idx is used to chain the packets that 61 * are in the same flow but can't be merged together 62 * (e.g. caused by packet reordering). 63 */ 64 uint32_t next_pkt_idx; 65 /* TCP sequence number of the packet */ 66 uint32_t sent_seq; 67 /* IPv4 ID of the packet */ 68 uint16_t ip_id; 69 /* the number of merged packets */ 70 uint16_t nb_merged; 71 /* Indicate if IPv4 ID can be ignored */ 72 uint8_t is_atomic; 73 }; 74 75 /* 76 * TCP/IPv4 reassembly table structure. 77 */ 78 struct gro_tcp4_tbl { 79 /* item array */ 80 struct gro_tcp4_item *items; 81 /* flow array */ 82 struct gro_tcp4_flow *flows; 83 /* current item number */ 84 uint32_t item_num; 85 /* current flow num */ 86 uint32_t flow_num; 87 /* item array size */ 88 uint32_t max_item_num; 89 /* flow array size */ 90 uint32_t max_flow_num; 91 }; 92 93 /** 94 * This function creates a TCP/IPv4 reassembly table. 95 * 96 * @param socket_id 97 * Socket index for allocating the TCP/IPv4 reassemble table 98 * @param max_flow_num 99 * The maximum number of flows in the TCP/IPv4 GRO table 100 * @param max_item_per_flow 101 * The maximum number of packets per flow 102 * 103 * @return 104 * - Return the table pointer on success. 105 * - Return NULL on failure. 106 */ 107 void *gro_tcp4_tbl_create(uint16_t socket_id, 108 uint16_t max_flow_num, 109 uint16_t max_item_per_flow); 110 111 /** 112 * This function destroys a TCP/IPv4 reassembly table. 113 * 114 * @param tbl 115 * Pointer pointing to the TCP/IPv4 reassembly table. 116 */ 117 void gro_tcp4_tbl_destroy(void *tbl); 118 119 /** 120 * This function merges a TCP/IPv4 packet. It doesn't process the packet, 121 * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or doesn't have 122 * payload. 123 * 124 * This function doesn't check if the packet has correct checksums and 125 * doesn't re-calculate checksums for the merged packet. Additionally, 126 * it assumes the packets are complete (i.e., MF==0 && frag_off==0), 127 * when IP fragmentation is possible (i.e., DF==0). It returns the 128 * packet, if the packet has invalid parameters (e.g. SYN bit is set) 129 * or there is no available space in the table. 130 * 131 * @param pkt 132 * Packet to reassemble 133 * @param tbl 134 * Pointer pointing to the TCP/IPv4 reassembly table 135 * @start_time 136 * The time when the packet is inserted into the table 137 * 138 * @return 139 * - Return a positive value if the packet is merged. 140 * - Return zero if the packet isn't merged but stored in the table. 141 * - Return a negative value for invalid parameters or no available 142 * space in the table. 143 */ 144 int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt, 145 struct gro_tcp4_tbl *tbl, 146 uint64_t start_time); 147 148 /** 149 * This function flushes timeout packets in a TCP/IPv4 reassembly table, 150 * and without updating checksums. 151 * 152 * @param tbl 153 * TCP/IPv4 reassembly table pointer 154 * @param flush_timestamp 155 * Flush packets which are inserted into the table before or at the 156 * flush_timestamp. 157 * @param out 158 * Pointer array used to keep flushed packets 159 * @param nb_out 160 * The element number in 'out'. It also determines the maximum number of 161 * packets that can be flushed finally. 162 * 163 * @return 164 * The number of flushed packets 165 */ 166 uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, 167 uint64_t flush_timestamp, 168 struct rte_mbuf **out, 169 uint16_t nb_out); 170 171 /** 172 * This function returns the number of the packets in a TCP/IPv4 173 * reassembly table. 174 * 175 * @param tbl 176 * TCP/IPv4 reassembly table pointer 177 * 178 * @return 179 * The number of packets in the table 180 */ 181 uint32_t gro_tcp4_tbl_pkt_count(void *tbl); 182 183 /* 184 * Check if two TCP/IPv4 packets belong to the same flow. 185 */ 186 static inline int 187 is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2) 188 { 189 return (rte_is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) && 190 rte_is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) && 191 (k1.ip_src_addr == k2.ip_src_addr) && 192 (k1.ip_dst_addr == k2.ip_dst_addr) && 193 (k1.recv_ack == k2.recv_ack) && 194 (k1.src_port == k2.src_port) && 195 (k1.dst_port == k2.dst_port)); 196 } 197 198 /* 199 * Merge two TCP/IPv4 packets without updating checksums. 200 * If cmp is larger than 0, append the new packet to the 201 * original packet. Otherwise, pre-pend the new packet to 202 * the original packet. 203 */ 204 static inline int 205 merge_two_tcp4_packets(struct gro_tcp4_item *item, 206 struct rte_mbuf *pkt, 207 int cmp, 208 uint32_t sent_seq, 209 uint16_t ip_id, 210 uint16_t l2_offset) 211 { 212 struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; 213 uint16_t hdr_len, l2_len; 214 215 if (cmp > 0) { 216 pkt_head = item->firstseg; 217 pkt_tail = pkt; 218 } else { 219 pkt_head = pkt; 220 pkt_tail = item->firstseg; 221 } 222 223 /* check if the IPv4 packet length is greater than the max value */ 224 hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len + 225 pkt_head->l4_len; 226 l2_len = l2_offset > 0 ? pkt_head->outer_l2_len : pkt_head->l2_len; 227 if (unlikely(pkt_head->pkt_len - l2_len + pkt_tail->pkt_len - 228 hdr_len > MAX_IPV4_PKT_LENGTH)) 229 return 0; 230 231 /* remove the packet header for the tail packet */ 232 rte_pktmbuf_adj(pkt_tail, hdr_len); 233 234 /* chain two packets together */ 235 if (cmp > 0) { 236 item->lastseg->next = pkt; 237 item->lastseg = rte_pktmbuf_lastseg(pkt); 238 /* update IP ID to the larger value */ 239 item->ip_id = ip_id; 240 } else { 241 lastseg = rte_pktmbuf_lastseg(pkt); 242 lastseg->next = item->firstseg; 243 item->firstseg = pkt; 244 /* update sent_seq to the smaller value */ 245 item->sent_seq = sent_seq; 246 item->ip_id = ip_id; 247 } 248 item->nb_merged++; 249 250 /* update MBUF metadata for the merged packet */ 251 pkt_head->nb_segs += pkt_tail->nb_segs; 252 pkt_head->pkt_len += pkt_tail->pkt_len; 253 254 return 1; 255 } 256 257 /* 258 * Check if two TCP/IPv4 packets are neighbors. 259 */ 260 static inline int 261 check_seq_option(struct gro_tcp4_item *item, 262 struct rte_tcp_hdr *tcph, 263 uint32_t sent_seq, 264 uint16_t ip_id, 265 uint16_t tcp_hl, 266 uint16_t tcp_dl, 267 uint16_t l2_offset, 268 uint8_t is_atomic) 269 { 270 struct rte_mbuf *pkt_orig = item->firstseg; 271 struct rte_ipv4_hdr *iph_orig; 272 struct rte_tcp_hdr *tcph_orig; 273 uint16_t len, tcp_hl_orig; 274 275 iph_orig = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) + 276 l2_offset + pkt_orig->l2_len); 277 tcph_orig = (struct rte_tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len); 278 tcp_hl_orig = pkt_orig->l4_len; 279 280 /* Check if TCP option fields equal */ 281 len = RTE_MAX(tcp_hl, tcp_hl_orig) - sizeof(struct rte_tcp_hdr); 282 if ((tcp_hl != tcp_hl_orig) || ((len > 0) && 283 (memcmp(tcph + 1, tcph_orig + 1, 284 len) != 0))) 285 return 0; 286 287 /* Don't merge packets whose DF bits are different */ 288 if (unlikely(item->is_atomic ^ is_atomic)) 289 return 0; 290 291 /* check if the two packets are neighbors */ 292 len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len - 293 pkt_orig->l3_len - tcp_hl_orig; 294 if ((sent_seq == item->sent_seq + len) && (is_atomic || 295 (ip_id == item->ip_id + 1))) 296 /* append the new packet */ 297 return 1; 298 else if ((sent_seq + tcp_dl == item->sent_seq) && (is_atomic || 299 (ip_id + item->nb_merged == item->ip_id))) 300 /* pre-pend the new packet */ 301 return -1; 302 303 return 0; 304 } 305 #endif 306