1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(C) 2020 Marvell International Ltd. 3 */ 4 5 #ifndef __INCLUDE_IP4_LOOKUP_SSE_H__ 6 #define __INCLUDE_IP4_LOOKUP_SSE_H__ 7 8 /* X86 SSE */ 9 static uint16_t 10 ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node, 11 void **objs, uint16_t nb_objs) 12 { 13 struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts; 14 struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx); 15 const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx); 16 rte_edge_t next0, next1, next2, next3, next_index; 17 struct rte_ipv4_hdr *ipv4_hdr; 18 uint32_t ip0, ip1, ip2, ip3; 19 void **to_next, **from; 20 uint16_t last_spec = 0; 21 uint16_t n_left_from; 22 uint16_t held = 0; 23 uint32_t drop_nh; 24 rte_xmm_t dst; 25 __m128i dip; /* SSE register */ 26 int rc, i; 27 28 /* Speculative next */ 29 next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE; 30 /* Drop node */ 31 drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16; 32 33 pkts = (struct rte_mbuf **)objs; 34 from = objs; 35 n_left_from = nb_objs; 36 37 if (n_left_from >= 4) { 38 for (i = 0; i < 4; i++) 39 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *, 40 sizeof(struct rte_ether_hdr))); 41 } 42 43 /* Get stream for the speculated next node */ 44 to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs); 45 while (n_left_from >= 4) { 46 /* Prefetch next-next mbufs */ 47 if (likely(n_left_from > 11)) { 48 rte_prefetch0(pkts[8]); 49 rte_prefetch0(pkts[9]); 50 rte_prefetch0(pkts[10]); 51 rte_prefetch0(pkts[11]); 52 } 53 54 /* Prefetch next mbuf data */ 55 if (likely(n_left_from > 7)) { 56 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *, 57 sizeof(struct rte_ether_hdr))); 58 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *, 59 sizeof(struct rte_ether_hdr))); 60 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *, 61 sizeof(struct rte_ether_hdr))); 62 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *, 63 sizeof(struct rte_ether_hdr))); 64 } 65 66 mbuf0 = pkts[0]; 67 mbuf1 = pkts[1]; 68 mbuf2 = pkts[2]; 69 mbuf3 = pkts[3]; 70 71 pkts += 4; 72 n_left_from -= 4; 73 74 /* Extract DIP of mbuf0 */ 75 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *, 76 sizeof(struct rte_ether_hdr)); 77 ip0 = ipv4_hdr->dst_addr; 78 /* Extract cksum, ttl as ipv4 hdr is in cache */ 79 node_mbuf_priv1(mbuf0, dyn)->cksum = ipv4_hdr->hdr_checksum; 80 node_mbuf_priv1(mbuf0, dyn)->ttl = ipv4_hdr->time_to_live; 81 82 /* Extract DIP of mbuf1 */ 83 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf1, struct rte_ipv4_hdr *, 84 sizeof(struct rte_ether_hdr)); 85 ip1 = ipv4_hdr->dst_addr; 86 /* Extract cksum, ttl as ipv4 hdr is in cache */ 87 node_mbuf_priv1(mbuf1, dyn)->cksum = ipv4_hdr->hdr_checksum; 88 node_mbuf_priv1(mbuf1, dyn)->ttl = ipv4_hdr->time_to_live; 89 90 /* Extract DIP of mbuf2 */ 91 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf2, struct rte_ipv4_hdr *, 92 sizeof(struct rte_ether_hdr)); 93 ip2 = ipv4_hdr->dst_addr; 94 /* Extract cksum, ttl as ipv4 hdr is in cache */ 95 node_mbuf_priv1(mbuf2, dyn)->cksum = ipv4_hdr->hdr_checksum; 96 node_mbuf_priv1(mbuf2, dyn)->ttl = ipv4_hdr->time_to_live; 97 98 /* Extract DIP of mbuf3 */ 99 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf3, struct rte_ipv4_hdr *, 100 sizeof(struct rte_ether_hdr)); 101 ip3 = ipv4_hdr->dst_addr; 102 103 /* Prepare for lookup x4 */ 104 dip = _mm_set_epi32(ip3, ip2, ip1, ip0); 105 106 /* Byte swap 4 IPV4 addresses. */ 107 const __m128i bswap_mask = _mm_set_epi8( 108 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3); 109 dip = _mm_shuffle_epi8(dip, bswap_mask); 110 111 /* Extract cksum, ttl as ipv4 hdr is in cache */ 112 node_mbuf_priv1(mbuf3, dyn)->cksum = ipv4_hdr->hdr_checksum; 113 node_mbuf_priv1(mbuf3, dyn)->ttl = ipv4_hdr->time_to_live; 114 115 /* Perform LPM lookup to get NH and next node */ 116 rte_lpm_lookupx4(lpm, dip, dst.u32, drop_nh); 117 118 /* Extract next node id and NH */ 119 node_mbuf_priv1(mbuf0, dyn)->nh = dst.u32[0] & 0xFFFF; 120 next0 = (dst.u32[0] >> 16); 121 122 node_mbuf_priv1(mbuf1, dyn)->nh = dst.u32[1] & 0xFFFF; 123 next1 = (dst.u32[1] >> 16); 124 125 node_mbuf_priv1(mbuf2, dyn)->nh = dst.u32[2] & 0xFFFF; 126 next2 = (dst.u32[2] >> 16); 127 128 node_mbuf_priv1(mbuf3, dyn)->nh = dst.u32[3] & 0xFFFF; 129 next3 = (dst.u32[3] >> 16); 130 131 /* Enqueue four to next node */ 132 rte_edge_t fix_spec = 133 (next_index ^ next0) | (next_index ^ next1) | 134 (next_index ^ next2) | (next_index ^ next3); 135 136 if (unlikely(fix_spec)) { 137 /* Copy things successfully speculated till now */ 138 rte_memcpy(to_next, from, last_spec * sizeof(from[0])); 139 from += last_spec; 140 to_next += last_spec; 141 held += last_spec; 142 last_spec = 0; 143 144 /* Next0 */ 145 if (next_index == next0) { 146 to_next[0] = from[0]; 147 to_next++; 148 held++; 149 } else { 150 rte_node_enqueue_x1(graph, node, next0, 151 from[0]); 152 } 153 154 /* Next1 */ 155 if (next_index == next1) { 156 to_next[0] = from[1]; 157 to_next++; 158 held++; 159 } else { 160 rte_node_enqueue_x1(graph, node, next1, 161 from[1]); 162 } 163 164 /* Next2 */ 165 if (next_index == next2) { 166 to_next[0] = from[2]; 167 to_next++; 168 held++; 169 } else { 170 rte_node_enqueue_x1(graph, node, next2, 171 from[2]); 172 } 173 174 /* Next3 */ 175 if (next_index == next3) { 176 to_next[0] = from[3]; 177 to_next++; 178 held++; 179 } else { 180 rte_node_enqueue_x1(graph, node, next3, 181 from[3]); 182 } 183 184 from += 4; 185 186 } else { 187 last_spec += 4; 188 } 189 } 190 191 while (n_left_from > 0) { 192 uint32_t next_hop; 193 194 mbuf0 = pkts[0]; 195 196 pkts += 1; 197 n_left_from -= 1; 198 199 /* Extract DIP of mbuf0 */ 200 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *, 201 sizeof(struct rte_ether_hdr)); 202 /* Extract cksum, ttl as ipv4 hdr is in cache */ 203 node_mbuf_priv1(mbuf0, dyn)->cksum = ipv4_hdr->hdr_checksum; 204 node_mbuf_priv1(mbuf0, dyn)->ttl = ipv4_hdr->time_to_live; 205 206 rc = rte_lpm_lookup(lpm, rte_be_to_cpu_32(ipv4_hdr->dst_addr), 207 &next_hop); 208 next_hop = (rc == 0) ? next_hop : drop_nh; 209 210 node_mbuf_priv1(mbuf0, dyn)->nh = next_hop & 0xFFFF; 211 next0 = (next_hop >> 16); 212 213 if (unlikely(next_index ^ next0)) { 214 /* Copy things successfully speculated till now */ 215 rte_memcpy(to_next, from, last_spec * sizeof(from[0])); 216 from += last_spec; 217 to_next += last_spec; 218 held += last_spec; 219 last_spec = 0; 220 221 rte_node_enqueue_x1(graph, node, next0, from[0]); 222 from += 1; 223 } else { 224 last_spec += 1; 225 } 226 } 227 228 /* !!! Home run !!! */ 229 if (likely(last_spec == nb_objs)) { 230 rte_node_next_stream_move(graph, node, next_index); 231 return nb_objs; 232 } 233 234 held += last_spec; 235 /* Copy things successfully speculated till now */ 236 rte_memcpy(to_next, from, last_spec * sizeof(from[0])); 237 rte_node_next_stream_put(graph, node, next_index, held); 238 239 return nb_objs; 240 } 241 242 #endif /* __INCLUDE_IP4_LOOKUP_SSE_H__ */ 243