1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(C) 2020 Marvell International Ltd. 3 */ 4 5 #ifndef __INCLUDE_IP4_LOOKUP_NEON_H__ 6 #define __INCLUDE_IP4_LOOKUP_NEON_H__ 7 8 /* ARM64 NEON */ 9 static uint16_t 10 ip4_lookup_node_process_vec(struct rte_graph *graph, struct rte_node *node, 11 void **objs, uint16_t nb_objs) 12 { 13 struct rte_mbuf *mbuf0, *mbuf1, *mbuf2, *mbuf3, **pkts; 14 struct rte_lpm *lpm = IP4_LOOKUP_NODE_LPM(node->ctx); 15 const int dyn = IP4_LOOKUP_NODE_PRIV1_OFF(node->ctx); 16 struct rte_ipv4_hdr *ipv4_hdr; 17 void **to_next, **from; 18 uint16_t last_spec = 0; 19 rte_edge_t next_index; 20 uint16_t n_left_from; 21 uint16_t held = 0; 22 uint32_t drop_nh; 23 rte_xmm_t result; 24 rte_xmm_t priv01; 25 rte_xmm_t priv23; 26 int32x4_t dip; 27 int rc, i; 28 29 /* Speculative next */ 30 next_index = RTE_NODE_IP4_LOOKUP_NEXT_REWRITE; 31 /* Drop node */ 32 drop_nh = ((uint32_t)RTE_NODE_IP4_LOOKUP_NEXT_PKT_DROP) << 16; 33 34 pkts = (struct rte_mbuf **)objs; 35 from = objs; 36 n_left_from = nb_objs; 37 38 for (i = OBJS_PER_CLINE; i < RTE_GRAPH_BURST_SIZE; i += OBJS_PER_CLINE) 39 rte_prefetch0(&objs[i]); 40 41 for (i = 0; i < 4 && i < n_left_from; i++) 42 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[i], void *, 43 sizeof(struct rte_ether_hdr))); 44 45 dip = vdupq_n_s32(0); 46 /* Get stream for the speculated next node */ 47 to_next = rte_node_next_stream_get(graph, node, next_index, nb_objs); 48 while (n_left_from >= 4) { 49 #if RTE_GRAPH_BURST_SIZE > 64 50 /* Prefetch next-next mbufs */ 51 if (likely(n_left_from > 11)) { 52 rte_prefetch0(pkts[8]); 53 rte_prefetch0(pkts[9]); 54 rte_prefetch0(pkts[10]); 55 rte_prefetch0(pkts[11]); 56 } 57 #endif 58 /* Prefetch next mbuf data */ 59 if (likely(n_left_from > 7)) { 60 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[4], void *, 61 sizeof(struct rte_ether_hdr))); 62 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[5], void *, 63 sizeof(struct rte_ether_hdr))); 64 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[6], void *, 65 sizeof(struct rte_ether_hdr))); 66 rte_prefetch0(rte_pktmbuf_mtod_offset(pkts[7], void *, 67 sizeof(struct rte_ether_hdr))); 68 } 69 70 mbuf0 = pkts[0]; 71 mbuf1 = pkts[1]; 72 mbuf2 = pkts[2]; 73 mbuf3 = pkts[3]; 74 75 pkts += 4; 76 n_left_from -= 4; 77 78 /* Extract DIP of mbuf0 */ 79 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *, 80 sizeof(struct rte_ether_hdr)); 81 dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 0); 82 /* Extract cksum, ttl as ipv4 hdr is in cache */ 83 priv01.u16[1] = ipv4_hdr->time_to_live; 84 priv01.u32[1] = ipv4_hdr->hdr_checksum; 85 86 /* Extract DIP of mbuf1 */ 87 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf1, struct rte_ipv4_hdr *, 88 sizeof(struct rte_ether_hdr)); 89 dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 1); 90 /* Extract cksum, ttl as ipv4 hdr is in cache */ 91 priv01.u16[5] = ipv4_hdr->time_to_live; 92 priv01.u32[3] = ipv4_hdr->hdr_checksum; 93 94 /* Extract DIP of mbuf2 */ 95 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf2, struct rte_ipv4_hdr *, 96 sizeof(struct rte_ether_hdr)); 97 dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 2); 98 /* Extract cksum, ttl as ipv4 hdr is in cache */ 99 priv23.u16[1] = ipv4_hdr->time_to_live; 100 priv23.u32[1] = ipv4_hdr->hdr_checksum; 101 102 /* Extract DIP of mbuf3 */ 103 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf3, struct rte_ipv4_hdr *, 104 sizeof(struct rte_ether_hdr)); 105 dip = vsetq_lane_s32(ipv4_hdr->dst_addr, dip, 3); 106 107 dip = vreinterpretq_s32_u8( 108 vrev32q_u8(vreinterpretq_u8_s32(dip))); 109 /* Extract cksum, ttl as ipv4 hdr is in cache */ 110 priv23.u16[5] = ipv4_hdr->time_to_live; 111 priv23.u32[3] = ipv4_hdr->hdr_checksum; 112 113 /* Perform LPM lookup to get NH and next node */ 114 rte_lpm_lookupx4(lpm, dip, result.u32, drop_nh); 115 priv01.u16[0] = result.u16[0]; 116 priv01.u16[4] = result.u16[2]; 117 priv23.u16[0] = result.u16[4]; 118 priv23.u16[4] = result.u16[6]; 119 120 node_mbuf_priv1(mbuf0, dyn)->u = priv01.u64[0]; 121 node_mbuf_priv1(mbuf1, dyn)->u = priv01.u64[1]; 122 node_mbuf_priv1(mbuf2, dyn)->u = priv23.u64[0]; 123 node_mbuf_priv1(mbuf3, dyn)->u = priv23.u64[1]; 124 125 /* Enqueue four to next node */ 126 rte_edge_t fix_spec = ((next_index == result.u16[1]) && 127 (result.u16[1] == result.u16[3]) && 128 (result.u16[3] == result.u16[5]) && 129 (result.u16[5] == result.u16[7])); 130 131 if (unlikely(fix_spec == 0)) { 132 /* Copy things successfully speculated till now */ 133 rte_memcpy(to_next, from, last_spec * sizeof(from[0])); 134 from += last_spec; 135 to_next += last_spec; 136 held += last_spec; 137 last_spec = 0; 138 139 /* Next0 */ 140 if (next_index == result.u16[1]) { 141 to_next[0] = from[0]; 142 to_next++; 143 held++; 144 } else { 145 rte_node_enqueue_x1(graph, node, result.u16[1], 146 from[0]); 147 } 148 149 /* Next1 */ 150 if (next_index == result.u16[3]) { 151 to_next[0] = from[1]; 152 to_next++; 153 held++; 154 } else { 155 rte_node_enqueue_x1(graph, node, result.u16[3], 156 from[1]); 157 } 158 159 /* Next2 */ 160 if (next_index == result.u16[5]) { 161 to_next[0] = from[2]; 162 to_next++; 163 held++; 164 } else { 165 rte_node_enqueue_x1(graph, node, result.u16[5], 166 from[2]); 167 } 168 169 /* Next3 */ 170 if (next_index == result.u16[7]) { 171 to_next[0] = from[3]; 172 to_next++; 173 held++; 174 } else { 175 rte_node_enqueue_x1(graph, node, result.u16[7], 176 from[3]); 177 } 178 179 from += 4; 180 } else { 181 last_spec += 4; 182 } 183 } 184 185 while (n_left_from > 0) { 186 uint32_t next_hop; 187 uint16_t next0; 188 189 mbuf0 = pkts[0]; 190 191 pkts += 1; 192 n_left_from -= 1; 193 194 /* Extract DIP of mbuf0 */ 195 ipv4_hdr = rte_pktmbuf_mtod_offset(mbuf0, struct rte_ipv4_hdr *, 196 sizeof(struct rte_ether_hdr)); 197 /* Extract cksum, ttl as ipv4 hdr is in cache */ 198 node_mbuf_priv1(mbuf0, dyn)->cksum = ipv4_hdr->hdr_checksum; 199 node_mbuf_priv1(mbuf0, dyn)->ttl = ipv4_hdr->time_to_live; 200 201 rc = rte_lpm_lookup(lpm, rte_be_to_cpu_32(ipv4_hdr->dst_addr), 202 &next_hop); 203 next_hop = (rc == 0) ? next_hop : drop_nh; 204 205 node_mbuf_priv1(mbuf0, dyn)->nh = (uint16_t)next_hop; 206 next_hop = next_hop >> 16; 207 next0 = (uint16_t)next_hop; 208 209 if (unlikely(next_index ^ next0)) { 210 /* Copy things successfully speculated till now */ 211 rte_memcpy(to_next, from, last_spec * sizeof(from[0])); 212 from += last_spec; 213 to_next += last_spec; 214 held += last_spec; 215 last_spec = 0; 216 217 rte_node_enqueue_x1(graph, node, next0, from[0]); 218 from += 1; 219 } else { 220 last_spec += 1; 221 } 222 } 223 224 /* !!! Home run !!! */ 225 if (likely(last_spec == nb_objs)) { 226 rte_node_next_stream_move(graph, node, next_index); 227 return nb_objs; 228 } 229 held += last_spec; 230 rte_memcpy(to_next, from, last_spec * sizeof(from[0])); 231 rte_node_next_stream_put(graph, node, next_index, held); 232 233 return nb_objs; 234 } 235 236 #endif /* __INCLUDE_IP4_LOOKUP_NEON_H__ */ 237