xref: /f-stack/freebsd/mips/nlm/hal/fmn.c (revision 22ce4aff)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright 2003-2011 Netlogic Microsystems (Netlogic). All rights
5  * reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are
9  * met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY Netlogic Microsystems ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28  * THE POSSIBILITY OF SUCH DAMAGE.
29  *
30  * NETLOGIC_BSD */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 #include <sys/types.h>
35 #include <sys/systm.h>
36 
37 #include <machine/cpufunc.h>
38 #include <mips/nlm/hal/mips-extns.h>
39 #include <mips/nlm/hal/haldefs.h>
40 #include <mips/nlm/hal/iomap.h>
41 #include <mips/nlm/hal/fmn.h>
42 
/*
 * XLP can take up to 16K FMN messages per hardware queue as spill.
 * Configuring all 16K, however, makes the total spill memory required
 * blow up to 192MB for a single chip configuration and 768MB for a
 * four chip configuration. Hence, for now, the per queue spill is set
 * up as 1K FMN messages. With this, the total spill memory needed for
 * 1024 hardware queues (with 12 bytes per single entry FMN message)
 * becomes (1*1024)*12*1024 queues = 12MB. For the four chip
 * configuration, the memory needed is 12 * 4 = 48MB.
 */
uint64_t nlm_cms_spill_total_messages = 1024;	/* 1K messages per queue */
53 
/*
 * FMN stations present on a XLP832:
 *
 *   CPU    stations: 8
 *   PCIE0  stations: 1
 *   PCIE1  stations: 1
 *   PCIE2  stations: 1
 *   PCIE3  stations: 1
 *   GDX    stations: 1
 *   CRYPTO stations: 1
 *   RSA    stations: 1
 *   CMP    stations: 1
 *   POE    stations: 1
 *   NAE    stations: 1
 *   ==================
 *   Total          : 18 stations per chip
 *
 * With all 4 nodes populated there are 18 * 4 = 72 FMN stations.
 */
uint32_t nlm_cms_total_stations = 18 /* per chip */ * 4 /* xlp_num_nodes */;
72 
73 /**
74  * Takes inputs as node, queue_size and maximum number of queues.
75  * Calculates the base, start & end and returns the same for a
76  * defined qid.
77  *
78  * The output queues are maintained in the internal output buffer
79  * which is an on-chip SRAM structure. For the actual hardware
80  * internal implementation, it is a structure which consists
81  * of eight banks of 4096-entry x message-width SRAMs. The SRAM
82  * implementation is designed to run at 1GHz with a 1-cycle read/write
83  * access. A read/write transaction can be initiated for each bank
84  * every cycle for a total of eight accesses per cycle. Successive
85  * entries of the same output queue are placed in successive banks.
86  * This is done to spread different read & write accesses to same/different
87  * output queue over as many different banks as possible so that they
88  * can be scheduled concurrently. Spreading the accesses to as many banks
89  * as possible to maximize the concurrency internally is important for
90  * achieving the desired peak throughput. This is done by h/w implementation
91  * itself.
92  *
93  * Output queues are allocated from this internal output buffer by
94  * software. The total capacity of the output buffer is 32K-entry.
95  * Each output queue can be sized from 32-entry to 1024-entry in
96  * increments of 32-entry. This is done by specifying a Start & a
97  * End pointer: pointers to the first & last 32-entry chunks allocated
98  * to the output queue.
99  *
100  * To optimize the storage required for 1024 OQ pointers, the upper 5-bits
101  * are shared by the Start & the End pointer. The side-effect of this
102  * optimization is that an OQ can't cross a 1024-entry boundary. Also, the
103  * lower 5-bits don't need to be specified in the Start & the End pointer
104  * as the allocation is in increments of 32-entries.
105  *
106  * Queue occupancy is tracked by a Head & a Tail pointer. Tail pointer
107  * indicates the location to which next entry will be written & Head
108  * pointer indicates the location from which next entry will be read. When
109  * these pointers reach the top of the allocated space (indicated by the
110  * End pointer), they are reset to the bottom of the allocated space
111  * (indicated by the Start pointer).
112  *
113  * Output queue pointer information:
114  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
115  *
116  *   14               10 9              5 4                 0
117  *   ------------------
118  *   | base ptr       |
119  *   ------------------
120  *                       ----------------
121  *                       | start ptr    |
122  *                       ----------------
123  *                       ----------------
124  *                       | end   ptr    |
125  *                       ----------------
126  *                       ------------------------------------
127  *                       |           head ptr               |
128  *                       ------------------------------------
129  *                       ------------------------------------
130  *                       |           tail ptr               |
131  *                       ------------------------------------
132  * Note:
133  * A total of 1024 segments can sit on one software-visible "bank"
134  * of internal SRAM. Each segment contains 32 entries. Also note
135  * that sw-visible "banks" are not the same as the actual internal
136  * 8-bank implementation of hardware. It is an optimization of
137  * internal access.
138  *
139  */
140 
nlm_cms_setup_credits(uint64_t base,int destid,int srcid,int credit)141 void nlm_cms_setup_credits(uint64_t base, int destid, int srcid, int credit)
142 {
143 	uint64_t val;
144 
145 	val = (((uint64_t)credit << 24) | (destid << 12) | (srcid << 0));
146 	nlm_write_cms_reg(base, CMS_OUTPUTQ_CREDIT_CFG, val);
147 
148 }
149 
150 /*
151  * base		- CMS module base address for this node.
152  * qid		- is the output queue id otherwise called as vc id
153  * spill_base   - is the 40-bit physical address of spill memory. Must be
154 		  4KB aligned.
155  * nsegs	- No of segments where a "1" indicates 4KB. Spill size must be
156  *                a multiple of 4KB.
157  */
nlm_cms_alloc_spill_q(uint64_t base,int qid,uint64_t spill_base,int nsegs)158 int nlm_cms_alloc_spill_q(uint64_t base, int qid, uint64_t spill_base,
159 				int nsegs)
160 {
161 	uint64_t queue_config;
162 	uint32_t spill_start;
163 
164 	if (nsegs > CMS_MAX_SPILL_SEGMENTS_PER_QUEUE) {
165 		return 1;
166 	}
167 
168 	queue_config = nlm_read_cms_reg(base,(CMS_OUTPUTQ_CONFIG(qid)));
169 
170 	spill_start = ((spill_base >> 12) & 0x3F);
171 	/* Spill configuration */
172 	queue_config = (((uint64_t)CMS_SPILL_ENA << 62) |
173 				(((spill_base >> 18) & 0x3FFFFF) << 27) |
174 				(spill_start + nsegs - 1) << 21 |
175 				(spill_start << 15));
176 
177 	nlm_write_cms_reg(base,(CMS_OUTPUTQ_CONFIG(qid)),queue_config);
178 
179 	return 0;
180 }
181 
/*
 * Return the current contents of the CMS_OUTPUTQ_CONFIG register of
 * output queue qid.
 */
uint64_t nlm_cms_get_onchip_queue(uint64_t base, int qid)
{
	uint64_t queue_config;

	queue_config = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
	return queue_config;
}
186 
/*
 * OR val into the CMS_OUTPUTQ_CONFIG register of output queue qid.
 *
 * This is a read-modify-write that only ever sets bits: bits already
 * set in the register stay set, and bits that are zero in val are left
 * untouched.
 */
void nlm_cms_set_onchip_queue(uint64_t base, int qid, uint64_t val)
{
	uint64_t queue_config;

	queue_config = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
	nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), queue_config | val);
}
195 
/*
 * Update the level interrupt fields of an output queue.
 *
 * Reads CMS_OUTPUTQ_CONFIG(qid), clears bits 58:56 and 55:54, then ORs in
 * intr_val at bit 56 and sub_type at bit 54 and writes the result back.
 * Neither argument is masked, so a value wider than its field reaches
 * into the neighbouring bits.
 */
void nlm_cms_per_queue_level_intr(uint64_t base, int qid, int sub_type,
					int intr_val)
{
	const uint64_t level_fields = (0x7ULL << 56) | (0x3ULL << 54);
	uint64_t queue_config;

	queue_config = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));

	queue_config &= ~level_fields;
	queue_config |= (uint64_t)intr_val << 56;
	queue_config |= (uint64_t)sub_type << 54;

	nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), queue_config);
}
210 
/*
 * Update the timer interrupt fields of an output queue.
 *
 * Reads CMS_OUTPUTQ_CONFIG(qid), clears bits 53:51 and 50:49, then ORs in
 * intr_val at bit 51 and sub_type at bit 49 and writes the result back.
 * Neither argument is masked, so a value wider than its field reaches
 * into the neighbouring bits.
 */
void nlm_cms_per_queue_timer_intr(uint64_t base, int qid, int sub_type,
					int intr_val)
{
	const uint64_t timer_fields = (0x7ULL << 51) | (0x3ULL << 49);
	uint64_t queue_config;

	queue_config = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));

	queue_config &= ~timer_fields;
	queue_config |= (uint64_t)intr_val << 51;
	queue_config |= (uint64_t)sub_type << 49;

	nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid), queue_config);
}
225 
/*
 * Returns 1 if an interrupt has been generated for this output queue,
 * 0 otherwise. The state is taken from bit 59 of CMS_OUTPUTQ_CONFIG(qid).
 */
int nlm_cms_outputq_intr_check(uint64_t base, int qid)
{
	uint64_t queue_config;

	queue_config = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
	if ((queue_config & (1ULL << 59)) != 0)
		return 1;
	return 0;
}
234 
/*
 * Read CMS_OUTPUTQ_CONFIG(qid) and write it back with bit 59 set.
 * NOTE(review): judging by the function name, writing 1 to bit 59
 * presumably clears the queue's pending interrupt -- confirm against
 * the hardware documentation.
 */
void nlm_cms_outputq_clr_intr(uint64_t base, int qid)
{
	uint64_t queue_config;

	queue_config = nlm_read_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid));
	nlm_write_cms_reg(base, CMS_OUTPUTQ_CONFIG(qid),
	    queue_config | (1ULL << 59));
}
242 
nlm_cms_illegal_dst_error_intr(uint64_t base,int en)243 void nlm_cms_illegal_dst_error_intr(uint64_t base, int en)
244 {
245 	uint64_t val;
246 
247 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
248 	val |= (en<<8);
249 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
250 }
251 
nlm_cms_timeout_error_intr(uint64_t base,int en)252 void nlm_cms_timeout_error_intr(uint64_t base, int en)
253 {
254 	uint64_t val;
255 
256 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
257 	val |= (en<<7);
258 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
259 }
260 
nlm_cms_biu_error_resp_intr(uint64_t base,int en)261 void nlm_cms_biu_error_resp_intr(uint64_t base, int en)
262 {
263 	uint64_t val;
264 
265 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
266 	val |= (en<<6);
267 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
268 }
269 
nlm_cms_spill_uncorrectable_ecc_error_intr(uint64_t base,int en)270 void nlm_cms_spill_uncorrectable_ecc_error_intr(uint64_t base, int en)
271 {
272 	uint64_t val;
273 
274 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
275 	val |= (en<<5) | (en<<3);
276 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
277 }
278 
nlm_cms_spill_correctable_ecc_error_intr(uint64_t base,int en)279 void nlm_cms_spill_correctable_ecc_error_intr(uint64_t base, int en)
280 {
281 	uint64_t val;
282 
283 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
284 	val |= (en<<4) | (en<<2);
285 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
286 }
287 
nlm_cms_outputq_uncorrectable_ecc_error_intr(uint64_t base,int en)288 void nlm_cms_outputq_uncorrectable_ecc_error_intr(uint64_t base, int en)
289 {
290 	uint64_t val;
291 
292 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
293 	val |= (en<<1);
294 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
295 }
296 
nlm_cms_outputq_correctable_ecc_error_intr(uint64_t base,int en)297 void nlm_cms_outputq_correctable_ecc_error_intr(uint64_t base, int en)
298 {
299 	uint64_t val;
300 
301 	val = nlm_read_cms_reg(base, CMS_MSG_CONFIG);
302 	val |= (en<<0);
303 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
304 }
305 
nlm_cms_network_error_status(uint64_t base)306 uint64_t nlm_cms_network_error_status(uint64_t base)
307 {
308 	return nlm_read_cms_reg(base, CMS_MSG_ERR);
309 }
310 
/*
 * Extract the 4-bit field at bits 15:12 of err (the error code, going
 * by the function name). err is presumably a value obtained from
 * nlm_cms_network_error_status().
 */
int nlm_cms_get_net_error_code(uint64_t err)
{
	const uint64_t code_field = 0xfULL << 12;

	return (int)((err & code_field) >> 12);
}
315 
/*
 * Extract the 9-bit field at bits 40:32 of err (the error syndrome,
 * going by the function name). err is presumably a value obtained from
 * nlm_cms_network_error_status().
 */
int nlm_cms_get_net_error_syndrome(uint64_t err)
{
	const uint64_t syndrome_field = 0x1ffULL << 32;

	return (int)((err & syndrome_field) >> 32);
}
320 
/*
 * Extract the 15-bit field at bits 58:44 of err (the RAM index, going
 * by the function name). err is presumably a value obtained from
 * nlm_cms_network_error_status().
 */
int nlm_cms_get_net_error_ramindex(uint64_t err)
{
	const uint64_t ramindex_field = 0x7fffULL << 44;

	return (int)((err & ramindex_field) >> 44);
}
325 
/*
 * Extract the 12-bit field at bits 27:16 of err (the output queue,
 * going by the function name). err is presumably a value obtained from
 * nlm_cms_network_error_status().
 */
int nlm_cms_get_net_error_outputq(uint64_t err)
{
	const uint64_t outputq_field = 0xfffULL << 16;

	return (int)((err & outputq_field) >> 16);
}
330 
331 /*========================= FMN Tracing related APIs ================*/
332 
nlm_cms_trace_setup(uint64_t base,int en,uint64_t trace_base,uint64_t trace_limit,int match_dstid_en,int dst_id,int match_srcid_en,int src_id,int wrap)333 void nlm_cms_trace_setup(uint64_t base, int en, uint64_t trace_base,
334 				uint64_t trace_limit, int match_dstid_en,
335 				int dst_id, int match_srcid_en, int src_id,
336 				int wrap)
337 {
338 	uint64_t val;
339 
340 	nlm_write_cms_reg(base, CMS_TRACE_BASE_ADDR, trace_base);
341 	nlm_write_cms_reg(base, CMS_TRACE_LIMIT_ADDR, trace_limit);
342 
343 	val = nlm_read_cms_reg(base, CMS_TRACE_CONFIG);
344 	val |= (((uint64_t)match_dstid_en << 39) |
345 		((dst_id & 0xfff) << 24) |
346 		(match_srcid_en << 23) |
347 		((src_id & 0xfff) << 8) |
348 		(wrap << 1) |
349 		(en << 0));
350 	nlm_write_cms_reg(base, CMS_MSG_CONFIG, val);
351 }
352 
nlm_cms_endian_byte_swap(uint64_t base,int en)353 void nlm_cms_endian_byte_swap (uint64_t base, int en)
354 {
355 	nlm_write_cms_reg(base, CMS_MSG_ENDIAN_SWAP, en);
356 }
357