Commit | Line | Data |
---|---|---|
77241056 MM |
1 | #ifndef _HFI1_KERNEL_H |
2 | #define _HFI1_KERNEL_H | |
3 | /* | |
4 | * | |
5 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
6 | * redistributing this file, you may do so under either license. | |
7 | * | |
8 | * GPL LICENSE SUMMARY | |
9 | * | |
10 | * Copyright(c) 2015 Intel Corporation. | |
11 | * | |
12 | * This program is free software; you can redistribute it and/or modify | |
13 | * it under the terms of version 2 of the GNU General Public License as | |
14 | * published by the Free Software Foundation. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, but | |
17 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 | * General Public License for more details. | |
20 | * | |
21 | * BSD LICENSE | |
22 | * | |
23 | * Copyright(c) 2015 Intel Corporation. | |
24 | * | |
25 | * Redistribution and use in source and binary forms, with or without | |
26 | * modification, are permitted provided that the following conditions | |
27 | * are met: | |
28 | * | |
29 | * - Redistributions of source code must retain the above copyright | |
30 | * notice, this list of conditions and the following disclaimer. | |
31 | * - Redistributions in binary form must reproduce the above copyright | |
32 | * notice, this list of conditions and the following disclaimer in | |
33 | * the documentation and/or other materials provided with the | |
34 | * distribution. | |
35 | * - Neither the name of Intel Corporation nor the names of its | |
36 | * contributors may be used to endorse or promote products derived | |
37 | * from this software without specific prior written permission. | |
38 | * | |
39 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
40 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
41 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
42 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
43 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
44 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
45 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
46 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
47 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
48 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
49 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
50 | * | |
51 | */ | |
52 | ||
53 | #include <linux/interrupt.h> | |
54 | #include <linux/pci.h> | |
55 | #include <linux/dma-mapping.h> | |
56 | #include <linux/mutex.h> | |
57 | #include <linux/list.h> | |
58 | #include <linux/scatterlist.h> | |
59 | #include <linux/slab.h> | |
60 | #include <linux/io.h> | |
61 | #include <linux/fs.h> | |
62 | #include <linux/completion.h> | |
63 | #include <linux/kref.h> | |
64 | #include <linux/sched.h> | |
65 | #include <linux/cdev.h> | |
66 | #include <linux/delay.h> | |
67 | #include <linux/kthread.h> | |
ec3f2c12 | 68 | #include <rdma/rdma_vt.h> |
77241056 MM |
69 | |
70 | #include "chip_registers.h" | |
71 | #include "common.h" | |
72 | #include "verbs.h" | |
73 | #include "pio.h" | |
74 | #include "chip.h" | |
75 | #include "mad.h" | |
76 | #include "qsfp.h" | |
77 | #include "platform_config.h" | |
78 | ||
79 | /* bumped 1 from s/w major version of TrueScale */ | |
80 | #define HFI1_CHIP_VERS_MAJ 3U | |
81 | ||
82 | /* don't care about this except printing */ | |
83 | #define HFI1_CHIP_VERS_MIN 0U | |
84 | ||
85 | /* The Organization Unique Identifier (Mfg code), and its position in GUID */ | |
86 | #define HFI1_OUI 0x001175 | |
87 | #define HFI1_OUI_LSB 40 | |
88 | ||
89 | #define DROP_PACKET_OFF 0 | |
90 | #define DROP_PACKET_ON 1 | |
91 | ||
92 | extern unsigned long hfi1_cap_mask; | |
93 | #define HFI1_CAP_KGET_MASK(mask, cap) ((mask) & HFI1_CAP_##cap) | |
94 | #define HFI1_CAP_UGET_MASK(mask, cap) \ | |
95 | (((mask) >> HFI1_CAP_USER_SHIFT) & HFI1_CAP_##cap) | |
96 | #define HFI1_CAP_KGET(cap) (HFI1_CAP_KGET_MASK(hfi1_cap_mask, cap)) | |
97 | #define HFI1_CAP_UGET(cap) (HFI1_CAP_UGET_MASK(hfi1_cap_mask, cap)) | |
98 | #define HFI1_CAP_IS_KSET(cap) (!!HFI1_CAP_KGET(cap)) | |
99 | #define HFI1_CAP_IS_USET(cap) (!!HFI1_CAP_UGET(cap)) | |
100 | #define HFI1_MISC_GET() ((hfi1_cap_mask >> HFI1_CAP_MISC_SHIFT) & \ | |
101 | HFI1_CAP_MISC_MASK) | |
a9c05e35 BM |
102 | /* Offline Disabled Reason is 4-bits */ |
103 | #define HFI1_ODR_MASK(rsn) ((rsn) & OPA_PI_MASK_OFFLINE_REASON) | |
77241056 | 104 | |
82c2611d NV |
105 | /* |
106 | * Control context is always 0 and handles the error packets. | |
107 | * It also handles the VL15 and multicast packets. | |
108 | */ | |
109 | #define HFI1_CTRL_CTXT 0 | |
110 | ||
2c5b521a JR |
111 | /* |
112 | * Driver context will store software counters for each of the events | |
113 | * associated with these status registers | |
114 | */ | |
115 | #define NUM_CCE_ERR_STATUS_COUNTERS 41 | |
116 | #define NUM_RCV_ERR_STATUS_COUNTERS 64 | |
117 | #define NUM_MISC_ERR_STATUS_COUNTERS 13 | |
118 | #define NUM_SEND_PIO_ERR_STATUS_COUNTERS 36 | |
119 | #define NUM_SEND_DMA_ERR_STATUS_COUNTERS 4 | |
120 | #define NUM_SEND_EGRESS_ERR_STATUS_COUNTERS 64 | |
121 | #define NUM_SEND_ERR_STATUS_COUNTERS 3 | |
122 | #define NUM_SEND_CTXT_ERR_STATUS_COUNTERS 5 | |
123 | #define NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS 24 | |
124 | ||
77241056 MM |
125 | /* |
126 | * per driver stats, either not device nor port-specific, or | |
127 | * summed over all of the devices and ports. | |
128 | * They are described by name via ipathfs filesystem, so layout | |
129 | * and number of elements can change without breaking compatibility. | |
130 | * If members are added or deleted hfi1_statnames[] in debugfs.c must | |
131 | * change to match. | |
132 | */ | |
133 | struct hfi1_ib_stats { | |
134 | __u64 sps_ints; /* number of interrupts handled */ | |
135 | __u64 sps_errints; /* number of error interrupts */ | |
136 | __u64 sps_txerrs; /* tx-related packet errors */ | |
137 | __u64 sps_rcverrs; /* non-crc rcv packet errors */ | |
138 | __u64 sps_hwerrs; /* hardware errors reported (parity, etc.) */ | |
139 | __u64 sps_nopiobufs; /* no pio bufs avail from kernel */ | |
140 | __u64 sps_ctxts; /* number of contexts currently open */ | |
141 | __u64 sps_lenerrs; /* number of kernel packets where RHF != LRH len */ | |
142 | __u64 sps_buffull; | |
143 | __u64 sps_hdrfull; | |
144 | }; | |
145 | ||
146 | extern struct hfi1_ib_stats hfi1_stats; | |
147 | extern const struct pci_error_handlers hfi1_pci_err_handler; | |
148 | ||
149 | /* | |
150 | * First-cut criterion for "device is active" is | |
151 | * two thousand dwords combined Tx, Rx traffic per | |
152 | * 5-second interval. SMA packets are 64 dwords, | |
153 | * and occur "a few per second", presumably each way. | |
154 | */ | |
155 | #define HFI1_TRAFFIC_ACTIVE_THRESHOLD (2000) | |
156 | ||
157 | /* | |
158 | * Below contains all data related to a single context (formerly called port). | |
159 | */ | |
160 | ||
161 | #ifdef CONFIG_DEBUG_FS | |
162 | struct hfi1_opcode_stats_perctx; | |
163 | #endif | |
164 | ||
77241056 MM |
165 | struct ctxt_eager_bufs { |
166 | ssize_t size; /* total size of eager buffers */ | |
167 | u32 count; /* size of buffers array */ | |
168 | u32 numbufs; /* number of buffers allocated */ | |
169 | u32 alloced; /* number of rcvarray entries used */ | |
170 | u32 rcvtid_size; /* size of each eager rcv tid */ | |
171 | u32 threshold; /* head update threshold */ | |
172 | struct eager_buffer { | |
173 | void *addr; | |
174 | dma_addr_t phys; | |
175 | ssize_t len; | |
176 | } *buffers; | |
177 | struct { | |
178 | void *addr; | |
179 | dma_addr_t phys; | |
180 | } *rcvtids; | |
181 | }; | |
182 | ||
a86cd357 MH |
183 | struct exp_tid_set { |
184 | struct list_head list; | |
185 | u32 count; | |
186 | }; | |
187 | ||
77241056 MM |
188 | struct hfi1_ctxtdata { |
189 | /* shadow the ctxt's RcvCtrl register */ | |
190 | u64 rcvctrl; | |
191 | /* rcvhdrq base, needs mmap before useful */ | |
192 | void *rcvhdrq; | |
193 | /* kernel virtual address where hdrqtail is updated */ | |
194 | volatile __le64 *rcvhdrtail_kvaddr; | |
195 | /* | |
196 | * Shared page for kernel to signal user processes that send buffers | |
197 | * need disarming. The process should call HFI1_CMD_DISARM_BUFS | |
198 | * or HFI1_CMD_ACK_EVENT with IPATH_EVENT_DISARM_BUFS set. | |
199 | */ | |
200 | unsigned long *user_event_mask; | |
201 | /* when waiting for rcv or pioavail */ | |
202 | wait_queue_head_t wait; | |
203 | /* rcvhdrq size (for freeing) */ | |
204 | size_t rcvhdrq_size; | |
205 | /* number of rcvhdrq entries */ | |
206 | u16 rcvhdrq_cnt; | |
207 | /* size of each of the rcvhdrq entries */ | |
208 | u16 rcvhdrqentsize; | |
209 | /* mmap of hdrq, must fit in 44 bits */ | |
210 | dma_addr_t rcvhdrq_phys; | |
211 | dma_addr_t rcvhdrqtailaddr_phys; | |
212 | struct ctxt_eager_bufs egrbufs; | |
213 | /* this receive context's assigned PIO ACK send context */ | |
214 | struct send_context *sc; | |
215 | ||
216 | /* dynamic receive available interrupt timeout */ | |
217 | u32 rcvavail_timeout; | |
218 | /* | |
219 | * number of opens (including slave sub-contexts) on this instance | |
220 | * (ignoring forks, dup, etc. for now) | |
221 | */ | |
222 | int cnt; | |
223 | /* | |
224 | * how much space to leave at start of eager TID entries for | |
225 | * protocol use, on each TID | |
226 | */ | |
227 | /* instead of calculating it */ | |
228 | unsigned ctxt; | |
229 | /* non-zero if ctxt is being shared. */ | |
230 | u16 subctxt_cnt; | |
231 | /* non-zero if ctxt is being shared. */ | |
232 | u16 subctxt_id; | |
233 | u8 uuid[16]; | |
234 | /* job key */ | |
235 | u16 jkey; | |
236 | /* number of RcvArray groups for this context. */ | |
237 | u32 rcv_array_groups; | |
238 | /* index of first eager TID entry. */ | |
239 | u32 eager_base; | |
240 | /* number of expected TID entries */ | |
241 | u32 expected_count; | |
242 | /* index of first expected TID entry. */ | |
243 | u32 expected_base; | |
a86cd357 MH |
244 | |
245 | struct exp_tid_set tid_group_list; | |
246 | struct exp_tid_set tid_used_list; | |
247 | struct exp_tid_set tid_full_list; | |
248 | ||
77241056 | 249 | /* lock protecting all Expected TID data */ |
463e6ebc | 250 | struct mutex exp_lock; |
77241056 MM |
251 | /* number of pio bufs for this ctxt (all procs, if shared) */ |
252 | u32 piocnt; | |
253 | /* first pio buffer for this ctxt */ | |
254 | u32 pio_base; | |
255 | /* chip offset of PIO buffers for this ctxt */ | |
256 | u32 piobufs; | |
257 | /* per-context configuration flags */ | |
82c2611d | 258 | u32 flags; |
77241056 MM |
259 | /* per-context event flags for fileops/intr communication */ |
260 | unsigned long event_flags; | |
261 | /* WAIT_RCV that timed out, no interrupt */ | |
262 | u32 rcvwait_to; | |
263 | /* WAIT_PIO that timed out, no interrupt */ | |
264 | u32 piowait_to; | |
265 | /* WAIT_RCV already happened, no wait */ | |
266 | u32 rcvnowait; | |
267 | /* WAIT_PIO already happened, no wait */ | |
268 | u32 pionowait; | |
269 | /* total number of polled urgent packets */ | |
270 | u32 urgent; | |
271 | /* saved total number of polled urgent packets for poll edge trigger */ | |
272 | u32 urgent_poll; | |
273 | /* pid of process using this ctxt */ | |
274 | pid_t pid; | |
275 | pid_t subpid[HFI1_MAX_SHARED_CTXTS]; | |
276 | /* same size as task_struct .comm[], command that opened context */ | |
c3af8a28 | 277 | char comm[TASK_COMM_LEN]; |
77241056 MM |
278 | /* so file ops can get at unit */ |
279 | struct hfi1_devdata *dd; | |
280 | /* so functions that need physical port can get it easily */ | |
281 | struct hfi1_pportdata *ppd; | |
282 | /* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */ | |
283 | void *subctxt_uregbase; | |
284 | /* An array of pages for the eager receive buffers * N */ | |
285 | void *subctxt_rcvegrbuf; | |
286 | /* An array of pages for the eager header queue entries * N */ | |
287 | void *subctxt_rcvhdr_base; | |
288 | /* The version of the library which opened this ctxt */ | |
289 | u32 userversion; | |
290 | /* Bitmask of active slaves */ | |
291 | u32 active_slaves; | |
292 | /* Type of packets or conditions we want to poll for */ | |
293 | u16 poll_type; | |
294 | /* receive packet sequence counter */ | |
295 | u8 seq_cnt; | |
296 | u8 redirect_seq_cnt; | |
297 | /* ctxt rcvhdrq head offset */ | |
298 | u32 head; | |
299 | u32 pkt_count; | |
300 | /* QPs waiting for context processing */ | |
301 | struct list_head qp_wait_list; | |
302 | /* interrupt handling */ | |
303 | u64 imask; /* clear interrupt mask */ | |
304 | int ireg; /* clear interrupt register */ | |
305 | unsigned numa_id; /* numa node of this context */ | |
306 | /* verbs stats per CTX */ | |
307 | struct hfi1_opcode_stats_perctx *opstats; | |
308 | /* | |
309 | * This is the kernel thread that will keep making | |
310 | * progress on the user sdma requests behind the scenes. | |
311 | * There is one per context (shared contexts use the master's). | |
312 | */ | |
313 | struct task_struct *progress; | |
314 | struct list_head sdma_queues; | |
315 | spinlock_t sdma_qlock; | |
316 | ||
77241056 MM |
317 | /* |
318 | * The interrupt handler for a particular receive context can vary | |
319 | * throughout it's lifetime. This is not a lock protected data member so | |
320 | * it must be updated atomically and the prev and new value must always | |
321 | * be valid. Worst case is we process an extra interrupt and up to 64 | |
322 | * packets with the wrong interrupt handler. | |
323 | */ | |
f4f30031 | 324 | int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded); |
77241056 MM |
325 | }; |
326 | ||
327 | /* | |
328 | * Represents a single packet at a high level. Put commonly computed things in | |
329 | * here so we do not have to keep doing them over and over. The rule of thumb is | |
330 | * if something is used one time to derive some value, store that something in | |
331 | * here. If it is used multiple times, then store the result of that derivation | |
332 | * in here. | |
333 | */ | |
334 | struct hfi1_packet { | |
335 | void *ebuf; | |
336 | void *hdr; | |
337 | struct hfi1_ctxtdata *rcd; | |
338 | __le32 *rhf_addr; | |
895420dd | 339 | struct rvt_qp *qp; |
77241056 MM |
340 | struct hfi1_other_headers *ohdr; |
341 | u64 rhf; | |
342 | u32 maxcnt; | |
343 | u32 rhqoff; | |
344 | u32 hdrqtail; | |
345 | int numpkt; | |
346 | u16 tlen; | |
347 | u16 hlen; | |
348 | s16 etail; | |
349 | u16 rsize; | |
350 | u8 updegr; | |
351 | u8 rcv_flags; | |
352 | u8 etype; | |
353 | }; | |
354 | ||
355 | static inline bool has_sc4_bit(struct hfi1_packet *p) | |
356 | { | |
357 | return !!rhf_dc_info(p->rhf); | |
358 | } | |
359 | ||
360 | /* | |
361 | * Private data for snoop/capture support. | |
362 | */ | |
363 | struct hfi1_snoop_data { | |
364 | int mode_flag; | |
365 | struct cdev cdev; | |
366 | struct device *class_dev; | |
367 | spinlock_t snoop_lock; | |
368 | struct list_head queue; | |
369 | wait_queue_head_t waitq; | |
370 | void *filter_value; | |
371 | int (*filter_callback)(void *hdr, void *data, void *value); | |
372 | u64 dcc_cfg; /* saved value of DCC Cfg register */ | |
373 | }; | |
374 | ||
375 | /* snoop mode_flag values */ | |
376 | #define HFI1_PORT_SNOOP_MODE 1U | |
377 | #define HFI1_PORT_CAPTURE_MODE 2U | |
378 | ||
895420dd | 379 | struct rvt_sge_state; |
77241056 MM |
380 | |
381 | /* | |
382 | * Get/Set IB link-level config parameters for f_get/set_ib_cfg() | |
383 | * Mostly for MADs that set or query link parameters, also ipath | |
384 | * config interfaces | |
385 | */ | |
386 | #define HFI1_IB_CFG_LIDLMC 0 /* LID (LS16b) and Mask (MS16b) */ | |
387 | #define HFI1_IB_CFG_LWID_DG_ENB 1 /* allowed Link-width downgrade */ | |
388 | #define HFI1_IB_CFG_LWID_ENB 2 /* allowed Link-width */ | |
389 | #define HFI1_IB_CFG_LWID 3 /* currently active Link-width */ | |
390 | #define HFI1_IB_CFG_SPD_ENB 4 /* allowed Link speeds */ | |
391 | #define HFI1_IB_CFG_SPD 5 /* current Link spd */ | |
392 | #define HFI1_IB_CFG_RXPOL_ENB 6 /* Auto-RX-polarity enable */ | |
393 | #define HFI1_IB_CFG_LREV_ENB 7 /* Auto-Lane-reversal enable */ | |
394 | #define HFI1_IB_CFG_LINKLATENCY 8 /* Link Latency (IB1.2 only) */ | |
395 | #define HFI1_IB_CFG_HRTBT 9 /* IB heartbeat off/enable/auto; DDR/QDR only */ | |
396 | #define HFI1_IB_CFG_OP_VLS 10 /* operational VLs */ | |
397 | #define HFI1_IB_CFG_VL_HIGH_CAP 11 /* num of VL high priority weights */ | |
398 | #define HFI1_IB_CFG_VL_LOW_CAP 12 /* num of VL low priority weights */ | |
399 | #define HFI1_IB_CFG_OVERRUN_THRESH 13 /* IB overrun threshold */ | |
400 | #define HFI1_IB_CFG_PHYERR_THRESH 14 /* IB PHY error threshold */ | |
401 | #define HFI1_IB_CFG_LINKDEFAULT 15 /* IB link default (sleep/poll) */ | |
402 | #define HFI1_IB_CFG_PKEYS 16 /* update partition keys */ | |
403 | #define HFI1_IB_CFG_MTU 17 /* update MTU in IBC */ | |
404 | #define HFI1_IB_CFG_VL_HIGH_LIMIT 19 | |
405 | #define HFI1_IB_CFG_PMA_TICKS 20 /* PMA sample tick resolution */ | |
406 | #define HFI1_IB_CFG_PORT 21 /* switch port we are connected to */ | |
407 | ||
408 | /* | |
409 | * HFI or Host Link States | |
410 | * | |
411 | * These describe the states the driver thinks the logical and physical | |
412 | * states are in. Used as an argument to set_link_state(). Implemented | |
413 | * as bits for easy multi-state checking. The actual state can only be | |
414 | * one. | |
415 | */ | |
416 | #define __HLS_UP_INIT_BP 0 | |
417 | #define __HLS_UP_ARMED_BP 1 | |
418 | #define __HLS_UP_ACTIVE_BP 2 | |
419 | #define __HLS_DN_DOWNDEF_BP 3 /* link down default */ | |
420 | #define __HLS_DN_POLL_BP 4 | |
421 | #define __HLS_DN_DISABLE_BP 5 | |
422 | #define __HLS_DN_OFFLINE_BP 6 | |
423 | #define __HLS_VERIFY_CAP_BP 7 | |
424 | #define __HLS_GOING_UP_BP 8 | |
425 | #define __HLS_GOING_OFFLINE_BP 9 | |
426 | #define __HLS_LINK_COOLDOWN_BP 10 | |
427 | ||
349ac71f | 428 | #define HLS_UP_INIT BIT(__HLS_UP_INIT_BP) |
429 | #define HLS_UP_ARMED BIT(__HLS_UP_ARMED_BP) | |
430 | #define HLS_UP_ACTIVE BIT(__HLS_UP_ACTIVE_BP) | |
431 | #define HLS_DN_DOWNDEF BIT(__HLS_DN_DOWNDEF_BP) /* link down default */ | |
432 | #define HLS_DN_POLL BIT(__HLS_DN_POLL_BP) | |
433 | #define HLS_DN_DISABLE BIT(__HLS_DN_DISABLE_BP) | |
434 | #define HLS_DN_OFFLINE BIT(__HLS_DN_OFFLINE_BP) | |
435 | #define HLS_VERIFY_CAP BIT(__HLS_VERIFY_CAP_BP) | |
436 | #define HLS_GOING_UP BIT(__HLS_GOING_UP_BP) | |
437 | #define HLS_GOING_OFFLINE BIT(__HLS_GOING_OFFLINE_BP) | |
438 | #define HLS_LINK_COOLDOWN BIT(__HLS_LINK_COOLDOWN_BP) | |
77241056 MM |
439 | |
440 | #define HLS_UP (HLS_UP_INIT | HLS_UP_ARMED | HLS_UP_ACTIVE) | |
441 | ||
442 | /* use this MTU size if none other is given */ | |
443 | #define HFI1_DEFAULT_ACTIVE_MTU 8192 | |
444 | /* use this MTU size as the default maximum */ | |
445 | #define HFI1_DEFAULT_MAX_MTU 8192 | |
446 | /* default partition key */ | |
447 | #define DEFAULT_PKEY 0xffff | |
448 | ||
449 | /* | |
450 | * Possible fabric manager config parameters for fm_{get,set}_table() | |
451 | */ | |
452 | #define FM_TBL_VL_HIGH_ARB 1 /* Get/set VL high prio weights */ | |
453 | #define FM_TBL_VL_LOW_ARB 2 /* Get/set VL low prio weights */ | |
454 | #define FM_TBL_BUFFER_CONTROL 3 /* Get/set Buffer Control */ | |
455 | #define FM_TBL_SC2VLNT 4 /* Get/set SC->VLnt */ | |
456 | #define FM_TBL_VL_PREEMPT_ELEMS 5 /* Get (no set) VL preempt elems */ | |
457 | #define FM_TBL_VL_PREEMPT_MATRIX 6 /* Get (no set) VL preempt matrix */ | |
458 | ||
/*
 * Possible "operations" for f_rcvctrl(ppd, op, ctxt).
 * Each operation is a single bit so that several can be combined, e.g.
 * HFI1_RCVCTRL_INTRAVAIL_ENB | HFI1_RCVCTRL_CTXT_ENB
 * Bits 8 and 9 are not assigned.
 */
#define HFI1_RCVCTRL_TAILUPD_ENB	(1 << 0)
#define HFI1_RCVCTRL_TAILUPD_DIS	(1 << 1)
#define HFI1_RCVCTRL_CTXT_ENB		(1 << 2)
#define HFI1_RCVCTRL_CTXT_DIS		(1 << 3)
#define HFI1_RCVCTRL_INTRAVAIL_ENB	(1 << 4)
#define HFI1_RCVCTRL_INTRAVAIL_DIS	(1 << 5)
#define HFI1_RCVCTRL_PKEY_ENB		(1 << 6) /* Note, default is enabled */
#define HFI1_RCVCTRL_PKEY_DIS		(1 << 7)
#define HFI1_RCVCTRL_TIDFLOW_ENB	(1 << 10)
#define HFI1_RCVCTRL_TIDFLOW_DIS	(1 << 11)
#define HFI1_RCVCTRL_ONE_PKT_EGR_ENB	(1 << 12)
#define HFI1_RCVCTRL_ONE_PKT_EGR_DIS	(1 << 13)
#define HFI1_RCVCTRL_NO_RHQ_DROP_ENB	(1 << 14)
#define HFI1_RCVCTRL_NO_RHQ_DROP_DIS	(1 << 15)
#define HFI1_RCVCTRL_NO_EGR_DROP_ENB	(1 << 16)
#define HFI1_RCVCTRL_NO_EGR_DROP_DIS	(1 << 17)
480 | ||
481 | /* partition enforcement flags */ | |
482 | #define HFI1_PART_ENFORCE_IN 0x1 | |
483 | #define HFI1_PART_ENFORCE_OUT 0x2 | |
484 | ||
485 | /* how often we check for synthetic counter wrap around */ | |
486 | #define SYNTH_CNT_TIME 2 | |
487 | ||
/*
 * Counter flags.  The CNTR_NORMAL..CNTR_SDMA values are distinct bits
 * (CNTR_NORMAL being "no bits set").
 */
#define CNTR_NORMAL		0x0 /* Normal counters, just read register */
#define CNTR_SYNTH		0x1 /* Synthetic counters, saturate at all 1s */
#define CNTR_DISABLED		0x2 /* Disable this counter */
#define CNTR_32BIT		0x4 /* Simulate 64 bits for this counter */
#define CNTR_VL			0x8 /* Per VL counter */
#define CNTR_SDMA		0x10
/*
 * Specifies invalid VL.  Parenthesized so the unary minus cannot bind to
 * whatever tokens surround the macro at its point of use.
 */
#define CNTR_INVALID_VL		(-1)
#define CNTR_MODE_W		0x0
#define CNTR_MODE_R		0x1
498 | ||
499 | /* VLs Supported/Operational */ | |
500 | #define HFI1_MIN_VLS_SUPPORTED 1 | |
501 | #define HFI1_MAX_VLS_SUPPORTED 8 | |
502 | ||
503 | static inline void incr_cntr64(u64 *cntr) | |
504 | { | |
505 | if (*cntr < (u64)-1LL) | |
506 | (*cntr)++; | |
507 | } | |
508 | ||
509 | static inline void incr_cntr32(u32 *cntr) | |
510 | { | |
511 | if (*cntr < (u32)-1LL) | |
512 | (*cntr)++; | |
513 | } | |
514 | ||
515 | #define MAX_NAME_SIZE 64 | |
516 | struct hfi1_msix_entry { | |
517 | struct msix_entry msix; | |
518 | void *arg; | |
519 | char name[MAX_NAME_SIZE]; | |
520 | cpumask_var_t mask; | |
521 | }; | |
522 | ||
523 | /* per-SL CCA information */ | |
524 | struct cca_timer { | |
525 | struct hrtimer hrtimer; | |
526 | struct hfi1_pportdata *ppd; /* read-only */ | |
527 | int sl; /* read-only */ | |
528 | u16 ccti; /* read/write - current value of CCTI */ | |
529 | }; | |
530 | ||
531 | struct link_down_reason { | |
532 | /* | |
533 | * SMA-facing value. Should be set from .latest when | |
534 | * HLS_UP_* -> HLS_DN_* transition actually occurs. | |
535 | */ | |
536 | u8 sma; | |
537 | u8 latest; | |
538 | }; | |
539 | ||
/*
 * Priority table selectors.  MAX_PRIO_TABLE is the number of tables and
 * must stay last; it is used to size arrays indexed by the other values.
 */
enum {
	LO_PRIO_TABLE = 0,
	HI_PRIO_TABLE,
	MAX_PRIO_TABLE	/* count of the entries above */
};
545 | ||
546 | struct vl_arb_cache { | |
547 | spinlock_t lock; | |
548 | struct ib_vl_weight_elem table[VL_ARB_TABLE_SIZE]; | |
549 | }; | |
550 | ||
551 | /* | |
552 | * The structure below encapsulates data relevant to a physical IB Port. | |
553 | * Current chips support only one such port, but the separation | |
554 | * clarifies things a bit. Note that to conform to IB conventions, | |
555 | * port-numbers are one-based. The first or only port is port1. | |
556 | */ | |
557 | struct hfi1_pportdata { | |
558 | struct hfi1_ibport ibport_data; | |
559 | ||
560 | struct hfi1_devdata *dd; | |
561 | struct kobject pport_cc_kobj; | |
562 | struct kobject sc2vl_kobj; | |
563 | struct kobject sl2sc_kobj; | |
564 | struct kobject vl2mtu_kobj; | |
565 | ||
566 | /* QSFP support */ | |
567 | struct qsfp_data qsfp_info; | |
568 | ||
569 | /* GUID for this interface, in host order */ | |
570 | u64 guid; | |
571 | /* GUID for peer interface, in host order */ | |
572 | u64 neighbor_guid; | |
573 | ||
574 | /* up or down physical link state */ | |
575 | u32 linkup; | |
576 | ||
577 | /* | |
578 | * this address is mapped read-only into user processes so they can | |
579 | * get status cheaply, whenever they want. One qword of status per port | |
580 | */ | |
581 | u64 *statusp; | |
582 | ||
583 | /* SendDMA related entries */ | |
584 | ||
585 | struct workqueue_struct *hfi1_wq; | |
586 | ||
587 | /* move out of interrupt context */ | |
588 | struct work_struct link_vc_work; | |
589 | struct work_struct link_up_work; | |
590 | struct work_struct link_down_work; | |
591 | struct work_struct sma_message_work; | |
592 | struct work_struct freeze_work; | |
593 | struct work_struct link_downgrade_work; | |
594 | struct work_struct link_bounce_work; | |
595 | /* host link state variables */ | |
596 | struct mutex hls_lock; | |
597 | u32 host_link_state; | |
598 | ||
599 | spinlock_t sdma_alllock ____cacheline_aligned_in_smp; | |
600 | ||
601 | u32 lstate; /* logical link state */ | |
602 | ||
603 | /* these are the "32 bit" regs */ | |
604 | ||
605 | u32 ibmtu; /* The MTU programmed for this unit */ | |
606 | /* | |
607 | * Current max size IB packet (in bytes) including IB headers, that | |
608 | * we can send. Changes when ibmtu changes. | |
609 | */ | |
610 | u32 ibmaxlen; | |
611 | u32 current_egress_rate; /* units [10^6 bits/sec] */ | |
612 | /* LID programmed for this instance */ | |
613 | u16 lid; | |
614 | /* list of pkeys programmed; 0 if not set */ | |
615 | u16 pkeys[MAX_PKEY_VALUES]; | |
616 | u16 link_width_supported; | |
617 | u16 link_width_downgrade_supported; | |
618 | u16 link_speed_supported; | |
619 | u16 link_width_enabled; | |
620 | u16 link_width_downgrade_enabled; | |
621 | u16 link_speed_enabled; | |
622 | u16 link_width_active; | |
623 | u16 link_width_downgrade_tx_active; | |
624 | u16 link_width_downgrade_rx_active; | |
625 | u16 link_speed_active; | |
626 | u8 vls_supported; | |
627 | u8 vls_operational; | |
628 | /* LID mask control */ | |
629 | u8 lmc; | |
630 | /* Rx Polarity inversion (compensate for ~tx on partner) */ | |
631 | u8 rx_pol_inv; | |
632 | ||
633 | u8 hw_pidx; /* physical port index */ | |
634 | u8 port; /* IB port number and index into dd->pports - 1 */ | |
635 | /* type of neighbor node */ | |
636 | u8 neighbor_type; | |
637 | u8 neighbor_normal; | |
638 | u8 neighbor_fm_security; /* 1 if firmware checking is disabled */ | |
639 | u8 neighbor_port_number; | |
640 | u8 is_sm_config_started; | |
641 | u8 offline_disabled_reason; | |
642 | u8 is_active_optimize_enabled; | |
643 | u8 driver_link_ready; /* driver ready for active link */ | |
644 | u8 link_enabled; /* link enabled? */ | |
645 | u8 linkinit_reason; | |
646 | u8 local_tx_rate; /* rate given to 8051 firmware */ | |
647 | ||
648 | /* placeholders for IB MAD packet settings */ | |
649 | u8 overrun_threshold; | |
650 | u8 phy_error_threshold; | |
651 | ||
652 | /* used to override LED behavior */ | |
653 | u8 led_override; /* Substituted for normal value, if non-zero */ | |
654 | u16 led_override_timeoff; /* delta to next timer event */ | |
655 | u8 led_override_vals[2]; /* Alternates per blink-frame */ | |
656 | u8 led_override_phase; /* Just counts, LSB picks from vals[] */ | |
657 | atomic_t led_override_timer_active; | |
658 | /* Used to flash LEDs in override mode */ | |
659 | struct timer_list led_override_timer; | |
660 | u32 sm_trap_qp; | |
661 | u32 sa_qp; | |
662 | ||
663 | /* | |
664 | * cca_timer_lock protects access to the per-SL cca_timer | |
665 | * structures (specifically the ccti member). | |
666 | */ | |
667 | spinlock_t cca_timer_lock ____cacheline_aligned_in_smp; | |
668 | struct cca_timer cca_timer[OPA_MAX_SLS]; | |
669 | ||
670 | /* List of congestion control table entries */ | |
671 | struct ib_cc_table_entry_shadow ccti_entries[CC_TABLE_SHADOW_MAX]; | |
672 | ||
673 | /* congestion entries, each entry corresponding to a SL */ | |
674 | struct opa_congestion_setting_entry_shadow | |
675 | congestion_entries[OPA_MAX_SLS]; | |
676 | ||
677 | /* | |
678 | * cc_state_lock protects (write) access to the per-port | |
679 | * struct cc_state. | |
680 | */ | |
681 | spinlock_t cc_state_lock ____cacheline_aligned_in_smp; | |
682 | ||
683 | struct cc_state __rcu *cc_state; | |
684 | ||
685 | /* Total number of congestion control table entries */ | |
686 | u16 total_cct_entry; | |
687 | ||
688 | /* Bit map identifying service level */ | |
689 | u32 cc_sl_control_map; | |
690 | ||
691 | /* CA's max number of 64 entry units in the congestion control table */ | |
692 | u8 cc_max_table_entries; | |
693 | ||
694 | /* begin congestion log related entries | |
695 | * cc_log_lock protects all congestion log related data */ | |
696 | spinlock_t cc_log_lock ____cacheline_aligned_in_smp; | |
697 | u8 threshold_cong_event_map[OPA_MAX_SLS/8]; | |
698 | u16 threshold_event_counter; | |
699 | struct opa_hfi1_cong_log_event_internal cc_events[OPA_CONG_LOG_ELEMS]; | |
700 | int cc_log_idx; /* index for logging events */ | |
701 | int cc_mad_idx; /* index for reporting events */ | |
702 | /* end congestion log related entries */ | |
703 | ||
704 | struct vl_arb_cache vl_arb_cache[MAX_PRIO_TABLE]; | |
705 | ||
706 | /* port relative counter buffer */ | |
707 | u64 *cntrs; | |
708 | /* port relative synthetic counter buffer */ | |
709 | u64 *scntrs; | |
710 | /* we synthesize port_xmit_discards from several egress errors */ | |
711 | u64 port_xmit_discards; | |
712 | u64 port_xmit_constraint_errors; | |
713 | u64 port_rcv_constraint_errors; | |
714 | /* count of 'link_err' interrupts from DC */ | |
715 | u64 link_downed; | |
716 | /* number of times link retrained successfully */ | |
717 | u64 link_up; | |
6d014530 DL |
718 | /* number of times a link unknown frame was reported */ |
719 | u64 unknown_frame_count; | |
77241056 MM |
720 | /* port_ltp_crc_mode is returned in 'portinfo' MADs */ |
721 | u16 port_ltp_crc_mode; | |
722 | /* port_crc_mode_enabled is the crc we support */ | |
723 | u8 port_crc_mode_enabled; | |
724 | /* mgmt_allowed is also returned in 'portinfo' MADs */ | |
725 | u8 mgmt_allowed; | |
726 | u8 part_enforce; /* partition enforcement flags */ | |
727 | struct link_down_reason local_link_down_reason; | |
728 | struct link_down_reason neigh_link_down_reason; | |
729 | /* Value to be sent to link peer on LinkDown .*/ | |
730 | u8 remote_link_down_reason; | |
731 | /* Error events that will cause a port bounce. */ | |
732 | u32 port_error_action; | |
fb9036dd | 733 | struct work_struct linkstate_active_work; |
77241056 MM |
734 | }; |
735 | ||
736 | typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet); | |
737 | ||
738 | typedef void (*opcode_handler)(struct hfi1_packet *packet); | |
739 | ||
740 | /* return values for the RHF receive functions */ | |
741 | #define RHF_RCV_CONTINUE 0 /* keep going */ | |
742 | #define RHF_RCV_DONE 1 /* stop, this packet processed */ | |
743 | #define RHF_RCV_REPROCESS 2 /* stop. retain this packet */ | |
744 | ||
745 | struct rcv_array_data { | |
746 | u8 group_size; | |
747 | u16 ngroups; | |
748 | u16 nctxt_extra; | |
749 | }; | |
750 | ||
751 | struct per_vl_data { | |
752 | u16 mtu; | |
753 | struct send_context *sc; | |
754 | }; | |
755 | ||
756 | /* 16 to directly index */ | |
757 | #define PER_VL_SEND_CONTEXTS 16 | |
758 | ||
759 | struct err_info_rcvport { | |
760 | u8 status_and_code; | |
761 | u64 packet_flit1; | |
762 | u64 packet_flit2; | |
763 | }; | |
764 | ||
765 | struct err_info_constraint { | |
766 | u8 status; | |
767 | u16 pkey; | |
768 | u32 slid; | |
769 | }; | |
770 | ||
771 | struct hfi1_temp { | |
772 | unsigned int curr; /* current temperature */ | |
773 | unsigned int lo_lim; /* low temperature limit */ | |
774 | unsigned int hi_lim; /* high temperature limit */ | |
775 | unsigned int crit_lim; /* critical temperature limit */ | |
776 | u8 triggers; /* temperature triggers */ | |
777 | }; | |
778 | ||
779 | /* device data struct now contains only "general per-device" info. | |
780 | * fields related to a physical IB port are in a hfi1_pportdata struct. | |
781 | */ | |
782 | struct sdma_engine; | |
783 | struct sdma_vl_map; | |
784 | ||
785 | #define BOARD_VERS_MAX 96 /* how long the version string can be */ | |
786 | #define SERIAL_MAX 16 /* length of the serial number */ | |
787 | ||
/*
 * General per-device state: chip register mappings, context tables, state
 * shared by all SDMA engines, saved PCI configuration, counters and the
 * locks that guard them.  verbs_dev must stay the first member.
 */
struct hfi1_devdata {
	struct hfi1_ibdev verbs_dev; /* must be first */
	struct list_head list;
	/* pointers to related structs for this device */
	/* pci access data structure */
	struct pci_dev *pcidev;
	struct cdev user_cdev;
	struct cdev diag_cdev;
	struct cdev ui_cdev;
	struct device *user_device;
	struct device *diag_device;
	struct device *ui_device;

	/* mem-mapped pointer to base of chip regs */
	u8 __iomem *kregbase;
	/* end of mem-mapped chip space excluding sendbuf and user regs */
	u8 __iomem *kregend;
	/* physical address of chip for io_remap, etc. */
	resource_size_t physaddr;
	/* receive context data */
	struct hfi1_ctxtdata **rcd;
	/* send context data */
	struct send_context_info *send_contexts;
	/* map hardware send contexts to software index */
	u8 *hw_to_sw;
	/* spinlock for allocating and releasing send context resources */
	spinlock_t sc_lock;
	/* Per VL data. Enough for all VLs but not all elements are set/used. */
	struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
	/* seqlock for sc2vl */
	seqlock_t sc2vl_lock;
	/* SC to VL table; sc_to_vlt() reads it as bytes indexed by sc5 */
	u64 sc2vl[4];
	/* Send Context initialization lock. */
	spinlock_t sc_init_lock;

	/* fields common to all SDMA engines */

	/* default flags to last descriptor */
	u64 default_desc1;
	volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */
	dma_addr_t sdma_heads_phys;
	void *sdma_pad_dma; /* DMA'ed by chip */
	dma_addr_t sdma_pad_phys;
	/* for deallocation */
	size_t sdma_heads_size;
	/* number from the chip */
	u32 chip_sdma_engines;
	/* num used */
	u32 num_sdma;
	/* lock for sdma_map */
	spinlock_t sde_map_lock;
	/* array of engines sized by num_sdma */
	struct sdma_engine *per_sdma;
	/* array of vl maps */
	struct sdma_vl_map __rcu *sdma_map;
	/* SPC freeze waitqueue and variable */
	wait_queue_head_t sdma_unfreeze_wq;
	atomic_t sdma_unfreeze_count;


	/* hfi1_pportdata, points to array of (physical) port-specific
	 * data structs, indexed by pidx (0..n-1)
	 */
	struct hfi1_pportdata *pport;

	/* mem-mapped pointer to base of PIO buffers */
	void __iomem *piobase;
	/*
	 * write-combining mem-mapped pointer to base of RcvArray
	 * memory.
	 */
	void __iomem *rcvarray_wc;
	/*
	 * credit return base - a per-NUMA range of DMA address that
	 * the chip will use to update the per-context free counter
	 */
	struct credit_return_base *cr_base;

	/* send context numbers and sizes for each type */
	struct sc_config_sizes sc_sizes[SC_MAX];

	u32 lcb_access_count; /* count of LCB users */

	char *boardname; /* human readable board info */

	/* device (not port) flags, basically device capabilities */
	u32 flags;

	/* reset value */
	u64 z_int_counter;
	u64 z_rcv_limit;
	/* percpu int_counter */
	u64 __percpu *int_counter;
	u64 __percpu *rcv_limit;

	/* number of receive contexts in use by the driver */
	u32 num_rcv_contexts;
	/* number of pio send contexts in use by the driver */
	u32 num_send_contexts;
	/*
	 * number of ctxts available for PSM open
	 */
	u32 freectxts;
	/* base receive interrupt timeout, in CSR units */
	u32 rcv_intr_timeout_csr;

	u64 __iomem *egrtidbase;
	spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
	spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
	/* around rcd and (user ctxts) ctxt_cnt use (intr vs free) */
	spinlock_t uctxt_lock; /* rcd and user context changes */
	/* exclusive access to 8051 */
	spinlock_t dc8051_lock;
	/* exclusive access to 8051 memory */
	spinlock_t dc8051_memlock;
	int dc8051_timed_out; /* remember if the 8051 timed out */
	/*
	 * A page that will hold event notification bitmaps for all
	 * contexts. This page will be mapped into all processes.
	 */
	unsigned long *events;
	/*
	 * per unit status, see also portdata statusp
	 * mapped read-only into user processes so they can get unit and
	 * IB link status cheaply
	 */
	struct hfi1_status *status;
	u32 freezelen; /* max length of freezemsg */

	/* revision register shadow */
	u64 revision;
	/* Base GUID for device (network order) */
	u64 base_guid;

	/* these are the "32 bit" regs */

	/* value we put in kr_rcvhdrsize */
	u32 rcvhdrsize;
	/* number of receive contexts the chip supports */
	u32 chip_rcv_contexts;
	/* number of receive array entries */
	u32 chip_rcv_array_count;
	/* number of PIO send contexts the chip supports */
	u32 chip_send_contexts;
	/* number of bytes in the PIO memory buffer */
	u32 chip_pio_mem_size;
	/* number of bytes in the SDMA memory buffer */
	u32 chip_sdma_mem_size;

	/* size of each rcvegrbuffer */
	u32 rcvegrbufsize;
	/* log2 of above */
	u16 rcvegrbufsize_shift;
	/* both sides of the PCIe link are gen3 capable */
	u8 link_gen3_capable;
	/* localbus width (1, 2, 4, 8, 16, 32) from config space */
	u32 lbus_width;
	/* localbus speed in MHz */
	u32 lbus_speed;
	int unit; /* unit # of this chip */
	int node; /* home node of this chip */

	/* save these PCI fields to restore after a reset */
	u32 pcibar0;
	u32 pcibar1;
	u32 pci_rom;
	u16 pci_command;
	u16 pcie_devctl;
	u16 pcie_lnkctl;
	u16 pcie_devctl2;
	u32 pci_msix0;
	u32 pci_lnkctl3;
	u32 pci_tph2;

	/*
	 * ASCII serial number, from flash, large enough for original
	 * all digit strings, and longer serial number format
	 */
	u8 serial[SERIAL_MAX];
	/* human readable board version */
	u8 boardversion[BOARD_VERS_MAX];
	u8 lbus_info[32]; /* human readable localbus info */
	/* chip major rev, from CceRevision */
	u8 majrev;
	/* chip minor rev, from CceRevision */
	u8 minrev;
	/* hardware ID */
	u8 hfi1_id;
	/* implementation code */
	u8 icode;
	/* default link down value (poll/sleep) */
	u8 link_default;
	/* vAU of this device */
	u8 vau;
	/* vCU of this device */
	u8 vcu;
	/* link credits of this device */
	u16 link_credits;
	/* initial vl15 credits to use */
	u16 vl15_init;

	/* Misc small ints */
	/* Number of physical ports available */
	u8 num_pports;
	/* Lowest context number which can be used by user processes */
	u8 first_user_ctxt;
	u8 n_krcv_queues;
	u8 qos_shift;
	u8 qpn_mask;

	u16 rhf_offset; /* offset of RHF within receive header entry */
	u16 irev; /* implementation revision */
	u16 dc8051_ver; /* 8051 firmware version */

	struct platform_config_cache pcfg_cache;
	/* control high-level access to qsfp */
	struct mutex qsfp_i2c_mutex;

	struct diag_client *diag_client;
	spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */

	u8 psxmitwait_supported;
	/* cycle length of PS* counters in HW (in picoseconds) */
	u16 psxmitwait_check_rate;
	/* high volume overflow errors deferred to tasklet */
	struct tasklet_struct error_tasklet;

	/* MSI-X information */
	struct hfi1_msix_entry *msix_entries;
	u32 num_msix_entries;

	/* INTx information */
	u32 requested_intx_irq; /* did we request one? */
	char intx_name[MAX_NAME_SIZE]; /* INTx name */

	/* general interrupt: mask of handled interrupts */
	u64 gi_mask[CCE_NUM_INT_CSRS];

	struct rcv_array_data rcv_entries;

	/*
	 * 64 bit synthetic counters
	 */
	struct timer_list synth_stats_timer;

	/*
	 * device counters
	 */
	char *cntrnames;
	size_t cntrnameslen;
	size_t ndevcntrs;
	u64 *cntrs;
	u64 *scntrs;

	/*
	 * remembered values for synthetic counters
	 */
	u64 last_tx;
	u64 last_rx;

	/*
	 * per-port counters
	 */
	size_t nportcntrs;
	char *portcntrnames;
	size_t portcntrnameslen;

	struct hfi1_snoop_data hfi1_snoop;

	struct err_info_rcvport err_info_rcvport;
	struct err_info_constraint err_info_rcv_constraint;
	struct err_info_constraint err_info_xmit_constraint;
	u8 err_info_uncorrectable;
	u8 err_info_fmconfig;

	atomic_t drop_packet;
	u8 do_drop;

	/*
	 * Software counters for the status bits defined by the
	 * associated error status registers
	 */
	u64 cce_err_status_cnt[NUM_CCE_ERR_STATUS_COUNTERS];
	u64 rcv_err_status_cnt[NUM_RCV_ERR_STATUS_COUNTERS];
	u64 misc_err_status_cnt[NUM_MISC_ERR_STATUS_COUNTERS];
	u64 send_pio_err_status_cnt[NUM_SEND_PIO_ERR_STATUS_COUNTERS];
	u64 send_dma_err_status_cnt[NUM_SEND_DMA_ERR_STATUS_COUNTERS];
	u64 send_egress_err_status_cnt[NUM_SEND_EGRESS_ERR_STATUS_COUNTERS];
	u64 send_err_status_cnt[NUM_SEND_ERR_STATUS_COUNTERS];

	/* Software counter that spans all contexts */
	u64 sw_ctxt_err_status_cnt[NUM_SEND_CTXT_ERR_STATUS_COUNTERS];
	/* Software counter that spans all DMA engines */
	u64 sw_send_dma_eng_err_status_cnt[
		NUM_SEND_DMA_ENG_ERR_STATUS_COUNTERS];
	/* Software counter that aggregates all cce_err_status errors */
	u64 sw_cce_err_status_aggregate;

	/* receive interrupt functions */
	rhf_rcv_function_ptr *rhf_rcv_function_map;
	rhf_rcv_function_ptr normal_rhf_rcv_functions[8];

	/*
	 * Handlers for outgoing data so that snoop/capture does not
	 * have to have its hooks in the send path
	 */
	int (*process_pio_send)(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
				u64 pbc);
	int (*process_dma_send)(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
				u64 pbc);
	void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
				u64 pbc, const void *from, size_t count);

	/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
	u8 oui1;
	u8 oui2;
	u8 oui3;
	/* Timer and counter used to detect RcvBufOvflCnt changes */
	struct timer_list rcverr_timer;
	u32 rcv_ovfl_cnt;

	int assigned_node_id;
	wait_queue_head_t event_queue;

	/* Save the enabled LCB error bits */
	u64 lcb_err_en;
	u8 dc_shutdown;

	/* receive context tail dummy address */
	__le64 *rcvhdrtail_dummy_kvaddr;
	dma_addr_t rcvhdrtail_dummy_physaddr;
};
1120 | ||
1121 | /* 8051 firmware version helper */ | |
1122 | #define dc8051_ver(a, b) ((a) << 8 | (b)) | |
1123 | ||
1124 | /* f_put_tid types */ | |
1125 | #define PT_EXPECTED 0 | |
1126 | #define PT_EAGER 1 | |
1127 | #define PT_INVALID 2 | |
1128 | ||
f727a0c3 MH |
1129 | struct mmu_rb_node; |
1130 | ||
77241056 MM |
/*
 * Private data for file operations.
 * NOTE(review): presumably one instance per open file - confirm.
 */
struct hfi1_filedata {
	struct hfi1_ctxtdata *uctxt;
	unsigned subctxt;
	struct hfi1_user_sdma_comp_q *cq;
	struct hfi1_user_sdma_pkt_q *pq;
	/* for cpu affinity; -1 if none */
	int rec_cpu_num;
	struct mmu_notifier mn;
	struct rb_root tid_rb_root;
	spinlock_t tid_lock; /* protect tid_[limit,used] counters */
	u32 tid_limit;
	u32 tid_used;
	spinlock_t rb_lock; /* protect tid_rb_root RB tree */
	u32 *invalid_tids;
	u32 invalid_tid_idx;
	spinlock_t invalid_lock; /* protect the invalid_tids array */
	/* insert hook taking an rb tree root and a node to add */
	int (*mmu_rb_insert)(struct rb_root *, struct mmu_rb_node *);
};
1150 | ||
1151 | extern struct list_head hfi1_dev_list; | |
1152 | extern spinlock_t hfi1_devs_lock; | |
1153 | struct hfi1_devdata *hfi1_lookup(int unit); | |
1154 | extern u32 hfi1_cpulist_count; | |
1155 | extern unsigned long *hfi1_cpulist; | |
1156 | ||
1157 | extern unsigned int snoop_drop_send; | |
1158 | extern unsigned int snoop_force_capture; | |
1159 | int hfi1_init(struct hfi1_devdata *, int); | |
1160 | int hfi1_count_units(int *npresentp, int *nupp); | |
1161 | int hfi1_count_active_units(void); | |
1162 | ||
1163 | int hfi1_diag_add(struct hfi1_devdata *); | |
1164 | void hfi1_diag_remove(struct hfi1_devdata *); | |
1165 | void handle_linkup_change(struct hfi1_devdata *dd, u32 linkup); | |
1166 | ||
1167 | void handle_user_interrupt(struct hfi1_ctxtdata *rcd); | |
1168 | ||
1169 | int hfi1_create_rcvhdrq(struct hfi1_devdata *, struct hfi1_ctxtdata *); | |
1170 | int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *); | |
1171 | int hfi1_create_ctxts(struct hfi1_devdata *dd); | |
1172 | struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *, u32); | |
1173 | void hfi1_init_pportdata(struct pci_dev *, struct hfi1_pportdata *, | |
1174 | struct hfi1_devdata *, u8, u8); | |
1175 | void hfi1_free_ctxtdata(struct hfi1_devdata *, struct hfi1_ctxtdata *); | |
1176 | ||
f4f30031 DL |
1177 | int handle_receive_interrupt(struct hfi1_ctxtdata *, int); |
1178 | int handle_receive_interrupt_nodma_rtail(struct hfi1_ctxtdata *, int); | |
1179 | int handle_receive_interrupt_dma_rtail(struct hfi1_ctxtdata *, int); | |
fb9036dd | 1180 | void set_all_slowpath(struct hfi1_devdata *dd); |
f4f30031 DL |
1181 | |
1182 | /* receive packet handler dispositions */ | |
1183 | #define RCV_PKT_OK 0x0 /* keep going */ | |
1184 | #define RCV_PKT_LIMIT 0x1 /* stop, hit limit, start thread */ | |
1185 | #define RCV_PKT_DONE 0x2 /* stop, no more packets detected */ | |
1186 | ||
1187 | /* calculate the current RHF address */ | |
1188 | static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd) | |
1189 | { | |
1190 | return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->dd->rhf_offset; | |
1191 | } | |
1192 | ||
77241056 MM |
1193 | int hfi1_reset_device(int); |
1194 | ||
1195 | /* return the driver's idea of the logical OPA port state */ | |
1196 | static inline u32 driver_lstate(struct hfi1_pportdata *ppd) | |
1197 | { | |
1198 | return ppd->lstate; /* use the cached value */ | |
1199 | } | |
1200 | ||
fb9036dd JS |
1201 | void receive_interrupt_work(struct work_struct *work); |
1202 | ||
1203 | /* extract service channel from header and rhf */ | |
1204 | static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) | |
1205 | { | |
1206 | return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | | |
1207 | ((!!(rhf & RHF_DC_INFO_MASK)) << 4); | |
1208 | } | |
1209 | ||
77241056 MM |
1210 | static inline u16 generate_jkey(kuid_t uid) |
1211 | { | |
1212 | return from_kuid(current_user_ns(), uid) & 0xffff; | |
1213 | } | |
1214 | ||
1215 | /* | |
1216 | * active_egress_rate | |
1217 | * | |
1218 | * returns the active egress rate in units of [10^6 bits/sec] | |
1219 | */ | |
1220 | static inline u32 active_egress_rate(struct hfi1_pportdata *ppd) | |
1221 | { | |
1222 | u16 link_speed = ppd->link_speed_active; | |
1223 | u16 link_width = ppd->link_width_active; | |
1224 | u32 egress_rate; | |
1225 | ||
1226 | if (link_speed == OPA_LINK_SPEED_25G) | |
1227 | egress_rate = 25000; | |
1228 | else /* assume OPA_LINK_SPEED_12_5G */ | |
1229 | egress_rate = 12500; | |
1230 | ||
1231 | switch (link_width) { | |
1232 | case OPA_LINK_WIDTH_4X: | |
1233 | egress_rate *= 4; | |
1234 | break; | |
1235 | case OPA_LINK_WIDTH_3X: | |
1236 | egress_rate *= 3; | |
1237 | break; | |
1238 | case OPA_LINK_WIDTH_2X: | |
1239 | egress_rate *= 2; | |
1240 | break; | |
1241 | default: | |
1242 | /* assume IB_WIDTH_1X */ | |
1243 | break; | |
1244 | } | |
1245 | ||
1246 | return egress_rate; | |
1247 | } | |
1248 | ||
/*
 * egress_cycles
 *
 * Returns the number of 'fabric clock cycles' to egress a packet
 * of length 'len' bytes, at 'rate' Mbit/s.  Since the fabric clock
 * rate is (approximately) 805 MHz, the units of the returned value
 * are (1/805 MHz).
 *
 * 'rate' must be non-zero.  The intermediate product is kept in 64 bits
 * so that large lengths do not wrap, and a quotient that does not fit in
 * 32 bits is reported as the largest u32 value instead of being truncated.
 */
static inline u32 egress_cycles(u32 len, u32 rate)
{
	u64 cycles;

	/*
	 * cycles is:
	 *
	 *      (length) [bits] / (rate) [10^6 bits/sec]
	 *  ------------------------------------------------
	 *  fabric clock period == 1 / (805 * 10^6) [sec/cycle]
	 *
	 * The 10^6 factors cancel, leaving len * 8 * 805 / rate.
	 */
	cycles = (u64)len * 8;	/* bits */
	cycles *= 805;
	cycles /= rate;

	if (cycles > 0xffffffffULL)
		return 0xffffffffU;

	return (u32)cycles;
}
1275 | ||
1276 | void set_link_ipg(struct hfi1_pportdata *ppd); | |
1277 | void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, | |
1278 | u32 rqpn, u8 svc_type); | |
895420dd | 1279 | void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, |
77241056 MM |
1280 | u32 pkey, u32 slid, u32 dlid, u8 sc5, |
1281 | const struct ib_grh *old_grh); | |
1282 | ||
1283 | #define PACKET_EGRESS_TIMEOUT 350 | |
1284 | static inline void pause_for_credit_return(struct hfi1_devdata *dd) | |
1285 | { | |
1286 | /* Pause at least 1us, to ensure chip returns all credits */ | |
1287 | u32 usec = cclock_to_ns(dd, PACKET_EGRESS_TIMEOUT) / 1000; | |
1288 | ||
1289 | udelay(usec ? usec : 1); | |
1290 | } | |
1291 | ||
1292 | /** | |
1293 | * sc_to_vlt() reverse lookup sc to vl | |
1294 | * @dd - devdata | |
1295 | * @sc5 - 5 bit sc | |
1296 | */ | |
1297 | static inline u8 sc_to_vlt(struct hfi1_devdata *dd, u8 sc5) | |
1298 | { | |
1299 | unsigned seq; | |
1300 | u8 rval; | |
1301 | ||
1302 | if (sc5 >= OPA_MAX_SCS) | |
1303 | return (u8)(0xff); | |
1304 | ||
1305 | do { | |
1306 | seq = read_seqbegin(&dd->sc2vl_lock); | |
1307 | rval = *(((u8 *)dd->sc2vl) + sc5); | |
1308 | } while (read_seqretry(&dd->sc2vl_lock, seq)); | |
1309 | ||
1310 | return rval; | |
1311 | } | |
1312 | ||
#define PKEY_MEMBER_MASK 0x8000
#define PKEY_LOW_15_MASK 0x7fff

/*
 * ingress_pkey_matches_entry - return 1 if the pkey matches ent (ent
 * being an entry from the ingress partition key table), return 0
 * otherwise.  Use the matching criteria for ingress partition keys
 * specified in the OPAv1 spec., section 9.10.14.
 *
 * The low 15 bits must be equal.  Beyond that, a pkey with bit 15 set
 * matches any such entry, while a pkey with bit 15 clear (limited
 * partition member) matches only an entry that has bit 15 set.
 */
static inline int ingress_pkey_matches_entry(u16 pkey, u16 ent)
{
	/* any difference in the low 15 bits rules the entry out */
	if ((pkey ^ ent) & PKEY_LOW_15_MASK)
		return 0;

	/* pkey[15] set: the entry's membership bit does not matter */
	if (pkey & PKEY_MEMBER_MASK)
		return 1;

	/* pkey[15] clear: the table entry must have bit 15 set */
	return (ent & PKEY_MEMBER_MASK) ? 1 : 0;
}
1339 | ||
1340 | /* | |
1341 | * ingress_pkey_table_search - search the entire pkey table for | |
1342 | * an entry which matches 'pkey'. return 0 if a match is found, | |
1343 | * and 1 otherwise. | |
1344 | */ | |
1345 | static int ingress_pkey_table_search(struct hfi1_pportdata *ppd, u16 pkey) | |
1346 | { | |
1347 | int i; | |
1348 | ||
1349 | for (i = 0; i < MAX_PKEY_VALUES; i++) { | |
1350 | if (ingress_pkey_matches_entry(pkey, ppd->pkeys[i])) | |
1351 | return 0; | |
1352 | } | |
1353 | return 1; | |
1354 | } | |
1355 | ||
1356 | /* | |
1357 | * ingress_pkey_table_fail - record a failure of ingress pkey validation, | |
1358 | * i.e., increment port_rcv_constraint_errors for the port, and record | |
1359 | * the 'error info' for this failure. | |
1360 | */ | |
1361 | static void ingress_pkey_table_fail(struct hfi1_pportdata *ppd, u16 pkey, | |
1362 | u16 slid) | |
1363 | { | |
1364 | struct hfi1_devdata *dd = ppd->dd; | |
1365 | ||
1366 | incr_cntr64(&ppd->port_rcv_constraint_errors); | |
1367 | if (!(dd->err_info_rcv_constraint.status & OPA_EI_STATUS_SMASK)) { | |
1368 | dd->err_info_rcv_constraint.status |= OPA_EI_STATUS_SMASK; | |
1369 | dd->err_info_rcv_constraint.slid = slid; | |
1370 | dd->err_info_rcv_constraint.pkey = pkey; | |
1371 | } | |
1372 | } | |
1373 | ||
1374 | /* | |
1375 | * ingress_pkey_check - Return 0 if the ingress pkey is valid, return 1 | |
1376 | * otherwise. Use the criteria in the OPAv1 spec, section 9.10.14. idx | |
1377 | * is a hint as to the best place in the partition key table to begin | |
1378 | * searching. This function should not be called on the data path because | |
1379 | * of performance reasons. On datapath pkey check is expected to be done | |
1380 | * by HW and rcv_pkey_check function should be called instead. | |
1381 | */ | |
1382 | static inline int ingress_pkey_check(struct hfi1_pportdata *ppd, u16 pkey, | |
1383 | u8 sc5, u8 idx, u16 slid) | |
1384 | { | |
1385 | if (!(ppd->part_enforce & HFI1_PART_ENFORCE_IN)) | |
1386 | return 0; | |
1387 | ||
1388 | /* If SC15, pkey[0:14] must be 0x7fff */ | |
1389 | if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK)) | |
1390 | goto bad; | |
1391 | ||
1392 | /* Is the pkey = 0x0, or 0x8000? */ | |
1393 | if ((pkey & PKEY_LOW_15_MASK) == 0) | |
1394 | goto bad; | |
1395 | ||
1396 | /* The most likely matching pkey has index 'idx' */ | |
1397 | if (ingress_pkey_matches_entry(pkey, ppd->pkeys[idx])) | |
1398 | return 0; | |
1399 | ||
1400 | /* no match - try the whole table */ | |
1401 | if (!ingress_pkey_table_search(ppd, pkey)) | |
1402 | return 0; | |
1403 | ||
1404 | bad: | |
1405 | ingress_pkey_table_fail(ppd, pkey, slid); | |
1406 | return 1; | |
1407 | } | |
1408 | ||
1409 | /* | |
1410 | * rcv_pkey_check - Return 0 if the ingress pkey is valid, return 1 | |
1411 | * otherwise. It only ensures pkey is vlid for QP0. This function | |
1412 | * should be called on the data path instead of ingress_pkey_check | |
1413 | * as on data path, pkey check is done by HW (except for QP0). | |
1414 | */ | |
1415 | static inline int rcv_pkey_check(struct hfi1_pportdata *ppd, u16 pkey, | |
1416 | u8 sc5, u16 slid) | |
1417 | { | |
1418 | if (!(ppd->part_enforce & HFI1_PART_ENFORCE_IN)) | |
1419 | return 0; | |
1420 | ||
1421 | /* If SC15, pkey[0:14] must be 0x7fff */ | |
1422 | if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK)) | |
1423 | goto bad; | |
1424 | ||
1425 | return 0; | |
1426 | bad: | |
1427 | ingress_pkey_table_fail(ppd, pkey, slid); | |
1428 | return 1; | |
1429 | } | |
1430 | ||
1431 | /* MTU handling */ | |
1432 | ||
1433 | /* MTU enumeration, 256-4k match IB */ | |
1434 | #define OPA_MTU_0 0 | |
1435 | #define OPA_MTU_256 1 | |
1436 | #define OPA_MTU_512 2 | |
1437 | #define OPA_MTU_1024 3 | |
1438 | #define OPA_MTU_2048 4 | |
1439 | #define OPA_MTU_4096 5 | |
1440 | ||
1441 | u32 lrh_max_header_bytes(struct hfi1_devdata *dd); | |
1442 | int mtu_to_enum(u32 mtu, int default_if_bad); | |
1443 | u16 enum_to_mtu(int); | |
/* Return 1 when mtu is one of 256, 512, 1024, 2048 or 4096; 0 otherwise. */
static inline int valid_ib_mtu(unsigned int mtu)
{
	switch (mtu) {
	case 256:
	case 512:
	case 1024:
	case 2048:
	case 4096:
		return 1;
	default:
		return 0;
	}
}
/*
 * Return 1 when mtu is at least 2048 and is either 8192, 10240 or a
 * value accepted by valid_ib_mtu(); return 0 otherwise.
 */
static inline int valid_opa_max_mtu(unsigned int mtu)
{
	if (mtu < 2048)
		return 0;

	if (mtu == 8192 || mtu == 10240)
		return 1;

	return valid_ib_mtu(mtu) != 0;
}
1455 | ||
1456 | int set_mtu(struct hfi1_pportdata *); | |
1457 | ||
1458 | int hfi1_set_lid(struct hfi1_pportdata *, u32, u8); | |
1459 | void hfi1_disable_after_error(struct hfi1_devdata *); | |
1460 | int hfi1_set_uevent_bits(struct hfi1_pportdata *, const int); | |
1461 | int hfi1_rcvbuf_validate(u32, u8, u16 *); | |
1462 | ||
1463 | int fm_get_table(struct hfi1_pportdata *, int, void *); | |
1464 | int fm_set_table(struct hfi1_pportdata *, int, void *); | |
1465 | ||
1466 | void set_up_vl15(struct hfi1_devdata *dd, u8 vau, u16 vl15buf); | |
1467 | void reset_link_credits(struct hfi1_devdata *dd); | |
1468 | void assign_remote_cm_au_table(struct hfi1_devdata *dd, u8 vcu); | |
1469 | ||
1470 | int snoop_recv_handler(struct hfi1_packet *packet); | |
895420dd | 1471 | int snoop_send_dma_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, |
d46e5144 | 1472 | u64 pbc); |
895420dd | 1473 | int snoop_send_pio_handler(struct rvt_qp *qp, struct hfi1_pkt_state *ps, |
d46e5144 | 1474 | u64 pbc); |
77241056 MM |
1475 | void snoop_inline_pio_send(struct hfi1_devdata *dd, struct pio_buf *pbuf, |
1476 | u64 pbc, const void *from, size_t count); | |
1477 | ||
77241056 MM |
1478 | static inline struct hfi1_devdata *dd_from_ppd(struct hfi1_pportdata *ppd) |
1479 | { | |
1480 | return ppd->dd; | |
1481 | } | |
1482 | ||
1483 | static inline struct hfi1_devdata *dd_from_dev(struct hfi1_ibdev *dev) | |
1484 | { | |
1485 | return container_of(dev, struct hfi1_devdata, verbs_dev); | |
1486 | } | |
1487 | ||
/* Map an ib_device to its device data, going through to_idev(). */
static inline struct hfi1_devdata *dd_from_ibdev(struct ib_device *ibdev)
{
	struct hfi1_ibdev *dev = to_idev(ibdev);

	return dd_from_dev(dev);
}
1492 | ||
1493 | static inline struct hfi1_pportdata *ppd_from_ibp(struct hfi1_ibport *ibp) | |
1494 | { | |
1495 | return container_of(ibp, struct hfi1_pportdata, ibport_data); | |
1496 | } | |
1497 | ||
1498 | static inline struct hfi1_ibport *to_iport(struct ib_device *ibdev, u8 port) | |
1499 | { | |
1500 | struct hfi1_devdata *dd = dd_from_ibdev(ibdev); | |
1501 | unsigned pidx = port - 1; /* IB number port from 1, hdw from 0 */ | |
1502 | ||
1503 | WARN_ON(pidx >= dd->num_pports); | |
1504 | return &dd->pport[pidx].ibport_data; | |
1505 | } | |
1506 | ||
1507 | /* | |
1508 | * Return the indexed PKEY from the port PKEY table. | |
1509 | */ | |
1510 | static inline u16 hfi1_get_pkey(struct hfi1_ibport *ibp, unsigned index) | |
1511 | { | |
1512 | struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); | |
1513 | u16 ret; | |
1514 | ||
1515 | if (index >= ARRAY_SIZE(ppd->pkeys)) | |
1516 | ret = 0; | |
1517 | else | |
1518 | ret = ppd->pkeys[index]; | |
1519 | ||
1520 | return ret; | |
1521 | } | |
1522 | ||
1523 | /* | |
1524 | * Readers of cc_state must call get_cc_state() under rcu_read_lock(). | |
1525 | * Writers of cc_state must call get_cc_state() under cc_state_lock. | |
1526 | */ | |
1527 | static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd) | |
1528 | { | |
1529 | return rcu_dereference(ppd->cc_state); | |
1530 | } | |
1531 | ||
1532 | /* | |
1533 | * values for dd->flags (_device_ related flags) | |
1534 | */ | |
1535 | #define HFI1_INITTED 0x1 /* chip and driver up and initted */ | |
1536 | #define HFI1_PRESENT 0x2 /* chip accesses can be done */ | |
1537 | #define HFI1_FROZEN 0x4 /* chip in SPC freeze */ | |
1538 | #define HFI1_HAS_SDMA_TIMEOUT 0x8 | |
1539 | #define HFI1_HAS_SEND_DMA 0x10 /* Supports Send DMA */ | |
1540 | #define HFI1_FORCED_FREEZE 0x80 /* driver forced freeze mode */ | |
1541 | #define HFI1_DO_INIT_ASIC 0x100 /* This device will init the ASIC */ | |
1542 | ||
1543 | /* IB dword length mask in PBC (lower 11 bits); same for all chips */ | |
1544 | #define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1) | |
1545 | ||
1546 | ||
1547 | /* ctxt_flag bit offsets */ | |
1548 | /* context has been setup */ | |
1549 | #define HFI1_CTXT_SETUP_DONE 1 | |
1550 | /* waiting for a packet to arrive */ | |
1551 | #define HFI1_CTXT_WAITING_RCV 2 | |
1552 | /* master has not finished initializing */ | |
1553 | #define HFI1_CTXT_MASTER_UNINIT 4 | |
1554 | /* waiting for an urgent packet to arrive */ | |
1555 | #define HFI1_CTXT_WAITING_URG 5 | |
1556 | ||
1557 | /* free up any allocated data at closes */ | |
1558 | struct hfi1_devdata *hfi1_init_dd(struct pci_dev *, | |
1559 | const struct pci_device_id *); | |
1560 | void hfi1_free_devdata(struct hfi1_devdata *); | |
1561 | void cc_state_reclaim(struct rcu_head *rcu); | |
1562 | struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); | |
1563 | ||
1564 | /* | |
1565 | * Set LED override, only the two LSBs have "public" meaning, but | |
1566 | * any non-zero value substitutes them for the Link and LinkTrain | |
1567 | * LED states. | |
1568 | */ | |
1569 | #define HFI1_LED_PHYS 1 /* Physical (linktraining) GREEN LED */ | |
1570 | #define HFI1_LED_LOG 2 /* Logical (link) YELLOW LED */ | |
1571 | void hfi1_set_led_override(struct hfi1_pportdata *ppd, unsigned int val); | |
1572 | ||
1573 | #define HFI1_CREDIT_RETURN_RATE (100) | |
1574 | ||
/*
 * The number of words for the KDETH protocol field.  If this is
 * larger than the actual field used, then part of the payload
 * will be in the header.
 *
 * Optimally, we want this sized so that a typical case will
 * use full cache lines.  The typical local KDETH header would
 * be:
 *
 *	Bytes	Field
 *	  8	LRH
 *	 12	BTH
 *	 ??	KDETH
 *	  8	RHF
 *	---
 *	 28 + KDETH
 *
 * For a 64-byte cache line, KDETH would need to be 36 bytes or 9 DWORDS
 */
#define DEFAULT_RCVHDRSIZE 9
1595 | ||
1596 | /* | |
1597 | * Maximal header byte count: | |
1598 | * | |
1599 | * Bytes Field | |
1600 | * 8 LRH | |
1601 | * 40 GRH (optional) | |
1602 | * 12 BTH | |
1603 | * ?? KDETH | |
1604 | * 8 RHF | |
1605 | * --- | |
1606 | * 68 + KDETH | |
1607 | * | |
1608 | * We also want to maintain a cache line alignment to assist DMA'ing | |
1609 | * of the header bytes. Round up to a good size. | |
1610 | */ | |
1611 | #define DEFAULT_RCVHDR_ENTSIZE 32 | |
1612 | ||
def82284 MH |
1613 | int hfi1_acquire_user_pages(unsigned long, size_t, bool, struct page **); |
1614 | void hfi1_release_user_pages(struct page **, size_t, bool); | |
77241056 MM |
1615 | |
1616 | static inline void clear_rcvhdrtail(const struct hfi1_ctxtdata *rcd) | |
1617 | { | |
1618 | *((u64 *) rcd->rcvhdrtail_kvaddr) = 0ULL; | |
1619 | } | |
1620 | ||
1621 | static inline u32 get_rcvhdrtail(const struct hfi1_ctxtdata *rcd) | |
1622 | { | |
1623 | /* | |
1624 | * volatile because it's a DMA target from the chip, routine is | |
1625 | * inlined, and don't want register caching or reordering. | |
1626 | */ | |
1627 | return (u32) le64_to_cpu(*rcd->rcvhdrtail_kvaddr); | |
1628 | } | |
1629 | ||
1630 | /* | |
1631 | * sysfs interface. | |
1632 | */ | |
1633 | ||
1634 | extern const char ib_hfi1_version[]; | |
1635 | ||
1636 | int hfi1_device_create(struct hfi1_devdata *); | |
1637 | void hfi1_device_remove(struct hfi1_devdata *); | |
1638 | ||
1639 | int hfi1_create_port_files(struct ib_device *ibdev, u8 port_num, | |
1640 | struct kobject *kobj); | |
1641 | int hfi1_verbs_register_sysfs(struct hfi1_devdata *); | |
1642 | void hfi1_verbs_unregister_sysfs(struct hfi1_devdata *); | |
1643 | /* Hook for sysfs read of QSFP */ | |
1644 | int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len); | |
1645 | ||
1646 | int hfi1_pcie_init(struct pci_dev *, const struct pci_device_id *); | |
1647 | void hfi1_pcie_cleanup(struct pci_dev *); | |
1648 | int hfi1_pcie_ddinit(struct hfi1_devdata *, struct pci_dev *, | |
1649 | const struct pci_device_id *); | |
1650 | void hfi1_pcie_ddcleanup(struct hfi1_devdata *); | |
1651 | void hfi1_pcie_flr(struct hfi1_devdata *); | |
1652 | int pcie_speeds(struct hfi1_devdata *); | |
1653 | void request_msix(struct hfi1_devdata *, u32 *, struct hfi1_msix_entry *); | |
1654 | void hfi1_enable_intx(struct pci_dev *); | |
77241056 MM |
1655 | void restore_pci_variables(struct hfi1_devdata *dd); |
1656 | int do_pcie_gen3_transition(struct hfi1_devdata *dd); | |
1657 | int parse_platform_config(struct hfi1_devdata *dd); | |
1658 | int get_platform_config_field(struct hfi1_devdata *dd, | |
1659 | enum platform_config_table_type_encoding table_type, | |
1660 | int table_index, int field_index, u32 *data, u32 len); | |
1661 | ||
77241056 | 1662 | const char *get_unit_name(int unit); |
49dbb6cf DD |
1663 | const char *get_card_name(struct rvt_dev_info *rdi); |
1664 | struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi); | |
77241056 MM |
1665 | |
/*
 * Flush write combining store buffers (if present) and perform a write
 * barrier.
 *
 * Issues the x86 "sfence" instruction.  The "memory" clobber also stops the
 * compiler from moving memory accesses across this point.
 *
 * The __asm__/__volatile__ spellings are used instead of plain "asm" so the
 * header still compiles when the GNU "asm" keyword is disabled (strict ISO
 * -std= modes).
 */
static inline void flush_wc(void)
{
	__asm__ __volatile__("sfence" : : : "memory");
}
1674 | ||
1675 | void handle_eflags(struct hfi1_packet *packet); | |
1676 | int process_receive_ib(struct hfi1_packet *packet); | |
1677 | int process_receive_bypass(struct hfi1_packet *packet); | |
1678 | int process_receive_error(struct hfi1_packet *packet); | |
1679 | int kdeth_process_expected(struct hfi1_packet *packet); | |
1680 | int kdeth_process_eager(struct hfi1_packet *packet); | |
1681 | int process_receive_invalid(struct hfi1_packet *packet); | |
1682 | ||
1683 | extern rhf_rcv_function_ptr snoop_rhf_rcv_functions[8]; | |
1684 | ||
895420dd | 1685 | void update_sge(struct rvt_sge_state *ss, u32 length); |
77241056 MM |
1686 | |
1687 | /* global module parameter variables */ | |
1688 | extern unsigned int hfi1_max_mtu; | |
1689 | extern unsigned int hfi1_cu; | |
1690 | extern unsigned int user_credit_return_threshold; | |
2ce6bf22 | 1691 | extern int num_user_contexts; |
77241056 | 1692 | extern unsigned n_krcvqs; |
5b55ea3b | 1693 | extern uint krcvqs[]; |
77241056 MM |
1694 | extern int krcvqsset; |
1695 | extern uint kdeth_qp; | |
1696 | extern uint loopback; | |
1697 | extern uint quick_linkup; | |
1698 | extern uint rcv_intr_timeout; | |
1699 | extern uint rcv_intr_count; | |
1700 | extern uint rcv_intr_dynamic; | |
1701 | extern ushort link_crc_mask; | |
1702 | ||
1703 | extern struct mutex hfi1_mutex; | |
1704 | ||
1705 | /* Number of seconds before our card status check... */ | |
1706 | #define STATUS_TIMEOUT 60 | |
1707 | ||
1708 | #define DRIVER_NAME "hfi1" | |
1709 | #define HFI1_USER_MINOR_BASE 0 | |
1710 | #define HFI1_TRACE_MINOR 127 | |
1711 | #define HFI1_DIAGPKT_MINOR 128 | |
1712 | #define HFI1_DIAG_MINOR_BASE 129 | |
1713 | #define HFI1_SNOOP_CAPTURE_BASE 200 | |
1714 | #define HFI1_NMINORS 255 | |
1715 | ||
1716 | #define PCI_VENDOR_ID_INTEL 0x8086 | |
1717 | #define PCI_DEVICE_ID_INTEL0 0x24f0 | |
1718 | #define PCI_DEVICE_ID_INTEL1 0x24f1 | |
1719 | ||
1720 | #define HFI1_PKT_USER_SC_INTEGRITY \ | |
1721 | (SEND_CTXT_CHECK_ENABLE_DISALLOW_NON_KDETH_PACKETS_SMASK \ | |
1722 | | SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_SMASK \ | |
1723 | | SEND_CTXT_CHECK_ENABLE_DISALLOW_GRH_SMASK) | |
1724 | ||
1725 | #define HFI1_PKT_KERNEL_SC_INTEGRITY \ | |
1726 | (SEND_CTXT_CHECK_ENABLE_DISALLOW_KDETH_PACKETS_SMASK) | |
1727 | ||
1728 | static inline u64 hfi1_pkt_default_send_ctxt_mask(struct hfi1_devdata *dd, | |
1729 | u16 ctxt_type) | |
1730 | { | |
1731 | u64 base_sc_integrity = | |
1732 | SEND_CTXT_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK | |
1733 | | SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK | |
1734 | | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK | |
1735 | | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK | |
1736 | | SEND_CTXT_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK | |
1737 | | SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_TEST_SMASK | |
1738 | | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_BYPASS_PACKETS_SMASK | |
1739 | | SEND_CTXT_CHECK_ENABLE_DISALLOW_TOO_SMALL_IB_PACKETS_SMASK | |
1740 | | SEND_CTXT_CHECK_ENABLE_DISALLOW_RAW_IPV6_SMASK | |
1741 | | SEND_CTXT_CHECK_ENABLE_DISALLOW_RAW_SMASK | |
1742 | | SEND_CTXT_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK | |
1743 | | SEND_CTXT_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK | |
1744 | | SEND_CTXT_CHECK_ENABLE_CHECK_OPCODE_SMASK | |
1745 | | SEND_CTXT_CHECK_ENABLE_CHECK_SLID_SMASK | |
1746 | | SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK | |
1747 | | SEND_CTXT_CHECK_ENABLE_CHECK_VL_SMASK | |
1748 | | SEND_CTXT_CHECK_ENABLE_CHECK_ENABLE_SMASK; | |
1749 | ||
1750 | if (ctxt_type == SC_USER) | |
1751 | base_sc_integrity |= HFI1_PKT_USER_SC_INTEGRITY; | |
1752 | else | |
1753 | base_sc_integrity |= HFI1_PKT_KERNEL_SC_INTEGRITY; | |
1754 | ||
995deafa | 1755 | if (is_ax(dd)) |
624be1db | 1756 | /* turn off send-side job key checks - A0 */ |
77241056 MM |
1757 | return base_sc_integrity & |
1758 | ~SEND_CTXT_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; | |
1759 | return base_sc_integrity; | |
1760 | } | |
1761 | ||
1762 | static inline u64 hfi1_pkt_base_sdma_integrity(struct hfi1_devdata *dd) | |
1763 | { | |
1764 | u64 base_sdma_integrity = | |
1765 | SEND_DMA_CHECK_ENABLE_DISALLOW_BYPASS_BAD_PKT_LEN_SMASK | |
1766 | | SEND_DMA_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK | |
1767 | | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_BYPASS_PACKETS_SMASK | |
1768 | | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_LONG_IB_PACKETS_SMASK | |
1769 | | SEND_DMA_CHECK_ENABLE_DISALLOW_BAD_PKT_LEN_SMASK | |
1770 | | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_SMALL_BYPASS_PACKETS_SMASK | |
1771 | | SEND_DMA_CHECK_ENABLE_DISALLOW_TOO_SMALL_IB_PACKETS_SMASK | |
1772 | | SEND_DMA_CHECK_ENABLE_DISALLOW_RAW_IPV6_SMASK | |
1773 | | SEND_DMA_CHECK_ENABLE_DISALLOW_RAW_SMASK | |
1774 | | SEND_DMA_CHECK_ENABLE_CHECK_BYPASS_VL_MAPPING_SMASK | |
1775 | | SEND_DMA_CHECK_ENABLE_CHECK_VL_MAPPING_SMASK | |
1776 | | SEND_DMA_CHECK_ENABLE_CHECK_OPCODE_SMASK | |
1777 | | SEND_DMA_CHECK_ENABLE_CHECK_SLID_SMASK | |
1778 | | SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK | |
1779 | | SEND_DMA_CHECK_ENABLE_CHECK_VL_SMASK | |
1780 | | SEND_DMA_CHECK_ENABLE_CHECK_ENABLE_SMASK; | |
1781 | ||
995deafa | 1782 | if (is_ax(dd)) |
624be1db | 1783 | /* turn off send-side job key checks - A0 */ |
77241056 MM |
1784 | return base_sdma_integrity & |
1785 | ~SEND_DMA_CHECK_ENABLE_CHECK_JOB_KEY_SMASK; | |
1786 | return base_sdma_integrity; | |
1787 | } | |
1788 | ||
1789 | /* | |
1790 | * hfi1_early_err is used (only!) to print early errors before devdata is | |
1791 | * allocated, or when dd->pcidev may not be valid, and at the tail end of | |
1792 | * cleanup when devdata may have been freed, etc. hfi1_dev_porterr is | |
1793 | * the same as dd_dev_err, but is used when the message really needs | |
1794 | * the IB port# to be definitive as to what's happening. | |
1795 | */ | |
1796 | #define hfi1_early_err(dev, fmt, ...) \ | |
1797 | dev_err(dev, fmt, ##__VA_ARGS__) | |
1798 | ||
1799 | #define hfi1_early_info(dev, fmt, ...) \ | |
1800 | dev_info(dev, fmt, ##__VA_ARGS__) | |
1801 | ||
1802 | #define dd_dev_emerg(dd, fmt, ...) \ | |
1803 | dev_emerg(&(dd)->pcidev->dev, "%s: " fmt, \ | |
1804 | get_unit_name((dd)->unit), ##__VA_ARGS__) | |
1805 | #define dd_dev_err(dd, fmt, ...) \ | |
1806 | dev_err(&(dd)->pcidev->dev, "%s: " fmt, \ | |
1807 | get_unit_name((dd)->unit), ##__VA_ARGS__) | |
1808 | #define dd_dev_warn(dd, fmt, ...) \ | |
1809 | dev_warn(&(dd)->pcidev->dev, "%s: " fmt, \ | |
1810 | get_unit_name((dd)->unit), ##__VA_ARGS__) | |
1811 | ||
1812 | #define dd_dev_warn_ratelimited(dd, fmt, ...) \ | |
1813 | dev_warn_ratelimited(&(dd)->pcidev->dev, "%s: " fmt, \ | |
1814 | get_unit_name((dd)->unit), ##__VA_ARGS__) | |
1815 | ||
1816 | #define dd_dev_info(dd, fmt, ...) \ | |
1817 | dev_info(&(dd)->pcidev->dev, "%s: " fmt, \ | |
1818 | get_unit_name((dd)->unit), ##__VA_ARGS__) | |
1819 | ||
a1edc18a IW |
1820 | #define dd_dev_dbg(dd, fmt, ...) \ |
1821 | dev_dbg(&(dd)->pcidev->dev, "%s: " fmt, \ | |
1822 | get_unit_name((dd)->unit), ##__VA_ARGS__) | |
1823 | ||
77241056 MM |
1824 | #define hfi1_dev_porterr(dd, port, fmt, ...) \ |
1825 | dev_err(&(dd)->pcidev->dev, "%s: IB%u:%u " fmt, \ | |
1826 | get_unit_name((dd)->unit), (dd)->unit, (port), \ | |
1827 | ##__VA_ARGS__) | |
1828 | ||
1829 | /* | |
1830 | * this is used for formatting hw error messages... | |
1831 | */ | |
1832 | struct hfi1_hwerror_msgs { | |
1833 | u64 mask; | |
1834 | const char *msg; | |
1835 | size_t sz; | |
1836 | }; | |
1837 | ||
1838 | /* in intr.c... */ | |
1839 | void hfi1_format_hwerrors(u64 hwerrs, | |
1840 | const struct hfi1_hwerror_msgs *hwerrmsgs, | |
1841 | size_t nhwerrmsgs, char *msg, size_t lmsg); | |
1842 | ||
1843 | #define USER_OPCODE_CHECK_VAL 0xC0 | |
1844 | #define USER_OPCODE_CHECK_MASK 0xC0 | |
1845 | #define OPCODE_CHECK_VAL_DISABLED 0x0 | |
1846 | #define OPCODE_CHECK_MASK_DISABLED 0x0 | |
1847 | ||
1848 | static inline void hfi1_reset_cpu_counters(struct hfi1_devdata *dd) | |
1849 | { | |
1850 | struct hfi1_pportdata *ppd; | |
1851 | int i; | |
1852 | ||
1853 | dd->z_int_counter = get_all_cpu_total(dd->int_counter); | |
1854 | dd->z_rcv_limit = get_all_cpu_total(dd->rcv_limit); | |
1855 | ||
1856 | ppd = (struct hfi1_pportdata *)(dd + 1); | |
1857 | for (i = 0; i < dd->num_pports; i++, ppd++) { | |
4eb06882 DD |
1858 | ppd->ibport_data.rvp.z_rc_acks = |
1859 | get_all_cpu_total(ppd->ibport_data.rvp.rc_acks); | |
1860 | ppd->ibport_data.rvp.z_rc_qacks = | |
1861 | get_all_cpu_total(ppd->ibport_data.rvp.rc_qacks); | |
77241056 MM |
1862 | } |
1863 | } | |
1864 | ||
1865 | /* Control LED state */ | |
1866 | static inline void setextled(struct hfi1_devdata *dd, u32 on) | |
1867 | { | |
1868 | if (on) | |
1869 | write_csr(dd, DCC_CFG_LED_CNTRL, 0x1F); | |
1870 | else | |
1871 | write_csr(dd, DCC_CFG_LED_CNTRL, 0x10); | |
1872 | } | |
1873 | ||
1874 | int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp); | |
1875 | ||
1876 | #endif /* _HFI1_KERNEL_H */ |