/*
 *  IBM eServer eHCA Infiniband device driver for Linux on POWER
 *
 *  Functions for EQs, NEQs and interrupts
 *
 *  Authors: Heiko J Schick <schickhj@de.ibm.com>
 *           Khadija Souissi <souissi@de.ibm.com>
 *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
 *           Joachim Fenkes <fenkes@de.ibm.com>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
 *  All rights reserved.
 *
 *  This source code is distributed under a dual license of GPL v2.0 and OpenIB
 *  BSD.
 *
 * OpenIB BSD License
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
44 #include "ehca_classes.h"
46 #include "ehca_iverbs.h"
47 #include "ehca_tools.h"
50 #include "ipz_pt_fn.h"
/* Bit fields of an event queue entry (EQE), IBM bit numbering. */
#define EQE_COMPLETION_EVENT   EHCA_BMASK_IBM( 1,  1)
#define EQE_CQ_QP_NUMBER       EHCA_BMASK_IBM( 8, 31)
#define EQE_EE_IDENTIFIER      EHCA_BMASK_IBM( 2,  7)
#define EQE_CQ_NUMBER          EHCA_BMASK_IBM( 8, 31)
#define EQE_QP_NUMBER          EHCA_BMASK_IBM( 8, 31)
#define EQE_QP_TOKEN           EHCA_BMASK_IBM(32, 63)
#define EQE_CQ_TOKEN           EHCA_BMASK_IBM(32, 63)

/* Bit fields of a notification event queue entry (NEQE). */
#define NEQE_COMPLETION_EVENT  EHCA_BMASK_IBM( 1,  1)
#define NEQE_EVENT_CODE        EHCA_BMASK_IBM( 2,  7)
#define NEQE_PORT_NUMBER       EHCA_BMASK_IBM( 8, 15)
#define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16, 16)
#define NEQE_SPECIFIC_EVENT    EHCA_BMASK_IBM(16, 23)

/* Bit fields of the error data block returned by the firmware. */
#define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
#define ERROR_DATA_TYPE        EHCA_BMASK_IBM( 0,  7)

/* Defined further down; needed by the EQ processing code above it. */
static void queue_comp_task(struct ehca_cq *__cq);

/* Completion task pool; allocated in ehca_create_comp_pool(). */
static struct ehca_comp_pool *pool;
74 static inline void comp_event_callback(struct ehca_cq
*cq
)
76 if (!cq
->ib_cq
.comp_handler
)
79 spin_lock(&cq
->cb_lock
);
80 cq
->ib_cq
.comp_handler(&cq
->ib_cq
, cq
->ib_cq
.cq_context
);
81 spin_unlock(&cq
->cb_lock
);
86 static void print_error_data(struct ehca_shca
*shca
, void *data
,
87 u64
*rblock
, int length
)
89 u64 type
= EHCA_BMASK_GET(ERROR_DATA_TYPE
, rblock
[2]);
90 u64 resource
= rblock
[1];
93 case 0x1: /* Queue Pair */
95 struct ehca_qp
*qp
= (struct ehca_qp
*)data
;
97 /* only print error data if AER is set */
101 ehca_err(&shca
->ib_device
,
102 "QP 0x%x (resource=%lx) has errors.",
103 qp
->ib_qp
.qp_num
, resource
);
106 case 0x4: /* Completion Queue */
108 struct ehca_cq
*cq
= (struct ehca_cq
*)data
;
110 ehca_err(&shca
->ib_device
,
111 "CQ 0x%x (resource=%lx) has errors.",
112 cq
->cq_number
, resource
);
116 ehca_err(&shca
->ib_device
,
117 "Unknown error type: %lx on %s.",
118 type
, shca
->ib_device
.name
);
122 ehca_err(&shca
->ib_device
, "Error data is available: %lx.", resource
);
123 ehca_err(&shca
->ib_device
, "EHCA ----- error data begin "
124 "---------------------------------------------------");
125 ehca_dmp(rblock
, length
, "resource=%lx", resource
);
126 ehca_err(&shca
->ib_device
, "EHCA ----- error data end "
127 "----------------------------------------------------");
132 int ehca_error_data(struct ehca_shca
*shca
, void *data
,
138 unsigned long block_count
;
140 rblock
= ehca_alloc_fw_ctrlblock(GFP_ATOMIC
);
142 ehca_err(&shca
->ib_device
, "Cannot allocate rblock memory.");
147 /* rblock must be 4K aligned and should be 4K large */
148 ret
= hipz_h_error_data(shca
->ipz_hca_handle
,
153 if (ret
== H_R_STATE
)
154 ehca_err(&shca
->ib_device
,
155 "No error data is available: %lx.", resource
);
156 else if (ret
== H_SUCCESS
) {
159 length
= EHCA_BMASK_GET(ERROR_DATA_LENGTH
, rblock
[0]);
161 if (length
> EHCA_PAGESIZE
)
162 length
= EHCA_PAGESIZE
;
164 print_error_data(shca
, data
, rblock
, length
);
166 ehca_err(&shca
->ib_device
,
167 "Error data could not be fetched: %lx", resource
);
169 ehca_free_fw_ctrlblock(rblock
);
176 static void dispatch_qp_event(struct ehca_shca
*shca
, struct ehca_qp
*qp
,
177 enum ib_event_type event_type
)
179 struct ib_event event
;
181 /* PATH_MIG without the QP ever having been armed is false alarm */
182 if (event_type
== IB_EVENT_PATH_MIG
&& !qp
->mig_armed
)
185 event
.device
= &shca
->ib_device
;
186 event
.event
= event_type
;
188 if (qp
->ext_type
== EQPT_SRQ
) {
189 if (!qp
->ib_srq
.event_handler
)
192 event
.element
.srq
= &qp
->ib_srq
;
193 qp
->ib_srq
.event_handler(&event
, qp
->ib_srq
.srq_context
);
195 if (!qp
->ib_qp
.event_handler
)
198 event
.element
.qp
= &qp
->ib_qp
;
199 qp
->ib_qp
.event_handler(&event
, qp
->ib_qp
.qp_context
);
203 static void qp_event_callback(struct ehca_shca
*shca
, u64 eqe
,
204 enum ib_event_type event_type
, int fatal
)
207 u32 token
= EHCA_BMASK_GET(EQE_QP_TOKEN
, eqe
);
209 read_lock(&ehca_qp_idr_lock
);
210 qp
= idr_find(&ehca_qp_idr
, token
);
212 atomic_inc(&qp
->nr_events
);
213 read_unlock(&ehca_qp_idr_lock
);
219 ehca_error_data(shca
, qp
, qp
->ipz_qp_handle
.handle
);
221 dispatch_qp_event(shca
, qp
, fatal
&& qp
->ext_type
== EQPT_SRQ
?
222 IB_EVENT_SRQ_ERR
: event_type
);
225 * eHCA only processes one WQE at a time for SRQ base QPs,
226 * so the last WQE has been processed as soon as the QP enters
229 if (fatal
&& qp
->ext_type
== EQPT_SRQBASE
)
230 dispatch_qp_event(shca
, qp
, IB_EVENT_QP_LAST_WQE_REACHED
);
232 if (atomic_dec_and_test(&qp
->nr_events
))
233 wake_up(&qp
->wait_completion
);
237 static void cq_event_callback(struct ehca_shca
*shca
,
241 u32 token
= EHCA_BMASK_GET(EQE_CQ_TOKEN
, eqe
);
243 read_lock(&ehca_cq_idr_lock
);
244 cq
= idr_find(&ehca_cq_idr
, token
);
246 atomic_inc(&cq
->nr_events
);
247 read_unlock(&ehca_cq_idr_lock
);
252 ehca_error_data(shca
, cq
, cq
->ipz_cq_handle
.handle
);
254 if (atomic_dec_and_test(&cq
->nr_events
))
255 wake_up(&cq
->wait_completion
);
260 static void parse_identifier(struct ehca_shca
*shca
, u64 eqe
)
262 u8 identifier
= EHCA_BMASK_GET(EQE_EE_IDENTIFIER
, eqe
);
264 switch (identifier
) {
265 case 0x02: /* path migrated */
266 qp_event_callback(shca
, eqe
, IB_EVENT_PATH_MIG
, 0);
268 case 0x03: /* communication established */
269 qp_event_callback(shca
, eqe
, IB_EVENT_COMM_EST
, 0);
271 case 0x04: /* send queue drained */
272 qp_event_callback(shca
, eqe
, IB_EVENT_SQ_DRAINED
, 0);
274 case 0x05: /* QP error */
275 case 0x06: /* QP error */
276 qp_event_callback(shca
, eqe
, IB_EVENT_QP_FATAL
, 1);
278 case 0x07: /* CQ error */
279 case 0x08: /* CQ error */
280 cq_event_callback(shca
, eqe
);
282 case 0x09: /* MRMWPTE error */
283 ehca_err(&shca
->ib_device
, "MRMWPTE error.");
285 case 0x0A: /* port event */
286 ehca_err(&shca
->ib_device
, "Port event.");
288 case 0x0B: /* MR access error */
289 ehca_err(&shca
->ib_device
, "MR access error.");
291 case 0x0C: /* EQ error */
292 ehca_err(&shca
->ib_device
, "EQ error.");
294 case 0x0D: /* P/Q_Key mismatch */
295 ehca_err(&shca
->ib_device
, "P/Q_Key mismatch.");
297 case 0x10: /* sampling complete */
298 ehca_err(&shca
->ib_device
, "Sampling complete.");
300 case 0x11: /* unaffiliated access error */
301 ehca_err(&shca
->ib_device
, "Unaffiliated access error.");
303 case 0x12: /* path migrating */
304 ehca_err(&shca
->ib_device
, "Path migrating.");
306 case 0x13: /* interface trace stopped */
307 ehca_err(&shca
->ib_device
, "Interface trace stopped.");
309 case 0x14: /* first error capture info available */
310 ehca_info(&shca
->ib_device
, "First error capture available");
312 case 0x15: /* SRQ limit reached */
313 qp_event_callback(shca
, eqe
, IB_EVENT_SRQ_LIMIT_REACHED
, 0);
316 ehca_err(&shca
->ib_device
, "Unknown identifier: %x on %s.",
317 identifier
, shca
->ib_device
.name
);
324 static void dispatch_port_event(struct ehca_shca
*shca
, int port_num
,
325 enum ib_event_type type
, const char *msg
)
327 struct ib_event event
;
329 ehca_info(&shca
->ib_device
, "port %d %s.", port_num
, msg
);
330 event
.device
= &shca
->ib_device
;
332 event
.element
.port_num
= port_num
;
333 ib_dispatch_event(&event
);
336 static void notify_port_conf_change(struct ehca_shca
*shca
, int port_num
)
338 struct ehca_sma_attr new_attr
;
339 struct ehca_sma_attr
*old_attr
= &shca
->sport
[port_num
- 1].saved_attr
;
341 ehca_query_sma_attr(shca
, port_num
, &new_attr
);
343 if (new_attr
.sm_sl
!= old_attr
->sm_sl
||
344 new_attr
.sm_lid
!= old_attr
->sm_lid
)
345 dispatch_port_event(shca
, port_num
, IB_EVENT_SM_CHANGE
,
348 if (new_attr
.lid
!= old_attr
->lid
||
349 new_attr
.lmc
!= old_attr
->lmc
)
350 dispatch_port_event(shca
, port_num
, IB_EVENT_LID_CHANGE
,
353 if (new_attr
.pkey_tbl_len
!= old_attr
->pkey_tbl_len
||
354 memcmp(new_attr
.pkeys
, old_attr
->pkeys
,
355 sizeof(u16
) * new_attr
.pkey_tbl_len
))
356 dispatch_port_event(shca
, port_num
, IB_EVENT_PKEY_CHANGE
,
359 *old_attr
= new_attr
;
362 static void parse_ec(struct ehca_shca
*shca
, u64 eqe
)
364 u8 ec
= EHCA_BMASK_GET(NEQE_EVENT_CODE
, eqe
);
365 u8 port
= EHCA_BMASK_GET(NEQE_PORT_NUMBER
, eqe
);
367 struct ehca_sport
*sport
= &shca
->sport
[port
- 1];
371 case 0x30: /* port availability change */
372 if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY
, eqe
)) {
374 /* replay modify_qp for sqps */
375 spin_lock_irqsave(&sport
->mod_sqp_lock
, flags
);
376 suppress_event
= !sport
->ibqp_sqp
[IB_QPT_GSI
];
377 if (sport
->ibqp_sqp
[IB_QPT_SMI
])
378 ehca_recover_sqp(sport
->ibqp_sqp
[IB_QPT_SMI
]);
380 ehca_recover_sqp(sport
->ibqp_sqp
[IB_QPT_GSI
]);
381 spin_unlock_irqrestore(&sport
->mod_sqp_lock
, flags
);
383 /* AQP1 was destroyed, ignore this event */
387 sport
->port_state
= IB_PORT_ACTIVE
;
388 dispatch_port_event(shca
, port
, IB_EVENT_PORT_ACTIVE
,
390 ehca_query_sma_attr(shca
, port
,
393 sport
->port_state
= IB_PORT_DOWN
;
394 dispatch_port_event(shca
, port
, IB_EVENT_PORT_ERR
,
399 /* port configuration change
400 * disruptive change is caused by
401 * LID, PKEY or SM change
403 if (EHCA_BMASK_GET(NEQE_DISRUPTIVE
, eqe
)) {
404 ehca_warn(&shca
->ib_device
, "disruptive port "
405 "%d configuration change", port
);
407 sport
->port_state
= IB_PORT_DOWN
;
408 dispatch_port_event(shca
, port
, IB_EVENT_PORT_ERR
,
411 sport
->port_state
= IB_PORT_ACTIVE
;
412 dispatch_port_event(shca
, port
, IB_EVENT_PORT_ACTIVE
,
414 ehca_query_sma_attr(shca
, port
,
417 notify_port_conf_change(shca
, port
);
419 case 0x32: /* adapter malfunction */
420 ehca_err(&shca
->ib_device
, "Adapter malfunction.");
422 case 0x33: /* trace stopped */
423 ehca_err(&shca
->ib_device
, "Traced stopped.");
425 case 0x34: /* util async event */
426 spec_event
= EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT
, eqe
);
427 if (spec_event
== 0x80) /* client reregister required */
428 dispatch_port_event(shca
, port
,
429 IB_EVENT_CLIENT_REREGISTER
,
430 "client reregister req.");
432 ehca_warn(&shca
->ib_device
, "Unknown util async "
433 "event %x on port %x", spec_event
, port
);
436 ehca_err(&shca
->ib_device
, "Unknown event code: %x on %s.",
437 ec
, shca
->ib_device
.name
);
444 static inline void reset_eq_pending(struct ehca_cq
*cq
)
447 struct h_galpa gal
= cq
->galpas
.kernel
;
449 hipz_galpa_store_cq(gal
, cqx_ep
, 0x0);
450 CQx_EP
= hipz_galpa_load(gal
, CQTEMM_OFFSET(cqx_ep
));
455 irqreturn_t
ehca_interrupt_neq(int irq
, void *dev_id
)
457 struct ehca_shca
*shca
= (struct ehca_shca
*)dev_id
;
459 tasklet_hi_schedule(&shca
->neq
.interrupt_task
);
464 void ehca_tasklet_neq(unsigned long data
)
466 struct ehca_shca
*shca
= (struct ehca_shca
*)data
;
467 struct ehca_eqe
*eqe
;
470 eqe
= (struct ehca_eqe
*)ehca_poll_eq(shca
, &shca
->neq
);
473 if (!EHCA_BMASK_GET(NEQE_COMPLETION_EVENT
, eqe
->entry
))
474 parse_ec(shca
, eqe
->entry
);
476 eqe
= (struct ehca_eqe
*)ehca_poll_eq(shca
, &shca
->neq
);
479 ret
= hipz_h_reset_event(shca
->ipz_hca_handle
,
480 shca
->neq
.ipz_eq_handle
, 0xFFFFFFFFFFFFFFFFL
);
482 if (ret
!= H_SUCCESS
)
483 ehca_err(&shca
->ib_device
, "Can't clear notification events.");
488 irqreturn_t
ehca_interrupt_eq(int irq
, void *dev_id
)
490 struct ehca_shca
*shca
= (struct ehca_shca
*)dev_id
;
492 tasklet_hi_schedule(&shca
->eq
.interrupt_task
);
498 static inline void process_eqe(struct ehca_shca
*shca
, struct ehca_eqe
*eqe
)
504 eqe_value
= eqe
->entry
;
505 ehca_dbg(&shca
->ib_device
, "eqe_value=%lx", eqe_value
);
506 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT
, eqe_value
)) {
507 ehca_dbg(&shca
->ib_device
, "Got completion event");
508 token
= EHCA_BMASK_GET(EQE_CQ_TOKEN
, eqe_value
);
509 read_lock(&ehca_cq_idr_lock
);
510 cq
= idr_find(&ehca_cq_idr
, token
);
512 atomic_inc(&cq
->nr_events
);
513 read_unlock(&ehca_cq_idr_lock
);
515 ehca_err(&shca
->ib_device
,
516 "Invalid eqe for non-existing cq token=%x",
520 reset_eq_pending(cq
);
521 if (ehca_scaling_code
)
524 comp_event_callback(cq
);
525 if (atomic_dec_and_test(&cq
->nr_events
))
526 wake_up(&cq
->wait_completion
);
529 ehca_dbg(&shca
->ib_device
, "Got non completion event");
530 parse_identifier(shca
, eqe_value
);
534 void ehca_process_eq(struct ehca_shca
*shca
, int is_irq
)
536 struct ehca_eq
*eq
= &shca
->eq
;
537 struct ehca_eqe_cache_entry
*eqe_cache
= eq
->eqe_cache
;
543 spin_lock_irqsave(&eq
->irq_spinlock
, flags
);
545 const int max_query_cnt
= 100;
549 int_state
= hipz_h_query_int_state(
550 shca
->ipz_hca_handle
, eq
->ist
);
553 } while (int_state
&& query_cnt
< max_query_cnt
);
554 if (unlikely((query_cnt
== max_query_cnt
)))
555 ehca_dbg(&shca
->ib_device
, "int_state=%x query_cnt=%x",
556 int_state
, query_cnt
);
559 /* read out all eqes */
563 eqe_cache
[eqe_cnt
].eqe
=
564 (struct ehca_eqe
*)ehca_poll_eq(shca
, eq
);
565 if (!eqe_cache
[eqe_cnt
].eqe
)
567 eqe_value
= eqe_cache
[eqe_cnt
].eqe
->entry
;
568 if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT
, eqe_value
)) {
569 token
= EHCA_BMASK_GET(EQE_CQ_TOKEN
, eqe_value
);
570 read_lock(&ehca_cq_idr_lock
);
571 eqe_cache
[eqe_cnt
].cq
= idr_find(&ehca_cq_idr
, token
);
572 if (eqe_cache
[eqe_cnt
].cq
)
573 atomic_inc(&eqe_cache
[eqe_cnt
].cq
->nr_events
);
574 read_unlock(&ehca_cq_idr_lock
);
575 if (!eqe_cache
[eqe_cnt
].cq
) {
576 ehca_err(&shca
->ib_device
,
577 "Invalid eqe for non-existing cq "
582 eqe_cache
[eqe_cnt
].cq
= NULL
;
584 } while (eqe_cnt
< EHCA_EQE_CACHE_SIZE
);
587 ehca_dbg(&shca
->ib_device
,
588 "No eqe found for irq event");
589 goto unlock_irq_spinlock
;
590 } else if (!is_irq
) {
591 ret
= hipz_h_eoi(eq
->ist
);
592 if (ret
!= H_SUCCESS
)
593 ehca_err(&shca
->ib_device
,
594 "bad return code EOI -rc = %ld\n", ret
);
595 ehca_dbg(&shca
->ib_device
, "deadman found %x eqe", eqe_cnt
);
597 if (unlikely(eqe_cnt
== EHCA_EQE_CACHE_SIZE
))
598 ehca_dbg(&shca
->ib_device
, "too many eqes for one irq event");
599 /* enable irq for new packets */
600 for (i
= 0; i
< eqe_cnt
; i
++) {
601 if (eq
->eqe_cache
[i
].cq
)
602 reset_eq_pending(eq
->eqe_cache
[i
].cq
);
605 spin_lock(&eq
->spinlock
);
606 eq_empty
= (!ipz_eqit_eq_peek_valid(&shca
->eq
.ipz_queue
));
607 spin_unlock(&eq
->spinlock
);
608 /* call completion handler for cached eqes */
609 for (i
= 0; i
< eqe_cnt
; i
++)
610 if (eq
->eqe_cache
[i
].cq
) {
611 if (ehca_scaling_code
)
612 queue_comp_task(eq
->eqe_cache
[i
].cq
);
614 struct ehca_cq
*cq
= eq
->eqe_cache
[i
].cq
;
615 comp_event_callback(cq
);
616 if (atomic_dec_and_test(&cq
->nr_events
))
617 wake_up(&cq
->wait_completion
);
620 ehca_dbg(&shca
->ib_device
, "Got non completion event");
621 parse_identifier(shca
, eq
->eqe_cache
[i
].eqe
->entry
);
623 /* poll eq if not empty */
625 goto unlock_irq_spinlock
;
627 struct ehca_eqe
*eqe
;
628 eqe
= (struct ehca_eqe
*)ehca_poll_eq(shca
, &shca
->eq
);
631 process_eqe(shca
, eqe
);
635 spin_unlock_irqrestore(&eq
->irq_spinlock
, flags
);
/*
 * EQ tasklet entry point: process the event queue in interrupt mode.
 * @data is the struct ehca_shca pointer cast to unsigned long.
 */
void ehca_tasklet_eq(unsigned long data)
{
	ehca_process_eq((struct ehca_shca *)data, 1);
}
643 static inline int find_next_online_cpu(struct ehca_comp_pool
*pool
)
648 WARN_ON_ONCE(!in_interrupt());
649 if (ehca_debug_level
>= 3)
650 ehca_dmp(&cpu_online_map
, sizeof(cpumask_t
), "");
652 spin_lock_irqsave(&pool
->last_cpu_lock
, flags
);
653 cpu
= next_cpu_nr(pool
->last_cpu
, cpu_online_map
);
654 if (cpu
>= nr_cpu_ids
)
655 cpu
= first_cpu(cpu_online_map
);
656 pool
->last_cpu
= cpu
;
657 spin_unlock_irqrestore(&pool
->last_cpu_lock
, flags
);
662 static void __queue_comp_task(struct ehca_cq
*__cq
,
663 struct ehca_cpu_comp_task
*cct
)
667 spin_lock_irqsave(&cct
->task_lock
, flags
);
668 spin_lock(&__cq
->task_lock
);
670 if (__cq
->nr_callbacks
== 0) {
671 __cq
->nr_callbacks
++;
672 list_add_tail(&__cq
->entry
, &cct
->cq_list
);
674 wake_up(&cct
->wait_queue
);
676 __cq
->nr_callbacks
++;
678 spin_unlock(&__cq
->task_lock
);
679 spin_unlock_irqrestore(&cct
->task_lock
, flags
);
682 static void queue_comp_task(struct ehca_cq
*__cq
)
685 struct ehca_cpu_comp_task
*cct
;
689 cpu_id
= find_next_online_cpu(pool
);
690 BUG_ON(!cpu_online(cpu_id
));
692 cct
= per_cpu_ptr(pool
->cpu_comp_tasks
, cpu_id
);
695 spin_lock_irqsave(&cct
->task_lock
, flags
);
696 cq_jobs
= cct
->cq_jobs
;
697 spin_unlock_irqrestore(&cct
->task_lock
, flags
);
699 cpu_id
= find_next_online_cpu(pool
);
700 cct
= per_cpu_ptr(pool
->cpu_comp_tasks
, cpu_id
);
704 __queue_comp_task(__cq
, cct
);
707 static void run_comp_task(struct ehca_cpu_comp_task
*cct
)
712 spin_lock_irqsave(&cct
->task_lock
, flags
);
714 while (!list_empty(&cct
->cq_list
)) {
715 cq
= list_entry(cct
->cq_list
.next
, struct ehca_cq
, entry
);
716 spin_unlock_irqrestore(&cct
->task_lock
, flags
);
718 comp_event_callback(cq
);
719 if (atomic_dec_and_test(&cq
->nr_events
))
720 wake_up(&cq
->wait_completion
);
722 spin_lock_irqsave(&cct
->task_lock
, flags
);
723 spin_lock(&cq
->task_lock
);
725 if (!cq
->nr_callbacks
) {
726 list_del_init(cct
->cq_list
.next
);
729 spin_unlock(&cq
->task_lock
);
732 spin_unlock_irqrestore(&cct
->task_lock
, flags
);
735 static int comp_task(void *__cct
)
737 struct ehca_cpu_comp_task
*cct
= __cct
;
739 DECLARE_WAITQUEUE(wait
, current
);
741 set_current_state(TASK_INTERRUPTIBLE
);
742 while (!kthread_should_stop()) {
743 add_wait_queue(&cct
->wait_queue
, &wait
);
745 spin_lock_irq(&cct
->task_lock
);
746 cql_empty
= list_empty(&cct
->cq_list
);
747 spin_unlock_irq(&cct
->task_lock
);
751 __set_current_state(TASK_RUNNING
);
753 remove_wait_queue(&cct
->wait_queue
, &wait
);
755 spin_lock_irq(&cct
->task_lock
);
756 cql_empty
= list_empty(&cct
->cq_list
);
757 spin_unlock_irq(&cct
->task_lock
);
759 run_comp_task(__cct
);
761 set_current_state(TASK_INTERRUPTIBLE
);
763 __set_current_state(TASK_RUNNING
);
768 static struct task_struct
*create_comp_task(struct ehca_comp_pool
*pool
,
771 struct ehca_cpu_comp_task
*cct
;
773 cct
= per_cpu_ptr(pool
->cpu_comp_tasks
, cpu
);
774 spin_lock_init(&cct
->task_lock
);
775 INIT_LIST_HEAD(&cct
->cq_list
);
776 init_waitqueue_head(&cct
->wait_queue
);
777 cct
->task
= kthread_create(comp_task
, cct
, "ehca_comp/%d", cpu
);
782 static void destroy_comp_task(struct ehca_comp_pool
*pool
,
785 struct ehca_cpu_comp_task
*cct
;
786 struct task_struct
*task
;
787 unsigned long flags_cct
;
789 cct
= per_cpu_ptr(pool
->cpu_comp_tasks
, cpu
);
791 spin_lock_irqsave(&cct
->task_lock
, flags_cct
);
797 spin_unlock_irqrestore(&cct
->task_lock
, flags_cct
);
803 static void __cpuinit
take_over_work(struct ehca_comp_pool
*pool
, int cpu
)
805 struct ehca_cpu_comp_task
*cct
= per_cpu_ptr(pool
->cpu_comp_tasks
, cpu
);
808 unsigned long flags_cct
;
810 spin_lock_irqsave(&cct
->task_lock
, flags_cct
);
812 list_splice_init(&cct
->cq_list
, &list
);
814 while (!list_empty(&list
)) {
815 cq
= list_entry(cct
->cq_list
.next
, struct ehca_cq
, entry
);
817 list_del(&cq
->entry
);
818 __queue_comp_task(cq
, per_cpu_ptr(pool
->cpu_comp_tasks
,
819 smp_processor_id()));
822 spin_unlock_irqrestore(&cct
->task_lock
, flags_cct
);
826 static int __cpuinit
comp_pool_callback(struct notifier_block
*nfb
,
827 unsigned long action
,
830 unsigned int cpu
= (unsigned long)hcpu
;
831 struct ehca_cpu_comp_task
*cct
;
835 case CPU_UP_PREPARE_FROZEN
:
836 ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu
);
837 if (!create_comp_task(pool
, cpu
)) {
838 ehca_gen_err("Can't create comp_task for cpu: %x", cpu
);
842 case CPU_UP_CANCELED
:
843 case CPU_UP_CANCELED_FROZEN
:
844 ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu
);
845 cct
= per_cpu_ptr(pool
->cpu_comp_tasks
, cpu
);
846 kthread_bind(cct
->task
, any_online_cpu(cpu_online_map
));
847 destroy_comp_task(pool
, cpu
);
850 case CPU_ONLINE_FROZEN
:
851 ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu
);
852 cct
= per_cpu_ptr(pool
->cpu_comp_tasks
, cpu
);
853 kthread_bind(cct
->task
, cpu
);
854 wake_up_process(cct
->task
);
856 case CPU_DOWN_PREPARE
:
857 case CPU_DOWN_PREPARE_FROZEN
:
858 ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu
);
860 case CPU_DOWN_FAILED
:
861 case CPU_DOWN_FAILED_FROZEN
:
862 ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu
);
865 case CPU_DEAD_FROZEN
:
866 ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu
);
867 destroy_comp_task(pool
, cpu
);
868 take_over_work(pool
, cpu
);
875 static struct notifier_block comp_pool_callback_nb __cpuinitdata
= {
876 .notifier_call
= comp_pool_callback
,
880 int ehca_create_comp_pool(void)
883 struct task_struct
*task
;
885 if (!ehca_scaling_code
)
888 pool
= kzalloc(sizeof(struct ehca_comp_pool
), GFP_KERNEL
);
892 spin_lock_init(&pool
->last_cpu_lock
);
893 pool
->last_cpu
= any_online_cpu(cpu_online_map
);
895 pool
->cpu_comp_tasks
= alloc_percpu(struct ehca_cpu_comp_task
);
896 if (pool
->cpu_comp_tasks
== NULL
) {
901 for_each_online_cpu(cpu
) {
902 task
= create_comp_task(pool
, cpu
);
904 kthread_bind(task
, cpu
);
905 wake_up_process(task
);
909 register_hotcpu_notifier(&comp_pool_callback_nb
);
911 printk(KERN_INFO
"eHCA scaling code enabled\n");
916 void ehca_destroy_comp_pool(void)
920 if (!ehca_scaling_code
)
923 unregister_hotcpu_notifier(&comp_pool_callback_nb
);
925 for (i
= 0; i
< NR_CPUS
; i
++) {
927 destroy_comp_task(pool
, i
);
929 free_percpu(pool
->cpu_comp_tasks
);