IB/ipath: Fix races with ib_resize_cq()
drivers/infiniband/hw/ipath/ipath_cq.c
/*
 * Copyright (c) 2006 QLogic, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/err.h>
#include <linux/vmalloc.h>

#include "ipath_verbs.h"

/**
 * ipath_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is a solicited entry
 *
 * This may be called with qp->s_lock held.
 */
void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
{
        struct ipath_cq_wc *wc;
        unsigned long flags;
        u32 head;
        u32 next;

        spin_lock_irqsave(&cq->lock, flags);

        /*
         * Note that the head pointer might be writable by user processes.
         * Take care to verify it is a sane value.
         */
        wc = cq->queue;
        head = wc->head;
        if (head >= (unsigned) cq->ibcq.cqe) {
                head = cq->ibcq.cqe;
                next = 0;
        } else
                next = head + 1;
        if (unlikely(next == wc->tail)) {
                spin_unlock_irqrestore(&cq->lock, flags);
                if (cq->ibcq.event_handler) {
                        struct ib_event ev;

                        ev.device = cq->ibcq.device;
                        ev.element.cq = &cq->ibcq;
                        ev.event = IB_EVENT_CQ_ERR;
                        cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
                }
                return;
        }
        wc->queue[head] = *entry;
        wc->head = next;

        if (cq->notify == IB_CQ_NEXT_COMP ||
            (cq->notify == IB_CQ_SOLICITED && solicited)) {
                cq->notify = IB_CQ_NONE;
                cq->triggered++;
                /*
                 * This will cause send_complete() to be called in
                 * another thread.
                 */
                tasklet_hi_schedule(&cq->comptask);
        }

        spin_unlock_irqrestore(&cq->lock, flags);

        if (entry->status != IB_WC_SUCCESS)
                to_idev(cq->ibcq.device)->n_wqe_errs++;
}
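
/*
 * Layout of the completion ring used above (derived from the code, shared
 * with user space via mmap): wc->queue[] has cq->ibcq.cqe + 1 slots; "head"
 * is the next slot the driver fills and "tail" is the next slot the consumer
 * drains.  head == tail means the ring is empty, and next == tail means it
 * is full, which is why ipath_cq_enter() reports IB_EVENT_CQ_ERR instead of
 * overwriting an entry.  For example, with cqe == 4 the ring has 5 slots and
 * can hold at most 4 unpolled completions.
 */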

/**
 * ipath_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * Returns the number of completion entries polled.
 *
 * This may be called from interrupt context.  Also called by ib_poll_cq()
 * in the generic verbs code.
 */
int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
        struct ipath_cq *cq = to_icq(ibcq);
        struct ipath_cq_wc *wc;
        unsigned long flags;
        int npolled;
        u32 tail;

        spin_lock_irqsave(&cq->lock, flags);

        wc = cq->queue;
        tail = wc->tail;
        if (tail > (u32) cq->ibcq.cqe)
                tail = (u32) cq->ibcq.cqe;
        for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
                if (tail == wc->head)
                        break;
                *entry = wc->queue[tail];
                if (tail >= cq->ibcq.cqe)
                        tail = 0;
                else
                        tail++;
        }
        wc->tail = tail;

        spin_unlock_irqrestore(&cq->lock, flags);

        return npolled;
}
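
/*
 * A minimal consumer sketch (not code from this driver): a kernel ULP drains
 * the CQ through the generic verbs layer, which dispatches to ipath_poll_cq()
 * above.  handle_wc() is a hypothetical completion handler supplied by the
 * ULP; everything else is the standard verbs API of this era.
 *
 *	struct ib_wc wc;
 *
 *	while (ib_poll_cq(cq, 1, &wc) > 0)
 *		handle_wc(&wc);
 *	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 *	while (ib_poll_cq(cq, 1, &wc) > 0)	// re-poll after re-arming to
 *		handle_wc(&wc);			// avoid missing a completion
 */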

static void send_complete(unsigned long data)
{
        struct ipath_cq *cq = (struct ipath_cq *)data;

        /*
         * The completion handler will most likely rearm the notification
         * and poll for all pending entries.  If a new completion entry
         * is added while we are in this routine, tasklet_hi_schedule()
         * won't call us again until we return so we check triggered to
         * see if we need to call the handler again.
         */
        for (;;) {
                u8 triggered = cq->triggered;

                cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);

                if (cq->triggered == triggered)
                        return;
        }
}

/**
 * ipath_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @entries: the minimum size of the completion queue
 * @context: the user context which owns the CQ (saved for mmap support)
 * @udata: user data; used to return the offset for mmapping the CQ entries
 *
 * Returns a pointer to the completion queue or negative errno values
 * for failure.
 *
 * Called by ib_create_cq() in the generic verbs code.
 */
struct ib_cq *ipath_create_cq(struct ib_device *ibdev, int entries,
                              struct ib_ucontext *context,
                              struct ib_udata *udata)
{
        struct ipath_ibdev *dev = to_idev(ibdev);
        struct ipath_cq *cq;
        struct ipath_cq_wc *wc;
        struct ib_cq *ret;

        if (entries < 1 || entries > ib_ipath_max_cqes) {
                ret = ERR_PTR(-EINVAL);
                goto done;
        }

        /* Allocate the completion queue structure. */
        cq = kmalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq) {
                ret = ERR_PTR(-ENOMEM);
                goto done;
        }

        /*
         * Allocate the completion queue entries and head/tail pointers.
         * This is allocated separately so that it can be resized and
         * also mapped into user space.
         * We need to use vmalloc() in order to support mmap and large
         * numbers of entries.
         */
        wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * entries);
        if (!wc) {
                ret = ERR_PTR(-ENOMEM);
                goto bail_cq;
        }

        /*
         * Return the address of the WC as the offset to mmap.
         * See ipath_mmap() for details.
         */
        if (udata && udata->outlen >= sizeof(__u64)) {
                struct ipath_mmap_info *ip;
                __u64 offset = (__u64) wc;
                int err;

                err = ib_copy_to_udata(udata, &offset, sizeof(offset));
                if (err) {
                        ret = ERR_PTR(err);
                        goto bail_wc;
                }

                /* Allocate info for ipath_mmap(). */
                ip = kmalloc(sizeof(*ip), GFP_KERNEL);
                if (!ip) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_wc;
                }
                cq->ip = ip;
                ip->context = context;
                ip->obj = wc;
                kref_init(&ip->ref);
                ip->mmap_cnt = 0;
                ip->size = PAGE_ALIGN(sizeof(*wc) +
                                      sizeof(struct ib_wc) * entries);
                spin_lock_irq(&dev->pending_lock);
                ip->next = dev->pending_mmaps;
                dev->pending_mmaps = ip;
                spin_unlock_irq(&dev->pending_lock);
        } else
                cq->ip = NULL;

        spin_lock(&dev->n_cqs_lock);
        if (dev->n_cqs_allocated == ib_ipath_max_cqs) {
                spin_unlock(&dev->n_cqs_lock);
                ret = ERR_PTR(-ENOMEM);
                goto bail_wc;
        }

        dev->n_cqs_allocated++;
        spin_unlock(&dev->n_cqs_lock);

        /*
         * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
         * The number of entries should be >= the number requested or return
         * an error.
         */
        cq->ibcq.cqe = entries;
        cq->notify = IB_CQ_NONE;
        cq->triggered = 0;
        spin_lock_init(&cq->lock);
        tasklet_init(&cq->comptask, send_complete, (unsigned long)cq);
        wc->head = 0;
        wc->tail = 0;
        cq->queue = wc;

        ret = &cq->ibcq;

        goto done;

bail_wc:
        vfree(wc);

bail_cq:
        kfree(cq);

done:
        return ret;
}
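
/*
 * User-space side of the handoff above (a rough sketch, not code from this
 * tree): the __u64 offset copied out through udata is consumed by the user
 * library as the offset argument to mmap() on the verbs device file, and
 * ipath_mmap() matches it against dev->pending_mmaps to map the
 * ipath_cq_wc ring into the process:
 *
 *	struct ipath_cq_wc *q;
 *
 *	q = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		 cmd_fd, resp.offset);
 *
 * Here "size", "cmd_fd" and "resp.offset" are illustrative names for values
 * the library already holds from the create-CQ response.
 */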

/**
 * ipath_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Returns 0 for success.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 */
int ipath_destroy_cq(struct ib_cq *ibcq)
{
        struct ipath_ibdev *dev = to_idev(ibcq->device);
        struct ipath_cq *cq = to_icq(ibcq);

        tasklet_kill(&cq->comptask);
        spin_lock(&dev->n_cqs_lock);
        dev->n_cqs_allocated--;
        spin_unlock(&dev->n_cqs_lock);
        if (cq->ip)
                kref_put(&cq->ip->ref, ipath_release_mmap_info);
        else
                vfree(cq->queue);
        kfree(cq);

        return 0;
}

/**
 * ipath_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify: the type of notification to request
 *
 * Returns 0 for success.
 *
 * This may be called from interrupt context.  Also called by
 * ib_req_notify_cq() in the generic verbs code.
 */
int ipath_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify notify)
{
        struct ipath_cq *cq = to_icq(ibcq);
        unsigned long flags;

        spin_lock_irqsave(&cq->lock, flags);
        /*
         * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
         * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
         */
        if (cq->notify != IB_CQ_NEXT_COMP)
                cq->notify = notify;
        spin_unlock_irqrestore(&cq->lock, flags);
        return 0;
}

/**
 * ipath_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the new number of entries in the CQ
 * @udata: user data; used to return the new offset for mmapping the CQ
 *
 * Returns 0 for success.
 */
int ipath_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
        struct ipath_cq *cq = to_icq(ibcq);
        struct ipath_cq_wc *old_wc;
        struct ipath_cq_wc *wc;
        u32 head, tail, n;
        int ret;

        if (cqe < 1 || cqe > ib_ipath_max_cqes) {
                ret = -EINVAL;
                goto bail;
        }

        /*
         * Need to use vmalloc() if we want to support large #s of entries.
         */
        wc = vmalloc_user(sizeof(*wc) + sizeof(struct ib_wc) * cqe);
        if (!wc) {
                ret = -ENOMEM;
                goto bail;
        }

        /*
         * Return the address of the WC as the offset to mmap.
         * See ipath_mmap() for details.
         */
        if (udata && udata->outlen >= sizeof(__u64)) {
                __u64 offset = (__u64) wc;

                ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
                if (ret)
                        goto bail;
        }

        spin_lock_irq(&cq->lock);
        /*
         * Make sure head and tail are sane since they
         * might be user writable.
         */
        old_wc = cq->queue;
        head = old_wc->head;
        if (head > (u32) cq->ibcq.cqe)
                head = (u32) cq->ibcq.cqe;
        tail = old_wc->tail;
        if (tail > (u32) cq->ibcq.cqe)
                tail = (u32) cq->ibcq.cqe;
        if (head < tail)
                n = cq->ibcq.cqe + 1 + head - tail;
        else
                n = head - tail;
        if (unlikely((u32)cqe < n)) {
                spin_unlock_irq(&cq->lock);
                vfree(wc);
                ret = -EOVERFLOW;
                goto bail;
        }
        for (n = 0; tail != head; n++) {
                wc->queue[n] = old_wc->queue[tail];
                if (tail == (u32) cq->ibcq.cqe)
                        tail = 0;
                else
                        tail++;
        }
        cq->ibcq.cqe = cqe;
        wc->head = n;
        wc->tail = 0;
        cq->queue = wc;
        spin_unlock_irq(&cq->lock);

        vfree(old_wc);

        if (cq->ip) {
                struct ipath_ibdev *dev = to_idev(ibcq->device);
                struct ipath_mmap_info *ip = cq->ip;

                ip->obj = wc;
                ip->size = PAGE_ALIGN(sizeof(*wc) +
                                      sizeof(struct ib_wc) * cqe);
                spin_lock_irq(&dev->pending_lock);
                ip->next = dev->pending_mmaps;
                dev->pending_mmaps = ip;
                spin_unlock_irq(&dev->pending_lock);
        }

        ret = 0;

bail:
        return ret;
}
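
/*
 * A worked example of the copy-and-compact step above (illustrative values
 * only): with the old cq->ibcq.cqe == 4 the ring has 5 slots; if tail == 3
 * and head == 1 then n = 4 + 1 + 1 - 3 = 3 completions are pending in old
 * slots 3, 4 and 0.  After resizing to cqe == 8 they occupy new slots 0..2,
 * and the new ring starts with head == 3 and tail == 0, so user space sees
 * exactly the completions it had not yet polled.
 */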