Commit | Line | Data |
---|---|---|
a4d4bbf1 | 1 | /* |
c4d0f8f6 | 2 | * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. |
a4d4bbf1 AC |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
19 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
20 | * DEALINGS IN THE SOFTWARE. | |
21 | */ | |
c4d0f8f6 | 22 | #include "gk20a.h" |
e3c71eb2 | 23 | #include "ctxgf100.h" |
a4d4bbf1 | 24 | |
e3c71eb2 | 25 | #include <nvif/class.h> |
c4d0f8f6 | 26 | #include <subdev/timer.h> |
a4d4bbf1 | 27 | |
e3c71eb2 | 28 | static struct nvkm_oclass |
b8bf04e1 | 29 | gk20a_gr_sclass[] = { |
3740c825 BS |
30 | { FERMI_TWOD_A, &nvkm_object_ofuncs }, |
31 | { KEPLER_INLINE_TO_MEMORY_A, &nvkm_object_ofuncs }, | |
e3c71eb2 BS |
32 | { KEPLER_C, &gf100_fermi_ofuncs, gf100_gr_9097_omthds }, |
33 | { KEPLER_COMPUTE_A, &nvkm_object_ofuncs, gf100_gr_90c0_omthds }, | |
a4d4bbf1 AC |
34 | {} |
35 | }; | |
36 | ||
/*
 * Release an init pack built by one of the gk20a_gr_*_to_*() converters.
 *
 * The converters report failure with ERR_PTR(), and gk20a_gr_ctor() may
 * leave such a value in the priv structure before gk20a_gr_dtor() hands
 * it to us.  An error pointer is not a vmalloc address and must never
 * reach vfree(), so it is ignored here.  NULL is passed through to
 * vfree() as before.
 */
static void
gk20a_gr_init_dtor(struct gf100_gr_pack *pack)
{
	if (IS_ERR(pack))
		return;

	vfree(pack);
}
42 | ||
43 | struct gk20a_fw_av | |
44 | { | |
45 | u32 addr; | |
46 | u32 data; | |
47 | }; | |
48 | ||
49 | static struct gf100_gr_pack * | |
50 | gk20a_gr_av_to_init(struct gf100_gr_fuc *fuc) | |
51 | { | |
52 | struct gf100_gr_init *init; | |
53 | struct gf100_gr_pack *pack; | |
54 | const int nent = (fuc->size / sizeof(struct gk20a_fw_av)); | |
55 | int i; | |
56 | ||
57 | pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1))); | |
58 | if (!pack) | |
59 | return ERR_PTR(-ENOMEM); | |
60 | ||
61 | init = (void *)(pack + 2); | |
62 | ||
63 | pack[0].init = init; | |
64 | ||
65 | for (i = 0; i < nent; i++) { | |
66 | struct gf100_gr_init *ent = &init[i]; | |
67 | struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i]; | |
68 | ||
69 | ent->addr = av->addr; | |
70 | ent->data = av->data; | |
71 | ent->count = 1; | |
72 | ent->pitch = 1; | |
73 | } | |
74 | ||
75 | return pack; | |
76 | } | |
77 | ||
78 | struct gk20a_fw_aiv | |
79 | { | |
80 | u32 addr; | |
81 | u32 index; | |
82 | u32 data; | |
83 | }; | |
84 | ||
85 | static struct gf100_gr_pack * | |
86 | gk20a_gr_aiv_to_init(struct gf100_gr_fuc *fuc) | |
87 | { | |
88 | struct gf100_gr_init *init; | |
89 | struct gf100_gr_pack *pack; | |
90 | const int nent = (fuc->size / sizeof(struct gk20a_fw_aiv)); | |
91 | int i; | |
92 | ||
93 | pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1))); | |
94 | if (!pack) | |
95 | return ERR_PTR(-ENOMEM); | |
96 | ||
97 | init = (void *)(pack + 2); | |
98 | ||
99 | pack[0].init = init; | |
100 | ||
101 | for (i = 0; i < nent; i++) { | |
102 | struct gf100_gr_init *ent = &init[i]; | |
103 | struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)fuc->data)[i]; | |
104 | ||
105 | ent->addr = av->addr; | |
106 | ent->data = av->data; | |
107 | ent->count = 1; | |
108 | ent->pitch = 1; | |
109 | } | |
110 | ||
111 | return pack; | |
112 | } | |
113 | ||
114 | static struct gf100_gr_pack * | |
115 | gk20a_gr_av_to_method(struct gf100_gr_fuc *fuc) | |
116 | { | |
117 | struct gf100_gr_init *init; | |
118 | struct gf100_gr_pack *pack; | |
119 | /* We don't suppose we will initialize more than 16 classes here... */ | |
120 | static const unsigned int max_classes = 16; | |
121 | const int nent = (fuc->size / sizeof(struct gk20a_fw_av)); | |
122 | int i, classidx = 0; | |
123 | u32 prevclass = 0; | |
124 | ||
125 | pack = vzalloc((sizeof(*pack) * max_classes) + | |
126 | (sizeof(*init) * (nent + 1))); | |
127 | if (!pack) | |
128 | return ERR_PTR(-ENOMEM); | |
129 | ||
130 | init = (void *)(pack + max_classes); | |
131 | ||
132 | for (i = 0; i < nent; i++) { | |
133 | struct gf100_gr_init *ent = &init[i]; | |
134 | struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i]; | |
135 | u32 class = av->addr & 0xffff; | |
136 | u32 addr = (av->addr & 0xffff0000) >> 14; | |
137 | ||
138 | if (prevclass != class) { | |
139 | pack[classidx].init = ent; | |
140 | pack[classidx].type = class; | |
141 | prevclass = class; | |
142 | if (++classidx >= max_classes) { | |
143 | vfree(pack); | |
144 | return ERR_PTR(-ENOSPC); | |
145 | } | |
146 | } | |
147 | ||
148 | ent->addr = addr; | |
149 | ent->data = av->data; | |
150 | ent->count = 1; | |
151 | ent->pitch = 1; | |
152 | } | |
153 | ||
154 | return pack; | |
155 | } | |
156 | ||
a032fb9d | 157 | int |
c4d0f8f6 AC |
158 | gk20a_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, |
159 | struct nvkm_oclass *oclass, void *data, u32 size, | |
160 | struct nvkm_object **pobject) | |
161 | { | |
162 | int err; | |
163 | struct gf100_gr_priv *priv; | |
164 | struct gf100_gr_fuc fuc; | |
165 | ||
166 | err = gf100_gr_ctor(parent, engine, oclass, data, size, pobject); | |
167 | if (err) | |
168 | return err; | |
169 | ||
170 | priv = (void *)*pobject; | |
171 | ||
172 | err = gf100_gr_ctor_fw(priv, "sw_nonctx", &fuc); | |
173 | if (err) | |
174 | return err; | |
175 | priv->fuc_sw_nonctx = gk20a_gr_av_to_init(&fuc); | |
176 | gf100_gr_dtor_fw(&fuc); | |
177 | if (IS_ERR(priv->fuc_sw_nonctx)) | |
178 | return PTR_ERR(priv->fuc_sw_nonctx); | |
179 | ||
180 | err = gf100_gr_ctor_fw(priv, "sw_ctx", &fuc); | |
181 | if (err) | |
182 | return err; | |
183 | priv->fuc_sw_ctx = gk20a_gr_aiv_to_init(&fuc); | |
184 | gf100_gr_dtor_fw(&fuc); | |
185 | if (IS_ERR(priv->fuc_sw_ctx)) | |
186 | return PTR_ERR(priv->fuc_sw_ctx); | |
187 | ||
188 | err = gf100_gr_ctor_fw(priv, "sw_bundle_init", &fuc); | |
189 | if (err) | |
190 | return err; | |
191 | priv->fuc_bundle = gk20a_gr_av_to_init(&fuc); | |
192 | gf100_gr_dtor_fw(&fuc); | |
193 | if (IS_ERR(priv->fuc_bundle)) | |
194 | return PTR_ERR(priv->fuc_bundle); | |
195 | ||
196 | err = gf100_gr_ctor_fw(priv, "sw_method_init", &fuc); | |
197 | if (err) | |
198 | return err; | |
199 | priv->fuc_method = gk20a_gr_av_to_method(&fuc); | |
200 | gf100_gr_dtor_fw(&fuc); | |
201 | if (IS_ERR(priv->fuc_method)) | |
202 | return PTR_ERR(priv->fuc_method); | |
203 | ||
204 | return 0; | |
205 | } | |
206 | ||
a032fb9d | 207 | void |
c4d0f8f6 AC |
208 | gk20a_gr_dtor(struct nvkm_object *object) |
209 | { | |
210 | struct gf100_gr_priv *priv = (void *)object; | |
211 | ||
212 | gk20a_gr_init_dtor(priv->fuc_method); | |
213 | gk20a_gr_init_dtor(priv->fuc_bundle); | |
214 | gk20a_gr_init_dtor(priv->fuc_sw_ctx); | |
215 | gk20a_gr_init_dtor(priv->fuc_sw_nonctx); | |
216 | ||
217 | gf100_gr_dtor(object); | |
218 | } | |
219 | ||
220 | static int | |
221 | gk20a_gr_wait_mem_scrubbing(struct gf100_gr_priv *priv) | |
222 | { | |
223 | if (!nv_wait(priv, 0x40910c, 0x6, 0x0)) { | |
224 | nv_error(priv, "FECS mem scrubbing timeout\n"); | |
225 | return -ETIMEDOUT; | |
226 | } | |
227 | ||
228 | if (!nv_wait(priv, 0x41a10c, 0x6, 0x0)) { | |
229 | nv_error(priv, "GPCCS mem scrubbing timeout\n"); | |
230 | return -ETIMEDOUT; | |
231 | } | |
232 | ||
233 | return 0; | |
234 | } | |
235 | ||
236 | static void | |
237 | gk20a_gr_set_hww_esr_report_mask(struct gf100_gr_priv *priv) | |
238 | { | |
239 | nv_wr32(priv, 0x419e44, 0x1ffffe); | |
240 | nv_wr32(priv, 0x419e4c, 0x7f); | |
241 | } | |
242 | ||
a032fb9d | 243 | int |
c4d0f8f6 AC |
244 | gk20a_gr_init(struct nvkm_object *object) |
245 | { | |
246 | struct gk20a_gr_oclass *oclass = (void *)object->oclass; | |
247 | struct gf100_gr_priv *priv = (void *)object; | |
248 | const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, priv->tpc_total); | |
249 | u32 data[TPC_MAX / 8] = {}; | |
250 | u8 tpcnr[GPC_MAX]; | |
251 | int gpc, tpc; | |
252 | int ret, i; | |
253 | ||
254 | ret = nvkm_gr_init(&priv->base); | |
255 | if (ret) | |
256 | return ret; | |
257 | ||
258 | /* Clear SCC RAM */ | |
259 | nv_wr32(priv, 0x40802c, 0x1); | |
260 | ||
261 | gf100_gr_mmio(priv, priv->fuc_sw_nonctx); | |
262 | ||
263 | ret = gk20a_gr_wait_mem_scrubbing(priv); | |
264 | if (ret) | |
265 | return ret; | |
266 | ||
267 | ret = gf100_gr_wait_idle(priv); | |
268 | if (ret) | |
269 | return ret; | |
270 | ||
271 | /* MMU debug buffer */ | |
272 | nv_wr32(priv, 0x100cc8, priv->unk4188b4->addr >> 8); | |
273 | nv_wr32(priv, 0x100ccc, priv->unk4188b8->addr >> 8); | |
274 | ||
275 | if (oclass->init_gpc_mmu) | |
276 | oclass->init_gpc_mmu(priv); | |
277 | ||
278 | /* Set the PE as stream master */ | |
279 | nv_mask(priv, 0x503018, 0x1, 0x1); | |
280 | ||
281 | /* Zcull init */ | |
282 | memset(data, 0x00, sizeof(data)); | |
283 | memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr)); | |
284 | for (i = 0, gpc = -1; i < priv->tpc_total; i++) { | |
285 | do { | |
286 | gpc = (gpc + 1) % priv->gpc_nr; | |
287 | } while (!tpcnr[gpc]); | |
288 | tpc = priv->tpc_nr[gpc] - tpcnr[gpc]--; | |
289 | ||
290 | data[i / 8] |= tpc << ((i % 8) * 4); | |
291 | } | |
292 | ||
293 | nv_wr32(priv, GPC_BCAST(0x0980), data[0]); | |
294 | nv_wr32(priv, GPC_BCAST(0x0984), data[1]); | |
295 | nv_wr32(priv, GPC_BCAST(0x0988), data[2]); | |
296 | nv_wr32(priv, GPC_BCAST(0x098c), data[3]); | |
297 | ||
298 | for (gpc = 0; gpc < priv->gpc_nr; gpc++) { | |
299 | nv_wr32(priv, GPC_UNIT(gpc, 0x0914), | |
300 | priv->magic_not_rop_nr << 8 | priv->tpc_nr[gpc]); | |
301 | nv_wr32(priv, GPC_UNIT(gpc, 0x0910), 0x00040000 | | |
302 | priv->tpc_total); | |
303 | nv_wr32(priv, GPC_UNIT(gpc, 0x0918), magicgpc918); | |
304 | } | |
305 | ||
306 | nv_wr32(priv, GPC_BCAST(0x3fd4), magicgpc918); | |
307 | ||
308 | /* Enable FIFO access */ | |
309 | nv_wr32(priv, 0x400500, 0x00010001); | |
310 | ||
311 | /* Enable interrupts */ | |
312 | nv_wr32(priv, 0x400100, 0xffffffff); | |
313 | nv_wr32(priv, 0x40013c, 0xffffffff); | |
314 | ||
315 | /* Enable FECS error interrupts */ | |
316 | nv_wr32(priv, 0x409c24, 0x000f0000); | |
317 | ||
318 | /* Enable hardware warning exceptions */ | |
319 | nv_wr32(priv, 0x404000, 0xc0000000); | |
320 | nv_wr32(priv, 0x404600, 0xc0000000); | |
321 | ||
322 | if (oclass->set_hww_esr_report_mask) | |
323 | oclass->set_hww_esr_report_mask(priv); | |
324 | ||
325 | /* Enable TPC exceptions per GPC */ | |
326 | nv_wr32(priv, 0x419d0c, 0x2); | |
327 | nv_wr32(priv, 0x41ac94, (((1 << priv->tpc_total) - 1) & 0xff) << 16); | |
328 | ||
329 | /* Reset and enable all exceptions */ | |
330 | nv_wr32(priv, 0x400108, 0xffffffff); | |
331 | nv_wr32(priv, 0x400138, 0xffffffff); | |
332 | nv_wr32(priv, 0x400118, 0xffffffff); | |
333 | nv_wr32(priv, 0x400130, 0xffffffff); | |
334 | nv_wr32(priv, 0x40011c, 0xffffffff); | |
335 | nv_wr32(priv, 0x400134, 0xffffffff); | |
336 | ||
337 | gf100_gr_zbc_init(priv); | |
338 | ||
339 | return gf100_gr_init_ctxctl(priv); | |
340 | } | |
341 | ||
e3c71eb2 | 342 | struct nvkm_oclass * |
c4d0f8f6 AC |
343 | gk20a_gr_oclass = &(struct gk20a_gr_oclass) { |
344 | .gf100 = { | |
345 | .base.handle = NV_ENGINE(GR, 0xea), | |
346 | .base.ofuncs = &(struct nvkm_ofuncs) { | |
347 | .ctor = gk20a_gr_ctor, | |
348 | .dtor = gk20a_gr_dtor, | |
349 | .init = gk20a_gr_init, | |
350 | .fini = _nvkm_gr_fini, | |
351 | }, | |
352 | .cclass = &gk20a_grctx_oclass, | |
353 | .sclass = gk20a_gr_sclass, | |
354 | .ppc_nr = 1, | |
a4d4bbf1 | 355 | }, |
c4d0f8f6 AC |
356 | .set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask, |
357 | }.gf100.base; |