Commit | Line | Data |
---|---|---|
d38ceaf9 AD |
1 | /* |
2 | * Copyright 2013 Advanced Micro Devices, Inc. | |
3 | * All Rights Reserved. | |
4 | * | |
5 | * Permission is hereby granted, free of charge, to any person obtaining a | |
6 | * copy of this software and associated documentation files (the | |
7 | * "Software"), to deal in the Software without restriction, including | |
8 | * without limitation the rights to use, copy, modify, merge, publish, | |
9 | * distribute, sub license, and/or sell copies of the Software, and to | |
10 | * permit persons to whom the Software is furnished to do so, subject to | |
11 | * the following conditions: | |
12 | * | |
13 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
14 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
15 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL | |
16 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, | |
17 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR | |
18 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE | |
19 | * USE OR OTHER DEALINGS IN THE SOFTWARE. | |
20 | * | |
21 | * The above copyright notice and this permission notice (including the | |
22 | * next paragraph) shall be included in all copies or substantial portions | |
23 | * of the Software. | |
24 | * | |
25 | * Authors: Christian König <christian.koenig@amd.com> | |
26 | */ | |
27 | ||
28 | #include <linux/firmware.h> | |
29 | #include <linux/module.h> | |
30 | #include <drm/drmP.h> | |
31 | #include <drm/drm.h> | |
32 | ||
33 | #include "amdgpu.h" | |
34 | #include "amdgpu_pm.h" | |
35 | #include "amdgpu_vce.h" | |
36 | #include "cikd.h" | |
37 | ||
/* 1 second timeout */
#define VCE_IDLE_TIMEOUT msecs_to_jiffies(1000)

/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE "radeon/bonaire_vce.bin"
#define FIRMWARE_KABINI "radeon/kabini_vce.bin"
#define FIRMWARE_KAVERI "radeon/kaveri_vce.bin"
#define FIRMWARE_HAWAII "radeon/hawaii_vce.bin"
#define FIRMWARE_MULLINS "radeon/mullins_vce.bin"
#endif
#define FIRMWARE_TONGA "amdgpu/tonga_vce.bin"
#define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin"
#define FIRMWARE_FIJI "amdgpu/fiji_vce.bin"
#define FIRMWARE_STONEY "amdgpu/stoney_vce.bin"
#define FIRMWARE_POLARIS10 "amdgpu/polaris10_vce.bin"
#define FIRMWARE_POLARIS11 "amdgpu/polaris11_vce.bin"

/* Register the firmware files with the module loader so that tools like
 * modinfo/initramfs builders know which blobs this driver needs. */
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
MODULE_FIRMWARE(FIRMWARE_KABINI);
MODULE_FIRMWARE(FIRMWARE_KAVERI);
MODULE_FIRMWARE(FIRMWARE_HAWAII);
MODULE_FIRMWARE(FIRMWARE_MULLINS);
#endif
MODULE_FIRMWARE(FIRMWARE_TONGA);
MODULE_FIRMWARE(FIRMWARE_CARRIZO);
MODULE_FIRMWARE(FIRMWARE_FIJI);
MODULE_FIRMWARE(FIRMWARE_STONEY);
MODULE_FIRMWARE(FIRMWARE_POLARIS10);
MODULE_FIRMWARE(FIRMWARE_POLARIS11);

/* forward declaration: defined below, referenced from sw_init */
static void amdgpu_vce_idle_work_handler(struct work_struct *work);
71 | ||
72 | /** | |
73 | * amdgpu_vce_init - allocate memory, load vce firmware | |
74 | * | |
75 | * @adev: amdgpu_device pointer | |
76 | * | |
77 | * First step to get VCE online, allocate memory and load the firmware | |
78 | */ | |
e9822622 | 79 | int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) |
d38ceaf9 | 80 | { |
c594989c CK |
81 | struct amdgpu_ring *ring; |
82 | struct amd_sched_rq *rq; | |
d38ceaf9 AD |
83 | const char *fw_name; |
84 | const struct common_firmware_header *hdr; | |
85 | unsigned ucode_version, version_major, version_minor, binary_id; | |
86 | int i, r; | |
87 | ||
d38ceaf9 AD |
88 | switch (adev->asic_type) { |
89 | #ifdef CONFIG_DRM_AMDGPU_CIK | |
90 | case CHIP_BONAIRE: | |
91 | fw_name = FIRMWARE_BONAIRE; | |
92 | break; | |
93 | case CHIP_KAVERI: | |
94 | fw_name = FIRMWARE_KAVERI; | |
95 | break; | |
96 | case CHIP_KABINI: | |
97 | fw_name = FIRMWARE_KABINI; | |
98 | break; | |
99 | case CHIP_HAWAII: | |
100 | fw_name = FIRMWARE_HAWAII; | |
101 | break; | |
102 | case CHIP_MULLINS: | |
103 | fw_name = FIRMWARE_MULLINS; | |
104 | break; | |
105 | #endif | |
106 | case CHIP_TONGA: | |
107 | fw_name = FIRMWARE_TONGA; | |
108 | break; | |
109 | case CHIP_CARRIZO: | |
110 | fw_name = FIRMWARE_CARRIZO; | |
111 | break; | |
188a9bcd AD |
112 | case CHIP_FIJI: |
113 | fw_name = FIRMWARE_FIJI; | |
114 | break; | |
cfaba566 SL |
115 | case CHIP_STONEY: |
116 | fw_name = FIRMWARE_STONEY; | |
117 | break; | |
2cc0c0b5 FC |
118 | case CHIP_POLARIS10: |
119 | fw_name = FIRMWARE_POLARIS10; | |
1b4eeea5 | 120 | break; |
2cc0c0b5 FC |
121 | case CHIP_POLARIS11: |
122 | fw_name = FIRMWARE_POLARIS11; | |
1b4eeea5 | 123 | break; |
d38ceaf9 AD |
124 | |
125 | default: | |
126 | return -EINVAL; | |
127 | } | |
128 | ||
129 | r = request_firmware(&adev->vce.fw, fw_name, adev->dev); | |
130 | if (r) { | |
131 | dev_err(adev->dev, "amdgpu_vce: Can't load firmware \"%s\"\n", | |
132 | fw_name); | |
133 | return r; | |
134 | } | |
135 | ||
136 | r = amdgpu_ucode_validate(adev->vce.fw); | |
137 | if (r) { | |
138 | dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n", | |
139 | fw_name); | |
140 | release_firmware(adev->vce.fw); | |
141 | adev->vce.fw = NULL; | |
142 | return r; | |
143 | } | |
144 | ||
145 | hdr = (const struct common_firmware_header *)adev->vce.fw->data; | |
146 | ||
147 | ucode_version = le32_to_cpu(hdr->ucode_version); | |
148 | version_major = (ucode_version >> 20) & 0xfff; | |
149 | version_minor = (ucode_version >> 8) & 0xfff; | |
150 | binary_id = ucode_version & 0xff; | |
151 | DRM_INFO("Found VCE firmware Version: %hhd.%hhd Binary ID: %hhd\n", | |
152 | version_major, version_minor, binary_id); | |
153 | adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) | | |
154 | (binary_id << 8)); | |
155 | ||
156 | /* allocate firmware, stack and heap BO */ | |
157 | ||
d38ceaf9 | 158 | r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, |
857d913d AD |
159 | AMDGPU_GEM_DOMAIN_VRAM, |
160 | AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, | |
72d7668b | 161 | NULL, NULL, &adev->vce.vcpu_bo); |
d38ceaf9 AD |
162 | if (r) { |
163 | dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); | |
164 | return r; | |
165 | } | |
166 | ||
167 | r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false); | |
168 | if (r) { | |
169 | amdgpu_bo_unref(&adev->vce.vcpu_bo); | |
170 | dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r); | |
171 | return r; | |
172 | } | |
173 | ||
174 | r = amdgpu_bo_pin(adev->vce.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM, | |
175 | &adev->vce.gpu_addr); | |
176 | amdgpu_bo_unreserve(adev->vce.vcpu_bo); | |
177 | if (r) { | |
178 | amdgpu_bo_unref(&adev->vce.vcpu_bo); | |
179 | dev_err(adev->dev, "(%d) VCE bo pin failed\n", r); | |
180 | return r; | |
181 | } | |
182 | ||
c594989c CK |
183 | |
184 | ring = &adev->vce.ring[0]; | |
185 | rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; | |
186 | r = amd_sched_entity_init(&ring->sched, &adev->vce.entity, | |
187 | rq, amdgpu_sched_jobs); | |
188 | if (r != 0) { | |
189 | DRM_ERROR("Failed setting up VCE run queue.\n"); | |
190 | return r; | |
191 | } | |
192 | ||
d38ceaf9 AD |
193 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { |
194 | atomic_set(&adev->vce.handles[i], 0); | |
195 | adev->vce.filp[i] = NULL; | |
196 | } | |
197 | ||
ebff485e CK |
198 | INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler); |
199 | mutex_init(&adev->vce.idle_mutex); | |
200 | ||
d38ceaf9 AD |
201 | return 0; |
202 | } | |
203 | ||
204 | /** | |
205 | * amdgpu_vce_fini - free memory | |
206 | * | |
207 | * @adev: amdgpu_device pointer | |
208 | * | |
209 | * Last step on VCE teardown, free firmware memory | |
210 | */ | |
211 | int amdgpu_vce_sw_fini(struct amdgpu_device *adev) | |
212 | { | |
213 | if (adev->vce.vcpu_bo == NULL) | |
214 | return 0; | |
215 | ||
c594989c CK |
216 | amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); |
217 | ||
d38ceaf9 AD |
218 | amdgpu_bo_unref(&adev->vce.vcpu_bo); |
219 | ||
220 | amdgpu_ring_fini(&adev->vce.ring[0]); | |
221 | amdgpu_ring_fini(&adev->vce.ring[1]); | |
222 | ||
223 | release_firmware(adev->vce.fw); | |
ebff485e | 224 | mutex_destroy(&adev->vce.idle_mutex); |
d38ceaf9 AD |
225 | |
226 | return 0; | |
227 | } | |
228 | ||
229 | /** | |
230 | * amdgpu_vce_suspend - unpin VCE fw memory | |
231 | * | |
232 | * @adev: amdgpu_device pointer | |
233 | * | |
234 | */ | |
235 | int amdgpu_vce_suspend(struct amdgpu_device *adev) | |
236 | { | |
237 | int i; | |
238 | ||
239 | if (adev->vce.vcpu_bo == NULL) | |
240 | return 0; | |
241 | ||
242 | for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) | |
243 | if (atomic_read(&adev->vce.handles[i])) | |
244 | break; | |
245 | ||
246 | if (i == AMDGPU_MAX_VCE_HANDLES) | |
247 | return 0; | |
248 | ||
85cc88f0 | 249 | cancel_delayed_work_sync(&adev->vce.idle_work); |
d38ceaf9 AD |
250 | /* TODO: suspending running encoding sessions isn't supported */ |
251 | return -EINVAL; | |
252 | } | |
253 | ||
/**
 * amdgpu_vce_resume - pin VCE fw memory
 *
 * @adev: amdgpu_device pointer
 *
 * Map the VCPU BO and re-upload the firmware image after resume.
 * Returns 0 on success, negative error code on failure.
 */
int amdgpu_vce_resume(struct amdgpu_device *adev)
{
	void *cpu_addr;
	const struct common_firmware_header *hdr;
	unsigned offset;
	int r;

	if (adev->vce.vcpu_bo == NULL)
		return -EINVAL;

	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
		return r;
	}

	r = amdgpu_bo_kmap(adev->vce.vcpu_bo, &cpu_addr);
	if (r) {
		amdgpu_bo_unreserve(adev->vce.vcpu_bo);
		dev_err(adev->dev, "(%d) VCE map failed\n", r);
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vce.fw->data;
	offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
	/* copy the ucode payload (header stripped) into the mapped BO;
	 * memcpy_toio because the mapping may be write-combined VRAM */
	memcpy_toio(cpu_addr, adev->vce.fw->data + offset,
		    adev->vce.fw->size - offset);

	amdgpu_bo_kunmap(adev->vce.vcpu_bo);

	amdgpu_bo_unreserve(adev->vce.vcpu_bo);

	return 0;
}
294 | ||
/**
 * amdgpu_vce_idle_work_handler - power off VCE
 *
 * @work: pointer to work structure
 *
 * Power off VCE when it's not used any more.
 */
static void amdgpu_vce_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vce.idle_work.work);

	/* both rings idle -> power down, otherwise re-arm the timer */
	if ((amdgpu_fence_count_emitted(&adev->vce.ring[0]) == 0) &&
	    (amdgpu_fence_count_emitted(&adev->vce.ring[1]) == 0)) {
		if (adev->pm.dpm_enabled) {
			amdgpu_dpm_enable_vce(adev, false);
		} else {
			amdgpu_asic_set_vce_clocks(adev, 0, 0);
		}
	} else {
		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
	}
}
318 | ||
319 | /** | |
ebff485e | 320 | * amdgpu_vce_ring_begin_use - power up VCE |
d38ceaf9 | 321 | * |
ebff485e | 322 | * @ring: amdgpu ring |
d38ceaf9 AD |
323 | * |
324 | * Make sure VCE is powerd up when we want to use it | |
325 | */ | |
ebff485e | 326 | void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring) |
d38ceaf9 | 327 | { |
ebff485e CK |
328 | struct amdgpu_device *adev = ring->adev; |
329 | bool set_clocks; | |
d38ceaf9 | 330 | |
ebff485e CK |
331 | mutex_lock(&adev->vce.idle_mutex); |
332 | set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work); | |
182830a1 | 333 | if (set_clocks) { |
d38ceaf9 AD |
334 | if (adev->pm.dpm_enabled) { |
335 | amdgpu_dpm_enable_vce(adev, true); | |
336 | } else { | |
337 | amdgpu_asic_set_vce_clocks(adev, 53300, 40000); | |
338 | } | |
339 | } | |
ebff485e CK |
340 | mutex_unlock(&adev->vce.idle_mutex); |
341 | } | |
342 | ||
343 | /** | |
344 | * amdgpu_vce_ring_end_use - power VCE down | |
345 | * | |
346 | * @ring: amdgpu ring | |
347 | * | |
348 | * Schedule work to power VCE down again | |
349 | */ | |
350 | void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring) | |
351 | { | |
352 | schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT); | |
d38ceaf9 AD |
353 | } |
354 | ||
/**
 * amdgpu_vce_free_handles - free still open VCE handles
 *
 * @adev: amdgpu_device pointer
 * @filp: drm file pointer
 *
 * Close all VCE handles still open by this file pointer.
 */
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
	struct amdgpu_ring *ring = &adev->vce.ring[0];
	int i, r;
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		uint32_t handle = atomic_read(&adev->vce.handles[i]);

		if (!handle || adev->vce.filp[i] != filp)
			continue;

		/* submit a destroy msg (via the scheduler, not direct) so
		 * the firmware closes the session cleanly */
		r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
		if (r)
			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);

		adev->vce.filp[i] = NULL;
		atomic_set(&adev->vce.handles[i], 0);
	}
}
381 | ||
382 | /** | |
383 | * amdgpu_vce_get_create_msg - generate a VCE create msg | |
384 | * | |
385 | * @adev: amdgpu_device pointer | |
386 | * @ring: ring we should submit the msg to | |
387 | * @handle: VCE session handle to use | |
388 | * @fence: optional fence to return | |
389 | * | |
390 | * Open up a stream for HW test | |
391 | */ | |
392 | int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, | |
ed40bfb8 | 393 | struct fence **fence) |
d38ceaf9 AD |
394 | { |
395 | const unsigned ib_size_dw = 1024; | |
d71518b5 CK |
396 | struct amdgpu_job *job; |
397 | struct amdgpu_ib *ib; | |
1763552e | 398 | struct fence *f = NULL; |
d38ceaf9 AD |
399 | uint64_t dummy; |
400 | int i, r; | |
401 | ||
d71518b5 CK |
402 | r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); |
403 | if (r) | |
d38ceaf9 | 404 | return r; |
d71518b5 CK |
405 | |
406 | ib = &job->ibs[0]; | |
d38ceaf9 | 407 | |
8128765c | 408 | dummy = ib->gpu_addr + 1024; |
d38ceaf9 AD |
409 | |
410 | /* stitch together an VCE create msg */ | |
8128765c CZ |
411 | ib->length_dw = 0; |
412 | ib->ptr[ib->length_dw++] = 0x0000000c; /* len */ | |
413 | ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */ | |
414 | ib->ptr[ib->length_dw++] = handle; | |
415 | ||
d66f8e48 LL |
416 | if ((ring->adev->vce.fw_version >> 24) >= 52) |
417 | ib->ptr[ib->length_dw++] = 0x00000040; /* len */ | |
418 | else | |
419 | ib->ptr[ib->length_dw++] = 0x00000030; /* len */ | |
8128765c CZ |
420 | ib->ptr[ib->length_dw++] = 0x01000001; /* create cmd */ |
421 | ib->ptr[ib->length_dw++] = 0x00000000; | |
422 | ib->ptr[ib->length_dw++] = 0x00000042; | |
423 | ib->ptr[ib->length_dw++] = 0x0000000a; | |
424 | ib->ptr[ib->length_dw++] = 0x00000001; | |
425 | ib->ptr[ib->length_dw++] = 0x00000080; | |
426 | ib->ptr[ib->length_dw++] = 0x00000060; | |
427 | ib->ptr[ib->length_dw++] = 0x00000100; | |
428 | ib->ptr[ib->length_dw++] = 0x00000100; | |
429 | ib->ptr[ib->length_dw++] = 0x0000000c; | |
430 | ib->ptr[ib->length_dw++] = 0x00000000; | |
d66f8e48 LL |
431 | if ((ring->adev->vce.fw_version >> 24) >= 52) { |
432 | ib->ptr[ib->length_dw++] = 0x00000000; | |
433 | ib->ptr[ib->length_dw++] = 0x00000000; | |
434 | ib->ptr[ib->length_dw++] = 0x00000000; | |
435 | ib->ptr[ib->length_dw++] = 0x00000000; | |
436 | } | |
8128765c CZ |
437 | |
438 | ib->ptr[ib->length_dw++] = 0x00000014; /* len */ | |
439 | ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */ | |
440 | ib->ptr[ib->length_dw++] = upper_32_bits(dummy); | |
441 | ib->ptr[ib->length_dw++] = dummy; | |
442 | ib->ptr[ib->length_dw++] = 0x00000001; | |
443 | ||
444 | for (i = ib->length_dw; i < ib_size_dw; ++i) | |
445 | ib->ptr[i] = 0x0; | |
446 | ||
c5637837 | 447 | r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f); |
22a77cf6 | 448 | job->fence = fence_get(f); |
8128765c CZ |
449 | if (r) |
450 | goto err; | |
9f2ade33 CK |
451 | |
452 | amdgpu_job_free(job); | |
d38ceaf9 | 453 | if (fence) |
1763552e | 454 | *fence = fence_get(f); |
281b4223 | 455 | fence_put(f); |
cadf97b1 | 456 | return 0; |
d71518b5 | 457 | |
8128765c | 458 | err: |
d71518b5 | 459 | amdgpu_job_free(job); |
d38ceaf9 AD |
460 | return r; |
461 | } | |
462 | ||
/**
 * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg
 *
 * @ring: ring we should submit the msg to
 * @handle: VCE session handle to use
 * @direct: submit directly to the ring instead of through the scheduler
 * @fence: optional fence to return
 *
 * Close up a stream for HW test or if userspace failed to do so.
 */
int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
			       bool direct, struct fence **fence)
{
	const unsigned ib_size_dw = 1024;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct fence *f = NULL;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
	if (r)
		return r;

	ib = &job->ibs[0];

	/* stitch together an VCE destroy msg */
	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x0000000c; /* len */
	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
	ib->ptr[ib->length_dw++] = handle;

	ib->ptr[ib->length_dw++] = 0x00000020; /* len */
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if no */
	ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */

	/* pad the rest of the IB with NOPs */
	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	if (direct) {
		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
		job->fence = fence_get(f);
		if (r)
			goto err;

		amdgpu_job_free(job);
	} else {
		/* scheduler owns the job and frees it after execution */
		r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
		if (r)
			goto err;
	}

	if (fence)
		*fence = fence_get(f);
	fence_put(f);
	return 0;

err:
	amdgpu_job_free(job);
	return r;
}
532 | ||
533 | /** | |
534 | * amdgpu_vce_cs_reloc - command submission relocation | |
535 | * | |
536 | * @p: parser context | |
537 | * @lo: address of lower dword | |
538 | * @hi: address of higher dword | |
f1689ec1 | 539 | * @size: minimum size |
d38ceaf9 AD |
540 | * |
541 | * Patch relocation inside command stream with real buffer address | |
542 | */ | |
f1689ec1 | 543 | static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, |
dc78330a | 544 | int lo, int hi, unsigned size, uint32_t index) |
d38ceaf9 AD |
545 | { |
546 | struct amdgpu_bo_va_mapping *mapping; | |
d38ceaf9 AD |
547 | struct amdgpu_bo *bo; |
548 | uint64_t addr; | |
549 | ||
dc78330a CK |
550 | if (index == 0xffffffff) |
551 | index = 0; | |
552 | ||
d38ceaf9 AD |
553 | addr = ((uint64_t)amdgpu_get_ib_value(p, ib_idx, lo)) | |
554 | ((uint64_t)amdgpu_get_ib_value(p, ib_idx, hi)) << 32; | |
dc78330a | 555 | addr += ((uint64_t)size) * ((uint64_t)index); |
d38ceaf9 AD |
556 | |
557 | mapping = amdgpu_cs_find_mapping(p, addr, &bo); | |
558 | if (mapping == NULL) { | |
dc78330a CK |
559 | DRM_ERROR("Can't find BO for addr 0x%010Lx %d %d %d %d\n", |
560 | addr, lo, hi, size, index); | |
d38ceaf9 AD |
561 | return -EINVAL; |
562 | } | |
563 | ||
f1689ec1 CK |
564 | if ((addr + (uint64_t)size) > |
565 | ((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) { | |
566 | DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n", | |
567 | addr, lo, hi); | |
568 | return -EINVAL; | |
569 | } | |
570 | ||
d38ceaf9 AD |
571 | addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; |
572 | addr += amdgpu_bo_gpu_offset(bo); | |
dc78330a | 573 | addr -= ((uint64_t)size) * ((uint64_t)index); |
d38ceaf9 | 574 | |
7270f839 CK |
575 | amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr)); |
576 | amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr)); | |
d38ceaf9 AD |
577 | |
578 | return 0; | |
579 | } | |
580 | ||
/**
 * amdgpu_vce_validate_handle - validate stream handle
 *
 * @p: parser context
 * @handle: handle to validate
 * @allocated: bitmask, bit i set when session slot i was newly allocated
 *
 * Validates the handle and returns the found session index, or -EINVAL
 * when we don't have another free session index.
 */
static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
				      uint32_t handle, uint32_t *allocated)
{
	unsigned i;

	/* validate the handle */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (atomic_read(&p->adev->vce.handles[i]) == handle) {
			/* a handle may only be used by the file that opened it */
			if (p->adev->vce.filp[i] != p->filp) {
				DRM_ERROR("VCE handle collision detected!\n");
				return -EINVAL;
			}
			return i;
		}
	}

	/* handle not found try to alloc a new one */
	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
		if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
			p->adev->vce.filp[i] = p->filp;
			p->adev->vce.img_size[i] = 0;
			*allocated |= 1 << i;
			return i;
		}
	}

	DRM_ERROR("No more free VCE handles!\n");
	return -EINVAL;
}
620 | ||
/**
 * amdgpu_vce_ring_parse_cs - parse and validate the command stream
 *
 * @p: parser context
 * @ib_idx: index of the IB to parse
 *
 * Walks the VCE command packets in the IB, validates session handles,
 * patches buffer relocations and tracks created/destroyed sessions.
 * Returns 0 on success, negative error code on invalid streams.
 */
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
	unsigned fb_idx = 0, bs_idx = 0;
	int session_idx = -1;
	/* per-session bitmasks tracking lifecycle within this IB */
	uint32_t destroyed = 0;
	uint32_t created = 0;
	uint32_t allocated = 0;
	uint32_t tmp, handle = 0;
	/* points at the current session's image size once a session cmd
	 * was seen; &tmp before that so stores are harmless */
	uint32_t *size = &tmp;
	int i, r = 0, idx = 0;

	while (idx < ib->length_dw) {
		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);

		/* packets are at least 8 bytes and dword aligned */
		if ((len < 8) || (len & 3)) {
			DRM_ERROR("invalid VCE command length (%d)!\n", len);
			r = -EINVAL;
			goto out;
		}

		switch (cmd) {
		case 0x00000001: /* session */
			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
			session_idx = amdgpu_vce_validate_handle(p, handle,
								 &allocated);
			if (session_idx < 0) {
				r = session_idx;
				goto out;
			}
			size = &p->adev->vce.img_size[session_idx];
			break;

		case 0x00000002: /* task info */
			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
			break;

		case 0x01000001: /* create */
			created |= 1 << session_idx;
			if (destroyed & (1 << session_idx)) {
				/* destroy followed by create: session reused */
				destroyed &= ~(1 << session_idx);
				allocated |= 1 << session_idx;

			} else if (!(allocated & (1 << session_idx))) {
				DRM_ERROR("Handle already in use!\n");
				r = -EINVAL;
				goto out;
			}

			/* width * height * 8 * 3/2 (NV12-style size estimate) */
			*size = amdgpu_get_ib_value(p, ib_idx, idx + 8) *
				amdgpu_get_ib_value(p, ib_idx, idx + 10) *
				8 * 3 / 2;
			break;

		case 0x04000001: /* config extension */
		case 0x04000002: /* pic control */
		case 0x04000005: /* rate control */
		case 0x04000007: /* motion estimation */
		case 0x04000008: /* rdo */
		case 0x04000009: /* vui */
		case 0x05000002: /* auxiliary buffer */
			break;

		case 0x03000001: /* encode */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
						*size, 0);
			if (r)
				goto out;

			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 12, idx + 11,
						*size / 3, 0);
			if (r)
				goto out;
			break;

		case 0x02000001: /* destroy */
			destroyed |= 1 << session_idx;
			break;

		case 0x05000001: /* context buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						*size * 2, 0);
			if (r)
				goto out;
			break;

		case 0x05000004: /* video bitstream buffer */
			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						tmp, bs_idx);
			if (r)
				goto out;
			break;

		case 0x05000005: /* feedback buffer */
			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
						4096, fb_idx);
			if (r)
				goto out;
			break;

		default:
			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
			r = -EINVAL;
			goto out;
		}

		/* every IB must open with a session command */
		if (session_idx == -1) {
			DRM_ERROR("no session command at start of IB\n");
			r = -EINVAL;
			goto out;
		}

		idx += len / 4;
	}

	if (allocated & ~created) {
		DRM_ERROR("New session without create command!\n");
		r = -ENOENT;
	}

out:
	if (!r) {
		/* No error, free all destroyed handle slots */
		tmp = destroyed;
	} else {
		/* Error during parsing, free all allocated handle slots */
		tmp = allocated;
	}

	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
		if (tmp & (1 << i))
			atomic_set(&p->adev->vce.handles[i], 0);

	return r;
}
765 | ||
d38ceaf9 AD |
766 | /** |
767 | * amdgpu_vce_ring_emit_ib - execute indirect buffer | |
768 | * | |
769 | * @ring: engine to use | |
770 | * @ib: the IB to execute | |
771 | * | |
772 | */ | |
d88bf583 CK |
773 | void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, |
774 | unsigned vm_id, bool ctx_switch) | |
d38ceaf9 AD |
775 | { |
776 | amdgpu_ring_write(ring, VCE_CMD_IB); | |
777 | amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); | |
778 | amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); | |
779 | amdgpu_ring_write(ring, ib->length_dw); | |
780 | } | |
781 | ||
/**
 * amdgpu_vce_ring_emit_fence - add a fence command to the ring
 *
 * @ring: engine to use
 * @addr: address to write the fence value to
 * @seq: sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_* flags (64 bit fences not supported by VCE)
 *
 */
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				unsigned flags)
{
	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);

	amdgpu_ring_write(ring, VCE_CMD_FENCE);
	amdgpu_ring_write(ring, addr);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, VCE_CMD_TRAP);
	amdgpu_ring_write(ring, VCE_CMD_END);
}
801 | ||
/**
 * amdgpu_vce_ring_test_ring - test if VCE ring is working
 *
 * @ring: the engine to test on
 *
 * Submits a single END command and polls the read pointer until the
 * engine consumed it. Returns 0 on success, -ETIMEDOUT otherwise.
 */
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr = amdgpu_ring_get_rptr(ring);
	unsigned i;
	int r;

	r = amdgpu_ring_alloc(ring, 16);
	if (r) {
		DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
			  ring->idx, r);
		return r;
	}
	amdgpu_ring_write(ring, VCE_CMD_END);
	amdgpu_ring_commit(ring);

	/* busy-wait (1us steps) for the rptr to move past our command */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed\n",
			  ring->idx);
		r = -ETIMEDOUT;
	}

	return r;
}
841 | ||
/**
 * amdgpu_vce_ring_test_ib - test if VCE IBs are working
 *
 * @ring: the engine to test on
 * @timeout: max time to wait for the destroy fence, in jiffies
 *
 * Submits a create and a destroy message and waits for the fence.
 * Returns 0 on success, negative error code on failure or timeout.
 */
int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct fence *fence = NULL;
	long r;

	/* skip vce ring1 ib test for now, since it's not reliable */
	if (ring == &ring->adev->vce.ring[1])
		return 0;

	r = amdgpu_vce_get_create_msg(ring, 1, NULL);
	if (r) {
		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
		goto error;
	}

	r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
	if (r) {
		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
		goto error;
	}

	/* 0 == timed out, <0 == wait error, >0 == signaled in time */
	r = fence_wait_timeout(fence, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
	} else {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	}
error:
	fence_put(fence);
	return r;
}