drm/amdgpu: make pad_ib a ring function v3
[deliverable/linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
CommitLineData
aaa36a97
AD
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23#include <linux/firmware.h>
24#include "drmP.h"
25#include "amdgpu.h"
26#include "amdgpu_gfx.h"
27#include "vi.h"
28#include "vid.h"
29#include "amdgpu_ucode.h"
30#include "clearstate_vi.h"
31
32#include "gmc/gmc_8_2_d.h"
33#include "gmc/gmc_8_2_sh_mask.h"
34
35#include "oss/oss_3_0_d.h"
36#include "oss/oss_3_0_sh_mask.h"
37
38#include "bif/bif_5_0_d.h"
39#include "bif/bif_5_0_sh_mask.h"
40
41#include "gca/gfx_8_0_d.h"
42#include "gca/gfx_8_0_enum.h"
43#include "gca/gfx_8_0_sh_mask.h"
44#include "gca/gfx_8_0_enum.h"
45
46#include "uvd/uvd_5_0_d.h"
47#include "uvd/uvd_5_0_sh_mask.h"
48
49#include "dce/dce_10_0_d.h"
50#include "dce/dce_10_0_sh_mask.h"
51
52#define GFX8_NUM_GFX_RINGS 1
53#define GFX8_NUM_COMPUTE_RINGS 8
54
55#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
56#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
57#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
58
59#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
60#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
61#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
62#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
63#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
64#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
65#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
66#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
67#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
68
6e378858
EH
69#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
70#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
71#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
72#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
73#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
74#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
75
76/* BPM SERDES CMD */
77#define SET_BPM_SERDES_CMD 1
78#define CLE_BPM_SERDES_CMD 0
79
80/* BPM Register Address*/
81enum {
82 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
83 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
84 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
85 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
86 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
87 BPM_REG_FGCG_MAX
88};
89
c65444fe
JZ
90MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
91MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
92MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
93MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
94MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
95MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
96
e3c7656c
SL
97MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
98MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
99MODULE_FIRMWARE("amdgpu/stoney_me.bin");
100MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
101MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
102
c65444fe
JZ
103MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
104MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
105MODULE_FIRMWARE("amdgpu/tonga_me.bin");
106MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
107MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
108MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
109
110MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
111MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
112MODULE_FIRMWARE("amdgpu/topaz_me.bin");
113MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
c65444fe 114MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
aaa36a97 115
af15a2d5
DZ
116MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
117MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
118MODULE_FIRMWARE("amdgpu/fiji_me.bin");
119MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
120MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
121MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
122
aaa36a97
AD
/*
 * Per-VMID GDS register offsets: one row per VMID (0-15), each row
 * listing {BASE, SIZE, GWS, OA} MMIO register offsets for that VMID.
 * Indexed by VMID when programming GDS partitioning.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
142
/*
 * Tonga A11 golden register settings.
 * Flat list of {register, mask, value} triplets consumed by
 * amdgpu_program_register_sequence(): only the bits in <mask> are
 * updated with <value>, the rest of the register is preserved.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
161
/*
 * Tonga common golden config ({register, mask, value} triplets):
 * raster config and SPI CU resource reservation defaults.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
173
174static const u32 tonga_mgcg_cgcg_init[] =
175{
176 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
177 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
178 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
179 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
180 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
181 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
182 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
183 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
184 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
185 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
186 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
187 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
188 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
189 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
190 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
191 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
192 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
193 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
194 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
195 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
196 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
197 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
198 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
199 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
200 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
201 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
202 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
203 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
204 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
205 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
206 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
207 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
208 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
209 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
210 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
211 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
212 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
213 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
214 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
215 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
216 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
217 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
218 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
219 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
220 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
221 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
222 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
223 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
224 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
225 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
226 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
227 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
228 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
229 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
230 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
231 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
232 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
233 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
234 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
235 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
236 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
237 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
238 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
239 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
240 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
241 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
242 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
243 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
244 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
245 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
246 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
247 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
248 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
249 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
250 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
251};
252
af15a2d5
DZ
/*
 * Fiji common golden config ({register, mask, value} triplets).
 * Note mmGB_ADDR_CONFIG matches TONGA_GB_ADDR_CONFIG_GOLDEN (0x22011003).
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
266
/*
 * Fiji A10 golden register settings ({register, mask, value} triplets),
 * applied via amdgpu_program_register_sequence() at golden-register init.
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
281
282static const u32 fiji_mgcg_cgcg_init[] =
283{
a7ca8ef9 284 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
af15a2d5
DZ
285 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
286 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
287 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
288 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
289 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
290 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
291 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
292 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
293 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
294 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
295 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
296 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
297 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
298 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
299 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
300 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
301 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
302 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
303 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
304 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
305 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
306 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
307 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
308 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
309 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
310 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
311 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
312 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
313 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
314 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
315 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
316 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
317 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
318 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
319};
320
aaa36a97
AD
/*
 * Iceland (Topaz) A11 golden register settings
 * ({register, mask, value} triplets).
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
339
/*
 * Iceland (Topaz) common golden config ({register, mask, value} triplets).
 * mmGB_ADDR_CONFIG matches TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
351
352static const u32 iceland_mgcg_cgcg_init[] =
353{
354 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
355 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
356 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
357 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
358 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
359 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
360 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
361 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
362 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
363 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
364 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
365 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
366 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
367 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
368 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
369 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
370 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
371 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
372 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
373 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
374 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
375 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
376 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
377 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
378 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
379 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
380 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
381 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
382 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
383 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
384 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
385 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
386 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
387 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
388 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
389 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
390 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
391 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
392 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
393 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
394 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
395 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
396 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
397 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
398 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
399 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
400 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
401 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
402 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
403 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
404 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
405 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
406 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
407 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
408 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
409 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
410 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
411 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
412 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
413 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
414 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
415 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
416 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
417 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
418};
419
/*
 * Carrizo A11 golden register settings ({register, mask, value} triplets).
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
433
/*
 * Carrizo common golden config ({register, mask, value} triplets).
 * mmGB_ADDR_CONFIG matches CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
445
446static const u32 cz_mgcg_cgcg_init[] =
447{
448 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
449 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
450 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
451 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
452 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
453 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
454 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
455 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
456 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
457 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
458 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
459 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
460 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
461 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
462 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
463 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
464 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
465 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
466 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
467 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
468 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
469 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
470 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
471 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
472 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
473 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
474 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
475 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
476 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
477 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
478 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
479 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
480 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
481 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
482 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
483 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
484 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
485 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
486 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
487 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
488 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
489 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
490 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
491 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
492 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
493 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
494 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
495 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
496 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
497 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
498 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
499 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
500 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
501 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
502 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
503 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
504 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
505 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
506 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
507 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
508 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
509 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
510 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
511 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
512 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
513 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
514 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
515 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
516 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
517 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
518 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
519 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
520 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
521 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
522 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
523};
524
e3c7656c
SL
/*
 * Stoney A11 golden register settings ({register, mask, value} triplets).
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
538
/*
 * Stoney common golden config ({register, mask, value} triplets).
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
550
/*
 * Stoney clock/power gating init ({register, mask, value} triplets):
 * MGCG/CGCG control, memory slowdown, and CGTS shader-module gating.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
560
aaa36a97
AD
561static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
562static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
563static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
564
565static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
566{
567 switch (adev->asic_type) {
568 case CHIP_TOPAZ:
569 amdgpu_program_register_sequence(adev,
570 iceland_mgcg_cgcg_init,
571 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
572 amdgpu_program_register_sequence(adev,
573 golden_settings_iceland_a11,
574 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
575 amdgpu_program_register_sequence(adev,
576 iceland_golden_common_all,
577 (const u32)ARRAY_SIZE(iceland_golden_common_all));
578 break;
af15a2d5
DZ
579 case CHIP_FIJI:
580 amdgpu_program_register_sequence(adev,
581 fiji_mgcg_cgcg_init,
582 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
583 amdgpu_program_register_sequence(adev,
584 golden_settings_fiji_a10,
585 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
586 amdgpu_program_register_sequence(adev,
587 fiji_golden_common_all,
588 (const u32)ARRAY_SIZE(fiji_golden_common_all));
589 break;
590
aaa36a97
AD
591 case CHIP_TONGA:
592 amdgpu_program_register_sequence(adev,
593 tonga_mgcg_cgcg_init,
594 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
595 amdgpu_program_register_sequence(adev,
596 golden_settings_tonga_a11,
597 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
598 amdgpu_program_register_sequence(adev,
599 tonga_golden_common_all,
600 (const u32)ARRAY_SIZE(tonga_golden_common_all));
601 break;
602 case CHIP_CARRIZO:
603 amdgpu_program_register_sequence(adev,
604 cz_mgcg_cgcg_init,
605 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
606 amdgpu_program_register_sequence(adev,
607 cz_golden_settings_a11,
608 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
609 amdgpu_program_register_sequence(adev,
610 cz_golden_common_all,
611 (const u32)ARRAY_SIZE(cz_golden_common_all));
612 break;
e3c7656c
SL
613 case CHIP_STONEY:
614 amdgpu_program_register_sequence(adev,
615 stoney_mgcg_cgcg_init,
616 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
617 amdgpu_program_register_sequence(adev,
618 stoney_golden_settings_a11,
619 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
620 amdgpu_program_register_sequence(adev,
621 stoney_golden_common_all,
622 (const u32)ARRAY_SIZE(stoney_golden_common_all));
623 break;
aaa36a97
AD
624 default:
625 break;
626 }
627}
628
629static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
630{
631 int i;
632
633 adev->gfx.scratch.num_reg = 7;
634 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
635 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
636 adev->gfx.scratch.free[i] = true;
637 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
638 }
639}
640
/*
 * gfx_v8_0_ring_test_ring - basic CP ring sanity test
 * @ring: ring to exercise
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a 3-dword
 * SET_UCONFIG_REG packet on @ring that writes 0xDEADBEEF to the same
 * register, then polls until the value appears or adev->usec_timeout
 * microseconds elapse.
 *
 * Returns 0 on success, a negative error code on allocation/submission
 * failure, or -EINVAL if the CP never wrote the value back.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Known "not yet written" marker the CP must overwrite. */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* SET_UCONFIG_REG packet: CP writes 0xDEADBEEF to the scratch reg. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* Busy-wait for the CP to execute the packet. */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
684
/*
 * gfx_v8_0_ring_test_ib - indirect-buffer submission sanity test
 * @ring: ring to submit the test IB on
 *
 * Like the ring test, but goes through the full IB path: builds a small
 * IB containing a SET_UCONFIG_REG packet that writes 0xDEADBEEF to a
 * scratch register, submits it via the scheduler helper, waits on the
 * returned fence, then polls the scratch register for the value.
 *
 * Cleanup is goto-based: err2 releases the fence and IB, err1 releases
 * the scratch register; order matters.
 *
 * Returns 0 on success, a negative error code on failure, or -EINVAL if
 * the expected value never appeared.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* Known "not yet written" marker the IB must overwrite. */
	WREG32(scratch, 0xCAFEDEAD);
	/* Zero the IB struct so error paths can safely free it. */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(ring, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		goto err1;
	}
	/* 3-dword SET_UCONFIG_REG packet writing 0xDEADBEEF to scratch. */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
						 AMDGPU_FENCE_OWNER_UNDEFINED,
						 &f);
	if (r)
		goto err2;

	/* Block (uninterruptibly) until the GPU signals completion. */
	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto err2;
	}
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
			 ring->idx, i);
		goto err2;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	fence_put(f);
	amdgpu_ib_free(adev, &ib);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
745
746static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
747{
748 const char *chip_name;
749 char fw_name[30];
750 int err;
751 struct amdgpu_firmware_info *info = NULL;
752 const struct common_firmware_header *header = NULL;
595fd013 753 const struct gfx_firmware_header_v1_0 *cp_hdr;
aaa36a97
AD
754
755 DRM_DEBUG("\n");
756
757 switch (adev->asic_type) {
758 case CHIP_TOPAZ:
759 chip_name = "topaz";
760 break;
761 case CHIP_TONGA:
762 chip_name = "tonga";
763 break;
764 case CHIP_CARRIZO:
765 chip_name = "carrizo";
766 break;
af15a2d5
DZ
767 case CHIP_FIJI:
768 chip_name = "fiji";
769 break;
e3c7656c
SL
770 case CHIP_STONEY:
771 chip_name = "stoney";
772 break;
aaa36a97
AD
773 default:
774 BUG();
775 }
776
c65444fe 777 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
aaa36a97
AD
778 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
779 if (err)
780 goto out;
781 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
782 if (err)
783 goto out;
595fd013
JZ
784 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
785 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
786 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 787
c65444fe 788 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
aaa36a97
AD
789 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
790 if (err)
791 goto out;
792 err = amdgpu_ucode_validate(adev->gfx.me_fw);
793 if (err)
794 goto out;
595fd013
JZ
795 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
796 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
797 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 798
c65444fe 799 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
aaa36a97
AD
800 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
801 if (err)
802 goto out;
803 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
804 if (err)
805 goto out;
595fd013
JZ
806 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
807 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
808 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 809
c65444fe 810 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
aaa36a97
AD
811 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
812 if (err)
813 goto out;
814 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
595fd013
JZ
815 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
816 adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
817 adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 818
c65444fe 819 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
aaa36a97
AD
820 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
821 if (err)
822 goto out;
823 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
824 if (err)
825 goto out;
595fd013
JZ
826 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
827 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
828 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
aaa36a97 829
97dde76a
AD
830 if ((adev->asic_type != CHIP_STONEY) &&
831 (adev->asic_type != CHIP_TOPAZ)) {
e3c7656c
SL
832 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
833 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
834 if (!err) {
835 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
836 if (err)
837 goto out;
838 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
839 adev->gfx.mec2_fw->data;
840 adev->gfx.mec2_fw_version =
841 le32_to_cpu(cp_hdr->header.ucode_version);
842 adev->gfx.mec2_feature_version =
843 le32_to_cpu(cp_hdr->ucode_feature_version);
844 } else {
845 err = 0;
846 adev->gfx.mec2_fw = NULL;
847 }
aaa36a97
AD
848 }
849
850 if (adev->firmware.smu_load) {
851 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
852 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
853 info->fw = adev->gfx.pfp_fw;
854 header = (const struct common_firmware_header *)info->fw->data;
855 adev->firmware.fw_size +=
856 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
857
858 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
859 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
860 info->fw = adev->gfx.me_fw;
861 header = (const struct common_firmware_header *)info->fw->data;
862 adev->firmware.fw_size +=
863 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
864
865 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
866 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
867 info->fw = adev->gfx.ce_fw;
868 header = (const struct common_firmware_header *)info->fw->data;
869 adev->firmware.fw_size +=
870 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
871
872 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
873 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
874 info->fw = adev->gfx.rlc_fw;
875 header = (const struct common_firmware_header *)info->fw->data;
876 adev->firmware.fw_size +=
877 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
878
879 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
880 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
881 info->fw = adev->gfx.mec_fw;
882 header = (const struct common_firmware_header *)info->fw->data;
883 adev->firmware.fw_size +=
884 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
885
886 if (adev->gfx.mec2_fw) {
887 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
888 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
889 info->fw = adev->gfx.mec2_fw;
890 header = (const struct common_firmware_header *)info->fw->data;
891 adev->firmware.fw_size +=
892 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
893 }
894
895 }
896
897out:
898 if (err) {
899 dev_err(adev->dev,
900 "gfx8: Failed to load firmware \"%s\"\n",
901 fw_name);
902 release_firmware(adev->gfx.pfp_fw);
903 adev->gfx.pfp_fw = NULL;
904 release_firmware(adev->gfx.me_fw);
905 adev->gfx.me_fw = NULL;
906 release_firmware(adev->gfx.ce_fw);
907 adev->gfx.ce_fw = NULL;
908 release_firmware(adev->gfx.rlc_fw);
909 adev->gfx.rlc_fw = NULL;
910 release_firmware(adev->gfx.mec_fw);
911 adev->gfx.mec_fw = NULL;
912 release_firmware(adev->gfx.mec2_fw);
913 adev->gfx.mec2_fw = NULL;
914 }
915 return err;
916}
917
918static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
919{
920 int r;
921
922 if (adev->gfx.mec.hpd_eop_obj) {
923 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
924 if (unlikely(r != 0))
925 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
926 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
927 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
928
929 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
930 adev->gfx.mec.hpd_eop_obj = NULL;
931 }
932}
933
934#define MEC_HPD_SIZE 2048
935
936static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
937{
938 int r;
939 u32 *hpd;
940
941 /*
942 * we assign only 1 pipe because all other pipes will
943 * be handled by KFD
944 */
945 adev->gfx.mec.num_mec = 1;
946 adev->gfx.mec.num_pipe = 1;
947 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
948
949 if (adev->gfx.mec.hpd_eop_obj == NULL) {
950 r = amdgpu_bo_create(adev,
951 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
952 PAGE_SIZE, true,
72d7668b 953 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
aaa36a97
AD
954 &adev->gfx.mec.hpd_eop_obj);
955 if (r) {
956 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
957 return r;
958 }
959 }
960
961 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
962 if (unlikely(r != 0)) {
963 gfx_v8_0_mec_fini(adev);
964 return r;
965 }
966 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
967 &adev->gfx.mec.hpd_eop_gpu_addr);
968 if (r) {
969 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
970 gfx_v8_0_mec_fini(adev);
971 return r;
972 }
973 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
974 if (r) {
975 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
976 gfx_v8_0_mec_fini(adev);
977 return r;
978 }
979
980 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
981
982 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
983 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
984
985 return 0;
986}
987
ccba7691
AD
988static const u32 vgpr_init_compute_shader[] =
989{
990 0x7e000209, 0x7e020208,
991 0x7e040207, 0x7e060206,
992 0x7e080205, 0x7e0a0204,
993 0x7e0c0203, 0x7e0e0202,
994 0x7e100201, 0x7e120200,
995 0x7e140209, 0x7e160208,
996 0x7e180207, 0x7e1a0206,
997 0x7e1c0205, 0x7e1e0204,
998 0x7e200203, 0x7e220202,
999 0x7e240201, 0x7e260200,
1000 0x7e280209, 0x7e2a0208,
1001 0x7e2c0207, 0x7e2e0206,
1002 0x7e300205, 0x7e320204,
1003 0x7e340203, 0x7e360202,
1004 0x7e380201, 0x7e3a0200,
1005 0x7e3c0209, 0x7e3e0208,
1006 0x7e400207, 0x7e420206,
1007 0x7e440205, 0x7e460204,
1008 0x7e480203, 0x7e4a0202,
1009 0x7e4c0201, 0x7e4e0200,
1010 0x7e500209, 0x7e520208,
1011 0x7e540207, 0x7e560206,
1012 0x7e580205, 0x7e5a0204,
1013 0x7e5c0203, 0x7e5e0202,
1014 0x7e600201, 0x7e620200,
1015 0x7e640209, 0x7e660208,
1016 0x7e680207, 0x7e6a0206,
1017 0x7e6c0205, 0x7e6e0204,
1018 0x7e700203, 0x7e720202,
1019 0x7e740201, 0x7e760200,
1020 0x7e780209, 0x7e7a0208,
1021 0x7e7c0207, 0x7e7e0206,
1022 0xbf8a0000, 0xbf810000,
1023};
1024
1025static const u32 sgpr_init_compute_shader[] =
1026{
1027 0xbe8a0100, 0xbe8c0102,
1028 0xbe8e0104, 0xbe900106,
1029 0xbe920108, 0xbe940100,
1030 0xbe960102, 0xbe980104,
1031 0xbe9a0106, 0xbe9c0108,
1032 0xbe9e0100, 0xbea00102,
1033 0xbea20104, 0xbea40106,
1034 0xbea60108, 0xbea80100,
1035 0xbeaa0102, 0xbeac0104,
1036 0xbeae0106, 0xbeb00108,
1037 0xbeb20100, 0xbeb40102,
1038 0xbeb60104, 0xbeb80106,
1039 0xbeba0108, 0xbebc0100,
1040 0xbebe0102, 0xbec00104,
1041 0xbec20106, 0xbec40108,
1042 0xbec60100, 0xbec80102,
1043 0xbee60004, 0xbee70005,
1044 0xbeea0006, 0xbeeb0007,
1045 0xbee80008, 0xbee90009,
1046 0xbefc0000, 0xbf8a0000,
1047 0xbf810000, 0x00000000,
1048};
1049
1050static const u32 vgpr_init_regs[] =
1051{
1052 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1053 mmCOMPUTE_RESOURCE_LIMITS, 0,
1054 mmCOMPUTE_NUM_THREAD_X, 256*4,
1055 mmCOMPUTE_NUM_THREAD_Y, 1,
1056 mmCOMPUTE_NUM_THREAD_Z, 1,
1057 mmCOMPUTE_PGM_RSRC2, 20,
1058 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1059 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1060 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1061 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1062 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1063 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1064 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1065 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1066 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1067 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1068};
1069
1070static const u32 sgpr1_init_regs[] =
1071{
1072 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1073 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1074 mmCOMPUTE_NUM_THREAD_X, 256*5,
1075 mmCOMPUTE_NUM_THREAD_Y, 1,
1076 mmCOMPUTE_NUM_THREAD_Z, 1,
1077 mmCOMPUTE_PGM_RSRC2, 20,
1078 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1079 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1080 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1081 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1082 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1083 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1084 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1085 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1086 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1087 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1088};
1089
1090static const u32 sgpr2_init_regs[] =
1091{
1092 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1093 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1094 mmCOMPUTE_NUM_THREAD_X, 256*5,
1095 mmCOMPUTE_NUM_THREAD_Y, 1,
1096 mmCOMPUTE_NUM_THREAD_Z, 1,
1097 mmCOMPUTE_PGM_RSRC2, 20,
1098 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1099 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1100 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1101 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1102 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1103 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1104 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1105 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1106 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1107 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1108};
1109
1110static const u32 sec_ded_counter_registers[] =
1111{
1112 mmCPC_EDC_ATC_CNT,
1113 mmCPC_EDC_SCRATCH_CNT,
1114 mmCPC_EDC_UCODE_CNT,
1115 mmCPF_EDC_ATC_CNT,
1116 mmCPF_EDC_ROQ_CNT,
1117 mmCPF_EDC_TAG_CNT,
1118 mmCPG_EDC_ATC_CNT,
1119 mmCPG_EDC_DMA_CNT,
1120 mmCPG_EDC_TAG_CNT,
1121 mmDC_EDC_CSINVOC_CNT,
1122 mmDC_EDC_RESTORE_CNT,
1123 mmDC_EDC_STATE_CNT,
1124 mmGDS_EDC_CNT,
1125 mmGDS_EDC_GRBM_CNT,
1126 mmGDS_EDC_OA_DED,
1127 mmSPI_EDC_CNT,
1128 mmSQC_ATC_EDC_GATCL1_CNT,
1129 mmSQC_EDC_CNT,
1130 mmSQ_EDC_DED_CNT,
1131 mmSQ_EDC_INFO,
1132 mmSQ_EDC_SEC_CNT,
1133 mmTCC_EDC_CNT,
1134 mmTCP_ATC_EDC_GATCL1_CNT,
1135 mmTCP_EDC_CNT,
1136 mmTD_EDC_CNT
1137};
1138
1139static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1140{
1141 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1142 struct amdgpu_ib ib;
1143 struct fence *f = NULL;
1144 int r, i;
1145 u32 tmp;
1146 unsigned total_size, vgpr_offset, sgpr_offset;
1147 u64 gpu_addr;
1148
1149 /* only supported on CZ */
1150 if (adev->asic_type != CHIP_CARRIZO)
1151 return 0;
1152
1153 /* bail if the compute ring is not ready */
1154 if (!ring->ready)
1155 return 0;
1156
1157 tmp = RREG32(mmGB_EDC_MODE);
1158 WREG32(mmGB_EDC_MODE, 0);
1159
1160 total_size =
1161 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1162 total_size +=
1163 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1164 total_size +=
1165 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1166 total_size = ALIGN(total_size, 256);
1167 vgpr_offset = total_size;
1168 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1169 sgpr_offset = total_size;
1170 total_size += sizeof(sgpr_init_compute_shader);
1171
1172 /* allocate an indirect buffer to put the commands in */
1173 memset(&ib, 0, sizeof(ib));
1174 r = amdgpu_ib_get(ring, NULL, total_size, &ib);
1175 if (r) {
1176 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1177 return r;
1178 }
1179
1180 /* load the compute shaders */
1181 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1182 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1183
1184 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1185 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1186
1187 /* init the ib length to 0 */
1188 ib.length_dw = 0;
1189
1190 /* VGPR */
1191 /* write the register state for the compute dispatch */
1192 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1193 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1194 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1195 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1196 }
1197 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1198 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1199 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1200 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1201 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1202 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1203
1204 /* write dispatch packet */
1205 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1206 ib.ptr[ib.length_dw++] = 8; /* x */
1207 ib.ptr[ib.length_dw++] = 1; /* y */
1208 ib.ptr[ib.length_dw++] = 1; /* z */
1209 ib.ptr[ib.length_dw++] =
1210 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1211
1212 /* write CS partial flush packet */
1213 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1214 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1215
1216 /* SGPR1 */
1217 /* write the register state for the compute dispatch */
1218 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1219 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1220 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1221 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1222 }
1223 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1224 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1225 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1226 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1227 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1228 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1229
1230 /* write dispatch packet */
1231 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1232 ib.ptr[ib.length_dw++] = 8; /* x */
1233 ib.ptr[ib.length_dw++] = 1; /* y */
1234 ib.ptr[ib.length_dw++] = 1; /* z */
1235 ib.ptr[ib.length_dw++] =
1236 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1237
1238 /* write CS partial flush packet */
1239 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1240 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1241
1242 /* SGPR2 */
1243 /* write the register state for the compute dispatch */
1244 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1245 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1246 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1247 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1248 }
1249 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1250 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1251 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1252 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1253 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1254 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1255
1256 /* write dispatch packet */
1257 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1258 ib.ptr[ib.length_dw++] = 8; /* x */
1259 ib.ptr[ib.length_dw++] = 1; /* y */
1260 ib.ptr[ib.length_dw++] = 1; /* z */
1261 ib.ptr[ib.length_dw++] =
1262 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1263
1264 /* write CS partial flush packet */
1265 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1266 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1267
1268 /* shedule the ib on the ring */
1269 r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
1270 AMDGPU_FENCE_OWNER_UNDEFINED,
1271 &f);
1272 if (r) {
1273 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1274 goto fail;
1275 }
1276
1277 /* wait for the GPU to finish processing the IB */
1278 r = fence_wait(f, false);
1279 if (r) {
1280 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1281 goto fail;
1282 }
1283
1284 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1285 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1286 WREG32(mmGB_EDC_MODE, tmp);
1287
1288 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1289 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1290 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1291
1292
1293 /* read back registers to clear the counters */
1294 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1295 RREG32(sec_ded_counter_registers[i]);
1296
1297fail:
1298 fence_put(f);
1299 amdgpu_ib_free(adev, &ib);
1300
1301 return r;
1302}
1303
0bde3a95
AD
1304static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1305{
1306 u32 gb_addr_config;
1307 u32 mc_shared_chmap, mc_arb_ramcfg;
1308 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1309 u32 tmp;
1310
1311 switch (adev->asic_type) {
1312 case CHIP_TOPAZ:
1313 adev->gfx.config.max_shader_engines = 1;
1314 adev->gfx.config.max_tile_pipes = 2;
1315 adev->gfx.config.max_cu_per_sh = 6;
1316 adev->gfx.config.max_sh_per_se = 1;
1317 adev->gfx.config.max_backends_per_se = 2;
1318 adev->gfx.config.max_texture_channel_caches = 2;
1319 adev->gfx.config.max_gprs = 256;
1320 adev->gfx.config.max_gs_threads = 32;
1321 adev->gfx.config.max_hw_contexts = 8;
1322
1323 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1324 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1325 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1326 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1327 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1328 break;
1329 case CHIP_FIJI:
1330 adev->gfx.config.max_shader_engines = 4;
1331 adev->gfx.config.max_tile_pipes = 16;
1332 adev->gfx.config.max_cu_per_sh = 16;
1333 adev->gfx.config.max_sh_per_se = 1;
1334 adev->gfx.config.max_backends_per_se = 4;
5f2e816b 1335 adev->gfx.config.max_texture_channel_caches = 16;
0bde3a95
AD
1336 adev->gfx.config.max_gprs = 256;
1337 adev->gfx.config.max_gs_threads = 32;
1338 adev->gfx.config.max_hw_contexts = 8;
1339
1340 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1341 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1342 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1343 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1344 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1345 break;
1346 case CHIP_TONGA:
1347 adev->gfx.config.max_shader_engines = 4;
1348 adev->gfx.config.max_tile_pipes = 8;
1349 adev->gfx.config.max_cu_per_sh = 8;
1350 adev->gfx.config.max_sh_per_se = 1;
1351 adev->gfx.config.max_backends_per_se = 2;
1352 adev->gfx.config.max_texture_channel_caches = 8;
1353 adev->gfx.config.max_gprs = 256;
1354 adev->gfx.config.max_gs_threads = 32;
1355 adev->gfx.config.max_hw_contexts = 8;
1356
1357 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1358 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1359 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1360 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1361 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1362 break;
1363 case CHIP_CARRIZO:
1364 adev->gfx.config.max_shader_engines = 1;
1365 adev->gfx.config.max_tile_pipes = 2;
1366 adev->gfx.config.max_sh_per_se = 1;
1367 adev->gfx.config.max_backends_per_se = 2;
1368
1369 switch (adev->pdev->revision) {
1370 case 0xc4:
1371 case 0x84:
1372 case 0xc8:
1373 case 0xcc:
b8b339ea
AD
1374 case 0xe1:
1375 case 0xe3:
0bde3a95
AD
1376 /* B10 */
1377 adev->gfx.config.max_cu_per_sh = 8;
1378 break;
1379 case 0xc5:
1380 case 0x81:
1381 case 0x85:
1382 case 0xc9:
1383 case 0xcd:
b8b339ea
AD
1384 case 0xe2:
1385 case 0xe4:
0bde3a95
AD
1386 /* B8 */
1387 adev->gfx.config.max_cu_per_sh = 6;
1388 break;
1389 case 0xc6:
1390 case 0xca:
1391 case 0xce:
b8b339ea 1392 case 0x88:
0bde3a95
AD
1393 /* B6 */
1394 adev->gfx.config.max_cu_per_sh = 6;
1395 break;
1396 case 0xc7:
1397 case 0x87:
1398 case 0xcb:
b8b339ea
AD
1399 case 0xe5:
1400 case 0x89:
0bde3a95
AD
1401 default:
1402 /* B4 */
1403 adev->gfx.config.max_cu_per_sh = 4;
1404 break;
1405 }
1406
1407 adev->gfx.config.max_texture_channel_caches = 2;
1408 adev->gfx.config.max_gprs = 256;
1409 adev->gfx.config.max_gs_threads = 32;
1410 adev->gfx.config.max_hw_contexts = 8;
1411
e3c7656c
SL
1412 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1413 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1414 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1415 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1416 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1417 break;
1418 case CHIP_STONEY:
1419 adev->gfx.config.max_shader_engines = 1;
1420 adev->gfx.config.max_tile_pipes = 2;
1421 adev->gfx.config.max_sh_per_se = 1;
1422 adev->gfx.config.max_backends_per_se = 1;
1423
1424 switch (adev->pdev->revision) {
1425 case 0xc0:
1426 case 0xc1:
1427 case 0xc2:
1428 case 0xc4:
1429 case 0xc8:
1430 case 0xc9:
1431 adev->gfx.config.max_cu_per_sh = 3;
1432 break;
1433 case 0xd0:
1434 case 0xd1:
1435 case 0xd2:
1436 default:
1437 adev->gfx.config.max_cu_per_sh = 2;
1438 break;
1439 }
1440
1441 adev->gfx.config.max_texture_channel_caches = 2;
1442 adev->gfx.config.max_gprs = 256;
1443 adev->gfx.config.max_gs_threads = 16;
1444 adev->gfx.config.max_hw_contexts = 8;
1445
0bde3a95
AD
1446 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1447 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1448 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1449 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1450 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1451 break;
1452 default:
1453 adev->gfx.config.max_shader_engines = 2;
1454 adev->gfx.config.max_tile_pipes = 4;
1455 adev->gfx.config.max_cu_per_sh = 2;
1456 adev->gfx.config.max_sh_per_se = 1;
1457 adev->gfx.config.max_backends_per_se = 2;
1458 adev->gfx.config.max_texture_channel_caches = 4;
1459 adev->gfx.config.max_gprs = 256;
1460 adev->gfx.config.max_gs_threads = 32;
1461 adev->gfx.config.max_hw_contexts = 8;
1462
1463 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1464 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1465 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1466 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1467 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1468 break;
1469 }
1470
1471 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1472 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1473 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1474
1475 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1476 adev->gfx.config.mem_max_burst_length_bytes = 256;
1477 if (adev->flags & AMD_IS_APU) {
1478 /* Get memory bank mapping mode. */
1479 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1480 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1481 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1482
1483 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1484 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1485 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1486
1487 /* Validate settings in case only one DIMM installed. */
1488 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1489 dimm00_addr_map = 0;
1490 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1491 dimm01_addr_map = 0;
1492 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1493 dimm10_addr_map = 0;
1494 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1495 dimm11_addr_map = 0;
1496
1497 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1498 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1499 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1500 adev->gfx.config.mem_row_size_in_kb = 2;
1501 else
1502 adev->gfx.config.mem_row_size_in_kb = 1;
1503 } else {
1504 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1505 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1506 if (adev->gfx.config.mem_row_size_in_kb > 4)
1507 adev->gfx.config.mem_row_size_in_kb = 4;
1508 }
1509
1510 adev->gfx.config.shader_engine_tile_size = 32;
1511 adev->gfx.config.num_gpus = 1;
1512 adev->gfx.config.multi_gpu_tile_size = 64;
1513
1514 /* fix up row size */
1515 switch (adev->gfx.config.mem_row_size_in_kb) {
1516 case 1:
1517 default:
1518 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1519 break;
1520 case 2:
1521 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1522 break;
1523 case 4:
1524 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1525 break;
1526 }
1527 adev->gfx.config.gb_addr_config = gb_addr_config;
1528}
1529
5fc3aeeb 1530static int gfx_v8_0_sw_init(void *handle)
aaa36a97
AD
1531{
1532 int i, r;
1533 struct amdgpu_ring *ring;
5fc3aeeb 1534 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
1535
1536 /* EOP Event */
1537 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1538 if (r)
1539 return r;
1540
1541 /* Privileged reg */
1542 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1543 if (r)
1544 return r;
1545
1546 /* Privileged inst */
1547 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
1548 if (r)
1549 return r;
1550
1551 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
1552
1553 gfx_v8_0_scratch_init(adev);
1554
1555 r = gfx_v8_0_init_microcode(adev);
1556 if (r) {
1557 DRM_ERROR("Failed to load gfx firmware!\n");
1558 return r;
1559 }
1560
1561 r = gfx_v8_0_mec_init(adev);
1562 if (r) {
1563 DRM_ERROR("Failed to init MEC BOs!\n");
1564 return r;
1565 }
1566
aaa36a97
AD
1567 /* set up the gfx ring */
1568 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
1569 ring = &adev->gfx.gfx_ring[i];
1570 ring->ring_obj = NULL;
1571 sprintf(ring->name, "gfx");
1572 /* no gfx doorbells on iceland */
1573 if (adev->asic_type != CHIP_TOPAZ) {
1574 ring->use_doorbell = true;
1575 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
1576 }
1577
1578 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1579 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1580 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
1581 AMDGPU_RING_TYPE_GFX);
1582 if (r)
1583 return r;
1584 }
1585
1586 /* set up the compute queues */
1587 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
1588 unsigned irq_type;
1589
1590 /* max 32 queues per MEC */
1591 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
1592 DRM_ERROR("Too many (%d) compute rings!\n", i);
1593 break;
1594 }
1595 ring = &adev->gfx.compute_ring[i];
1596 ring->ring_obj = NULL;
1597 ring->use_doorbell = true;
1598 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
1599 ring->me = 1; /* first MEC */
1600 ring->pipe = i / 8;
1601 ring->queue = i % 8;
1602 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
1603 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
1604 /* type-2 packets are deprecated on MEC, use type-3 instead */
1605 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
1606 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
1607 &adev->gfx.eop_irq, irq_type,
1608 AMDGPU_RING_TYPE_COMPUTE);
1609 if (r)
1610 return r;
1611 }
1612
1613 /* reserve GDS, GWS and OA resource for gfx */
1614 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
1615 PAGE_SIZE, true,
72d7668b 1616 AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
aaa36a97
AD
1617 NULL, &adev->gds.gds_gfx_bo);
1618 if (r)
1619 return r;
1620
1621 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
1622 PAGE_SIZE, true,
72d7668b 1623 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
aaa36a97
AD
1624 NULL, &adev->gds.gws_gfx_bo);
1625 if (r)
1626 return r;
1627
1628 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
1629 PAGE_SIZE, true,
72d7668b 1630 AMDGPU_GEM_DOMAIN_OA, 0, NULL,
aaa36a97
AD
1631 NULL, &adev->gds.oa_gfx_bo);
1632 if (r)
1633 return r;
1634
a101a899
KW
1635 adev->gfx.ce_ram_size = 0x8000;
1636
0bde3a95
AD
1637 gfx_v8_0_gpu_early_init(adev);
1638
aaa36a97
AD
1639 return 0;
1640}
1641
5fc3aeeb 1642static int gfx_v8_0_sw_fini(void *handle)
aaa36a97
AD
1643{
1644 int i;
5fc3aeeb 1645 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
1646
1647 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1648 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1649 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1650
1651 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1652 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1653 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1654 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1655
aaa36a97
AD
1656 gfx_v8_0_mec_fini(adev);
1657
1658 return 0;
1659}
1660
1661static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1662{
90bea0ab 1663 uint32_t *modearray, *mod2array;
eb64526f
TSD
1664 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1665 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
90bea0ab 1666 u32 reg_offset;
aaa36a97 1667
90bea0ab
TSD
1668 modearray = adev->gfx.config.tile_mode_array;
1669 mod2array = adev->gfx.config.macrotile_mode_array;
1670
1671 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1672 modearray[reg_offset] = 0;
1673
1674 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1675 mod2array[reg_offset] = 0;
aaa36a97
AD
1676
1677 switch (adev->asic_type) {
1678 case CHIP_TOPAZ:
90bea0ab
TSD
1679 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1680 PIPE_CONFIG(ADDR_SURF_P2) |
1681 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1682 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1683 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1684 PIPE_CONFIG(ADDR_SURF_P2) |
1685 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1686 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1687 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1688 PIPE_CONFIG(ADDR_SURF_P2) |
1689 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1690 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1691 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1692 PIPE_CONFIG(ADDR_SURF_P2) |
1693 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1694 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1695 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1696 PIPE_CONFIG(ADDR_SURF_P2) |
1697 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1698 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1699 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1700 PIPE_CONFIG(ADDR_SURF_P2) |
1701 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1702 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1703 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1704 PIPE_CONFIG(ADDR_SURF_P2) |
1705 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1706 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1707 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1708 PIPE_CONFIG(ADDR_SURF_P2));
1709 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1710 PIPE_CONFIG(ADDR_SURF_P2) |
1711 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1713 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1714 PIPE_CONFIG(ADDR_SURF_P2) |
1715 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1716 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1717 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1718 PIPE_CONFIG(ADDR_SURF_P2) |
1719 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1720 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1721 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1722 PIPE_CONFIG(ADDR_SURF_P2) |
1723 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1725 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1726 PIPE_CONFIG(ADDR_SURF_P2) |
1727 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1728 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1729 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1730 PIPE_CONFIG(ADDR_SURF_P2) |
1731 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1732 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1733 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1734 PIPE_CONFIG(ADDR_SURF_P2) |
1735 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1736 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1737 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1738 PIPE_CONFIG(ADDR_SURF_P2) |
1739 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1741 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1742 PIPE_CONFIG(ADDR_SURF_P2) |
1743 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1744 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1745 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1746 PIPE_CONFIG(ADDR_SURF_P2) |
1747 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1748 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1749 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1750 PIPE_CONFIG(ADDR_SURF_P2) |
1751 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1752 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1753 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1754 PIPE_CONFIG(ADDR_SURF_P2) |
1755 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1756 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1757 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1758 PIPE_CONFIG(ADDR_SURF_P2) |
1759 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1760 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1761 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1762 PIPE_CONFIG(ADDR_SURF_P2) |
1763 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1764 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1765 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1766 PIPE_CONFIG(ADDR_SURF_P2) |
1767 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1768 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1769 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1770 PIPE_CONFIG(ADDR_SURF_P2) |
1771 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1772 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1773 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1774 PIPE_CONFIG(ADDR_SURF_P2) |
1775 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1776 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1777 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1778 PIPE_CONFIG(ADDR_SURF_P2) |
1779 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1780 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1781
1782 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1783 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1784 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1785 NUM_BANKS(ADDR_SURF_8_BANK));
1786 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1787 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1788 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1789 NUM_BANKS(ADDR_SURF_8_BANK));
1790 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1791 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1792 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1793 NUM_BANKS(ADDR_SURF_8_BANK));
1794 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1796 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1797 NUM_BANKS(ADDR_SURF_8_BANK));
1798 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1800 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1801 NUM_BANKS(ADDR_SURF_8_BANK));
1802 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1803 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1804 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1805 NUM_BANKS(ADDR_SURF_8_BANK));
1806 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1807 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1808 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1809 NUM_BANKS(ADDR_SURF_8_BANK));
1810 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1811 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1812 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813 NUM_BANKS(ADDR_SURF_16_BANK));
1814 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1815 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1816 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1817 NUM_BANKS(ADDR_SURF_16_BANK));
1818 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1819 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1820 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1821 NUM_BANKS(ADDR_SURF_16_BANK));
1822 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1823 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1825 NUM_BANKS(ADDR_SURF_16_BANK));
1826 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1827 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1828 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1829 NUM_BANKS(ADDR_SURF_16_BANK));
1830 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1831 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1832 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1833 NUM_BANKS(ADDR_SURF_16_BANK));
1834 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1835 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1836 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1837 NUM_BANKS(ADDR_SURF_8_BANK));
1838
1839 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1840 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1841 reg_offset != 23)
1842 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1843
1844 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1845 if (reg_offset != 7)
1846 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1847
8cdacf44 1848 break;
af15a2d5 1849 case CHIP_FIJI:
90bea0ab
TSD
1850 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1851 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1852 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1853 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1854 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1855 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1856 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1857 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1858 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1860 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1861 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1862 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1863 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1864 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1865 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1866 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1867 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1868 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1869 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1870 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1871 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1872 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1873 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1874 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1875 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1876 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1877 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1878 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1879 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1880 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1881 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1882 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1883 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1884 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1885 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1886 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1887 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1888 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1889 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1890 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1892 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1893 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1894 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1896 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1897 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1898 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1900 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1901 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1902 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1903 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1904 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1905 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1906 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1908 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1909 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1910 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1911 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1912 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1913 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1914 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1915 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1916 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1917 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1918 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1919 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1920 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1921 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1922 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1923 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1924 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1925 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1926 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1927 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1928 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1929 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1930 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1931 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1932 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1933 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1934 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1935 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1936 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1937 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1938 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1939 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1940 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1941 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1942 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1944 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1945 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1946 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1947 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1948 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1949 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1950 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1951 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1952 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1953 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1954 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1955 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1956 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1957 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1958 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1959 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1960 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1961 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1962 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1963 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1964 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1965 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1966 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1967 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1968 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1969 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1970 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1971 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1972
1973 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1974 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1975 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1976 NUM_BANKS(ADDR_SURF_8_BANK));
1977 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1978 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1979 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1980 NUM_BANKS(ADDR_SURF_8_BANK));
1981 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1982 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1983 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1984 NUM_BANKS(ADDR_SURF_8_BANK));
1985 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1986 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1987 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1988 NUM_BANKS(ADDR_SURF_8_BANK));
1989 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1990 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1991 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1992 NUM_BANKS(ADDR_SURF_8_BANK));
1993 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1994 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1995 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1996 NUM_BANKS(ADDR_SURF_8_BANK));
1997 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1998 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1999 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2000 NUM_BANKS(ADDR_SURF_8_BANK));
2001 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2002 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2003 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2004 NUM_BANKS(ADDR_SURF_8_BANK));
2005 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2006 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2007 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2008 NUM_BANKS(ADDR_SURF_8_BANK));
2009 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2010 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2011 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2012 NUM_BANKS(ADDR_SURF_8_BANK));
2013 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2014 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2015 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2016 NUM_BANKS(ADDR_SURF_8_BANK));
2017 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2018 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2019 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2020 NUM_BANKS(ADDR_SURF_8_BANK));
2021 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2022 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2023 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2024 NUM_BANKS(ADDR_SURF_8_BANK));
2025 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2026 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2027 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2028 NUM_BANKS(ADDR_SURF_4_BANK));
2029
2030 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2031 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2032
2033 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2034 if (reg_offset != 7)
2035 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2036
5f2e816b 2037 break;
aaa36a97 2038 case CHIP_TONGA:
90bea0ab
TSD
2039 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2040 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2041 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2042 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2043 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2045 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2046 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2047 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2049 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2050 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2051 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2052 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2053 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2054 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2055 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2057 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2058 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2059 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2060 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2061 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2062 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2063 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2065 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2066 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2067 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2068 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2069 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2070 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2071 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2072 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2073 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2075 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2078 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2079 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2081 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2082 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2083 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2085 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2086 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2087 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2089 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2090 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2091 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2094 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2095 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2096 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2097 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2098 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2099 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2101 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2102 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2103 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2105 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2106 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2107 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2109 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2110 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2111 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2112 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2113 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2114 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2115 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2117 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2118 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2119 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2121 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2122 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2123 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2124 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2125 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2126 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2127 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2128 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2129 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2130 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2131 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2132 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2133 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2134 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2135 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2137 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2138 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2139 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2141 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2142 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2143 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2145 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2146 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2147 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2150 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2151 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2152 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2153 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2154 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2155 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2156 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2157 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2159 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2160 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2161
2162 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2165 NUM_BANKS(ADDR_SURF_16_BANK));
2166 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2169 NUM_BANKS(ADDR_SURF_16_BANK));
2170 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2173 NUM_BANKS(ADDR_SURF_16_BANK));
2174 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2177 NUM_BANKS(ADDR_SURF_16_BANK));
2178 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2179 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2180 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2181 NUM_BANKS(ADDR_SURF_16_BANK));
2182 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2185 NUM_BANKS(ADDR_SURF_16_BANK));
2186 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2188 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2189 NUM_BANKS(ADDR_SURF_16_BANK));
2190 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2191 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2192 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2193 NUM_BANKS(ADDR_SURF_16_BANK));
2194 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2197 NUM_BANKS(ADDR_SURF_16_BANK));
2198 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2199 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2200 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2201 NUM_BANKS(ADDR_SURF_16_BANK));
2202 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2203 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2204 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2205 NUM_BANKS(ADDR_SURF_16_BANK));
2206 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2209 NUM_BANKS(ADDR_SURF_8_BANK));
2210 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2213 NUM_BANKS(ADDR_SURF_4_BANK));
2214 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2215 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2216 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2217 NUM_BANKS(ADDR_SURF_4_BANK));
2218
2219 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2220 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2221
2222 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2223 if (reg_offset != 7)
2224 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2225
aaa36a97 2226 break;
e3c7656c 2227 case CHIP_STONEY:
90bea0ab
TSD
2228 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229 PIPE_CONFIG(ADDR_SURF_P2) |
2230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233 PIPE_CONFIG(ADDR_SURF_P2) |
2234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 PIPE_CONFIG(ADDR_SURF_P2) |
2238 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241 PIPE_CONFIG(ADDR_SURF_P2) |
2242 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2243 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245 PIPE_CONFIG(ADDR_SURF_P2) |
2246 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249 PIPE_CONFIG(ADDR_SURF_P2) |
2250 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2251 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2253 PIPE_CONFIG(ADDR_SURF_P2) |
2254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2257 PIPE_CONFIG(ADDR_SURF_P2));
2258 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2259 PIPE_CONFIG(ADDR_SURF_P2) |
2260 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2263 PIPE_CONFIG(ADDR_SURF_P2) |
2264 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2266 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2267 PIPE_CONFIG(ADDR_SURF_P2) |
2268 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2270 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2271 PIPE_CONFIG(ADDR_SURF_P2) |
2272 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2275 PIPE_CONFIG(ADDR_SURF_P2) |
2276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2279 PIPE_CONFIG(ADDR_SURF_P2) |
2280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2283 PIPE_CONFIG(ADDR_SURF_P2) |
2284 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2286 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2287 PIPE_CONFIG(ADDR_SURF_P2) |
2288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2291 PIPE_CONFIG(ADDR_SURF_P2) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2295 PIPE_CONFIG(ADDR_SURF_P2) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2299 PIPE_CONFIG(ADDR_SURF_P2) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2303 PIPE_CONFIG(ADDR_SURF_P2) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2307 PIPE_CONFIG(ADDR_SURF_P2) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2311 PIPE_CONFIG(ADDR_SURF_P2) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2315 PIPE_CONFIG(ADDR_SURF_P2) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2319 PIPE_CONFIG(ADDR_SURF_P2) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 PIPE_CONFIG(ADDR_SURF_P2) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2327 PIPE_CONFIG(ADDR_SURF_P2) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2330
2331 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2334 NUM_BANKS(ADDR_SURF_8_BANK));
2335 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2338 NUM_BANKS(ADDR_SURF_8_BANK));
2339 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342 NUM_BANKS(ADDR_SURF_8_BANK));
2343 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346 NUM_BANKS(ADDR_SURF_8_BANK));
2347 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350 NUM_BANKS(ADDR_SURF_8_BANK));
2351 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 NUM_BANKS(ADDR_SURF_8_BANK));
2355 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 NUM_BANKS(ADDR_SURF_8_BANK));
2359 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2362 NUM_BANKS(ADDR_SURF_16_BANK));
2363 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2366 NUM_BANKS(ADDR_SURF_16_BANK));
2367 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370 NUM_BANKS(ADDR_SURF_16_BANK));
2371 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374 NUM_BANKS(ADDR_SURF_16_BANK));
2375 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378 NUM_BANKS(ADDR_SURF_16_BANK));
2379 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2382 NUM_BANKS(ADDR_SURF_16_BANK));
2383 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2386 NUM_BANKS(ADDR_SURF_8_BANK));
2387
2388 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2389 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2390 reg_offset != 23)
2391 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2392
2393 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2394 if (reg_offset != 7)
2395 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2396
e3c7656c 2397 break;
aaa36a97 2398 default:
90bea0ab
TSD
2399 dev_warn(adev->dev,
2400 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2401 adev->asic_type);
2402
2403 case CHIP_CARRIZO:
2404 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405 PIPE_CONFIG(ADDR_SURF_P2) |
2406 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 PIPE_CONFIG(ADDR_SURF_P2) |
2410 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2411 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2412 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413 PIPE_CONFIG(ADDR_SURF_P2) |
2414 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2415 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 PIPE_CONFIG(ADDR_SURF_P2) |
2418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2421 PIPE_CONFIG(ADDR_SURF_P2) |
2422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425 PIPE_CONFIG(ADDR_SURF_P2) |
2426 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2427 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2429 PIPE_CONFIG(ADDR_SURF_P2) |
2430 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2431 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2432 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2433 PIPE_CONFIG(ADDR_SURF_P2));
2434 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2435 PIPE_CONFIG(ADDR_SURF_P2) |
2436 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 PIPE_CONFIG(ADDR_SURF_P2) |
2440 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2442 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P2) |
2444 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2446 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2447 PIPE_CONFIG(ADDR_SURF_P2) |
2448 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2449 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2450 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 PIPE_CONFIG(ADDR_SURF_P2) |
2452 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2455 PIPE_CONFIG(ADDR_SURF_P2) |
2456 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2458 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2459 PIPE_CONFIG(ADDR_SURF_P2) |
2460 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2462 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2463 PIPE_CONFIG(ADDR_SURF_P2) |
2464 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2465 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2466 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2467 PIPE_CONFIG(ADDR_SURF_P2) |
2468 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2470 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2471 PIPE_CONFIG(ADDR_SURF_P2) |
2472 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2473 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2474 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2475 PIPE_CONFIG(ADDR_SURF_P2) |
2476 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2477 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2478 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2479 PIPE_CONFIG(ADDR_SURF_P2) |
2480 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2481 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2482 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2483 PIPE_CONFIG(ADDR_SURF_P2) |
2484 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2485 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2486 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2487 PIPE_CONFIG(ADDR_SURF_P2) |
2488 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2491 PIPE_CONFIG(ADDR_SURF_P2) |
2492 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2494 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2495 PIPE_CONFIG(ADDR_SURF_P2) |
2496 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2498 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499 PIPE_CONFIG(ADDR_SURF_P2) |
2500 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2503 PIPE_CONFIG(ADDR_SURF_P2) |
2504 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2505 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2506
2507 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2509 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510 NUM_BANKS(ADDR_SURF_8_BANK));
2511 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2514 NUM_BANKS(ADDR_SURF_8_BANK));
2515 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518 NUM_BANKS(ADDR_SURF_8_BANK));
2519 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522 NUM_BANKS(ADDR_SURF_8_BANK));
2523 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2526 NUM_BANKS(ADDR_SURF_8_BANK));
2527 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2530 NUM_BANKS(ADDR_SURF_8_BANK));
2531 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2534 NUM_BANKS(ADDR_SURF_8_BANK));
2535 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538 NUM_BANKS(ADDR_SURF_16_BANK));
2539 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542 NUM_BANKS(ADDR_SURF_16_BANK));
2543 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2546 NUM_BANKS(ADDR_SURF_16_BANK));
2547 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2550 NUM_BANKS(ADDR_SURF_16_BANK));
2551 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2554 NUM_BANKS(ADDR_SURF_16_BANK));
2555 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2558 NUM_BANKS(ADDR_SURF_16_BANK));
2559 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2560 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2561 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2562 NUM_BANKS(ADDR_SURF_8_BANK));
2563
2564 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2565 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2566 reg_offset != 23)
2567 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2568
2569 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2570 if (reg_offset != 7)
2571 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2572
2573 break;
aaa36a97
AD
2574 }
2575}
2576
2577static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2578{
544b8a74 2579 return (u32)((1ULL << bit_width) - 1);
aaa36a97
AD
2580}
2581
2582void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2583{
2584 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2585
2586 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2587 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2588 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2589 } else if (se_num == 0xffffffff) {
2590 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2591 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2592 } else if (sh_num == 0xffffffff) {
2593 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2594 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2595 } else {
2596 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2597 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2598 }
2599 WREG32(mmGRBM_GFX_INDEX, data);
2600}
2601
/*
 * Return the bitmap of disabled render backends for the currently selected
 * SE/SH (see gfx_v8_0_select_se_sh()).  Combines the factory fuse setting
 * (CC_RB_BACKEND_DISABLE) with the user override register
 * (GC_USER_RB_BACKEND_DISABLE), masked down to the RBs of a single SH.
 */
static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
				    u32 max_rb_num_per_se,
				    u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(mmCC_RB_BACKEND_DISABLE);
	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;

	/* merge in the user-disable bits before shifting the field down */
	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);

	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;

	/* only max_rb_num_per_se / sh_per_se RBs belong to one SH */
	mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);

	return data & mask;
}
2619
/*
 * Discover which render backends are usable and program the rasterizer
 * RB mapping accordingly.
 *
 * Pass 1 walks every SE/SH (under grbm_idx_mutex) and packs the per-SH
 * disable bits into one bitmap; the inverse is cached in
 * adev->gfx.config.backend_enable_mask.  Pass 2 re-selects each SE and
 * patches PA_SC_RASTER_CONFIG based on which RBs (2 bits per SH) survive.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
			      u32 se_num, u32 sh_per_se,
			      u32 max_rb_num_per_se)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			data = gfx_v8_0_get_rb_disabled(adev,
					      max_rb_num_per_se, sh_per_se);
			/* each SH contributes RB_BITMAP_WIDTH_PER_SH bits */
			disabled_rbs |= data << ((i * sh_per_se + j) *
						 RB_BITMAP_WIDTH_PER_SH);
		}
	}
	/* restore broadcast mode before dropping the index mutex */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* invert: a bit clear in disabled_rbs means the RB is enabled */
	mask = 1;
	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	adev->gfx.config.backend_enable_mask = enabled_rbs;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < se_num; i++) {
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		data = RREG32(mmPA_SC_RASTER_CONFIG);
		for (j = 0; j < sh_per_se; j++) {
			/* consume two enable bits per SH */
			switch (enabled_rbs & 3) {
			case 0:
				if (j == 0)
					data |= (RASTER_CONFIG_RB_MAP_3 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				else
					data |= (RASTER_CONFIG_RB_MAP_0 <<
						 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
				break;
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 <<
					 (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 <<
					 (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(mmPA_SC_RASTER_CONFIG, data);
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
2686
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMID apertures
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	/* program every compute VMID (8..15) via the SRBM selector */
	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* back to VMID 0 */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
2731
aaa36a97
AD
/*
 * One-time GFX block setup: GRBM timeout, address config fan-out, tiling
 * tables, render backend configuration and per-VMID SH_MEM apertures.
 * Register write order follows the hardware init sequence; do not reorder.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	/* lengthen the GRBM register read timeout */
	tmp = RREG32(mmGRBM_CNTL);
	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
	WREG32(mmGRBM_CNTL, tmp);

	/* mirror the address config into every client block */
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
	WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
	       adev->gfx.config.gb_addr_config & 0x70);
	WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);

	gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
			  adev->gfx.config.max_sh_per_se,
			  adev->gfx.config.max_backends_per_se);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 gets uncached default/APE1 mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* remaining VMIDs get non-coherently-cached mtype */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
		PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
		PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
		PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
2806
/*
 * Poll until the RLC serdes report idle: first the per-CU masters of every
 * SE/SH (under GRBM index selection), then the non-CU masters (SE/GC/TC0/
 * TC1).  Each poll gives up after adev->usec_timeout microseconds.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
2836
2837static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2838 bool enable)
2839{
2840 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2841
0d07db7e
TSD
2842 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2843 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2844 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2845 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2846
aaa36a97
AD
2847 WREG32(mmCP_INT_CNTL_RING0, tmp);
2848}
2849
/*
 * Halt the RLC: clear the F32 core enable bit, mask the GUI idle
 * interrupts, then wait for the serdes masters to drain.
 */
void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
	WREG32(mmRLC_CNTL, tmp);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	gfx_v8_0_wait_for_rlc_serdes(adev);
}
2861
/* Pulse the RLC soft-reset bit in GRBM_SOFT_RESET: assert, wait, deassert. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmGRBM_SOFT_RESET);

	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
}
2873
/*
 * Re-enable the RLC F32 core.  On dGPUs the GUI idle interrupts are
 * re-armed here; APUs defer that until after CP init (see comment below).
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmRLC_CNTL);

	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
	WREG32(mmRLC_CNTL, tmp);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
2887
/*
 * Stream the RLC microcode words into the RLC GPM instruction memory.
 *
 * Returns 0 on success, -EINVAL when no RLC firmware has been requested.
 */
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
{
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.rlc_fw)
		return -EINVAL;

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	/* reset write address, stream the words, then latch the fw version */
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);

	return 0;
}
2911
/*
 * Bring the RLC up: stop it, disable clock and power gating, soft-reset
 * the block, load microcode (either directly or via the SMU loader), then
 * start it again.
 *
 * Returns 0 on success or a negative error code on firmware load failure.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the ucode; wait for it to finish */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
					AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
2944
2945static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2946{
2947 int i;
2948 u32 tmp = RREG32(mmCP_ME_CNTL);
2949
2950 if (enable) {
2951 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2952 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2953 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2954 } else {
2955 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2956 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2957 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2958 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2959 adev->gfx.gfx_ring[i].ready = false;
2960 }
2961 WREG32(mmCP_ME_CNTL, tmp);
2962 udelay(50);
2963}
2964
/*
 * Load the PFP, CE and ME gfx microcode into the CP.  The gfx CP is
 * halted before the upload; gfx_v8_0_cp_gfx_resume() restarts it later.
 *
 * Returns 0 on success, -EINVAL if any of the three firmwares is missing.
 */
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
		return -EINVAL;

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* halt the CP before touching its instruction memory */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);

	return 0;
}
3021
3022static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
3023{
3024 u32 count = 0;
3025 const struct cs_section_def *sect = NULL;
3026 const struct cs_extent_def *ext = NULL;
3027
3028 /* begin clear state */
3029 count += 2;
3030 /* context control state */
3031 count += 3;
3032
3033 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
3034 for (ext = sect->section; ext->extent != NULL; ++ext) {
3035 if (sect->id == SECT_CONTEXT)
3036 count += 2 + ext->reg_count;
3037 else
3038 return 0;
3039 }
3040 }
3041 /* pa_sc_raster_config/pa_sc_raster_config1 */
3042 count += 4;
3043 /* end clear state */
3044 count += 2;
3045 /* clear state */
3046 count += 2;
3047
3048 return count;
3049}
3050
/*
 * Initialize the gfx CP and emit the clear-state sequence on gfx ring 0:
 * preamble begin, context control, the golden context registers from
 * vi_cs_data, the per-ASIC PA_SC_RASTER_CONFIG pair, preamble end, clear
 * state, and the CE partition bases.  The packet count must match
 * gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 covers the SET_BASE packet appended after the CSB */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit the golden context registers */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC raster config values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
3133
/*
 * Program gfx ring 0 (ring buffer size, read/write pointers, rptr
 * writeback address, base address and doorbell), then start the CP and
 * run a ring test.
 *
 * Returns 0 on success or the amdgpu_ring_test_ring() error.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	mdelay(1);
	/* drop RPTR_WR_ENA again after the pointers are initialized */
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
3214
3215static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3216{
3217 int i;
3218
3219 if (enable) {
3220 WREG32(mmCP_MEC_CNTL, 0);
3221 } else {
3222 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3223 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3224 adev->gfx.compute_ring[i].ready = false;
3225 }
3226 udelay(50);
3227}
3228
aaa36a97
AD
/*
 * Upload the MEC1 compute microcode (and MEC2, when a separate image was
 * provided).  The compute MECs are halted before the upload.
 *
 * Returns 0 on success, -EINVAL if the MEC1 firmware is missing.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
3274
/*
 * vi_mqd - Memory Queue Descriptor for a VI (gfx v8) compute hardware queue.
 *
 * In-memory image used by the CP firmware to save/restore the state of a
 * hardware queue (HQD).  The layout is dictated by the CP microcode: each
 * field must sit exactly at the dword "ordinal" given in its comment, so
 * fields must never be added, removed, resized or reordered.
 * gfx_v8_0_cp_compute_resume() fills one of these per compute ring and
 * mirrors the cp_hqd_ / cp_mqd_ fields into the matching registers.
 */
struct vi_mqd {
	uint32_t header; /* ordinal0 */
	uint32_t compute_dispatch_initiator; /* ordinal1 */
	uint32_t compute_dim_x; /* ordinal2 */
	uint32_t compute_dim_y; /* ordinal3 */
	uint32_t compute_dim_z; /* ordinal4 */
	uint32_t compute_start_x; /* ordinal5 */
	uint32_t compute_start_y; /* ordinal6 */
	uint32_t compute_start_z; /* ordinal7 */
	uint32_t compute_num_thread_x; /* ordinal8 */
	uint32_t compute_num_thread_y; /* ordinal9 */
	uint32_t compute_num_thread_z; /* ordinal10 */
	uint32_t compute_pipelinestat_enable; /* ordinal11 */
	uint32_t compute_perfcount_enable; /* ordinal12 */
	uint32_t compute_pgm_lo; /* ordinal13 */
	uint32_t compute_pgm_hi; /* ordinal14 */
	uint32_t compute_tba_lo; /* ordinal15 */
	uint32_t compute_tba_hi; /* ordinal16 */
	uint32_t compute_tma_lo; /* ordinal17 */
	uint32_t compute_tma_hi; /* ordinal18 */
	uint32_t compute_pgm_rsrc1; /* ordinal19 */
	uint32_t compute_pgm_rsrc2; /* ordinal20 */
	uint32_t compute_vmid; /* ordinal21 */
	uint32_t compute_resource_limits; /* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
	uint32_t compute_tmpring_size; /* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
	uint32_t compute_restart_x; /* ordinal28 */
	uint32_t compute_restart_y; /* ordinal29 */
	uint32_t compute_restart_z; /* ordinal30 */
	uint32_t compute_thread_trace_enable; /* ordinal31 */
	uint32_t compute_misc_reserved; /* ordinal32 */
	uint32_t compute_dispatch_id; /* ordinal33 */
	uint32_t compute_threadgroup_id; /* ordinal34 */
	uint32_t compute_relaunch; /* ordinal35 */
	uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
	uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
	uint32_t compute_wave_restore_control; /* ordinal38 */
	uint32_t reserved9; /* ordinal39 */
	uint32_t reserved10; /* ordinal40 */
	uint32_t reserved11; /* ordinal41 */
	uint32_t reserved12; /* ordinal42 */
	uint32_t reserved13; /* ordinal43 */
	uint32_t reserved14; /* ordinal44 */
	uint32_t reserved15; /* ordinal45 */
	uint32_t reserved16; /* ordinal46 */
	uint32_t reserved17; /* ordinal47 */
	uint32_t reserved18; /* ordinal48 */
	uint32_t reserved19; /* ordinal49 */
	uint32_t reserved20; /* ordinal50 */
	uint32_t reserved21; /* ordinal51 */
	uint32_t reserved22; /* ordinal52 */
	uint32_t reserved23; /* ordinal53 */
	uint32_t reserved24; /* ordinal54 */
	uint32_t reserved25; /* ordinal55 */
	uint32_t reserved26; /* ordinal56 */
	uint32_t reserved27; /* ordinal57 */
	uint32_t reserved28; /* ordinal58 */
	uint32_t reserved29; /* ordinal59 */
	uint32_t reserved30; /* ordinal60 */
	uint32_t reserved31; /* ordinal61 */
	uint32_t reserved32; /* ordinal62 */
	uint32_t reserved33; /* ordinal63 */
	uint32_t reserved34; /* ordinal64 */
	uint32_t compute_user_data_0; /* ordinal65 */
	uint32_t compute_user_data_1; /* ordinal66 */
	uint32_t compute_user_data_2; /* ordinal67 */
	uint32_t compute_user_data_3; /* ordinal68 */
	uint32_t compute_user_data_4; /* ordinal69 */
	uint32_t compute_user_data_5; /* ordinal70 */
	uint32_t compute_user_data_6; /* ordinal71 */
	uint32_t compute_user_data_7; /* ordinal72 */
	uint32_t compute_user_data_8; /* ordinal73 */
	uint32_t compute_user_data_9; /* ordinal74 */
	uint32_t compute_user_data_10; /* ordinal75 */
	uint32_t compute_user_data_11; /* ordinal76 */
	uint32_t compute_user_data_12; /* ordinal77 */
	uint32_t compute_user_data_13; /* ordinal78 */
	uint32_t compute_user_data_14; /* ordinal79 */
	uint32_t compute_user_data_15; /* ordinal80 */
	uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
	uint32_t reserved35; /* ordinal83 */
	uint32_t reserved36; /* ordinal84 */
	uint32_t reserved37; /* ordinal85 */
	uint32_t cp_mqd_query_time_lo; /* ordinal86 */
	uint32_t cp_mqd_query_time_hi; /* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
	uint32_t reserved38; /* ordinal96 */
	uint32_t reserved39; /* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
	uint32_t reserved40; /* ordinal106 */
	uint32_t reserved41; /* ordinal107 */
	uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
	uint32_t reserved42; /* ordinal112 */
	uint32_t reserved43; /* ordinal113 */
	uint32_t cp_pq_exe_status_lo; /* ordinal114 */
	uint32_t cp_pq_exe_status_hi; /* ordinal115 */
	uint32_t cp_packet_id_lo; /* ordinal116 */
	uint32_t cp_packet_id_hi; /* ordinal117 */
	uint32_t cp_packet_exe_status_lo; /* ordinal118 */
	uint32_t cp_packet_exe_status_hi; /* ordinal119 */
	uint32_t gds_save_base_addr_lo; /* ordinal120 */
	uint32_t gds_save_base_addr_hi; /* ordinal121 */
	uint32_t gds_save_mask_lo; /* ordinal122 */
	uint32_t gds_save_mask_hi; /* ordinal123 */
	uint32_t ctx_save_base_addr_lo; /* ordinal124 */
	uint32_t ctx_save_base_addr_hi; /* ordinal125 */
	uint32_t reserved44; /* ordinal126 */
	uint32_t reserved45; /* ordinal127 */
	/* from here on the fields mirror CP_MQD_ / CP_HQD_ registers */
	uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
	uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
	uint32_t cp_hqd_active; /* ordinal130 */
	uint32_t cp_hqd_vmid; /* ordinal131 */
	uint32_t cp_hqd_persistent_state; /* ordinal132 */
	uint32_t cp_hqd_pipe_priority; /* ordinal133 */
	uint32_t cp_hqd_queue_priority; /* ordinal134 */
	uint32_t cp_hqd_quantum; /* ordinal135 */
	uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
	uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
	uint32_t cp_hqd_pq_rptr; /* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
	uint32_t cp_hqd_pq_wptr; /* ordinal144 */
	uint32_t cp_hqd_pq_control; /* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
	uint32_t cp_hqd_ib_rptr; /* ordinal148 */
	uint32_t cp_hqd_ib_control; /* ordinal149 */
	uint32_t cp_hqd_iq_timer; /* ordinal150 */
	uint32_t cp_hqd_iq_rptr; /* ordinal151 */
	uint32_t cp_hqd_dequeue_request; /* ordinal152 */
	uint32_t cp_hqd_dma_offload; /* ordinal153 */
	uint32_t cp_hqd_sema_cmd; /* ordinal154 */
	uint32_t cp_hqd_msg_type; /* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
	uint32_t cp_hqd_hq_status0; /* ordinal160 */
	uint32_t cp_hqd_hq_control0; /* ordinal161 */
	uint32_t cp_mqd_control; /* ordinal162 */
	uint32_t cp_hqd_hq_status1; /* ordinal163 */
	uint32_t cp_hqd_hq_control1; /* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
	uint32_t cp_hqd_eop_control; /* ordinal167 */
	uint32_t cp_hqd_eop_rptr; /* ordinal168 */
	uint32_t cp_hqd_eop_wptr; /* ordinal169 */
	uint32_t cp_hqd_eop_done_events; /* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
	uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
	uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
	uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
	uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
	uint32_t cp_hqd_error; /* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
	uint32_t cp_hqd_eop_dones; /* ordinal181 */
	uint32_t reserved46; /* ordinal182 */
	uint32_t reserved47; /* ordinal183 */
	uint32_t reserved48; /* ordinal184 */
	uint32_t reserved49; /* ordinal185 */
	uint32_t reserved50; /* ordinal186 */
	uint32_t reserved51; /* ordinal187 */
	uint32_t reserved52; /* ordinal188 */
	uint32_t reserved53; /* ordinal189 */
	uint32_t reserved54; /* ordinal190 */
	uint32_t reserved55; /* ordinal191 */
	uint32_t iqtimer_pkt_header; /* ordinal192 */
	uint32_t iqtimer_pkt_dw0; /* ordinal193 */
	uint32_t iqtimer_pkt_dw1; /* ordinal194 */
	uint32_t iqtimer_pkt_dw2; /* ordinal195 */
	uint32_t iqtimer_pkt_dw3; /* ordinal196 */
	uint32_t iqtimer_pkt_dw4; /* ordinal197 */
	uint32_t iqtimer_pkt_dw5; /* ordinal198 */
	uint32_t iqtimer_pkt_dw6; /* ordinal199 */
	uint32_t iqtimer_pkt_dw7; /* ordinal200 */
	uint32_t iqtimer_pkt_dw8; /* ordinal201 */
	uint32_t iqtimer_pkt_dw9; /* ordinal202 */
	uint32_t iqtimer_pkt_dw10; /* ordinal203 */
	uint32_t iqtimer_pkt_dw11; /* ordinal204 */
	uint32_t iqtimer_pkt_dw12; /* ordinal205 */
	uint32_t iqtimer_pkt_dw13; /* ordinal206 */
	uint32_t iqtimer_pkt_dw14; /* ordinal207 */
	uint32_t iqtimer_pkt_dw15; /* ordinal208 */
	uint32_t iqtimer_pkt_dw16; /* ordinal209 */
	uint32_t iqtimer_pkt_dw17; /* ordinal210 */
	uint32_t iqtimer_pkt_dw18; /* ordinal211 */
	uint32_t iqtimer_pkt_dw19; /* ordinal212 */
	uint32_t iqtimer_pkt_dw20; /* ordinal213 */
	uint32_t iqtimer_pkt_dw21; /* ordinal214 */
	uint32_t iqtimer_pkt_dw22; /* ordinal215 */
	uint32_t iqtimer_pkt_dw23; /* ordinal216 */
	uint32_t iqtimer_pkt_dw24; /* ordinal217 */
	uint32_t iqtimer_pkt_dw25; /* ordinal218 */
	uint32_t iqtimer_pkt_dw26; /* ordinal219 */
	uint32_t iqtimer_pkt_dw27; /* ordinal220 */
	uint32_t iqtimer_pkt_dw28; /* ordinal221 */
	uint32_t iqtimer_pkt_dw29; /* ordinal222 */
	uint32_t iqtimer_pkt_dw30; /* ordinal223 */
	uint32_t iqtimer_pkt_dw31; /* ordinal224 */
	uint32_t reserved56; /* ordinal225 */
	uint32_t reserved57; /* ordinal226 */
	uint32_t reserved58; /* ordinal227 */
	uint32_t set_resources_header; /* ordinal228 */
	uint32_t set_resources_dw1; /* ordinal229 */
	uint32_t set_resources_dw2; /* ordinal230 */
	uint32_t set_resources_dw3; /* ordinal231 */
	uint32_t set_resources_dw4; /* ordinal232 */
	uint32_t set_resources_dw5; /* ordinal233 */
	uint32_t set_resources_dw6; /* ordinal234 */
	uint32_t set_resources_dw7; /* ordinal235 */
	uint32_t reserved59; /* ordinal236 */
	uint32_t reserved60; /* ordinal237 */
	uint32_t reserved61; /* ordinal238 */
	uint32_t reserved62; /* ordinal239 */
	uint32_t reserved63; /* ordinal240 */
	uint32_t reserved64; /* ordinal241 */
	uint32_t reserved65; /* ordinal242 */
	uint32_t reserved66; /* ordinal243 */
	uint32_t reserved67; /* ordinal244 */
	uint32_t reserved68; /* ordinal245 */
	uint32_t reserved69; /* ordinal246 */
	uint32_t reserved70; /* ordinal247 */
	uint32_t reserved71; /* ordinal248 */
	uint32_t reserved72; /* ordinal249 */
	uint32_t reserved73; /* ordinal250 */
	uint32_t reserved74; /* ordinal251 */
	uint32_t reserved75; /* ordinal252 */
	uint32_t reserved76; /* ordinal253 */
	uint32_t reserved77; /* ordinal254 */
	uint32_t reserved78; /* ordinal255 */

	uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
};
3535
3536static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3537{
3538 int i, r;
3539
3540 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3541 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3542
3543 if (ring->mqd_obj) {
3544 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3545 if (unlikely(r != 0))
3546 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3547
3548 amdgpu_bo_unpin(ring->mqd_obj);
3549 amdgpu_bo_unreserve(ring->mqd_obj);
3550
3551 amdgpu_bo_unref(&ring->mqd_obj);
3552 ring->mqd_obj = NULL;
3553 }
3554 }
3555}
3556
/* Bring up the compute queues: program the per-pipe EOP buffers, build an
 * MQD (memory queue descriptor, struct vi_mqd) for every compute ring,
 * mirror it into the CP_HQD_ registers of the selected queue, activate the
 * queue and finally run a ring test on each ring.
 * Returns 0 on success or a negative error code; on MQD setup failure the
 * already-created MQDs are torn down via gfx_v8_0_cp_compute_fini().
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 are on MEC1, pipes 4-7 on MEC2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* each pipe gets its own MEC_HPD_SIZE slice of the EOP bo;
		 * the register takes the address in 256-byte units */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		/* 0xC0310800 looks like a PM4 packet header for the MQD —
		 * TODO confirm against the CP microcode docs */
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		/* enable all CUs on all shader engines */
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* all CP_HQD_ accesses below are routed to this ring's
		 * me/pipe/queue via the SRBM index */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP base programmed in the pipe loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr = 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			/* wait for the HQD to drain before reprogramming it */
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address wether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* APU-class parts program the MEC doorbell aperture
			 * here; dGPUs presumably set it up elsewhere —
			 * NOTE(review): confirm */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* smoke-test every compute ring; a failing ring is left not-ready
	 * but does not fail the whole resume */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
3811
/* Load the CP microcode (unless power-play / the SMU handles it) and then
 * restart the GFX ring and the compute queues.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
{
	int r;

	/* keep the GUI idle interrupt off while the CP is brought up;
	 * only done on dGPUs here */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy firmware loading */
			r = gfx_v8_0_cp_gfx_load_microcode(adev);
			if (r)
				return r;

			r = gfx_v8_0_cp_compute_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* the SMU loads the images; wait for each to finish */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_CE);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_PFP);
			if (r)
				return -EINVAL;

			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_CP_ME);
			if (r)
				return -EINVAL;

			/* Topaz: load MEC through the legacy path instead of
			 * waiting on the SMU */
			if (adev->asic_type == CHIP_TOPAZ) {
				r = gfx_v8_0_cp_compute_load_microcode(adev);
				if (r)
					return r;
			} else {
				r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								AMDGPU_UCODE_ID_CP_MEC1);
				if (r)
					return -EINVAL;
			}
		}
	}

	r = gfx_v8_0_cp_gfx_resume(adev);
	if (r)
		return r;

	r = gfx_v8_0_cp_compute_resume(adev);
	if (r)
		return r;

	gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	return 0;
}
3870
/* Enable or disable both halves of the command processor: the GFX engine
 * first, then the compute MECs.
 */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
3876
5fc3aeeb 3877static int gfx_v8_0_hw_init(void *handle)
aaa36a97
AD
3878{
3879 int r;
5fc3aeeb 3880 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
3881
3882 gfx_v8_0_init_golden_registers(adev);
3883
3884 gfx_v8_0_gpu_init(adev);
3885
3886 r = gfx_v8_0_rlc_resume(adev);
3887 if (r)
3888 return r;
3889
3890 r = gfx_v8_0_cp_resume(adev);
3891 if (r)
3892 return r;
3893
3894 return r;
3895}
3896
5fc3aeeb 3897static int gfx_v8_0_hw_fini(void *handle)
aaa36a97 3898{
5fc3aeeb 3899 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3900
1d22a454
AD
3901 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3902 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
aaa36a97
AD
3903 gfx_v8_0_cp_enable(adev, false);
3904 gfx_v8_0_rlc_stop(adev);
3905 gfx_v8_0_cp_compute_fini(adev);
3906
3907 return 0;
3908}
3909
5fc3aeeb 3910static int gfx_v8_0_suspend(void *handle)
aaa36a97 3911{
5fc3aeeb 3912 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3913
aaa36a97
AD
3914 return gfx_v8_0_hw_fini(adev);
3915}
3916
5fc3aeeb 3917static int gfx_v8_0_resume(void *handle)
aaa36a97 3918{
5fc3aeeb 3919 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3920
aaa36a97
AD
3921 return gfx_v8_0_hw_init(adev);
3922}
3923
5fc3aeeb 3924static bool gfx_v8_0_is_idle(void *handle)
aaa36a97 3925{
5fc3aeeb 3926 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3927
aaa36a97
AD
3928 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3929 return false;
3930 else
3931 return true;
3932}
3933
5fc3aeeb 3934static int gfx_v8_0_wait_for_idle(void *handle)
aaa36a97
AD
3935{
3936 unsigned i;
3937 u32 tmp;
5fc3aeeb 3938 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
3939
3940 for (i = 0; i < adev->usec_timeout; i++) {
3941 /* read MC_STATUS */
3942 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3943
3944 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3945 return 0;
3946 udelay(1);
3947 }
3948 return -ETIMEDOUT;
3949}
3950
5fc3aeeb 3951static void gfx_v8_0_print_status(void *handle)
aaa36a97
AD
3952{
3953 int i;
5fc3aeeb 3954 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
aaa36a97
AD
3955
3956 dev_info(adev->dev, "GFX 8.x registers\n");
3957 dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
3958 RREG32(mmGRBM_STATUS));
3959 dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
3960 RREG32(mmGRBM_STATUS2));
3961 dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3962 RREG32(mmGRBM_STATUS_SE0));
3963 dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3964 RREG32(mmGRBM_STATUS_SE1));
3965 dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3966 RREG32(mmGRBM_STATUS_SE2));
3967 dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3968 RREG32(mmGRBM_STATUS_SE3));
3969 dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3970 dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3971 RREG32(mmCP_STALLED_STAT1));
3972 dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3973 RREG32(mmCP_STALLED_STAT2));
3974 dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3975 RREG32(mmCP_STALLED_STAT3));
3976 dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3977 RREG32(mmCP_CPF_BUSY_STAT));
3978 dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3979 RREG32(mmCP_CPF_STALLED_STAT1));
3980 dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3981 dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3982 dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3983 RREG32(mmCP_CPC_STALLED_STAT1));
3984 dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3985
3986 for (i = 0; i < 32; i++) {
3987 dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n",
3988 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3989 }
3990 for (i = 0; i < 16; i++) {
3991 dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n",
3992 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3993 }
3994 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3995 dev_info(adev->dev, " se: %d\n", i);
3996 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3997 dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n",
3998 RREG32(mmPA_SC_RASTER_CONFIG));
3999 dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n",
4000 RREG32(mmPA_SC_RASTER_CONFIG_1));
4001 }
4002 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4003
4004 dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n",
4005 RREG32(mmGB_ADDR_CONFIG));
4006 dev_info(adev->dev, " HDP_ADDR_CONFIG=0x%08X\n",
4007 RREG32(mmHDP_ADDR_CONFIG));
4008 dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n",
4009 RREG32(mmDMIF_ADDR_CALC));
4010 dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n",
4011 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
4012 dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n",
4013 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
4014 dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
4015 RREG32(mmUVD_UDEC_ADDR_CONFIG));
4016 dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
4017 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
4018 dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
4019 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
4020
4021 dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
4022 RREG32(mmCP_MEQ_THRESHOLDS));
4023 dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n",
4024 RREG32(mmSX_DEBUG_1));
4025 dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n",
4026 RREG32(mmTA_CNTL_AUX));
4027 dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n",
4028 RREG32(mmSPI_CONFIG_CNTL));
4029 dev_info(adev->dev, " SQ_CONFIG=0x%08X\n",
4030 RREG32(mmSQ_CONFIG));
4031 dev_info(adev->dev, " DB_DEBUG=0x%08X\n",
4032 RREG32(mmDB_DEBUG));
4033 dev_info(adev->dev, " DB_DEBUG2=0x%08X\n",
4034 RREG32(mmDB_DEBUG2));
4035 dev_info(adev->dev, " DB_DEBUG3=0x%08X\n",
4036 RREG32(mmDB_DEBUG3));
4037 dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n",
4038 RREG32(mmCB_HW_CONTROL));
4039 dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n",
4040 RREG32(mmSPI_CONFIG_CNTL_1));
4041 dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n",
4042 RREG32(mmPA_SC_FIFO_SIZE));
4043 dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n",
4044 RREG32(mmVGT_NUM_INSTANCES));
4045 dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n",
4046 RREG32(mmCP_PERFMON_CNTL));
4047 dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
4048 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
4049 dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n",
4050 RREG32(mmVGT_CACHE_INVALIDATION));
4051 dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n",
4052 RREG32(mmVGT_GS_VERTEX_REUSE));
4053 dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
4054 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
4055 dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n",
4056 RREG32(mmPA_CL_ENHANCE));
4057 dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n",
4058 RREG32(mmPA_SC_ENHANCE));
4059
4060 dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n",
4061 RREG32(mmCP_ME_CNTL));
4062 dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n",
4063 RREG32(mmCP_MAX_CONTEXT));
4064 dev_info(adev->dev, " CP_ENDIAN_SWAP=0x%08X\n",
4065 RREG32(mmCP_ENDIAN_SWAP));
4066 dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n",
4067 RREG32(mmCP_DEVICE_ID));
4068
4069 dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n",
4070 RREG32(mmCP_SEM_WAIT_TIMER));
4071
4072 dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n",
4073 RREG32(mmCP_RB_WPTR_DELAY));
4074 dev_info(adev->dev, " CP_RB_VMID=0x%08X\n",
4075 RREG32(mmCP_RB_VMID));
4076 dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
4077 RREG32(mmCP_RB0_CNTL));
4078 dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n",
4079 RREG32(mmCP_RB0_WPTR));
4080 dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n",
4081 RREG32(mmCP_RB0_RPTR_ADDR));
4082 dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n",
4083 RREG32(mmCP_RB0_RPTR_ADDR_HI));
4084 dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
4085 RREG32(mmCP_RB0_CNTL));
4086 dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n",
4087 RREG32(mmCP_RB0_BASE));
4088 dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n",
4089 RREG32(mmCP_RB0_BASE_HI));
4090 dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n",
4091 RREG32(mmCP_MEC_CNTL));
4092 dev_info(adev->dev, " CP_CPF_DEBUG=0x%08X\n",
4093 RREG32(mmCP_CPF_DEBUG));
4094
4095 dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n",
4096 RREG32(mmSCRATCH_ADDR));
4097 dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n",
4098 RREG32(mmSCRATCH_UMSK));
4099
4100 dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n",
4101 RREG32(mmCP_INT_CNTL_RING0));
4102 dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
4103 RREG32(mmRLC_LB_CNTL));
4104 dev_info(adev->dev, " RLC_CNTL=0x%08X\n",
4105 RREG32(mmRLC_CNTL));
4106 dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n",
4107 RREG32(mmRLC_CGCG_CGLS_CTRL));
4108 dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n",
4109 RREG32(mmRLC_LB_CNTR_INIT));
4110 dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n",
4111 RREG32(mmRLC_LB_CNTR_MAX));
4112 dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n",
4113 RREG32(mmRLC_LB_INIT_CU_MASK));
4114 dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n",
4115 RREG32(mmRLC_LB_PARAMS));
4116 dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
4117 RREG32(mmRLC_LB_CNTL));
4118 dev_info(adev->dev, " RLC_MC_CNTL=0x%08X\n",
4119 RREG32(mmRLC_MC_CNTL));
4120 dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n",
4121 RREG32(mmRLC_UCODE_CNTL));
4122
4123 mutex_lock(&adev->srbm_mutex);
4124 for (i = 0; i < 16; i++) {
4125 vi_srbm_select(adev, 0, 0, 0, i);
4126 dev_info(adev->dev, " VM %d:\n", i);
4127 dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n",
4128 RREG32(mmSH_MEM_CONFIG));
4129 dev_info(adev->dev, " SH_MEM_APE1_BASE=0x%08X\n",
4130 RREG32(mmSH_MEM_APE1_BASE));
4131 dev_info(adev->dev, " SH_MEM_APE1_LIMIT=0x%08X\n",
4132 RREG32(mmSH_MEM_APE1_LIMIT));
4133 dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n",
4134 RREG32(mmSH_MEM_BASES));
4135 }
4136 vi_srbm_select(adev, 0, 0, 0, 0);
4137 mutex_unlock(&adev->srbm_mutex);
4138}
4139
/* Soft-reset handler for the GFX IP block (amd_ip_funcs.soft_reset).
 *
 * Reads the GRBM/SRBM status registers to decide which units (CP, GFX
 * pipeline, RLC, GRBM) appear hung, then pulses the corresponding
 * soft-reset bits while the memory controller interface is stalled.
 * Always returns 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS: any busy shader-pipeline unit -> reset CP and GFX */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	/* a busy CP also needs the GRBM itself reset via the SRBM */
	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC hung -> reset the RLC */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS: pending GRBM request -> reset the GRBM */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* dump register state before the reset for debugging */
		gfx_v8_0_print_status((void *)adev);
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		/* stall and clear the GFX path in the GMC while resetting */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 1);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 1);
			WREG32(mmGMCON_DEBUG, tmp);

			udelay(50);
		}

		/* pulse the GRBM reset bits: set, settle, then clear */
		if (grbm_soft_reset) {
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		/* pulse the SRBM reset bits the same way */
		if (srbm_soft_reset) {
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		/* release the GMC stall/clear set above */
		if (grbm_soft_reset || srbm_soft_reset) {
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 0);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 0);
			WREG32(mmGMCON_DEBUG, tmp);
		}

		/* Wait a little for things to settle down */
		udelay(50);
		gfx_v8_0_print_status((void *)adev);
	}
	return 0;
}
4244
4245/**
4246 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4247 *
4248 * @adev: amdgpu_device pointer
4249 *
4250 * Fetches a GPU clock counter snapshot.
4251 * Returns the 64 bit clock counter snapshot.
4252 */
4253uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4254{
4255 uint64_t clock;
4256
4257 mutex_lock(&adev->gfx.gpu_clock_mutex);
4258 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4259 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4260 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4261 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4262 return clock;
4263}
4264
/* Emit WRITE_DATA packets that reprogram the GDS/GWS/OA apertures for
 * the given VMID.  Sizes and bases arrive in bytes and are converted to
 * the hardware block granularity via the AMDGPU_*_SHIFT constants.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base share one register field */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: programmed as a contiguous bitmask of [oa_base, oa_base+oa_size) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
4312
/* amd_ip_funcs.early_init: set ring counts and hook up the function
 * tables before any hardware or software init runs.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);

	return 0;
}
4325
ccba7691
AD
/* amd_ip_funcs.late_init: enable the privileged register/instruction
 * fault interrupts and run the EDC GPR workaround, which needs the IB
 * pool and therefore cannot run during sw/hw init.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	return 0;
}
4346
/* amd_ip_funcs.set_powergating_state: GFX powergating is not
 * implemented for these parts; accept the request and report success.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}
4352
6e378858
EH
/* Issue a BPM serdes command (e.g. set/clear a CG override) to all
 * CU/non-CU serdes masters on all shader engines.
 *
 * @reg_addr: BPM register to target (goes into REG_ADDR)
 * @cmd:      command value (goes into BPM_DATA)
 */
static void fiji_send_serdes_cmd(struct amdgpu_device *adev,
				 uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* broadcast to every SE/SH */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	/* clear all command/control fields, then insert our command */
	data = RREG32(mmRLC_SERDES_WR_CTRL);
	data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
		  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
		  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
		  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
		  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
		  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
		  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
		  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
		  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
		  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
		  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
4382
/* Enable or disable medium grain clock gating (MGCG) plus memory light
 * sleep (MGLS) and tree-shade (TS) gating on Fiji.  The numbered steps
 * form a hardware-required sequence; do not reorder them.
 */
static void fiji_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						  bool enable)
{
	uint32_t temp, data;

	/* It is disabled by HW by default */
	if (enable) {
		/* 1 - RLC memory Light sleep */
		temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
		data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
		if (temp != data)
			WREG32(mmRLC_MEM_SLP_CNTL, data);

		/* 2 - CP memory Light sleep */
		temp = data = RREG32(mmCP_MEM_SLP_CNTL);
		data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
		if (temp != data)
			WREG32(mmCP_MEM_SLP_CNTL, data);

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: stop overriding MGCG for
		 * CPF/RLC/MGCG/GRBM so gating can take effect
		 */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
		data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
		data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
		data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
		data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
		data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
		data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}
}
4476
/* Enable or disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS) on Fiji.  Follows the required set/clear-override,
 * wait-for-serdes, send-BPM-command ordering; do not reorder the steps.
 */
static void fiji_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						  bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	if (enable) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		/* enable cgls*/
		data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Override */
		fiji_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		fiji_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}
}
4557static int fiji_update_gfx_clock_gating(struct amdgpu_device *adev,
4558 bool enable)
4559{
4560 if (enable) {
4561 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4562 * === MGCG + MGLS + TS(CG/LS) ===
4563 */
4564 fiji_update_medium_grain_clock_gating(adev, enable);
4565 fiji_update_coarse_grain_clock_gating(adev, enable);
4566 } else {
4567 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4568 * === CGCG + CGLS ===
4569 */
4570 fiji_update_coarse_grain_clock_gating(adev, enable);
4571 fiji_update_medium_grain_clock_gating(adev, enable);
4572 }
4573 return 0;
4574}
4575
5fc3aeeb 4576static int gfx_v8_0_set_clockgating_state(void *handle,
4577 enum amd_clockgating_state state)
aaa36a97 4578{
6e378858
EH
4579 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4580
4581 switch (adev->asic_type) {
4582 case CHIP_FIJI:
4583 fiji_update_gfx_clock_gating(adev,
4584 state == AMD_CG_STATE_GATE ? true : false);
4585 break;
4586 default:
4587 break;
4588 }
aaa36a97
AD
4589 return 0;
4590}
4591
4592static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4593{
4594 u32 rptr;
4595
4596 rptr = ring->adev->wb.wb[ring->rptr_offs];
4597
4598 return rptr;
4599}
4600
4601static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4602{
4603 struct amdgpu_device *adev = ring->adev;
4604 u32 wptr;
4605
4606 if (ring->use_doorbell)
4607 /* XXX check if swapping is necessary on BE */
4608 wptr = ring->adev->wb.wb[ring->wptr_offs];
4609 else
4610 wptr = RREG32(mmCP_RB0_WPTR);
4611
4612 return wptr;
4613}
4614
4615static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4616{
4617 struct amdgpu_device *adev = ring->adev;
4618
4619 if (ring->use_doorbell) {
4620 /* XXX check if swapping is necessary on BE */
4621 adev->wb.wb[ring->wptr_offs] = ring->wptr;
4622 WDOORBELL32(ring->doorbell_index, ring->wptr);
4623 } else {
4624 WREG32(mmCP_RB0_WPTR, ring->wptr);
4625 (void)RREG32(mmCP_RB0_WPTR);
4626 }
4627}
4628
/* Emit a WAIT_REG_MEM packet that requests an HDP flush and waits for
 * the matching done bit.  Compute rings use a per-ME/pipe done bit and
 * the ME engine; the GFX ring uses CP0 and waits on the PFP.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only MEC1/MEC2 exist; anything else is invalid */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
4660
/* Emit an indirect buffer on the GFX ring: advertise the post-IB read
 * pointer via WRITE_DATA, optionally switch CE buffers on a context
 * change, then emit the INDIRECT_BUFFER(_CONST) packet itself.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib)
{
	bool need_ctx_switch = ring->current_ctx != ib->ctx;
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	/* drop the CE preamble IB for the same context */
	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
		return;

	/* account for the 2-dword SWITCH_BUFFER emitted below */
	if (need_ctx_switch)
		next_rptr += 2;

	/* account for the 4-dword IB packet; write the resulting rptr
	 * to the next_rptr writeback slot so the CPU can track progress
	 */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (need_ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* CE IBs use the CONST variant of the packet */
	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* length in dwords plus the VMID in bits 31:24 */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4705
/* Emit an indirect buffer on a compute ring: advertise the post-IB read
 * pointer via WRITE_DATA, then emit the INDIRECT_BUFFER packet.  Unlike
 * the GFX path there is no CE handling or context switch.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib)
{
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	/* account for the 4-dword IB packet; publish the resulting rptr
	 * through the next_rptr writeback slot
	 */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* length in dwords plus the VMID in bits 31:24 */
	control |= ib->length_dw |
			   (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
					  (2 << 0) |
#endif
					  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4735
/* Emit a fence on the GFX ring using EVENT_WRITE_EOP: flushes the TC/
 * TCL1 caches, writes @seq to @addr (32 or 64 bit per @flags) and
 * optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
4755
aaa36a97
AD
/* Emit a VM flush: wait for the last fence of this ring, program the
 * page directory base for @vm_id, invalidate the VM cache and wait for
 * completion.  On the GFX ring the writes go through the PFP and extra
 * SWITCH_BUFFER/PFP_SYNC_ME packets keep CE/PFP/ME coherent.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait until the fence memory reaches the current sync sequence */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
		 WAIT_REG_MEM_FUNCTION(3))); /* equal */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* sync CE with ME to prevent CE fetching CEIB before the
		 * context switch is done
		 */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	/* write the new page directory base; contexts 0-7 and 8-15 live
	 * in two separate register banks
	 */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
4825
aaa36a97
AD
4826static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
4827{
4828 return ring->adev->wb.wb[ring->rptr_offs];
4829}
4830
4831static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
4832{
4833 return ring->adev->wb.wb[ring->wptr_offs];
4834}
4835
4836static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
4837{
4838 struct amdgpu_device *adev = ring->adev;
4839
4840 /* XXX check if swapping is necessary on BE */
4841 adev->wb.wb[ring->wptr_offs] = ring->wptr;
4842 WDOORBELL32(ring->doorbell_index, ring->wptr);
4843}
4844
/* Emit a fence on a compute ring using RELEASE_MEM: flushes and writes
 * back the TC caches, writes @seq to @addr (32 or 64 bit per @flags)
 * and optionally raises an interrupt.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
4865
4866static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
4867 enum amdgpu_interrupt_state state)
4868{
4869 u32 cp_int_cntl;
4870
4871 switch (state) {
4872 case AMDGPU_IRQ_STATE_DISABLE:
4873 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4874 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4875 TIME_STAMP_INT_ENABLE, 0);
4876 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4877 break;
4878 case AMDGPU_IRQ_STATE_ENABLE:
4879 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4880 cp_int_cntl =
4881 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4882 TIME_STAMP_INT_ENABLE, 1);
4883 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4884 break;
4885 default:
4886 break;
4887 }
4888}
4889
/* Enable or disable the end-of-pipe interrupt for one compute pipe by
 * toggling TIME_STAMP_INT_ENABLE in that pipe's CP_MEx_PIPEy_INT_CNTL.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
4933
4934static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
4935 struct amdgpu_irq_src *source,
4936 unsigned type,
4937 enum amdgpu_interrupt_state state)
4938{
4939 u32 cp_int_cntl;
4940
4941 switch (state) {
4942 case AMDGPU_IRQ_STATE_DISABLE:
4943 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4944 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4945 PRIV_REG_INT_ENABLE, 0);
4946 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4947 break;
4948 case AMDGPU_IRQ_STATE_ENABLE:
4949 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4950 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4951 PRIV_REG_INT_ENABLE, 0);
4952 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4953 break;
4954 default:
4955 break;
4956 }
4957
4958 return 0;
4959}
4960
/* Enable or disable the privileged-instruction fault interrupt by
 * toggling PRIV_INSTR_INT_ENABLE in CP_INT_CNTL_RING0.  Always
 * returns 0.
 */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	u32 cp_int_cntl;

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    PRIV_INSTR_INT_ENABLE, 0);
		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
					    PRIV_INSTR_INT_ENABLE, 1);
		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
		break;
	default:
		break;
	}

	return 0;
}
4987
/* IRQ source callback: dispatch an EOP interrupt state change to the
 * GFX ring handler or the matching compute (ME, pipe) handler based on
 * the CP IRQ type.  Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
5026
5027static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
5028 struct amdgpu_irq_src *source,
5029 struct amdgpu_iv_entry *entry)
5030{
5031 int i;
5032 u8 me_id, pipe_id, queue_id;
5033 struct amdgpu_ring *ring;
5034
5035 DRM_DEBUG("IH: CP EOP\n");
5036 me_id = (entry->ring_id & 0x0c) >> 2;
5037 pipe_id = (entry->ring_id & 0x03) >> 0;
5038 queue_id = (entry->ring_id & 0x70) >> 4;
5039
5040 switch (me_id) {
5041 case 0:
5042 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5043 break;
5044 case 1:
5045 case 2:
5046 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5047 ring = &adev->gfx.compute_ring[i];
5048 /* Per-queue interrupt is supported for MEC starting from VI.
5049 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5050 */
5051 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5052 amdgpu_fence_process(ring);
5053 }
5054 break;
5055 }
5056 return 0;
5057}
5058
/**
 * gfx_v8_0_priv_reg_irq - handle a privileged register fault interrupt.
 *
 * A command stream touched a privileged register; log it and schedule
 * a GPU reset to recover.  Returns 0 (never fails).
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5067
/**
 * gfx_v8_0_priv_inst_irq - handle a privileged instruction fault interrupt.
 *
 * A command stream executed a privileged instruction; log it and
 * schedule a GPU reset to recover.  Returns 0 (never fails).
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5076
5fc3aeeb 5077const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
aaa36a97 5078 .early_init = gfx_v8_0_early_init,
ccba7691 5079 .late_init = gfx_v8_0_late_init,
aaa36a97
AD
5080 .sw_init = gfx_v8_0_sw_init,
5081 .sw_fini = gfx_v8_0_sw_fini,
5082 .hw_init = gfx_v8_0_hw_init,
5083 .hw_fini = gfx_v8_0_hw_fini,
5084 .suspend = gfx_v8_0_suspend,
5085 .resume = gfx_v8_0_resume,
5086 .is_idle = gfx_v8_0_is_idle,
5087 .wait_for_idle = gfx_v8_0_wait_for_idle,
5088 .soft_reset = gfx_v8_0_soft_reset,
5089 .print_status = gfx_v8_0_print_status,
5090 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
5091 .set_powergating_state = gfx_v8_0_set_powergating_state,
5092};
5093
5094static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5095 .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
5096 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
5097 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
5098 .parse_cs = NULL,
93323131 5099 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
aaa36a97 5100 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
aaa36a97
AD
5101 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5102 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
d2edb07b 5103 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
aaa36a97
AD
5104 .test_ring = gfx_v8_0_ring_test_ring,
5105 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 5106 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 5107 .pad_ib = amdgpu_ring_generic_pad_ib,
aaa36a97
AD
5108};
5109
5110static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
5111 .get_rptr = gfx_v8_0_ring_get_rptr_compute,
5112 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
5113 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
5114 .parse_cs = NULL,
93323131 5115 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
aaa36a97 5116 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
aaa36a97
AD
5117 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
5118 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
35074d2d 5119 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
aaa36a97
AD
5120 .test_ring = gfx_v8_0_ring_test_ring,
5121 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 5122 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 5123 .pad_ib = amdgpu_ring_generic_pad_ib,
aaa36a97
AD
5124};
5125
/* Attach the gfx/compute ring callback tables to every ring. */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
		adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;

	for (i = 0; i < adev->gfx.num_compute_rings; i++)
		adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
}
5136
/* CP end-of-pipe interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
5141
/* Privileged register fault interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
5146
/* Privileged instruction fault interrupt source: state control + handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
5151
/* Register the gfx v8 interrupt sources with the device.  The EOP
 * source has one type per ring (AMDGPU_CP_IRQ_LAST); the two fault
 * sources have a single type each.
 */
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
	adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;

	adev->gfx.priv_reg_irq.num_types = 1;
	adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;

	adev->gfx.priv_inst_irq.num_types = 1;
	adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
}
5163
/* Initialize ASIC GDS (global data share) partition sizes.  Total GDS
 * memory size is read from the hardware; the gfx/CS partition split
 * depends on whether the part has 64KB of GDS or more.
 * NOTE(review): partition-size constants presumably come from the VI
 * programming guide — confirm against hardware documentation.
 */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
{
	/* init ASIC gds info */
	adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
	adev->gds.gws.total_size = 64;
	adev->gds.oa.total_size = 16;

	if (adev->gds.mem.total_size == 64 * 1024) {
		adev->gds.mem.gfx_partition_size = 4096;
		adev->gds.mem.cs_partition_size = 4096;

		adev->gds.gws.gfx_partition_size = 4;
		adev->gds.gws.cs_partition_size = 4;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 1;
	} else {
		adev->gds.mem.gfx_partition_size = 1024;
		adev->gds.mem.cs_partition_size = 1024;

		adev->gds.gws.gfx_partition_size = 16;
		adev->gds.gws.cs_partition_size = 16;

		adev->gds.oa.gfx_partition_size = 4;
		adev->gds.oa.cs_partition_size = 4;
	}
}
5191
5192static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
5193 u32 se, u32 sh)
5194{
5195 u32 mask = 0, tmp, tmp1;
5196 int i;
5197
5198 gfx_v8_0_select_se_sh(adev, se, sh);
5199 tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5200 tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5201 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5202
5203 tmp &= 0xffff0000;
5204
5205 tmp |= tmp1;
5206 tmp >>= 16;
5207
5208 for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
5209 mask <<= 1;
5210 mask |= 1;
5211 }
5212
5213 return (~tmp) & mask;
5214}
5215
/**
 * gfx_v8_0_get_cu_info - gather compute unit topology for the ASIC.
 *
 * @adev: amdgpu device pointer
 * @cu_info: output; receives per-SE/SH CU bitmaps, the total active CU
 *           count, and the always-on CU mask
 *
 * Walks every shader engine / shader array, records its active-CU
 * bitmap, and marks the first two active CUs of each SH as "always on"
 * (AO).  AO bits are packed at (se * 16 + sh * 8) within ao_cu_mask.
 * Holds grbm_idx_mutex while GRBM SE/SH selection is being changed.
 *
 * Returns 0 on success, -EINVAL if @adev or @cu_info is NULL.
 */
int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
			 struct amdgpu_cu_info *cu_info)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;

	if (!adev || !cu_info)
		return -EINVAL;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
			cu_info->bitmap[i][j] = bitmap;

			/* count active CUs; first two become always-on */
			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
	mutex_unlock(&adev->grbm_idx_mutex);
	return 0;
}
This page took 0.334983 seconds and 5 git commands to generate.