m68knommu: fix user a5 register being overwritten
[deliverable/linux.git] / drivers / gpu / drm / amd / amdgpu / gfx_v8_0.c
1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "amdgpu_atombios.h"
31 #include "atombios_i2c.h"
32 #include "clearstate_vi.h"
33
34 #include "gmc/gmc_8_2_d.h"
35 #include "gmc/gmc_8_2_sh_mask.h"
36
37 #include "oss/oss_3_0_d.h"
38 #include "oss/oss_3_0_sh_mask.h"
39
40 #include "bif/bif_5_0_d.h"
41 #include "bif/bif_5_0_sh_mask.h"
42
43 #include "gca/gfx_8_0_d.h"
44 #include "gca/gfx_8_0_enum.h"
45 #include "gca/gfx_8_0_sh_mask.h"
46 #include "gca/gfx_8_0_enum.h"
47
48 #include "dce/dce_10_0_d.h"
49 #include "dce/dce_10_0_sh_mask.h"
50
51 #include "smu/smu_7_1_3_d.h"
52
/*
 * Ring counts for GFX8: one graphics ring and eight compute rings.
 * NOTE(review): the code consuming these constants is outside this view.
 */
#define GFX8_NUM_GFX_RINGS	1
#define GFX8_NUM_COMPUTE_RINGS	8
55
/*
 * "Golden" GB_ADDR_CONFIG values, one per ASIC named in the macro.
 * Topaz and Carrizo share the same value.
 * NOTE(review): the users of these macros are outside this view.
 */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN	0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003
60
/*
 * Field-packing helpers: each macro shifts x left to the bit position of the
 * named field of GB_TILE_MODE0 (first five) or GB_MACROTILE_MODE0 (last
 * four). x is not masked, so the caller must pass a value that fits the
 * field. The *__SHIFT constants are presumably provided by the gca register
 * headers included above -- TODO confirm.
 */
#define ARRAY_MODE(x)			((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)			((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)			((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)		((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)			((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)			((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)			((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)		((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)			((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
70
/*
 * Single-bit masks (bits 0 through 5) named after fields of the
 * RLC_CGTT_MGCG_OVERRIDE register.
 * NOTE(review): defined locally here; presumably absent from the shared
 * register headers -- confirm before moving or removing.
 */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK	0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK	0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK	0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK	0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK	0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK	0x00000020L
77
/*
 * BPM SERDES command codes: 1 selects "set".
 * NOTE(review): CLE_ is presumably short for "clear" (value 0) -- confirm
 * against the code that issues these commands, which is outside this view.
 */
#define SET_BPM_SERDES_CMD	1
#define CLE_BPM_SERDES_CMD	0
81
/*
 * BPM register addresses. Values are consecutive starting at 0, and
 * BPM_REG_FGCG_MAX is the number of addresses listed before it (5).
 */
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};
91
/*
 * NOTE(review): presumably the length of the RLC "format direct" register
 * list; the code using this constant is outside this view -- confirm.
 */
#define RLC_FormatDirectRegListLength	14
93
/*
 * Firmware images declared for this module, grouped per ASIC. Each ASIC
 * lists ce, pfp, me, mec and rlc images; all except Stoney and Topaz also
 * list a mec2 image.
 */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

/* Stoney: no mec2 image. */
MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

/* Topaz: no mec2 image. */
MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
140
/*
 * Per-VMID GDS register offsets, indexed by VMID (0..15). Each entry lists
 * the VMID's GDS BASE, GDS SIZE, GWS and OA registers, in that order.
 * NOTE(review): the member names of struct amdgpu_gds_reg_offset are
 * declared elsewhere; the order here must match that declaration.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
160
/*
 * Register table programmed second for CHIP_TONGA by
 * gfx_v8_0_init_golden_registers(). Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
179
/*
 * Register table programmed last for CHIP_TONGA by
 * gfx_v8_0_init_golden_registers(). Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
191
/*
 * Register table programmed first for CHIP_TONGA by
 * gfx_v8_0_init_golden_registers(). Covers the CGTT clock-control registers
 * and the per-CU CGTS registers for CU0 through CU7. Three u32 words per
 * row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
270
/*
 * Register table programmed first for CHIP_POLARIS11 by
 * gfx_v8_0_init_golden_registers(). Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
290
/*
 * Register table programmed last for CHIP_POLARIS11 by
 * gfx_v8_0_init_golden_registers(). Unlike the other *_golden_common_all
 * tables in this file, it has no PA_SC_RASTER_CONFIG rows. Three u32 words
 * per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
300
/*
 * Register table programmed first for CHIP_POLARIS10 by
 * gfx_v8_0_init_golden_registers(). Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper. The
 * mmCB_HW_CONTROL_2 row has a middle word of 0; check how the helper treats
 * a zero mask before changing it.
 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
321
/*
 * Register table programmed last for CHIP_POLARIS10 by
 * gfx_v8_0_init_golden_registers(). Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
333
/*
 * Register table programmed last for CHIP_FIJI by
 * gfx_v8_0_init_golden_registers(). It writes mmGRBM_GFX_INDEX a second
 * time before the trailing mmSPI_CONFIG_CNTL_1 row. Three u32 words per
 * row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
347
/*
 * Register table programmed second for CHIP_FIJI by
 * gfx_v8_0_init_golden_registers(). Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
362
/*
 * Register table programmed first for CHIP_FIJI by
 * gfx_v8_0_init_golden_registers(). Unlike the Tonga, Iceland and Carrizo
 * counterparts in this file, it has no per-CU CGTS_CUn_* rows. Three u32
 * words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
401
/*
 * Register table programmed second for CHIP_TOPAZ by
 * gfx_v8_0_init_golden_registers(). Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
420
/*
 * Register table programmed last for CHIP_TOPAZ by
 * gfx_v8_0_init_golden_registers(). Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
432
/*
 * Register table programmed first for CHIP_TOPAZ by
 * gfx_v8_0_init_golden_registers(). Covers the CGTT clock-control registers
 * and the per-CU CGTS registers for CU0 through CU5 only, and has no
 * mmCP_MEM_SLP_CNTL row. Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
500
/*
 * Register table programmed second for CHIP_CARRIZO by
 * gfx_v8_0_init_golden_registers(). Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper. If so,
 * the mmTCP_ADDR_CONFIG row's third word (0xf3) has bits set outside its
 * second word (0x0f); verify that this is intended.
 */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
514
/*
 * Register table programmed last for CHIP_CARRIZO by
 * gfx_v8_0_init_golden_registers(). Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
526
/*
 * Register table programmed first for CHIP_CARRIZO by
 * gfx_v8_0_init_golden_registers(). Covers the CGTT clock-control registers
 * and the per-CU CGTS registers for CU0 through CU7. Three u32 words per
 * row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
605
/*
 * Register table programmed second for CHIP_STONEY by
 * gfx_v8_0_init_golden_registers(). Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper. If so,
 * the mmTCP_ADDR_CONFIG row's third word (0xf1) has bits set outside its
 * second word (0x0f); verify that this is intended.
 */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
619
/*
 * Register table programmed last for CHIP_STONEY by
 * gfx_v8_0_init_golden_registers(). Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
631
/*
 * Register table programmed first for CHIP_STONEY by
 * gfx_v8_0_init_golden_registers(). Much shorter than the other
 * *_mgcg_cgcg_init tables in this file: it has no CGTT_*_CLK_CTRL or per-CU
 * CGTS rows. Three u32 words per row.
 * NOTE(review): rows are presumably (register, mask, value) as consumed by
 * amdgpu_program_register_sequence() -- confirm against that helper.
 */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};
641
/*
 * Forward declarations of file-local helpers.
 * NOTE(review): their definitions are outside this view, presumably later
 * in this file.
 */
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
648
649 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
650 {
651 switch (adev->asic_type) {
652 case CHIP_TOPAZ:
653 amdgpu_program_register_sequence(adev,
654 iceland_mgcg_cgcg_init,
655 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
656 amdgpu_program_register_sequence(adev,
657 golden_settings_iceland_a11,
658 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
659 amdgpu_program_register_sequence(adev,
660 iceland_golden_common_all,
661 (const u32)ARRAY_SIZE(iceland_golden_common_all));
662 break;
663 case CHIP_FIJI:
664 amdgpu_program_register_sequence(adev,
665 fiji_mgcg_cgcg_init,
666 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
667 amdgpu_program_register_sequence(adev,
668 golden_settings_fiji_a10,
669 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
670 amdgpu_program_register_sequence(adev,
671 fiji_golden_common_all,
672 (const u32)ARRAY_SIZE(fiji_golden_common_all));
673 break;
674
675 case CHIP_TONGA:
676 amdgpu_program_register_sequence(adev,
677 tonga_mgcg_cgcg_init,
678 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
679 amdgpu_program_register_sequence(adev,
680 golden_settings_tonga_a11,
681 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
682 amdgpu_program_register_sequence(adev,
683 tonga_golden_common_all,
684 (const u32)ARRAY_SIZE(tonga_golden_common_all));
685 break;
686 case CHIP_POLARIS11:
687 amdgpu_program_register_sequence(adev,
688 golden_settings_polaris11_a11,
689 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
690 amdgpu_program_register_sequence(adev,
691 polaris11_golden_common_all,
692 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
693 break;
694 case CHIP_POLARIS10:
695 amdgpu_program_register_sequence(adev,
696 golden_settings_polaris10_a11,
697 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
698 amdgpu_program_register_sequence(adev,
699 polaris10_golden_common_all,
700 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
701 WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
702 if (adev->pdev->revision == 0xc7) {
703 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
704 amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
705 }
706 break;
707 case CHIP_CARRIZO:
708 amdgpu_program_register_sequence(adev,
709 cz_mgcg_cgcg_init,
710 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
711 amdgpu_program_register_sequence(adev,
712 cz_golden_settings_a11,
713 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
714 amdgpu_program_register_sequence(adev,
715 cz_golden_common_all,
716 (const u32)ARRAY_SIZE(cz_golden_common_all));
717 break;
718 case CHIP_STONEY:
719 amdgpu_program_register_sequence(adev,
720 stoney_mgcg_cgcg_init,
721 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
722 amdgpu_program_register_sequence(adev,
723 stoney_golden_settings_a11,
724 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
725 amdgpu_program_register_sequence(adev,
726 stoney_golden_common_all,
727 (const u32)ARRAY_SIZE(stoney_golden_common_all));
728 break;
729 default:
730 break;
731 }
732 }
733
734 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
735 {
736 int i;
737
738 adev->gfx.scratch.num_reg = 7;
739 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
740 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
741 adev->gfx.scratch.free[i] = true;
742 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
743 }
744 }
745
746 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
747 {
748 struct amdgpu_device *adev = ring->adev;
749 uint32_t scratch;
750 uint32_t tmp = 0;
751 unsigned i;
752 int r;
753
754 r = amdgpu_gfx_scratch_get(adev, &scratch);
755 if (r) {
756 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
757 return r;
758 }
759 WREG32(scratch, 0xCAFEDEAD);
760 r = amdgpu_ring_alloc(ring, 3);
761 if (r) {
762 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
763 ring->idx, r);
764 amdgpu_gfx_scratch_free(adev, scratch);
765 return r;
766 }
767 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
768 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
769 amdgpu_ring_write(ring, 0xDEADBEEF);
770 amdgpu_ring_commit(ring);
771
772 for (i = 0; i < adev->usec_timeout; i++) {
773 tmp = RREG32(scratch);
774 if (tmp == 0xDEADBEEF)
775 break;
776 DRM_UDELAY(1);
777 }
778 if (i < adev->usec_timeout) {
779 DRM_INFO("ring test on %d succeeded in %d usecs\n",
780 ring->idx, i);
781 } else {
782 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
783 ring->idx, scratch, tmp);
784 r = -EINVAL;
785 }
786 amdgpu_gfx_scratch_free(adev, scratch);
787 return r;
788 }
789
790 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
791 {
792 struct amdgpu_device *adev = ring->adev;
793 struct amdgpu_ib ib;
794 struct fence *f = NULL;
795 uint32_t scratch;
796 uint32_t tmp = 0;
797 long r;
798
799 r = amdgpu_gfx_scratch_get(adev, &scratch);
800 if (r) {
801 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
802 return r;
803 }
804 WREG32(scratch, 0xCAFEDEAD);
805 memset(&ib, 0, sizeof(ib));
806 r = amdgpu_ib_get(adev, NULL, 256, &ib);
807 if (r) {
808 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
809 goto err1;
810 }
811 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
812 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
813 ib.ptr[2] = 0xDEADBEEF;
814 ib.length_dw = 3;
815
816 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
817 if (r)
818 goto err2;
819
820 r = fence_wait_timeout(f, false, timeout);
821 if (r == 0) {
822 DRM_ERROR("amdgpu: IB test timed out.\n");
823 r = -ETIMEDOUT;
824 goto err2;
825 } else if (r < 0) {
826 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
827 goto err2;
828 }
829 tmp = RREG32(scratch);
830 if (tmp == 0xDEADBEEF) {
831 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
832 r = 0;
833 } else {
834 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
835 scratch, tmp);
836 r = -EINVAL;
837 }
838 err2:
839 amdgpu_ib_free(adev, &ib, NULL);
840 fence_put(f);
841 err1:
842 amdgpu_gfx_scratch_free(adev, scratch);
843 return r;
844 }
845
846
847 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
848 release_firmware(adev->gfx.pfp_fw);
849 adev->gfx.pfp_fw = NULL;
850 release_firmware(adev->gfx.me_fw);
851 adev->gfx.me_fw = NULL;
852 release_firmware(adev->gfx.ce_fw);
853 adev->gfx.ce_fw = NULL;
854 release_firmware(adev->gfx.rlc_fw);
855 adev->gfx.rlc_fw = NULL;
856 release_firmware(adev->gfx.mec_fw);
857 adev->gfx.mec_fw = NULL;
858 if ((adev->asic_type != CHIP_STONEY) &&
859 (adev->asic_type != CHIP_TOPAZ))
860 release_firmware(adev->gfx.mec2_fw);
861 adev->gfx.mec2_fw = NULL;
862
863 kfree(adev->gfx.rlc.register_list_format);
864 }
865
866 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
867 {
868 const char *chip_name;
869 char fw_name[30];
870 int err;
871 struct amdgpu_firmware_info *info = NULL;
872 const struct common_firmware_header *header = NULL;
873 const struct gfx_firmware_header_v1_0 *cp_hdr;
874 const struct rlc_firmware_header_v2_0 *rlc_hdr;
875 unsigned int *tmp = NULL, i;
876
877 DRM_DEBUG("\n");
878
879 switch (adev->asic_type) {
880 case CHIP_TOPAZ:
881 chip_name = "topaz";
882 break;
883 case CHIP_TONGA:
884 chip_name = "tonga";
885 break;
886 case CHIP_CARRIZO:
887 chip_name = "carrizo";
888 break;
889 case CHIP_FIJI:
890 chip_name = "fiji";
891 break;
892 case CHIP_POLARIS11:
893 chip_name = "polaris11";
894 break;
895 case CHIP_POLARIS10:
896 chip_name = "polaris10";
897 break;
898 case CHIP_STONEY:
899 chip_name = "stoney";
900 break;
901 default:
902 BUG();
903 }
904
905 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
906 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
907 if (err)
908 goto out;
909 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
910 if (err)
911 goto out;
912 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
913 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
914 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
915
916 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
917 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
918 if (err)
919 goto out;
920 err = amdgpu_ucode_validate(adev->gfx.me_fw);
921 if (err)
922 goto out;
923 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
924 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
925 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
926
927 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
928 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
929 if (err)
930 goto out;
931 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
932 if (err)
933 goto out;
934 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
935 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
936 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
937
938 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
939 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
940 if (err)
941 goto out;
942 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
943 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
944 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
945 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
946
947 adev->gfx.rlc.save_and_restore_offset =
948 le32_to_cpu(rlc_hdr->save_and_restore_offset);
949 adev->gfx.rlc.clear_state_descriptor_offset =
950 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
951 adev->gfx.rlc.avail_scratch_ram_locations =
952 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
953 adev->gfx.rlc.reg_restore_list_size =
954 le32_to_cpu(rlc_hdr->reg_restore_list_size);
955 adev->gfx.rlc.reg_list_format_start =
956 le32_to_cpu(rlc_hdr->reg_list_format_start);
957 adev->gfx.rlc.reg_list_format_separate_start =
958 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
959 adev->gfx.rlc.starting_offsets_start =
960 le32_to_cpu(rlc_hdr->starting_offsets_start);
961 adev->gfx.rlc.reg_list_format_size_bytes =
962 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
963 adev->gfx.rlc.reg_list_size_bytes =
964 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
965
966 adev->gfx.rlc.register_list_format =
967 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
968 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
969
970 if (!adev->gfx.rlc.register_list_format) {
971 err = -ENOMEM;
972 goto out;
973 }
974
975 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
976 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
977 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
978 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
979
980 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
981
982 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
983 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
984 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
985 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
986
987 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
988 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
989 if (err)
990 goto out;
991 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
992 if (err)
993 goto out;
994 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
995 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
996 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
997
998 if ((adev->asic_type != CHIP_STONEY) &&
999 (adev->asic_type != CHIP_TOPAZ)) {
1000 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1001 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1002 if (!err) {
1003 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1004 if (err)
1005 goto out;
1006 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1007 adev->gfx.mec2_fw->data;
1008 adev->gfx.mec2_fw_version =
1009 le32_to_cpu(cp_hdr->header.ucode_version);
1010 adev->gfx.mec2_feature_version =
1011 le32_to_cpu(cp_hdr->ucode_feature_version);
1012 } else {
1013 err = 0;
1014 adev->gfx.mec2_fw = NULL;
1015 }
1016 }
1017
1018 if (adev->firmware.smu_load) {
1019 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1020 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1021 info->fw = adev->gfx.pfp_fw;
1022 header = (const struct common_firmware_header *)info->fw->data;
1023 adev->firmware.fw_size +=
1024 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1025
1026 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1027 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1028 info->fw = adev->gfx.me_fw;
1029 header = (const struct common_firmware_header *)info->fw->data;
1030 adev->firmware.fw_size +=
1031 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1032
1033 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1034 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1035 info->fw = adev->gfx.ce_fw;
1036 header = (const struct common_firmware_header *)info->fw->data;
1037 adev->firmware.fw_size +=
1038 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1039
1040 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1041 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1042 info->fw = adev->gfx.rlc_fw;
1043 header = (const struct common_firmware_header *)info->fw->data;
1044 adev->firmware.fw_size +=
1045 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1046
1047 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1048 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1049 info->fw = adev->gfx.mec_fw;
1050 header = (const struct common_firmware_header *)info->fw->data;
1051 adev->firmware.fw_size +=
1052 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1053
1054 if (adev->gfx.mec2_fw) {
1055 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1056 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1057 info->fw = adev->gfx.mec2_fw;
1058 header = (const struct common_firmware_header *)info->fw->data;
1059 adev->firmware.fw_size +=
1060 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1061 }
1062
1063 }
1064
1065 out:
1066 if (err) {
1067 dev_err(adev->dev,
1068 "gfx8: Failed to load firmware \"%s\"\n",
1069 fw_name);
1070 release_firmware(adev->gfx.pfp_fw);
1071 adev->gfx.pfp_fw = NULL;
1072 release_firmware(adev->gfx.me_fw);
1073 adev->gfx.me_fw = NULL;
1074 release_firmware(adev->gfx.ce_fw);
1075 adev->gfx.ce_fw = NULL;
1076 release_firmware(adev->gfx.rlc_fw);
1077 adev->gfx.rlc_fw = NULL;
1078 release_firmware(adev->gfx.mec_fw);
1079 adev->gfx.mec_fw = NULL;
1080 release_firmware(adev->gfx.mec2_fw);
1081 adev->gfx.mec2_fw = NULL;
1082 }
1083 return err;
1084 }
1085
1086 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1087 volatile u32 *buffer)
1088 {
1089 u32 count = 0, i;
1090 const struct cs_section_def *sect = NULL;
1091 const struct cs_extent_def *ext = NULL;
1092
1093 if (adev->gfx.rlc.cs_data == NULL)
1094 return;
1095 if (buffer == NULL)
1096 return;
1097
1098 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1099 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1100
1101 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1102 buffer[count++] = cpu_to_le32(0x80000000);
1103 buffer[count++] = cpu_to_le32(0x80000000);
1104
1105 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1106 for (ext = sect->section; ext->extent != NULL; ++ext) {
1107 if (sect->id == SECT_CONTEXT) {
1108 buffer[count++] =
1109 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1110 buffer[count++] = cpu_to_le32(ext->reg_index -
1111 PACKET3_SET_CONTEXT_REG_START);
1112 for (i = 0; i < ext->reg_count; i++)
1113 buffer[count++] = cpu_to_le32(ext->extent[i]);
1114 } else {
1115 return;
1116 }
1117 }
1118 }
1119
1120 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1121 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1122 PACKET3_SET_CONTEXT_REG_START);
1123 switch (adev->asic_type) {
1124 case CHIP_TONGA:
1125 case CHIP_POLARIS10:
1126 buffer[count++] = cpu_to_le32(0x16000012);
1127 buffer[count++] = cpu_to_le32(0x0000002A);
1128 break;
1129 case CHIP_POLARIS11:
1130 buffer[count++] = cpu_to_le32(0x16000012);
1131 buffer[count++] = cpu_to_le32(0x00000000);
1132 break;
1133 case CHIP_FIJI:
1134 buffer[count++] = cpu_to_le32(0x3a00161a);
1135 buffer[count++] = cpu_to_le32(0x0000002e);
1136 break;
1137 case CHIP_TOPAZ:
1138 case CHIP_CARRIZO:
1139 buffer[count++] = cpu_to_le32(0x00000002);
1140 buffer[count++] = cpu_to_le32(0x00000000);
1141 break;
1142 case CHIP_STONEY:
1143 buffer[count++] = cpu_to_le32(0x00000000);
1144 buffer[count++] = cpu_to_le32(0x00000000);
1145 break;
1146 default:
1147 buffer[count++] = cpu_to_le32(0x00000000);
1148 buffer[count++] = cpu_to_le32(0x00000000);
1149 break;
1150 }
1151
1152 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1153 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1154
1155 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1156 buffer[count++] = cpu_to_le32(0);
1157 }
1158
1159 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1160 {
1161 const __le32 *fw_data;
1162 volatile u32 *dst_ptr;
1163 int me, i, max_me = 4;
1164 u32 bo_offset = 0;
1165 u32 table_offset, table_size;
1166
1167 if (adev->asic_type == CHIP_CARRIZO)
1168 max_me = 5;
1169
1170 /* write the cp table buffer */
1171 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1172 for (me = 0; me < max_me; me++) {
1173 if (me == 0) {
1174 const struct gfx_firmware_header_v1_0 *hdr =
1175 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1176 fw_data = (const __le32 *)
1177 (adev->gfx.ce_fw->data +
1178 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1179 table_offset = le32_to_cpu(hdr->jt_offset);
1180 table_size = le32_to_cpu(hdr->jt_size);
1181 } else if (me == 1) {
1182 const struct gfx_firmware_header_v1_0 *hdr =
1183 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1184 fw_data = (const __le32 *)
1185 (adev->gfx.pfp_fw->data +
1186 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1187 table_offset = le32_to_cpu(hdr->jt_offset);
1188 table_size = le32_to_cpu(hdr->jt_size);
1189 } else if (me == 2) {
1190 const struct gfx_firmware_header_v1_0 *hdr =
1191 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1192 fw_data = (const __le32 *)
1193 (adev->gfx.me_fw->data +
1194 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1195 table_offset = le32_to_cpu(hdr->jt_offset);
1196 table_size = le32_to_cpu(hdr->jt_size);
1197 } else if (me == 3) {
1198 const struct gfx_firmware_header_v1_0 *hdr =
1199 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1200 fw_data = (const __le32 *)
1201 (adev->gfx.mec_fw->data +
1202 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1203 table_offset = le32_to_cpu(hdr->jt_offset);
1204 table_size = le32_to_cpu(hdr->jt_size);
1205 } else if (me == 4) {
1206 const struct gfx_firmware_header_v1_0 *hdr =
1207 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1208 fw_data = (const __le32 *)
1209 (adev->gfx.mec2_fw->data +
1210 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1211 table_offset = le32_to_cpu(hdr->jt_offset);
1212 table_size = le32_to_cpu(hdr->jt_size);
1213 }
1214
1215 for (i = 0; i < table_size; i ++) {
1216 dst_ptr[bo_offset + i] =
1217 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1218 }
1219
1220 bo_offset += table_size;
1221 }
1222 }
1223
1224 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1225 {
1226 int r;
1227
1228 /* clear state block */
1229 if (adev->gfx.rlc.clear_state_obj) {
1230 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1231 if (unlikely(r != 0))
1232 dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
1233 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1234 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1235
1236 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1237 adev->gfx.rlc.clear_state_obj = NULL;
1238 }
1239
1240 /* jump table block */
1241 if (adev->gfx.rlc.cp_table_obj) {
1242 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1243 if (unlikely(r != 0))
1244 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1245 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1246 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1247
1248 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1249 adev->gfx.rlc.cp_table_obj = NULL;
1250 }
1251 }
1252
1253 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1254 {
1255 volatile u32 *dst_ptr;
1256 u32 dws;
1257 const struct cs_section_def *cs_data;
1258 int r;
1259
1260 adev->gfx.rlc.cs_data = vi_cs_data;
1261
1262 cs_data = adev->gfx.rlc.cs_data;
1263
1264 if (cs_data) {
1265 /* clear state block */
1266 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1267
1268 if (adev->gfx.rlc.clear_state_obj == NULL) {
1269 r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1270 AMDGPU_GEM_DOMAIN_VRAM,
1271 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1272 NULL, NULL,
1273 &adev->gfx.rlc.clear_state_obj);
1274 if (r) {
1275 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1276 gfx_v8_0_rlc_fini(adev);
1277 return r;
1278 }
1279 }
1280 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1281 if (unlikely(r != 0)) {
1282 gfx_v8_0_rlc_fini(adev);
1283 return r;
1284 }
1285 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1286 &adev->gfx.rlc.clear_state_gpu_addr);
1287 if (r) {
1288 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1289 dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
1290 gfx_v8_0_rlc_fini(adev);
1291 return r;
1292 }
1293
1294 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1295 if (r) {
1296 dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
1297 gfx_v8_0_rlc_fini(adev);
1298 return r;
1299 }
1300 /* set up the cs buffer */
1301 dst_ptr = adev->gfx.rlc.cs_ptr;
1302 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1303 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1304 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1305 }
1306
1307 if ((adev->asic_type == CHIP_CARRIZO) ||
1308 (adev->asic_type == CHIP_STONEY)) {
1309 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1310 if (adev->gfx.rlc.cp_table_obj == NULL) {
1311 r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1312 AMDGPU_GEM_DOMAIN_VRAM,
1313 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
1314 NULL, NULL,
1315 &adev->gfx.rlc.cp_table_obj);
1316 if (r) {
1317 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1318 return r;
1319 }
1320 }
1321
1322 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1323 if (unlikely(r != 0)) {
1324 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1325 return r;
1326 }
1327 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1328 &adev->gfx.rlc.cp_table_gpu_addr);
1329 if (r) {
1330 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1331 dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
1332 return r;
1333 }
1334 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1335 if (r) {
1336 dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1337 return r;
1338 }
1339
1340 cz_init_cp_jump_table(adev);
1341
1342 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1343 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1344
1345 }
1346
1347 return 0;
1348 }
1349
1350 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1351 {
1352 int r;
1353
1354 if (adev->gfx.mec.hpd_eop_obj) {
1355 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1356 if (unlikely(r != 0))
1357 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1358 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1359 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1360
1361 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1362 adev->gfx.mec.hpd_eop_obj = NULL;
1363 }
1364 }
1365
1366 #define MEC_HPD_SIZE 2048
1367
1368 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1369 {
1370 int r;
1371 u32 *hpd;
1372
1373 /*
1374 * we assign only 1 pipe because all other pipes will
1375 * be handled by KFD
1376 */
1377 adev->gfx.mec.num_mec = 1;
1378 adev->gfx.mec.num_pipe = 1;
1379 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1380
1381 if (adev->gfx.mec.hpd_eop_obj == NULL) {
1382 r = amdgpu_bo_create(adev,
1383 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
1384 PAGE_SIZE, true,
1385 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1386 &adev->gfx.mec.hpd_eop_obj);
1387 if (r) {
1388 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1389 return r;
1390 }
1391 }
1392
1393 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1394 if (unlikely(r != 0)) {
1395 gfx_v8_0_mec_fini(adev);
1396 return r;
1397 }
1398 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1399 &adev->gfx.mec.hpd_eop_gpu_addr);
1400 if (r) {
1401 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1402 gfx_v8_0_mec_fini(adev);
1403 return r;
1404 }
1405 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1406 if (r) {
1407 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1408 gfx_v8_0_mec_fini(adev);
1409 return r;
1410 }
1411
1412 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
1413
1414 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1415 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1416
1417 return 0;
1418 }
1419
1420 static const u32 vgpr_init_compute_shader[] =
1421 {
1422 0x7e000209, 0x7e020208,
1423 0x7e040207, 0x7e060206,
1424 0x7e080205, 0x7e0a0204,
1425 0x7e0c0203, 0x7e0e0202,
1426 0x7e100201, 0x7e120200,
1427 0x7e140209, 0x7e160208,
1428 0x7e180207, 0x7e1a0206,
1429 0x7e1c0205, 0x7e1e0204,
1430 0x7e200203, 0x7e220202,
1431 0x7e240201, 0x7e260200,
1432 0x7e280209, 0x7e2a0208,
1433 0x7e2c0207, 0x7e2e0206,
1434 0x7e300205, 0x7e320204,
1435 0x7e340203, 0x7e360202,
1436 0x7e380201, 0x7e3a0200,
1437 0x7e3c0209, 0x7e3e0208,
1438 0x7e400207, 0x7e420206,
1439 0x7e440205, 0x7e460204,
1440 0x7e480203, 0x7e4a0202,
1441 0x7e4c0201, 0x7e4e0200,
1442 0x7e500209, 0x7e520208,
1443 0x7e540207, 0x7e560206,
1444 0x7e580205, 0x7e5a0204,
1445 0x7e5c0203, 0x7e5e0202,
1446 0x7e600201, 0x7e620200,
1447 0x7e640209, 0x7e660208,
1448 0x7e680207, 0x7e6a0206,
1449 0x7e6c0205, 0x7e6e0204,
1450 0x7e700203, 0x7e720202,
1451 0x7e740201, 0x7e760200,
1452 0x7e780209, 0x7e7a0208,
1453 0x7e7c0207, 0x7e7e0206,
1454 0xbf8a0000, 0xbf810000,
1455 };
1456
1457 static const u32 sgpr_init_compute_shader[] =
1458 {
1459 0xbe8a0100, 0xbe8c0102,
1460 0xbe8e0104, 0xbe900106,
1461 0xbe920108, 0xbe940100,
1462 0xbe960102, 0xbe980104,
1463 0xbe9a0106, 0xbe9c0108,
1464 0xbe9e0100, 0xbea00102,
1465 0xbea20104, 0xbea40106,
1466 0xbea60108, 0xbea80100,
1467 0xbeaa0102, 0xbeac0104,
1468 0xbeae0106, 0xbeb00108,
1469 0xbeb20100, 0xbeb40102,
1470 0xbeb60104, 0xbeb80106,
1471 0xbeba0108, 0xbebc0100,
1472 0xbebe0102, 0xbec00104,
1473 0xbec20106, 0xbec40108,
1474 0xbec60100, 0xbec80102,
1475 0xbee60004, 0xbee70005,
1476 0xbeea0006, 0xbeeb0007,
1477 0xbee80008, 0xbee90009,
1478 0xbefc0000, 0xbf8a0000,
1479 0xbf810000, 0x00000000,
1480 };
1481
1482 static const u32 vgpr_init_regs[] =
1483 {
1484 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1485 mmCOMPUTE_RESOURCE_LIMITS, 0,
1486 mmCOMPUTE_NUM_THREAD_X, 256*4,
1487 mmCOMPUTE_NUM_THREAD_Y, 1,
1488 mmCOMPUTE_NUM_THREAD_Z, 1,
1489 mmCOMPUTE_PGM_RSRC2, 20,
1490 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1491 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1492 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1493 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1494 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1495 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1496 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1497 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1498 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1499 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1500 };
1501
1502 static const u32 sgpr1_init_regs[] =
1503 {
1504 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1505 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1506 mmCOMPUTE_NUM_THREAD_X, 256*5,
1507 mmCOMPUTE_NUM_THREAD_Y, 1,
1508 mmCOMPUTE_NUM_THREAD_Z, 1,
1509 mmCOMPUTE_PGM_RSRC2, 20,
1510 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1511 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1512 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1513 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1514 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1515 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1516 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1517 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1518 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1519 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1520 };
1521
1522 static const u32 sgpr2_init_regs[] =
1523 {
1524 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1525 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1526 mmCOMPUTE_NUM_THREAD_X, 256*5,
1527 mmCOMPUTE_NUM_THREAD_Y, 1,
1528 mmCOMPUTE_NUM_THREAD_Z, 1,
1529 mmCOMPUTE_PGM_RSRC2, 20,
1530 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1531 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1532 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1533 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1534 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1535 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1536 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1537 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1538 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1539 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1540 };
1541
1542 static const u32 sec_ded_counter_registers[] =
1543 {
1544 mmCPC_EDC_ATC_CNT,
1545 mmCPC_EDC_SCRATCH_CNT,
1546 mmCPC_EDC_UCODE_CNT,
1547 mmCPF_EDC_ATC_CNT,
1548 mmCPF_EDC_ROQ_CNT,
1549 mmCPF_EDC_TAG_CNT,
1550 mmCPG_EDC_ATC_CNT,
1551 mmCPG_EDC_DMA_CNT,
1552 mmCPG_EDC_TAG_CNT,
1553 mmDC_EDC_CSINVOC_CNT,
1554 mmDC_EDC_RESTORE_CNT,
1555 mmDC_EDC_STATE_CNT,
1556 mmGDS_EDC_CNT,
1557 mmGDS_EDC_GRBM_CNT,
1558 mmGDS_EDC_OA_DED,
1559 mmSPI_EDC_CNT,
1560 mmSQC_ATC_EDC_GATCL1_CNT,
1561 mmSQC_EDC_CNT,
1562 mmSQ_EDC_DED_CNT,
1563 mmSQ_EDC_INFO,
1564 mmSQ_EDC_SEC_CNT,
1565 mmTCC_EDC_CNT,
1566 mmTCP_ATC_EDC_GATCL1_CNT,
1567 mmTCP_EDC_CNT,
1568 mmTD_EDC_CNT
1569 };
1570
1571 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1572 {
1573 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1574 struct amdgpu_ib ib;
1575 struct fence *f = NULL;
1576 int r, i;
1577 u32 tmp;
1578 unsigned total_size, vgpr_offset, sgpr_offset;
1579 u64 gpu_addr;
1580
1581 /* only supported on CZ */
1582 if (adev->asic_type != CHIP_CARRIZO)
1583 return 0;
1584
1585 /* bail if the compute ring is not ready */
1586 if (!ring->ready)
1587 return 0;
1588
1589 tmp = RREG32(mmGB_EDC_MODE);
1590 WREG32(mmGB_EDC_MODE, 0);
1591
1592 total_size =
1593 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1594 total_size +=
1595 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1596 total_size +=
1597 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1598 total_size = ALIGN(total_size, 256);
1599 vgpr_offset = total_size;
1600 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1601 sgpr_offset = total_size;
1602 total_size += sizeof(sgpr_init_compute_shader);
1603
1604 /* allocate an indirect buffer to put the commands in */
1605 memset(&ib, 0, sizeof(ib));
1606 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1607 if (r) {
1608 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1609 return r;
1610 }
1611
1612 /* load the compute shaders */
1613 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1614 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1615
1616 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1617 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1618
1619 /* init the ib length to 0 */
1620 ib.length_dw = 0;
1621
1622 /* VGPR */
1623 /* write the register state for the compute dispatch */
1624 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1625 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1626 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1627 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1628 }
1629 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1630 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1631 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1632 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1633 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1634 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1635
1636 /* write dispatch packet */
1637 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1638 ib.ptr[ib.length_dw++] = 8; /* x */
1639 ib.ptr[ib.length_dw++] = 1; /* y */
1640 ib.ptr[ib.length_dw++] = 1; /* z */
1641 ib.ptr[ib.length_dw++] =
1642 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1643
1644 /* write CS partial flush packet */
1645 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1646 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1647
1648 /* SGPR1 */
1649 /* write the register state for the compute dispatch */
1650 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1651 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1652 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1653 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1654 }
1655 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1656 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1657 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1658 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1659 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1660 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1661
1662 /* write dispatch packet */
1663 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1664 ib.ptr[ib.length_dw++] = 8; /* x */
1665 ib.ptr[ib.length_dw++] = 1; /* y */
1666 ib.ptr[ib.length_dw++] = 1; /* z */
1667 ib.ptr[ib.length_dw++] =
1668 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1669
1670 /* write CS partial flush packet */
1671 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1672 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1673
1674 /* SGPR2 */
1675 /* write the register state for the compute dispatch */
1676 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1677 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1678 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1679 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1680 }
1681 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1682 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1683 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1684 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1685 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1686 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1687
1688 /* write dispatch packet */
1689 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1690 ib.ptr[ib.length_dw++] = 8; /* x */
1691 ib.ptr[ib.length_dw++] = 1; /* y */
1692 ib.ptr[ib.length_dw++] = 1; /* z */
1693 ib.ptr[ib.length_dw++] =
1694 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1695
1696 /* write CS partial flush packet */
1697 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1698 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1699
1700 /* schedule the ib on the ring */
1701 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1702 if (r) {
1703 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1704 goto fail;
1705 }
1706
1707 /* wait for the GPU to finish processing the IB */
1708 r = fence_wait(f, false);
1709 if (r) {
1710 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1711 goto fail;
1712 }
1713
1714 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1715 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1716 WREG32(mmGB_EDC_MODE, tmp);
1717
1718 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1719 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1720 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1721
1722
1723 /* read back registers to clear the counters */
1724 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1725 RREG32(sec_ded_counter_registers[i]);
1726
1727 fail:
1728 amdgpu_ib_free(adev, &ib, NULL);
1729 fence_put(f);
1730
1731 return r;
1732 }
1733
1734 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1735 {
1736 u32 gb_addr_config;
1737 u32 mc_shared_chmap, mc_arb_ramcfg;
1738 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1739 u32 tmp;
1740 int ret;
1741
1742 switch (adev->asic_type) {
1743 case CHIP_TOPAZ:
1744 adev->gfx.config.max_shader_engines = 1;
1745 adev->gfx.config.max_tile_pipes = 2;
1746 adev->gfx.config.max_cu_per_sh = 6;
1747 adev->gfx.config.max_sh_per_se = 1;
1748 adev->gfx.config.max_backends_per_se = 2;
1749 adev->gfx.config.max_texture_channel_caches = 2;
1750 adev->gfx.config.max_gprs = 256;
1751 adev->gfx.config.max_gs_threads = 32;
1752 adev->gfx.config.max_hw_contexts = 8;
1753
1754 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1755 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1756 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1757 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1758 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1759 break;
1760 case CHIP_FIJI:
1761 adev->gfx.config.max_shader_engines = 4;
1762 adev->gfx.config.max_tile_pipes = 16;
1763 adev->gfx.config.max_cu_per_sh = 16;
1764 adev->gfx.config.max_sh_per_se = 1;
1765 adev->gfx.config.max_backends_per_se = 4;
1766 adev->gfx.config.max_texture_channel_caches = 16;
1767 adev->gfx.config.max_gprs = 256;
1768 adev->gfx.config.max_gs_threads = 32;
1769 adev->gfx.config.max_hw_contexts = 8;
1770
1771 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1772 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1773 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1774 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1775 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1776 break;
1777 case CHIP_POLARIS11:
1778 ret = amdgpu_atombios_get_gfx_info(adev);
1779 if (ret)
1780 return ret;
1781 adev->gfx.config.max_gprs = 256;
1782 adev->gfx.config.max_gs_threads = 32;
1783 adev->gfx.config.max_hw_contexts = 8;
1784
1785 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1786 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1787 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1788 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1789 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1790 break;
1791 case CHIP_POLARIS10:
1792 ret = amdgpu_atombios_get_gfx_info(adev);
1793 if (ret)
1794 return ret;
1795 adev->gfx.config.max_gprs = 256;
1796 adev->gfx.config.max_gs_threads = 32;
1797 adev->gfx.config.max_hw_contexts = 8;
1798
1799 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1800 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1801 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1802 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1803 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1804 break;
1805 case CHIP_TONGA:
1806 adev->gfx.config.max_shader_engines = 4;
1807 adev->gfx.config.max_tile_pipes = 8;
1808 adev->gfx.config.max_cu_per_sh = 8;
1809 adev->gfx.config.max_sh_per_se = 1;
1810 adev->gfx.config.max_backends_per_se = 2;
1811 adev->gfx.config.max_texture_channel_caches = 8;
1812 adev->gfx.config.max_gprs = 256;
1813 adev->gfx.config.max_gs_threads = 32;
1814 adev->gfx.config.max_hw_contexts = 8;
1815
1816 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1817 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1818 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1819 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1820 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1821 break;
1822 case CHIP_CARRIZO:
1823 adev->gfx.config.max_shader_engines = 1;
1824 adev->gfx.config.max_tile_pipes = 2;
1825 adev->gfx.config.max_sh_per_se = 1;
1826 adev->gfx.config.max_backends_per_se = 2;
1827
1828 switch (adev->pdev->revision) {
1829 case 0xc4:
1830 case 0x84:
1831 case 0xc8:
1832 case 0xcc:
1833 case 0xe1:
1834 case 0xe3:
1835 /* B10 */
1836 adev->gfx.config.max_cu_per_sh = 8;
1837 break;
1838 case 0xc5:
1839 case 0x81:
1840 case 0x85:
1841 case 0xc9:
1842 case 0xcd:
1843 case 0xe2:
1844 case 0xe4:
1845 /* B8 */
1846 adev->gfx.config.max_cu_per_sh = 6;
1847 break;
1848 case 0xc6:
1849 case 0xca:
1850 case 0xce:
1851 case 0x88:
1852 /* B6 */
1853 adev->gfx.config.max_cu_per_sh = 6;
1854 break;
1855 case 0xc7:
1856 case 0x87:
1857 case 0xcb:
1858 case 0xe5:
1859 case 0x89:
1860 default:
1861 /* B4 */
1862 adev->gfx.config.max_cu_per_sh = 4;
1863 break;
1864 }
1865
1866 adev->gfx.config.max_texture_channel_caches = 2;
1867 adev->gfx.config.max_gprs = 256;
1868 adev->gfx.config.max_gs_threads = 32;
1869 adev->gfx.config.max_hw_contexts = 8;
1870
1871 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1872 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1873 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1874 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1875 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1876 break;
1877 case CHIP_STONEY:
1878 adev->gfx.config.max_shader_engines = 1;
1879 adev->gfx.config.max_tile_pipes = 2;
1880 adev->gfx.config.max_sh_per_se = 1;
1881 adev->gfx.config.max_backends_per_se = 1;
1882
1883 switch (adev->pdev->revision) {
1884 case 0xc0:
1885 case 0xc1:
1886 case 0xc2:
1887 case 0xc4:
1888 case 0xc8:
1889 case 0xc9:
1890 adev->gfx.config.max_cu_per_sh = 3;
1891 break;
1892 case 0xd0:
1893 case 0xd1:
1894 case 0xd2:
1895 default:
1896 adev->gfx.config.max_cu_per_sh = 2;
1897 break;
1898 }
1899
1900 adev->gfx.config.max_texture_channel_caches = 2;
1901 adev->gfx.config.max_gprs = 256;
1902 adev->gfx.config.max_gs_threads = 16;
1903 adev->gfx.config.max_hw_contexts = 8;
1904
1905 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1906 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1907 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1908 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1909 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1910 break;
1911 default:
1912 adev->gfx.config.max_shader_engines = 2;
1913 adev->gfx.config.max_tile_pipes = 4;
1914 adev->gfx.config.max_cu_per_sh = 2;
1915 adev->gfx.config.max_sh_per_se = 1;
1916 adev->gfx.config.max_backends_per_se = 2;
1917 adev->gfx.config.max_texture_channel_caches = 4;
1918 adev->gfx.config.max_gprs = 256;
1919 adev->gfx.config.max_gs_threads = 32;
1920 adev->gfx.config.max_hw_contexts = 8;
1921
1922 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1923 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1924 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1925 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1926 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1927 break;
1928 }
1929
1930 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1931 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1932 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1933
1934 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1935 adev->gfx.config.mem_max_burst_length_bytes = 256;
1936 if (adev->flags & AMD_IS_APU) {
1937 /* Get memory bank mapping mode. */
1938 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1939 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1940 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1941
1942 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1943 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1944 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1945
1946 /* Validate settings in case only one DIMM installed. */
1947 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1948 dimm00_addr_map = 0;
1949 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1950 dimm01_addr_map = 0;
1951 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1952 dimm10_addr_map = 0;
1953 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1954 dimm11_addr_map = 0;
1955
1956 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1957 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1958 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1959 adev->gfx.config.mem_row_size_in_kb = 2;
1960 else
1961 adev->gfx.config.mem_row_size_in_kb = 1;
1962 } else {
1963 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1964 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1965 if (adev->gfx.config.mem_row_size_in_kb > 4)
1966 adev->gfx.config.mem_row_size_in_kb = 4;
1967 }
1968
1969 adev->gfx.config.shader_engine_tile_size = 32;
1970 adev->gfx.config.num_gpus = 1;
1971 adev->gfx.config.multi_gpu_tile_size = 64;
1972
1973 /* fix up row size */
1974 switch (adev->gfx.config.mem_row_size_in_kb) {
1975 case 1:
1976 default:
1977 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1978 break;
1979 case 2:
1980 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1981 break;
1982 case 4:
1983 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1984 break;
1985 }
1986 adev->gfx.config.gb_addr_config = gb_addr_config;
1987
1988 return 0;
1989 }
1990
1991 static int gfx_v8_0_sw_init(void *handle)
1992 {
1993 int i, r;
1994 struct amdgpu_ring *ring;
1995 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1996
1997 /* EOP Event */
1998 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1999 if (r)
2000 return r;
2001
2002 /* Privileged reg */
2003 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2004 if (r)
2005 return r;
2006
2007 /* Privileged inst */
2008 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2009 if (r)
2010 return r;
2011
2012 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2013
2014 gfx_v8_0_scratch_init(adev);
2015
2016 r = gfx_v8_0_init_microcode(adev);
2017 if (r) {
2018 DRM_ERROR("Failed to load gfx firmware!\n");
2019 return r;
2020 }
2021
2022 r = gfx_v8_0_rlc_init(adev);
2023 if (r) {
2024 DRM_ERROR("Failed to init rlc BOs!\n");
2025 return r;
2026 }
2027
2028 r = gfx_v8_0_mec_init(adev);
2029 if (r) {
2030 DRM_ERROR("Failed to init MEC BOs!\n");
2031 return r;
2032 }
2033
2034 /* set up the gfx ring */
2035 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2036 ring = &adev->gfx.gfx_ring[i];
2037 ring->ring_obj = NULL;
2038 sprintf(ring->name, "gfx");
2039 /* no gfx doorbells on iceland */
2040 if (adev->asic_type != CHIP_TOPAZ) {
2041 ring->use_doorbell = true;
2042 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2043 }
2044
2045 r = amdgpu_ring_init(adev, ring, 1024,
2046 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2047 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2048 AMDGPU_RING_TYPE_GFX);
2049 if (r)
2050 return r;
2051 }
2052
2053 /* set up the compute queues */
2054 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2055 unsigned irq_type;
2056
2057 /* max 32 queues per MEC */
2058 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2059 DRM_ERROR("Too many (%d) compute rings!\n", i);
2060 break;
2061 }
2062 ring = &adev->gfx.compute_ring[i];
2063 ring->ring_obj = NULL;
2064 ring->use_doorbell = true;
2065 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2066 ring->me = 1; /* first MEC */
2067 ring->pipe = i / 8;
2068 ring->queue = i % 8;
2069 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2070 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2071 /* type-2 packets are deprecated on MEC, use type-3 instead */
2072 r = amdgpu_ring_init(adev, ring, 1024,
2073 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2074 &adev->gfx.eop_irq, irq_type,
2075 AMDGPU_RING_TYPE_COMPUTE);
2076 if (r)
2077 return r;
2078 }
2079
2080 /* reserve GDS, GWS and OA resource for gfx */
2081 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
2082 PAGE_SIZE, true,
2083 AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
2084 NULL, &adev->gds.gds_gfx_bo);
2085 if (r)
2086 return r;
2087
2088 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
2089 PAGE_SIZE, true,
2090 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
2091 NULL, &adev->gds.gws_gfx_bo);
2092 if (r)
2093 return r;
2094
2095 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
2096 PAGE_SIZE, true,
2097 AMDGPU_GEM_DOMAIN_OA, 0, NULL,
2098 NULL, &adev->gds.oa_gfx_bo);
2099 if (r)
2100 return r;
2101
2102 adev->gfx.ce_ram_size = 0x8000;
2103
2104 r = gfx_v8_0_gpu_early_init(adev);
2105 if (r)
2106 return r;
2107
2108 return 0;
2109 }
2110
2111 static int gfx_v8_0_sw_fini(void *handle)
2112 {
2113 int i;
2114 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2115
2116 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2117 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2118 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2119
2120 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2121 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2122 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2123 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2124
2125 gfx_v8_0_mec_fini(adev);
2126
2127 gfx_v8_0_rlc_fini(adev);
2128
2129 gfx_v8_0_free_microcode(adev);
2130
2131 return 0;
2132 }
2133
2134 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2135 {
2136 uint32_t *modearray, *mod2array;
2137 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2138 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2139 u32 reg_offset;
2140
2141 modearray = adev->gfx.config.tile_mode_array;
2142 mod2array = adev->gfx.config.macrotile_mode_array;
2143
2144 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2145 modearray[reg_offset] = 0;
2146
2147 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2148 mod2array[reg_offset] = 0;
2149
2150 switch (adev->asic_type) {
2151 case CHIP_TOPAZ:
2152 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2153 PIPE_CONFIG(ADDR_SURF_P2) |
2154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2155 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2156 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2157 PIPE_CONFIG(ADDR_SURF_P2) |
2158 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2159 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2160 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2161 PIPE_CONFIG(ADDR_SURF_P2) |
2162 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2163 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2164 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2165 PIPE_CONFIG(ADDR_SURF_P2) |
2166 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2167 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2168 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2169 PIPE_CONFIG(ADDR_SURF_P2) |
2170 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2171 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2172 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2173 PIPE_CONFIG(ADDR_SURF_P2) |
2174 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2175 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2176 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2177 PIPE_CONFIG(ADDR_SURF_P2) |
2178 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2179 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2180 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2181 PIPE_CONFIG(ADDR_SURF_P2));
2182 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2183 PIPE_CONFIG(ADDR_SURF_P2) |
2184 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2186 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2187 PIPE_CONFIG(ADDR_SURF_P2) |
2188 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2189 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2191 PIPE_CONFIG(ADDR_SURF_P2) |
2192 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2193 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2194 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2195 PIPE_CONFIG(ADDR_SURF_P2) |
2196 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2197 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2198 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2199 PIPE_CONFIG(ADDR_SURF_P2) |
2200 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2202 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2203 PIPE_CONFIG(ADDR_SURF_P2) |
2204 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2205 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2206 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2207 PIPE_CONFIG(ADDR_SURF_P2) |
2208 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2209 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2210 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2211 PIPE_CONFIG(ADDR_SURF_P2) |
2212 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2213 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2214 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2215 PIPE_CONFIG(ADDR_SURF_P2) |
2216 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2217 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2218 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2219 PIPE_CONFIG(ADDR_SURF_P2) |
2220 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2221 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2222 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2223 PIPE_CONFIG(ADDR_SURF_P2) |
2224 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2225 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2226 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2227 PIPE_CONFIG(ADDR_SURF_P2) |
2228 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2229 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2230 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2231 PIPE_CONFIG(ADDR_SURF_P2) |
2232 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2233 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2234 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2235 PIPE_CONFIG(ADDR_SURF_P2) |
2236 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2237 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2238 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2239 PIPE_CONFIG(ADDR_SURF_P2) |
2240 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2241 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2242 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2243 PIPE_CONFIG(ADDR_SURF_P2) |
2244 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2245 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2246 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2247 PIPE_CONFIG(ADDR_SURF_P2) |
2248 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2249 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2250 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2251 PIPE_CONFIG(ADDR_SURF_P2) |
2252 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2253 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2254
2255 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 NUM_BANKS(ADDR_SURF_8_BANK));
2259 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2260 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2261 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2262 NUM_BANKS(ADDR_SURF_8_BANK));
2263 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2264 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2266 NUM_BANKS(ADDR_SURF_8_BANK));
2267 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2269 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2270 NUM_BANKS(ADDR_SURF_8_BANK));
2271 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2272 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2273 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2274 NUM_BANKS(ADDR_SURF_8_BANK));
2275 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2276 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2277 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2278 NUM_BANKS(ADDR_SURF_8_BANK));
2279 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2282 NUM_BANKS(ADDR_SURF_8_BANK));
2283 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2284 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2285 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286 NUM_BANKS(ADDR_SURF_16_BANK));
2287 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2288 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2289 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2290 NUM_BANKS(ADDR_SURF_16_BANK));
2291 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2292 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2293 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294 NUM_BANKS(ADDR_SURF_16_BANK));
2295 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2296 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2297 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2298 NUM_BANKS(ADDR_SURF_16_BANK));
2299 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2300 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2301 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2302 NUM_BANKS(ADDR_SURF_16_BANK));
2303 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2306 NUM_BANKS(ADDR_SURF_16_BANK));
2307 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2308 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2309 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2310 NUM_BANKS(ADDR_SURF_8_BANK));
2311
2312 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2313 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2314 reg_offset != 23)
2315 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2316
2317 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2318 if (reg_offset != 7)
2319 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2320
2321 break;
2322 case CHIP_FIJI:
2323 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2324 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2325 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2326 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2327 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2328 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2329 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2330 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2331 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2332 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2333 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2334 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2335 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2336 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2337 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2338 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2339 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2340 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2341 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2342 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2343 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2344 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2345 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2346 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2347 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2348 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2349 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2350 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2351 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2352 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2353 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2354 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2355 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2356 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2357 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2358 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2359 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2361 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2362 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2363 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2365 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2366 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2367 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2369 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2370 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2371 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2373 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2375 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2376 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2377 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2379 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2380 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2382 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2386 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2387 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2388 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2389 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2390 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2391 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2392 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2393 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2394 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2396 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2397 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2398 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2399 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2400 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2401 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2402 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2403 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2404 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2405 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2406 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2408 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2409 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2410 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2411 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2412 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2413 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2414 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2415 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2417 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2418 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2419 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2420 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2421 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2422 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2423 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2424 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2425 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2426 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2429 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2430 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2431 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2432 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2433 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2434 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2435 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2436 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2438 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2440 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2441 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2442 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2443 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2445
2446 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2449 NUM_BANKS(ADDR_SURF_8_BANK));
2450 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2451 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2452 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2453 NUM_BANKS(ADDR_SURF_8_BANK));
2454 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2455 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2456 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2457 NUM_BANKS(ADDR_SURF_8_BANK));
2458 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2461 NUM_BANKS(ADDR_SURF_8_BANK));
2462 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2463 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2464 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2465 NUM_BANKS(ADDR_SURF_8_BANK));
2466 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2469 NUM_BANKS(ADDR_SURF_8_BANK));
2470 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2473 NUM_BANKS(ADDR_SURF_8_BANK));
2474 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2475 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2476 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2477 NUM_BANKS(ADDR_SURF_8_BANK));
2478 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2480 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2481 NUM_BANKS(ADDR_SURF_8_BANK));
2482 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2485 NUM_BANKS(ADDR_SURF_8_BANK));
2486 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2487 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2488 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2489 NUM_BANKS(ADDR_SURF_8_BANK));
2490 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2491 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2492 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2493 NUM_BANKS(ADDR_SURF_8_BANK));
2494 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2497 NUM_BANKS(ADDR_SURF_8_BANK));
2498 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2499 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2500 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2501 NUM_BANKS(ADDR_SURF_4_BANK));
2502
2503 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2504 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2505
2506 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2507 if (reg_offset != 7)
2508 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2509
2510 break;
2511 case CHIP_TONGA:
2512 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2513 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2514 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2515 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2516 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2518 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2519 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2520 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2522 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2523 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2524 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2526 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2527 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2528 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2529 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2530 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2531 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2532 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2534 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2535 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2536 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2537 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2538 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2539 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2540 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2541 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2542 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2543 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2544 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2545 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2546 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2547 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2548 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2549 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2550 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2552 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2553 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2554 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2555 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2556 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2557 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2558 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2559 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2560 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2561 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2562 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2563 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2564 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2565 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2566 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2567 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2568 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2570 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2571 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2572 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2573 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2574 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2575 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2576 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2577 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2578 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2579 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2580 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2581 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2582 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2583 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2584 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2585 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2586 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2587 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2588 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2589 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2590 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2591 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2592 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2593 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2594 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2595 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2596 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2597 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2598 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2599 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2601 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2602 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2603 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2604 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2605 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2606 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2607 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2608 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2609 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2610 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2611 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2613 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2614 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2615 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2616 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2618 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2619 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2620 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2621 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2623 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2625 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2626 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2627 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2628 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2629 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2630 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2631 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2632 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2633 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2634
2635 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2637 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2638 NUM_BANKS(ADDR_SURF_16_BANK));
2639 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2640 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2641 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2642 NUM_BANKS(ADDR_SURF_16_BANK));
2643 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2644 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2645 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2646 NUM_BANKS(ADDR_SURF_16_BANK));
2647 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2649 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2650 NUM_BANKS(ADDR_SURF_16_BANK));
2651 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2652 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2653 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2654 NUM_BANKS(ADDR_SURF_16_BANK));
2655 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2657 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2658 NUM_BANKS(ADDR_SURF_16_BANK));
2659 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2660 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2661 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2662 NUM_BANKS(ADDR_SURF_16_BANK));
2663 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2664 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2665 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2666 NUM_BANKS(ADDR_SURF_16_BANK));
2667 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2668 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2669 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2670 NUM_BANKS(ADDR_SURF_16_BANK));
2671 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2672 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2673 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2674 NUM_BANKS(ADDR_SURF_16_BANK));
2675 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2677 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2678 NUM_BANKS(ADDR_SURF_16_BANK));
2679 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2680 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2681 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2682 NUM_BANKS(ADDR_SURF_8_BANK));
2683 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2685 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2686 NUM_BANKS(ADDR_SURF_4_BANK));
2687 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2688 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2689 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2690 NUM_BANKS(ADDR_SURF_4_BANK));
2691
2692 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2693 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2694
2695 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2696 if (reg_offset != 7)
2697 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2698
2699 break;
2700 case CHIP_POLARIS11:
2701 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2702 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2703 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2704 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2705 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2706 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2707 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2709 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2710 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2711 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2712 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2713 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2715 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2716 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2717 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2718 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2719 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2720 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2721 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2722 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2723 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2724 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2725 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2726 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2727 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2728 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2729 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2730 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2731 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2732 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2733 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2734 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2735 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2736 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2737 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2738 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2739 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2740 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2741 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2742 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2744 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2745 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2746 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2747 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2748 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2749 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2750 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2751 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2752 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2753 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2754 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2755 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2756 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2757 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2758 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2759 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2760 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2761 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2762 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2763 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2764 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2765 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2766 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2767 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2768 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2769 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2770 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2771 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2772 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2773 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2774 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2775 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2776 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2777 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2778 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2779 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2780 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2781 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2782 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2783 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2784 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2785 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2786 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2787 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2788 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2789 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2790 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2791 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2792 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2793 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2794 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2795 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2796 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2797 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2798 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2799 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2800 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2801 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2802 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2803 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2804 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2805 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2806 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2807 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2808 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2809 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2810 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2811 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2812 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2813 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2814 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2815 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2816 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2817 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2818 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2819 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2820 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2821 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2822 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2823
2824 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2826 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2827 NUM_BANKS(ADDR_SURF_16_BANK));
2828
2829 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2831 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2832 NUM_BANKS(ADDR_SURF_16_BANK));
2833
2834 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2836 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2837 NUM_BANKS(ADDR_SURF_16_BANK));
2838
2839 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2841 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2842 NUM_BANKS(ADDR_SURF_16_BANK));
2843
2844 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847 NUM_BANKS(ADDR_SURF_16_BANK));
2848
2849 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2852 NUM_BANKS(ADDR_SURF_16_BANK));
2853
2854 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2855 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2856 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2857 NUM_BANKS(ADDR_SURF_16_BANK));
2858
2859 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2860 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2861 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2862 NUM_BANKS(ADDR_SURF_16_BANK));
2863
2864 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2865 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867 NUM_BANKS(ADDR_SURF_16_BANK));
2868
2869 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2871 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2872 NUM_BANKS(ADDR_SURF_16_BANK));
2873
2874 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2875 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2876 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2877 NUM_BANKS(ADDR_SURF_16_BANK));
2878
2879 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2882 NUM_BANKS(ADDR_SURF_16_BANK));
2883
2884 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2887 NUM_BANKS(ADDR_SURF_8_BANK));
2888
2889 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2890 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2891 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2892 NUM_BANKS(ADDR_SURF_4_BANK));
2893
2894 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2895 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2896
2897 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2898 if (reg_offset != 7)
2899 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2900
2901 break;
2902 case CHIP_POLARIS10:
2903 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2904 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2905 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2906 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2907 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2908 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2909 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2910 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2912 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2913 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2914 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2915 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2916 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2917 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2918 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2919 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2920 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2921 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2922 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2923 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2924 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2925 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2926 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2927 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2928 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2929 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2930 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2931 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2932 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2933 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2934 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2935 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2936 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2937 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2938 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2939 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2940 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2941 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2942 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2943 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2944 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2945 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2946 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2947 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2948 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2949 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2950 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2951 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2952 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2953 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2954 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2955 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2956 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2957 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2959 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2960 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2961 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2962 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2963 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2964 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2965 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2966 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2967 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2968 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2969 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2970 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2971 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2972 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2973 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2974 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2975 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2976 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2977 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2978 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2979 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2980 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2981 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2982 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2983 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2984 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2985 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2986 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2987 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2988 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2989 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2990 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2991 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2992 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2993 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2994 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2995 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2996 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2997 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2998 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2999 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3000 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3001 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3002 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3003 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3004 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3005 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3006 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3007 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3008 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3009 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3010 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3011 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3012 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3013 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3014 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3015 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3016 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3018 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3019 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3020 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3021 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3022 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3023 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3024 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3025
3026 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3028 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3029 NUM_BANKS(ADDR_SURF_16_BANK));
3030
3031 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3033 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3034 NUM_BANKS(ADDR_SURF_16_BANK));
3035
3036 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3038 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039 NUM_BANKS(ADDR_SURF_16_BANK));
3040
3041 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3043 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3044 NUM_BANKS(ADDR_SURF_16_BANK));
3045
3046 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3048 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3049 NUM_BANKS(ADDR_SURF_16_BANK));
3050
3051 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3054 NUM_BANKS(ADDR_SURF_16_BANK));
3055
3056 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3058 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3059 NUM_BANKS(ADDR_SURF_16_BANK));
3060
3061 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3062 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3063 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3064 NUM_BANKS(ADDR_SURF_16_BANK));
3065
3066 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3067 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3068 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3069 NUM_BANKS(ADDR_SURF_16_BANK));
3070
3071 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3072 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3073 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3074 NUM_BANKS(ADDR_SURF_16_BANK));
3075
3076 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3078 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3079 NUM_BANKS(ADDR_SURF_16_BANK));
3080
3081 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3083 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3084 NUM_BANKS(ADDR_SURF_8_BANK));
3085
3086 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3089 NUM_BANKS(ADDR_SURF_4_BANK));
3090
3091 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3092 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3093 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3094 NUM_BANKS(ADDR_SURF_4_BANK));
3095
3096 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3097 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3098
3099 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3100 if (reg_offset != 7)
3101 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3102
3103 break;
3104 case CHIP_STONEY:
3105 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3106 PIPE_CONFIG(ADDR_SURF_P2) |
3107 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3108 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3109 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3110 PIPE_CONFIG(ADDR_SURF_P2) |
3111 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3112 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3113 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3114 PIPE_CONFIG(ADDR_SURF_P2) |
3115 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3116 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3117 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3118 PIPE_CONFIG(ADDR_SURF_P2) |
3119 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3120 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3121 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3122 PIPE_CONFIG(ADDR_SURF_P2) |
3123 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3124 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3125 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3126 PIPE_CONFIG(ADDR_SURF_P2) |
3127 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3128 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3129 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3130 PIPE_CONFIG(ADDR_SURF_P2) |
3131 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3132 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3133 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3134 PIPE_CONFIG(ADDR_SURF_P2));
3135 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3136 PIPE_CONFIG(ADDR_SURF_P2) |
3137 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3138 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3139 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3140 PIPE_CONFIG(ADDR_SURF_P2) |
3141 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3142 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3143 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3144 PIPE_CONFIG(ADDR_SURF_P2) |
3145 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3146 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3147 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3148 PIPE_CONFIG(ADDR_SURF_P2) |
3149 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3150 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3151 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3152 PIPE_CONFIG(ADDR_SURF_P2) |
3153 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3155 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3156 PIPE_CONFIG(ADDR_SURF_P2) |
3157 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3158 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3159 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3160 PIPE_CONFIG(ADDR_SURF_P2) |
3161 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3162 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3163 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3164 PIPE_CONFIG(ADDR_SURF_P2) |
3165 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3166 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3167 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3168 PIPE_CONFIG(ADDR_SURF_P2) |
3169 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3170 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3171 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3172 PIPE_CONFIG(ADDR_SURF_P2) |
3173 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3174 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3175 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3176 PIPE_CONFIG(ADDR_SURF_P2) |
3177 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3178 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3179 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3180 PIPE_CONFIG(ADDR_SURF_P2) |
3181 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3182 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3183 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3184 PIPE_CONFIG(ADDR_SURF_P2) |
3185 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3186 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3187 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3188 PIPE_CONFIG(ADDR_SURF_P2) |
3189 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3190 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3191 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3192 PIPE_CONFIG(ADDR_SURF_P2) |
3193 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3194 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3195 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3196 PIPE_CONFIG(ADDR_SURF_P2) |
3197 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3198 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3199 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200 PIPE_CONFIG(ADDR_SURF_P2) |
3201 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3202 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3203 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3204 PIPE_CONFIG(ADDR_SURF_P2) |
3205 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3206 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3207
3208 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3209 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3210 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3211 NUM_BANKS(ADDR_SURF_8_BANK));
3212 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3213 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3214 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3215 NUM_BANKS(ADDR_SURF_8_BANK));
3216 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3217 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3218 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3219 NUM_BANKS(ADDR_SURF_8_BANK));
3220 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3221 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3222 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3223 NUM_BANKS(ADDR_SURF_8_BANK));
3224 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3225 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3226 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3227 NUM_BANKS(ADDR_SURF_8_BANK));
3228 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3229 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3230 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3231 NUM_BANKS(ADDR_SURF_8_BANK));
3232 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3233 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3234 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3235 NUM_BANKS(ADDR_SURF_8_BANK));
3236 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3237 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3238 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3239 NUM_BANKS(ADDR_SURF_16_BANK));
3240 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3241 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3242 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3243 NUM_BANKS(ADDR_SURF_16_BANK));
3244 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3245 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3246 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3247 NUM_BANKS(ADDR_SURF_16_BANK));
3248 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3249 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3250 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3251 NUM_BANKS(ADDR_SURF_16_BANK));
3252 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3253 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3254 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3255 NUM_BANKS(ADDR_SURF_16_BANK));
3256 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3257 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3258 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3259 NUM_BANKS(ADDR_SURF_16_BANK));
3260 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3261 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3262 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3263 NUM_BANKS(ADDR_SURF_8_BANK));
3264
3265 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3266 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3267 reg_offset != 23)
3268 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3269
3270 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3271 if (reg_offset != 7)
3272 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3273
3274 break;
3275 default:
3276 dev_warn(adev->dev,
3277 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3278 adev->asic_type);
3279
3280 case CHIP_CARRIZO:
3281 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3282 PIPE_CONFIG(ADDR_SURF_P2) |
3283 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3284 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3285 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3286 PIPE_CONFIG(ADDR_SURF_P2) |
3287 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3288 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3289 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3290 PIPE_CONFIG(ADDR_SURF_P2) |
3291 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3292 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3293 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3294 PIPE_CONFIG(ADDR_SURF_P2) |
3295 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3296 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3297 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3298 PIPE_CONFIG(ADDR_SURF_P2) |
3299 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3300 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3301 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3302 PIPE_CONFIG(ADDR_SURF_P2) |
3303 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3304 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3305 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3306 PIPE_CONFIG(ADDR_SURF_P2) |
3307 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3308 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3309 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3310 PIPE_CONFIG(ADDR_SURF_P2));
3311 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3312 PIPE_CONFIG(ADDR_SURF_P2) |
3313 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3314 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3315 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3316 PIPE_CONFIG(ADDR_SURF_P2) |
3317 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3318 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3319 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3320 PIPE_CONFIG(ADDR_SURF_P2) |
3321 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3322 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3323 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3324 PIPE_CONFIG(ADDR_SURF_P2) |
3325 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3326 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3327 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3328 PIPE_CONFIG(ADDR_SURF_P2) |
3329 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3330 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3331 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3332 PIPE_CONFIG(ADDR_SURF_P2) |
3333 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3335 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3336 PIPE_CONFIG(ADDR_SURF_P2) |
3337 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3338 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3339 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3340 PIPE_CONFIG(ADDR_SURF_P2) |
3341 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3342 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3343 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3344 PIPE_CONFIG(ADDR_SURF_P2) |
3345 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3347 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3348 PIPE_CONFIG(ADDR_SURF_P2) |
3349 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3350 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3351 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3352 PIPE_CONFIG(ADDR_SURF_P2) |
3353 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3354 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3355 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3356 PIPE_CONFIG(ADDR_SURF_P2) |
3357 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3358 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3359 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3360 PIPE_CONFIG(ADDR_SURF_P2) |
3361 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3363 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3364 PIPE_CONFIG(ADDR_SURF_P2) |
3365 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3366 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3367 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3368 PIPE_CONFIG(ADDR_SURF_P2) |
3369 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3370 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3371 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3372 PIPE_CONFIG(ADDR_SURF_P2) |
3373 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3374 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3375 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3376 PIPE_CONFIG(ADDR_SURF_P2) |
3377 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3379 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3380 PIPE_CONFIG(ADDR_SURF_P2) |
3381 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3382 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3383
3384 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3385 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3386 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3387 NUM_BANKS(ADDR_SURF_8_BANK));
3388 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3389 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3390 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3391 NUM_BANKS(ADDR_SURF_8_BANK));
3392 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3393 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3394 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3395 NUM_BANKS(ADDR_SURF_8_BANK));
3396 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3397 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3398 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3399 NUM_BANKS(ADDR_SURF_8_BANK));
3400 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3401 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3402 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3403 NUM_BANKS(ADDR_SURF_8_BANK));
3404 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3405 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3406 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3407 NUM_BANKS(ADDR_SURF_8_BANK));
3408 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3409 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3410 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3411 NUM_BANKS(ADDR_SURF_8_BANK));
3412 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3413 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3414 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3415 NUM_BANKS(ADDR_SURF_16_BANK));
3416 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3417 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3418 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3419 NUM_BANKS(ADDR_SURF_16_BANK));
3420 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3421 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3422 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3423 NUM_BANKS(ADDR_SURF_16_BANK));
3424 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3425 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3426 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3427 NUM_BANKS(ADDR_SURF_16_BANK));
3428 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3429 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3430 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3431 NUM_BANKS(ADDR_SURF_16_BANK));
3432 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3433 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3434 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3435 NUM_BANKS(ADDR_SURF_16_BANK));
3436 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3437 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3438 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3439 NUM_BANKS(ADDR_SURF_8_BANK));
3440
3441 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3442 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3443 reg_offset != 23)
3444 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3445
3446 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3447 if (reg_offset != 7)
3448 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3449
3450 break;
3451 }
3452 }
3453
3454 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3455 u32 se_num, u32 sh_num, u32 instance)
3456 {
3457 u32 data;
3458
3459 if (instance == 0xffffffff)
3460 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3461 else
3462 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3463
3464 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3465 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3466 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3467 } else if (se_num == 0xffffffff) {
3468 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3469 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3470 } else if (sh_num == 0xffffffff) {
3471 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3472 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3473 } else {
3474 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3475 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3476 }
3477 WREG32(mmGRBM_GFX_INDEX, data);
3478 }
3479
3480 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3481 {
3482 return (u32)((1ULL << bit_width) - 1);
3483 }
3484
3485 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3486 {
3487 u32 data, mask;
3488
3489 data = RREG32(mmCC_RB_BACKEND_DISABLE);
3490 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3491
3492 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3493 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3494
3495 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3496 adev->gfx.config.max_sh_per_se);
3497
3498 return (~data) & mask;
3499 }
3500
3501 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3502 {
3503 int i, j;
3504 u32 data;
3505 u32 active_rbs = 0;
3506 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3507 adev->gfx.config.max_sh_per_se;
3508
3509 mutex_lock(&adev->grbm_idx_mutex);
3510 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3511 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3512 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3513 data = gfx_v8_0_get_rb_active_bitmap(adev);
3514 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3515 rb_bitmap_width_per_sh);
3516 }
3517 }
3518 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3519 mutex_unlock(&adev->grbm_idx_mutex);
3520
3521 adev->gfx.config.backend_enable_mask = active_rbs;
3522 adev->gfx.config.num_rbs = hweight32(active_rbs);
3523 }
3524
3525 /**
3526 * gfx_v8_0_init_compute_vmid - gart enable
3527 *
3528 * @rdev: amdgpu_device pointer
3529 *
3530 * Initialize compute vmid sh_mem registers
3531 *
3532 */
3533 #define DEFAULT_SH_MEM_BASES (0x6000)
3534 #define FIRST_COMPUTE_VMID (8)
3535 #define LAST_COMPUTE_VMID (16)
3536 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3537 {
3538 int i;
3539 uint32_t sh_mem_config;
3540 uint32_t sh_mem_bases;
3541
3542 /*
3543 * Configure apertures:
3544 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3545 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3546 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3547 */
3548 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3549
3550 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3551 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3552 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3553 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3554 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3555 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3556
3557 mutex_lock(&adev->srbm_mutex);
3558 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3559 vi_srbm_select(adev, 0, 0, 0, i);
3560 /* CP and shaders */
3561 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3562 WREG32(mmSH_MEM_APE1_BASE, 1);
3563 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3564 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3565 }
3566 vi_srbm_select(adev, 0, 0, 0, 0);
3567 mutex_unlock(&adev->srbm_mutex);
3568 }
3569
3570 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3571 {
3572 u32 tmp;
3573 int i;
3574
3575 tmp = RREG32(mmGRBM_CNTL);
3576 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3577 WREG32(mmGRBM_CNTL, tmp);
3578
3579 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3580 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3581 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3582
3583 gfx_v8_0_tiling_mode_table_init(adev);
3584
3585 gfx_v8_0_setup_rb(adev);
3586 gfx_v8_0_get_cu_info(adev);
3587
3588 /* XXX SH_MEM regs */
3589 /* where to put LDS, scratch, GPUVM in FSA64 space */
3590 mutex_lock(&adev->srbm_mutex);
3591 for (i = 0; i < 16; i++) {
3592 vi_srbm_select(adev, 0, 0, 0, i);
3593 /* CP and shaders */
3594 if (i == 0) {
3595 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3596 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3597 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3598 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3599 WREG32(mmSH_MEM_CONFIG, tmp);
3600 } else {
3601 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3602 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3603 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3604 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3605 WREG32(mmSH_MEM_CONFIG, tmp);
3606 }
3607
3608 WREG32(mmSH_MEM_APE1_BASE, 1);
3609 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3610 WREG32(mmSH_MEM_BASES, 0);
3611 }
3612 vi_srbm_select(adev, 0, 0, 0, 0);
3613 mutex_unlock(&adev->srbm_mutex);
3614
3615 gfx_v8_0_init_compute_vmid(adev);
3616
3617 mutex_lock(&adev->grbm_idx_mutex);
3618 /*
3619 * making sure that the following register writes will be broadcasted
3620 * to all the shaders
3621 */
3622 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3623
3624 WREG32(mmPA_SC_FIFO_SIZE,
3625 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3626 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3627 (adev->gfx.config.sc_prim_fifo_size_backend <<
3628 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3629 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3630 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3631 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3632 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3633 mutex_unlock(&adev->grbm_idx_mutex);
3634
3635 }
3636
3637 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3638 {
3639 u32 i, j, k;
3640 u32 mask;
3641
3642 mutex_lock(&adev->grbm_idx_mutex);
3643 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3644 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3645 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3646 for (k = 0; k < adev->usec_timeout; k++) {
3647 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3648 break;
3649 udelay(1);
3650 }
3651 }
3652 }
3653 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3654 mutex_unlock(&adev->grbm_idx_mutex);
3655
3656 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3657 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3658 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3659 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3660 for (k = 0; k < adev->usec_timeout; k++) {
3661 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3662 break;
3663 udelay(1);
3664 }
3665 }
3666
3667 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3668 bool enable)
3669 {
3670 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3671
3672 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3673 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3674 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3675 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3676
3677 WREG32(mmCP_INT_CNTL_RING0, tmp);
3678 }
3679
3680 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3681 {
3682 /* csib */
3683 WREG32(mmRLC_CSIB_ADDR_HI,
3684 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3685 WREG32(mmRLC_CSIB_ADDR_LO,
3686 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3687 WREG32(mmRLC_CSIB_LENGTH,
3688 adev->gfx.rlc.clear_state_size);
3689 }
3690
/*
 * gfx_v8_0_parse_ind_reg_list - index the indirect part of a register list
 *
 * @register_list_format: list to walk; modified in place
 * @ind_offset: position in the list where parsing starts
 * @list_size: number of words in @register_list_format
 * @unique_indices: table that collects every distinct index value found
 * @indices_count: in/out, number of used slots in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: table that receives the start offset of each group
 * @offset_count: in/out, number of used slots in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * The list is a sequence of groups, each terminated by a 0xFFFFFFFF word.
 * The offset of the first word of every group is appended to
 * @ind_start_offsets.  Inside a group, words are consumed three at a
 * time: the third word of each triple is looked up in @unique_indices
 * (and appended if not present yet) and then overwritten in the list
 * with its position in that table.
 *
 * BUG_ON() fires as soon as either table's count reaches its capacity.
 * A triple that would extend beyond @list_size ends the walk, so the list
 * is never read or written out of bounds.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
					int ind_offset,
					int list_size,
					int *unique_indices,
					int *indices_count,
					int max_indices,
					int *ind_start_offsets,
					int *offset_count,
					int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* group terminator: the next word starts a new group */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* move to the third word of the triple */
		ind_offset += 2;

		/* truncated triple: stop instead of touching memory past the list */
		if (ind_offset >= list_size)
			break;

		/* look for a matching index that is already recorded */
		for (indices = 0;
		     indices < *indices_count;
		     indices++) {
			if (unique_indices[indices] ==
			    register_list_format[ind_offset])
				break;
		}

		/* not seen before: append it to the table */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* replace the raw value with its slot in unique_indices */
		register_list_format[ind_offset] = indices;
	}
}
3740
3741 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3742 {
3743 int i, temp, data;
3744 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3745 int indices_count = 0;
3746 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3747 int offset_count = 0;
3748
3749 int list_size;
3750 unsigned int *register_list_format =
3751 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3752 if (register_list_format == NULL)
3753 return -ENOMEM;
3754 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3755 adev->gfx.rlc.reg_list_format_size_bytes);
3756
3757 gfx_v8_0_parse_ind_reg_list(register_list_format,
3758 RLC_FormatDirectRegListLength,
3759 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3760 unique_indices,
3761 &indices_count,
3762 sizeof(unique_indices) / sizeof(int),
3763 indirect_start_offsets,
3764 &offset_count,
3765 sizeof(indirect_start_offsets)/sizeof(int));
3766
3767 /* save and restore list */
3768 temp = RREG32(mmRLC_SRM_CNTL);
3769 temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3770 WREG32(mmRLC_SRM_CNTL, temp);
3771
3772 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3773 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3774 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3775
3776 /* indirect list */
3777 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3778 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3779 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3780
3781 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3782 list_size = list_size >> 1;
3783 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3784 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3785
3786 /* starting offsets starts */
3787 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3788 adev->gfx.rlc.starting_offsets_start);
3789 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3790 WREG32(mmRLC_GPM_SCRATCH_DATA,
3791 indirect_start_offsets[i]);
3792
3793 /* unique indices */
3794 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3795 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3796 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3797 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3798 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3799 }
3800 kfree(register_list_format);
3801
3802 return 0;
3803 }
3804
3805 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3806 {
3807 uint32_t data;
3808
3809 data = RREG32(mmRLC_SRM_CNTL);
3810 data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3811 WREG32(mmRLC_SRM_CNTL, data);
3812 }
3813
3814 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3815 {
3816 uint32_t data;
3817
3818 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3819 AMD_PG_SUPPORT_GFX_SMG |
3820 AMD_PG_SUPPORT_GFX_DMG)) {
3821 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3822 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3823 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3824 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3825
3826 data = 0;
3827 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3828 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3829 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3830 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3831 WREG32(mmRLC_PG_DELAY, data);
3832
3833 data = RREG32(mmRLC_PG_DELAY_2);
3834 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3835 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3836 WREG32(mmRLC_PG_DELAY_2, data);
3837
3838 data = RREG32(mmRLC_AUTO_PG_CTRL);
3839 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3840 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3841 WREG32(mmRLC_AUTO_PG_CTRL, data);
3842 }
3843 }
3844
3845 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3846 bool enable)
3847 {
3848 u32 data, orig;
3849
3850 orig = data = RREG32(mmRLC_PG_CNTL);
3851
3852 if (enable)
3853 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3854 else
3855 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3856
3857 if (orig != data)
3858 WREG32(mmRLC_PG_CNTL, data);
3859 }
3860
3861 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3862 bool enable)
3863 {
3864 u32 data, orig;
3865
3866 orig = data = RREG32(mmRLC_PG_CNTL);
3867
3868 if (enable)
3869 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3870 else
3871 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3872
3873 if (orig != data)
3874 WREG32(mmRLC_PG_CNTL, data);
3875 }
3876
3877 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3878 {
3879 u32 data, orig;
3880
3881 orig = data = RREG32(mmRLC_PG_CNTL);
3882
3883 if (enable)
3884 data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3885 else
3886 data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3887
3888 if (orig != data)
3889 WREG32(mmRLC_PG_CNTL, data);
3890 }
3891
3892 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3893 {
3894 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3895 AMD_PG_SUPPORT_GFX_SMG |
3896 AMD_PG_SUPPORT_GFX_DMG |
3897 AMD_PG_SUPPORT_CP |
3898 AMD_PG_SUPPORT_GDS |
3899 AMD_PG_SUPPORT_RLC_SMU_HS)) {
3900 gfx_v8_0_init_csb(adev);
3901 gfx_v8_0_init_save_restore_list(adev);
3902 gfx_v8_0_enable_save_restore_machine(adev);
3903
3904 if ((adev->asic_type == CHIP_CARRIZO) ||
3905 (adev->asic_type == CHIP_STONEY)) {
3906 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3907 gfx_v8_0_init_power_gating(adev);
3908 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3909 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3910 cz_enable_sck_slow_down_on_power_up(adev, true);
3911 cz_enable_sck_slow_down_on_power_down(adev, true);
3912 } else {
3913 cz_enable_sck_slow_down_on_power_up(adev, false);
3914 cz_enable_sck_slow_down_on_power_down(adev, false);
3915 }
3916 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3917 cz_enable_cp_power_gating(adev, true);
3918 else
3919 cz_enable_cp_power_gating(adev, false);
3920 } else if (adev->asic_type == CHIP_POLARIS11) {
3921 gfx_v8_0_init_power_gating(adev);
3922 }
3923 }
3924 }
3925
3926 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3927 {
3928 u32 tmp = RREG32(mmRLC_CNTL);
3929
3930 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3931 WREG32(mmRLC_CNTL, tmp);
3932
3933 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3934
3935 gfx_v8_0_wait_for_rlc_serdes(adev);
3936 }
3937
3938 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3939 {
3940 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3941
3942 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3943 WREG32(mmGRBM_SOFT_RESET, tmp);
3944 udelay(50);
3945 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3946 WREG32(mmGRBM_SOFT_RESET, tmp);
3947 udelay(50);
3948 }
3949
3950 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3951 {
3952 u32 tmp = RREG32(mmRLC_CNTL);
3953
3954 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3955 WREG32(mmRLC_CNTL, tmp);
3956
3957 /* carrizo do enable cp interrupt after cp inited */
3958 if (!(adev->flags & AMD_IS_APU))
3959 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3960
3961 udelay(50);
3962 }
3963
3964 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3965 {
3966 const struct rlc_firmware_header_v2_0 *hdr;
3967 const __le32 *fw_data;
3968 unsigned i, fw_size;
3969
3970 if (!adev->gfx.rlc_fw)
3971 return -EINVAL;
3972
3973 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3974 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3975
3976 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3977 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3978 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3979
3980 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3981 for (i = 0; i < fw_size; i++)
3982 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3983 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3984
3985 return 0;
3986 }
3987
3988 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3989 {
3990 int r;
3991
3992 gfx_v8_0_rlc_stop(adev);
3993
3994 /* disable CG */
3995 WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3996 if (adev->asic_type == CHIP_POLARIS11 ||
3997 adev->asic_type == CHIP_POLARIS10)
3998 WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3999
4000 /* disable PG */
4001 WREG32(mmRLC_PG_CNTL, 0);
4002
4003 gfx_v8_0_rlc_reset(adev);
4004
4005 gfx_v8_0_init_pg(adev);
4006
4007 if (!adev->pp_enabled) {
4008 if (!adev->firmware.smu_load) {
4009 /* legacy rlc firmware loading */
4010 r = gfx_v8_0_rlc_load_microcode(adev);
4011 if (r)
4012 return r;
4013 } else {
4014 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4015 AMDGPU_UCODE_ID_RLC_G);
4016 if (r)
4017 return -EINVAL;
4018 }
4019 }
4020
4021 gfx_v8_0_rlc_start(adev);
4022
4023 return 0;
4024 }
4025
4026 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4027 {
4028 int i;
4029 u32 tmp = RREG32(mmCP_ME_CNTL);
4030
4031 if (enable) {
4032 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4033 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4034 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4035 } else {
4036 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4037 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4038 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4039 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4040 adev->gfx.gfx_ring[i].ready = false;
4041 }
4042 WREG32(mmCP_ME_CNTL, tmp);
4043 udelay(50);
4044 }
4045
4046 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4047 {
4048 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4049 const struct gfx_firmware_header_v1_0 *ce_hdr;
4050 const struct gfx_firmware_header_v1_0 *me_hdr;
4051 const __le32 *fw_data;
4052 unsigned i, fw_size;
4053
4054 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4055 return -EINVAL;
4056
4057 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4058 adev->gfx.pfp_fw->data;
4059 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4060 adev->gfx.ce_fw->data;
4061 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4062 adev->gfx.me_fw->data;
4063
4064 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4065 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4066 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4067
4068 gfx_v8_0_cp_gfx_enable(adev, false);
4069
4070 /* PFP */
4071 fw_data = (const __le32 *)
4072 (adev->gfx.pfp_fw->data +
4073 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4074 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4075 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4076 for (i = 0; i < fw_size; i++)
4077 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4078 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4079
4080 /* CE */
4081 fw_data = (const __le32 *)
4082 (adev->gfx.ce_fw->data +
4083 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4084 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4085 WREG32(mmCP_CE_UCODE_ADDR, 0);
4086 for (i = 0; i < fw_size; i++)
4087 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4088 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4089
4090 /* ME */
4091 fw_data = (const __le32 *)
4092 (adev->gfx.me_fw->data +
4093 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4094 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4095 WREG32(mmCP_ME_RAM_WADDR, 0);
4096 for (i = 0; i < fw_size; i++)
4097 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4098 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4099
4100 return 0;
4101 }
4102
4103 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4104 {
4105 u32 count = 0;
4106 const struct cs_section_def *sect = NULL;
4107 const struct cs_extent_def *ext = NULL;
4108
4109 /* begin clear state */
4110 count += 2;
4111 /* context control state */
4112 count += 3;
4113
4114 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4115 for (ext = sect->section; ext->extent != NULL; ++ext) {
4116 if (sect->id == SECT_CONTEXT)
4117 count += 2 + ext->reg_count;
4118 else
4119 return 0;
4120 }
4121 }
4122 /* pa_sc_raster_config/pa_sc_raster_config1 */
4123 count += 4;
4124 /* end clear state */
4125 count += 2;
4126 /* clear state */
4127 count += 2;
4128
4129 return count;
4130 }
4131
4132 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4133 {
4134 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4135 const struct cs_section_def *sect = NULL;
4136 const struct cs_extent_def *ext = NULL;
4137 int r, i;
4138
4139 /* init the CP */
4140 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4141 WREG32(mmCP_ENDIAN_SWAP, 0);
4142 WREG32(mmCP_DEVICE_ID, 1);
4143
4144 gfx_v8_0_cp_gfx_enable(adev, true);
4145
4146 r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4147 if (r) {
4148 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4149 return r;
4150 }
4151
4152 /* clear state buffer */
4153 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4154 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4155
4156 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4157 amdgpu_ring_write(ring, 0x80000000);
4158 amdgpu_ring_write(ring, 0x80000000);
4159
4160 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4161 for (ext = sect->section; ext->extent != NULL; ++ext) {
4162 if (sect->id == SECT_CONTEXT) {
4163 amdgpu_ring_write(ring,
4164 PACKET3(PACKET3_SET_CONTEXT_REG,
4165 ext->reg_count));
4166 amdgpu_ring_write(ring,
4167 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4168 for (i = 0; i < ext->reg_count; i++)
4169 amdgpu_ring_write(ring, ext->extent[i]);
4170 }
4171 }
4172 }
4173
4174 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4175 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4176 switch (adev->asic_type) {
4177 case CHIP_TONGA:
4178 case CHIP_POLARIS10:
4179 amdgpu_ring_write(ring, 0x16000012);
4180 amdgpu_ring_write(ring, 0x0000002A);
4181 break;
4182 case CHIP_POLARIS11:
4183 amdgpu_ring_write(ring, 0x16000012);
4184 amdgpu_ring_write(ring, 0x00000000);
4185 break;
4186 case CHIP_FIJI:
4187 amdgpu_ring_write(ring, 0x3a00161a);
4188 amdgpu_ring_write(ring, 0x0000002e);
4189 break;
4190 case CHIP_CARRIZO:
4191 amdgpu_ring_write(ring, 0x00000002);
4192 amdgpu_ring_write(ring, 0x00000000);
4193 break;
4194 case CHIP_TOPAZ:
4195 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4196 0x00000000 : 0x00000002);
4197 amdgpu_ring_write(ring, 0x00000000);
4198 break;
4199 case CHIP_STONEY:
4200 amdgpu_ring_write(ring, 0x00000000);
4201 amdgpu_ring_write(ring, 0x00000000);
4202 break;
4203 default:
4204 BUG();
4205 }
4206
4207 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4208 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4209
4210 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4211 amdgpu_ring_write(ring, 0);
4212
4213 /* init the CE partitions */
4214 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4215 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4216 amdgpu_ring_write(ring, 0x8000);
4217 amdgpu_ring_write(ring, 0x8000);
4218
4219 amdgpu_ring_commit(ring);
4220
4221 return 0;
4222 }
4223
4224 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4225 {
4226 struct amdgpu_ring *ring;
4227 u32 tmp;
4228 u32 rb_bufsz;
4229 u64 rb_addr, rptr_addr;
4230 int r;
4231
4232 /* Set the write pointer delay */
4233 WREG32(mmCP_RB_WPTR_DELAY, 0);
4234
4235 /* set the RB to use vmid 0 */
4236 WREG32(mmCP_RB_VMID, 0);
4237
4238 /* Set ring buffer size */
4239 ring = &adev->gfx.gfx_ring[0];
4240 rb_bufsz = order_base_2(ring->ring_size / 8);
4241 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4242 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4243 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4244 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4245 #ifdef __BIG_ENDIAN
4246 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4247 #endif
4248 WREG32(mmCP_RB0_CNTL, tmp);
4249
4250 /* Initialize the ring buffer's read and write pointers */
4251 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4252 ring->wptr = 0;
4253 WREG32(mmCP_RB0_WPTR, ring->wptr);
4254
4255 /* set the wb address wether it's enabled or not */
4256 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4257 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4258 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4259
4260 mdelay(1);
4261 WREG32(mmCP_RB0_CNTL, tmp);
4262
4263 rb_addr = ring->gpu_addr >> 8;
4264 WREG32(mmCP_RB0_BASE, rb_addr);
4265 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4266
4267 /* no gfx doorbells on iceland */
4268 if (adev->asic_type != CHIP_TOPAZ) {
4269 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4270 if (ring->use_doorbell) {
4271 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4272 DOORBELL_OFFSET, ring->doorbell_index);
4273 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4274 DOORBELL_HIT, 0);
4275 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4276 DOORBELL_EN, 1);
4277 } else {
4278 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4279 DOORBELL_EN, 0);
4280 }
4281 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4282
4283 if (adev->asic_type == CHIP_TONGA) {
4284 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4285 DOORBELL_RANGE_LOWER,
4286 AMDGPU_DOORBELL_GFX_RING0);
4287 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4288
4289 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4290 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4291 }
4292
4293 }
4294
4295 /* start the ring */
4296 gfx_v8_0_cp_gfx_start(adev);
4297 ring->ready = true;
4298 r = amdgpu_ring_test_ring(ring);
4299 if (r) {
4300 ring->ready = false;
4301 return r;
4302 }
4303
4304 return 0;
4305 }
4306
4307 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4308 {
4309 int i;
4310
4311 if (enable) {
4312 WREG32(mmCP_MEC_CNTL, 0);
4313 } else {
4314 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4315 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4316 adev->gfx.compute_ring[i].ready = false;
4317 }
4318 udelay(50);
4319 }
4320
4321 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4322 {
4323 const struct gfx_firmware_header_v1_0 *mec_hdr;
4324 const __le32 *fw_data;
4325 unsigned i, fw_size;
4326
4327 if (!adev->gfx.mec_fw)
4328 return -EINVAL;
4329
4330 gfx_v8_0_cp_compute_enable(adev, false);
4331
4332 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4333 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4334
4335 fw_data = (const __le32 *)
4336 (adev->gfx.mec_fw->data +
4337 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4338 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4339
4340 /* MEC1 */
4341 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4342 for (i = 0; i < fw_size; i++)
4343 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4344 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4345
4346 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4347 if (adev->gfx.mec2_fw) {
4348 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4349
4350 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4351 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4352
4353 fw_data = (const __le32 *)
4354 (adev->gfx.mec2_fw->data +
4355 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4356 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4357
4358 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4359 for (i = 0; i < fw_size; i++)
4360 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4361 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4362 }
4363
4364 return 0;
4365 }
4366
/*
 * struct vi_mqd - MQD image for a VI compute queue.
 *
 * The layout is 256 named dwords ("ordinals" 0-255) followed by a 256
 * dword scratch area, 512 dwords in total.  Field order and size are part
 * of the layout and must not change.  gfx_v8_0_cp_compute_resume() zeroes
 * the whole image and then stores into the cp_mqd_* and cp_hqd_* fields
 * the same values it writes to the matching CP_MQD_* and CP_HQD_*
 * registers.
 */
struct vi_mqd {
	uint32_t header;				/* ordinal0 */

	/* compute dispatch state */
	uint32_t compute_dispatch_initiator;		/* ordinal1 */
	uint32_t compute_dim_x;				/* ordinal2 */
	uint32_t compute_dim_y;				/* ordinal3 */
	uint32_t compute_dim_z;				/* ordinal4 */
	uint32_t compute_start_x;			/* ordinal5 */
	uint32_t compute_start_y;			/* ordinal6 */
	uint32_t compute_start_z;			/* ordinal7 */
	uint32_t compute_num_thread_x;			/* ordinal8 */
	uint32_t compute_num_thread_y;			/* ordinal9 */
	uint32_t compute_num_thread_z;			/* ordinal10 */
	uint32_t compute_pipelinestat_enable;		/* ordinal11 */
	uint32_t compute_perfcount_enable;		/* ordinal12 */
	uint32_t compute_pgm_lo;			/* ordinal13 */
	uint32_t compute_pgm_hi;			/* ordinal14 */
	uint32_t compute_tba_lo;			/* ordinal15 */
	uint32_t compute_tba_hi;			/* ordinal16 */
	uint32_t compute_tma_lo;			/* ordinal17 */
	uint32_t compute_tma_hi;			/* ordinal18 */
	uint32_t compute_pgm_rsrc1;			/* ordinal19 */
	uint32_t compute_pgm_rsrc2;			/* ordinal20 */
	uint32_t compute_vmid;				/* ordinal21 */
	uint32_t compute_resource_limits;		/* ordinal22 */
	uint32_t compute_static_thread_mgmt_se0;	/* ordinal23 */
	uint32_t compute_static_thread_mgmt_se1;	/* ordinal24 */
	uint32_t compute_tmpring_size;			/* ordinal25 */
	uint32_t compute_static_thread_mgmt_se2;	/* ordinal26 */
	uint32_t compute_static_thread_mgmt_se3;	/* ordinal27 */
	uint32_t compute_restart_x;			/* ordinal28 */
	uint32_t compute_restart_y;			/* ordinal29 */
	uint32_t compute_restart_z;			/* ordinal30 */
	uint32_t compute_thread_trace_enable;		/* ordinal31 */
	uint32_t compute_misc_reserved;			/* ordinal32 */
	uint32_t compute_dispatch_id;			/* ordinal33 */
	uint32_t compute_threadgroup_id;		/* ordinal34 */
	uint32_t compute_relaunch;			/* ordinal35 */
	uint32_t compute_wave_restore_addr_lo;		/* ordinal36 */
	uint32_t compute_wave_restore_addr_hi;		/* ordinal37 */
	uint32_t compute_wave_restore_control;		/* ordinal38 */

	uint32_t reserved9;				/* ordinal39 */
	uint32_t reserved10;				/* ordinal40 */
	uint32_t reserved11;				/* ordinal41 */
	uint32_t reserved12;				/* ordinal42 */
	uint32_t reserved13;				/* ordinal43 */
	uint32_t reserved14;				/* ordinal44 */
	uint32_t reserved15;				/* ordinal45 */
	uint32_t reserved16;				/* ordinal46 */
	uint32_t reserved17;				/* ordinal47 */
	uint32_t reserved18;				/* ordinal48 */
	uint32_t reserved19;				/* ordinal49 */
	uint32_t reserved20;				/* ordinal50 */
	uint32_t reserved21;				/* ordinal51 */
	uint32_t reserved22;				/* ordinal52 */
	uint32_t reserved23;				/* ordinal53 */
	uint32_t reserved24;				/* ordinal54 */
	uint32_t reserved25;				/* ordinal55 */
	uint32_t reserved26;				/* ordinal56 */
	uint32_t reserved27;				/* ordinal57 */
	uint32_t reserved28;				/* ordinal58 */
	uint32_t reserved29;				/* ordinal59 */
	uint32_t reserved30;				/* ordinal60 */
	uint32_t reserved31;				/* ordinal61 */
	uint32_t reserved32;				/* ordinal62 */
	uint32_t reserved33;				/* ordinal63 */
	uint32_t reserved34;				/* ordinal64 */

	/* compute user data */
	uint32_t compute_user_data_0;			/* ordinal65 */
	uint32_t compute_user_data_1;			/* ordinal66 */
	uint32_t compute_user_data_2;			/* ordinal67 */
	uint32_t compute_user_data_3;			/* ordinal68 */
	uint32_t compute_user_data_4;			/* ordinal69 */
	uint32_t compute_user_data_5;			/* ordinal70 */
	uint32_t compute_user_data_6;			/* ordinal71 */
	uint32_t compute_user_data_7;			/* ordinal72 */
	uint32_t compute_user_data_8;			/* ordinal73 */
	uint32_t compute_user_data_9;			/* ordinal74 */
	uint32_t compute_user_data_10;			/* ordinal75 */
	uint32_t compute_user_data_11;			/* ordinal76 */
	uint32_t compute_user_data_12;			/* ordinal77 */
	uint32_t compute_user_data_13;			/* ordinal78 */
	uint32_t compute_user_data_14;			/* ordinal79 */
	uint32_t compute_user_data_15;			/* ordinal80 */

	uint32_t cp_compute_csinvoc_count_lo;		/* ordinal81 */
	uint32_t cp_compute_csinvoc_count_hi;		/* ordinal82 */
	uint32_t reserved35;				/* ordinal83 */
	uint32_t reserved36;				/* ordinal84 */
	uint32_t reserved37;				/* ordinal85 */

	/* cp_mqd_* time and connect/save/restore bookkeeping */
	uint32_t cp_mqd_query_time_lo;			/* ordinal86 */
	uint32_t cp_mqd_query_time_hi;			/* ordinal87 */
	uint32_t cp_mqd_connect_start_time_lo;		/* ordinal88 */
	uint32_t cp_mqd_connect_start_time_hi;		/* ordinal89 */
	uint32_t cp_mqd_connect_end_time_lo;		/* ordinal90 */
	uint32_t cp_mqd_connect_end_time_hi;		/* ordinal91 */
	uint32_t cp_mqd_connect_end_wf_count;		/* ordinal92 */
	uint32_t cp_mqd_connect_end_pq_rptr;		/* ordinal93 */
	uint32_t cp_mqd_connect_end_pq_wptr;		/* ordinal94 */
	uint32_t cp_mqd_connect_end_ib_rptr;		/* ordinal95 */
	uint32_t reserved38;				/* ordinal96 */
	uint32_t reserved39;				/* ordinal97 */
	uint32_t cp_mqd_save_start_time_lo;		/* ordinal98 */
	uint32_t cp_mqd_save_start_time_hi;		/* ordinal99 */
	uint32_t cp_mqd_save_end_time_lo;		/* ordinal100 */
	uint32_t cp_mqd_save_end_time_hi;		/* ordinal101 */
	uint32_t cp_mqd_restore_start_time_lo;		/* ordinal102 */
	uint32_t cp_mqd_restore_start_time_hi;		/* ordinal103 */
	uint32_t cp_mqd_restore_end_time_lo;		/* ordinal104 */
	uint32_t cp_mqd_restore_end_time_hi;		/* ordinal105 */
	uint32_t reserved40;				/* ordinal106 */
	uint32_t reserved41;				/* ordinal107 */

	uint32_t gds_cs_ctxsw_cnt0;			/* ordinal108 */
	uint32_t gds_cs_ctxsw_cnt1;			/* ordinal109 */
	uint32_t gds_cs_ctxsw_cnt2;			/* ordinal110 */
	uint32_t gds_cs_ctxsw_cnt3;			/* ordinal111 */
	uint32_t reserved42;				/* ordinal112 */
	uint32_t reserved43;				/* ordinal113 */
	uint32_t cp_pq_exe_status_lo;			/* ordinal114 */
	uint32_t cp_pq_exe_status_hi;			/* ordinal115 */
	uint32_t cp_packet_id_lo;			/* ordinal116 */
	uint32_t cp_packet_id_hi;			/* ordinal117 */
	uint32_t cp_packet_exe_status_lo;		/* ordinal118 */
	uint32_t cp_packet_exe_status_hi;		/* ordinal119 */
	uint32_t gds_save_base_addr_lo;			/* ordinal120 */
	uint32_t gds_save_base_addr_hi;			/* ordinal121 */
	uint32_t gds_save_mask_lo;			/* ordinal122 */
	uint32_t gds_save_mask_hi;			/* ordinal123 */
	uint32_t ctx_save_base_addr_lo;			/* ordinal124 */
	uint32_t ctx_save_base_addr_hi;			/* ordinal125 */
	uint32_t reserved44;				/* ordinal126 */
	uint32_t reserved45;				/* ordinal127 */

	/* cp_mqd_* / cp_hqd_* register images */
	uint32_t cp_mqd_base_addr_lo;			/* ordinal128 */
	uint32_t cp_mqd_base_addr_hi;			/* ordinal129 */
	uint32_t cp_hqd_active;				/* ordinal130 */
	uint32_t cp_hqd_vmid;				/* ordinal131 */
	uint32_t cp_hqd_persistent_state;		/* ordinal132 */
	uint32_t cp_hqd_pipe_priority;			/* ordinal133 */
	uint32_t cp_hqd_queue_priority;			/* ordinal134 */
	uint32_t cp_hqd_quantum;			/* ordinal135 */
	uint32_t cp_hqd_pq_base_lo;			/* ordinal136 */
	uint32_t cp_hqd_pq_base_hi;			/* ordinal137 */
	uint32_t cp_hqd_pq_rptr;			/* ordinal138 */
	uint32_t cp_hqd_pq_rptr_report_addr_lo;		/* ordinal139 */
	uint32_t cp_hqd_pq_rptr_report_addr_hi;		/* ordinal140 */
	uint32_t cp_hqd_pq_wptr_poll_addr;		/* ordinal141 */
	uint32_t cp_hqd_pq_wptr_poll_addr_hi;		/* ordinal142 */
	uint32_t cp_hqd_pq_doorbell_control;		/* ordinal143 */
	uint32_t cp_hqd_pq_wptr;			/* ordinal144 */
	uint32_t cp_hqd_pq_control;			/* ordinal145 */
	uint32_t cp_hqd_ib_base_addr_lo;		/* ordinal146 */
	uint32_t cp_hqd_ib_base_addr_hi;		/* ordinal147 */
	uint32_t cp_hqd_ib_rptr;			/* ordinal148 */
	uint32_t cp_hqd_ib_control;			/* ordinal149 */
	uint32_t cp_hqd_iq_timer;			/* ordinal150 */
	uint32_t cp_hqd_iq_rptr;			/* ordinal151 */
	uint32_t cp_hqd_dequeue_request;		/* ordinal152 */
	uint32_t cp_hqd_dma_offload;			/* ordinal153 */
	uint32_t cp_hqd_sema_cmd;			/* ordinal154 */
	uint32_t cp_hqd_msg_type;			/* ordinal155 */
	uint32_t cp_hqd_atomic0_preop_lo;		/* ordinal156 */
	uint32_t cp_hqd_atomic0_preop_hi;		/* ordinal157 */
	uint32_t cp_hqd_atomic1_preop_lo;		/* ordinal158 */
	uint32_t cp_hqd_atomic1_preop_hi;		/* ordinal159 */
	uint32_t cp_hqd_hq_status0;			/* ordinal160 */
	uint32_t cp_hqd_hq_control0;			/* ordinal161 */
	uint32_t cp_mqd_control;			/* ordinal162 */
	uint32_t cp_hqd_hq_status1;			/* ordinal163 */
	uint32_t cp_hqd_hq_control1;			/* ordinal164 */
	uint32_t cp_hqd_eop_base_addr_lo;		/* ordinal165 */
	uint32_t cp_hqd_eop_base_addr_hi;		/* ordinal166 */
	uint32_t cp_hqd_eop_control;			/* ordinal167 */
	uint32_t cp_hqd_eop_rptr;			/* ordinal168 */
	uint32_t cp_hqd_eop_wptr;			/* ordinal169 */
	uint32_t cp_hqd_eop_done_events;		/* ordinal170 */
	uint32_t cp_hqd_ctx_save_base_addr_lo;		/* ordinal171 */
	uint32_t cp_hqd_ctx_save_base_addr_hi;		/* ordinal172 */
	uint32_t cp_hqd_ctx_save_control;		/* ordinal173 */
	uint32_t cp_hqd_cntl_stack_offset;		/* ordinal174 */
	uint32_t cp_hqd_cntl_stack_size;		/* ordinal175 */
	uint32_t cp_hqd_wg_state_offset;		/* ordinal176 */
	uint32_t cp_hqd_ctx_save_size;			/* ordinal177 */
	uint32_t cp_hqd_gds_resource_state;		/* ordinal178 */
	uint32_t cp_hqd_error;				/* ordinal179 */
	uint32_t cp_hqd_eop_wptr_mem;			/* ordinal180 */
	uint32_t cp_hqd_eop_dones;			/* ordinal181 */

	uint32_t reserved46;				/* ordinal182 */
	uint32_t reserved47;				/* ordinal183 */
	uint32_t reserved48;				/* ordinal184 */
	uint32_t reserved49;				/* ordinal185 */
	uint32_t reserved50;				/* ordinal186 */
	uint32_t reserved51;				/* ordinal187 */
	uint32_t reserved52;				/* ordinal188 */
	uint32_t reserved53;				/* ordinal189 */
	uint32_t reserved54;				/* ordinal190 */
	uint32_t reserved55;				/* ordinal191 */

	/* iqtimer packet: header plus 32 dwords */
	uint32_t iqtimer_pkt_header;			/* ordinal192 */
	uint32_t iqtimer_pkt_dw0;			/* ordinal193 */
	uint32_t iqtimer_pkt_dw1;			/* ordinal194 */
	uint32_t iqtimer_pkt_dw2;			/* ordinal195 */
	uint32_t iqtimer_pkt_dw3;			/* ordinal196 */
	uint32_t iqtimer_pkt_dw4;			/* ordinal197 */
	uint32_t iqtimer_pkt_dw5;			/* ordinal198 */
	uint32_t iqtimer_pkt_dw6;			/* ordinal199 */
	uint32_t iqtimer_pkt_dw7;			/* ordinal200 */
	uint32_t iqtimer_pkt_dw8;			/* ordinal201 */
	uint32_t iqtimer_pkt_dw9;			/* ordinal202 */
	uint32_t iqtimer_pkt_dw10;			/* ordinal203 */
	uint32_t iqtimer_pkt_dw11;			/* ordinal204 */
	uint32_t iqtimer_pkt_dw12;			/* ordinal205 */
	uint32_t iqtimer_pkt_dw13;			/* ordinal206 */
	uint32_t iqtimer_pkt_dw14;			/* ordinal207 */
	uint32_t iqtimer_pkt_dw15;			/* ordinal208 */
	uint32_t iqtimer_pkt_dw16;			/* ordinal209 */
	uint32_t iqtimer_pkt_dw17;			/* ordinal210 */
	uint32_t iqtimer_pkt_dw18;			/* ordinal211 */
	uint32_t iqtimer_pkt_dw19;			/* ordinal212 */
	uint32_t iqtimer_pkt_dw20;			/* ordinal213 */
	uint32_t iqtimer_pkt_dw21;			/* ordinal214 */
	uint32_t iqtimer_pkt_dw22;			/* ordinal215 */
	uint32_t iqtimer_pkt_dw23;			/* ordinal216 */
	uint32_t iqtimer_pkt_dw24;			/* ordinal217 */
	uint32_t iqtimer_pkt_dw25;			/* ordinal218 */
	uint32_t iqtimer_pkt_dw26;			/* ordinal219 */
	uint32_t iqtimer_pkt_dw27;			/* ordinal220 */
	uint32_t iqtimer_pkt_dw28;			/* ordinal221 */
	uint32_t iqtimer_pkt_dw29;			/* ordinal222 */
	uint32_t iqtimer_pkt_dw30;			/* ordinal223 */
	uint32_t iqtimer_pkt_dw31;			/* ordinal224 */
	uint32_t reserved56;				/* ordinal225 */
	uint32_t reserved57;				/* ordinal226 */
	uint32_t reserved58;				/* ordinal227 */

	/* set_resources packet: header plus dw1-dw7 */
	uint32_t set_resources_header;			/* ordinal228 */
	uint32_t set_resources_dw1;			/* ordinal229 */
	uint32_t set_resources_dw2;			/* ordinal230 */
	uint32_t set_resources_dw3;			/* ordinal231 */
	uint32_t set_resources_dw4;			/* ordinal232 */
	uint32_t set_resources_dw5;			/* ordinal233 */
	uint32_t set_resources_dw6;			/* ordinal234 */
	uint32_t set_resources_dw7;			/* ordinal235 */

	uint32_t reserved59;				/* ordinal236 */
	uint32_t reserved60;				/* ordinal237 */
	uint32_t reserved61;				/* ordinal238 */
	uint32_t reserved62;				/* ordinal239 */
	uint32_t reserved63;				/* ordinal240 */
	uint32_t reserved64;				/* ordinal241 */
	uint32_t reserved65;				/* ordinal242 */
	uint32_t reserved66;				/* ordinal243 */
	uint32_t reserved67;				/* ordinal244 */
	uint32_t reserved68;				/* ordinal245 */
	uint32_t reserved69;				/* ordinal246 */
	uint32_t reserved70;				/* ordinal247 */
	uint32_t reserved71;				/* ordinal248 */
	uint32_t reserved72;				/* ordinal249 */
	uint32_t reserved73;				/* ordinal250 */
	uint32_t reserved74;				/* ordinal251 */
	uint32_t reserved75;				/* ordinal252 */
	uint32_t reserved76;				/* ordinal253 */
	uint32_t reserved77;				/* ordinal254 */
	uint32_t reserved78;				/* ordinal255 */

	uint32_t reserved_t[256];	/* Reserve 256 dword buffer used by ucode */
};
4627
4628 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4629 {
4630 int i, r;
4631
4632 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4633 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4634
4635 if (ring->mqd_obj) {
4636 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4637 if (unlikely(r != 0))
4638 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4639
4640 amdgpu_bo_unpin(ring->mqd_obj);
4641 amdgpu_bo_unreserve(ring->mqd_obj);
4642
4643 amdgpu_bo_unref(&ring->mqd_obj);
4644 ring->mqd_obj = NULL;
4645 }
4646 }
4647 }
4648
4649 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4650 {
4651 int r, i, j;
4652 u32 tmp;
4653 bool use_doorbell = true;
4654 u64 hqd_gpu_addr;
4655 u64 mqd_gpu_addr;
4656 u64 eop_gpu_addr;
4657 u64 wb_gpu_addr;
4658 u32 *buf;
4659 struct vi_mqd *mqd;
4660
4661 /* init the pipes */
4662 mutex_lock(&adev->srbm_mutex);
4663 for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4664 int me = (i < 4) ? 1 : 2;
4665 int pipe = (i < 4) ? i : (i - 4);
4666
4667 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4668 eop_gpu_addr >>= 8;
4669
4670 vi_srbm_select(adev, me, pipe, 0, 0);
4671
4672 /* write the EOP addr */
4673 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4674 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4675
4676 /* set the VMID assigned */
4677 WREG32(mmCP_HQD_VMID, 0);
4678
4679 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4680 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4681 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4682 (order_base_2(MEC_HPD_SIZE / 4) - 1));
4683 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4684 }
4685 vi_srbm_select(adev, 0, 0, 0, 0);
4686 mutex_unlock(&adev->srbm_mutex);
4687
4688 /* init the queues. Just two for now. */
4689 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4690 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4691
4692 if (ring->mqd_obj == NULL) {
4693 r = amdgpu_bo_create(adev,
4694 sizeof(struct vi_mqd),
4695 PAGE_SIZE, true,
4696 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4697 NULL, &ring->mqd_obj);
4698 if (r) {
4699 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4700 return r;
4701 }
4702 }
4703
4704 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4705 if (unlikely(r != 0)) {
4706 gfx_v8_0_cp_compute_fini(adev);
4707 return r;
4708 }
4709 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4710 &mqd_gpu_addr);
4711 if (r) {
4712 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4713 gfx_v8_0_cp_compute_fini(adev);
4714 return r;
4715 }
4716 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4717 if (r) {
4718 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4719 gfx_v8_0_cp_compute_fini(adev);
4720 return r;
4721 }
4722
4723 /* init the mqd struct */
4724 memset(buf, 0, sizeof(struct vi_mqd));
4725
4726 mqd = (struct vi_mqd *)buf;
4727 mqd->header = 0xC0310800;
4728 mqd->compute_pipelinestat_enable = 0x00000001;
4729 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4730 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4731 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4732 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4733 mqd->compute_misc_reserved = 0x00000003;
4734
4735 mutex_lock(&adev->srbm_mutex);
4736 vi_srbm_select(adev, ring->me,
4737 ring->pipe,
4738 ring->queue, 0);
4739
4740 /* disable wptr polling */
4741 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4742 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4743 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4744
4745 mqd->cp_hqd_eop_base_addr_lo =
4746 RREG32(mmCP_HQD_EOP_BASE_ADDR);
4747 mqd->cp_hqd_eop_base_addr_hi =
4748 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4749
4750 /* enable doorbell? */
4751 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4752 if (use_doorbell) {
4753 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4754 } else {
4755 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4756 }
4757 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4758 mqd->cp_hqd_pq_doorbell_control = tmp;
4759
4760 /* disable the queue if it's active */
4761 mqd->cp_hqd_dequeue_request = 0;
4762 mqd->cp_hqd_pq_rptr = 0;
4763 mqd->cp_hqd_pq_wptr= 0;
4764 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4765 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4766 for (j = 0; j < adev->usec_timeout; j++) {
4767 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4768 break;
4769 udelay(1);
4770 }
4771 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4772 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4773 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4774 }
4775
4776 /* set the pointer to the MQD */
4777 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4778 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4779 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4780 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4781
4782 /* set MQD vmid to 0 */
4783 tmp = RREG32(mmCP_MQD_CONTROL);
4784 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4785 WREG32(mmCP_MQD_CONTROL, tmp);
4786 mqd->cp_mqd_control = tmp;
4787
4788 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4789 hqd_gpu_addr = ring->gpu_addr >> 8;
4790 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4791 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4792 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4793 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4794
4795 /* set up the HQD, this is similar to CP_RB0_CNTL */
4796 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4797 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4798 (order_base_2(ring->ring_size / 4) - 1));
4799 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4800 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4801 #ifdef __BIG_ENDIAN
4802 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4803 #endif
4804 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4805 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4806 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4807 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4808 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4809 mqd->cp_hqd_pq_control = tmp;
4810
4811 /* set the wb address wether it's enabled or not */
4812 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4813 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4814 mqd->cp_hqd_pq_rptr_report_addr_hi =
4815 upper_32_bits(wb_gpu_addr) & 0xffff;
4816 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4817 mqd->cp_hqd_pq_rptr_report_addr_lo);
4818 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4819 mqd->cp_hqd_pq_rptr_report_addr_hi);
4820
4821 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4822 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4823 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4824 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4825 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4826 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4827 mqd->cp_hqd_pq_wptr_poll_addr_hi);
4828
4829 /* enable the doorbell if requested */
4830 if (use_doorbell) {
4831 if ((adev->asic_type == CHIP_CARRIZO) ||
4832 (adev->asic_type == CHIP_FIJI) ||
4833 (adev->asic_type == CHIP_STONEY) ||
4834 (adev->asic_type == CHIP_POLARIS11) ||
4835 (adev->asic_type == CHIP_POLARIS10)) {
4836 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4837 AMDGPU_DOORBELL_KIQ << 2);
4838 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4839 AMDGPU_DOORBELL_MEC_RING7 << 2);
4840 }
4841 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4842 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4843 DOORBELL_OFFSET, ring->doorbell_index);
4844 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4845 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4846 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4847 mqd->cp_hqd_pq_doorbell_control = tmp;
4848
4849 } else {
4850 mqd->cp_hqd_pq_doorbell_control = 0;
4851 }
4852 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4853 mqd->cp_hqd_pq_doorbell_control);
4854
4855 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4856 ring->wptr = 0;
4857 mqd->cp_hqd_pq_wptr = ring->wptr;
4858 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4859 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4860
4861 /* set the vmid for the queue */
4862 mqd->cp_hqd_vmid = 0;
4863 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4864
4865 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4866 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4867 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4868 mqd->cp_hqd_persistent_state = tmp;
4869 if (adev->asic_type == CHIP_STONEY ||
4870 adev->asic_type == CHIP_POLARIS11 ||
4871 adev->asic_type == CHIP_POLARIS10) {
4872 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4873 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4874 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4875 }
4876
4877 /* activate the queue */
4878 mqd->cp_hqd_active = 1;
4879 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4880
4881 vi_srbm_select(adev, 0, 0, 0, 0);
4882 mutex_unlock(&adev->srbm_mutex);
4883
4884 amdgpu_bo_kunmap(ring->mqd_obj);
4885 amdgpu_bo_unreserve(ring->mqd_obj);
4886 }
4887
4888 if (use_doorbell) {
4889 tmp = RREG32(mmCP_PQ_STATUS);
4890 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4891 WREG32(mmCP_PQ_STATUS, tmp);
4892 }
4893
4894 gfx_v8_0_cp_compute_enable(adev, true);
4895
4896 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4897 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4898
4899 ring->ready = true;
4900 r = amdgpu_ring_test_ring(ring);
4901 if (r)
4902 ring->ready = false;
4903 }
4904
4905 return 0;
4906 }
4907
4908 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4909 {
4910 int r;
4911
4912 if (!(adev->flags & AMD_IS_APU))
4913 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4914
4915 if (!adev->pp_enabled) {
4916 if (!adev->firmware.smu_load) {
4917 /* legacy firmware loading */
4918 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4919 if (r)
4920 return r;
4921
4922 r = gfx_v8_0_cp_compute_load_microcode(adev);
4923 if (r)
4924 return r;
4925 } else {
4926 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4927 AMDGPU_UCODE_ID_CP_CE);
4928 if (r)
4929 return -EINVAL;
4930
4931 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4932 AMDGPU_UCODE_ID_CP_PFP);
4933 if (r)
4934 return -EINVAL;
4935
4936 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4937 AMDGPU_UCODE_ID_CP_ME);
4938 if (r)
4939 return -EINVAL;
4940
4941 if (adev->asic_type == CHIP_TOPAZ) {
4942 r = gfx_v8_0_cp_compute_load_microcode(adev);
4943 if (r)
4944 return r;
4945 } else {
4946 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4947 AMDGPU_UCODE_ID_CP_MEC1);
4948 if (r)
4949 return -EINVAL;
4950 }
4951 }
4952 }
4953
4954 r = gfx_v8_0_cp_gfx_resume(adev);
4955 if (r)
4956 return r;
4957
4958 r = gfx_v8_0_cp_compute_resume(adev);
4959 if (r)
4960 return r;
4961
4962 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4963
4964 return 0;
4965 }
4966
4967 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4968 {
4969 gfx_v8_0_cp_gfx_enable(adev, enable);
4970 gfx_v8_0_cp_compute_enable(adev, enable);
4971 }
4972
/*
 * Hardware init callback for the GFX block.
 *
 * @handle: the amdgpu_device pointer, passed as void *
 *
 * Programs the golden registers, runs the GPU init, then resumes the RLC
 * and the CP.  Returns 0 on success or the error from the RLC/CP resume.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = handle;
	int err;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	err = gfx_v8_0_rlc_resume(adev);
	if (err)
		return err;

	return gfx_v8_0_cp_resume(adev);
}
4992
4993 static int gfx_v8_0_hw_fini(void *handle)
4994 {
4995 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4996
4997 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4998 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4999 gfx_v8_0_cp_enable(adev, false);
5000 gfx_v8_0_rlc_stop(adev);
5001 gfx_v8_0_cp_compute_fini(adev);
5002
5003 amdgpu_set_powergating_state(adev,
5004 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5005
5006 return 0;
5007 }
5008
/* Suspend callback: identical to the hardware teardown path. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
5015
/* Resume callback: identical to the hardware init path. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
5022
5023 static bool gfx_v8_0_is_idle(void *handle)
5024 {
5025 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5026
5027 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5028 return false;
5029 else
5030 return true;
5031 }
5032
5033 static int gfx_v8_0_wait_for_idle(void *handle)
5034 {
5035 unsigned i;
5036 u32 tmp;
5037 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5038
5039 for (i = 0; i < adev->usec_timeout; i++) {
5040 /* read MC_STATUS */
5041 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
5042
5043 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
5044 return 0;
5045 udelay(1);
5046 }
5047 return -ETIMEDOUT;
5048 }
5049
5050 static int gfx_v8_0_soft_reset(void *handle)
5051 {
5052 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5053 u32 tmp;
5054 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5055
5056 /* GRBM_STATUS */
5057 tmp = RREG32(mmGRBM_STATUS);
5058 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5059 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5060 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5061 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5062 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5063 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
5064 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5065 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5066 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5067 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5068 }
5069
5070 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5071 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5072 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5073 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5074 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5075 }
5076
5077 /* GRBM_STATUS2 */
5078 tmp = RREG32(mmGRBM_STATUS2);
5079 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5080 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5081 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5082
5083 /* SRBM_STATUS */
5084 tmp = RREG32(mmSRBM_STATUS);
5085 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5086 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5087 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5088
5089 if (grbm_soft_reset || srbm_soft_reset) {
5090 /* stop the rlc */
5091 gfx_v8_0_rlc_stop(adev);
5092
5093 /* Disable GFX parsing/prefetching */
5094 gfx_v8_0_cp_gfx_enable(adev, false);
5095
5096 /* Disable MEC parsing/prefetching */
5097 gfx_v8_0_cp_compute_enable(adev, false);
5098
5099 if (grbm_soft_reset || srbm_soft_reset) {
5100 tmp = RREG32(mmGMCON_DEBUG);
5101 tmp = REG_SET_FIELD(tmp,
5102 GMCON_DEBUG, GFX_STALL, 1);
5103 tmp = REG_SET_FIELD(tmp,
5104 GMCON_DEBUG, GFX_CLEAR, 1);
5105 WREG32(mmGMCON_DEBUG, tmp);
5106
5107 udelay(50);
5108 }
5109
5110 if (grbm_soft_reset) {
5111 tmp = RREG32(mmGRBM_SOFT_RESET);
5112 tmp |= grbm_soft_reset;
5113 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5114 WREG32(mmGRBM_SOFT_RESET, tmp);
5115 tmp = RREG32(mmGRBM_SOFT_RESET);
5116
5117 udelay(50);
5118
5119 tmp &= ~grbm_soft_reset;
5120 WREG32(mmGRBM_SOFT_RESET, tmp);
5121 tmp = RREG32(mmGRBM_SOFT_RESET);
5122 }
5123
5124 if (srbm_soft_reset) {
5125 tmp = RREG32(mmSRBM_SOFT_RESET);
5126 tmp |= srbm_soft_reset;
5127 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5128 WREG32(mmSRBM_SOFT_RESET, tmp);
5129 tmp = RREG32(mmSRBM_SOFT_RESET);
5130
5131 udelay(50);
5132
5133 tmp &= ~srbm_soft_reset;
5134 WREG32(mmSRBM_SOFT_RESET, tmp);
5135 tmp = RREG32(mmSRBM_SOFT_RESET);
5136 }
5137
5138 if (grbm_soft_reset || srbm_soft_reset) {
5139 tmp = RREG32(mmGMCON_DEBUG);
5140 tmp = REG_SET_FIELD(tmp,
5141 GMCON_DEBUG, GFX_STALL, 0);
5142 tmp = REG_SET_FIELD(tmp,
5143 GMCON_DEBUG, GFX_CLEAR, 0);
5144 WREG32(mmGMCON_DEBUG, tmp);
5145 }
5146
5147 /* Wait a little for things to settle down */
5148 udelay(50);
5149 }
5150 return 0;
5151 }
5152
5153 /**
5154 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5155 *
5156 * @adev: amdgpu_device pointer
5157 *
5158 * Fetches a GPU clock counter snapshot.
5159 * Returns the 64 bit clock counter snapshot.
5160 */
5161 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5162 {
5163 uint64_t clock;
5164
5165 mutex_lock(&adev->gfx.gpu_clock_mutex);
5166 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5167 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5168 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5169 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5170 return clock;
5171 }
5172
5173 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5174 uint32_t vmid,
5175 uint32_t gds_base, uint32_t gds_size,
5176 uint32_t gws_base, uint32_t gws_size,
5177 uint32_t oa_base, uint32_t oa_size)
5178 {
5179 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5180 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5181
5182 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5183 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5184
5185 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5186 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5187
5188 /* GDS Base */
5189 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5190 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5191 WRITE_DATA_DST_SEL(0)));
5192 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5193 amdgpu_ring_write(ring, 0);
5194 amdgpu_ring_write(ring, gds_base);
5195
5196 /* GDS Size */
5197 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5198 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5199 WRITE_DATA_DST_SEL(0)));
5200 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5201 amdgpu_ring_write(ring, 0);
5202 amdgpu_ring_write(ring, gds_size);
5203
5204 /* GWS */
5205 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5206 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5207 WRITE_DATA_DST_SEL(0)));
5208 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5209 amdgpu_ring_write(ring, 0);
5210 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5211
5212 /* OA */
5213 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5214 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5215 WRITE_DATA_DST_SEL(0)));
5216 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5217 amdgpu_ring_write(ring, 0);
5218 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5219 }
5220
5221 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5222 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5223 .select_se_sh = &gfx_v8_0_select_se_sh,
5224 };
5225
5226 static int gfx_v8_0_early_init(void *handle)
5227 {
5228 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5229
5230 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5231 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5232 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5233 gfx_v8_0_set_ring_funcs(adev);
5234 gfx_v8_0_set_irq_funcs(adev);
5235 gfx_v8_0_set_gds_init(adev);
5236 gfx_v8_0_set_rlc_funcs(adev);
5237
5238 return 0;
5239 }
5240
5241 static int gfx_v8_0_late_init(void *handle)
5242 {
5243 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5244 int r;
5245
5246 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5247 if (r)
5248 return r;
5249
5250 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5251 if (r)
5252 return r;
5253
5254 /* requires IBs so do in late init after IB pool is initialized */
5255 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5256 if (r)
5257 return r;
5258
5259 amdgpu_set_powergating_state(adev,
5260 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5261
5262 return 0;
5263 }
5264
5265 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5266 bool enable)
5267 {
5268 uint32_t data, temp;
5269
5270 if (adev->asic_type == CHIP_POLARIS11)
5271 /* Send msg to SMU via Powerplay */
5272 amdgpu_set_powergating_state(adev,
5273 AMD_IP_BLOCK_TYPE_SMC,
5274 enable ?
5275 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5276
5277 temp = data = RREG32(mmRLC_PG_CNTL);
5278 /* Enable static MGPG */
5279 if (enable)
5280 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5281 else
5282 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5283
5284 if (temp != data)
5285 WREG32(mmRLC_PG_CNTL, data);
5286 }
5287
5288 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5289 bool enable)
5290 {
5291 uint32_t data, temp;
5292
5293 temp = data = RREG32(mmRLC_PG_CNTL);
5294 /* Enable dynamic MGPG */
5295 if (enable)
5296 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5297 else
5298 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5299
5300 if (temp != data)
5301 WREG32(mmRLC_PG_CNTL, data);
5302 }
5303
5304 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5305 bool enable)
5306 {
5307 uint32_t data, temp;
5308
5309 temp = data = RREG32(mmRLC_PG_CNTL);
5310 /* Enable quick PG */
5311 if (enable)
5312 data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5313 else
5314 data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
5315
5316 if (temp != data)
5317 WREG32(mmRLC_PG_CNTL, data);
5318 }
5319
5320 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5321 bool enable)
5322 {
5323 u32 data, orig;
5324
5325 orig = data = RREG32(mmRLC_PG_CNTL);
5326
5327 if (enable)
5328 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5329 else
5330 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5331
5332 if (orig != data)
5333 WREG32(mmRLC_PG_CNTL, data);
5334 }
5335
5336 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5337 bool enable)
5338 {
5339 u32 data, orig;
5340
5341 orig = data = RREG32(mmRLC_PG_CNTL);
5342
5343 if (enable)
5344 data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5345 else
5346 data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5347
5348 if (orig != data)
5349 WREG32(mmRLC_PG_CNTL, data);
5350
5351 /* Read any GFX register to wake up GFX. */
5352 if (!enable)
5353 data = RREG32(mmDB_RENDER_CONTROL);
5354 }
5355
5356 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5357 bool enable)
5358 {
5359 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5360 cz_enable_gfx_cg_power_gating(adev, true);
5361 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5362 cz_enable_gfx_pipeline_power_gating(adev, true);
5363 } else {
5364 cz_enable_gfx_cg_power_gating(adev, false);
5365 cz_enable_gfx_pipeline_power_gating(adev, false);
5366 }
5367 }
5368
5369 static int gfx_v8_0_set_powergating_state(void *handle,
5370 enum amd_powergating_state state)
5371 {
5372 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5373 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5374
5375 if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5376 return 0;
5377
5378 switch (adev->asic_type) {
5379 case CHIP_CARRIZO:
5380 case CHIP_STONEY:
5381 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5382 cz_update_gfx_cg_power_gating(adev, enable);
5383
5384 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5385 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5386 else
5387 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5388
5389 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5390 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5391 else
5392 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5393 break;
5394 case CHIP_POLARIS11:
5395 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5396 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5397 else
5398 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5399
5400 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5401 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5402 else
5403 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5404
5405 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5406 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5407 else
5408 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5409 break;
5410 default:
5411 break;
5412 }
5413
5414 return 0;
5415 }
5416
5417 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5418 uint32_t reg_addr, uint32_t cmd)
5419 {
5420 uint32_t data;
5421
5422 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5423
5424 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5425 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5426
5427 data = RREG32(mmRLC_SERDES_WR_CTRL);
5428 if (adev->asic_type == CHIP_STONEY)
5429 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5430 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5431 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5432 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5433 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5434 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5435 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5436 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5437 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5438 else
5439 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5440 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5441 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5442 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5443 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5444 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5445 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5446 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5447 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5448 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5449 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5450 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5451 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5452 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5453 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5454
5455 WREG32(mmRLC_SERDES_WR_CTRL, data);
5456 }
5457
/* message codes placed in the MESSAGE field of RLC_GPR_REG2 */
#define MSG_ENTER_RLC_SAFE_MODE 1
#define MSG_EXIT_RLC_SAFE_MODE 0

/* RLC_GPR_REG2 layout: REQ is bit 0, MESSAGE occupies bits 1..4 */
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5464
5465 static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5466 {
5467 u32 data = 0;
5468 unsigned i;
5469
5470 data = RREG32(mmRLC_CNTL);
5471 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5472 return;
5473
5474 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5475 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5476 AMD_PG_SUPPORT_GFX_DMG))) {
5477 data |= RLC_GPR_REG2__REQ_MASK;
5478 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5479 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5480 WREG32(mmRLC_GPR_REG2, data);
5481
5482 for (i = 0; i < adev->usec_timeout; i++) {
5483 if ((RREG32(mmRLC_GPM_STAT) &
5484 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5485 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5486 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5487 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5488 break;
5489 udelay(1);
5490 }
5491
5492 for (i = 0; i < adev->usec_timeout; i++) {
5493 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5494 break;
5495 udelay(1);
5496 }
5497 adev->gfx.rlc.in_safe_mode = true;
5498 }
5499 }
5500
5501 static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5502 {
5503 u32 data;
5504 unsigned i;
5505
5506 data = RREG32(mmRLC_CNTL);
5507 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5508 return;
5509
5510 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5511 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5512 AMD_PG_SUPPORT_GFX_DMG))) {
5513 data |= RLC_GPR_REG2__REQ_MASK;
5514 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5515 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5516 WREG32(mmRLC_GPR_REG2, data);
5517 adev->gfx.rlc.in_safe_mode = false;
5518 }
5519
5520 for (i = 0; i < adev->usec_timeout; i++) {
5521 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5522 break;
5523 udelay(1);
5524 }
5525 }
5526
5527 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5528 {
5529 u32 data;
5530 unsigned i;
5531
5532 data = RREG32(mmRLC_CNTL);
5533 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5534 return;
5535
5536 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5537 data |= RLC_SAFE_MODE__CMD_MASK;
5538 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5539 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5540 WREG32(mmRLC_SAFE_MODE, data);
5541
5542 for (i = 0; i < adev->usec_timeout; i++) {
5543 if ((RREG32(mmRLC_GPM_STAT) &
5544 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5545 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5546 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5547 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5548 break;
5549 udelay(1);
5550 }
5551
5552 for (i = 0; i < adev->usec_timeout; i++) {
5553 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5554 break;
5555 udelay(1);
5556 }
5557 adev->gfx.rlc.in_safe_mode = true;
5558 }
5559 }
5560
5561 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5562 {
5563 u32 data = 0;
5564 unsigned i;
5565
5566 data = RREG32(mmRLC_CNTL);
5567 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5568 return;
5569
5570 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5571 if (adev->gfx.rlc.in_safe_mode) {
5572 data |= RLC_SAFE_MODE__CMD_MASK;
5573 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5574 WREG32(mmRLC_SAFE_MODE, data);
5575 adev->gfx.rlc.in_safe_mode = false;
5576 }
5577 }
5578
5579 for (i = 0; i < adev->usec_timeout; i++) {
5580 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5581 break;
5582 udelay(1);
5583 }
5584 }
5585
5586 static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5587 {
5588 adev->gfx.rlc.in_safe_mode = true;
5589 }
5590
5591 static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5592 {
5593 adev->gfx.rlc.in_safe_mode = false;
5594 }
5595
5596 static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5597 .enter_safe_mode = cz_enter_rlc_safe_mode,
5598 .exit_safe_mode = cz_exit_rlc_safe_mode
5599 };
5600
5601 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5602 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5603 .exit_safe_mode = iceland_exit_rlc_safe_mode
5604 };
5605
5606 static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5607 .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5608 .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5609 };
5610
5611 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5612 bool enable)
5613 {
5614 uint32_t temp, data;
5615
5616 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5617
5618 /* It is disabled by HW by default */
5619 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5620 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5621 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5622 /* 1 - RLC memory Light sleep */
5623 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
5624 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5625 if (temp != data)
5626 WREG32(mmRLC_MEM_SLP_CNTL, data);
5627 }
5628
5629 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5630 /* 2 - CP memory Light sleep */
5631 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
5632 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5633 if (temp != data)
5634 WREG32(mmCP_MEM_SLP_CNTL, data);
5635 }
5636 }
5637
5638 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5639 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5640 if (adev->flags & AMD_IS_APU)
5641 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5642 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5643 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5644 else
5645 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5646 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5647 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5648 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5649
5650 if (temp != data)
5651 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5652
5653 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5654 gfx_v8_0_wait_for_rlc_serdes(adev);
5655
5656 /* 5 - clear mgcg override */
5657 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5658
5659 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5660 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5661 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5662 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5663 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5664 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5665 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5666 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5667 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5668 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5669 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5670 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5671 if (temp != data)
5672 WREG32(mmCGTS_SM_CTRL_REG, data);
5673 }
5674 udelay(50);
5675
5676 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5677 gfx_v8_0_wait_for_rlc_serdes(adev);
5678 } else {
5679 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5680 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5681 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5682 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5683 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5684 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5685 if (temp != data)
5686 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5687
5688 /* 2 - disable MGLS in RLC */
5689 data = RREG32(mmRLC_MEM_SLP_CNTL);
5690 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5691 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5692 WREG32(mmRLC_MEM_SLP_CNTL, data);
5693 }
5694
5695 /* 3 - disable MGLS in CP */
5696 data = RREG32(mmCP_MEM_SLP_CNTL);
5697 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5698 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5699 WREG32(mmCP_MEM_SLP_CNTL, data);
5700 }
5701
5702 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5703 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5704 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5705 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5706 if (temp != data)
5707 WREG32(mmCGTS_SM_CTRL_REG, data);
5708
5709 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5710 gfx_v8_0_wait_for_rlc_serdes(adev);
5711
5712 /* 6 - set mgcg override */
5713 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5714
5715 udelay(50);
5716
5717 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5718 gfx_v8_0_wait_for_rlc_serdes(adev);
5719 }
5720
5721 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5722 }
5723
5724 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5725 bool enable)
5726 {
5727 uint32_t temp, temp1, data, data1;
5728
5729 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5730
5731 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5732
5733 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5734 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5735 * Cmp_busy/GFX_Idle interrupts
5736 */
5737 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5738
5739 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5740 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5741 if (temp1 != data1)
5742 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5743
5744 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5745 gfx_v8_0_wait_for_rlc_serdes(adev);
5746
5747 /* 3 - clear cgcg override */
5748 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5749
5750 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5751 gfx_v8_0_wait_for_rlc_serdes(adev);
5752
5753 /* 4 - write cmd to set CGLS */
5754 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5755
5756 /* 5 - enable cgcg */
5757 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5758
5759 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
5760 /* enable cgls*/
5761 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5762
5763 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5764 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
5765
5766 if (temp1 != data1)
5767 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5768 } else {
5769 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5770 }
5771
5772 if (temp != data)
5773 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5774 } else {
5775 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5776 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5777
5778 /* TEST CGCG */
5779 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5780 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5781 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5782 if (temp1 != data1)
5783 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5784
5785 /* read gfx register to wake up cgcg */
5786 RREG32(mmCB_CGTT_SCLK_CTRL);
5787 RREG32(mmCB_CGTT_SCLK_CTRL);
5788 RREG32(mmCB_CGTT_SCLK_CTRL);
5789 RREG32(mmCB_CGTT_SCLK_CTRL);
5790
5791 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5792 gfx_v8_0_wait_for_rlc_serdes(adev);
5793
5794 /* write cmd to Set CGCG Overrride */
5795 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5796
5797 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5798 gfx_v8_0_wait_for_rlc_serdes(adev);
5799
5800 /* write cmd to Clear CGLS */
5801 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5802
5803 /* disable cgcg, cgls should be disabled too. */
5804 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
5805 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5806 if (temp != data)
5807 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5808 }
5809
5810 gfx_v8_0_wait_for_rlc_serdes(adev);
5811
5812 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5813 }
5814 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5815 bool enable)
5816 {
5817 if (enable) {
5818 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5819 * === MGCG + MGLS + TS(CG/LS) ===
5820 */
5821 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5822 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5823 } else {
5824 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5825 * === CGCG + CGLS ===
5826 */
5827 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5828 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5829 }
5830 return 0;
5831 }
5832
5833 static int gfx_v8_0_set_clockgating_state(void *handle,
5834 enum amd_clockgating_state state)
5835 {
5836 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5837
5838 switch (adev->asic_type) {
5839 case CHIP_FIJI:
5840 case CHIP_CARRIZO:
5841 case CHIP_STONEY:
5842 gfx_v8_0_update_gfx_clock_gating(adev,
5843 state == AMD_CG_STATE_GATE ? true : false);
5844 break;
5845 default:
5846 break;
5847 }
5848 return 0;
5849 }
5850
5851 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5852 {
5853 u32 rptr;
5854
5855 rptr = ring->adev->wb.wb[ring->rptr_offs];
5856
5857 return rptr;
5858 }
5859
5860 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5861 {
5862 struct amdgpu_device *adev = ring->adev;
5863 u32 wptr;
5864
5865 if (ring->use_doorbell)
5866 /* XXX check if swapping is necessary on BE */
5867 wptr = ring->adev->wb.wb[ring->wptr_offs];
5868 else
5869 wptr = RREG32(mmCP_RB0_WPTR);
5870
5871 return wptr;
5872 }
5873
5874 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5875 {
5876 struct amdgpu_device *adev = ring->adev;
5877
5878 if (ring->use_doorbell) {
5879 /* XXX check if swapping is necessary on BE */
5880 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5881 WDOORBELL32(ring->doorbell_index, ring->wptr);
5882 } else {
5883 WREG32(mmCP_RB0_WPTR, ring->wptr);
5884 (void)RREG32(mmCP_RB0_WPTR);
5885 }
5886 }
5887
5888 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5889 {
5890 u32 ref_and_mask, reg_mem_engine;
5891
5892 if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5893 switch (ring->me) {
5894 case 1:
5895 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5896 break;
5897 case 2:
5898 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5899 break;
5900 default:
5901 return;
5902 }
5903 reg_mem_engine = 0;
5904 } else {
5905 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5906 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5907 }
5908
5909 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5910 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5911 WAIT_REG_MEM_FUNCTION(3) | /* == */
5912 reg_mem_engine));
5913 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5914 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5915 amdgpu_ring_write(ring, ref_and_mask);
5916 amdgpu_ring_write(ring, ref_and_mask);
5917 amdgpu_ring_write(ring, 0x20); /* poll interval */
5918 }
5919
5920 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5921 {
5922 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5923 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5924 WRITE_DATA_DST_SEL(0) |
5925 WR_CONFIRM));
5926 amdgpu_ring_write(ring, mmHDP_DEBUG0);
5927 amdgpu_ring_write(ring, 0);
5928 amdgpu_ring_write(ring, 1);
5929
5930 }
5931
5932 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5933 struct amdgpu_ib *ib,
5934 unsigned vm_id, bool ctx_switch)
5935 {
5936 u32 header, control = 0;
5937
5938 /* insert SWITCH_BUFFER packet before first IB in the ring frame */
5939 if (ctx_switch) {
5940 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5941 amdgpu_ring_write(ring, 0);
5942 }
5943
5944 if (ib->flags & AMDGPU_IB_FLAG_CE)
5945 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5946 else
5947 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5948
5949 control |= ib->length_dw | (vm_id << 24);
5950
5951 amdgpu_ring_write(ring, header);
5952 amdgpu_ring_write(ring,
5953 #ifdef __BIG_ENDIAN
5954 (2 << 0) |
5955 #endif
5956 (ib->gpu_addr & 0xFFFFFFFC));
5957 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5958 amdgpu_ring_write(ring, control);
5959 }
5960
5961 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5962 struct amdgpu_ib *ib,
5963 unsigned vm_id, bool ctx_switch)
5964 {
5965 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
5966
5967 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5968 amdgpu_ring_write(ring,
5969 #ifdef __BIG_ENDIAN
5970 (2 << 0) |
5971 #endif
5972 (ib->gpu_addr & 0xFFFFFFFC));
5973 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5974 amdgpu_ring_write(ring, control);
5975 }
5976
5977 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
5978 u64 seq, unsigned flags)
5979 {
5980 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5981 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5982
5983 /* EVENT_WRITE_EOP - flush caches, send int */
5984 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5985 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5986 EOP_TC_ACTION_EN |
5987 EOP_TC_WB_ACTION_EN |
5988 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5989 EVENT_INDEX(5)));
5990 amdgpu_ring_write(ring, addr & 0xfffffffc);
5991 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
5992 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5993 amdgpu_ring_write(ring, lower_32_bits(seq));
5994 amdgpu_ring_write(ring, upper_32_bits(seq));
5995
5996 }
5997
5998 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5999 {
6000 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6001 uint32_t seq = ring->fence_drv.sync_seq;
6002 uint64_t addr = ring->fence_drv.gpu_addr;
6003
6004 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6005 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6006 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6007 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6008 amdgpu_ring_write(ring, addr & 0xfffffffc);
6009 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6010 amdgpu_ring_write(ring, seq);
6011 amdgpu_ring_write(ring, 0xffffffff);
6012 amdgpu_ring_write(ring, 4); /* poll interval */
6013
6014 if (usepfp) {
6015 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
6016 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6017 amdgpu_ring_write(ring, 0);
6018 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6019 amdgpu_ring_write(ring, 0);
6020 }
6021 }
6022
6023 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6024 unsigned vm_id, uint64_t pd_addr)
6025 {
6026 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
6027
6028 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6029 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6030 WRITE_DATA_DST_SEL(0)) |
6031 WR_CONFIRM);
6032 if (vm_id < 8) {
6033 amdgpu_ring_write(ring,
6034 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6035 } else {
6036 amdgpu_ring_write(ring,
6037 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6038 }
6039 amdgpu_ring_write(ring, 0);
6040 amdgpu_ring_write(ring, pd_addr >> 12);
6041
6042 /* bits 0-15 are the VM contexts0-15 */
6043 /* invalidate the cache */
6044 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6045 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6046 WRITE_DATA_DST_SEL(0)));
6047 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6048 amdgpu_ring_write(ring, 0);
6049 amdgpu_ring_write(ring, 1 << vm_id);
6050
6051 /* wait for the invalidate to complete */
6052 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6053 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6054 WAIT_REG_MEM_FUNCTION(0) | /* always */
6055 WAIT_REG_MEM_ENGINE(0))); /* me */
6056 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6057 amdgpu_ring_write(ring, 0);
6058 amdgpu_ring_write(ring, 0); /* ref */
6059 amdgpu_ring_write(ring, 0); /* mask */
6060 amdgpu_ring_write(ring, 0x20); /* poll interval */
6061
6062 /* compute doesn't have PFP */
6063 if (usepfp) {
6064 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6065 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6066 amdgpu_ring_write(ring, 0x0);
6067 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6068 amdgpu_ring_write(ring, 0);
6069 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6070 amdgpu_ring_write(ring, 0);
6071 }
6072 }
6073
6074 static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
6075 {
6076 return ring->adev->wb.wb[ring->rptr_offs];
6077 }
6078
6079 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6080 {
6081 return ring->adev->wb.wb[ring->wptr_offs];
6082 }
6083
6084 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6085 {
6086 struct amdgpu_device *adev = ring->adev;
6087
6088 /* XXX check if swapping is necessary on BE */
6089 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6090 WDOORBELL32(ring->doorbell_index, ring->wptr);
6091 }
6092
6093 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6094 u64 addr, u64 seq,
6095 unsigned flags)
6096 {
6097 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6098 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6099
6100 /* RELEASE_MEM - flush caches, send int */
6101 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6102 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6103 EOP_TC_ACTION_EN |
6104 EOP_TC_WB_ACTION_EN |
6105 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6106 EVENT_INDEX(5)));
6107 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6108 amdgpu_ring_write(ring, addr & 0xfffffffc);
6109 amdgpu_ring_write(ring, upper_32_bits(addr));
6110 amdgpu_ring_write(ring, lower_32_bits(seq));
6111 amdgpu_ring_write(ring, upper_32_bits(seq));
6112 }
6113
6114 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6115 enum amdgpu_interrupt_state state)
6116 {
6117 u32 cp_int_cntl;
6118
6119 switch (state) {
6120 case AMDGPU_IRQ_STATE_DISABLE:
6121 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6122 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6123 TIME_STAMP_INT_ENABLE, 0);
6124 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6125 break;
6126 case AMDGPU_IRQ_STATE_ENABLE:
6127 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6128 cp_int_cntl =
6129 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6130 TIME_STAMP_INT_ENABLE, 1);
6131 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6132 break;
6133 default:
6134 break;
6135 }
6136 }
6137
6138 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6139 int me, int pipe,
6140 enum amdgpu_interrupt_state state)
6141 {
6142 u32 mec_int_cntl, mec_int_cntl_reg;
6143
6144 /*
6145 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6146 * handles the setting of interrupts for this specific pipe. All other
6147 * pipes' interrupts are set by amdkfd.
6148 */
6149
6150 if (me == 1) {
6151 switch (pipe) {
6152 case 0:
6153 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6154 break;
6155 default:
6156 DRM_DEBUG("invalid pipe %d\n", pipe);
6157 return;
6158 }
6159 } else {
6160 DRM_DEBUG("invalid me %d\n", me);
6161 return;
6162 }
6163
6164 switch (state) {
6165 case AMDGPU_IRQ_STATE_DISABLE:
6166 mec_int_cntl = RREG32(mec_int_cntl_reg);
6167 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6168 TIME_STAMP_INT_ENABLE, 0);
6169 WREG32(mec_int_cntl_reg, mec_int_cntl);
6170 break;
6171 case AMDGPU_IRQ_STATE_ENABLE:
6172 mec_int_cntl = RREG32(mec_int_cntl_reg);
6173 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6174 TIME_STAMP_INT_ENABLE, 1);
6175 WREG32(mec_int_cntl_reg, mec_int_cntl);
6176 break;
6177 default:
6178 break;
6179 }
6180 }
6181
6182 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6183 struct amdgpu_irq_src *source,
6184 unsigned type,
6185 enum amdgpu_interrupt_state state)
6186 {
6187 u32 cp_int_cntl;
6188
6189 switch (state) {
6190 case AMDGPU_IRQ_STATE_DISABLE:
6191 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6192 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6193 PRIV_REG_INT_ENABLE, 0);
6194 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6195 break;
6196 case AMDGPU_IRQ_STATE_ENABLE:
6197 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6198 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6199 PRIV_REG_INT_ENABLE, 1);
6200 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6201 break;
6202 default:
6203 break;
6204 }
6205
6206 return 0;
6207 }
6208
6209 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6210 struct amdgpu_irq_src *source,
6211 unsigned type,
6212 enum amdgpu_interrupt_state state)
6213 {
6214 u32 cp_int_cntl;
6215
6216 switch (state) {
6217 case AMDGPU_IRQ_STATE_DISABLE:
6218 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6219 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6220 PRIV_INSTR_INT_ENABLE, 0);
6221 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6222 break;
6223 case AMDGPU_IRQ_STATE_ENABLE:
6224 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6225 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6226 PRIV_INSTR_INT_ENABLE, 1);
6227 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6228 break;
6229 default:
6230 break;
6231 }
6232
6233 return 0;
6234 }
6235
6236 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6237 struct amdgpu_irq_src *src,
6238 unsigned type,
6239 enum amdgpu_interrupt_state state)
6240 {
6241 switch (type) {
6242 case AMDGPU_CP_IRQ_GFX_EOP:
6243 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6244 break;
6245 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6246 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6247 break;
6248 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6249 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6250 break;
6251 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6252 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6253 break;
6254 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6255 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6256 break;
6257 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6258 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6259 break;
6260 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6261 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6262 break;
6263 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6264 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6265 break;
6266 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6267 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6268 break;
6269 default:
6270 break;
6271 }
6272 return 0;
6273 }
6274
6275 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6276 struct amdgpu_irq_src *source,
6277 struct amdgpu_iv_entry *entry)
6278 {
6279 int i;
6280 u8 me_id, pipe_id, queue_id;
6281 struct amdgpu_ring *ring;
6282
6283 DRM_DEBUG("IH: CP EOP\n");
6284 me_id = (entry->ring_id & 0x0c) >> 2;
6285 pipe_id = (entry->ring_id & 0x03) >> 0;
6286 queue_id = (entry->ring_id & 0x70) >> 4;
6287
6288 switch (me_id) {
6289 case 0:
6290 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6291 break;
6292 case 1:
6293 case 2:
6294 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6295 ring = &adev->gfx.compute_ring[i];
6296 /* Per-queue interrupt is supported for MEC starting from VI.
6297 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6298 */
6299 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6300 amdgpu_fence_process(ring);
6301 }
6302 break;
6303 }
6304 return 0;
6305 }
6306
6307 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6308 struct amdgpu_irq_src *source,
6309 struct amdgpu_iv_entry *entry)
6310 {
6311 DRM_ERROR("Illegal register access in command stream\n");
6312 schedule_work(&adev->reset_work);
6313 return 0;
6314 }
6315
6316 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6317 struct amdgpu_irq_src *source,
6318 struct amdgpu_iv_entry *entry)
6319 {
6320 DRM_ERROR("Illegal instruction in command stream\n");
6321 schedule_work(&adev->reset_work);
6322 return 0;
6323 }
6324
6325 const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6326 .name = "gfx_v8_0",
6327 .early_init = gfx_v8_0_early_init,
6328 .late_init = gfx_v8_0_late_init,
6329 .sw_init = gfx_v8_0_sw_init,
6330 .sw_fini = gfx_v8_0_sw_fini,
6331 .hw_init = gfx_v8_0_hw_init,
6332 .hw_fini = gfx_v8_0_hw_fini,
6333 .suspend = gfx_v8_0_suspend,
6334 .resume = gfx_v8_0_resume,
6335 .is_idle = gfx_v8_0_is_idle,
6336 .wait_for_idle = gfx_v8_0_wait_for_idle,
6337 .soft_reset = gfx_v8_0_soft_reset,
6338 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6339 .set_powergating_state = gfx_v8_0_set_powergating_state,
6340 };
6341
6342 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6343 .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6344 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6345 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6346 .parse_cs = NULL,
6347 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6348 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6349 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6350 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6351 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6352 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6353 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6354 .test_ring = gfx_v8_0_ring_test_ring,
6355 .test_ib = gfx_v8_0_ring_test_ib,
6356 .insert_nop = amdgpu_ring_insert_nop,
6357 .pad_ib = amdgpu_ring_generic_pad_ib,
6358 };
6359
6360 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6361 .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6362 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6363 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6364 .parse_cs = NULL,
6365 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6366 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6367 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6368 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6369 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6370 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6371 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6372 .test_ring = gfx_v8_0_ring_test_ring,
6373 .test_ib = gfx_v8_0_ring_test_ib,
6374 .insert_nop = amdgpu_ring_insert_nop,
6375 .pad_ib = amdgpu_ring_generic_pad_ib,
6376 };
6377
6378 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6379 {
6380 int i;
6381
6382 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6383 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6384
6385 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6386 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6387 }
6388
6389 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6390 .set = gfx_v8_0_set_eop_interrupt_state,
6391 .process = gfx_v8_0_eop_irq,
6392 };
6393
6394 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6395 .set = gfx_v8_0_set_priv_reg_fault_state,
6396 .process = gfx_v8_0_priv_reg_irq,
6397 };
6398
6399 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6400 .set = gfx_v8_0_set_priv_inst_fault_state,
6401 .process = gfx_v8_0_priv_inst_irq,
6402 };
6403
6404 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6405 {
6406 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6407 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6408
6409 adev->gfx.priv_reg_irq.num_types = 1;
6410 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6411
6412 adev->gfx.priv_inst_irq.num_types = 1;
6413 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6414 }
6415
6416 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6417 {
6418 switch (adev->asic_type) {
6419 case CHIP_TOPAZ:
6420 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6421 break;
6422 case CHIP_STONEY:
6423 case CHIP_CARRIZO:
6424 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6425 break;
6426 default:
6427 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6428 break;
6429 }
6430 }
6431
6432 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6433 {
6434 /* init asci gds info */
6435 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6436 adev->gds.gws.total_size = 64;
6437 adev->gds.oa.total_size = 16;
6438
6439 if (adev->gds.mem.total_size == 64 * 1024) {
6440 adev->gds.mem.gfx_partition_size = 4096;
6441 adev->gds.mem.cs_partition_size = 4096;
6442
6443 adev->gds.gws.gfx_partition_size = 4;
6444 adev->gds.gws.cs_partition_size = 4;
6445
6446 adev->gds.oa.gfx_partition_size = 4;
6447 adev->gds.oa.cs_partition_size = 1;
6448 } else {
6449 adev->gds.mem.gfx_partition_size = 1024;
6450 adev->gds.mem.cs_partition_size = 1024;
6451
6452 adev->gds.gws.gfx_partition_size = 16;
6453 adev->gds.gws.cs_partition_size = 16;
6454
6455 adev->gds.oa.gfx_partition_size = 4;
6456 adev->gds.oa.cs_partition_size = 4;
6457 }
6458 }
6459
6460 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6461 u32 bitmap)
6462 {
6463 u32 data;
6464
6465 if (!bitmap)
6466 return;
6467
6468 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6469 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6470
6471 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6472 }
6473
6474 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
6475 {
6476 u32 data, mask;
6477
6478 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6479 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
6480
6481 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6482 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6483
6484 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
6485
6486 return (~data) & mask;
6487 }
6488
6489 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
6490 {
6491 int i, j, k, counter, active_cu_number = 0;
6492 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6493 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
6494 unsigned disable_masks[4 * 2];
6495
6496 memset(cu_info, 0, sizeof(*cu_info));
6497
6498 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6499
6500 mutex_lock(&adev->grbm_idx_mutex);
6501 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6502 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6503 mask = 1;
6504 ao_bitmap = 0;
6505 counter = 0;
6506 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
6507 if (i < 4 && j < 2)
6508 gfx_v8_0_set_user_cu_inactive_bitmap(
6509 adev, disable_masks[i * 2 + j]);
6510 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
6511 cu_info->bitmap[i][j] = bitmap;
6512
6513 for (k = 0; k < 16; k ++) {
6514 if (bitmap & mask) {
6515 if (counter < 2)
6516 ao_bitmap |= mask;
6517 counter ++;
6518 }
6519 mask <<= 1;
6520 }
6521 active_cu_number += counter;
6522 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6523 }
6524 }
6525 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6526 mutex_unlock(&adev->grbm_idx_mutex);
6527
6528 cu_info->number = active_cu_number;
6529 cu_info->ao_cu_mask = ao_cu_mask;
6530 }
This page took 0.188654 seconds and 5 git commands to generate.