1 /*
2  * Copyright 2021 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 
24 #include "sienna_cichlid.h"
25 #include "amdgpu_reset.h"
26 #include "amdgpu_amdkfd.h"
27 #include "amdgpu_dpm.h"
28 #include "amdgpu_job.h"
29 #include "amdgpu_ring.h"
30 #include "amdgpu_ras.h"
31 #include "amdgpu_psp.h"
32 #include "amdgpu_xgmi.h"
33 
34 static struct amdgpu_reset_handler *
35 sienna_cichlid_get_reset_handler(struct amdgpu_reset_control *reset_ctl,
36 			    struct amdgpu_reset_context *reset_context)
37 {
38 	struct amdgpu_reset_handler *handler;
39 	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
40 
41 	if (reset_context->method != AMD_RESET_METHOD_NONE) {
42 		list_for_each_entry(handler, &reset_ctl->reset_handlers,
43 				     handler_list) {
44 			if (handler->reset_method == reset_context->method)
45 				return handler;
46 		}
47 	} else {
48 		list_for_each_entry(handler, &reset_ctl->reset_handlers,
49 				     handler_list) {
50 			if (handler->reset_method == AMD_RESET_METHOD_MODE2 &&
51 			    adev->pm.fw_version >= 0x3a5500 &&
52 			    !amdgpu_sriov_vf(adev)) {
53 				reset_context->method = AMD_RESET_METHOD_MODE2;
54 				return handler;
55 			}
56 		}
57 	}
58 
59 	return NULL;
60 }
61 
62 static int sienna_cichlid_mode2_suspend_ip(struct amdgpu_device *adev)
63 {
64 	int r, i;
65 
66 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
67 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
68 
69 	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
70 		if (!(adev->ip_blocks[i].version->type ==
71 			      AMD_IP_BLOCK_TYPE_GFX ||
72 		      adev->ip_blocks[i].version->type ==
73 			      AMD_IP_BLOCK_TYPE_SDMA))
74 			continue;
75 
76 		r = adev->ip_blocks[i].version->funcs->suspend(adev);
77 
78 		if (r) {
79 			dev_err(adev->dev,
80 				"suspend of IP block <%s> failed %d\n",
81 				adev->ip_blocks[i].version->funcs->name, r);
82 			return r;
83 		}
84 		adev->ip_blocks[i].status.hw = false;
85 	}
86 
87 	return r;
88 }
89 
90 static int
91 sienna_cichlid_mode2_prepare_hwcontext(struct amdgpu_reset_control *reset_ctl,
92 				  struct amdgpu_reset_context *reset_context)
93 {
94 	int r = 0;
95 	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
96 
97 	if (!amdgpu_sriov_vf(adev)) {
98 		if (adev->gfxhub.funcs->mode2_save_regs)
99 			adev->gfxhub.funcs->mode2_save_regs(adev);
100 		if (adev->gfxhub.funcs->halt)
101 			adev->gfxhub.funcs->halt(adev);
102 		r = sienna_cichlid_mode2_suspend_ip(adev);
103 	}
104 
105 	return r;
106 }
107 
108 static void sienna_cichlid_async_reset(struct work_struct *work)
109 {
110 	struct amdgpu_reset_handler *handler;
111 	struct amdgpu_reset_control *reset_ctl =
112 		container_of(work, struct amdgpu_reset_control, reset_work);
113 	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
114 
115 	list_for_each_entry(handler, &reset_ctl->reset_handlers,
116 			     handler_list) {
117 		if (handler->reset_method == reset_ctl->active_reset) {
118 			dev_dbg(adev->dev, "Resetting device\n");
119 			handler->do_reset(adev);
120 			break;
121 		}
122 	}
123 }
124 
125 static int sienna_cichlid_mode2_reset(struct amdgpu_device *adev)
126 {
127 	/* disable BM */
128 	pci_clear_master(adev->pdev);
129 	return amdgpu_dpm_mode2_reset(adev);
130 }
131 
132 static int
133 sienna_cichlid_mode2_perform_reset(struct amdgpu_reset_control *reset_ctl,
134 			      struct amdgpu_reset_context *reset_context)
135 {
136 	struct amdgpu_device *adev = (struct amdgpu_device *)reset_ctl->handle;
137 	int r;
138 
139 	r = sienna_cichlid_mode2_reset(adev);
140 	if (r) {
141 		dev_err(adev->dev,
142 			"ASIC reset failed with error, %d ", r);
143 	}
144 	return r;
145 }
146 
147 static int sienna_cichlid_mode2_restore_ip(struct amdgpu_device *adev)
148 {
149 	int i, r;
150 	struct psp_context *psp = &adev->psp;
151 
152 	r = psp_rlc_autoload_start(psp);
153 	if (r) {
154 		dev_err(adev->dev, "Failed to start rlc autoload\n");
155 		return r;
156 	}
157 
158 	/* Reinit GFXHUB */
159 	if (adev->gfxhub.funcs->mode2_restore_regs)
160 		adev->gfxhub.funcs->mode2_restore_regs(adev);
161 	adev->gfxhub.funcs->init(adev);
162 	r = adev->gfxhub.funcs->gart_enable(adev);
163 	if (r) {
164 		dev_err(adev->dev, "GFXHUB gart reenable failed after reset\n");
165 		return r;
166 	}
167 
168 	for (i = 0; i < adev->num_ip_blocks; i++) {
169 		if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
170 			r = adev->ip_blocks[i].version->funcs->resume(adev);
171 			if (r) {
172 				dev_err(adev->dev,
173 					"resume of IP block <%s> failed %d\n",
174 					adev->ip_blocks[i].version->funcs->name, r);
175 				return r;
176 			}
177 
178 			adev->ip_blocks[i].status.hw = true;
179 		}
180 	}
181 
182 	for (i = 0; i < adev->num_ip_blocks; i++) {
183 		if (!(adev->ip_blocks[i].version->type ==
184 			      AMD_IP_BLOCK_TYPE_GFX ||
185 		      adev->ip_blocks[i].version->type ==
186 			      AMD_IP_BLOCK_TYPE_SDMA))
187 			continue;
188 		r = adev->ip_blocks[i].version->funcs->resume(adev);
189 		if (r) {
190 			dev_err(adev->dev,
191 				"resume of IP block <%s> failed %d\n",
192 				adev->ip_blocks[i].version->funcs->name, r);
193 			return r;
194 		}
195 
196 		adev->ip_blocks[i].status.hw = true;
197 	}
198 
199 	for (i = 0; i < adev->num_ip_blocks; i++) {
200 		if (!(adev->ip_blocks[i].version->type ==
201 			      AMD_IP_BLOCK_TYPE_GFX ||
202 		      adev->ip_blocks[i].version->type ==
203 			      AMD_IP_BLOCK_TYPE_SDMA))
204 			continue;
205 
206 		if (adev->ip_blocks[i].version->funcs->late_init) {
207 			r = adev->ip_blocks[i].version->funcs->late_init(
208 				(void *)adev);
209 			if (r) {
210 				dev_err(adev->dev,
211 					"late_init of IP block <%s> failed %d after reset\n",
212 					adev->ip_blocks[i].version->funcs->name,
213 					r);
214 				return r;
215 			}
216 		}
217 		adev->ip_blocks[i].status.late_initialized = true;
218 	}
219 
220 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
221 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
222 
223 	return r;
224 }
225 
226 static int
227 sienna_cichlid_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
228 				  struct amdgpu_reset_context *reset_context)
229 {
230 	int r;
231 	struct amdgpu_device *tmp_adev = (struct amdgpu_device *)reset_ctl->handle;
232 
233 	dev_info(tmp_adev->dev,
234 			"GPU reset succeeded, trying to resume\n");
235 	r = sienna_cichlid_mode2_restore_ip(tmp_adev);
236 	if (r)
237 		goto end;
238 
239 	/*
240 	* Add this ASIC as tracked as reset was already
241 	* complete successfully.
242 	*/
243 	amdgpu_register_gpu_instance(tmp_adev);
244 
245 	/* Resume RAS */
246 	amdgpu_ras_resume(tmp_adev);
247 
248 	amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
249 
250 	r = amdgpu_ib_ring_tests(tmp_adev);
251 	if (r) {
252 		dev_err(tmp_adev->dev,
253 			"ib ring test failed (%d).\n", r);
254 		r = -EAGAIN;
255 		goto end;
256 	}
257 
258 end:
259 	if (r)
260 		return -EAGAIN;
261 	else
262 		return r;
263 }
264 
265 static struct amdgpu_reset_handler sienna_cichlid_mode2_handler = {
266 	.reset_method		= AMD_RESET_METHOD_MODE2,
267 	.prepare_env		= NULL,
268 	.prepare_hwcontext	= sienna_cichlid_mode2_prepare_hwcontext,
269 	.perform_reset		= sienna_cichlid_mode2_perform_reset,
270 	.restore_hwcontext	= sienna_cichlid_mode2_restore_hwcontext,
271 	.restore_env		= NULL,
272 	.do_reset		= sienna_cichlid_mode2_reset,
273 };
274 
275 int sienna_cichlid_reset_init(struct amdgpu_device *adev)
276 {
277 	struct amdgpu_reset_control *reset_ctl;
278 
279 	reset_ctl = kzalloc(sizeof(*reset_ctl), GFP_KERNEL);
280 	if (!reset_ctl)
281 		return -ENOMEM;
282 
283 	reset_ctl->handle = adev;
284 	reset_ctl->async_reset = sienna_cichlid_async_reset;
285 	reset_ctl->active_reset = AMD_RESET_METHOD_NONE;
286 	reset_ctl->get_reset_handler = sienna_cichlid_get_reset_handler;
287 
288 	INIT_LIST_HEAD(&reset_ctl->reset_handlers);
289 	INIT_WORK(&reset_ctl->reset_work, reset_ctl->async_reset);
290 	/* Only mode2 is handled through reset control now */
291 	amdgpu_reset_add_handler(reset_ctl, &sienna_cichlid_mode2_handler);
292 
293 	adev->reset_cntl = reset_ctl;
294 
295 	return 0;
296 }
297 
298 int sienna_cichlid_reset_fini(struct amdgpu_device *adev)
299 {
300 	kfree(adev->reset_cntl);
301 	adev->reset_cntl = NULL;
302 	return 0;
303 }
304