xref: /openbmc/linux/drivers/gpu/drm/vc4/vc4_validate.c (revision 72f793f1)
1 /*
2  * Copyright © 2014 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /**
25  * DOC: Command list validator for VC4.
26  *
27  * The VC4 has no IOMMU between it and system memory.  So, a user with
28  * access to execute command lists could escalate privilege by
29  * overwriting system memory (drawing to it as a framebuffer) or
30  * reading system memory it shouldn't (reading it as a texture, or
31  * uniform data, or vertex data).
32  *
33  * This validates command lists to ensure that all accesses are within
34  * the bounds of the GEM objects referenced.  It explicitly whitelists
35  * packets, and looks at the offsets in any address fields to make
36  * sure they're constrained within the BOs they reference.
37  *
38  * Note that because of the validation that's happening anyway, this
39  * is where GEM relocation processing happens.
40  */
41 
42 #include "uapi/drm/vc4_drm.h"
43 #include "vc4_drv.h"
44 #include "vc4_packet.h"
45 
/*
 * Parameter list shared by the per-packet validation callbacks: the
 * execution state, a pointer into the validated copy of the packet and a
 * pointer into the untrusted packet the user supplied.
 */
#define VALIDATE_ARGS \
	struct vc4_exec_info *exec, void *validated, void *untrusted
50 
/**
 * utile_width() - Width in pixels of a 64-byte microtile.
 * @cpp: Bytes per pixel of the pixel format
 *
 * An unrecognized @cpp is logged and treated as a 1 pixel wide microtile.
 */
static uint32_t
utile_width(int cpp)
{
	if (cpp == 1 || cpp == 2)
		return 8;
	if (cpp == 4)
		return 4;
	if (cpp == 8)
		return 2;

	DRM_ERROR("unknown cpp: %d\n", cpp);
	return 1;
}
68 
/**
 * utile_height() - Height in pixels of a 64-byte microtile.
 * @cpp: Bytes per pixel of the pixel format
 *
 * An unrecognized @cpp is logged and treated as a 1 pixel high microtile.
 */
static uint32_t
utile_height(int cpp)
{
	if (cpp == 1)
		return 8;
	if (cpp == 2 || cpp == 4 || cpp == 8)
		return 4;

	DRM_ERROR("unknown cpp: %d\n", cpp);
	return 1;
}
85 
/**
 * size_is_lt() - Tell whether a miplevel uses the lineartile (LT) layout.
 * @width: Width in pixels of the miplevel
 * @height: Height in pixels of the miplevel
 * @cpp: Bytes per pixel of the pixel format
 *
 * Return: true when either dimension is at most four microtiles, in which
 * case the LT tiling layout is used instead of the normal T layout.
 */
static bool
size_is_lt(uint32_t width, uint32_t height, int cpp)
{
	/* Narrow enough on its own; the height does not need looking at. */
	if (width <= 4 * utile_width(cpp))
		return true;

	return height <= 4 * utile_height(cpp);
}
100 
101 struct drm_gem_cma_object *
102 vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
103 {
104 	struct drm_gem_cma_object *obj;
105 	struct vc4_bo *bo;
106 
107 	if (hindex >= exec->bo_count) {
108 		DRM_ERROR("BO index %d greater than BO count %d\n",
109 			  hindex, exec->bo_count);
110 		return NULL;
111 	}
112 	obj = exec->bo[hindex];
113 	bo = to_vc4_bo(&obj->base);
114 
115 	if (bo->validated_shader) {
116 		DRM_ERROR("Trying to use shader BO as something other than "
117 			  "a shader\n");
118 		return NULL;
119 	}
120 
121 	return obj;
122 }
123 
124 static struct drm_gem_cma_object *
125 vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)
126 {
127 	return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);
128 }
129 
130 static bool
131 validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos)
132 {
133 	/* Note that the untrusted pointer passed to these functions is
134 	 * incremented past the packet byte.
135 	 */
136 	return (untrusted - 1 == exec->bin_u + pos);
137 }
138 
/*
 * Size in bytes of a GL shader record, derived from the low four bits of
 * the shader state reference: bits 0-2 hold the attribute count and bit 3
 * is the "extended" flag.
 */
static uint32_t
gl_shader_rec_size(uint32_t pointer_bits)
{
	uint32_t nr_attrs = pointer_bits & 7;

	/* An encoded count of zero stands for eight attributes. */
	if (!nr_attrs)
		nr_attrs = 8;

	return (pointer_bits & 8) ? 100 + nr_attrs * 4 : 36 + nr_attrs * 8;
}
153 
154 bool
155 vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
156 		   uint32_t offset, uint8_t tiling_format,
157 		   uint32_t width, uint32_t height, uint8_t cpp)
158 {
159 	uint32_t aligned_width, aligned_height, stride, size;
160 	uint32_t utile_w = utile_width(cpp);
161 	uint32_t utile_h = utile_height(cpp);
162 
163 	/* The shaded vertex format stores signed 12.4 fixed point
164 	 * (-2048,2047) offsets from the viewport center, so we should
165 	 * never have a render target larger than 4096.  The texture
166 	 * unit can only sample from 2048x2048, so it's even more
167 	 * restricted.  This lets us avoid worrying about overflow in
168 	 * our math.
169 	 */
170 	if (width > 4096 || height > 4096) {
171 		DRM_ERROR("Surface dimesions (%d,%d) too large", width, height);
172 		return false;
173 	}
174 
175 	switch (tiling_format) {
176 	case VC4_TILING_FORMAT_LINEAR:
177 		aligned_width = round_up(width, utile_w);
178 		aligned_height = height;
179 		break;
180 	case VC4_TILING_FORMAT_T:
181 		aligned_width = round_up(width, utile_w * 8);
182 		aligned_height = round_up(height, utile_h * 8);
183 		break;
184 	case VC4_TILING_FORMAT_LT:
185 		aligned_width = round_up(width, utile_w);
186 		aligned_height = round_up(height, utile_h);
187 		break;
188 	default:
189 		DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);
190 		return false;
191 	}
192 
193 	stride = aligned_width * cpp;
194 	size = stride * aligned_height;
195 
196 	if (size + offset < size ||
197 	    size + offset > fbo->base.size) {
198 		DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
199 			  width, height,
200 			  aligned_width, aligned_height,
201 			  size, offset, fbo->base.size);
202 		return false;
203 	}
204 
205 	return true;
206 }
207 
208 static int
209 validate_flush(VALIDATE_ARGS)
210 {
211 	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {
212 		DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");
213 		return -EINVAL;
214 	}
215 	exec->found_flush = true;
216 
217 	return 0;
218 }
219 
220 static int
221 validate_start_tile_binning(VALIDATE_ARGS)
222 {
223 	if (exec->found_start_tile_binning_packet) {
224 		DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");
225 		return -EINVAL;
226 	}
227 	exec->found_start_tile_binning_packet = true;
228 
229 	if (!exec->found_tile_binning_mode_config_packet) {
230 		DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
231 		return -EINVAL;
232 	}
233 
234 	return 0;
235 }
236 
237 static int
238 validate_increment_semaphore(VALIDATE_ARGS)
239 {
240 	if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {
241 		DRM_ERROR("Bin CL must end with "
242 			  "VC4_PACKET_INCREMENT_SEMAPHORE\n");
243 		return -EINVAL;
244 	}
245 	exec->found_increment_semaphore_packet = true;
246 
247 	return 0;
248 }
249 
250 static int
251 validate_indexed_prim_list(VALIDATE_ARGS)
252 {
253 	struct drm_gem_cma_object *ib;
254 	uint32_t length = *(uint32_t *)(untrusted + 1);
255 	uint32_t offset = *(uint32_t *)(untrusted + 5);
256 	uint32_t max_index = *(uint32_t *)(untrusted + 9);
257 	uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;
258 	struct vc4_shader_state *shader_state;
259 
260 	/* Check overflow condition */
261 	if (exec->shader_state_count == 0) {
262 		DRM_ERROR("shader state must precede primitives\n");
263 		return -EINVAL;
264 	}
265 	shader_state = &exec->shader_state[exec->shader_state_count - 1];
266 
267 	if (max_index > shader_state->max_index)
268 		shader_state->max_index = max_index;
269 
270 	ib = vc4_use_handle(exec, 0);
271 	if (!ib)
272 		return -EINVAL;
273 
274 	exec->bin_dep_seqno = max(exec->bin_dep_seqno,
275 				  to_vc4_bo(&ib->base)->write_seqno);
276 
277 	if (offset > ib->base.size ||
278 	    (ib->base.size - offset) / index_size < length) {
279 		DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
280 			  offset, length, index_size, ib->base.size);
281 		return -EINVAL;
282 	}
283 
284 	*(uint32_t *)(validated + 5) = ib->paddr + offset;
285 
286 	return 0;
287 }
288 
289 static int
290 validate_gl_array_primitive(VALIDATE_ARGS)
291 {
292 	uint32_t length = *(uint32_t *)(untrusted + 1);
293 	uint32_t base_index = *(uint32_t *)(untrusted + 5);
294 	uint32_t max_index;
295 	struct vc4_shader_state *shader_state;
296 
297 	/* Check overflow condition */
298 	if (exec->shader_state_count == 0) {
299 		DRM_ERROR("shader state must precede primitives\n");
300 		return -EINVAL;
301 	}
302 	shader_state = &exec->shader_state[exec->shader_state_count - 1];
303 
304 	if (length + base_index < length) {
305 		DRM_ERROR("primitive vertex count overflow\n");
306 		return -EINVAL;
307 	}
308 	max_index = length + base_index - 1;
309 
310 	if (max_index > shader_state->max_index)
311 		shader_state->max_index = max_index;
312 
313 	return 0;
314 }
315 
316 static int
317 validate_gl_shader_state(VALIDATE_ARGS)
318 {
319 	uint32_t i = exec->shader_state_count++;
320 
321 	if (i >= exec->shader_state_size) {
322 		DRM_ERROR("More requests for shader states than declared\n");
323 		return -EINVAL;
324 	}
325 
326 	exec->shader_state[i].addr = *(uint32_t *)untrusted;
327 	exec->shader_state[i].max_index = 0;
328 
329 	if (exec->shader_state[i].addr & ~0xf) {
330 		DRM_ERROR("high bits set in GL shader rec reference\n");
331 		return -EINVAL;
332 	}
333 
334 	*(uint32_t *)validated = (exec->shader_rec_p +
335 				  exec->shader_state[i].addr);
336 
337 	exec->shader_rec_p +=
338 		roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);
339 
340 	return 0;
341 }
342 
343 static int
344 validate_tile_binning_config(VALIDATE_ARGS)
345 {
346 	struct drm_device *dev = exec->exec_bo->base.dev;
347 	struct vc4_bo *tile_bo;
348 	uint8_t flags;
349 	uint32_t tile_state_size, tile_alloc_size;
350 	uint32_t tile_count;
351 
352 	if (exec->found_tile_binning_mode_config_packet) {
353 		DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");
354 		return -EINVAL;
355 	}
356 	exec->found_tile_binning_mode_config_packet = true;
357 
358 	exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);
359 	exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);
360 	tile_count = exec->bin_tiles_x * exec->bin_tiles_y;
361 	flags = *(uint8_t *)(untrusted + 14);
362 
363 	if (exec->bin_tiles_x == 0 ||
364 	    exec->bin_tiles_y == 0) {
365 		DRM_ERROR("Tile binning config of %dx%d too small\n",
366 			  exec->bin_tiles_x, exec->bin_tiles_y);
367 		return -EINVAL;
368 	}
369 
370 	if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
371 		     VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
372 		DRM_ERROR("unsupported binning config flags 0x%02x\n", flags);
373 		return -EINVAL;
374 	}
375 
376 	/* The tile state data array is 48 bytes per tile, and we put it at
377 	 * the start of a BO containing both it and the tile alloc.
378 	 */
379 	tile_state_size = 48 * tile_count;
380 
381 	/* Since the tile alloc array will follow us, align. */
382 	exec->tile_alloc_offset = roundup(tile_state_size, 4096);
383 
384 	*(uint8_t *)(validated + 14) =
385 		((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |
386 			    VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |
387 		 VC4_BIN_CONFIG_AUTO_INIT_TSDA |
388 		 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,
389 			       VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |
390 		 VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,
391 			       VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));
392 
393 	/* Initial block size. */
394 	tile_alloc_size = 32 * tile_count;
395 
396 	/*
397 	 * The initial allocation gets rounded to the next 256 bytes before
398 	 * the hardware starts fulfilling further allocations.
399 	 */
400 	tile_alloc_size = roundup(tile_alloc_size, 256);
401 
402 	/* Add space for the extra allocations.  This is what gets used first,
403 	 * before overflow memory.  It must have at least 4096 bytes, but we
404 	 * want to avoid overflow memory usage if possible.
405 	 */
406 	tile_alloc_size += 1024 * 1024;
407 
408 	tile_bo = vc4_bo_create(dev, exec->tile_alloc_offset + tile_alloc_size,
409 				true);
410 	exec->tile_bo = &tile_bo->base;
411 	if (IS_ERR(exec->tile_bo))
412 		return PTR_ERR(exec->tile_bo);
413 	list_add_tail(&tile_bo->unref_head, &exec->unref_list);
414 
415 	/* tile alloc address. */
416 	*(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +
417 					exec->tile_alloc_offset);
418 	/* tile alloc size. */
419 	*(uint32_t *)(validated + 4) = tile_alloc_size;
420 	/* tile state address. */
421 	*(uint32_t *)(validated + 8) = exec->tile_bo->paddr;
422 
423 	return 0;
424 }
425 
426 static int
427 validate_gem_handles(VALIDATE_ARGS)
428 {
429 	memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));
430 	return 0;
431 }
432 
433 #define VC4_DEFINE_PACKET(packet, func) \
434 	[packet] = { packet ## _SIZE, #packet, func }
435 
436 static const struct cmd_info {
437 	uint16_t len;
438 	const char *name;
439 	int (*func)(struct vc4_exec_info *exec, void *validated,
440 		    void *untrusted);
441 } cmd_info[] = {
442 	VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
443 	VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
444 	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
445 	VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
446 	VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
447 			  validate_start_tile_binning),
448 	VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
449 			  validate_increment_semaphore),
450 
451 	VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
452 			  validate_indexed_prim_list),
453 	VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
454 			  validate_gl_array_primitive),
455 
456 	VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
457 
458 	VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
459 
460 	VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
461 	VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
462 	VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
463 	VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
464 	VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
465 	VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
466 	VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
467 	VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
468 	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
469 	/* Note: The docs say this was also 105, but it was 106 in the
470 	 * initial userland code drop.
471 	 */
472 	VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
473 
474 	VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
475 			  validate_tile_binning_config),
476 
477 	VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
478 };
479 
480 int
481 vc4_validate_bin_cl(struct drm_device *dev,
482 		    void *validated,
483 		    void *unvalidated,
484 		    struct vc4_exec_info *exec)
485 {
486 	uint32_t len = exec->args->bin_cl_size;
487 	uint32_t dst_offset = 0;
488 	uint32_t src_offset = 0;
489 
490 	while (src_offset < len) {
491 		void *dst_pkt = validated + dst_offset;
492 		void *src_pkt = unvalidated + src_offset;
493 		u8 cmd = *(uint8_t *)src_pkt;
494 		const struct cmd_info *info;
495 
496 		if (cmd >= ARRAY_SIZE(cmd_info)) {
497 			DRM_ERROR("0x%08x: packet %d out of bounds\n",
498 				  src_offset, cmd);
499 			return -EINVAL;
500 		}
501 
502 		info = &cmd_info[cmd];
503 		if (!info->name) {
504 			DRM_ERROR("0x%08x: packet %d invalid\n",
505 				  src_offset, cmd);
506 			return -EINVAL;
507 		}
508 
509 		if (src_offset + info->len > len) {
510 			DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
511 				  "exceeds bounds (0x%08x)\n",
512 				  src_offset, cmd, info->name, info->len,
513 				  src_offset + len);
514 			return -EINVAL;
515 		}
516 
517 		if (cmd != VC4_PACKET_GEM_HANDLES)
518 			memcpy(dst_pkt, src_pkt, info->len);
519 
520 		if (info->func && info->func(exec,
521 					     dst_pkt + 1,
522 					     src_pkt + 1)) {
523 			DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
524 				  src_offset, cmd, info->name);
525 			return -EINVAL;
526 		}
527 
528 		src_offset += info->len;
529 		/* GEM handle loading doesn't produce HW packets. */
530 		if (cmd != VC4_PACKET_GEM_HANDLES)
531 			dst_offset += info->len;
532 
533 		/* When the CL hits halt, it'll stop reading anything else. */
534 		if (cmd == VC4_PACKET_HALT)
535 			break;
536 	}
537 
538 	exec->ct0ea = exec->ct0ca + dst_offset;
539 
540 	if (!exec->found_start_tile_binning_packet) {
541 		DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");
542 		return -EINVAL;
543 	}
544 
545 	/* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH.  The
546 	 * semaphore is used to trigger the render CL to start up, and the
547 	 * FLUSH is what caps the bin lists with
548 	 * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main
549 	 * render CL when they get called to) and actually triggers the queued
550 	 * semaphore increment.
551 	 */
552 	if (!exec->found_increment_semaphore_packet || !exec->found_flush) {
553 		DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "
554 			  "VC4_PACKET_FLUSH\n");
555 		return -EINVAL;
556 	}
557 
558 	return 0;
559 }
560 
561 static bool
562 reloc_tex(struct vc4_exec_info *exec,
563 	  void *uniform_data_u,
564 	  struct vc4_texture_sample_info *sample,
565 	  uint32_t texture_handle_index, bool is_cs)
566 {
567 	struct drm_gem_cma_object *tex;
568 	uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);
569 	uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);
570 	uint32_t p2 = (sample->p_offset[2] != ~0 ?
571 		       *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);
572 	uint32_t p3 = (sample->p_offset[3] != ~0 ?
573 		       *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);
574 	uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];
575 	uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;
576 	uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);
577 	uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);
578 	uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);
579 	uint32_t cpp, tiling_format, utile_w, utile_h;
580 	uint32_t i;
581 	uint32_t cube_map_stride = 0;
582 	enum vc4_texture_data_type type;
583 
584 	tex = vc4_use_bo(exec, texture_handle_index);
585 	if (!tex)
586 		return false;
587 
588 	if (sample->is_direct) {
589 		uint32_t remaining_size = tex->base.size - p0;
590 
591 		if (p0 > tex->base.size - 4) {
592 			DRM_ERROR("UBO offset greater than UBO size\n");
593 			goto fail;
594 		}
595 		if (p1 > remaining_size - 4) {
596 			DRM_ERROR("UBO clamp would allow reads "
597 				  "outside of UBO\n");
598 			goto fail;
599 		}
600 		*validated_p0 = tex->paddr + p0;
601 		return true;
602 	}
603 
604 	if (width == 0)
605 		width = 2048;
606 	if (height == 0)
607 		height = 2048;
608 
609 	if (p0 & VC4_TEX_P0_CMMODE_MASK) {
610 		if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==
611 		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)
612 			cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;
613 		if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==
614 		    VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {
615 			if (cube_map_stride) {
616 				DRM_ERROR("Cube map stride set twice\n");
617 				goto fail;
618 			}
619 
620 			cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;
621 		}
622 		if (!cube_map_stride) {
623 			DRM_ERROR("Cube map stride not set\n");
624 			goto fail;
625 		}
626 	}
627 
628 	type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |
629 		(VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));
630 
631 	switch (type) {
632 	case VC4_TEXTURE_TYPE_RGBA8888:
633 	case VC4_TEXTURE_TYPE_RGBX8888:
634 	case VC4_TEXTURE_TYPE_RGBA32R:
635 		cpp = 4;
636 		break;
637 	case VC4_TEXTURE_TYPE_RGBA4444:
638 	case VC4_TEXTURE_TYPE_RGBA5551:
639 	case VC4_TEXTURE_TYPE_RGB565:
640 	case VC4_TEXTURE_TYPE_LUMALPHA:
641 	case VC4_TEXTURE_TYPE_S16F:
642 	case VC4_TEXTURE_TYPE_S16:
643 		cpp = 2;
644 		break;
645 	case VC4_TEXTURE_TYPE_LUMINANCE:
646 	case VC4_TEXTURE_TYPE_ALPHA:
647 	case VC4_TEXTURE_TYPE_S8:
648 		cpp = 1;
649 		break;
650 	case VC4_TEXTURE_TYPE_ETC1:
651 		/* ETC1 is arranged as 64-bit blocks, where each block is 4x4
652 		 * pixels.
653 		 */
654 		cpp = 8;
655 		width = (width + 3) >> 2;
656 		height = (height + 3) >> 2;
657 		break;
658 	case VC4_TEXTURE_TYPE_BW1:
659 	case VC4_TEXTURE_TYPE_A4:
660 	case VC4_TEXTURE_TYPE_A1:
661 	case VC4_TEXTURE_TYPE_RGBA64:
662 	case VC4_TEXTURE_TYPE_YUV422R:
663 	default:
664 		DRM_ERROR("Texture format %d unsupported\n", type);
665 		goto fail;
666 	}
667 	utile_w = utile_width(cpp);
668 	utile_h = utile_height(cpp);
669 
670 	if (type == VC4_TEXTURE_TYPE_RGBA32R) {
671 		tiling_format = VC4_TILING_FORMAT_LINEAR;
672 	} else {
673 		if (size_is_lt(width, height, cpp))
674 			tiling_format = VC4_TILING_FORMAT_LT;
675 		else
676 			tiling_format = VC4_TILING_FORMAT_T;
677 	}
678 
679 	if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,
680 				tiling_format, width, height, cpp)) {
681 		goto fail;
682 	}
683 
684 	/* The mipmap levels are stored before the base of the texture.  Make
685 	 * sure there is actually space in the BO.
686 	 */
687 	for (i = 1; i <= miplevels; i++) {
688 		uint32_t level_width = max(width >> i, 1u);
689 		uint32_t level_height = max(height >> i, 1u);
690 		uint32_t aligned_width, aligned_height;
691 		uint32_t level_size;
692 
693 		/* Once the levels get small enough, they drop from T to LT. */
694 		if (tiling_format == VC4_TILING_FORMAT_T &&
695 		    size_is_lt(level_width, level_height, cpp)) {
696 			tiling_format = VC4_TILING_FORMAT_LT;
697 		}
698 
699 		switch (tiling_format) {
700 		case VC4_TILING_FORMAT_T:
701 			aligned_width = round_up(level_width, utile_w * 8);
702 			aligned_height = round_up(level_height, utile_h * 8);
703 			break;
704 		case VC4_TILING_FORMAT_LT:
705 			aligned_width = round_up(level_width, utile_w);
706 			aligned_height = round_up(level_height, utile_h);
707 			break;
708 		default:
709 			aligned_width = round_up(level_width, utile_w);
710 			aligned_height = level_height;
711 			break;
712 		}
713 
714 		level_size = aligned_width * cpp * aligned_height;
715 
716 		if (offset < level_size) {
717 			DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "
718 				  "overflowed buffer bounds (offset %d)\n",
719 				  i, level_width, level_height,
720 				  aligned_width, aligned_height,
721 				  level_size, offset);
722 			goto fail;
723 		}
724 
725 		offset -= level_size;
726 	}
727 
728 	*validated_p0 = tex->paddr + p0;
729 
730 	if (is_cs) {
731 		exec->bin_dep_seqno = max(exec->bin_dep_seqno,
732 					  to_vc4_bo(&tex->base)->write_seqno);
733 	}
734 
735 	return true;
736  fail:
737 	DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);
738 	DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);
739 	DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);
740 	DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);
741 	return false;
742 }
743 
744 static int
745 validate_gl_shader_rec(struct drm_device *dev,
746 		       struct vc4_exec_info *exec,
747 		       struct vc4_shader_state *state)
748 {
749 	uint32_t *src_handles;
750 	void *pkt_u, *pkt_v;
751 	static const uint32_t shader_reloc_offsets[] = {
752 		4, /* fs */
753 		16, /* vs */
754 		28, /* cs */
755 	};
756 	uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);
757 	struct drm_gem_cma_object *bo[shader_reloc_count + 8];
758 	uint32_t nr_attributes, nr_relocs, packet_size;
759 	int i;
760 
761 	nr_attributes = state->addr & 0x7;
762 	if (nr_attributes == 0)
763 		nr_attributes = 8;
764 	packet_size = gl_shader_rec_size(state->addr);
765 
766 	nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;
767 	if (nr_relocs * 4 > exec->shader_rec_size) {
768 		DRM_ERROR("overflowed shader recs reading %d handles "
769 			  "from %d bytes left\n",
770 			  nr_relocs, exec->shader_rec_size);
771 		return -EINVAL;
772 	}
773 	src_handles = exec->shader_rec_u;
774 	exec->shader_rec_u += nr_relocs * 4;
775 	exec->shader_rec_size -= nr_relocs * 4;
776 
777 	if (packet_size > exec->shader_rec_size) {
778 		DRM_ERROR("overflowed shader recs copying %db packet "
779 			  "from %d bytes left\n",
780 			  packet_size, exec->shader_rec_size);
781 		return -EINVAL;
782 	}
783 	pkt_u = exec->shader_rec_u;
784 	pkt_v = exec->shader_rec_v;
785 	memcpy(pkt_v, pkt_u, packet_size);
786 	exec->shader_rec_u += packet_size;
787 	/* Shader recs have to be aligned to 16 bytes (due to the attribute
788 	 * flags being in the low bytes), so round the next validated shader
789 	 * rec address up.  This should be safe, since we've got so many
790 	 * relocations in a shader rec packet.
791 	 */
792 	BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);
793 	exec->shader_rec_v += roundup(packet_size, 16);
794 	exec->shader_rec_size -= packet_size;
795 
796 	for (i = 0; i < shader_reloc_count; i++) {
797 		if (src_handles[i] > exec->bo_count) {
798 			DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
799 			return -EINVAL;
800 		}
801 
802 		bo[i] = exec->bo[src_handles[i]];
803 		if (!bo[i])
804 			return -EINVAL;
805 	}
806 	for (i = shader_reloc_count; i < nr_relocs; i++) {
807 		bo[i] = vc4_use_bo(exec, src_handles[i]);
808 		if (!bo[i])
809 			return -EINVAL;
810 	}
811 
812 	if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
813 	    to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
814 		DRM_ERROR("Thread mode of CL and FS do not match\n");
815 		return -EINVAL;
816 	}
817 
818 	if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
819 	    to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
820 		DRM_ERROR("cs and vs cannot be threaded\n");
821 		return -EINVAL;
822 	}
823 
824 	for (i = 0; i < shader_reloc_count; i++) {
825 		struct vc4_validated_shader_info *validated_shader;
826 		uint32_t o = shader_reloc_offsets[i];
827 		uint32_t src_offset = *(uint32_t *)(pkt_u + o);
828 		uint32_t *texture_handles_u;
829 		void *uniform_data_u;
830 		uint32_t tex, uni;
831 
832 		*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
833 
834 		if (src_offset != 0) {
835 			DRM_ERROR("Shaders must be at offset 0 of "
836 				  "the BO.\n");
837 			return -EINVAL;
838 		}
839 
840 		validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;
841 		if (!validated_shader)
842 			return -EINVAL;
843 
844 		if (validated_shader->uniforms_src_size >
845 		    exec->uniforms_size) {
846 			DRM_ERROR("Uniforms src buffer overflow\n");
847 			return -EINVAL;
848 		}
849 
850 		texture_handles_u = exec->uniforms_u;
851 		uniform_data_u = (texture_handles_u +
852 				  validated_shader->num_texture_samples);
853 
854 		memcpy(exec->uniforms_v, uniform_data_u,
855 		       validated_shader->uniforms_size);
856 
857 		for (tex = 0;
858 		     tex < validated_shader->num_texture_samples;
859 		     tex++) {
860 			if (!reloc_tex(exec,
861 				       uniform_data_u,
862 				       &validated_shader->texture_samples[tex],
863 				       texture_handles_u[tex],
864 				       i == 2)) {
865 				return -EINVAL;
866 			}
867 		}
868 
869 		/* Fill in the uniform slots that need this shader's
870 		 * start-of-uniforms address (used for resetting the uniform
871 		 * stream in the presence of control flow).
872 		 */
873 		for (uni = 0;
874 		     uni < validated_shader->num_uniform_addr_offsets;
875 		     uni++) {
876 			uint32_t o = validated_shader->uniform_addr_offsets[uni];
877 			((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
878 		}
879 
880 		*(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
881 
882 		exec->uniforms_u += validated_shader->uniforms_src_size;
883 		exec->uniforms_v += validated_shader->uniforms_size;
884 		exec->uniforms_p += validated_shader->uniforms_size;
885 	}
886 
887 	for (i = 0; i < nr_attributes; i++) {
888 		struct drm_gem_cma_object *vbo =
889 			bo[ARRAY_SIZE(shader_reloc_offsets) + i];
890 		uint32_t o = 36 + i * 8;
891 		uint32_t offset = *(uint32_t *)(pkt_u + o + 0);
892 		uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;
893 		uint32_t stride = *(uint8_t *)(pkt_u + o + 5);
894 		uint32_t max_index;
895 
896 		exec->bin_dep_seqno = max(exec->bin_dep_seqno,
897 					  to_vc4_bo(&vbo->base)->write_seqno);
898 
899 		if (state->addr & 0x8)
900 			stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;
901 
902 		if (vbo->base.size < offset ||
903 		    vbo->base.size - offset < attr_size) {
904 			DRM_ERROR("BO offset overflow (%d + %d > %zu)\n",
905 				  offset, attr_size, vbo->base.size);
906 			return -EINVAL;
907 		}
908 
909 		if (stride != 0) {
910 			max_index = ((vbo->base.size - offset - attr_size) /
911 				     stride);
912 			if (state->max_index > max_index) {
913 				DRM_ERROR("primitives use index %d out of "
914 					  "supplied %d\n",
915 					  state->max_index, max_index);
916 				return -EINVAL;
917 			}
918 		}
919 
920 		*(uint32_t *)(pkt_v + o) = vbo->paddr + offset;
921 	}
922 
923 	return 0;
924 }
925 
926 int
927 vc4_validate_shader_recs(struct drm_device *dev,
928 			 struct vc4_exec_info *exec)
929 {
930 	uint32_t i;
931 	int ret = 0;
932 
933 	for (i = 0; i < exec->shader_state_count; i++) {
934 		ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
935 		if (ret)
936 			return ret;
937 	}
938 
939 	return ret;
940 }
941