1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /**************************************************************************
3  *
4  * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 #include "vmwgfx_drv.h"
28 
/**
 * enum vmw_bo_dirty_method - Different methods for tracking dirty pages
 * @VMW_BO_DIRTY_PAGETABLE: Scan the pagetable for hardware dirty bits.
 * @VMW_BO_DIRTY_MKWRITE: Write-protect page table entries and record
 * write accesses in the VM mkwrite() callback.
 *
 * The method used for a buffer object is stored in &struct vmw_bo_dirty
 * and may be changed by the scan functions in this file.
 */
enum vmw_bo_dirty_method {
	VMW_BO_DIRTY_PAGETABLE,
	VMW_BO_DIRTY_MKWRITE,
};
39 
/*
 * A scan that finds no dirtied pages is a trigger for a transition to the
 * _MKWRITE method; similarly, a scan that finds more than a certain
 * percentage of dirty pages is a trigger for a transition to the
 * _PAGETABLE method. The method is only changed once the number of
 * consecutive triggers exceeds this value.
 */
#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2

/*
 * Percentage of the bitmap size that the number of pages reported by the
 * write-protect pass must exceed to count as a trigger for a transition
 * to the _PAGETABLE method.
 */
#define VMW_DIRTY_PERCENTAGE 10
50 
/**
 * struct vmw_bo_dirty - Dirty information for buffer objects
 * @start: First currently dirty bit
 * @end: Last currently dirty bit + 1. The range is treated as empty
 * whenever @end <= @start; a freshly created tracker has @start equal to
 * @bitmap_size and @end equal to zero.
 * @method: The currently used dirty method
 * @change_count: Number of consecutive method change triggers
 * @ref_count: Reference count for this structure
 * @bitmap_size: The size of the bitmap in bits. Typically equal to the
 * number of pages in the bo.
 * @size: The accounting size for this struct, as charged to and later
 * returned to the TTM global memory accounting.
 * @bitmap: A bitmap where each bit represents a page. A set bit means a
 * dirty page. Allocated together with the struct as a flexible array.
 */
struct vmw_bo_dirty {
	unsigned long start;
	unsigned long end;
	enum vmw_bo_dirty_method method;
	unsigned int change_count;
	unsigned int ref_count;
	unsigned long bitmap_size;
	size_t size;
	unsigned long bitmap[];
};
74 
75 /**
76  * vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits
77  * @vbo: The buffer object to scan
78  *
79  * Scans the pagetable for dirty bits. Clear those bits and modify the
80  * dirty structure with the results. This function may change the
81  * dirty-tracking method.
82  */
83 static void vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object *vbo)
84 {
85 	struct vmw_bo_dirty *dirty = vbo->dirty;
86 	pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
87 	struct address_space *mapping = vbo->base.bdev->dev_mapping;
88 	pgoff_t num_marked;
89 
90 	num_marked = clean_record_shared_mapping_range
91 		(mapping,
92 		 offset, dirty->bitmap_size,
93 		 offset, &dirty->bitmap[0],
94 		 &dirty->start, &dirty->end);
95 	if (num_marked == 0)
96 		dirty->change_count++;
97 	else
98 		dirty->change_count = 0;
99 
100 	if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
101 		dirty->change_count = 0;
102 		dirty->method = VMW_BO_DIRTY_MKWRITE;
103 		wp_shared_mapping_range(mapping,
104 					offset, dirty->bitmap_size);
105 		clean_record_shared_mapping_range(mapping,
106 						  offset, dirty->bitmap_size,
107 						  offset, &dirty->bitmap[0],
108 						  &dirty->start, &dirty->end);
109 	}
110 }
111 
112 /**
113  * vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method
114  * @vbo: The buffer object to scan
115  *
116  * Write-protect pages written to so that consecutive write accesses will
117  * trigger a call to mkwrite.
118  *
119  * This function may change the dirty-tracking method.
120  */
121 static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo)
122 {
123 	struct vmw_bo_dirty *dirty = vbo->dirty;
124 	unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
125 	struct address_space *mapping = vbo->base.bdev->dev_mapping;
126 	pgoff_t num_marked;
127 
128 	if (dirty->end <= dirty->start)
129 		return;
130 
131 	num_marked = wp_shared_mapping_range(vbo->base.bdev->dev_mapping,
132 					dirty->start + offset,
133 					dirty->end - dirty->start);
134 
135 	if (100UL * num_marked / dirty->bitmap_size >
136 	    VMW_DIRTY_PERCENTAGE) {
137 		dirty->change_count++;
138 	} else {
139 		dirty->change_count = 0;
140 	}
141 
142 	if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
143 		pgoff_t start = 0;
144 		pgoff_t end = dirty->bitmap_size;
145 
146 		dirty->method = VMW_BO_DIRTY_PAGETABLE;
147 		clean_record_shared_mapping_range(mapping, offset, end, offset,
148 						  &dirty->bitmap[0],
149 						  &start, &end);
150 		bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size);
151 		if (dirty->start < dirty->end)
152 			bitmap_set(&dirty->bitmap[0], dirty->start,
153 				   dirty->end - dirty->start);
154 		dirty->change_count = 0;
155 	}
156 }
157 
158 /**
159  * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
160  * tracking structure
161  * @vbo: The buffer object to scan
162  *
163  * This function may change the dirty tracking method.
164  */
165 void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
166 {
167 	struct vmw_bo_dirty *dirty = vbo->dirty;
168 
169 	if (dirty->method == VMW_BO_DIRTY_PAGETABLE)
170 		vmw_bo_dirty_scan_pagetable(vbo);
171 	else
172 		vmw_bo_dirty_scan_mkwrite(vbo);
173 }
174 
175 /**
176  * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
177  * an unmap_mapping_range operation.
178  * @vbo: The buffer object,
179  * @start: First page of the range within the buffer object.
180  * @end: Last page of the range within the buffer object + 1.
181  *
182  * If we're using the _PAGETABLE scan method, we may leak dirty pages
183  * when calling unmap_mapping_range(). This function makes sure we pick
184  * up all dirty pages.
185  */
186 static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
187 				   pgoff_t start, pgoff_t end)
188 {
189 	struct vmw_bo_dirty *dirty = vbo->dirty;
190 	unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
191 	struct address_space *mapping = vbo->base.bdev->dev_mapping;
192 
193 	if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
194 		return;
195 
196 	wp_shared_mapping_range(mapping, start + offset, end - start);
197 	clean_record_shared_mapping_range(mapping, start + offset,
198 					  end - start, offset,
199 					  &dirty->bitmap[0], &dirty->start,
200 					  &dirty->end);
201 }
202 
203 /**
204  * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
205  * @vbo: The buffer object,
206  * @start: First page of the range within the buffer object.
207  * @end: Last page of the range within the buffer object + 1.
208  *
209  * This is similar to ttm_bo_unmap_virtual() except it takes a subrange.
210  */
211 void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
212 			pgoff_t start, pgoff_t end)
213 {
214 	unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
215 	struct address_space *mapping = vbo->base.bdev->dev_mapping;
216 
217 	vmw_bo_dirty_pre_unmap(vbo, start, end);
218 	unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
219 				   (loff_t) (end - start) << PAGE_SHIFT);
220 }
221 
222 /**
223  * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
224  * @vbo: The buffer object
225  *
226  * This function registers a dirty-tracking user to a buffer object.
227  * A user can be for example a resource or a vma in a special user-space
228  * mapping.
229  *
230  * Return: Zero on success, -ENOMEM on memory allocation failure.
231  */
232 int vmw_bo_dirty_add(struct vmw_buffer_object *vbo)
233 {
234 	struct vmw_bo_dirty *dirty = vbo->dirty;
235 	pgoff_t num_pages = vbo->base.mem.num_pages;
236 	size_t size, acc_size;
237 	int ret;
238 	static struct ttm_operation_ctx ctx = {
239 		.interruptible = false,
240 		.no_wait_gpu = false
241 	};
242 
243 	if (dirty) {
244 		dirty->ref_count++;
245 		return 0;
246 	}
247 
248 	size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long);
249 	acc_size = ttm_round_pot(size);
250 	ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx);
251 	if (ret) {
252 		VMW_DEBUG_USER("Out of graphics memory for buffer object "
253 			       "dirty tracker.\n");
254 		return ret;
255 	}
256 	dirty = kvzalloc(size, GFP_KERNEL);
257 	if (!dirty) {
258 		ret = -ENOMEM;
259 		goto out_no_dirty;
260 	}
261 
262 	dirty->size = acc_size;
263 	dirty->bitmap_size = num_pages;
264 	dirty->start = dirty->bitmap_size;
265 	dirty->end = 0;
266 	dirty->ref_count = 1;
267 	if (num_pages < PAGE_SIZE / sizeof(pte_t)) {
268 		dirty->method = VMW_BO_DIRTY_PAGETABLE;
269 	} else {
270 		struct address_space *mapping = vbo->base.bdev->dev_mapping;
271 		pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
272 
273 		dirty->method = VMW_BO_DIRTY_MKWRITE;
274 
275 		/* Write-protect and then pick up already dirty bits */
276 		wp_shared_mapping_range(mapping, offset, num_pages);
277 		clean_record_shared_mapping_range(mapping, offset, num_pages,
278 						  offset,
279 						  &dirty->bitmap[0],
280 						  &dirty->start, &dirty->end);
281 	}
282 
283 	vbo->dirty = dirty;
284 
285 	return 0;
286 
287 out_no_dirty:
288 	ttm_mem_global_free(&ttm_mem_glob, acc_size);
289 	return ret;
290 }
291 
292 /**
293  * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object
294  * @vbo: The buffer object
295  *
296  * This function releases a dirty-tracking user from a buffer object.
297  * If the reference count reaches zero, then the dirty-tracking object is
298  * freed and the pointer to it cleared.
299  *
300  * Return: Zero on success, -ENOMEM on memory allocation failure.
301  */
302 void vmw_bo_dirty_release(struct vmw_buffer_object *vbo)
303 {
304 	struct vmw_bo_dirty *dirty = vbo->dirty;
305 
306 	if (dirty && --dirty->ref_count == 0) {
307 		size_t acc_size = dirty->size;
308 
309 		kvfree(dirty);
310 		ttm_mem_global_free(&ttm_mem_glob, acc_size);
311 		vbo->dirty = NULL;
312 	}
313 }
314 
315 /**
316  * vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from
317  * its backing mob.
318  * @res: The resource
319  *
320  * This function will pick up all dirty ranges affecting the resource from
321  * it's backup mob, and call vmw_resource_dirty_update() once for each
322  * range. The transferred ranges will be cleared from the backing mob's
323  * dirty tracking.
324  */
325 void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res)
326 {
327 	struct vmw_buffer_object *vbo = res->backup;
328 	struct vmw_bo_dirty *dirty = vbo->dirty;
329 	pgoff_t start, cur, end;
330 	unsigned long res_start = res->backup_offset;
331 	unsigned long res_end = res->backup_offset + res->backup_size;
332 
333 	WARN_ON_ONCE(res_start & ~PAGE_MASK);
334 	res_start >>= PAGE_SHIFT;
335 	res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
336 
337 	if (res_start >= dirty->end || res_end <= dirty->start)
338 		return;
339 
340 	cur = max(res_start, dirty->start);
341 	res_end = max(res_end, dirty->end);
342 	while (cur < res_end) {
343 		unsigned long num;
344 
345 		start = find_next_bit(&dirty->bitmap[0], res_end, cur);
346 		if (start >= res_end)
347 			break;
348 
349 		end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1);
350 		cur = end + 1;
351 		num = end - start;
352 		bitmap_clear(&dirty->bitmap[0], start, num);
353 		vmw_resource_dirty_update(res, start, end);
354 	}
355 
356 	if (res_start <= dirty->start && res_end > dirty->start)
357 		dirty->start = res_end;
358 	if (res_start < dirty->end && res_end >= dirty->end)
359 		dirty->end = res_start;
360 }
361 
362 /**
363  * vmw_bo_dirty_clear_res - Clear a resource's dirty region from
364  * its backing mob.
365  * @res: The resource
366  *
367  * This function will clear all dirty ranges affecting the resource from
368  * it's backup mob's dirty tracking.
369  */
370 void vmw_bo_dirty_clear_res(struct vmw_resource *res)
371 {
372 	unsigned long res_start = res->backup_offset;
373 	unsigned long res_end = res->backup_offset + res->backup_size;
374 	struct vmw_buffer_object *vbo = res->backup;
375 	struct vmw_bo_dirty *dirty = vbo->dirty;
376 
377 	res_start >>= PAGE_SHIFT;
378 	res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
379 
380 	if (res_start >= dirty->end || res_end <= dirty->start)
381 		return;
382 
383 	res_start = max(res_start, dirty->start);
384 	res_end = min(res_end, dirty->end);
385 	bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start);
386 
387 	if (res_start <= dirty->start && res_end > dirty->start)
388 		dirty->start = res_end;
389 	if (res_start < dirty->end && res_end >= dirty->end)
390 		dirty->end = res_start;
391 }
392 
393 vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf)
394 {
395 	struct vm_area_struct *vma = vmf->vma;
396 	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
397 	    vma->vm_private_data;
398 	vm_fault_t ret;
399 	unsigned long page_offset;
400 	unsigned int save_flags;
401 	struct vmw_buffer_object *vbo =
402 		container_of(bo, typeof(*vbo), base);
403 
404 	/*
405 	 * mkwrite() doesn't handle the VM_FAULT_RETRY return value correctly.
406 	 * So make sure the TTM helpers are aware.
407 	 */
408 	save_flags = vmf->flags;
409 	vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
410 	ret = ttm_bo_vm_reserve(bo, vmf);
411 	vmf->flags = save_flags;
412 	if (ret)
413 		return ret;
414 
415 	page_offset = vmf->pgoff - drm_vma_node_start(&bo->base.vma_node);
416 	if (unlikely(page_offset >= bo->mem.num_pages)) {
417 		ret = VM_FAULT_SIGBUS;
418 		goto out_unlock;
419 	}
420 
421 	if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE &&
422 	    !test_bit(page_offset, &vbo->dirty->bitmap[0])) {
423 		struct vmw_bo_dirty *dirty = vbo->dirty;
424 
425 		__set_bit(page_offset, &dirty->bitmap[0]);
426 		dirty->start = min(dirty->start, page_offset);
427 		dirty->end = max(dirty->end, page_offset + 1);
428 	}
429 
430 out_unlock:
431 	dma_resv_unlock(bo->base.resv);
432 	return ret;
433 }
434 
435 vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
436 {
437 	struct vm_area_struct *vma = vmf->vma;
438 	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
439 	    vma->vm_private_data;
440 	struct vmw_buffer_object *vbo =
441 		container_of(bo, struct vmw_buffer_object, base);
442 	pgoff_t num_prefault;
443 	pgprot_t prot;
444 	vm_fault_t ret;
445 
446 	ret = ttm_bo_vm_reserve(bo, vmf);
447 	if (ret)
448 		return ret;
449 
450 	num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 :
451 		TTM_BO_VM_NUM_PREFAULT;
452 
453 	if (vbo->dirty) {
454 		pgoff_t allowed_prefault;
455 		unsigned long page_offset;
456 
457 		page_offset = vmf->pgoff -
458 			drm_vma_node_start(&bo->base.vma_node);
459 		if (page_offset >= bo->mem.num_pages ||
460 		    vmw_resources_clean(vbo, page_offset,
461 					page_offset + PAGE_SIZE,
462 					&allowed_prefault)) {
463 			ret = VM_FAULT_SIGBUS;
464 			goto out_unlock;
465 		}
466 
467 		num_prefault = min(num_prefault, allowed_prefault);
468 	}
469 
470 	/*
471 	 * If we don't track dirty using the MKWRITE method, make sure
472 	 * sure the page protection is write-enabled so we don't get
473 	 * a lot of unnecessary write faults.
474 	 */
475 	if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
476 		prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
477 	else
478 		prot = vm_get_page_prot(vma->vm_flags);
479 
480 	ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault, 1);
481 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
482 		return ret;
483 
484 out_unlock:
485 	dma_resv_unlock(bo->base.resv);
486 
487 	return ret;
488 }
489 
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/**
 * vmw_bo_vm_huge_fault - Huge page fault handler for vmwgfx buffer objects
 * @vmf: The fault descriptor
 * @pe_size: The page table entry size the fault should be served at
 *
 * Write faults on dirty-tracked buffer objects and on COW mappings are
 * refused with VM_FAULT_FALLBACK so that they are handled at PTE level.
 * Other faults are handed to ttm_bo_vm_fault_reserved() with the number
 * of pages corresponding to @pe_size.
 *
 * Return: VM_FAULT_FALLBACK for an unsupported @pe_size or a refused
 * write fault, VM_FAULT_SIGBUS if the faulting page lies outside the
 * buffer object or vmw_resources_clean() fails, otherwise the code
 * returned by ttm_bo_vm_reserve() or ttm_bo_vm_fault_reserved().
 */
vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
				enum page_entry_size pe_size)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = vma->vm_private_data;
	struct vmw_buffer_object *vbo =
		container_of(bo, struct vmw_buffer_object, base);
	const bool write_access = vmf->flags & FAULT_FLAG_WRITE;
	const bool cow_mapping =
		(vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
	unsigned long prot_flags;
	pgoff_t huge_pages;
	vm_fault_t ret;

	switch (pe_size) {
	case PE_SIZE_PMD:
		huge_pages = HPAGE_PMD_SIZE >> PAGE_SHIFT;
		break;
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	case PE_SIZE_PUD:
		huge_pages = HPAGE_PUD_SIZE >> PAGE_SHIFT;
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		return VM_FAULT_FALLBACK;
	}

	/* Always do write dirty-tracking and COW on PTE level. */
	if (write_access) {
		if (READ_ONCE(vbo->dirty) || cow_mapping)
			return VM_FAULT_FALLBACK;
	}

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	if (!vbo->dirty) {
		prot_flags = vma->vm_flags;
	} else {
		pgoff_t allowed_prefault;
		unsigned long pg;

		pg = vmf->pgoff - drm_vma_node_start(&bo->base.vma_node);
		if (pg >= bo->mem.num_pages ||
		    vmw_resources_clean(vbo, pg, pg + PAGE_SIZE,
					&allowed_prefault)) {
			ret = VM_FAULT_SIGBUS;
			goto out_unlock;
		}

		/*
		 * Write protect, so we get a new fault on write, and can
		 * split.
		 */
		prot_flags = vma->vm_flags & ~VM_SHARED;
	}

	ret = ttm_bo_vm_fault_reserved(vmf, vm_get_page_prot(prot_flags),
				       1, huge_pages);
	/* In this retry case we return without unlocking the reservation. */
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;

out_unlock:
	dma_resv_unlock(bo->base.resv);

	return ret;
}
#endif
561