/*
 * arch/s390/mm/pgtable.c
 *
 * Copyright IBM Corp. 2007
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

/*
 * Constants describing how many page-table fragments fit into one 4K
 * page and which page->flags bits track them (see page_table_alloc()).
 */
#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define TABLES_PER_PAGE	4
#define FRAG_MASK	15UL
#define SECOND_HALVES	10UL

void clear_table_pgstes(unsigned long *table)
{
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
	memset(table + 256, 0, PAGE_SIZE/4);
	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
	memset(table + 768, 0, PAGE_SIZE/4);
}

#else
#define ALLOC_ORDER	2
#define TABLES_PER_PAGE	2
#define FRAG_MASK	3UL
#define SECOND_HALVES	2UL

void clear_table_pgstes(unsigned long *table)
{
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
	memset(table + 256, 0, PAGE_SIZE/2);
}

#endif

/*
 * Allocate a region/segment (crst) table.  If the mm uses the
 * execute-protection emulation (noexec), a shadow table is allocated
 * as well and its address is kept in page->index.
 */
unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	page->index = 0;
	if (noexec) {
		struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
		if (!shadow) {
			__free_pages(page, ALLOC_ORDER);
			return NULL;
		}
		page->index = page_to_phys(shadow);
	}
	spin_lock(&mm->page_table_lock);
	list_add(&page->lru, &mm->context.crst_list);
	spin_unlock(&mm->page_table_lock);
	return (unsigned long *) page_to_phys(page);
}

/* Free a crst table and its shadow table, if any. */
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	unsigned long *shadow = get_shadow_table(table);
	struct page *page = virt_to_page(table);

	spin_lock(&mm->page_table_lock);
	list_del(&page->lru);
	spin_unlock(&mm->page_table_lock);
	if (shadow)
		free_pages((unsigned long) shadow, ALLOC_ORDER);
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
/*
 * Add higher region-table levels until the address space covers
 * at least "limit" bytes.
 */
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm, mm->context.noexec);
	if (!table)
		return -ENOMEM;
	spin_lock(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		table = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}

/*
 * Remove the topmost region-table level(s) until the address space
 * is no larger than "limit" bytes.
 */
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;
	__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
#endif

/*
 * page table entry allocation/free routines.
 */

/*
 * Allocate a page table fragment.  Up to TABLES_PER_PAGE fragments share
 * one 4K page; the low FRAG_MASK bits of page->flags track which
 * fragments are in use.
 */
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	struct page *page;
	unsigned long *table;
	unsigned long bits;

	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
	spin_lock(&mm->page_table_lock);
	page = NULL;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
			page = NULL;
	}
	if (!page) {
		spin_unlock(&mm->page_table_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		page->flags &= ~FRAG_MASK;
		table = (unsigned long *) page_to_phys(page);
		if (mm->context.has_pgste)
			clear_table_pgstes(table);
		else
			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock(&mm->page_table_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	}
	table = (unsigned long *) page_to_phys(page);
	while (page->flags & bits) {
		table += 256;
		bits <<= 1;
	}
	page->flags |= bits;
	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
		list_move_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock(&mm->page_table_lock);
	return table;
}

/*
 * Return a page table fragment; free the 4K page once all of its
 * fragments are unused.
 */
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned long bits;

	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock(&mm->page_table_lock);
	page->flags ^= bits;
	if (page->flags & FRAG_MASK) {
		/* Page now has some free pgtable fragments. */
		list_move(&page->lru, &mm->context.pgtable_list);
		page = NULL;
	} else
		/* All fragments of the 4K page have been freed. */
		list_del(&page->lru);
	spin_unlock(&mm->page_table_lock);
	if (page) {
		pgtable_page_dtor(page);
		__free_page(page);
	}
}

/*
 * Switch off the execute-protection emulation for this mm: release all
 * shadow tables and make the second page-table halves available again.
 */
void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
{
	struct page *page;

	spin_lock(&mm->page_table_lock);
	/* Free shadow region and segment tables. */
	list_for_each_entry(page, &mm->context.crst_list, lru)
		if (page->index) {
			free_pages((unsigned long) page->index, ALLOC_ORDER);
			page->index = 0;
		}
	/* "Free" second halves of page tables. */
	list_for_each_entry(page, &mm->context.pgtable_list, lru)
		page->flags &= ~SECOND_HALVES;
	spin_unlock(&mm->page_table_lock);
	mm->context.noexec = 0;
	update_mm(mm, tsk);
}

/*
 * Switch on pgstes for the current userspace process (for kvm).
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm, *old_mm;

	/* Do we have pgstes? If yes, we are done. */
	if (tsk->mm->context.has_pgste)
		return 0;

	/* Let's check if we are allowed to replace the mm. */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
		task_unlock(tsk);
		return -EINVAL;
	}
	task_unlock(tsk);

	/* We copy the mm and let dup_mm create the page tables with pgstes. */
	tsk->mm->context.alloc_pgste = 1;
	mm = dup_mm(tsk);
	tsk->mm->context.alloc_pgste = 0;
	if (!mm)
		return -ENOMEM;

	/* Now let's check again if something happened. */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
		mmput(mm);
		task_unlock(tsk);
		return -EINVAL;
	}

	/* Ok, we are alone. No ptrace, no threads, etc. */
	old_mm = tsk->mm;
	tsk->mm = tsk->active_mm = mm;
	preempt_disable();
	update_mm(mm, tsk);
	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
	preempt_enable();
	task_unlock(tsk);
	mmput(old_mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
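
/*
 * Illustrative usage sketch, not part of the original file: how a
 * KVM-style caller might use s390_enable_sie() before running a guest
 * under SIE.  The function takes no arguments; it returns 0 once the
 * process runs on pgste-backed page tables (it is a no-op if pgstes
 * already exist), -EINVAL while the mm is still shared (other threads,
 * aio contexts, borrowed active_mm) and -ENOMEM if the replacement mm
 * cannot be allocated.  The function name below is hypothetical and the
 * block is compiled out.
 */
#if 0
static int example_prepare_vm(void)
{
	int rc;

	rc = s390_enable_sie();		/* must succeed before SIE is used */
	if (rc)
		return rc;		/* -EINVAL or -ENOMEM, see above */
	/* ... set up and run the virtual machine ... */
	return 0;
}
#endif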