/*
 * arch/s390/mm/pgtable.c
 *
 * Copyright IBM Corp. 2007
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define TABLES_PER_PAGE	4
#define FRAG_MASK	15UL
#define SECOND_HALVES	10UL

void clear_table_pgstes(unsigned long *table)
{
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
	memset(table + 256, 0, PAGE_SIZE/4);
	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
	memset(table + 768, 0, PAGE_SIZE/4);
}

#else
#define ALLOC_ORDER	2
#define TABLES_PER_PAGE	2
#define FRAG_MASK	3UL
#define SECOND_HALVES	2UL

void clear_table_pgstes(unsigned long *table)
{
	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
	memset(table + 256, 0, PAGE_SIZE/2);
}

#endif

unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
{
	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

	if (!page)
		return NULL;
	page->index = 0;
	if (noexec) {
		struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
		if (!shadow) {
			__free_pages(page, ALLOC_ORDER);
			return NULL;
		}
		page->index = page_to_phys(shadow);
	}
	spin_lock(&mm->page_table_lock);
	list_add(&page->lru, &mm->context.crst_list);
	spin_unlock(&mm->page_table_lock);
	return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
	unsigned long *shadow = get_shadow_table(table);
	struct page *page = virt_to_page(table);

	spin_lock(&mm->page_table_lock);
	list_del(&page->lru);
	spin_unlock(&mm->page_table_lock);
	if (shadow)
		free_pages((unsigned long) shadow, ALLOC_ORDER);
	free_pages((unsigned long) table, ALLOC_ORDER);
}

#ifdef CONFIG_64BIT
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
	unsigned long *table, *pgd;
	unsigned long entry;

	BUG_ON(limit > (1UL << 53));
repeat:
	table = crst_table_alloc(mm, mm->context.noexec);
	if (!table)
		return -ENOMEM;
	spin_lock(&mm->page_table_lock);
	if (mm->context.asce_limit < limit) {
		pgd = (unsigned long *) mm->pgd;
		if (mm->context.asce_limit <= (1UL << 31)) {
			entry = _REGION3_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
		} else {
			entry = _REGION2_ENTRY_EMPTY;
			mm->context.asce_limit = 1UL << 53;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION2;
		}
		crst_table_init(table, entry);
		pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
		mm->pgd = (pgd_t *) table;
		mm->task_size = mm->context.asce_limit;
		table = NULL;
	}
	spin_unlock(&mm->page_table_lock);
	if (table)
		crst_table_free(mm, table);
	if (mm->context.asce_limit < limit)
		goto repeat;
	update_mm(mm, current);
	return 0;
}

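/*
 * Undo a crst_table_upgrade(): free region tables until the address
 * space limit is back at or below 'limit', then reload the mm context.
 */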
void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
	pgd_t *pgd;

	if (mm->context.asce_limit <= limit)
		return;
	__tlb_flush_mm(mm);
	while (mm->context.asce_limit > limit) {
		pgd = mm->pgd;
		switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
		case _REGION_ENTRY_TYPE_R2:
			mm->context.asce_limit = 1UL << 42;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_REGION3;
			break;
		case _REGION_ENTRY_TYPE_R3:
			mm->context.asce_limit = 1UL << 31;
			mm->context.asce_bits = _ASCE_TABLE_LENGTH |
						_ASCE_USER_BITS |
						_ASCE_TYPE_SEGMENT;
			break;
		default:
			BUG();
		}
		mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
		mm->task_size = mm->context.asce_limit;
		crst_table_free(mm, (unsigned long *) pgd);
	}
	update_mm(mm, current);
}
#endif

/*
 * page table entry allocation/free routines.
 */
unsigned long *page_table_alloc(struct mm_struct *mm)
{
	struct page *page;
	unsigned long *table;
	unsigned long bits;

	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
	spin_lock(&mm->page_table_lock);
	page = NULL;
	if (!list_empty(&mm->context.pgtable_list)) {
		page = list_first_entry(&mm->context.pgtable_list,
					struct page, lru);
		if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
			page = NULL;
	}
	if (!page) {
		spin_unlock(&mm->page_table_lock);
		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
		if (!page)
			return NULL;
		pgtable_page_ctor(page);
		page->flags &= ~FRAG_MASK;
		table = (unsigned long *) page_to_phys(page);
		if (mm->context.has_pgste)
			clear_table_pgstes(table);
		else
			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
		spin_lock(&mm->page_table_lock);
		list_add(&page->lru, &mm->context.pgtable_list);
	}
	table = (unsigned long *) page_to_phys(page);
	while (page->flags & bits) {
		table += 256;
		bits <<= 1;
	}
	page->flags |= bits;
	if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
		list_move_tail(&page->lru, &mm->context.pgtable_list);
	spin_unlock(&mm->page_table_lock);
	return table;
}

void page_table_free(struct mm_struct *mm, unsigned long *table)
{
	struct page *page;
	unsigned long bits;

	bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
	spin_lock(&mm->page_table_lock);
	page->flags ^= bits;
	if (page->flags & FRAG_MASK) {
		/* Page now has some free pgtable fragments. */
		list_move(&page->lru, &mm->context.pgtable_list);
		page = NULL;
	} else
		/* All fragments of the 4K page have been freed. */
		list_del(&page->lru);
	spin_unlock(&mm->page_table_lock);
	if (page) {
		pgtable_page_dtor(page);
		__free_page(page);
	}
}

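/*
 * Drop the execute-protection shadow structures for this mm: release
 * the shadow region/segment tables, mark the second page table halves
 * as unused again, and reload the mm with noexec disabled.
 */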
void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
{
	struct page *page;

	spin_lock(&mm->page_table_lock);
	/* Free shadow region and segment tables. */
	list_for_each_entry(page, &mm->context.crst_list, lru)
		if (page->index) {
			free_pages((unsigned long) page->index, ALLOC_ORDER);
			page->index = 0;
		}
	/* "Free" second halves of page tables. */
	list_for_each_entry(page, &mm->context.pgtable_list, lru)
		page->flags &= ~SECOND_HALVES;
	spin_unlock(&mm->page_table_lock);
	mm->context.noexec = 0;
	update_mm(mm, tsk);
}

/*
 * Switch on pgstes for the current userspace process (needed by KVM).
 */
int s390_enable_sie(void)
{
	struct task_struct *tsk = current;
	struct mm_struct *mm, *old_mm;

	/* Do we have switched amode? If not, we cannot do sie. */
	if (!switch_amode)
		return -EINVAL;

	/* Do we have pgstes? If yes, we are done. */
	if (tsk->mm->context.has_pgste)
		return 0;

	/* Let's check if we are allowed to replace the mm. */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
	    tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
		task_unlock(tsk);
		return -EINVAL;
	}
	task_unlock(tsk);

	/* We copy the mm and let dup_mm create the page tables with pgstes. */
	tsk->mm->context.alloc_pgste = 1;
	mm = dup_mm(tsk);
	tsk->mm->context.alloc_pgste = 0;
	if (!mm)
		return -ENOMEM;

	/* Now let's check again if something happened. */
	task_lock(tsk);
	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
	    tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
		mmput(mm);
		task_unlock(tsk);
		return -EINVAL;
	}

	/* OK, we are alone. No ptrace, no threads, etc. */
	old_mm = tsk->mm;
	tsk->mm = tsk->active_mm = mm;
	preempt_disable();
	update_mm(mm, tsk);
	cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
	preempt_enable();
	task_unlock(tsk);
	mmput(old_mm);
	return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);