1 /* 2 * store hypervisor information instruction emulation functions. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License (version 2 only) 6 * as published by the Free Software Foundation. 7 * 8 * Copyright IBM Corp. 2016 9 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> 10 */ 11 #include <linux/errno.h> 12 #include <linux/pagemap.h> 13 #include <linux/vmalloc.h> 14 #include <linux/syscalls.h> 15 #include <linux/mutex.h> 16 #include <asm/asm-offsets.h> 17 #include <asm/sclp.h> 18 #include <asm/diag.h> 19 #include <asm/sysinfo.h> 20 #include <asm/ebcdic.h> 21 #include <asm/facility.h> 22 #include <asm/sthyi.h> 23 #include "entry.h" 24 25 #define DED_WEIGHT 0xffff 26 /* 27 * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string 28 * as they are justified with spaces. 29 */ 30 #define CP 0xc3d7404040404040UL 31 #define IFL 0xc9c6d34040404040UL 32 33 enum hdr_flags { 34 HDR_NOT_LPAR = 0x10, 35 HDR_STACK_INCM = 0x20, 36 HDR_STSI_UNAV = 0x40, 37 HDR_PERF_UNAV = 0x80, 38 }; 39 40 enum mac_validity { 41 MAC_NAME_VLD = 0x20, 42 MAC_ID_VLD = 0x40, 43 MAC_CNT_VLD = 0x80, 44 }; 45 46 enum par_flag { 47 PAR_MT_EN = 0x80, 48 }; 49 50 enum par_validity { 51 PAR_GRP_VLD = 0x08, 52 PAR_ID_VLD = 0x10, 53 PAR_ABS_VLD = 0x20, 54 PAR_WGHT_VLD = 0x40, 55 PAR_PCNT_VLD = 0x80, 56 }; 57 58 struct hdr_sctn { 59 u8 infhflg1; 60 u8 infhflg2; /* reserved */ 61 u8 infhval1; /* reserved */ 62 u8 infhval2; /* reserved */ 63 u8 reserved[3]; 64 u8 infhygct; 65 u16 infhtotl; 66 u16 infhdln; 67 u16 infmoff; 68 u16 infmlen; 69 u16 infpoff; 70 u16 infplen; 71 u16 infhoff1; 72 u16 infhlen1; 73 u16 infgoff1; 74 u16 infglen1; 75 u16 infhoff2; 76 u16 infhlen2; 77 u16 infgoff2; 78 u16 infglen2; 79 u16 infhoff3; 80 u16 infhlen3; 81 u16 infgoff3; 82 u16 infglen3; 83 u8 reserved2[4]; 84 } __packed; 85 86 struct mac_sctn { 87 u8 infmflg1; /* reserved */ 88 u8 infmflg2; /* reserved */ 89 u8 infmval1; 90 u8 infmval2; /* reserved */ 91 u16 infmscps; 92 u16 infmdcps; 93 u16 infmsifl; 94 u16 infmdifl; 95 char infmname[8]; 96 char infmtype[4]; 97 char infmmanu[16]; 98 char infmseq[16]; 99 char infmpman[4]; 100 u8 reserved[4]; 101 } __packed; 102 103 struct par_sctn { 104 u8 infpflg1; 105 u8 infpflg2; /* reserved */ 106 u8 infpval1; 107 u8 infpval2; /* reserved */ 108 u16 infppnum; 109 u16 infpscps; 110 u16 infpdcps; 111 u16 infpsifl; 112 u16 infpdifl; 113 u16 reserved; 114 char infppnam[8]; 115 u32 infpwbcp; 116 u32 infpabcp; 117 u32 infpwbif; 118 u32 infpabif; 119 char infplgnm[8]; 120 u32 infplgcp; 121 u32 infplgif; 122 } __packed; 123 124 struct sthyi_sctns { 125 struct hdr_sctn hdr; 126 struct mac_sctn mac; 127 struct par_sctn par; 128 } __packed; 129 130 struct cpu_inf { 131 u64 lpar_cap; 132 u64 lpar_grp_cap; 133 u64 lpar_weight; 134 u64 all_weight; 135 int cpu_num_ded; 136 int cpu_num_shd; 137 }; 138 139 struct lpar_cpu_inf { 140 struct cpu_inf cp; 141 struct cpu_inf ifl; 142 }; 143 144 /* 145 * STHYI requires extensive locking in the higher hypervisors 146 * and is very computational/memory expensive. Therefore we 147 * cache the retrieved data whose valid period is 1s. 148 */ 149 #define CACHE_VALID_JIFFIES HZ 150 151 struct sthyi_info { 152 void *info; 153 unsigned long end; 154 }; 155 156 static DEFINE_MUTEX(sthyi_mutex); 157 static struct sthyi_info sthyi_cache; 158 159 static inline u64 cpu_id(u8 ctidx, void *diag224_buf) 160 { 161 return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); 162 } 163 164 /* 165 * Scales the cpu capping from the lpar range to the one expected in 166 * sthyi data. 167 * 168 * diag204 reports a cap in hundredths of processor units. 169 * z/VM's range for one core is 0 - 0x10000. 170 */ 171 static u32 scale_cap(u32 in) 172 { 173 return (0x10000 * in) / 100; 174 } 175 176 static void fill_hdr(struct sthyi_sctns *sctns) 177 { 178 sctns->hdr.infhdln = sizeof(sctns->hdr); 179 sctns->hdr.infmoff = sizeof(sctns->hdr); 180 sctns->hdr.infmlen = sizeof(sctns->mac); 181 sctns->hdr.infplen = sizeof(sctns->par); 182 sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; 183 sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; 184 } 185 186 static void fill_stsi_mac(struct sthyi_sctns *sctns, 187 struct sysinfo_1_1_1 *sysinfo) 188 { 189 if (stsi(sysinfo, 1, 1, 1)) 190 return; 191 192 sclp_ocf_cpc_name_copy(sctns->mac.infmname); 193 194 memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); 195 memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); 196 memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); 197 memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); 198 199 sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD; 200 } 201 202 static void fill_stsi_par(struct sthyi_sctns *sctns, 203 struct sysinfo_2_2_2 *sysinfo) 204 { 205 if (stsi(sysinfo, 2, 2, 2)) 206 return; 207 208 sctns->par.infppnum = sysinfo->lpar_number; 209 memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); 210 211 sctns->par.infpval1 |= PAR_ID_VLD; 212 } 213 214 static void fill_stsi(struct sthyi_sctns *sctns) 215 { 216 void *sysinfo; 217 218 /* Errors are handled through the validity bits in the response. */ 219 sysinfo = (void *)__get_free_page(GFP_KERNEL); 220 if (!sysinfo) 221 return; 222 223 fill_stsi_mac(sctns, sysinfo); 224 fill_stsi_par(sctns, sysinfo); 225 226 free_pages((unsigned long)sysinfo, 0); 227 } 228 229 static void fill_diag_mac(struct sthyi_sctns *sctns, 230 struct diag204_x_phys_block *block, 231 void *diag224_buf) 232 { 233 int i; 234 235 for (i = 0; i < block->hdr.cpus; i++) { 236 switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { 237 case CP: 238 if (block->cpus[i].weight == DED_WEIGHT) 239 sctns->mac.infmdcps++; 240 else 241 sctns->mac.infmscps++; 242 break; 243 case IFL: 244 if (block->cpus[i].weight == DED_WEIGHT) 245 sctns->mac.infmdifl++; 246 else 247 sctns->mac.infmsifl++; 248 break; 249 } 250 } 251 sctns->mac.infmval1 |= MAC_CNT_VLD; 252 } 253 254 /* Returns a pointer to the the next partition block. */ 255 static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, 256 bool this_lpar, 257 void *diag224_buf, 258 struct diag204_x_part_block *block) 259 { 260 int i, capped = 0, weight_cp = 0, weight_ifl = 0; 261 struct cpu_inf *cpu_inf; 262 263 for (i = 0; i < block->hdr.rcpus; i++) { 264 if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) 265 continue; 266 267 switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { 268 case CP: 269 cpu_inf = &part_inf->cp; 270 if (block->cpus[i].cur_weight < DED_WEIGHT) 271 weight_cp |= block->cpus[i].cur_weight; 272 break; 273 case IFL: 274 cpu_inf = &part_inf->ifl; 275 if (block->cpus[i].cur_weight < DED_WEIGHT) 276 weight_ifl |= block->cpus[i].cur_weight; 277 break; 278 default: 279 continue; 280 } 281 282 if (!this_lpar) 283 continue; 284 285 capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; 286 cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; 287 cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; 288 289 if (block->cpus[i].weight == DED_WEIGHT) 290 cpu_inf->cpu_num_ded += 1; 291 else 292 cpu_inf->cpu_num_shd += 1; 293 } 294 295 if (this_lpar && capped) { 296 part_inf->cp.lpar_weight = weight_cp; 297 part_inf->ifl.lpar_weight = weight_ifl; 298 } 299 part_inf->cp.all_weight += weight_cp; 300 part_inf->ifl.all_weight += weight_ifl; 301 return (struct diag204_x_part_block *)&block->cpus[i]; 302 } 303 304 static void fill_diag(struct sthyi_sctns *sctns) 305 { 306 int i, r, pages; 307 bool this_lpar; 308 void *diag204_buf; 309 void *diag224_buf = NULL; 310 struct diag204_x_info_blk_hdr *ti_hdr; 311 struct diag204_x_part_block *part_block; 312 struct diag204_x_phys_block *phys_block; 313 struct lpar_cpu_inf lpar_inf = {}; 314 315 /* Errors are handled through the validity bits in the response. */ 316 pages = diag204((unsigned long)DIAG204_SUBC_RSI | 317 (unsigned long)DIAG204_INFO_EXT, 0, NULL); 318 if (pages <= 0) 319 return; 320 321 diag204_buf = vmalloc(PAGE_SIZE * pages); 322 if (!diag204_buf) 323 return; 324 325 r = diag204((unsigned long)DIAG204_SUBC_STIB7 | 326 (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf); 327 if (r < 0) 328 goto out; 329 330 diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); 331 if (!diag224_buf || diag224(diag224_buf)) 332 goto out; 333 334 ti_hdr = diag204_buf; 335 part_block = diag204_buf + sizeof(*ti_hdr); 336 337 for (i = 0; i < ti_hdr->npar; i++) { 338 /* 339 * For the calling lpar we also need to get the cpu 340 * caps and weights. The time information block header 341 * specifies the offset to the partition block of the 342 * caller lpar, so we know when we process its data. 343 */ 344 this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; 345 part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf, 346 part_block); 347 } 348 349 phys_block = (struct diag204_x_phys_block *)part_block; 350 part_block = diag204_buf + ti_hdr->this_part; 351 if (part_block->hdr.mtid) 352 sctns->par.infpflg1 = PAR_MT_EN; 353 354 sctns->par.infpval1 |= PAR_GRP_VLD; 355 sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap); 356 sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap); 357 memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, 358 sizeof(sctns->par.infplgnm)); 359 360 sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; 361 sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; 362 sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; 363 sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; 364 sctns->par.infpval1 |= PAR_PCNT_VLD; 365 366 sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap); 367 sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap); 368 sctns->par.infpval1 |= PAR_ABS_VLD; 369 370 /* 371 * Everything below needs global performance data to be 372 * meaningful. 373 */ 374 if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { 375 sctns->hdr.infhflg1 |= HDR_PERF_UNAV; 376 goto out; 377 } 378 379 fill_diag_mac(sctns, phys_block, diag224_buf); 380 381 if (lpar_inf.cp.lpar_weight) { 382 sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * 383 lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; 384 } 385 386 if (lpar_inf.ifl.lpar_weight) { 387 sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * 388 lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; 389 } 390 sctns->par.infpval1 |= PAR_WGHT_VLD; 391 392 out: 393 free_page((unsigned long)diag224_buf); 394 vfree(diag204_buf); 395 } 396 397 static int sthyi(u64 vaddr, u64 *rc) 398 { 399 register u64 code asm("0") = 0; 400 register u64 addr asm("2") = vaddr; 401 register u64 rcode asm("3"); 402 int cc; 403 404 asm volatile( 405 ".insn rre,0xB2560000,%[code],%[addr]\n" 406 "ipm %[cc]\n" 407 "srl %[cc],28\n" 408 : [cc] "=d" (cc), "=d" (rcode) 409 : [code] "d" (code), [addr] "a" (addr) 410 : "memory", "cc"); 411 *rc = rcode; 412 return cc; 413 } 414 415 static int fill_dst(void *dst, u64 *rc) 416 { 417 struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst; 418 419 /* 420 * If the facility is on, we don't want to emulate the instruction. 421 * We ask the hypervisor to provide the data. 422 */ 423 if (test_facility(74)) 424 return sthyi((u64)dst, rc); 425 426 fill_hdr(sctns); 427 fill_stsi(sctns); 428 fill_diag(sctns); 429 *rc = 0; 430 return 0; 431 } 432 433 static int sthyi_init_cache(void) 434 { 435 if (sthyi_cache.info) 436 return 0; 437 sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL); 438 if (!sthyi_cache.info) 439 return -ENOMEM; 440 sthyi_cache.end = jiffies - 1; /* expired */ 441 return 0; 442 } 443 444 static int sthyi_update_cache(u64 *rc) 445 { 446 int r; 447 448 memset(sthyi_cache.info, 0, PAGE_SIZE); 449 r = fill_dst(sthyi_cache.info, rc); 450 if (r) 451 return r; 452 sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES; 453 return r; 454 } 455 456 /* 457 * sthyi_fill - Fill page with data returned by the STHYI instruction 458 * 459 * @dst: Pointer to zeroed page 460 * @rc: Pointer for storing the return code of the instruction 461 * 462 * Fills the destination with system information returned by the STHYI 463 * instruction. The data is generated by emulation or execution of STHYI, 464 * if available. The return value is the condition code that would be 465 * returned, the rc parameter is the return code which is passed in 466 * register R2 + 1. 467 */ 468 int sthyi_fill(void *dst, u64 *rc) 469 { 470 int r; 471 472 mutex_lock(&sthyi_mutex); 473 r = sthyi_init_cache(); 474 if (r) 475 goto out; 476 477 if (time_is_before_jiffies(sthyi_cache.end)) { 478 /* cache expired */ 479 r = sthyi_update_cache(rc); 480 if (r) 481 goto out; 482 } 483 *rc = 0; 484 memcpy(dst, sthyi_cache.info, PAGE_SIZE); 485 out: 486 mutex_unlock(&sthyi_mutex); 487 return r; 488 } 489 EXPORT_SYMBOL_GPL(sthyi_fill); 490 491 SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer, 492 u64 __user *, return_code, unsigned long, flags) 493 { 494 u64 sthyi_rc; 495 void *info; 496 int r; 497 498 if (flags) 499 return -EINVAL; 500 if (function_code != STHYI_FC_CP_IFL_CAP) 501 return -EOPNOTSUPP; 502 info = (void *)get_zeroed_page(GFP_KERNEL); 503 if (!info) 504 return -ENOMEM; 505 r = sthyi_fill(info, &sthyi_rc); 506 if (r < 0) 507 goto out; 508 if (return_code && put_user(sthyi_rc, return_code)) { 509 r = -EFAULT; 510 goto out; 511 } 512 if (copy_to_user(buffer, info, PAGE_SIZE)) 513 r = -EFAULT; 514 out: 515 free_page((unsigned long)info); 516 return r; 517 } 518