1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * store hypervisor information instruction emulation functions. 4 * 5 * Copyright IBM Corp. 2016 6 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> 7 */ 8 #include <linux/errno.h> 9 #include <linux/pagemap.h> 10 #include <linux/vmalloc.h> 11 #include <linux/syscalls.h> 12 #include <linux/mutex.h> 13 #include <asm/asm-offsets.h> 14 #include <asm/sclp.h> 15 #include <asm/diag.h> 16 #include <asm/sysinfo.h> 17 #include <asm/ebcdic.h> 18 #include <asm/facility.h> 19 #include <asm/sthyi.h> 20 #include "entry.h" 21 22 #define DED_WEIGHT 0xffff 23 /* 24 * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string 25 * as they are justified with spaces. 26 */ 27 #define CP 0xc3d7404040404040UL 28 #define IFL 0xc9c6d34040404040UL 29 30 enum hdr_flags { 31 HDR_NOT_LPAR = 0x10, 32 HDR_STACK_INCM = 0x20, 33 HDR_STSI_UNAV = 0x40, 34 HDR_PERF_UNAV = 0x80, 35 }; 36 37 enum mac_validity { 38 MAC_NAME_VLD = 0x20, 39 MAC_ID_VLD = 0x40, 40 MAC_CNT_VLD = 0x80, 41 }; 42 43 enum par_flag { 44 PAR_MT_EN = 0x80, 45 }; 46 47 enum par_validity { 48 PAR_GRP_VLD = 0x08, 49 PAR_ID_VLD = 0x10, 50 PAR_ABS_VLD = 0x20, 51 PAR_WGHT_VLD = 0x40, 52 PAR_PCNT_VLD = 0x80, 53 }; 54 55 struct hdr_sctn { 56 u8 infhflg1; 57 u8 infhflg2; /* reserved */ 58 u8 infhval1; /* reserved */ 59 u8 infhval2; /* reserved */ 60 u8 reserved[3]; 61 u8 infhygct; 62 u16 infhtotl; 63 u16 infhdln; 64 u16 infmoff; 65 u16 infmlen; 66 u16 infpoff; 67 u16 infplen; 68 u16 infhoff1; 69 u16 infhlen1; 70 u16 infgoff1; 71 u16 infglen1; 72 u16 infhoff2; 73 u16 infhlen2; 74 u16 infgoff2; 75 u16 infglen2; 76 u16 infhoff3; 77 u16 infhlen3; 78 u16 infgoff3; 79 u16 infglen3; 80 u8 reserved2[4]; 81 } __packed; 82 83 struct mac_sctn { 84 u8 infmflg1; /* reserved */ 85 u8 infmflg2; /* reserved */ 86 u8 infmval1; 87 u8 infmval2; /* reserved */ 88 u16 infmscps; 89 u16 infmdcps; 90 u16 infmsifl; 91 u16 infmdifl; 92 char infmname[8]; 93 char infmtype[4]; 94 char infmmanu[16]; 95 char infmseq[16]; 96 char infmpman[4]; 97 u8 reserved[4]; 98 } __packed; 99 100 struct par_sctn { 101 u8 infpflg1; 102 u8 infpflg2; /* reserved */ 103 u8 infpval1; 104 u8 infpval2; /* reserved */ 105 u16 infppnum; 106 u16 infpscps; 107 u16 infpdcps; 108 u16 infpsifl; 109 u16 infpdifl; 110 u16 reserved; 111 char infppnam[8]; 112 u32 infpwbcp; 113 u32 infpabcp; 114 u32 infpwbif; 115 u32 infpabif; 116 char infplgnm[8]; 117 u32 infplgcp; 118 u32 infplgif; 119 } __packed; 120 121 struct sthyi_sctns { 122 struct hdr_sctn hdr; 123 struct mac_sctn mac; 124 struct par_sctn par; 125 } __packed; 126 127 struct cpu_inf { 128 u64 lpar_cap; 129 u64 lpar_grp_cap; 130 u64 lpar_weight; 131 u64 all_weight; 132 int cpu_num_ded; 133 int cpu_num_shd; 134 }; 135 136 struct lpar_cpu_inf { 137 struct cpu_inf cp; 138 struct cpu_inf ifl; 139 }; 140 141 /* 142 * STHYI requires extensive locking in the higher hypervisors 143 * and is very computational/memory expensive. Therefore we 144 * cache the retrieved data whose valid period is 1s. 145 */ 146 #define CACHE_VALID_JIFFIES HZ 147 148 struct sthyi_info { 149 void *info; 150 unsigned long end; 151 }; 152 153 static DEFINE_MUTEX(sthyi_mutex); 154 static struct sthyi_info sthyi_cache; 155 156 static inline u64 cpu_id(u8 ctidx, void *diag224_buf) 157 { 158 return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); 159 } 160 161 /* 162 * Scales the cpu capping from the lpar range to the one expected in 163 * sthyi data. 164 * 165 * diag204 reports a cap in hundredths of processor units. 166 * z/VM's range for one core is 0 - 0x10000. 167 */ 168 static u32 scale_cap(u32 in) 169 { 170 return (0x10000 * in) / 100; 171 } 172 173 static void fill_hdr(struct sthyi_sctns *sctns) 174 { 175 sctns->hdr.infhdln = sizeof(sctns->hdr); 176 sctns->hdr.infmoff = sizeof(sctns->hdr); 177 sctns->hdr.infmlen = sizeof(sctns->mac); 178 sctns->hdr.infplen = sizeof(sctns->par); 179 sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; 180 sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; 181 } 182 183 static void fill_stsi_mac(struct sthyi_sctns *sctns, 184 struct sysinfo_1_1_1 *sysinfo) 185 { 186 sclp_ocf_cpc_name_copy(sctns->mac.infmname); 187 if (*(u64 *)sctns->mac.infmname != 0) 188 sctns->mac.infmval1 |= MAC_NAME_VLD; 189 190 if (stsi(sysinfo, 1, 1, 1)) 191 return; 192 193 memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); 194 memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); 195 memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); 196 memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); 197 198 sctns->mac.infmval1 |= MAC_ID_VLD; 199 } 200 201 static void fill_stsi_par(struct sthyi_sctns *sctns, 202 struct sysinfo_2_2_2 *sysinfo) 203 { 204 if (stsi(sysinfo, 2, 2, 2)) 205 return; 206 207 sctns->par.infppnum = sysinfo->lpar_number; 208 memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); 209 210 sctns->par.infpval1 |= PAR_ID_VLD; 211 } 212 213 static void fill_stsi(struct sthyi_sctns *sctns) 214 { 215 void *sysinfo; 216 217 /* Errors are handled through the validity bits in the response. */ 218 sysinfo = (void *)__get_free_page(GFP_KERNEL); 219 if (!sysinfo) 220 return; 221 222 fill_stsi_mac(sctns, sysinfo); 223 fill_stsi_par(sctns, sysinfo); 224 225 free_pages((unsigned long)sysinfo, 0); 226 } 227 228 static void fill_diag_mac(struct sthyi_sctns *sctns, 229 struct diag204_x_phys_block *block, 230 void *diag224_buf) 231 { 232 int i; 233 234 for (i = 0; i < block->hdr.cpus; i++) { 235 switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { 236 case CP: 237 if (block->cpus[i].weight == DED_WEIGHT) 238 sctns->mac.infmdcps++; 239 else 240 sctns->mac.infmscps++; 241 break; 242 case IFL: 243 if (block->cpus[i].weight == DED_WEIGHT) 244 sctns->mac.infmdifl++; 245 else 246 sctns->mac.infmsifl++; 247 break; 248 } 249 } 250 sctns->mac.infmval1 |= MAC_CNT_VLD; 251 } 252 253 /* Returns a pointer to the the next partition block. */ 254 static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, 255 bool this_lpar, 256 void *diag224_buf, 257 struct diag204_x_part_block *block) 258 { 259 int i, capped = 0, weight_cp = 0, weight_ifl = 0; 260 struct cpu_inf *cpu_inf; 261 262 for (i = 0; i < block->hdr.rcpus; i++) { 263 if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) 264 continue; 265 266 switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { 267 case CP: 268 cpu_inf = &part_inf->cp; 269 if (block->cpus[i].cur_weight < DED_WEIGHT) 270 weight_cp |= block->cpus[i].cur_weight; 271 break; 272 case IFL: 273 cpu_inf = &part_inf->ifl; 274 if (block->cpus[i].cur_weight < DED_WEIGHT) 275 weight_ifl |= block->cpus[i].cur_weight; 276 break; 277 default: 278 continue; 279 } 280 281 if (!this_lpar) 282 continue; 283 284 capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; 285 cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; 286 cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; 287 288 if (block->cpus[i].weight == DED_WEIGHT) 289 cpu_inf->cpu_num_ded += 1; 290 else 291 cpu_inf->cpu_num_shd += 1; 292 } 293 294 if (this_lpar && capped) { 295 part_inf->cp.lpar_weight = weight_cp; 296 part_inf->ifl.lpar_weight = weight_ifl; 297 } 298 part_inf->cp.all_weight += weight_cp; 299 part_inf->ifl.all_weight += weight_ifl; 300 return (struct diag204_x_part_block *)&block->cpus[i]; 301 } 302 303 static void fill_diag(struct sthyi_sctns *sctns) 304 { 305 int i, r, pages; 306 bool this_lpar; 307 void *diag204_buf; 308 void *diag224_buf = NULL; 309 struct diag204_x_info_blk_hdr *ti_hdr; 310 struct diag204_x_part_block *part_block; 311 struct diag204_x_phys_block *phys_block; 312 struct lpar_cpu_inf lpar_inf = {}; 313 314 /* Errors are handled through the validity bits in the response. */ 315 pages = diag204((unsigned long)DIAG204_SUBC_RSI | 316 (unsigned long)DIAG204_INFO_EXT, 0, NULL); 317 if (pages <= 0) 318 return; 319 320 diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE), 321 PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, 322 __builtin_return_address(0)); 323 if (!diag204_buf) 324 return; 325 326 r = diag204((unsigned long)DIAG204_SUBC_STIB7 | 327 (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf); 328 if (r < 0) 329 goto out; 330 331 diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); 332 if (!diag224_buf || diag224(diag224_buf)) 333 goto out; 334 335 ti_hdr = diag204_buf; 336 part_block = diag204_buf + sizeof(*ti_hdr); 337 338 for (i = 0; i < ti_hdr->npar; i++) { 339 /* 340 * For the calling lpar we also need to get the cpu 341 * caps and weights. The time information block header 342 * specifies the offset to the partition block of the 343 * caller lpar, so we know when we process its data. 344 */ 345 this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; 346 part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf, 347 part_block); 348 } 349 350 phys_block = (struct diag204_x_phys_block *)part_block; 351 part_block = diag204_buf + ti_hdr->this_part; 352 if (part_block->hdr.mtid) 353 sctns->par.infpflg1 = PAR_MT_EN; 354 355 sctns->par.infpval1 |= PAR_GRP_VLD; 356 sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap); 357 sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap); 358 memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, 359 sizeof(sctns->par.infplgnm)); 360 361 sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; 362 sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; 363 sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; 364 sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; 365 sctns->par.infpval1 |= PAR_PCNT_VLD; 366 367 sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap); 368 sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap); 369 sctns->par.infpval1 |= PAR_ABS_VLD; 370 371 /* 372 * Everything below needs global performance data to be 373 * meaningful. 374 */ 375 if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { 376 sctns->hdr.infhflg1 |= HDR_PERF_UNAV; 377 goto out; 378 } 379 380 fill_diag_mac(sctns, phys_block, diag224_buf); 381 382 if (lpar_inf.cp.lpar_weight) { 383 sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * 384 lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; 385 } 386 387 if (lpar_inf.ifl.lpar_weight) { 388 sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * 389 lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; 390 } 391 sctns->par.infpval1 |= PAR_WGHT_VLD; 392 393 out: 394 free_page((unsigned long)diag224_buf); 395 vfree(diag204_buf); 396 } 397 398 static int sthyi(u64 vaddr, u64 *rc) 399 { 400 union register_pair r1 = { .even = 0, }; /* subcode */ 401 union register_pair r2 = { .even = vaddr, }; 402 int cc; 403 404 asm volatile( 405 ".insn rre,0xB2560000,%[r1],%[r2]\n" 406 "ipm %[cc]\n" 407 "srl %[cc],28\n" 408 : [cc] "=&d" (cc), [r2] "+&d" (r2.pair) 409 : [r1] "d" (r1.pair) 410 : "memory", "cc"); 411 *rc = r2.odd; 412 return cc; 413 } 414 415 static int fill_dst(void *dst, u64 *rc) 416 { 417 struct sthyi_sctns *sctns = (struct sthyi_sctns *)dst; 418 419 /* 420 * If the facility is on, we don't want to emulate the instruction. 421 * We ask the hypervisor to provide the data. 422 */ 423 if (test_facility(74)) 424 return sthyi((u64)dst, rc); 425 426 fill_hdr(sctns); 427 fill_stsi(sctns); 428 fill_diag(sctns); 429 *rc = 0; 430 return 0; 431 } 432 433 static int sthyi_init_cache(void) 434 { 435 if (sthyi_cache.info) 436 return 0; 437 sthyi_cache.info = (void *)get_zeroed_page(GFP_KERNEL); 438 if (!sthyi_cache.info) 439 return -ENOMEM; 440 sthyi_cache.end = jiffies - 1; /* expired */ 441 return 0; 442 } 443 444 static int sthyi_update_cache(u64 *rc) 445 { 446 int r; 447 448 memset(sthyi_cache.info, 0, PAGE_SIZE); 449 r = fill_dst(sthyi_cache.info, rc); 450 if (r) 451 return r; 452 sthyi_cache.end = jiffies + CACHE_VALID_JIFFIES; 453 return r; 454 } 455 456 /* 457 * sthyi_fill - Fill page with data returned by the STHYI instruction 458 * 459 * @dst: Pointer to zeroed page 460 * @rc: Pointer for storing the return code of the instruction 461 * 462 * Fills the destination with system information returned by the STHYI 463 * instruction. The data is generated by emulation or execution of STHYI, 464 * if available. The return value is the condition code that would be 465 * returned, the rc parameter is the return code which is passed in 466 * register R2 + 1. 467 */ 468 int sthyi_fill(void *dst, u64 *rc) 469 { 470 int r; 471 472 mutex_lock(&sthyi_mutex); 473 r = sthyi_init_cache(); 474 if (r) 475 goto out; 476 477 if (time_is_before_jiffies(sthyi_cache.end)) { 478 /* cache expired */ 479 r = sthyi_update_cache(rc); 480 if (r) 481 goto out; 482 } 483 *rc = 0; 484 memcpy(dst, sthyi_cache.info, PAGE_SIZE); 485 out: 486 mutex_unlock(&sthyi_mutex); 487 return r; 488 } 489 EXPORT_SYMBOL_GPL(sthyi_fill); 490 491 SYSCALL_DEFINE4(s390_sthyi, unsigned long, function_code, void __user *, buffer, 492 u64 __user *, return_code, unsigned long, flags) 493 { 494 u64 sthyi_rc; 495 void *info; 496 int r; 497 498 if (flags) 499 return -EINVAL; 500 if (function_code != STHYI_FC_CP_IFL_CAP) 501 return -EOPNOTSUPP; 502 info = (void *)get_zeroed_page(GFP_KERNEL); 503 if (!info) 504 return -ENOMEM; 505 r = sthyi_fill(info, &sthyi_rc); 506 if (r < 0) 507 goto out; 508 if (return_code && put_user(sthyi_rc, return_code)) { 509 r = -EFAULT; 510 goto out; 511 } 512 if (copy_to_user(buffer, info, PAGE_SIZE)) 513 r = -EFAULT; 514 out: 515 free_page((unsigned long)info); 516 return r; 517 } 518