1 // SPDX-License-Identifier: GPL-2.0-or-later 2 3 /* P9 gzip sample code for demonstrating the P9 NX hardware interface. 4 * Not intended for productive uses or for performance or compression 5 * ratio measurements. For simplicity of demonstration, this sample 6 * code compresses in to fixed Huffman blocks only (Deflate btype=1) 7 * and has very simple memory management. Dynamic Huffman blocks 8 * (Deflate btype=2) are more involved as detailed in the user guide. 9 * Note also that /dev/crypto/gzip, VAS and skiboot support are 10 * required. 11 * 12 * Copyright 2020 IBM Corp. 13 * 14 * https://github.com/libnxz/power-gzip for zlib api and other utils 15 * 16 * Author: Bulent Abali <abali@us.ibm.com> 17 * 18 * Definitions of acronyms used here. See 19 * P9 NX Gzip Accelerator User's Manual for details: 20 * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf 21 * 22 * adler/crc: 32 bit checksums appended to stream tail 23 * ce: completion extension 24 * cpb: coprocessor parameter block (metadata) 25 * crb: coprocessor request block (command) 26 * csb: coprocessor status block (status) 27 * dht: dynamic huffman table 28 * dde: data descriptor element (address, length) 29 * ddl: list of ddes 30 * dh/fh: dynamic and fixed huffman types 31 * fc: coprocessor function code 32 * histlen: history/dictionary length 33 * history: sliding window of up to 32KB of data 34 * lzcount: Deflate LZ symbol counts 35 * rembytecnt: remaining byte count 36 * sfbt: source final block type; last block's type during decomp 37 * spbc: source processed byte count 38 * subc: source unprocessed bit count 39 * tebc: target ending bit count; valid bits in the last byte 40 * tpbc: target processed byte count 41 * vas: virtual accelerator switch; the user mode interface 42 */ 43 44 #define _ISOC11_SOURCE // For aligned_alloc() 45 #define _DEFAULT_SOURCE // For endian.h 46 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <unistd.h> 51 #include <stdint.h> 52 #include <sys/types.h> 53 #include <sys/stat.h> 54 #include <sys/time.h> 55 #include <sys/fcntl.h> 56 #include <sys/mman.h> 57 #include <endian.h> 58 #include <bits/endian.h> 59 #include <sys/ioctl.h> 60 #include <assert.h> 61 #include <errno.h> 62 #include <signal.h> 63 #include "utils.h" 64 #include "nxu.h" 65 #include "nx.h" 66 67 int nx_dbg; 68 FILE *nx_gzip_log; 69 70 #define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) 71 #define FNAME_MAX 1024 72 #define FEXT ".nx.gz" 73 74 #define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len" 75 76 /* 77 * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure. 78 */ 79 static int compress_fht_sample(char *src, uint32_t srclen, char *dst, 80 uint32_t dstlen, int with_count, 81 struct nx_gzip_crb_cpb_t *cmdp, void *handle) 82 { 83 uint32_t fc; 84 85 assert(!!cmdp); 86 87 put32(cmdp->crb, gzip_fc, 0); /* clear */ 88 fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT : 89 GZIP_FC_COMPRESS_RESUME_FHT; 90 putnn(cmdp->crb, gzip_fc, fc); 91 putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */ 92 memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb)); 93 94 /* Section 6.6 programming notes; spbc may be in two different 95 * places depending on FC. 96 */ 97 if (!with_count) 98 put32(cmdp->cpb, out_spbc_comp, 0); 99 else 100 put32(cmdp->cpb, out_spbc_comp_with_count, 0); 101 102 /* Figure 6-3 6-4; CSB location */ 103 put64(cmdp->crb, csb_address, 0); 104 put64(cmdp->crb, csb_address, 105 (uint64_t) &cmdp->crb.csb & csb_address_mask); 106 107 /* Source direct dde (scatter-gather list) */ 108 clear_dde(cmdp->crb.source_dde); 109 putnn(cmdp->crb.source_dde, dde_count, 0); 110 put32(cmdp->crb.source_dde, ddebc, srclen); 111 put64(cmdp->crb.source_dde, ddead, (uint64_t) src); 112 113 /* Target direct dde (scatter-gather list) */ 114 clear_dde(cmdp->crb.target_dde); 115 putnn(cmdp->crb.target_dde, dde_count, 0); 116 put32(cmdp->crb.target_dde, ddebc, dstlen); 117 put64(cmdp->crb.target_dde, ddead, (uint64_t) dst); 118 119 /* Submit the crb, the job descriptor, to the accelerator */ 120 return nxu_submit_job(cmdp, handle); 121 } 122 123 /* 124 * Prepares a blank no filename no timestamp gzip header and returns 125 * the number of bytes written to buf. 126 * Gzip specification at https://tools.ietf.org/html/rfc1952 127 */ 128 int gzip_header_blank(char *buf) 129 { 130 int i = 0; 131 132 buf[i++] = 0x1f; /* ID1 */ 133 buf[i++] = 0x8b; /* ID2 */ 134 buf[i++] = 0x08; /* CM */ 135 buf[i++] = 0x00; /* FLG */ 136 buf[i++] = 0x00; /* MTIME */ 137 buf[i++] = 0x00; /* MTIME */ 138 buf[i++] = 0x00; /* MTIME */ 139 buf[i++] = 0x00; /* MTIME */ 140 buf[i++] = 0x04; /* XFL 4=fastest */ 141 buf[i++] = 0x03; /* OS UNIX */ 142 143 return i; 144 } 145 146 /* Caller must free the allocated buffer return nonzero on error. */ 147 int read_alloc_input_file(char *fname, char **buf, size_t *bufsize) 148 { 149 struct stat statbuf; 150 FILE *fp; 151 char *p; 152 size_t num_bytes; 153 154 if (stat(fname, &statbuf)) { 155 perror(fname); 156 return(-1); 157 } 158 fp = fopen(fname, "r"); 159 if (fp == NULL) { 160 perror(fname); 161 return(-1); 162 } 163 assert(NULL != (p = (char *) malloc(statbuf.st_size))); 164 num_bytes = fread(p, 1, statbuf.st_size, fp); 165 if (ferror(fp) || (num_bytes != statbuf.st_size)) { 166 perror(fname); 167 return(-1); 168 } 169 *buf = p; 170 *bufsize = num_bytes; 171 return 0; 172 } 173 174 /* Returns nonzero on error */ 175 int write_output_file(char *fname, char *buf, size_t bufsize) 176 { 177 FILE *fp; 178 size_t num_bytes; 179 180 fp = fopen(fname, "w"); 181 if (fp == NULL) { 182 perror(fname); 183 return(-1); 184 } 185 num_bytes = fwrite(buf, 1, bufsize, fp); 186 if (ferror(fp) || (num_bytes != bufsize)) { 187 perror(fname); 188 return(-1); 189 } 190 fclose(fp); 191 return 0; 192 } 193 194 /* 195 * Z_SYNC_FLUSH as described in zlib.h. 196 * Returns number of appended bytes 197 */ 198 int append_sync_flush(char *buf, int tebc, int final) 199 { 200 uint64_t flush; 201 int shift = (tebc & 0x7); 202 203 if (tebc > 0) { 204 /* Last byte is partially full */ 205 buf = buf - 1; 206 *buf = *buf & (unsigned char) ((1<<tebc)-1); 207 } else 208 *buf = 0; 209 flush = ((0x1ULL & final) << shift) | *buf; 210 shift = shift + 3; /* BFINAL and BTYPE written */ 211 shift = (shift <= 8) ? 8 : 16; 212 flush |= (0xFFFF0000ULL) << shift; /* Zero length block */ 213 shift = shift + 32; 214 while (shift > 0) { 215 *buf++ = (unsigned char) (flush & 0xffULL); 216 flush = flush >> 8; 217 shift = shift - 8; 218 } 219 return(((tebc > 5) || (tebc == 0)) ? 5 : 4); 220 } 221 222 /* 223 * Final deflate block bit. This call assumes the block 224 * beginning is byte aligned. 225 */ 226 static void set_bfinal(void *buf, int bfinal) 227 { 228 char *b = buf; 229 230 if (bfinal) 231 *b = *b | (unsigned char) 0x01; 232 else 233 *b = *b & (unsigned char) 0xfe; 234 } 235 236 int compress_file(int argc, char **argv, void *handle) 237 { 238 char *inbuf, *outbuf, *srcbuf, *dstbuf; 239 char outname[FNAME_MAX]; 240 uint32_t srclen, dstlen; 241 uint32_t flushlen, chunk; 242 size_t inlen, outlen, dsttotlen, srctotlen; 243 uint32_t crc, spbc, tpbc, tebc; 244 int lzcounts = 0; 245 int cc; 246 int num_hdr_bytes; 247 struct nx_gzip_crb_cpb_t *cmdp; 248 uint32_t pagelen = 65536; 249 int fault_tries = NX_MAX_FAULTS; 250 char buf[32]; 251 252 cmdp = (void *)(uintptr_t) 253 aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t), 254 sizeof(struct nx_gzip_crb_cpb_t)); 255 256 if (argc != 2) { 257 fprintf(stderr, "usage: %s <fname>\n", argv[0]); 258 exit(-1); 259 } 260 if (read_alloc_input_file(argv[1], &inbuf, &inlen)) 261 exit(-1); 262 fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen); 263 264 /* Generous output buffer for header/trailer */ 265 outlen = 2 * inlen + 1024; 266 267 assert(NULL != (outbuf = (char *)malloc(outlen))); 268 nxu_touch_pages(outbuf, outlen, pagelen, 1); 269 270 /* 271 * On PowerVM, the hypervisor defines the maximum request buffer 272 * size is defined and this value is available via sysfs. 273 */ 274 if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) { 275 chunk = atoi(buf); 276 } else { 277 /* sysfs entry is not available on PowerNV */ 278 /* Compress piecemeal in smallish chunks */ 279 chunk = 1<<22; 280 } 281 282 /* Write the gzip header to the stream */ 283 num_hdr_bytes = gzip_header_blank(outbuf); 284 dstbuf = outbuf + num_hdr_bytes; 285 outlen = outlen - num_hdr_bytes; 286 dsttotlen = num_hdr_bytes; 287 288 srcbuf = inbuf; 289 srctotlen = 0; 290 291 /* Init the CRB, the coprocessor request block */ 292 memset(&cmdp->crb, 0, sizeof(cmdp->crb)); 293 294 /* Initial gzip crc32 */ 295 put32(cmdp->cpb, in_crc, 0); 296 297 while (inlen > 0) { 298 299 /* Submit chunk size source data per job */ 300 srclen = NX_MIN(chunk, inlen); 301 /* Supply large target in case data expands */ 302 dstlen = NX_MIN(2*srclen, outlen); 303 304 /* Page faults are handled by the user code */ 305 306 /* Fault-in pages; an improved code wouldn't touch so 307 * many pages but would try to estimate the 308 * compression ratio and adjust both the src and dst 309 * touch amounts. 310 */ 311 nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen, 312 1); 313 nxu_touch_pages(srcbuf, srclen, pagelen, 0); 314 nxu_touch_pages(dstbuf, dstlen, pagelen, 1); 315 316 cc = compress_fht_sample( 317 srcbuf, srclen, 318 dstbuf, dstlen, 319 lzcounts, cmdp, handle); 320 321 if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC && 322 cc != ERR_NX_AT_FAULT) { 323 fprintf(stderr, "nx error: cc= %d\n", cc); 324 exit(-1); 325 } 326 327 /* Page faults are handled by the user code */ 328 if (cc == ERR_NX_AT_FAULT) { 329 NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc)); 330 NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n", 331 fault_tries, 332 (unsigned long long) cmdp->crb.csb.fsaddr)); 333 fault_tries--; 334 if (fault_tries > 0) { 335 continue; 336 } else { 337 fprintf(stderr, "error: cannot progress; "); 338 fprintf(stderr, "too many faults\n"); 339 exit(-1); 340 } 341 } 342 343 fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */ 344 345 inlen = inlen - srclen; 346 srcbuf = srcbuf + srclen; 347 srctotlen = srctotlen + srclen; 348 349 /* Two possible locations for spbc depending on the function 350 * code. 351 */ 352 spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) : 353 get32(cmdp->cpb, out_spbc_comp_with_count); 354 assert(spbc == srclen); 355 356 /* Target byte count */ 357 tpbc = get32(cmdp->crb.csb, tpbc); 358 /* Target ending bit count */ 359 tebc = getnn(cmdp->cpb, out_tebc); 360 NXPRT(fprintf(stderr, "compressed chunk %d ", spbc)); 361 NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc)); 362 363 if (inlen > 0) { /* More chunks to go */ 364 set_bfinal(dstbuf, 0); 365 dstbuf = dstbuf + tpbc; 366 dsttotlen = dsttotlen + tpbc; 367 outlen = outlen - tpbc; 368 /* Round up to the next byte with a flush 369 * block; do not set the BFINAqL bit. 370 */ 371 flushlen = append_sync_flush(dstbuf, tebc, 0); 372 dsttotlen = dsttotlen + flushlen; 373 outlen = outlen - flushlen; 374 dstbuf = dstbuf + flushlen; 375 NXPRT(fprintf(stderr, "added sync_flush %d bytes\n", 376 flushlen)); 377 } else { /* Done */ 378 /* Set the BFINAL bit of the last block per Deflate 379 * specification. 380 */ 381 set_bfinal(dstbuf, 1); 382 dstbuf = dstbuf + tpbc; 383 dsttotlen = dsttotlen + tpbc; 384 outlen = outlen - tpbc; 385 } 386 387 /* Resuming crc32 for the next chunk */ 388 crc = get32(cmdp->cpb, out_crc); 389 put32(cmdp->cpb, in_crc, crc); 390 crc = be32toh(crc); 391 } 392 393 /* Append crc32 and ISIZE to the end */ 394 memcpy(dstbuf, &crc, 4); 395 memcpy(dstbuf+4, &srctotlen, 4); 396 dsttotlen = dsttotlen + 8; 397 outlen = outlen - 8; 398 399 assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT))); 400 strcpy(outname, argv[1]); 401 strcat(outname, FEXT); 402 if (write_output_file(outname, outbuf, dsttotlen)) { 403 fprintf(stderr, "write error: %s\n", outname); 404 exit(-1); 405 } 406 407 fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen, 408 dsttotlen); 409 fprintf(stderr, "crc32 checksum = %08x\n", crc); 410 411 if (inbuf != NULL) 412 free(inbuf); 413 414 if (outbuf != NULL) 415 free(outbuf); 416 417 return 0; 418 } 419 420 int main(int argc, char **argv) 421 { 422 int rc; 423 struct sigaction act; 424 void *handle; 425 426 nx_dbg = 0; 427 nx_gzip_log = NULL; 428 act.sa_handler = 0; 429 act.sa_sigaction = nxu_sigsegv_handler; 430 act.sa_flags = SA_SIGINFO; 431 act.sa_restorer = 0; 432 sigemptyset(&act.sa_mask); 433 sigaction(SIGSEGV, &act, NULL); 434 435 handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0); 436 if (!handle) { 437 fprintf(stderr, "Unable to init NX, errno %d\n", errno); 438 exit(-1); 439 } 440 441 rc = compress_file(argc, argv, handle); 442 443 nx_function_end(handle); 444 445 return rc; 446 } 447