1 // SPDX-License-Identifier: GPL-2.0-or-later 2 3 /* P9 gzip sample code for demonstrating the P9 NX hardware interface. 4 * Not intended for productive uses or for performance or compression 5 * ratio measurements. For simplicity of demonstration, this sample 6 * code compresses in to fixed Huffman blocks only (Deflate btype=1) 7 * and has very simple memory management. Dynamic Huffman blocks 8 * (Deflate btype=2) are more involved as detailed in the user guide. 9 * Note also that /dev/crypto/gzip, VAS and skiboot support are 10 * required. 11 * 12 * Copyright 2020 IBM Corp. 13 * 14 * https://github.com/libnxz/power-gzip for zlib api and other utils 15 * 16 * Author: Bulent Abali <abali@us.ibm.com> 17 * 18 * Definitions of acronyms used here. See 19 * P9 NX Gzip Accelerator User's Manual for details: 20 * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf 21 * 22 * adler/crc: 32 bit checksums appended to stream tail 23 * ce: completion extension 24 * cpb: coprocessor parameter block (metadata) 25 * crb: coprocessor request block (command) 26 * csb: coprocessor status block (status) 27 * dht: dynamic huffman table 28 * dde: data descriptor element (address, length) 29 * ddl: list of ddes 30 * dh/fh: dynamic and fixed huffman types 31 * fc: coprocessor function code 32 * histlen: history/dictionary length 33 * history: sliding window of up to 32KB of data 34 * lzcount: Deflate LZ symbol counts 35 * rembytecnt: remaining byte count 36 * sfbt: source final block type; last block's type during decomp 37 * spbc: source processed byte count 38 * subc: source unprocessed bit count 39 * tebc: target ending bit count; valid bits in the last byte 40 * tpbc: target processed byte count 41 * vas: virtual accelerator switch; the user mode interface 42 */ 43 44 #define _ISOC11_SOURCE // For aligned_alloc() 45 #define _DEFAULT_SOURCE // For endian.h 46 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <unistd.h> 51 #include 
<stdint.h> 52 #include <sys/types.h> 53 #include <sys/stat.h> 54 #include <sys/time.h> 55 #include <sys/fcntl.h> 56 #include <sys/mman.h> 57 #include <endian.h> 58 #include <bits/endian.h> 59 #include <sys/ioctl.h> 60 #include <assert.h> 61 #include <errno.h> 62 #include <signal.h> 63 #include "nxu.h" 64 #include "nx.h" 65 66 int nx_dbg; 67 FILE *nx_gzip_log; 68 69 #define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) 70 #define FNAME_MAX 1024 71 #define FEXT ".nx.gz" 72 73 /* 74 * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure. 75 */ 76 static int compress_fht_sample(char *src, uint32_t srclen, char *dst, 77 uint32_t dstlen, int with_count, 78 struct nx_gzip_crb_cpb_t *cmdp, void *handle) 79 { 80 uint32_t fc; 81 82 assert(!!cmdp); 83 84 put32(cmdp->crb, gzip_fc, 0); /* clear */ 85 fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT : 86 GZIP_FC_COMPRESS_RESUME_FHT; 87 putnn(cmdp->crb, gzip_fc, fc); 88 putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */ 89 memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb)); 90 91 /* Section 6.6 programming notes; spbc may be in two different 92 * places depending on FC. 
93 */ 94 if (!with_count) 95 put32(cmdp->cpb, out_spbc_comp, 0); 96 else 97 put32(cmdp->cpb, out_spbc_comp_with_count, 0); 98 99 /* Figure 6-3 6-4; CSB location */ 100 put64(cmdp->crb, csb_address, 0); 101 put64(cmdp->crb, csb_address, 102 (uint64_t) &cmdp->crb.csb & csb_address_mask); 103 104 /* Source direct dde (scatter-gather list) */ 105 clear_dde(cmdp->crb.source_dde); 106 putnn(cmdp->crb.source_dde, dde_count, 0); 107 put32(cmdp->crb.source_dde, ddebc, srclen); 108 put64(cmdp->crb.source_dde, ddead, (uint64_t) src); 109 110 /* Target direct dde (scatter-gather list) */ 111 clear_dde(cmdp->crb.target_dde); 112 putnn(cmdp->crb.target_dde, dde_count, 0); 113 put32(cmdp->crb.target_dde, ddebc, dstlen); 114 put64(cmdp->crb.target_dde, ddead, (uint64_t) dst); 115 116 /* Submit the crb, the job descriptor, to the accelerator */ 117 return nxu_submit_job(cmdp, handle); 118 } 119 120 /* 121 * Prepares a blank no filename no timestamp gzip header and returns 122 * the number of bytes written to buf. 123 * Gzip specification at https://tools.ietf.org/html/rfc1952 124 */ 125 int gzip_header_blank(char *buf) 126 { 127 int i = 0; 128 129 buf[i++] = 0x1f; /* ID1 */ 130 buf[i++] = 0x8b; /* ID2 */ 131 buf[i++] = 0x08; /* CM */ 132 buf[i++] = 0x00; /* FLG */ 133 buf[i++] = 0x00; /* MTIME */ 134 buf[i++] = 0x00; /* MTIME */ 135 buf[i++] = 0x00; /* MTIME */ 136 buf[i++] = 0x00; /* MTIME */ 137 buf[i++] = 0x04; /* XFL 4=fastest */ 138 buf[i++] = 0x03; /* OS UNIX */ 139 140 return i; 141 } 142 143 /* Caller must free the allocated buffer return nonzero on error. 
/*
 * Reads the whole of file fname into a newly allocated buffer.
 *
 * On success stores the buffer in *buf and its length in *bufsize and
 * returns 0; the caller must free() *buf.  Returns nonzero on error,
 * in which case *buf and *bufsize are left untouched and nothing
 * remains allocated or open.
 */
int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
{
	struct stat statbuf;
	FILE *fp;
	char *p;
	size_t want;
	size_t num_bytes;

	if (stat(fname, &statbuf)) {
		perror(fname);
		return -1;
	}
	want = (size_t) statbuf.st_size;

	fp = fopen(fname, "rb");
	if (fp == NULL) {
		perror(fname);
		return -1;
	}

	/* malloc(0) may legitimately return NULL, so an empty file still
	 * asks for one byte and gets a pointer the caller can free().
	 */
	p = malloc(want ? want : 1);
	if (p == NULL) {
		perror(fname);
		fclose(fp);
		return -1;
	}

	num_bytes = fread(p, 1, want, fp);
	if (ferror(fp)) {
		perror(fname);
		free(p);
		fclose(fp);
		return -1;
	}
	if (num_bytes != want) {
		/* Not a stream error, so errno carries no information */
		fprintf(stderr, "%s: short read\n", fname);
		free(p);
		fclose(fp);
		return -1;
	}
	fclose(fp);

	*buf = p;
	*bufsize = num_bytes;
	return 0;
}

/*
 * Writes bufsize bytes from buf to file fname, creating or truncating
 * it.  Returns nonzero on error.
 */
int write_output_file(char *fname, char *buf, size_t bufsize)
{
	FILE *fp;
	size_t num_bytes;
	int rc = 0;

	fp = fopen(fname, "wb");
	if (fp == NULL) {
		perror(fname);
		return -1;
	}

	num_bytes = fwrite(buf, 1, bufsize, fp);
	if (ferror(fp) || (num_bytes != bufsize)) {
		perror(fname);
		rc = -1;
	}

	/* fclose() flushes buffered data, so it can be the call that
	 * reports a failed write; always close, even after an error.
	 */
	if (fclose(fp) != 0 && rc == 0) {
		perror(fname);
		rc = -1;
	}
	return rc;
}

/*
 * Z_SYNC_FLUSH as described in zlib.h: appends an empty stored block
 * (3 header bits, padding to a byte boundary, LEN=0x0000, NLEN=0xFFFF)
 * so that the next block starts byte aligned.
 *
 * buf points just past the last byte of compressed data.  tebc (0..7)
 * is the number of valid bits in that last byte; 0 means the byte is
 * full and the block starts at buf, otherwise the block header is
 * merged into the partially filled byte at buf[-1].  The low bit of
 * final becomes the BFINAL bit of the appended block.
 *
 * Returns number of appended bytes (4 or 5).
 */
int append_sync_flush(char *buf, int tebc, int final)
{
	unsigned char *p = (unsigned char *) buf;
	uint64_t flush;
	int shift = (tebc & 0x7);

	if (tebc > 0) {
		/* Last byte is partially full; keep only its valid bits */
		p = p - 1;
		*p = *p & (unsigned char) ((1 << tebc) - 1);
	} else {
		*p = 0;
	}

	/* BFINAL goes right after the valid bits; BTYPE=00 stays zero */
	flush = ((0x1ULL & final) << shift) | *p;
	shift = shift + 3; /* BFINAL and BTYPE written */
	/* Pad to a byte boundary: the header may spill into a second byte */
	shift = (shift <= 8) ? 8 : 16;
	flush |= (0xFFFF0000ULL) << shift; /* Zero length block */
	shift = shift + 32;

	/* Emit the assembled bits least significant byte first */
	while (shift > 0) {
		*p++ = (unsigned char) (flush & 0xffULL);
		flush = flush >> 8;
		shift = shift - 8;
	}
	return ((tebc > 5) || (tebc == 0)) ? 5 : 4;
}
222 */ 223 static void set_bfinal(void *buf, int bfinal) 224 { 225 char *b = buf; 226 227 if (bfinal) 228 *b = *b | (unsigned char) 0x01; 229 else 230 *b = *b & (unsigned char) 0xfe; 231 } 232 233 int compress_file(int argc, char **argv, void *handle) 234 { 235 char *inbuf, *outbuf, *srcbuf, *dstbuf; 236 char outname[FNAME_MAX]; 237 uint32_t srclen, dstlen; 238 uint32_t flushlen, chunk; 239 size_t inlen, outlen, dsttotlen, srctotlen; 240 uint32_t crc, spbc, tpbc, tebc; 241 int lzcounts = 0; 242 int cc; 243 int num_hdr_bytes; 244 struct nx_gzip_crb_cpb_t *cmdp; 245 uint32_t pagelen = 65536; 246 int fault_tries = NX_MAX_FAULTS; 247 248 cmdp = (void *)(uintptr_t) 249 aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t), 250 sizeof(struct nx_gzip_crb_cpb_t)); 251 252 if (argc != 2) { 253 fprintf(stderr, "usage: %s <fname>\n", argv[0]); 254 exit(-1); 255 } 256 if (read_alloc_input_file(argv[1], &inbuf, &inlen)) 257 exit(-1); 258 fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen); 259 260 /* Generous output buffer for header/trailer */ 261 outlen = 2 * inlen + 1024; 262 263 assert(NULL != (outbuf = (char *)malloc(outlen))); 264 nxu_touch_pages(outbuf, outlen, pagelen, 1); 265 266 /* Compress piecemeal in smallish chunks */ 267 chunk = 1<<22; 268 269 /* Write the gzip header to the stream */ 270 num_hdr_bytes = gzip_header_blank(outbuf); 271 dstbuf = outbuf + num_hdr_bytes; 272 outlen = outlen - num_hdr_bytes; 273 dsttotlen = num_hdr_bytes; 274 275 srcbuf = inbuf; 276 srctotlen = 0; 277 278 /* Init the CRB, the coprocessor request block */ 279 memset(&cmdp->crb, 0, sizeof(cmdp->crb)); 280 281 /* Initial gzip crc32 */ 282 put32(cmdp->cpb, in_crc, 0); 283 284 while (inlen > 0) { 285 286 /* Submit chunk size source data per job */ 287 srclen = NX_MIN(chunk, inlen); 288 /* Supply large target in case data expands */ 289 dstlen = NX_MIN(2*srclen, outlen); 290 291 /* Page faults are handled by the user code */ 292 293 /* Fault-in pages; an improved code wouldn't touch so 
294 * many pages but would try to estimate the 295 * compression ratio and adjust both the src and dst 296 * touch amounts. 297 */ 298 nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen, 299 1); 300 nxu_touch_pages(srcbuf, srclen, pagelen, 0); 301 nxu_touch_pages(dstbuf, dstlen, pagelen, 1); 302 303 cc = compress_fht_sample( 304 srcbuf, srclen, 305 dstbuf, dstlen, 306 lzcounts, cmdp, handle); 307 308 if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC && 309 cc != ERR_NX_AT_FAULT) { 310 fprintf(stderr, "nx error: cc= %d\n", cc); 311 exit(-1); 312 } 313 314 /* Page faults are handled by the user code */ 315 if (cc == ERR_NX_AT_FAULT) { 316 NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc)); 317 NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n", 318 fault_tries, 319 (unsigned long long) cmdp->crb.csb.fsaddr)); 320 fault_tries--; 321 if (fault_tries > 0) { 322 continue; 323 } else { 324 fprintf(stderr, "error: cannot progress; "); 325 fprintf(stderr, "too many faults\n"); 326 exit(-1); 327 } 328 } 329 330 fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */ 331 332 inlen = inlen - srclen; 333 srcbuf = srcbuf + srclen; 334 srctotlen = srctotlen + srclen; 335 336 /* Two possible locations for spbc depending on the function 337 * code. 338 */ 339 spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) : 340 get32(cmdp->cpb, out_spbc_comp_with_count); 341 assert(spbc == srclen); 342 343 /* Target byte count */ 344 tpbc = get32(cmdp->crb.csb, tpbc); 345 /* Target ending bit count */ 346 tebc = getnn(cmdp->cpb, out_tebc); 347 NXPRT(fprintf(stderr, "compressed chunk %d ", spbc)); 348 NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc)); 349 350 if (inlen > 0) { /* More chunks to go */ 351 set_bfinal(dstbuf, 0); 352 dstbuf = dstbuf + tpbc; 353 dsttotlen = dsttotlen + tpbc; 354 outlen = outlen - tpbc; 355 /* Round up to the next byte with a flush 356 * block; do not set the BFINAqL bit. 
357 */ 358 flushlen = append_sync_flush(dstbuf, tebc, 0); 359 dsttotlen = dsttotlen + flushlen; 360 outlen = outlen - flushlen; 361 dstbuf = dstbuf + flushlen; 362 NXPRT(fprintf(stderr, "added sync_flush %d bytes\n", 363 flushlen)); 364 } else { /* Done */ 365 /* Set the BFINAL bit of the last block per Deflate 366 * specification. 367 */ 368 set_bfinal(dstbuf, 1); 369 dstbuf = dstbuf + tpbc; 370 dsttotlen = dsttotlen + tpbc; 371 outlen = outlen - tpbc; 372 } 373 374 /* Resuming crc32 for the next chunk */ 375 crc = get32(cmdp->cpb, out_crc); 376 put32(cmdp->cpb, in_crc, crc); 377 crc = be32toh(crc); 378 } 379 380 /* Append crc32 and ISIZE to the end */ 381 memcpy(dstbuf, &crc, 4); 382 memcpy(dstbuf+4, &srctotlen, 4); 383 dsttotlen = dsttotlen + 8; 384 outlen = outlen - 8; 385 386 assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT))); 387 strcpy(outname, argv[1]); 388 strcat(outname, FEXT); 389 if (write_output_file(outname, outbuf, dsttotlen)) { 390 fprintf(stderr, "write error: %s\n", outname); 391 exit(-1); 392 } 393 394 fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen, 395 dsttotlen); 396 fprintf(stderr, "crc32 checksum = %08x\n", crc); 397 398 if (inbuf != NULL) 399 free(inbuf); 400 401 if (outbuf != NULL) 402 free(outbuf); 403 404 return 0; 405 } 406 407 int main(int argc, char **argv) 408 { 409 int rc; 410 struct sigaction act; 411 void *handle; 412 413 nx_dbg = 0; 414 nx_gzip_log = NULL; 415 act.sa_handler = 0; 416 act.sa_sigaction = nxu_sigsegv_handler; 417 act.sa_flags = SA_SIGINFO; 418 act.sa_restorer = 0; 419 sigemptyset(&act.sa_mask); 420 sigaction(SIGSEGV, &act, NULL); 421 422 handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0); 423 if (!handle) { 424 fprintf(stderr, "Unable to init NX, errno %d\n", errno); 425 exit(-1); 426 } 427 428 rc = compress_file(argc, argv, handle); 429 430 nx_function_end(handle); 431 432 return rc; 433 } 434