1 // SPDX-License-Identifier: GPL-2.0-or-later 2 3 /* P9 gzip sample code for demonstrating the P9 NX hardware interface. 4 * Not intended for productive uses or for performance or compression 5 * ratio measurements. For simplicity of demonstration, this sample 6 * code compresses in to fixed Huffman blocks only (Deflate btype=1) 7 * and has very simple memory management. Dynamic Huffman blocks 8 * (Deflate btype=2) are more involved as detailed in the user guide. 9 * Note also that /dev/crypto/gzip, VAS and skiboot support are 10 * required. 11 * 12 * Copyright 2020 IBM Corp. 13 * 14 * https://github.com/libnxz/power-gzip for zlib api and other utils 15 * 16 * Author: Bulent Abali <abali@us.ibm.com> 17 * 18 * Definitions of acronyms used here. See 19 * P9 NX Gzip Accelerator User's Manual for details: 20 * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf 21 * 22 * adler/crc: 32 bit checksums appended to stream tail 23 * ce: completion extension 24 * cpb: coprocessor parameter block (metadata) 25 * crb: coprocessor request block (command) 26 * csb: coprocessor status block (status) 27 * dht: dynamic huffman table 28 * dde: data descriptor element (address, length) 29 * ddl: list of ddes 30 * dh/fh: dynamic and fixed huffman types 31 * fc: coprocessor function code 32 * histlen: history/dictionary length 33 * history: sliding window of up to 32KB of data 34 * lzcount: Deflate LZ symbol counts 35 * rembytecnt: remaining byte count 36 * sfbt: source final block type; last block's type during decomp 37 * spbc: source processed byte count 38 * subc: source unprocessed bit count 39 * tebc: target ending bit count; valid bits in the last byte 40 * tpbc: target processed byte count 41 * vas: virtual accelerator switch; the user mode interface 42 */ 43 44 #define _ISOC11_SOURCE // For aligned_alloc() 45 #define _DEFAULT_SOURCE // For endian.h 46 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <unistd.h> 51 #include <stdint.h> 52 #include <sys/types.h> 53 #include <sys/stat.h> 54 #include <sys/time.h> 55 #include <sys/fcntl.h> 56 #include <sys/mman.h> 57 #include <endian.h> 58 #include <bits/endian.h> 59 #include <sys/ioctl.h> 60 #include <assert.h> 61 #include <errno.h> 62 #include <signal.h> 63 #include "utils.h" 64 #include "nxu.h" 65 #include "nx.h" 66 67 int nx_dbg; 68 FILE *nx_gzip_log; 69 70 #define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y)) 71 #define FNAME_MAX 1024 72 #define FEXT ".nx.gz" 73 74 #define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len" 75 76 /* 77 * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure. 78 */ 79 static int compress_fht_sample(char *src, uint32_t srclen, char *dst, 80 uint32_t dstlen, int with_count, 81 struct nx_gzip_crb_cpb_t *cmdp, void *handle) 82 { 83 uint32_t fc; 84 85 assert(!!cmdp); 86 87 put32(cmdp->crb, gzip_fc, 0); /* clear */ 88 fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT : 89 GZIP_FC_COMPRESS_RESUME_FHT; 90 putnn(cmdp->crb, gzip_fc, fc); 91 putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */ 92 memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb)); 93 94 /* Section 6.6 programming notes; spbc may be in two different 95 * places depending on FC. 96 */ 97 if (!with_count) 98 put32(cmdp->cpb, out_spbc_comp, 0); 99 else 100 put32(cmdp->cpb, out_spbc_comp_with_count, 0); 101 102 /* Figure 6-3 6-4; CSB location */ 103 put64(cmdp->crb, csb_address, 0); 104 put64(cmdp->crb, csb_address, 105 (uint64_t) &cmdp->crb.csb & csb_address_mask); 106 107 /* Source direct dde (scatter-gather list) */ 108 clear_dde(cmdp->crb.source_dde); 109 putnn(cmdp->crb.source_dde, dde_count, 0); 110 put32(cmdp->crb.source_dde, ddebc, srclen); 111 put64(cmdp->crb.source_dde, ddead, (uint64_t) src); 112 113 /* Target direct dde (scatter-gather list) */ 114 clear_dde(cmdp->crb.target_dde); 115 putnn(cmdp->crb.target_dde, dde_count, 0); 116 put32(cmdp->crb.target_dde, ddebc, dstlen); 117 put64(cmdp->crb.target_dde, ddead, (uint64_t) dst); 118 119 /* Submit the crb, the job descriptor, to the accelerator */ 120 return nxu_submit_job(cmdp, handle); 121 } 122 123 /* 124 * Prepares a blank no filename no timestamp gzip header and returns 125 * the number of bytes written to buf. 126 * Gzip specification at https://tools.ietf.org/html/rfc1952 127 */ 128 int gzip_header_blank(char *buf) 129 { 130 int i = 0; 131 132 buf[i++] = 0x1f; /* ID1 */ 133 buf[i++] = 0x8b; /* ID2 */ 134 buf[i++] = 0x08; /* CM */ 135 buf[i++] = 0x00; /* FLG */ 136 buf[i++] = 0x00; /* MTIME */ 137 buf[i++] = 0x00; /* MTIME */ 138 buf[i++] = 0x00; /* MTIME */ 139 buf[i++] = 0x00; /* MTIME */ 140 buf[i++] = 0x04; /* XFL 4=fastest */ 141 buf[i++] = 0x03; /* OS UNIX */ 142 143 return i; 144 } 145 146 /* 147 * Z_SYNC_FLUSH as described in zlib.h. 148 * Returns number of appended bytes 149 */ 150 int append_sync_flush(char *buf, int tebc, int final) 151 { 152 uint64_t flush; 153 int shift = (tebc & 0x7); 154 155 if (tebc > 0) { 156 /* Last byte is partially full */ 157 buf = buf - 1; 158 *buf = *buf & (unsigned char) ((1<<tebc)-1); 159 } else 160 *buf = 0; 161 flush = ((0x1ULL & final) << shift) | *buf; 162 shift = shift + 3; /* BFINAL and BTYPE written */ 163 shift = (shift <= 8) ? 8 : 16; 164 flush |= (0xFFFF0000ULL) << shift; /* Zero length block */ 165 shift = shift + 32; 166 while (shift > 0) { 167 *buf++ = (unsigned char) (flush & 0xffULL); 168 flush = flush >> 8; 169 shift = shift - 8; 170 } 171 return(((tebc > 5) || (tebc == 0)) ? 5 : 4); 172 } 173 174 /* 175 * Final deflate block bit. This call assumes the block 176 * beginning is byte aligned. 177 */ 178 static void set_bfinal(void *buf, int bfinal) 179 { 180 char *b = buf; 181 182 if (bfinal) 183 *b = *b | (unsigned char) 0x01; 184 else 185 *b = *b & (unsigned char) 0xfe; 186 } 187 188 int compress_file(int argc, char **argv, void *handle) 189 { 190 char *inbuf, *outbuf, *srcbuf, *dstbuf; 191 char outname[FNAME_MAX]; 192 uint32_t srclen, dstlen; 193 uint32_t flushlen, chunk; 194 size_t inlen, outlen, dsttotlen, srctotlen; 195 uint32_t crc, spbc, tpbc, tebc; 196 int lzcounts = 0; 197 int cc; 198 int num_hdr_bytes; 199 struct nx_gzip_crb_cpb_t *cmdp; 200 uint32_t pagelen = 65536; 201 int fault_tries = NX_MAX_FAULTS; 202 char buf[32]; 203 204 cmdp = (void *)(uintptr_t) 205 aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t), 206 sizeof(struct nx_gzip_crb_cpb_t)); 207 208 if (argc != 2) { 209 fprintf(stderr, "usage: %s <fname>\n", argv[0]); 210 exit(-1); 211 } 212 if (read_file_alloc(argv[1], &inbuf, &inlen)) 213 exit(-1); 214 fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen); 215 216 /* Generous output buffer for header/trailer */ 217 outlen = 2 * inlen + 1024; 218 219 assert(NULL != (outbuf = (char *)malloc(outlen))); 220 nxu_touch_pages(outbuf, outlen, pagelen, 1); 221 222 /* 223 * On PowerVM, the hypervisor defines the maximum request buffer 224 * size is defined and this value is available via sysfs. 225 */ 226 if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) { 227 chunk = atoi(buf); 228 } else { 229 /* sysfs entry is not available on PowerNV */ 230 /* Compress piecemeal in smallish chunks */ 231 chunk = 1<<22; 232 } 233 234 /* Write the gzip header to the stream */ 235 num_hdr_bytes = gzip_header_blank(outbuf); 236 dstbuf = outbuf + num_hdr_bytes; 237 outlen = outlen - num_hdr_bytes; 238 dsttotlen = num_hdr_bytes; 239 240 srcbuf = inbuf; 241 srctotlen = 0; 242 243 /* Init the CRB, the coprocessor request block */ 244 memset(&cmdp->crb, 0, sizeof(cmdp->crb)); 245 246 /* Initial gzip crc32 */ 247 put32(cmdp->cpb, in_crc, 0); 248 249 while (inlen > 0) { 250 251 /* Submit chunk size source data per job */ 252 srclen = NX_MIN(chunk, inlen); 253 /* Supply large target in case data expands */ 254 dstlen = NX_MIN(2*srclen, outlen); 255 256 /* Page faults are handled by the user code */ 257 258 /* Fault-in pages; an improved code wouldn't touch so 259 * many pages but would try to estimate the 260 * compression ratio and adjust both the src and dst 261 * touch amounts. 262 */ 263 nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen, 264 1); 265 nxu_touch_pages(srcbuf, srclen, pagelen, 0); 266 nxu_touch_pages(dstbuf, dstlen, pagelen, 1); 267 268 cc = compress_fht_sample( 269 srcbuf, srclen, 270 dstbuf, dstlen, 271 lzcounts, cmdp, handle); 272 273 if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC && 274 cc != ERR_NX_AT_FAULT) { 275 fprintf(stderr, "nx error: cc= %d\n", cc); 276 exit(-1); 277 } 278 279 /* Page faults are handled by the user code */ 280 if (cc == ERR_NX_AT_FAULT) { 281 NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc)); 282 NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n", 283 fault_tries, 284 (unsigned long long) cmdp->crb.csb.fsaddr)); 285 fault_tries--; 286 if (fault_tries > 0) { 287 continue; 288 } else { 289 fprintf(stderr, "error: cannot progress; "); 290 fprintf(stderr, "too many faults\n"); 291 exit(-1); 292 } 293 } 294 295 fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */ 296 297 inlen = inlen - srclen; 298 srcbuf = srcbuf + srclen; 299 srctotlen = srctotlen + srclen; 300 301 /* Two possible locations for spbc depending on the function 302 * code. 303 */ 304 spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) : 305 get32(cmdp->cpb, out_spbc_comp_with_count); 306 assert(spbc == srclen); 307 308 /* Target byte count */ 309 tpbc = get32(cmdp->crb.csb, tpbc); 310 /* Target ending bit count */ 311 tebc = getnn(cmdp->cpb, out_tebc); 312 NXPRT(fprintf(stderr, "compressed chunk %d ", spbc)); 313 NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc)); 314 315 if (inlen > 0) { /* More chunks to go */ 316 set_bfinal(dstbuf, 0); 317 dstbuf = dstbuf + tpbc; 318 dsttotlen = dsttotlen + tpbc; 319 outlen = outlen - tpbc; 320 /* Round up to the next byte with a flush 321 * block; do not set the BFINAqL bit. 322 */ 323 flushlen = append_sync_flush(dstbuf, tebc, 0); 324 dsttotlen = dsttotlen + flushlen; 325 outlen = outlen - flushlen; 326 dstbuf = dstbuf + flushlen; 327 NXPRT(fprintf(stderr, "added sync_flush %d bytes\n", 328 flushlen)); 329 } else { /* Done */ 330 /* Set the BFINAL bit of the last block per Deflate 331 * specification. 332 */ 333 set_bfinal(dstbuf, 1); 334 dstbuf = dstbuf + tpbc; 335 dsttotlen = dsttotlen + tpbc; 336 outlen = outlen - tpbc; 337 } 338 339 /* Resuming crc32 for the next chunk */ 340 crc = get32(cmdp->cpb, out_crc); 341 put32(cmdp->cpb, in_crc, crc); 342 crc = be32toh(crc); 343 } 344 345 /* Append crc32 and ISIZE to the end */ 346 memcpy(dstbuf, &crc, 4); 347 memcpy(dstbuf+4, &srctotlen, 4); 348 dsttotlen = dsttotlen + 8; 349 outlen = outlen - 8; 350 351 assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT))); 352 strcpy(outname, argv[1]); 353 strcat(outname, FEXT); 354 if (write_file(outname, outbuf, dsttotlen)) { 355 fprintf(stderr, "write error: %s\n", outname); 356 exit(-1); 357 } 358 359 fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen, 360 dsttotlen); 361 fprintf(stderr, "crc32 checksum = %08x\n", crc); 362 363 if (inbuf != NULL) 364 free(inbuf); 365 366 if (outbuf != NULL) 367 free(outbuf); 368 369 return 0; 370 } 371 372 int main(int argc, char **argv) 373 { 374 int rc; 375 struct sigaction act; 376 void *handle; 377 378 nx_dbg = 0; 379 nx_gzip_log = NULL; 380 act.sa_handler = 0; 381 act.sa_sigaction = nxu_sigsegv_handler; 382 act.sa_flags = SA_SIGINFO; 383 act.sa_restorer = 0; 384 sigemptyset(&act.sa_mask); 385 sigaction(SIGSEGV, &act, NULL); 386 387 handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0); 388 if (!handle) { 389 fprintf(stderr, "Unable to init NX, errno %d\n", errno); 390 exit(-1); 391 } 392 393 rc = compress_file(argc, argv, handle); 394 395 nx_function_end(handle); 396 397 return rc; 398 } 399