1 // SPDX-License-Identifier: GPL-2.0-or-later
2
3 /* P9 gzip sample code for demonstrating the P9 NX hardware interface.
4 * Not intended for production use or for performance or compression
5 * ratio measurements. For simplicity of demonstration, this sample
6 * code compresses into fixed Huffman blocks only (Deflate btype=1)
7 * and has very simple memory management. Dynamic Huffman blocks
8 * (Deflate btype=2) are more involved as detailed in the user guide.
9 * Note also that /dev/crypto/gzip, VAS and skiboot support are
10 * required.
11 *
12 * Copyright 2020 IBM Corp.
13 *
14 * https://github.com/libnxz/power-gzip for zlib api and other utils
15 *
16 * Author: Bulent Abali <abali@us.ibm.com>
17 *
18 * Definitions of acronyms used here. See
19 * P9 NX Gzip Accelerator User's Manual for details:
20 * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
21 *
22 * adler/crc: 32 bit checksums appended to stream tail
23 * ce: completion extension
24 * cpb: coprocessor parameter block (metadata)
25 * crb: coprocessor request block (command)
26 * csb: coprocessor status block (status)
27 * dht: dynamic huffman table
28 * dde: data descriptor element (address, length)
29 * ddl: list of ddes
30 * dh/fh: dynamic and fixed huffman types
31 * fc: coprocessor function code
32 * histlen: history/dictionary length
33 * history: sliding window of up to 32KB of data
34 * lzcount: Deflate LZ symbol counts
35 * rembytecnt: remaining byte count
36 * sfbt: source final block type; last block's type during decomp
37 * spbc: source processed byte count
38 * subc: source unprocessed bit count
39 * tebc: target ending bit count; valid bits in the last byte
40 * tpbc: target processed byte count
41 * vas: virtual accelerator switch; the user mode interface
42 */
43
44 #define _ISOC11_SOURCE // For aligned_alloc()
45 #define _DEFAULT_SOURCE // For endian.h
46
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <stdint.h>
52 #include <sys/types.h>
53 #include <sys/stat.h>
54 #include <sys/time.h>
55 #include <sys/fcntl.h>
56 #include <sys/mman.h>
57 #include <endian.h>
58 #include <bits/endian.h>
59 #include <sys/ioctl.h>
60 #include <assert.h>
61 #include <errno.h>
62 #include <signal.h>
63 #include "utils.h"
64 #include "nxu.h"
65 #include "nx.h"
66
67 int nx_dbg;
68 FILE *nx_gzip_log;
69
70 #define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
71 #define FNAME_MAX 1024
72 #define FEXT ".nx.gz"
73
74 #define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len"
75
76 /*
77 * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
78 */
compress_fht_sample(char * src,uint32_t srclen,char * dst,uint32_t dstlen,int with_count,struct nx_gzip_crb_cpb_t * cmdp,void * handle)79 static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
80 uint32_t dstlen, int with_count,
81 struct nx_gzip_crb_cpb_t *cmdp, void *handle)
82 {
83 uint32_t fc;
84
85 assert(!!cmdp);
86
87 put32(cmdp->crb, gzip_fc, 0); /* clear */
88 fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
89 GZIP_FC_COMPRESS_RESUME_FHT;
90 putnn(cmdp->crb, gzip_fc, fc);
91 putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
92 memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
93
94 /* Section 6.6 programming notes; spbc may be in two different
95 * places depending on FC.
96 */
97 if (!with_count)
98 put32(cmdp->cpb, out_spbc_comp, 0);
99 else
100 put32(cmdp->cpb, out_spbc_comp_with_count, 0);
101
102 /* Figure 6-3 6-4; CSB location */
103 put64(cmdp->crb, csb_address, 0);
104 put64(cmdp->crb, csb_address,
105 (uint64_t) &cmdp->crb.csb & csb_address_mask);
106
107 /* Source direct dde (scatter-gather list) */
108 clear_dde(cmdp->crb.source_dde);
109 putnn(cmdp->crb.source_dde, dde_count, 0);
110 put32(cmdp->crb.source_dde, ddebc, srclen);
111 put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
112
113 /* Target direct dde (scatter-gather list) */
114 clear_dde(cmdp->crb.target_dde);
115 putnn(cmdp->crb.target_dde, dde_count, 0);
116 put32(cmdp->crb.target_dde, ddebc, dstlen);
117 put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
118
119 /* Submit the crb, the job descriptor, to the accelerator */
120 return nxu_submit_job(cmdp, handle);
121 }
122
/*
 * Writes a minimal gzip header (no file name, zero timestamp) at buf and
 * returns the number of bytes written. buf must have room for that many
 * bytes.
 * Gzip specification at https://tools.ietf.org/html/rfc1952
 */
int gzip_header_blank(char *buf)
{
	static const unsigned char blank_hdr[] = {
		0x1f,			/* ID1 */
		0x8b,			/* ID2 */
		0x08,			/* CM */
		0x00,			/* FLG */
		0x00, 0x00, 0x00, 0x00,	/* MTIME */
		0x04,			/* XFL 4=fastest */
		0x03,			/* OS UNIX */
	};

	memcpy(buf, blank_hdr, sizeof(blank_hdr));

	return (int) sizeof(blank_hdr);
}
145
/*
 * Z_SYNC_FLUSH as described in zlib.h: appends an empty stored block
 * (zero length followed by 0xFFFF) so that the stream ends on a byte
 * boundary.
 *
 * buf:   points just past the last byte already produced
 * tebc:  number of valid bits in that last byte; when greater than zero
 *        the byte at buf[-1] is partially full and is merged with the
 *        flush bits, when zero the block starts in a fresh byte at buf[0]
 * final: low bit is written as the block's BFINAL bit
 *
 * Returns number of appended bytes (the partially filled byte at buf[-1]
 * is not counted).
 */
int append_sync_flush(char *buf, int tebc, int final)
{
	int bitpos = (tebc & 0x7);
	int nbits;
	uint64_t word;

	if (tebc > 0) {
		/* Last byte is partially full; keep only its valid bits */
		buf--;
		*buf &= (unsigned char) ((1<<tebc)-1);
	} else {
		*buf = 0;
	}

	/* BFINAL goes right after the valid bits; BTYPE stays 00 */
	word = ((0x1ULL & final) << bitpos) | *buf;

	/* The 3 header bits end either in this byte or in the next one */
	nbits = ((bitpos + 3) <= 8) ? 8 : 16;

	/* Zero length block */
	word |= (0xFFFF0000ULL) << nbits;

	/* Emit the header byte(s) and the 32 bit length pair, low byte
	 * first.
	 */
	for (nbits += 32; nbits > 0; nbits -= 8) {
		*buf++ = (unsigned char) (word & 0xffULL);
		word >>= 8;
	}

	if ((tebc == 0) || (tebc > 5))
		return 5;
	return 4;
}
173
/*
 * Sets or clears the final deflate block bit, which is the lowest bit of
 * the first byte at buf. This call assumes the block beginning is byte
 * aligned. All other bits of that byte are left untouched.
 */
static void set_bfinal(void *buf, int bfinal)
{
	char *first = buf;

	*first = bfinal ? (*first | (unsigned char) 0x01)
			: (*first & (unsigned char) 0xfe);
}
187
compress_file(int argc,char ** argv,void * handle)188 int compress_file(int argc, char **argv, void *handle)
189 {
190 char *inbuf, *outbuf, *srcbuf, *dstbuf;
191 char outname[FNAME_MAX];
192 uint32_t srclen, dstlen;
193 uint32_t flushlen, chunk;
194 size_t inlen, outlen, dsttotlen, srctotlen;
195 uint32_t crc, spbc, tpbc, tebc;
196 int lzcounts = 0;
197 int cc;
198 int num_hdr_bytes;
199 struct nx_gzip_crb_cpb_t *cmdp;
200 uint32_t pagelen = 65536;
201 int fault_tries = NX_MAX_FAULTS;
202 char buf[32];
203
204 cmdp = (void *)(uintptr_t)
205 aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
206 sizeof(struct nx_gzip_crb_cpb_t));
207
208 if (argc != 2) {
209 fprintf(stderr, "usage: %s <fname>\n", argv[0]);
210 exit(-1);
211 }
212 if (read_file_alloc(argv[1], &inbuf, &inlen))
213 exit(-1);
214 fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen);
215
216 /* Generous output buffer for header/trailer */
217 outlen = 2 * inlen + 1024;
218
219 assert(NULL != (outbuf = (char *)malloc(outlen)));
220 nxu_touch_pages(outbuf, outlen, pagelen, 1);
221
222 /*
223 * On PowerVM, the hypervisor defines the maximum request buffer
224 * size is defined and this value is available via sysfs.
225 */
226 if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) {
227 chunk = atoi(buf);
228 } else {
229 /* sysfs entry is not available on PowerNV */
230 /* Compress piecemeal in smallish chunks */
231 chunk = 1<<22;
232 }
233
234 /* Write the gzip header to the stream */
235 num_hdr_bytes = gzip_header_blank(outbuf);
236 dstbuf = outbuf + num_hdr_bytes;
237 outlen = outlen - num_hdr_bytes;
238 dsttotlen = num_hdr_bytes;
239
240 srcbuf = inbuf;
241 srctotlen = 0;
242
243 /* Init the CRB, the coprocessor request block */
244 memset(&cmdp->crb, 0, sizeof(cmdp->crb));
245
246 /* Initial gzip crc32 */
247 put32(cmdp->cpb, in_crc, 0);
248
249 while (inlen > 0) {
250
251 /* Submit chunk size source data per job */
252 srclen = NX_MIN(chunk, inlen);
253 /* Supply large target in case data expands */
254 dstlen = NX_MIN(2*srclen, outlen);
255
256 /* Page faults are handled by the user code */
257
258 /* Fault-in pages; an improved code wouldn't touch so
259 * many pages but would try to estimate the
260 * compression ratio and adjust both the src and dst
261 * touch amounts.
262 */
263 nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
264 1);
265 nxu_touch_pages(srcbuf, srclen, pagelen, 0);
266 nxu_touch_pages(dstbuf, dstlen, pagelen, 1);
267
268 cc = compress_fht_sample(
269 srcbuf, srclen,
270 dstbuf, dstlen,
271 lzcounts, cmdp, handle);
272
273 if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
274 cc != ERR_NX_AT_FAULT) {
275 fprintf(stderr, "nx error: cc= %d\n", cc);
276 exit(-1);
277 }
278
279 /* Page faults are handled by the user code */
280 if (cc == ERR_NX_AT_FAULT) {
281 NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
282 NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
283 fault_tries,
284 (unsigned long long) cmdp->crb.csb.fsaddr));
285 fault_tries--;
286 if (fault_tries > 0) {
287 continue;
288 } else {
289 fprintf(stderr, "error: cannot progress; ");
290 fprintf(stderr, "too many faults\n");
291 exit(-1);
292 }
293 }
294
295 fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
296
297 inlen = inlen - srclen;
298 srcbuf = srcbuf + srclen;
299 srctotlen = srctotlen + srclen;
300
301 /* Two possible locations for spbc depending on the function
302 * code.
303 */
304 spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
305 get32(cmdp->cpb, out_spbc_comp_with_count);
306 assert(spbc == srclen);
307
308 /* Target byte count */
309 tpbc = get32(cmdp->crb.csb, tpbc);
310 /* Target ending bit count */
311 tebc = getnn(cmdp->cpb, out_tebc);
312 NXPRT(fprintf(stderr, "compressed chunk %d ", spbc));
313 NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
314
315 if (inlen > 0) { /* More chunks to go */
316 set_bfinal(dstbuf, 0);
317 dstbuf = dstbuf + tpbc;
318 dsttotlen = dsttotlen + tpbc;
319 outlen = outlen - tpbc;
320 /* Round up to the next byte with a flush
321 * block; do not set the BFINAqL bit.
322 */
323 flushlen = append_sync_flush(dstbuf, tebc, 0);
324 dsttotlen = dsttotlen + flushlen;
325 outlen = outlen - flushlen;
326 dstbuf = dstbuf + flushlen;
327 NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
328 flushlen));
329 } else { /* Done */
330 /* Set the BFINAL bit of the last block per Deflate
331 * specification.
332 */
333 set_bfinal(dstbuf, 1);
334 dstbuf = dstbuf + tpbc;
335 dsttotlen = dsttotlen + tpbc;
336 outlen = outlen - tpbc;
337 }
338
339 /* Resuming crc32 for the next chunk */
340 crc = get32(cmdp->cpb, out_crc);
341 put32(cmdp->cpb, in_crc, crc);
342 crc = be32toh(crc);
343 }
344
345 /* Append crc32 and ISIZE to the end */
346 memcpy(dstbuf, &crc, 4);
347 memcpy(dstbuf+4, &srctotlen, 4);
348 dsttotlen = dsttotlen + 8;
349 outlen = outlen - 8;
350
351 assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
352 strcpy(outname, argv[1]);
353 strcat(outname, FEXT);
354 if (write_file(outname, outbuf, dsttotlen)) {
355 fprintf(stderr, "write error: %s\n", outname);
356 exit(-1);
357 }
358
359 fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen,
360 dsttotlen);
361 fprintf(stderr, "crc32 checksum = %08x\n", crc);
362
363 if (inbuf != NULL)
364 free(inbuf);
365
366 if (outbuf != NULL)
367 free(outbuf);
368
369 return 0;
370 }
371
main(int argc,char ** argv)372 int main(int argc, char **argv)
373 {
374 int rc;
375 struct sigaction act;
376 void *handle;
377
378 nx_dbg = 0;
379 nx_gzip_log = NULL;
380 act.sa_handler = 0;
381 act.sa_sigaction = nxu_sigsegv_handler;
382 act.sa_flags = SA_SIGINFO;
383 act.sa_restorer = 0;
384 sigemptyset(&act.sa_mask);
385 sigaction(SIGSEGV, &act, NULL);
386
387 handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
388 if (!handle) {
389 fprintf(stderr, "Unable to init NX, errno %d\n", errno);
390 exit(-1);
391 }
392
393 rc = compress_file(argc, argv, handle);
394
395 nx_function_end(handle);
396
397 return rc;
398 }
399