xref: /openbmc/qemu/ui/vnc-enc-zywrle.h (revision 095859e5d97284dd3ea666c337845dc63f6ba5e7)
1 /********************************************************************
2  *                                                                  *
3  * THIS FILE IS PART OF THE 'ZYWRLE' VNC CODEC SOURCE CODE.         *
4  *                                                                  *
5  * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
6  * GOVERNED BY A FOLLOWING BSD-STYLE SOURCE LICENSE.                *
7  * PLEASE READ THESE TERMS BEFORE DISTRIBUTING.                     *
8  *                                                                  *
9  * THE 'ZYWRLE' VNC CODEC SOURCE CODE IS (C) COPYRIGHT 2006         *
10  * BY Hitachi Systems & Services, Ltd.                              *
11  * (Noriaki Yamazaki, Research & Development Center)               *
12  *                                                                  *
13  *                                                                  *
14  ********************************************************************
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions
17 are met:
18 
19 - Redistributions of source code must retain the above copyright
20 notice, this list of conditions and the following disclaimer.
21 
22 - Redistributions in binary form must reproduce the above copyright
23 notice, this list of conditions and the following disclaimer in the
24 documentation and/or other materials provided with the distribution.
25 
26 - Neither the name of the Hitachi Systems & Services, Ltd. nor
27 the names of its contributors may be used to endorse or promote
28 products derived from this software without specific prior written
29 permission.
30 
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
32 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
33 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
34 A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION
35 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
36 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
37 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
41 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42  ********************************************************************/
43 
44 #ifndef VNC_ENC_ZYWRLE_H
45 #define VNC_ENC_ZYWRLE_H
46 
47 /* Tables for Coefficients filtering. */
48 #ifndef ZYWRLE_QUANTIZE
/*
 * Type A: EZW-style omission of low-order bits.
 *
 * Bit masks applied to each wavelet coefficient word, looked up as
 * zywrle_param[level - 1][l] by filter_wavelet_square().
 *
 * Alternative values kept from the original source for reference:
 *      {0x0000FF00, 0x00000000, 0x00000000},
 *      {0x0000FF00, 0x00FFFFFF, 0x00000000},
 *      {0x0000FF00, 0x00FFFFFF, 0x00FFFFFF},
 */
static const unsigned int zywrle_param[3][3] = {
    /* level 1 */
    { 0x0000F000, 0x00000000, 0x00000000 },
    /* level 2 */
    { 0x0000C000, 0x00F0F0F0, 0x00000000 },
    /* level 3 */
    { 0x0000C000, 0x00C0C0C0, 0x00F0F0F0 },
};
58 #else
/*
 * Type B: non-linear quantization filter.
 *
 * Four 256-entry lookup tables that map a coefficient byte to its
 * quantized-then-dequantized (filtered) value.  filter_wavelet_square()
 * indexes a table with the coefficient byte read as uint8_t, so entries
 * 0..127 are reached by non-negative coefficients and entries 128..255 by
 * negative ones.  Table 0 is all zeroes.
 */
static const int8_t zywrle_conv[4][256]={
{       /* bi=5, bo=5 r=0.0:PSNR=24.849 */
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
},
{       /* bi=5, bo=5 r=2.0:PSNR=74.031 */
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 32,
        32, 32, 32, 32, 32, 32, 32, 32,
        32, 32, 32, 32, 32, 32, 32, 32,
        48, 48, 48, 48, 48, 48, 48, 48,
        48, 48, 48, 56, 56, 56, 56, 56,
        56, 56, 56, 56, 64, 64, 64, 64,
        64, 64, 64, 64, 72, 72, 72, 72,
        72, 72, 72, 72, 80, 80, 80, 80,
        80, 80, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 96, 96,
        96, 96, 96, 104, 104, 104, 104, 104,
        104, 104, 104, 104, 104, 112, 112, 112,
        112, 112, 112, 112, 112, 112, 120, 120,
        120, 120, 120, 120, 120, 120, 120, 120,
        0, -120, -120, -120, -120, -120, -120, -120,
        -120, -120, -120, -112, -112, -112, -112, -112,
        -112, -112, -112, -112, -104, -104, -104, -104,
        -104, -104, -104, -104, -104, -104, -96, -96,
        -96, -96, -96, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -80,
        -80, -80, -80, -80, -80, -72, -72, -72,
        -72, -72, -72, -72, -72, -64, -64, -64,
        -64, -64, -64, -64, -64, -56, -56, -56,
        -56, -56, -56, -56, -56, -56, -48, -48,
        -48, -48, -48, -48, -48, -48, -48, -48,
        -48, -32, -32, -32, -32, -32, -32, -32,
        -32, -32, -32, -32, -32, -32, -32, -32,
        -32, -32, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
},
{       /* bi=5, bo=4 r=2.0:PSNR=64.441 */
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        48, 48, 48, 48, 48, 48, 48, 48,
        48, 48, 48, 48, 48, 48, 48, 48,
        48, 48, 48, 48, 48, 48, 48, 48,
        64, 64, 64, 64, 64, 64, 64, 64,
        64, 64, 64, 64, 64, 64, 64, 64,
        80, 80, 80, 80, 80, 80, 80, 80,
        80, 80, 80, 80, 80, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        104, 104, 104, 104, 104, 104, 104, 104,
        104, 104, 104, 112, 112, 112, 112, 112,
        112, 112, 112, 112, 120, 120, 120, 120,
        120, 120, 120, 120, 120, 120, 120, 120,
        0, -120, -120, -120, -120, -120, -120, -120,
        -120, -120, -120, -120, -120, -112, -112, -112,
        -112, -112, -112, -112, -112, -112, -104, -104,
        -104, -104, -104, -104, -104, -104, -104, -104,
        -104, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -80, -80, -80, -80,
        -80, -80, -80, -80, -80, -80, -80, -80,
        -80, -64, -64, -64, -64, -64, -64, -64,
        -64, -64, -64, -64, -64, -64, -64, -64,
        -64, -48, -48, -48, -48, -48, -48, -48,
        -48, -48, -48, -48, -48, -48, -48, -48,
        -48, -48, -48, -48, -48, -48, -48, -48,
        -48, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
},
{       /* bi=5, bo=2 r=2.0:PSNR=43.175 */
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        88, 88, 88, 88, 88, 88, 88, 88,
        0, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, -88, -88, -88, -88, -88, -88, -88,
        -88, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
}
};
198 
/*
 * Filter table selection for Type B.
 *
 * filter_wavelet_square() looks up zywrle_param[level - 1][l] and applies
 * entry [c] of that row to byte c (0..2) of every coefficient word, so the
 * dimensions are [level - 1][l][byte index].  Each entry points at one of
 * the zywrle_conv lookup tables.
 */
static const int8_t *zywrle_param[3][3][3]={
        {{zywrle_conv[0], zywrle_conv[2], zywrle_conv[0]},
         {zywrle_conv[0], zywrle_conv[0], zywrle_conv[0]},
         {zywrle_conv[0], zywrle_conv[0], zywrle_conv[0]}},
        {{zywrle_conv[0], zywrle_conv[3], zywrle_conv[0]},
         {zywrle_conv[1], zywrle_conv[1], zywrle_conv[1]},
         {zywrle_conv[0], zywrle_conv[0], zywrle_conv[0]}},
        {{zywrle_conv[0], zywrle_conv[3], zywrle_conv[0]},
         {zywrle_conv[2], zywrle_conv[2], zywrle_conv[2]},
         {zywrle_conv[1], zywrle_conv[1], zywrle_conv[1]}},
};
210 #endif
211 
212 /*   Load/Save pixel stuffs. */
/*
 * 15-bit pixels: 5 bits per channel, each expanded to the top 5 bits of an
 * 8-bit value (mask 0xF8).
 *
 * S_0 and S_1 are byte indexes into the pixel and must be defined by the
 * including file (presumably low/high byte for the pixel's endianness --
 * confirm against the includer).
 *
 * Every macro parameter is parenthesized so that expression arguments such
 * as `p + 1` are cast and indexed as a whole.
 */
#define ZYWRLE_YMASK15  0xFFFFFFF8
#define ZYWRLE_UVMASK15 0xFFFFFFF8

/* Unpack the pixel at src into r, g, b (each 0..0xF8, low 3 bits clear). */
#define ZYWRLE_LOAD_PIXEL15(src, r, g, b)                               \
    do {                                                                \
        (r) = (((uint8_t *)(src))[S_1] << 1) & 0xF8;                    \
        (g) = (((uint8_t *)(src))[S_1] << 6) |                          \
              (((uint8_t *)(src))[S_0] >> 2);                           \
        (g) &= 0xF8;                                                    \
        (b) = (((uint8_t *)(src))[S_0] << 3) & 0xF8;                    \
    } while (0)

/*
 * Pack r, g, b into the pixel at dst.
 * Note: r, g and b are modified (masked to their top 5 bits) as a side
 * effect, so they must be lvalues.
 */
#define ZYWRLE_SAVE_PIXEL15(dst, r, g, b)                               \
    do {                                                                \
        (r) &= 0xF8;                                                    \
        (g) &= 0xF8;                                                    \
        (b) &= 0xF8;                                                    \
        ((uint8_t *)(dst))[S_1] = (uint8_t)(((r) >> 1) | ((g) >> 6));   \
        ((uint8_t *)(dst))[S_0] =                                       \
            (uint8_t)((((b) >> 3) | ((g) << 2)) & 0xFF);                \
    } while (0)
231 
/*
 * 16-bit pixels: 5 bits red, 6 bits green, 5 bits blue, each expanded to the
 * top bits of an 8-bit value (masks 0xF8 / 0xFC / 0xF8).
 *
 * S_0 and S_1 are byte indexes into the pixel and must be defined by the
 * including file (presumably low/high byte for the pixel's endianness --
 * confirm against the includer).
 *
 * Every macro parameter is parenthesized so that expression arguments such
 * as `p + 1` are cast and indexed as a whole.
 */
#define ZYWRLE_YMASK16  0xFFFFFFFC
#define ZYWRLE_UVMASK16 0xFFFFFFF8

/* Unpack the pixel at src into r (0..0xF8), g (0..0xFC) and b (0..0xF8). */
#define ZYWRLE_LOAD_PIXEL16(src, r, g, b)                               \
    do {                                                                \
        (r) = ((uint8_t *)(src))[S_1] & 0xF8;                           \
        (g) = (((uint8_t *)(src))[S_1] << 5) |                          \
              (((uint8_t *)(src))[S_0] >> 3);                           \
        (g) &= 0xFC;                                                    \
        (b) = (((uint8_t *)(src))[S_0] << 3) & 0xF8;                    \
    } while (0)

/*
 * Pack r, g, b into the pixel at dst.
 * Note: r, g and b are modified (masked to their significant bits) as a
 * side effect, so they must be lvalues.
 */
#define ZYWRLE_SAVE_PIXEL16(dst, r, g, b)                               \
    do {                                                                \
        (r) &= 0xF8;                                                    \
        (g) &= 0xFC;                                                    \
        (b) &= 0xF8;                                                    \
        ((uint8_t *)(dst))[S_1] = (uint8_t)((r) | ((g) >> 5));          \
        ((uint8_t *)(dst))[S_0] =                                       \
            (uint8_t)((((b) >> 3) | ((g) << 3)) & 0xFF);                \
    } while (0)
250 
/*
 * 32-bit pixels: one full byte per channel, no bits are dropped.
 *
 * L_0, L_1 and L_2 are byte indexes of the blue, green and red channel inside
 * the pixel and must be defined by the including file.
 *
 * Every macro parameter is parenthesized so that expression arguments such
 * as `p + 1` or `a + b` are cast as a whole.
 */
#define ZYWRLE_YMASK32  0xFFFFFFFF
#define ZYWRLE_UVMASK32 0xFFFFFFFF

/* Unpack the pixel at src into r, g, b (each 0..255). */
#define ZYWRLE_LOAD_PIXEL32(src, r, g, b)             \
    do {                                              \
        (r) = ((uint8_t *)(src))[L_2];                \
        (g) = ((uint8_t *)(src))[L_1];                \
        (b) = ((uint8_t *)(src))[L_0];                \
    } while (0)

/* Store the low 8 bits of r, g, b into the pixel at dst. */
#define ZYWRLE_SAVE_PIXEL32(dst, r, g, b)             \
    do {                                              \
        ((uint8_t *)(dst))[L_2] = (uint8_t)(r);       \
        ((uint8_t *)(dst))[L_1] = (uint8_t)(g);       \
        ((uint8_t *)(dst))[L_0] = (uint8_t)(b);       \
    } while (0)
265 
/*
 * Piecewise-Linear Harr (PLHarr) step on one pair of 8-bit samples.
 *
 * On return *px0 holds the "L" output and *px1 the "H" output computed from
 * the two input samples.  The sign tests look at bit 7 only, i.e. at the sign
 * of the values as 8-bit quantities.
 */
static inline void harr(int8_t *px0, int8_t *px1)
{
    const int a = *px0;
    const int b = *px1;
    int low, high;

    if ((a ^ b) & 0x80) {
        /* Inputs have different signs. */
        low = a + b;
        high = a;
        if (!((low ^ b) & 0x80)) {
            /* |b| > |a| */
            high = a - low;     /* H = -B */
        }
    } else {
        /* Inputs have the same sign. */
        high = a - b;
        low = b;
        if (!((high ^ a) & 0x80)) {
            /* |a| > |b| */
            low = b + high;     /* L = A */
        }
    }

    *px0 = (int8_t)low;
    *px1 = (int8_t)high;
}
290 
/*
 1D wavelet transform for a single level.

 Coefficient arrays are commonly laid out with the well-known 'pyramid'
 decomposition:

 1D Model:
   |L0L0L0L0|L0L0L0L0|H0H0H0H0|H0H0H0H0| : level 0
   |L1L1L1L1|H1H1H1H1|H0H0H0H0|H0H0H0H0| : level 1

 That layout needs a line buffer, because H/L end up at different positions
 from X0/X1.  An 'interleave' decomposition is used here instead:

 1D Model:
   |L0H0L0H0|L0H0L0H0|L0H0L0H0|L0H0L0H0| : level 0
   |L1H0H1H0|L1H0H1H0|L1H0H1H0|L1H0H1H0| : level 1

 With this layout H/L and X0/X1 always share the same position, which is
 faster and needs less memory.  Of course both methods give the same result;
 only the positions of the coefficients differ.

 data:       first element of the row or column to transform
 size:       number of elements along the transformed direction
 l:          wavelet level being computed
 skip_pixel: element stride along the transformed direction
             (wavelet() passes 1 for rows and the width for columns)

 All strides below are in bytes because the buffer is walked through an
 int8_t pointer; bytes 0..2 of each element are transformed with harr().
*/
static inline void wavelet_level(int *data, int size, int l, int skip_pixel)
{
    const int pair_stride = (8 << l) * skip_pixel;  /* between two pairs */
    const int partner_ofs = (4 << l) * skip_pixel;  /* within one pair */
    int8_t *px = (int8_t *)data;
    int8_t *const stop = px + (size >> (l + 1)) * pair_stride;
    int c;

    for (; px < stop; px += pair_stride) {
        for (c = 0; c < 3; c++) {
            harr(px + c, px + c + partner_ofs);
        }
    }
}
333 
334 #ifndef ZYWRLE_QUANTIZE
335 /* Type A:lower bit omitting of EZW style. */
336 static inline void filter_wavelet_square(int *buf, int width, int height,
337                                          int level, int l)
338 {
339     int r, s;
340     int x, y;
341     int *h;
342     const unsigned int *m;
343 
344     m = &(zywrle_param[level - 1][l]);
345     s = 2 << l;
346 
347     for (r = 1; r < 4; r++) {
348         h = buf;
349         if (r & 0x01) {
350             h += s >> 1;
351         }
352         if (r & 0x02) {
353             h += (s >> 1) * width;
354         }
355         for (y = 0; y < height / s; y++) {
356             for (x = 0; x < width / s; x++) {
357                 /*
358                   these are same following code.
359                   h[x] = h[x] / (~m[x]+1) * (~m[x]+1);
360                   ( round h[x] with m[x] bit )
361                   '&' operator isn't 'round' but is 'floor'.
362                   So, we must offset when h[x] is negative.
363                 */
364                 if (((int8_t*)h)[0] & 0x80) {
365                     ((int8_t*)h)[0] += ~((int8_t*)m)[0];
366                 }
367                 if (((int8_t*)h)[1] & 0x80) {
368                     ((int8_t*)h)[1] += ~((int8_t*)m)[1];
369                 }
370                 if (((int8_t*)h)[2] & 0x80) {
371                     ((int8_t*)h)[2] += ~((int8_t*)m)[2];
372                 }
373                 *h &= *m;
374                 h += s;
375             }
376             h += (s-1)*width;
377         }
378     }
379 }
380 #else
381 /*
382  Type B:Non liner quantization filter.
383 
384  Coefficients have Gaussian curve and smaller value which is
385  large part of coefficients isn't more important than larger value.
386  So, I use filter of Non liner quantize/dequantize table.
387  In general, Non liner quantize formula is explained as following.
388 
389     y=f(x)   = sign(x)*round( ((abs(x)/(2^7))^ r   )* 2^(bo-1) )*2^(8-bo)
390     x=f-1(y) = sign(y)*round( ((abs(y)/(2^7))^(1/r))* 2^(bi-1) )*2^(8-bi)
391  ( r:power coefficient  bi:effective MSB in input  bo:effective MSB in output )
392 
393    r < 1.0 : Smaller value is more important than larger value.
394    r > 1.0 : Larger value is more important than smaller value.
395    r = 1.0 : Liner quantization which is same with EZW style.
396 
397  r = 0.75 is famous non liner quantization used in MP3 audio codec.
398  In contrast to audio data, larger value is important in wavelet coefficients.
399  So, I select r = 2.0 table( quantize is x^2, dequantize sqrt(x) ).
400 
401  As compared with EZW style liner quantization, this filter tended to be
402  more sharp edge and be more compression rate but be more blocking noise and be
403  less quality. Especially, the surface of graphic objects has distinguishable
404  noise in middle quality mode.
405 
406  We need only quantized-dequantized(filtered) value rather than quantized value
407  itself because all values are packed or palette-lized in later ZRLE section.
408  This lead us not to need to modify client decoder when we change
409  the filtering procedure in future.
410  Client only decodes coefficients given by encoder.
411 */
412 static inline void filter_wavelet_square(int *buf, int width, int height,
413                                          int level, int l)
414 {
415     int r, s;
416     int x, y;
417     int *h;
418     const int8_t **m;
419 
420     m = zywrle_param[level - 1][l];
421     s = 2 << l;
422 
423     for (r = 1; r < 4; r++) {
424         h = buf;
425         if (r & 0x01) {
426             h += s >> 1;
427         }
428         if (r & 0x02) {
429             h += (s >> 1) * width;
430         }
431         for (y = 0; y < height / s; y++) {
432             for (x = 0; x < width / s; x++) {
433                 ((int8_t*)h)[0] = m[0][((uint8_t*)h)[0]];
434                 ((int8_t*)h)[1] = m[1][((uint8_t*)h)[1]];
435                 ((int8_t*)h)[2] = m[2][((uint8_t*)h)[2]];
436                 h += s;
437             }
438             h += (s - 1) * width;
439         }
440     }
441 }
442 #endif
443 
/*
 * Run the full 2D wavelet transform on buf (width * height elements).
 *
 * For each level l in 0..level-1 the rows are transformed first, then the
 * columns, and finally the resulting coefficients of that level are
 * filtered with filter_wavelet_square().
 */
static inline void wavelet(int *buf, int width, int height, int level)
{
    int l;

    for (l = 0; l < level; l++) {
        int *const rows_end = buf + height * width;
        int *const cols_end = buf + width;
        const int row_step = width << l;
        const int col_step = 1 << l;
        int *row;
        int *col;

        /* Horizontal pass: every (1 << l)-th row, element stride 1. */
        for (row = buf; row < rows_end; row += row_step) {
            wavelet_level(row, width, l, 1);
        }

        /* Vertical pass: every (1 << l)-th column, element stride width. */
        for (col = buf; col < cols_end; col += col_step) {
            wavelet_level(col, height, l, width);
        }

        filter_wavelet_square(buf, width, height, level, l);
    }
}
468 
469 
/*
 * Load/save of coefficients.
 * A coefficient triple is handled as a 24-bit little-endian pixel: byte 0
 * holds b, byte 1 holds g and byte 2 holds r, each as a signed 8-bit value.
 *
 * Macro parameters are parenthesized so that expression arguments such as
 * `buf + 1` or `a + b` are cast as a whole.
 */
#define ZYWRLE_LOAD_COEFF(src, r, g, b)         \
    do {                                        \
        (r) = ((int8_t *)(src))[2];             \
        (g) = ((int8_t *)(src))[1];             \
        (b) = ((int8_t *)(src))[0];             \
    } while (0)

#define ZYWRLE_SAVE_COEFF(dst, r, g, b)           \
    do {                                          \
        ((int8_t *)(dst))[2] = (int8_t)(r);       \
        ((int8_t *)(dst))[1] = (int8_t)(g);       \
        ((int8_t *)(dst))[0] = (int8_t)(b);       \
    } while (0)
485 
486 /*
487   RGB <=> YUV conversion stuffs.
488   YUV conversion is explained as following formula in strict meaning:
489   Y =  0.299R + 0.587G + 0.114B (   0<=Y<=255)
490   U = -0.169R - 0.331G + 0.500B (-128<=U<=127)
491   V =  0.500R - 0.419G - 0.081B (-128<=V<=127)
492 
493   I use simple conversion RCT(reversible color transform) which is described
494   in JPEG-2000 specification.
495   Y = (R + 2G + B)/4 (   0<=Y<=255)
496   U = B-G (-256<=U<=255)
497   V = R-G (-256<=V<=255)
498 */
499 
/*
 * RCT maps N-bit RGB to an N-bit Y and (N+1)-bit U/V.
 * To keep everything at N bits, U and V are halved, which makes them lossy.
 * For a more exact PLHarr the value -128 is avoided: it is bumped up by the
 * smallest step the mask allows, giving a symmetric range (-127<=x<=127).
 *
 * r, g, b:        input channel values (expressions are fine)
 * y, u, v:        output lvalues
 * ymask, uvmask:  masks selecting the significant bits of Y and of U/V
 *
 * Parameters are parenthesized so that expression arguments (for example a
 * green value of `a + b`) are evaluated as a unit.
 */
#define ZYWRLE_RGBYUV_(r, g, b, y, u, v, ymask, uvmask)          \
    do {                                                         \
        (y) = ((r) + ((g) << 1) + (b)) >> 2;                     \
        (u) = (b) - (g);                                         \
        (v) = (r) - (g);                                         \
        (y) -= 128;                                              \
        (u) >>= 1;                                               \
        (v) >>= 1;                                               \
        (y) &= (ymask);                                          \
        (u) &= (uvmask);                                         \
        (v) &= (uvmask);                                         \
        if ((y) == -128) {                                       \
            (y) += (0xFFFFFFFF - (ymask) + 1);                   \
        }                                                        \
        if ((u) == -128) {                                       \
            (u) += (0xFFFFFFFF - (uvmask) + 1);                  \
        }                                                        \
        if ((v) == -128) {                                       \
            (v) += (0xFFFFFFFF - (uvmask) + 1);                  \
        }                                                        \
    } while (0)
524 
525 
526 /*
527  coefficient packing/unpacking stuffs.
528  Wavelet transform makes 4 sub coefficient image from 1 original image.
529 
530  model with pyramid decomposition:
531    +------+------+
532    |      |      |
533    |  L   |  Hx  |
534    |      |      |
535    +------+------+
536    |      |      |
537    |  H   |  Hxy |
538    |      |      |
539    +------+------+
540 
 Strictly speaking, each sub-image should therefore be transferred
 individually. As far as ZRLE is concerned, however, the single stacked image
 below is equivalent to the individual sub-images above, so this format is used.
 (Strictly speaking, the transfer order is reversed (Hxy->Hy->Hx->L)
  to keep the procedure simple for any wavelet level.)
546 
547    +------+------+
548    |      L      |
549    +------+------+
550    |      Hx     |
551    +------+------+
552    |      Hy     |
553    +------+------+
554    |      Hxy    |
555    +------+------+
556 */
/*
 * Advance the pixel pointer `data` by one element.  When it has moved
 * (w + uw) elements past the line start `p`, it skips the remainder of the
 * scanline and `p` is reset to the new position.
 * Uses p, w, uw and scanline from the scope of the caller.
 */
#define ZYWRLE_INC_PTR(data)                         \
    do {                                             \
        data++;                                      \
        if (data - p < (w + uw)) {                   \
            break; /* still inside the line */       \
        }                                            \
        data += scanline - (w + uw);                 \
        p = data;                                    \
    } while (0)
565 
/*
 * Walk one sub-band of the coefficient buffer and run TRANS on every
 * coefficient of it, advancing `data` with ZYWRLE_INC_PTR after each one.
 *
 * buf:    coefficient buffer, w * h elements
 * data:   pixel pointer advanced once per coefficient
 * t:      sub-band selector; bit 0 offsets half a step horizontally,
 *         bit 1 offsets half a step vertically
 * w, h:   dimensions of the coefficient buffer
 * level:  wavelet level; the step between coefficients is 2 << level
 * TRANS:  statement(s) executed per coefficient; `ph` points at it
 *
 * Uses ph, s, end and line from the scope of the caller.  ZYWRLE_INC_PTR
 * additionally reads p, w, uw and scanline from the caller's scope (those
 * names are not substituted by this macro's parameters).
 *
 * Value parameters are parenthesized so that expression arguments such as
 * a height of `a + b` are multiplied as a unit.
 */
#define ZYWRLE_TRANSFER_COEFF(buf, data, t, w, h, scanline, level, TRANS) \
    do {                                                                \
        ph = (buf);                                                     \
        s = 2 << (level);                                               \
        if ((t) & 0x01) {                                               \
            ph += s >> 1;                                               \
        }                                                               \
        if ((t) & 0x02) {                                               \
            ph += (s >> 1) * (w);                                       \
        }                                                               \
        end = ph + (h) * (w);                                           \
        while (ph < end) {                                              \
            line = ph + (w);                                            \
            while (ph < line) {                                         \
                TRANS                                                   \
                    ZYWRLE_INC_PTR(data);                               \
                ph += s;                                                \
            }                                                           \
            ph += (s - 1) * (w);                                        \
        }                                                               \
    } while (0)
587 
/*
 * Pack one sub-band: for every coefficient visited by ZYWRLE_TRANSFER_COEFF,
 * load it from the coefficient buffer with ZYWRLE_LOAD_COEFF and store it to
 * `data` with ZYWRLE_SAVE_PIXEL (defined by the including file).
 * Requires r, g and b in the caller's scope, plus everything
 * ZYWRLE_TRANSFER_COEFF needs.
 */
#define ZYWRLE_PACK_COEFF(buf, data, t, width, height, scanline, level) \
    ZYWRLE_TRANSFER_COEFF(buf, data, t, width, height, scanline, level, \
                          ZYWRLE_LOAD_COEFF(ph, r, g, b);               \
                          ZYWRLE_SAVE_PIXEL(data, r, g, b);)

/*
 * Unpack one sub-band: the reverse of ZYWRLE_PACK_COEFF.  Each value is
 * loaded from `data` with ZYWRLE_LOAD_PIXEL (defined by the including file)
 * and stored into the coefficient buffer with ZYWRLE_SAVE_COEFF.
 */
#define ZYWRLE_UNPACK_COEFF(buf, data, t, width, height, scanline, level) \
    ZYWRLE_TRANSFER_COEFF(buf, data, t, width, height, scanline, level, \
                          ZYWRLE_LOAD_PIXEL(data, r, g, b);             \
                          ZYWRLE_SAVE_COEFF(ph, r, g, b);)
597 
/*
 * Run TRANS once for every element stored after the aligned w * h area of
 * buf, i.e. for `top` from buf + w * h up to buf + (w + uw) * (h + uh),
 * advancing `data` with ZYWRLE_INC_PTR after each one.
 * Uses top, end, buf, w, h, uw and uh from the scope of the caller (plus the
 * names ZYWRLE_INC_PTR needs).
 */
#define ZYWRLE_SAVE_UNALIGN(data, TRANS)                     \
    do {                                                     \
        top = buf + w * h;                                   \
        end = buf + (w + uw) * (h + uh);                     \
        while (top < end) {                                  \
            TRANS                                            \
                ZYWRLE_INC_PTR(data);                        \
                top++;                                       \
        }                                                    \
    } while (0)
608 
/*
 * Run TRANS for every pixel of `data` that lies outside the aligned w * h
 * area, advancing `p` (the pixel) and `top` (starting at buf + w * h)
 * together.  Three regions are visited, in this order:
 *   - if uw:        the right strip, uw columns by h rows, from data + w
 *   - if uh:        the bottom strip, w columns by uh rows,
 *                   from data + h * scanline
 *   - if uw && uh:  the corner, uw columns by uh rows,
 *                   from data + w + h * scanline
 *
 * Uses top, p, end, line, buf, w, h, uw, uh and scanline from the scope of
 * the caller.  `end` and `line` are the caller's int pointers; they only
 * carry ZRLE_PIXEL positions here and are cast back to ZRLE_PIXEL * before
 * every comparison.
 */
#define ZYWRLE_LOAD_UNALIGN(data,TRANS)                                 \
    do {                                                                \
        top = buf + w * h;                                              \
        if (uw) {                                                       \
            p = data + w;                                               \
            end = (int*)(p + h * scanline);                             \
            while (p < (ZRLE_PIXEL*)end) {                              \
                line = (int*)(p + uw);                                  \
                while (p < (ZRLE_PIXEL*)line) {                         \
                    TRANS                                               \
                        p++;                                            \
                    top++;                                              \
                }                                                       \
                p += scanline - uw;                                     \
            }                                                           \
        }                                                               \
        if (uh) {                                                       \
            p = data + h * scanline;                                    \
            end = (int*)(p + uh * scanline);                            \
            while (p < (ZRLE_PIXEL*)end) {                              \
                line = (int*)(p + w);                                   \
                while (p < (ZRLE_PIXEL*)line) {                         \
                    TRANS                                               \
                        p++;                                            \
                    top++;                                              \
                }                                                       \
                p += scanline - w;                                      \
            }                                                           \
        }                                                               \
        if (uw && uh) {                                                 \
            p= data + w + h * scanline;                                 \
            end = (int*)(p + uh * scanline);                            \
            while (p < (ZRLE_PIXEL*)end) {                              \
                line = (int*)(p + uw);                                  \
                while (p < (ZRLE_PIXEL*)line) {                         \
                    TRANS                                               \
                        p++;                                            \
                    top++;                                              \
                }                                                       \
                p += scanline-uw;                                       \
            }                                                           \
        }                                                               \
    } while (0)
652 
/*
 * Round *w and *h down to a multiple of (1 << level), in place.
 */
static inline void zywrle_calc_size(int *w, int *h, int level)
{
    const int keep = ~((1 << level) - 1);   /* clears the low `level` bits */

    *w = *w & keep;
    *h = *h & keep;
}
658 
659 #endif
660