1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Rockchip RK3288 VPU codec driver
4  *
5  * Copyright (c) 2014 Rockchip Electronics Co., Ltd.
6  *	Hertz Wong <hertz.wong@rock-chips.com>
7  *	Herman Chen <herman.chen@rock-chips.com>
8  *
9  * Copyright (C) 2014 Google, Inc.
10  *	Tomasz Figa <tfiga@chromium.org>
11  */
12 
13 #include <linux/types.h>
14 #include <media/v4l2-h264.h>
15 #include <media/v4l2-mem2mem.h>
16 
17 #include "hantro.h"
18 #include "hantro_hw.h"
19 
/*
 * Element counts of the three sections of the auxiliary buffer.
 * CABAC_INIT_BUFFER_SIZE and POC_BUFFER_SIZE dimension u32 arrays, whereas
 * SCALING_LIST_SIZE dimensions a u8 array and is therefore a byte count
 * (six 16-byte lists plus two 64-byte lists).
 */
#define CABAC_INIT_BUFFER_SIZE		(460 * 2)
#define POC_BUFFER_SIZE			34
#define SCALING_LIST_SIZE		(6 * 16 + 2 * 64)
24 
/*
 * The valid and long term reference bitmaps are indexed in reverse order:
 * picture 0 is reported in bit 31, picture 1 in bit 30, and so on.
 */
#define REF_BIT(i)			BIT(31 - (i))
30 
31 /* Data structure describing auxiliary buffer format. */
32 struct hantro_h264_dec_priv_tbl {
33 	u32 cabac_table[CABAC_INIT_BUFFER_SIZE];
34 	u32 poc[POC_BUFFER_SIZE];
35 	u8 scaling_list[SCALING_LIST_SIZE];
36 };
37 
38 /*
39  * Constant CABAC table.
40  * From drivers/media/platform/rk3288-vpu/rk3288_vpu_hw_h264d.c
41  * in https://chromium.googlesource.com/chromiumos/third_party/kernel,
42  * chromeos-3.14 branch.
43  */
44 static const u32 h264_cabac_table[] = {
45 	0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07330000,
46 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
47 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
48 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
49 	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
50 	0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x000b0137,
51 	0x0045ef7f, 0xf3660052, 0xf94aeb6b, 0xe57fe17f, 0xe87fee5f, 0xe57feb72,
52 	0xe27fef7b, 0xf473f07a, 0xf573f43f, 0xfe44f154, 0xf368fd46, 0xf85df65a,
53 	0xe27fff4a, 0xfa61f95b, 0xec7ffc38, 0xfb52f94c, 0xea7df95d, 0xf557fd4d,
54 	0xfb47fc3f, 0xfc44f454, 0xf93ef941, 0x083d0538, 0xfe420140, 0x003dfe4e,
55 	0x01320734, 0x0a23002c, 0x0b26012d, 0x002e052c, 0x1f110133, 0x07321c13,
56 	0x10210e3e, 0xf36cf164, 0xf365f35b, 0xf45ef658, 0xf054f656, 0xf953f357,
57 	0xed5e0146, 0x0048fb4a, 0x123bf866, 0xf164005f, 0xfc4b0248, 0xf54bfd47,
58 	0x0f2ef345, 0x003e0041, 0x1525f148, 0x09391036, 0x003e0c48, 0x18000f09,
59 	0x08190d12, 0x0f090d13, 0x0a250c12, 0x061d1421, 0x0f1e042d, 0x013a003e,
60 	0x073d0c26, 0x0b2d0f27, 0x0b2a0d2c, 0x102d0c29, 0x0a311e22, 0x122a0a37,
61 	0x1133112e, 0x00591aed, 0x16ef1aef, 0x1ee71cec, 0x21e925e5, 0x21e928e4,
62 	0x26ef21f5, 0x28f129fa, 0x26012911, 0x1efa1b03, 0x1a1625f0, 0x23fc26f8,
63 	0x26fd2503, 0x26052a00, 0x23102716, 0x0e301b25, 0x153c0c44, 0x0261fd47,
64 	0xfa2afb32, 0xfd36fe3e, 0x003a013f, 0xfe48ff4a, 0xf75bfb43, 0xfb1bfd27,
65 	0xfe2c002e, 0xf040f844, 0xf64efa4d, 0xf656f45c, 0xf137f63c, 0xfa3efc41,
66 	0xf449f84c, 0xf950f758, 0xef6ef561, 0xec54f54f, 0xfa49fc4a, 0xf356f360,
67 	0xf561ed75, 0xf84efb21, 0xfc30fe35, 0xfd3ef347, 0xf64ff456, 0xf35af261,
68 	0x0000fa5d, 0xfa54f84f, 0x0042ff47, 0x003efe3c, 0xfe3bfb4b, 0xfd3efc3a,
69 	0xf742ff4f, 0x00470344, 0x0a2cf93e, 0x0f240e28, 0x101b0c1d, 0x012c1424,
70 	0x1220052a, 0x01300a3e, 0x112e0940, 0xf468f561, 0xf060f958, 0xf855f955,
71 	0xf755f358, 0x0442fd4d, 0xfd4cfa4c, 0x0a3aff4c, 0xff53f963, 0xf25f025f,
72 	0x004cfb4a, 0x0046f54b, 0x01440041, 0xf249033e, 0x043eff44, 0xf34b0b37,
73 	0x05400c46, 0x0f060613, 0x07100c0e, 0x120d0d0b, 0x0d0f0f10, 0x0c170d17,
74 	0x0f140e1a, 0x0e2c1128, 0x112f1811, 0x15151916, 0x1f1b161d, 0x13230e32,
75 	0x0a39073f, 0xfe4dfc52, 0xfd5e0945, 0xf46d24dd, 0x24de20e6, 0x25e22ce0,
76 	0x22ee22f1, 0x28f121f9, 0x23fb2100, 0x2602210d, 0x17230d3a, 0x1dfd1a00,
77 	0x161e1ff9, 0x23f122fd, 0x220324ff, 0x2205200b, 0x2305220c, 0x270b1e1d,
78 	0x221a1d27, 0x13421f15, 0x1f1f1932, 0xef78ec70, 0xee72f555, 0xf15cf259,
79 	0xe647f151, 0xf2500044, 0xf246e838, 0xe944e832, 0xf54a17f3, 0x1af328f1,
80 	0x31f22c03, 0x2d062c22, 0x21361352, 0xfd4bff17, 0x0122012b, 0x0036fe37,
81 	0x003d0140, 0x0044f75c, 0xf26af361, 0xf15af45a, 0xee58f649, 0xf74ff256,
82 	0xf649f646, 0xf645fb42, 0xf740fb3a, 0x023b15f6, 0x18f51cf8, 0x1cff1d03,
83 	0x1d092314, 0x1d240e43, 0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968,
84 	0xfa35ff36, 0x07331721, 0x17021500, 0x01090031, 0xdb760539, 0xf34ef541,
85 	0x013e0c31, 0xfc491132, 0x1240092b, 0x1d001a43, 0x105a0968, 0xd27fec68,
86 	0x0143f34e, 0xf541013e, 0xfa56ef5f, 0xfa3d092d, 0xfd45fa51, 0xf5600637,
87 	0x0743fb56, 0x0258003a, 0xfd4cf65e, 0x05360445, 0xfd510058, 0xf943fb4a,
88 	0xfc4afb50, 0xf948013a, 0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948,
89 	0x0d29033e, 0x002dfc4e, 0xfd60e57e, 0xe462e765, 0xe943e452, 0xec5ef053,
90 	0xea6eeb5b, 0xee66f35d, 0xe37ff95c, 0xfb59f960, 0xf36cfd2e, 0xff41ff39,
91 	0xf75dfd4a, 0xf75cf857, 0xe97e0536, 0x063c063b, 0x0645ff30, 0x0044fc45,
92 	0xf858fe55, 0xfa4eff4b, 0xf94d0236, 0x0532fd44, 0x0132062a, 0xfc51013f,
93 	0xfc460043, 0x0239fe4c, 0x0b230440, 0x013d0b23, 0x12190c18, 0x0d1d0d24,
94 	0xf65df949, 0xfe490d2e, 0x0931f964, 0x09350235, 0x0535fe3d, 0x00380038,
95 	0xf33ffb3c, 0xff3e0439, 0xfa450439, 0x0e270433, 0x0d440340, 0x013d093f,
96 	0x07321027, 0x052c0434, 0x0b30fb3c, 0xff3b003b, 0x1621052c, 0x0e2bff4e,
97 	0x003c0945, 0x0b1c0228, 0x032c0031, 0x002e022c, 0x0233002f, 0x0427023e,
98 	0x062e0036, 0x0336023a, 0x043f0633, 0x06390735, 0x06340637, 0x0b2d0e24,
99 	0x0835ff52, 0x0737fd4e, 0x0f2e161f, 0xff541907, 0x1ef91c03, 0x1c042000,
100 	0x22ff1e06, 0x1e062009, 0x1f131a1b, 0x1a1e2514, 0x1c221146, 0x0143053b,
101 	0x0943101e, 0x12201223, 0x161d181f, 0x1726122b, 0x14290b3f, 0x093b0940,
102 	0xff5efe59, 0xf76cfa4c, 0xfe2c002d, 0x0034fd40, 0xfe3bfc46, 0xfc4bf852,
103 	0xef66f74d, 0x0318002a, 0x00300037, 0xfa3bf947, 0xf453f557, 0xe277013a,
104 	0xfd1dff24, 0x0126022b, 0xfa37003a, 0x0040fd4a, 0xf65a0046, 0xfc1d051f,
105 	0x072a013b, 0xfe3afd48, 0xfd51f561, 0x003a0805, 0x0a0e0e12, 0x0d1b0228,
106 	0x003afd46, 0xfa4ff855, 0x0000f36a, 0xf06af657, 0xeb72ee6e, 0xf262ea6e,
107 	0xeb6aee67, 0xeb6be96c, 0xe670f660, 0xf45ffb5b, 0xf75dea5e, 0xfb560943,
108 	0xfc50f655, 0xff46073c, 0x093a053d, 0x0c320f32, 0x12311136, 0x0a29072e,
109 	0xff330731, 0x08340929, 0x062f0237, 0x0d290a2c, 0x06320535, 0x0d31043f,
110 	0x0640fe45, 0xfe3b0646, 0x0a2c091f, 0x0c2b0335, 0x0e220a26, 0xfd340d28,
111 	0x1120072c, 0x07260d32, 0x0a391a2b, 0x0e0b0b0e, 0x090b120b, 0x150917fe,
112 	0x20f120f1, 0x22eb27e9, 0x2adf29e1, 0x2ee426f4, 0x151d2de8, 0x35d330e6,
113 	0x41d52bed, 0x27f61e09, 0x121a141b, 0x0039f252, 0xfb4bed61, 0xdd7d1b00,
114 	0x1c001ffc, 0x1b062208, 0x1e0a1816, 0x21131620, 0x1a1f1529, 0x1a2c172f,
115 	0x10410e47, 0x083c063f, 0x11411518, 0x17141a17, 0x1b201c17, 0x1c181728,
116 	0x18201c1d, 0x172a1339, 0x1635163d, 0x0b560c28, 0x0b330e3b, 0xfc4ff947,
117 	0xfb45f746, 0xf842f644, 0xed49f445, 0xf046f143, 0xec3eed46, 0xf042ea41,
118 	0xec3f09fe, 0x1af721f7, 0x27f929fe, 0x2d033109, 0x2d1b243b, 0xfa42f923,
119 	0xf92af82d, 0xfb30f438, 0xfa3cfb3e, 0xf842f84c, 0xfb55fa51, 0xf64df951,
120 	0xef50ee49, 0xfc4af653, 0xf747f743, 0xff3df842, 0xf242003b, 0x023b15f3,
121 	0x21f227f9, 0x2efe3302, 0x3c063d11, 0x37222a3e, 0x14f10236, 0x034a14f1,
122 	0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331619, 0x22001000, 0xfe090429,
123 	0xe3760241, 0xfa47f34f, 0x05340932, 0xfd460a36, 0x1a221316, 0x28003902,
124 	0x29241a45, 0xd37ff165, 0xfc4cfa47, 0xf34f0534, 0x0645f35a, 0x0034082b,
125 	0xfe45fb52, 0xf660023b, 0x024bfd57, 0xfd640138, 0xfd4afa55, 0x003bfd51,
126 	0xf956fb5f, 0xff42ff4d, 0x0146fe56, 0xfb48003d, 0x0029003f, 0x003f003f,
127 	0xf7530456, 0x0061f948, 0x0d29033e, 0x0d0f0733, 0x0250d97f, 0xee5bef60,
128 	0xe651dd62, 0xe866e961, 0xe577e863, 0xeb6eee66, 0xdc7f0050, 0xfb59f95e,
129 	0xfc5c0027, 0x0041f154, 0xdd7ffe49, 0xf468f75b, 0xe17f0337, 0x07380737,
130 	0x083dfd35, 0x0044f94a, 0xf758f367, 0xf35bf759, 0xf25cf84c, 0xf457e96e,
131 	0xe869f64e, 0xec70ef63, 0xb27fba7f, 0xce7fd27f, 0xfc42fb4e, 0xfc47f848,
132 	0x023bff37, 0xf946fa4b, 0xf859de77, 0xfd4b2014, 0x1e16d47f, 0x0036fb3d,
133 	0x003aff3c, 0xfd3df843, 0xe754f24a, 0xfb410534, 0x0239003d, 0xf745f546,
134 	0x1237fc47, 0x003a073d, 0x09291219, 0x0920052b, 0x092f002c, 0x0033022e,
135 	0x1326fc42, 0x0f260c2a, 0x09220059, 0x042d0a1c, 0x0a1f21f5, 0x34d5120f,
136 	0x1c0023ea, 0x26e72200, 0x27ee20f4, 0x66a20000, 0x38f121fc, 0x1d0a25fb,
137 	0x33e327f7, 0x34de45c6, 0x43c12cfb, 0x200737e3, 0x20010000, 0x1b2421e7,
138 	0x22e224e4, 0x26e426e5, 0x22ee23f0, 0x22f220f8, 0x25fa2300, 0x1e0a1c12,
139 	0x1a191d29, 0x004b0248, 0x084d0e23, 0x121f1123, 0x151e112d, 0x142a122d,
140 	0x1b1a1036, 0x07421038, 0x0b490a43, 0xf674e970, 0xf147f93d, 0x0035fb42,
141 	0xf54df750, 0xf754f657, 0xde7feb65, 0xfd27fb35, 0xf93df54b, 0xf14def5b,
142 	0xe76be76f, 0xe47af54c, 0xf62cf634, 0xf639f73a, 0xf048f945, 0xfc45fb4a,
143 	0xf7560242, 0xf7220120, 0x0b1f0534, 0xfe37fe43, 0x0049f859, 0x03340704,
144 	0x0a081108, 0x10130325, 0xff3dfb49, 0xff46fc4e, 0x0000eb7e, 0xe97cec6e,
145 	0xe67ee77c, 0xef69e579, 0xe575ef66, 0xe675e574, 0xdf7af65f, 0xf264f85f,
146 	0xef6fe472, 0xfa59fe50, 0xfc52f755, 0xf851ff48, 0x05400143, 0x09380045,
147 	0x01450745, 0xf945fa43, 0xf04dfe40, 0x023dfa43, 0xfd400239, 0xfd41fd42,
148 	0x003e0933, 0xff42fe47, 0xfe4bff46, 0xf7480e3c, 0x1025002f, 0x12230b25,
149 	0x0c290a29, 0x02300c29, 0x0d29003b, 0x03321328, 0x03421232, 0x13fa12fa,
150 	0x0e001af4, 0x1ff021e7, 0x21ea25e4, 0x27e22ae2, 0x2fd62ddc, 0x31de29ef,
151 	0x200945b9, 0x3fc142c0, 0x4db636d9, 0x34dd29f6, 0x240028ff, 0x1e0e1c1a,
152 	0x17250c37, 0x0b4125df, 0x27dc28db, 0x26e22edf, 0x2ae228e8, 0x31e326f4,
153 	0x28f626fd, 0x2efb1f14, 0x1d1e192c, 0x0c300b31, 0x1a2d1616, 0x17161b15,
154 	0x21141a1c, 0x1e181b22, 0x122a1927, 0x12320c46, 0x15360e47, 0x0b531920,
155 	0x15311536, 0xfb55fa51, 0xf64df951, 0xef50ee49, 0xfc4af653, 0xf747f743,
156 	0xff3df842, 0xf242003b, 0x023b11f6, 0x20f32af7, 0x31fb3500, 0x4003440a,
157 	0x421b2f39, 0xfb470018, 0xff24fe2a, 0xfe34f739, 0xfa3ffc41, 0xfc43f952,
158 	0xfd51fd4c, 0xf948fa4e, 0xf448f244, 0xfd46fa4c, 0xfb42fb3e, 0x0039fc3d,
159 	0xf73c0136, 0x023a11f6, 0x20f32af7, 0x31fb3500, 0x4003440a, 0x421b2f39,
160 	0x14f10236, 0x034a14f1, 0x0236034a, 0xe47fe968, 0xfa35ff36, 0x07331d10,
161 	0x19000e00, 0xf633fd3e, 0xe5631a10, 0xfc55e866, 0x05390639, 0xef490e39,
162 	0x1428140a, 0x1d003600, 0x252a0c61, 0xe07fea75, 0xfe4afc55, 0xe8660539,
163 	0xfa5df258, 0xfa2c0437, 0xf559f167, 0xeb741339, 0x143a0454, 0x0660013f,
164 	0xfb55f36a, 0x053f064b, 0xfd5aff65, 0x0337fc4f, 0xfe4bf461, 0xf932013c,
165 	0x0029003f, 0x003f003f, 0xf7530456, 0x0061f948, 0x0d29033e, 0x0722f758,
166 	0xec7fdc7f, 0xef5bf25f, 0xe754e756, 0xf459ef5b, 0xe17ff24c, 0xee67f35a,
167 	0xdb7f0b50, 0x054c0254, 0x054efa37, 0x043df253, 0xdb7ffb4f, 0xf568f55b,
168 	0xe27f0041, 0xfe4f0048, 0xfc5cfa38, 0x0344f847, 0xf362fc56, 0xf458fb52,
169 	0xfd48fc43, 0xf848f059, 0xf745ff3b, 0x05420439, 0xfc47fe47, 0x023aff4a,
170 	0xfc2cff45, 0x003ef933, 0xfc2ffa2a, 0xfd29fa35, 0x084cf74e, 0xf5530934,
171 	0x0043fb5a, 0x0143f148, 0xfb4bf850, 0xeb53eb40, 0xf31fe740, 0xe35e094b,
172 	0x113ff84a, 0xfb23fe1b, 0x0d5b0341, 0xf945084d, 0xf642033e, 0xfd44ec51,
173 	0x001e0107, 0xfd17eb4a, 0x1042e97c, 0x11252cee, 0x32deea7f, 0x0427002a,
174 	0x07220b1d, 0x081f0625, 0x072a0328, 0x08210d2b, 0x0d24042f, 0x0337023a,
175 	0x063c082c, 0x0b2c0e2a, 0x07300438, 0x04340d25, 0x0931133a, 0x0a300c2d,
176 	0x00451421, 0x083f23ee, 0x21e71cfd, 0x180a1b00, 0x22f234d4, 0x27e81311,
177 	0x1f19241d, 0x1821220f, 0x1e141649, 0x1422131f, 0x1b2c1310, 0x0f240f24,
178 	0x151c1915, 0x1e141f0c, 0x1b10182a, 0x005d0e38, 0x0f391a26, 0xe87fe873,
179 	0xea52f73e, 0x0035003b, 0xf255f359, 0xf35ef55c, 0xe37feb64, 0xf239f443,
180 	0xf547f64d, 0xeb55f058, 0xe968f162, 0xdb7ff652, 0xf830f83d, 0xf842f946,
181 	0xf24bf64f, 0xf753f45c, 0xee6cfc4f, 0xea45f04b, 0xfe3a013a, 0xf34ef753,
182 	0xfc51f363, 0xf351fa26, 0xf33efa3a, 0xfe3bf049, 0xf64cf356, 0xf753f657,
183 	0x0000ea7f, 0xe77fe778, 0xe57fed72, 0xe975e776, 0xe675e871, 0xe476e178,
184 	0xdb7cf65e, 0xf166f663, 0xf36ace7f, 0xfb5c1139, 0xfb56f35e, 0xf45bfe4d,
185 	0x0047ff49, 0x0440f951, 0x05400f39, 0x01430044, 0xf6430144, 0x004d0240,
186 	0x0044fb4e, 0x0737053b, 0x02410e36, 0x0f2c053c, 0x0246fe4c, 0xee560c46,
187 	0x0540f446, 0x0b370538, 0x00450241, 0xfa4a0536, 0x0736fa4c, 0xf552fe4d,
188 	0xfe4d192a, 0x11f310f7, 0x11f41beb, 0x25e229d8, 0x2ad730d1, 0x27e02ed8,
189 	0x34cd2ed7, 0x34d92bed, 0x200b3dc9, 0x38d23ece, 0x51bd2dec, 0x23fe1c0f,
190 	0x22012701, 0x1e111426, 0x122d0f36, 0x004f24f0, 0x25f225ef, 0x2001220f,
191 	0x1d0f1819, 0x22161f10, 0x23121f1c, 0x2129241c, 0x1b2f153e, 0x121f131a,
192 	0x24181817, 0x1b10181e, 0x1f1d1629, 0x162a103c, 0x0f340e3c, 0x034ef07b,
193 	0x15351638, 0x193d1521, 0x1332113d, 0xfd4ef84a, 0xf748f648, 0xee4bf447,
194 	0xf53ffb46, 0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc,
195 	0x21ff2107, 0x1f0c2517, 0x1f261440, 0xf747f925, 0xf82cf531, 0xf638f43b,
196 	0xf83ff743, 0xfa44f64f, 0xfd4ef84a, 0xf748f648, 0xee4bf447, 0xf53ffb46,
197 	0xef4bf248, 0xf043f835, 0xf23bf734, 0xf54409fe, 0x1ef61ffc, 0x21ff2107,
198 	0x1f0c2517, 0x1f261440
199 };
200 
201 static void
assemble_scaling_list(struct hantro_ctx * ctx)202 assemble_scaling_list(struct hantro_ctx *ctx)
203 {
204 	const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
205 	const struct v4l2_ctrl_h264_scaling_matrix *scaling = ctrls->scaling;
206 	const struct v4l2_ctrl_h264_pps *pps = ctrls->pps;
207 	const size_t num_list_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4);
208 	const size_t list_len_4x4 = ARRAY_SIZE(scaling->scaling_list_4x4[0]);
209 	const size_t list_len_8x8 = ARRAY_SIZE(scaling->scaling_list_8x8[0]);
210 	struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
211 	u32 *dst = (u32 *)tbl->scaling_list;
212 	const u32 *src;
213 	int i, j;
214 
215 	if (!(pps->flags & V4L2_H264_PPS_FLAG_SCALING_MATRIX_PRESENT))
216 		return;
217 
218 	for (i = 0; i < num_list_4x4; i++) {
219 		src = (u32 *)&scaling->scaling_list_4x4[i];
220 		for (j = 0; j < list_len_4x4 / 4; j++)
221 			*dst++ = swab32(src[j]);
222 	}
223 
224 	/* Only Intra/Inter Y lists */
225 	for (i = 0; i < 2; i++) {
226 		src = (u32 *)&scaling->scaling_list_8x8[i];
227 		for (j = 0; j < list_len_8x8 / 4; j++)
228 			*dst++ = swab32(src[j]);
229 	}
230 }
231 
prepare_table(struct hantro_ctx * ctx)232 static void prepare_table(struct hantro_ctx *ctx)
233 {
234 	const struct hantro_h264_dec_ctrls *ctrls = &ctx->h264_dec.ctrls;
235 	const struct v4l2_ctrl_h264_decode_params *dec_param = ctrls->decode;
236 	const struct v4l2_ctrl_h264_sps *sps = ctrls->sps;
237 	struct hantro_h264_dec_priv_tbl *tbl = ctx->h264_dec.priv.cpu;
238 	const struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
239 	u32 dpb_longterm = 0;
240 	u32 dpb_valid = 0;
241 	int i;
242 
243 	for (i = 0; i < HANTRO_H264_DPB_SIZE; ++i) {
244 		tbl->poc[i * 2] = dpb[i].top_field_order_cnt;
245 		tbl->poc[i * 2 + 1] = dpb[i].bottom_field_order_cnt;
246 
247 		if (!(dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
248 			continue;
249 
250 		/*
251 		 * Set up bit maps of valid and long term DPBs.
252 		 * NOTE: The bits are reversed, i.e. MSb is DPB 0. For frame
253 		 * decoding, bit 31 to 15 are used, while for field decoding,
254 		 * all bits are used, with bit 31 being a top field, 30 a bottom
255 		 * field and so on.
256 		 */
257 		if (dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) {
258 			if (dpb[i].fields & V4L2_H264_TOP_FIELD_REF)
259 				dpb_valid |= REF_BIT(i * 2);
260 
261 			if (dpb[i].fields & V4L2_H264_BOTTOM_FIELD_REF)
262 				dpb_valid |= REF_BIT(i * 2 + 1);
263 
264 			if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM) {
265 				dpb_longterm |= REF_BIT(i * 2);
266 				dpb_longterm |= REF_BIT(i * 2 + 1);
267 			}
268 		} else {
269 			dpb_valid |= REF_BIT(i);
270 
271 			if (dpb[i].flags & V4L2_H264_DPB_ENTRY_FLAG_LONG_TERM)
272 				dpb_longterm |= REF_BIT(i);
273 		}
274 	}
275 	ctx->h264_dec.dpb_valid = dpb_valid;
276 	ctx->h264_dec.dpb_longterm = dpb_longterm;
277 
278 	if ((dec_param->flags & V4L2_H264_DECODE_PARAM_FLAG_FIELD_PIC) ||
279 	    !(sps->flags & V4L2_H264_SPS_FLAG_MB_ADAPTIVE_FRAME_FIELD)) {
280 		tbl->poc[32] = ctx->h264_dec.cur_poc;
281 		tbl->poc[33] = 0;
282 	} else {
283 		tbl->poc[32] = dec_param->top_field_order_cnt;
284 		tbl->poc[33] = dec_param->bottom_field_order_cnt;
285 	}
286 
287 	assemble_scaling_list(ctx);
288 }
289 
dpb_entry_match(const struct v4l2_h264_dpb_entry * a,const struct v4l2_h264_dpb_entry * b)290 static bool dpb_entry_match(const struct v4l2_h264_dpb_entry *a,
291 			    const struct v4l2_h264_dpb_entry *b)
292 {
293 	return a->reference_ts == b->reference_ts;
294 }
295 
update_dpb(struct hantro_ctx * ctx)296 static void update_dpb(struct hantro_ctx *ctx)
297 {
298 	const struct v4l2_ctrl_h264_decode_params *dec_param;
299 	DECLARE_BITMAP(new, ARRAY_SIZE(dec_param->dpb)) = { 0, };
300 	DECLARE_BITMAP(used, ARRAY_SIZE(dec_param->dpb)) = { 0, };
301 	unsigned int i, j;
302 
303 	dec_param = ctx->h264_dec.ctrls.decode;
304 
305 	/* Disable all entries by default. */
306 	for (i = 0; i < ARRAY_SIZE(ctx->h264_dec.dpb); i++)
307 		ctx->h264_dec.dpb[i].flags = 0;
308 
309 	/* Try to match new DPB entries with existing ones by their POCs. */
310 	for (i = 0; i < ARRAY_SIZE(dec_param->dpb); i++) {
311 		const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
312 
313 		if (!(ndpb->flags & V4L2_H264_DPB_ENTRY_FLAG_VALID))
314 			continue;
315 
316 		/*
317 		 * To cut off some comparisons, iterate only on target DPB
318 		 * entries which are not used yet.
319 		 */
320 		for_each_clear_bit(j, used, ARRAY_SIZE(ctx->h264_dec.dpb)) {
321 			struct v4l2_h264_dpb_entry *cdpb;
322 
323 			cdpb = &ctx->h264_dec.dpb[j];
324 			if (!dpb_entry_match(cdpb, ndpb))
325 				continue;
326 
327 			*cdpb = *ndpb;
328 			set_bit(j, used);
329 			break;
330 		}
331 
332 		if (j == ARRAY_SIZE(ctx->h264_dec.dpb))
333 			set_bit(i, new);
334 	}
335 
336 	/* For entries that could not be matched, use remaining free slots. */
337 	for_each_set_bit(i, new, ARRAY_SIZE(dec_param->dpb)) {
338 		const struct v4l2_h264_dpb_entry *ndpb = &dec_param->dpb[i];
339 		struct v4l2_h264_dpb_entry *cdpb;
340 
341 		/*
342 		 * Both arrays are of the same sizes, so there is no way
343 		 * we can end up with no space in target array, unless
344 		 * something is buggy.
345 		 */
346 		j = find_first_zero_bit(used, ARRAY_SIZE(ctx->h264_dec.dpb));
347 		if (WARN_ON(j >= ARRAY_SIZE(ctx->h264_dec.dpb)))
348 			return;
349 
350 		cdpb = &ctx->h264_dec.dpb[j];
351 		*cdpb = *ndpb;
352 		set_bit(j, used);
353 	}
354 }
355 
hantro_h264_get_ref_buf(struct hantro_ctx * ctx,unsigned int dpb_idx)356 dma_addr_t hantro_h264_get_ref_buf(struct hantro_ctx *ctx,
357 				   unsigned int dpb_idx)
358 {
359 	struct v4l2_h264_dpb_entry *dpb = ctx->h264_dec.dpb;
360 	dma_addr_t dma_addr = 0;
361 	s32 cur_poc = ctx->h264_dec.cur_poc;
362 	u32 flags;
363 
364 	if (dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE)
365 		dma_addr = hantro_get_ref(ctx, dpb[dpb_idx].reference_ts);
366 
367 	if (!dma_addr) {
368 		struct vb2_v4l2_buffer *dst_buf;
369 		struct vb2_buffer *buf;
370 
371 		/*
372 		 * If a DPB entry is unused or invalid, address of current
373 		 * destination buffer is returned.
374 		 */
375 		dst_buf = hantro_get_dst_buf(ctx);
376 		buf = &dst_buf->vb2_buf;
377 		dma_addr = hantro_get_dec_buf_addr(ctx, buf);
378 	}
379 
380 	flags = dpb[dpb_idx].flags & V4L2_H264_DPB_ENTRY_FLAG_FIELD ? 0x2 : 0;
381 	flags |= abs(dpb[dpb_idx].top_field_order_cnt - cur_poc) <
382 		 abs(dpb[dpb_idx].bottom_field_order_cnt - cur_poc) ?
383 		 0x1 : 0;
384 
385 	return dma_addr | flags;
386 }
387 
hantro_h264_get_ref_nbr(struct hantro_ctx * ctx,unsigned int dpb_idx)388 u16 hantro_h264_get_ref_nbr(struct hantro_ctx *ctx, unsigned int dpb_idx)
389 {
390 	const struct v4l2_h264_dpb_entry *dpb = &ctx->h264_dec.dpb[dpb_idx];
391 
392 	if (!(dpb->flags & V4L2_H264_DPB_ENTRY_FLAG_ACTIVE))
393 		return 0;
394 	return dpb->frame_num;
395 }
396 
397 /*
398  * Removes all references with the same parity as the current picture from the
399  * reference list. The remaining list will have references with the opposite
400  * parity. This is effectively a deduplication of references since each buffer
401  * stores two fields. For this reason, each buffer is found twice in the
402  * reference list.
403  *
404  * This technique has been chosen through trial and error. This simple approach
405  * resulted in the highest conformance score. Note that this method may suffer
406  * worse quality in the case an opposite reference frame has been lost. If this
407  * becomes a problem in the future, it should be possible to add a preprocessing
408  * to identify un-paired fields and avoid removing them.
409  */
deduplicate_reflist(struct v4l2_h264_reflist_builder * b,struct v4l2_h264_reference * reflist)410 static void deduplicate_reflist(struct v4l2_h264_reflist_builder *b,
411 				struct v4l2_h264_reference *reflist)
412 {
413 	int write_idx = 0;
414 	int i;
415 
416 	if (b->cur_pic_fields == V4L2_H264_FRAME_REF) {
417 		write_idx = b->num_valid;
418 		goto done;
419 	}
420 
421 	for (i = 0; i < b->num_valid; i++) {
422 		if (!(b->cur_pic_fields == reflist[i].fields)) {
423 			reflist[write_idx++] = reflist[i];
424 			continue;
425 		}
426 	}
427 
428 done:
429 	/* Should not happen unless we have a bug in the reflist builder. */
430 	if (WARN_ON(write_idx > 16))
431 		write_idx = 16;
432 
433 	/* Clear the remaining, some streams fails otherwise */
434 	for (; write_idx < 16; write_idx++)
435 		reflist[write_idx].index = 15;
436 }
437 
hantro_h264_dec_prepare_run(struct hantro_ctx * ctx)438 int hantro_h264_dec_prepare_run(struct hantro_ctx *ctx)
439 {
440 	struct hantro_h264_dec_hw_ctx *h264_ctx = &ctx->h264_dec;
441 	struct hantro_h264_dec_ctrls *ctrls = &h264_ctx->ctrls;
442 	struct v4l2_h264_reflist_builder reflist_builder;
443 
444 	hantro_start_prepare_run(ctx);
445 
446 	ctrls->scaling =
447 		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SCALING_MATRIX);
448 	if (WARN_ON(!ctrls->scaling))
449 		return -EINVAL;
450 
451 	ctrls->decode =
452 		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_DECODE_PARAMS);
453 	if (WARN_ON(!ctrls->decode))
454 		return -EINVAL;
455 
456 	ctrls->sps =
457 		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_SPS);
458 	if (WARN_ON(!ctrls->sps))
459 		return -EINVAL;
460 
461 	ctrls->pps =
462 		hantro_get_ctrl(ctx, V4L2_CID_STATELESS_H264_PPS);
463 	if (WARN_ON(!ctrls->pps))
464 		return -EINVAL;
465 
466 	/* Update the DPB with new refs. */
467 	update_dpb(ctx);
468 
469 	/* Build the P/B{0,1} ref lists. */
470 	v4l2_h264_init_reflist_builder(&reflist_builder, ctrls->decode,
471 				       ctrls->sps, ctx->h264_dec.dpb);
472 	h264_ctx->cur_poc = reflist_builder.cur_pic_order_count;
473 
474 	/* Prepare data in memory. */
475 	prepare_table(ctx);
476 
477 	v4l2_h264_build_p_ref_list(&reflist_builder, h264_ctx->reflists.p);
478 	v4l2_h264_build_b_ref_lists(&reflist_builder, h264_ctx->reflists.b0,
479 				    h264_ctx->reflists.b1);
480 
481 	/*
482 	 * Reduce ref lists to at most 16 entries, Hantro hardware will deduce
483 	 * the actual picture lists in field through the dpb_valid,
484 	 * dpb_longterm bitmap along with the current frame parity.
485 	 */
486 	if (reflist_builder.cur_pic_fields != V4L2_H264_FRAME_REF) {
487 		deduplicate_reflist(&reflist_builder, h264_ctx->reflists.p);
488 		deduplicate_reflist(&reflist_builder, h264_ctx->reflists.b0);
489 		deduplicate_reflist(&reflist_builder, h264_ctx->reflists.b1);
490 	}
491 
492 	return 0;
493 }
494 
hantro_h264_dec_exit(struct hantro_ctx * ctx)495 void hantro_h264_dec_exit(struct hantro_ctx *ctx)
496 {
497 	struct hantro_dev *vpu = ctx->dev;
498 	struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
499 	struct hantro_aux_buf *priv = &h264_dec->priv;
500 
501 	dma_free_coherent(vpu->dev, priv->size, priv->cpu, priv->dma);
502 }
503 
hantro_h264_dec_init(struct hantro_ctx * ctx)504 int hantro_h264_dec_init(struct hantro_ctx *ctx)
505 {
506 	struct hantro_dev *vpu = ctx->dev;
507 	struct hantro_h264_dec_hw_ctx *h264_dec = &ctx->h264_dec;
508 	struct hantro_aux_buf *priv = &h264_dec->priv;
509 	struct hantro_h264_dec_priv_tbl *tbl;
510 
511 	priv->cpu = dma_alloc_coherent(vpu->dev, sizeof(*tbl), &priv->dma,
512 				       GFP_KERNEL);
513 	if (!priv->cpu)
514 		return -ENOMEM;
515 
516 	priv->size = sizeof(*tbl);
517 	tbl = priv->cpu;
518 	memcpy(tbl->cabac_table, h264_cabac_table, sizeof(tbl->cabac_table));
519 
520 	return 0;
521 }
522