Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Fast C2P (Chunky-to-Planar) Conversion | |
3 | * | |
4 | * Copyright (C) 2003 Geert Uytterhoeven | |
5 | * | |
6 | * NOTES: | |
7 | * - This code was inspired by Scout's C2P tutorial | |
8 | * - It assumes it is running on a big-endian system | |
9 | * | |
10 | * This file is subject to the terms and conditions of the GNU General Public | |
11 | * License. See the file COPYING in the main directory of this archive | |
12 | * for more details. | |
13 | */ | |
14 | ||
8b54b613 | 15 | #include <linux/module.h> |
1da177e4 LT |
16 | #include <linux/string.h> |
17 | #include "c2p.h" | |
18 | ||
19 | ||
/*
 * Basic transpose step
 *
 * Exchange the bits of d[i1] selected by mask with the bits of d[i2]
 * selected by (mask << shift), using the XOR-swap trick so that no
 * second temporary is needed.
 *
 * All arguments are parenthesized so that compound expressions (e.g. a
 * mask built with '|') expand correctly. d, i1, i2 and shift are still
 * evaluated more than once, so they must be free of side effects.
 */

#define _transp(d, i1, i2, shift, mask)					\
	do {								\
		u32 transp_t_ = ((d)[i1] ^ ((d)[i2] >> (shift))) & (mask); \
									\
		(d)[i1] ^= transp_t_;					\
		(d)[i2] ^= transp_t_ << (shift);			\
	} while (0)
30 | ||
31 | static inline u32 get_mask(int n) | |
32 | { | |
33 | switch (n) { | |
34 | case 1: | |
35 | return 0x55555555; | |
36 | break; | |
37 | ||
38 | case 2: | |
39 | return 0x33333333; | |
40 | break; | |
41 | ||
42 | case 4: | |
43 | return 0x0f0f0f0f; | |
44 | break; | |
45 | ||
46 | case 8: | |
47 | return 0x00ff00ff; | |
48 | break; | |
49 | ||
50 | case 16: | |
51 | return 0x0000ffff; | |
52 | break; | |
53 | } | |
54 | return 0; | |
55 | } | |
56 | ||
/*
 * Transpose n-bit groups between neighbouring words:
 * pairs (0,1), (2,3), (4,5) and (6,7) of d.
 */
#define transp_nx1(d, n)						\
	do {								\
		u32 nx1_mask = get_mask(n);				\
									\
		_transp(d, 0, 1, n, nx1_mask);	/* block 1 */		\
		_transp(d, 2, 3, n, nx1_mask);	/* block 2 */		\
		_transp(d, 4, 5, n, nx1_mask);	/* block 3 */		\
		_transp(d, 6, 7, n, nx1_mask);	/* block 4 */		\
	} while (0)
69 | ||
/*
 * Transpose n-bit groups between words that are two apart:
 * pairs (0,2), (1,3) in the first block and (4,6), (5,7) in the second.
 */
#define transp_nx2(d, n)						\
	do {								\
		u32 nx2_mask = get_mask(n);				\
									\
		/* block 1 */						\
		_transp(d, 0, 2, n, nx2_mask);				\
		_transp(d, 1, 3, n, nx2_mask);				\
		/* block 2 */						\
		_transp(d, 4, 6, n, nx2_mask);				\
		_transp(d, 5, 7, n, nx2_mask);				\
	} while (0)
80 | ||
/*
 * Transpose n-bit groups between words that are four apart:
 * pairs (0,4), (1,5), (2,6) and (3,7) of d.
 */
#define transp_nx4(d, n)						\
	do {								\
		u32 nx4_mask = get_mask(n);				\
									\
		_transp(d, 0, 4, n, nx4_mask);				\
		_transp(d, 1, 5, n, nx4_mask);				\
		_transp(d, 2, 6, n, nx4_mask);				\
		_transp(d, 3, 7, n, nx4_mask);				\
	} while (0)
89 | ||
/*
 * Dispatch to transp_nx<m>: transpose n-bit groups between words of d that
 * are m positions apart. m is token-pasted, so it must be a literal 1, 2
 * or 4.
 */
#define transp(d, n, m)		transp_nx##m(d, n)
91 | ||
92 | ||
93 | /* | |
94 | * Perform a full C2P step on 32 8-bit pixels, stored in 8 32-bit words | |
95 | * containing | |
96 | * - 32 8-bit chunky pixels on input | |
97 | * - permuted planar data on output | |
98 | */ | |
99 | ||
100 | static void c2p_8bpp(u32 d[8]) | |
101 | { | |
102 | transp(d, 16, 4); | |
103 | transp(d, 8, 2); | |
104 | transp(d, 4, 1); | |
105 | transp(d, 2, 4); | |
106 | transp(d, 1, 2); | |
107 | } | |
108 | ||
109 | ||
/*
 * Permutation indices of the planar data after c2p: the word for
 * bitplane i is found at index perm_c2p_8bpp[i] of the converted block.
 */

static const int perm_c2p_8bpp[8] = {
	7, 5, 3, 1, 6, 4, 2, 0
};
115 | ||
116 | ||
/*
 * Merge two values bit by bit: where a mask bit is set the result takes
 * the bit from a, elsewhere it takes the bit from b.
 * Same result as (a & mask) | (b & ~mask).
 */

static inline unsigned long comp(unsigned long a, unsigned long b,
				 unsigned long mask)
{
	unsigned long differing = a ^ b;
	unsigned long selected = differing & mask;

	/* Flipping the selected differing bits of b turns them into a's */
	return b ^ selected;
}
127 | ||
128 | ||
129 | /* | |
130 | * Store a full block of planar data after c2p conversion | |
131 | */ | |
132 | ||
133 | static inline void store_planar(char *dst, u32 dst_inc, u32 bpp, u32 d[8]) | |
134 | { | |
135 | int i; | |
136 | ||
137 | for (i = 0; i < bpp; i++, dst += dst_inc) | |
138 | *(u32 *)dst = d[perm_c2p_8bpp[i]]; | |
139 | } | |
140 | ||
141 | ||
142 | /* | |
143 | * Store a partial block of planar data after c2p conversion | |
144 | */ | |
145 | ||
146 | static inline void store_planar_masked(char *dst, u32 dst_inc, u32 bpp, | |
147 | u32 d[8], u32 mask) | |
148 | { | |
149 | int i; | |
150 | ||
151 | for (i = 0; i < bpp; i++, dst += dst_inc) | |
152 | *(u32 *)dst = comp(d[perm_c2p_8bpp[i]], *(u32 *)dst, mask); | |
153 | } | |
154 | ||
155 | ||
156 | /* | |
157 | * c2p - Copy 8-bit chunky image data to a planar frame buffer | |
158 | * @dst: Starting address of the planar frame buffer | |
159 | * @dx: Horizontal destination offset (in pixels) | |
160 | * @dy: Vertical destination offset (in pixels) | |
161 | * @width: Image width (in pixels) | |
162 | * @height: Image height (in pixels) | |
163 | * @dst_nextline: Frame buffer offset to the next line (in bytes) | |
164 | * @dst_nextplane: Frame buffer offset to the next plane (in bytes) | |
165 | * @src_nextline: Image offset to the next line (in bytes) | |
166 | * @bpp: Bits per pixel of the planar frame buffer (1-8) | |
167 | */ | |
168 | ||
169 | void c2p(u8 *dst, const u8 *src, u32 dx, u32 dy, u32 width, u32 height, | |
170 | u32 dst_nextline, u32 dst_nextplane, u32 src_nextline, u32 bpp) | |
171 | { | |
172 | int dst_idx; | |
173 | u32 d[8], first, last, w; | |
174 | const u8 *c; | |
175 | u8 *p; | |
176 | ||
177 | dst += dy*dst_nextline+(dx & ~31); | |
178 | dst_idx = dx % 32; | |
179 | first = ~0UL >> dst_idx; | |
180 | last = ~(~0UL >> ((dst_idx+width) % 32)); | |
181 | while (height--) { | |
182 | c = src; | |
183 | p = dst; | |
184 | w = width; | |
185 | if (dst_idx+width <= 32) { | |
186 | /* Single destination word */ | |
187 | first &= last; | |
188 | memset(d, 0, sizeof(d)); | |
189 | memcpy((u8 *)d+dst_idx, c, width); | |
190 | c += width; | |
191 | c2p_8bpp(d); | |
192 | store_planar_masked(p, dst_nextplane, bpp, d, first); | |
193 | p += 4; | |
194 | } else { | |
195 | /* Multiple destination words */ | |
196 | w = width; | |
197 | /* Leading bits */ | |
198 | if (dst_idx) { | |
199 | w = 32 - dst_idx; | |
200 | memset(d, 0, dst_idx); | |
201 | memcpy((u8 *)d+dst_idx, c, w); | |
202 | c += w; | |
203 | c2p_8bpp(d); | |
204 | store_planar_masked(p, dst_nextplane, bpp, d, first); | |
205 | p += 4; | |
206 | w = width-w; | |
207 | } | |
208 | /* Main chunk */ | |
209 | while (w >= 32) { | |
210 | memcpy(d, c, 32); | |
211 | c += 32; | |
212 | c2p_8bpp(d); | |
213 | store_planar(p, dst_nextplane, bpp, d); | |
214 | p += 4; | |
215 | w -= 32; | |
216 | } | |
217 | /* Trailing bits */ | |
218 | w %= 32; | |
219 | if (w > 0) { | |
220 | memcpy(d, c, w); | |
221 | memset((u8 *)d+w, 0, 32-w); | |
222 | c2p_8bpp(d); | |
223 | store_planar_masked(p, dst_nextplane, bpp, d, last); | |
224 | } | |
225 | } | |
226 | src += src_nextline; | |
227 | dst += dst_nextline; | |
228 | } | |
229 | } | |
880e5e21 | 230 | EXPORT_SYMBOL_GPL(c2p); |
1da177e4 | 231 | |
8b54b613 | 232 | MODULE_LICENSE("GPL"); |