5 * Routines for converting between UTF-8 and OSTA Compressed Unicode.
6 * Also handles filename mangling
9 * OSTA Compressed Unicode is explained in the OSTA UDF specification.
10 * http://www.osta.org/
11 * UTF-8 is explained in IETF RFC 3629 (which obsoletes RFC 2279).
12 * https://www.rfc-editor.org/rfc/rfc3629.txt
15 * This file is distributed under the terms of the GNU General Public
16 * License (GPL). Copies of the GPL can be obtained from:
17 * ftp://prep.ai.mit.edu/pub/gnu/GPL
18 * Each contributing author retains all rights to their own work.
23 #include <linux/kernel.h>
24 #include <linux/string.h> /* for memset */
25 #include <linux/nls.h>
26 #include <linux/crc-itu-t.h>
27 #include <linux/slab.h>
/*
 * Forward declaration: the definition appears later in this file, after its
 * caller udf_get_filename(). Parameters, in order, are:
 * newName, newLen, udfName, udfLen, fidName, fidNameLen.
 */
31 static int udf_translate_to_linux(uint8_t *, int, const uint8_t *, int,
32 const uint8_t *, int);
34 static int udf_uni2char_utf8(wchar_t uni
,
44 out
[u_len
++] = (unsigned char)uni
;
45 } else if (uni
< 0x800) {
48 out
[u_len
++] = (unsigned char)(0xc0 | (uni
>> 6));
49 out
[u_len
++] = (unsigned char)(0x80 | (uni
& 0x3f));
53 out
[u_len
++] = (unsigned char)(0xe0 | (uni
>> 12));
54 out
[u_len
++] = (unsigned char)(0x80 | ((uni
>> 6) & 0x3f));
55 out
[u_len
++] = (unsigned char)(0x80 | (uni
& 0x3f));
60 static int udf_char2uni_utf8(const unsigned char *in
,
64 unsigned int utf_char
;
70 for (u_len
= 0; u_len
< boundlen
;) {
73 /* Complete a multi-byte UTF-8 character */
75 utf_char
= (utf_char
<< 6) | (c
& 0x3f);
79 /* Check for a multi-byte UTF-8 character */
81 /* Start a multi-byte UTF-8 character */
82 if ((c
& 0xe0) == 0xc0) {
85 } else if ((c
& 0xf0) == 0xe0) {
88 } else if ((c
& 0xf8) == 0xf0) {
91 } else if ((c
& 0xfc) == 0xf8) {
94 } else if ((c
& 0xfe) == 0xfc) {
103 /* Single byte UTF-8 character (most common) */
117 static int udf_name_from_CS0(uint8_t *str_o
, int str_max_len
,
118 const uint8_t *ocu
, int ocu_len
,
119 int (*conv_f
)(wchar_t, unsigned char *, int))
125 if (str_max_len
<= 0)
129 memset(str_o
, 0, str_max_len
);
134 if (cmp_id
!= 8 && cmp_id
!= 16) {
135 memset(str_o
, 0, str_max_len
);
136 pr_err("unknown compression code (%d) stri=%s\n", cmp_id
, ocu
);
140 for (i
= 1; (i
< ocu_len
) && (str_o_len
< str_max_len
);) {
141 /* Expand OSTA compressed Unicode to Unicode */
142 uint32_t c
= ocu
[i
++];
144 c
= (c
<< 8) | ocu
[i
++];
146 len
= conv_f(c
, &str_o
[str_o_len
], str_max_len
- str_o_len
);
147 /* Valid character? */
150 else if (len
== -ENAMETOOLONG
)
153 str_o
[str_o_len
++] = '?';
159 static int udf_name_to_CS0(uint8_t *ocu
, int ocu_max_len
,
160 const uint8_t *str_i
, int str_len
,
161 int (*conv_f
)(const unsigned char *, int, wchar_t *))
164 unsigned int max_val
;
168 if (ocu_max_len
<= 0)
171 memset(ocu
, 0, ocu_max_len
);
178 for (i
= 0; i
< str_len
; i
++) {
179 /* Name didn't fit? */
180 if (u_len
+ u_ch
> ocu_max_len
)
182 len
= conv_f(&str_i
[i
], str_len
- i
, &uni_char
);
185 /* Invalid character, deal with it */
191 if (uni_char
> max_val
) {
198 if (max_val
== 0xffff)
199 ocu
[u_len
++] = (uint8_t)(uni_char
>> 8);
200 ocu
[u_len
++] = (uint8_t)(uni_char
& 0xff);
207 int udf_CS0toUTF8(uint8_t *utf_o
, int o_len
, const uint8_t *ocu_i
, int i_len
)
209 return udf_name_from_CS0(utf_o
, o_len
, ocu_i
, i_len
,
213 int udf_get_filename(struct super_block
*sb
, const uint8_t *sname
, int slen
,
214 uint8_t *dname
, int dlen
)
217 int (*conv_f
)(wchar_t, unsigned char *, int);
226 filename
= kmalloc(dlen
, GFP_NOFS
);
230 if (UDF_QUERY_FLAG(sb
, UDF_FLAG_UTF8
)) {
231 conv_f
= udf_uni2char_utf8
;
232 } else if (UDF_QUERY_FLAG(sb
, UDF_FLAG_NLS_MAP
)) {
233 conv_f
= UDF_SB(sb
)->s_nls_map
->uni2char
;
237 ret
= udf_name_from_CS0(filename
, dlen
, sname
, slen
, conv_f
);
239 udf_debug("Failed in udf_get_filename: sname = %s\n", sname
);
243 ret
= udf_translate_to_linux(dname
, dlen
, filename
, dlen
,
244 sname
+ 1, slen
- 1);
245 /* Zero length filename isn't valid... */
253 int udf_put_filename(struct super_block
*sb
, const uint8_t *sname
, int slen
,
254 uint8_t *dname
, int dlen
)
256 int (*conv_f
)(const unsigned char *, int, wchar_t *);
258 if (UDF_QUERY_FLAG(sb
, UDF_FLAG_UTF8
)) {
259 conv_f
= udf_char2uni_utf8
;
260 } else if (UDF_QUERY_FLAG(sb
, UDF_FLAG_NLS_MAP
)) {
261 conv_f
= UDF_SB(sb
)->s_nls_map
->char2uni
;
265 return udf_name_to_CS0(dname
, dlen
, sname
, slen
, conv_f
);
/*
 * Replacement character: udf_translate_to_linux() substitutes this for a
 * '/' or a NUL byte found in the name or its extension.
 */
268 #define ILLEGAL_CHAR_MARK '_'
272 /* Number of chars we need to store generated CRC to make filename unique */
275 static int udf_translate_to_linux(uint8_t *newName
, int newLen
,
276 const uint8_t *udfName
, int udfLen
,
277 const uint8_t *fidName
, int fidNameLen
)
279 int index
, newIndex
= 0, needsCRC
= 0;
280 int extIndex
= 0, newExtIndex
= 0, hasExt
= 0;
281 unsigned short valueCRC
;
284 if (udfName
[0] == '.' &&
285 (udfLen
== 1 || (udfLen
== 2 && udfName
[1] == '.'))) {
288 memcpy(newName
, udfName
, udfLen
);
290 for (index
= 0; index
< udfLen
; index
++) {
291 curr
= udfName
[index
];
292 if (curr
== '/' || curr
== 0) {
294 curr
= ILLEGAL_CHAR_MARK
;
295 while (index
+ 1 < udfLen
&&
296 (udfName
[index
+ 1] == '/' ||
297 udfName
[index
+ 1] == 0))
300 if (curr
== EXT_MARK
&&
301 (udfLen
- index
- 1) <= EXT_SIZE
) {
302 if (udfLen
== index
+ 1)
307 newExtIndex
= newIndex
;
310 if (newIndex
< newLen
)
311 newName
[newIndex
++] = curr
;
317 uint8_t ext
[EXT_SIZE
];
318 int localExtIndex
= 0;
323 index
< EXT_SIZE
&& extIndex
+ index
+ 1 < udfLen
;
325 curr
= udfName
[extIndex
+ index
+ 1];
327 if (curr
== '/' || curr
== 0) {
329 curr
= ILLEGAL_CHAR_MARK
;
330 while (extIndex
+ index
+ 2 < udfLen
&&
331 (index
+ 1 < EXT_SIZE
&&
332 (udfName
[extIndex
+ index
+ 2] == '/' ||
333 udfName
[extIndex
+ index
+ 2] == 0)))
336 ext
[localExtIndex
++] = curr
;
338 maxFilenameLen
= newLen
- CRC_LEN
- localExtIndex
;
339 if (newIndex
> maxFilenameLen
)
340 newIndex
= maxFilenameLen
;
342 newIndex
= newExtIndex
;
343 } else if (newIndex
> newLen
- CRC_LEN
)
344 newIndex
= newLen
- CRC_LEN
;
345 newName
[newIndex
++] = CRC_MARK
;
346 valueCRC
= crc_itu_t(0, fidName
, fidNameLen
);
347 newName
[newIndex
++] = hex_asc_upper_hi(valueCRC
>> 8);
348 newName
[newIndex
++] = hex_asc_upper_lo(valueCRC
>> 8);
349 newName
[newIndex
++] = hex_asc_upper_hi(valueCRC
);
350 newName
[newIndex
++] = hex_asc_upper_lo(valueCRC
);
353 newName
[newIndex
++] = EXT_MARK
;
354 for (index
= 0; index
< localExtIndex
; index
++)
355 newName
[newIndex
++] = ext
[index
];
This page took 0.048908 seconds and 6 git commands to generate.