5 * Routines for converting between UTF-8 and OSTA Compressed Unicode.
6 * Also handles filename mangling
9 * OSTA Compressed Unicode is explained in the OSTA UDF specification.
10 * http://www.osta.org/
11 * UTF-8 is explained in IETF RFC 3629 (which obsoletes RFC 2279).
12 * https://www.rfc-editor.org/rfc/rfc3629.txt
15 * This file is distributed under the terms of the GNU General Public
16 * License (GPL). Copies of the GPL can be obtained from:
17 * ftp://prep.ai.mit.edu/pub/gnu/GPL
18 * Each contributing author retains all rights to their own work.
23 #include <linux/kernel.h>
24 #include <linux/string.h> /* for memset */
25 #include <linux/nls.h>
26 #include <linux/crc-itu-t.h>
27 #include <linux/slab.h>
31 static int udf_translate_to_linux(uint8_t *, int, uint8_t *, int, uint8_t *,
34 static int udf_char_to_ustr(struct ustr
*dest
, const uint8_t *src
, int strlen
)
36 if ((!dest
) || (!src
) || (!strlen
) || (strlen
> UDF_NAME_LEN
- 2))
39 memset(dest
, 0, sizeof(struct ustr
));
40 memcpy(dest
->u_name
, src
, strlen
);
50 int udf_build_ustr(struct ustr
*dest
, dstring
*ptr
, int size
)
54 if (!dest
|| !ptr
|| !size
)
58 usesize
= min_t(size_t, ptr
[size
- 1], sizeof(dest
->u_name
));
59 usesize
= min(usesize
, size
- 2);
60 dest
->u_cmpID
= ptr
[0];
61 dest
->u_len
= usesize
;
62 memcpy(dest
->u_name
, ptr
+ 1, usesize
);
63 memset(dest
->u_name
+ usesize
, 0, sizeof(dest
->u_name
) - usesize
);
69 * udf_build_ustr_exact
71 static void udf_build_ustr_exact(struct ustr
*dest
, dstring
*ptr
, int exactsize
)
73 memset(dest
, 0, sizeof(struct ustr
));
74 dest
->u_cmpID
= ptr
[0];
75 dest
->u_len
= exactsize
- 1;
76 memcpy(dest
->u_name
, ptr
+ 1, exactsize
- 1);
79 static int udf_uni2char_utf8(wchar_t uni
,
89 out
[u_len
++] = (unsigned char)uni
;
90 } else if (uni
< 0x800) {
93 out
[u_len
++] = (unsigned char)(0xc0 | (uni
>> 6));
94 out
[u_len
++] = (unsigned char)(0x80 | (uni
& 0x3f));
98 out
[u_len
++] = (unsigned char)(0xe0 | (uni
>> 12));
99 out
[u_len
++] = (unsigned char)(0x80 | ((uni
>> 6) & 0x3f));
100 out
[u_len
++] = (unsigned char)(0x80 | (uni
& 0x3f));
105 static int udf_char2uni_utf8(const unsigned char *in
,
109 unsigned int utf_char
;
115 for (u_len
= 0; u_len
< boundlen
;) {
118 /* Complete a multi-byte UTF-8 character */
120 utf_char
= (utf_char
<< 6) | (c
& 0x3f);
124 /* Check for a multi-byte UTF-8 character */
126 /* Start a multi-byte UTF-8 character */
127 if ((c
& 0xe0) == 0xc0) {
130 } else if ((c
& 0xf0) == 0xe0) {
133 } else if ((c
& 0xf8) == 0xf0) {
136 } else if ((c
& 0xfc) == 0xf8) {
139 } else if ((c
& 0xfe) == 0xfc) {
148 /* Single byte UTF-8 character (most common) */
162 static int udf_name_from_CS0(struct ustr
*utf_o
,
163 const struct ustr
*ocu_i
,
164 int (*conv_f
)(wchar_t, unsigned char *, int))
167 uint8_t cmp_id
, ocu_len
;
171 ocu_len
= ocu_i
->u_len
;
173 memset(utf_o
, 0, sizeof(struct ustr
));
177 cmp_id
= ocu_i
->u_cmpID
;
178 if (cmp_id
!= 8 && cmp_id
!= 16) {
179 memset(utf_o
, 0, sizeof(struct ustr
));
180 pr_err("unknown compression code (%d) stri=%s\n",
181 cmp_id
, ocu_i
->u_name
);
187 for (i
= 0; (i
< ocu_len
) && (utf_o
->u_len
<= (UDF_NAME_LEN
- 3));) {
188 /* Expand OSTA compressed Unicode to Unicode */
189 uint32_t c
= ocu
[i
++];
191 c
= (c
<< 8) | ocu
[i
++];
193 len
= conv_f(c
, &utf_o
->u_name
[utf_o
->u_len
],
194 UDF_NAME_LEN
- 2 - utf_o
->u_len
);
195 /* Valid character? */
198 else if (len
== -ENAMETOOLONG
)
201 utf_o
->u_name
[utf_o
->u_len
++] = '?';
208 static int udf_name_to_CS0(dstring
*ocu
, struct ustr
*uni
, int length
,
209 int (*conv_f
)(const unsigned char *, int, wchar_t *))
212 unsigned int max_val
;
216 memset(ocu
, 0, sizeof(dstring
) * length
);
223 for (i
= 0; i
< uni
->u_len
; i
++) {
224 /* Name didn't fit? */
225 if (u_len
+ 1 + u_ch
>= length
)
227 len
= conv_f(&uni
->u_name
[i
], uni
->u_len
- i
, &uni_char
);
230 /* Invalid character, deal with it */
236 if (uni_char
> max_val
) {
243 if (max_val
== 0xffff)
244 ocu
[++u_len
] = (uint8_t)(uni_char
>> 8);
245 ocu
[++u_len
] = (uint8_t)(uni_char
& 0xff);
249 ocu
[length
- 1] = (uint8_t)u_len
+ 1;
253 int udf_CS0toUTF8(struct ustr
*utf_o
, const struct ustr
*ocu_i
)
255 return udf_name_from_CS0(utf_o
, ocu_i
, udf_uni2char_utf8
);
258 int udf_get_filename(struct super_block
*sb
, uint8_t *sname
, int slen
,
259 uint8_t *dname
, int dlen
)
261 struct ustr
*filename
, *unifilename
;
262 int (*conv_f
)(wchar_t, unsigned char *, int);
268 filename
= kmalloc(sizeof(struct ustr
), GFP_NOFS
);
272 unifilename
= kmalloc(sizeof(struct ustr
), GFP_NOFS
);
278 udf_build_ustr_exact(unifilename
, sname
, slen
);
279 if (UDF_QUERY_FLAG(sb
, UDF_FLAG_UTF8
)) {
280 conv_f
= udf_uni2char_utf8
;
281 } else if (UDF_QUERY_FLAG(sb
, UDF_FLAG_NLS_MAP
)) {
282 conv_f
= UDF_SB(sb
)->s_nls_map
->uni2char
;
286 ret
= udf_name_from_CS0(filename
, unifilename
, conv_f
);
288 udf_debug("Failed in udf_get_filename: sname = %s\n", sname
);
292 ret
= udf_translate_to_linux(dname
, dlen
,
293 filename
->u_name
, filename
->u_len
,
294 unifilename
->u_name
, unifilename
->u_len
);
295 /* Zero length filename isn't valid... */
305 int udf_put_filename(struct super_block
*sb
, const uint8_t *sname
, int slen
,
306 uint8_t *dname
, int dlen
)
308 struct ustr unifilename
;
309 int (*conv_f
)(const unsigned char *, int, wchar_t *);
311 if (!udf_char_to_ustr(&unifilename
, sname
, slen
))
314 if (UDF_QUERY_FLAG(sb
, UDF_FLAG_UTF8
)) {
315 conv_f
= udf_char2uni_utf8
;
316 } else if (UDF_QUERY_FLAG(sb
, UDF_FLAG_NLS_MAP
)) {
317 conv_f
= UDF_SB(sb
)->s_nls_map
->char2uni
;
321 return udf_name_to_CS0(dname
, &unifilename
, dlen
, conv_f
);
324 #define ILLEGAL_CHAR_MARK '_'
328 /* Number of chars we need to store generated CRC to make filename unique */
331 static int udf_translate_to_linux(uint8_t *newName
, int newLen
,
332 uint8_t *udfName
, int udfLen
,
333 uint8_t *fidName
, int fidNameLen
)
335 int index
, newIndex
= 0, needsCRC
= 0;
336 int extIndex
= 0, newExtIndex
= 0, hasExt
= 0;
337 unsigned short valueCRC
;
340 if (udfName
[0] == '.' &&
341 (udfLen
== 1 || (udfLen
== 2 && udfName
[1] == '.'))) {
344 memcpy(newName
, udfName
, udfLen
);
346 for (index
= 0; index
< udfLen
; index
++) {
347 curr
= udfName
[index
];
348 if (curr
== '/' || curr
== 0) {
350 curr
= ILLEGAL_CHAR_MARK
;
351 while (index
+ 1 < udfLen
&&
352 (udfName
[index
+ 1] == '/' ||
353 udfName
[index
+ 1] == 0))
356 if (curr
== EXT_MARK
&&
357 (udfLen
- index
- 1) <= EXT_SIZE
) {
358 if (udfLen
== index
+ 1)
363 newExtIndex
= newIndex
;
366 if (newIndex
< newLen
)
367 newName
[newIndex
++] = curr
;
373 uint8_t ext
[EXT_SIZE
];
374 int localExtIndex
= 0;
379 index
< EXT_SIZE
&& extIndex
+ index
+ 1 < udfLen
;
381 curr
= udfName
[extIndex
+ index
+ 1];
383 if (curr
== '/' || curr
== 0) {
385 curr
= ILLEGAL_CHAR_MARK
;
386 while (extIndex
+ index
+ 2 < udfLen
&&
387 (index
+ 1 < EXT_SIZE
&&
388 (udfName
[extIndex
+ index
+ 2] == '/' ||
389 udfName
[extIndex
+ index
+ 2] == 0)))
392 ext
[localExtIndex
++] = curr
;
394 maxFilenameLen
= newLen
- CRC_LEN
- localExtIndex
;
395 if (newIndex
> maxFilenameLen
)
396 newIndex
= maxFilenameLen
;
398 newIndex
= newExtIndex
;
399 } else if (newIndex
> newLen
- CRC_LEN
)
400 newIndex
= newLen
- CRC_LEN
;
401 newName
[newIndex
++] = CRC_MARK
;
402 valueCRC
= crc_itu_t(0, fidName
, fidNameLen
);
403 newName
[newIndex
++] = hex_asc_upper_hi(valueCRC
>> 8);
404 newName
[newIndex
++] = hex_asc_upper_lo(valueCRC
>> 8);
405 newName
[newIndex
++] = hex_asc_upper_hi(valueCRC
);
406 newName
[newIndex
++] = hex_asc_upper_lo(valueCRC
);
409 newName
[newIndex
++] = EXT_MARK
;
410 for (index
= 0; index
< localExtIndex
; index
++)
411 newName
[newIndex
++] = ext
[index
];
This page took 0.039709 seconds and 6 git commands to generate.