1 files changed, 90 insertions, 188 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 47e61883275d..4d7a674ebce5 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -76,151 +76,72 @@ static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
 	memcpy(dest->u_name, ptr + 1, exactsize - 1);
 }
 
-/*
- * udf_CS0toUTF8
- *
- * PURPOSE
- *	Convert OSTA Compressed Unicode to the UTF-8 equivalent.
- *
- * PRE-CONDITIONS
- *	utf			Pointer to UTF-8 output buffer.
- *	ocu			Pointer to OSTA Compressed Unicode input buffer
- *				of size UDF_NAME_LEN bytes.
- * 				both of type "struct ustr *"
- *
- * POST-CONDITIONS
- *	<return>		>= 0 on success.
- *
- * HISTORY
- *	November 12, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- */
-int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
+static int udf_uni2char_utf8(wchar_t uni,
+			     unsigned char *out,
+			     int boundlen)
 {
-	const uint8_t *ocu;
-	uint8_t cmp_id, ocu_len;
-	int i;
-
-	ocu_len = ocu_i->u_len;
-	if (ocu_len == 0) {
-		memset(utf_o, 0, sizeof(struct ustr));
-		return 0;
-	}
-
-	cmp_id = ocu_i->u_cmpID;
-	if (cmp_id != 8 && cmp_id != 16) {
-		memset(utf_o, 0, sizeof(struct ustr));
-		pr_err("unknown compression code (%d) stri=%s\n",
-		       cmp_id, ocu_i->u_name);
-		return -EINVAL;
-	}
-
-	ocu = ocu_i->u_name;
-	utf_o->u_len = 0;
-	for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
-
-		/* Expand OSTA compressed Unicode to Unicode */
-		uint32_t c = ocu[i++];
-		if (cmp_id == 16)
-			c = (c << 8) | ocu[i++];
-
-		/* Compress Unicode to UTF-8 */
-		if (c < 0x80U)
-			utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
-		else if (c < 0x800U) {
-			if (utf_o->u_len > (UDF_NAME_LEN - 4))
-				break;
-			utf_o->u_name[utf_o->u_len++] =
-						(uint8_t)(0xc0 | (c >> 6));
-			utf_o->u_name[utf_o->u_len++] =
-						(uint8_t)(0x80 | (c & 0x3f));
-		} else {
-			if (utf_o->u_len > (UDF_NAME_LEN - 5))
-				break;
-			utf_o->u_name[utf_o->u_len++] =
-						(uint8_t)(0xe0 | (c >> 12));
-			utf_o->u_name[utf_o->u_len++] =
-						(uint8_t)(0x80 |
-							  ((c >> 6) & 0x3f));
-			utf_o->u_name[utf_o->u_len++] =
-						(uint8_t)(0x80 | (c & 0x3f));
-		}
+	int u_len = 0;
+
+	if (boundlen <= 0)
+		return -ENAMETOOLONG;
+
+	if (uni < 0x80) {
+		out[u_len++] = (unsigned char)uni;
+	} else if (uni < 0x800) {
+		if (boundlen < 2)
+			return -ENAMETOOLONG;
+		out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
+		out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
+	} else {
+		if (boundlen < 3)
+			return -ENAMETOOLONG;
+		out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
+		out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
+		out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
 	}
-	utf_o->u_cmpID = 8;
-
-	return utf_o->u_len;
+	return u_len;
 }
 
-/*
- *
- * udf_UTF8toCS0
- *
- * PURPOSE
- *	Convert UTF-8 to the OSTA Compressed Unicode equivalent.
- *
- * DESCRIPTION
- *	This routine is only called by udf_lookup().
- *
- * PRE-CONDITIONS
- *	ocu			Pointer to OSTA Compressed Unicode output
- *				buffer of size UDF_NAME_LEN bytes.
- *	utf			Pointer to UTF-8 input buffer.
- *	utf_len			Length of UTF-8 input buffer in bytes.
- *
- * POST-CONDITIONS
- *	<return>		Zero on success.
- *
- * HISTORY
- *	November 12, 1997 - Andrew E. Mileski
- *	Written, tested, and released.
- */
-static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
+static int udf_char2uni_utf8(const unsigned char *in,
+			     int boundlen,
+			     wchar_t *uni)
 {
-	unsigned c, i, max_val, utf_char;
-	int utf_cnt, u_len, u_ch;
+	unsigned int utf_char;
+	unsigned char c;
+	int utf_cnt, u_len;
 
-	memset(ocu, 0, sizeof(dstring) * length);
-	ocu[0] = 8;
-	max_val = 0xffU;
-	u_ch = 1;
-
-try_again:
-	u_len = 0U;
-	utf_char = 0U;
-	utf_cnt = 0U;
-	for (i = 0U; i < utf->u_len; i++) {
-		/* Name didn't fit? */
-		if (u_len + 1 + u_ch >= length)
-			return 0;
-
-		c = (uint8_t)utf->u_name[i];
+	utf_char = 0;
+	utf_cnt = 0;
+	for (u_len = 0; u_len < boundlen;) {
+		c = in[u_len++];
 
 		/* Complete a multi-byte UTF-8 character */
 		if (utf_cnt) {
-			utf_char = (utf_char << 6) | (c & 0x3fU);
+			utf_char = (utf_char << 6) | (c & 0x3f);
 			if (--utf_cnt)
 				continue;
 		} else {
 			/* Check for a multi-byte UTF-8 character */
-			if (c & 0x80U) {
+			if (c & 0x80) {
 				/* Start a multi-byte UTF-8 character */
-				if ((c & 0xe0U) == 0xc0U) {
-					utf_char = c & 0x1fU;
+				if ((c & 0xe0) == 0xc0) {
+					utf_char = c & 0x1f;
 					utf_cnt = 1;
-				} else if ((c & 0xf0U) == 0xe0U) {
-					utf_char = c & 0x0fU;
+				} else if ((c & 0xf0) == 0xe0) {
+					utf_char = c & 0x0f;
 					utf_cnt = 2;
-				} else if ((c & 0xf8U) == 0xf0U) {
-					utf_char = c & 0x07U;
+				} else if ((c & 0xf8) == 0xf0) {
+					utf_char = c & 0x07;
 					utf_cnt = 3;
-				} else if ((c & 0xfcU) == 0xf8U) {
-					utf_char = c & 0x03U;
+				} else if ((c & 0xfc) == 0xf8) {
+					utf_char = c & 0x03;
 					utf_cnt = 4;
-				} else if ((c & 0xfeU) == 0xfcU) {
-					utf_char = c & 0x01U;
+				} else if ((c & 0xfe) == 0xfc) {
+					utf_char = c & 0x01;
 					utf_cnt = 5;
 				} else {
-					goto error_out;
+					utf_cnt = -1;
+					break;
 				}
 				continue;
 			} else {
@@ -228,36 +149,19 @@ try_again:
 				utf_char = c;
 			}
 		}
-
-		/* Choose no compression if necessary */
-		if (utf_char > max_val) {
-			if (max_val == 0xffU) {
-				max_val = 0xffffU;
-				ocu[0] = (uint8_t)0x10U;
-				u_ch = 2;
-				goto try_again;
-			}
-			goto error_out;
-		}
-
-		if (max_val == 0xffffU)
-			ocu[++u_len] = (uint8_t)(utf_char >> 8);
-		ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
+		*uni = utf_char;
+		break;
 	}
-
 	if (utf_cnt) {
-error_out:
-		ocu[++u_len] = '?';
-		printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
+		*uni = '?';
+		return -EINVAL;
 	}
-
-	ocu[length - 1] = (uint8_t)u_len + 1;
-
-	return u_len + 1;
+	return u_len;
 }
 
-static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
-			const struct ustr *ocu_i)
+static int udf_name_from_CS0(struct ustr *utf_o,
+			     const struct ustr *ocu_i,
+			     int (*conv_f)(wchar_t, unsigned char *, int))
 {
 	const uint8_t *ocu;
 	uint8_t cmp_id, ocu_len;
@@ -286,11 +190,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
 		if (cmp_id == 16)
 			c = (c << 8) | ocu[i++];
 
-		len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
-				    UDF_NAME_LEN - 2 - utf_o->u_len);
+		len = conv_f(c, &utf_o->u_name[utf_o->u_len],
+			     UDF_NAME_LEN - 2 - utf_o->u_len);
 		/* Valid character? */
 		if (len >= 0)
 			utf_o->u_len += len;
+		else if (len == -ENAMETOOLONG)
+			break;
 		else
 			utf_o->u_name[utf_o->u_len++] = '?';
 	}
@@ -299,26 +205,26 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
 	return utf_o->u_len;
 }
 
-static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
-			int length)
+static int udf_name_to_CS0(dstring *ocu, struct ustr *uni, int length,
+			   int (*conv_f)(const unsigned char *, int, wchar_t *))
 {
-	int len;
-	unsigned i, max_val;
-	uint16_t uni_char;
+	int i, len;
+	unsigned int max_val;
+	wchar_t uni_char;
 	int u_len, u_ch;
 
 	memset(ocu, 0, sizeof(dstring) * length);
 	ocu[0] = 8;
-	max_val = 0xffU;
+	max_val = 0xff;
 	u_ch = 1;
 
 try_again:
-	u_len = 0U;
-	for (i = 0U; i < uni->u_len; i++) {
+	u_len = 0;
+	for (i = 0; i < uni->u_len; i++) {
 		/* Name didn't fit? */
 		if (u_len + 1 + u_ch >= length)
 			return 0;
-		len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
+		len = conv_f(&uni->u_name[i], uni->u_len - i, &uni_char);
 		if (!len)
 			continue;
 		/* Invalid character, deal with it */
@@ -328,15 +234,15 @@ try_again:
 		}
 
 		if (uni_char > max_val) {
-			max_val = 0xffffU;
-			ocu[0] = (uint8_t)0x10U;
+			max_val = 0xffff;
+			ocu[0] = 0x10;
 			u_ch = 2;
 			goto try_again;
 		}
 
-		if (max_val == 0xffffU)
+		if (max_val == 0xffff)
 			ocu[++u_len] = (uint8_t)(uni_char >> 8);
-		ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
+		ocu[++u_len] = (uint8_t)(uni_char & 0xff);
 		i += len - 1;
 	}
 
@@ -344,10 +250,16 @@ try_again:
 	return u_len + 1;
 }
 
+int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
+{
+	return udf_name_from_CS0(utf_o, ocu_i, udf_uni2char_utf8);
+}
+
 int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
 		     uint8_t *dname, int dlen)
 {
 	struct ustr *filename, *unifilename;
+	int (*conv_f)(wchar_t, unsigned char *, int);
 	int ret;
 
 	if (!slen)
@@ -365,23 +277,18 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
 
 	udf_build_ustr_exact(unifilename, sname, slen);
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
-		ret = udf_CS0toUTF8(filename, unifilename);
-		if (ret < 0) {
-			udf_debug("Failed in udf_get_filename: sname = %s\n",
-				  sname);
-			goto out2;
-		}
+		conv_f = udf_uni2char_utf8;
 	} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
-		ret = udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
-				   unifilename);
-		if (ret < 0) {
-			udf_debug("Failed in udf_get_filename: sname = %s\n",
-				  sname);
-			goto out2;
-		}
+		conv_f = UDF_SB(sb)->s_nls_map->uni2char;
 	} else
 		BUG();
 
+	ret = udf_name_from_CS0(filename, unifilename, conv_f);
+	if (ret < 0) {
+		udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
+		goto out2;
+	}
+
 	ret = udf_translate_to_linux(dname, dlen,
 				     filename->u_name, filename->u_len,
 				     unifilename->u_name, unifilename->u_len);
@@ -399,24 +306,19 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
 		     uint8_t *dname, int dlen)
 {
 	struct ustr unifilename;
-	int namelen;
+	int (*conv_f)(const unsigned char *, int, wchar_t *);
 
 	if (!udf_char_to_ustr(&unifilename, sname, slen))
 		return 0;
 
 	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
-		namelen = udf_UTF8toCS0(dname, &unifilename, dlen);
-		if (!namelen)
-			return 0;
+		conv_f = udf_char2uni_utf8;
 	} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
-		namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname,
-					&unifilename, dlen);
-		if (!namelen)
-			return 0;
+		conv_f = UDF_SB(sb)->s_nls_map->char2uni;
 	} else
-		return 0;
+		BUG();
 
-	return namelen;
+	return udf_name_to_CS0(dname, &unifilename, dlen, conv_f);
 }
 
 #define ILLEGAL_CHAR_MARK	'_'