/**
 * ntfs_mbstoucs - convert a multibyte string to a little endian Unicode string
 * @ins:	input multibyte string buffer (NUL terminated)
 * @outs:	on return contains the (allocated) output Unicode string
 * @outs_len:	length of output buffer in Unicode characters
 *
 * Convert the input multibyte string @ins, from the current locale into the
 * corresponding little endian, 2-byte (i.e. 16-bit) Unicode string.
 *
 * If *@outs is NULL, the function allocates the string and the caller is
 * responsible for calling free(*@outs); when finished with it.  In that case
 * @outs_len is ignored.
 *
 * If *@outs is not NULL, it must point to a buffer of @outs_len Unicode
 * characters.  The terminating Unicode NULL character is stored inside that
 * buffer, so at most @outs_len - 1 converted characters fit; the buffer is
 * never written beyond @outs_len characters.  Note that on failure part of
 * the caller supplied buffer may already have been overwritten.
 *
 * On success the function returns the number of Unicode characters written to
 * the output string *@outs (>= 0), not counting the terminating Unicode NULL
 * character.  If the output string buffer was allocated, *@outs is set to it.
 *
 * On error, -1 is returned, and errno is set to the error code.  The following
 * error codes can be expected:
 *	EINVAL		Invalid arguments (e.g. @ins or @outs is NULL).
 *	EILSEQ		The input string cannot be represented as a Unicode
 *			string according to the current locale.
 *	ENAMETOOLONG	Destination buffer is too small for input string.
 *	ENOMEM		Not enough memory to allocate destination buffer.
 */
int ntfs_mbstoucs(const char *ins, ntfschar **outs, int outs_len)
{
	ntfschar *ucs;
	const char *s;
	wchar_t wc;
	int i, o, cnt, ins_len, ucs_len, ins_size;
#ifdef HAVE_MBSINIT
	mbstate_t mbstate;
#endif

	if (!ins || !outs) {
		errno = EINVAL;
		return -1;
	}
	ucs = *outs;
	ucs_len = outs_len;
	/* A caller supplied buffer cannot even hold the terminator. */
	if (ucs && !ucs_len) {
		errno = ENAMETOOLONG;
		return -1;
	}
	/* Determine the size of the multi-byte string in bytes. */
	ins_size = strlen(ins);
	/* Determine the length of the multi-byte string. */
	s = ins;
#if defined(HAVE_MBSINIT)
	memset(&mbstate, 0, sizeof(mbstate));
	ins_len = mbsrtowcs(NULL, (const char **)&s, 0, &mbstate);
#ifdef __CYGWIN32__
	if (!ins_len && *ins) {
		/* Older Cygwin had broken mbsrtowcs() implementation. */
		ins_len = strlen(ins);
	}
#endif
#elif !defined(DJGPP)
	ins_len = mbstowcs(NULL, s, 0);
#else
	/* Eeek!!! DJGPP has broken mbstowcs() implementation!!! */
	ins_len = strlen(ins);
#endif
	/* The length query failed; errno was set by the C library. */
	if (ins_len == -1)
		return ins_len;
#ifdef HAVE_MBSINIT
	if ((s != ins) || !mbsinit(&mbstate)) {
#else
	if (s != ins) {
#endif
		errno = EILSEQ;
		return -1;
	}
	/* Add the NULL terminator. */
	ins_len++;
	if (!ucs) {
		ucs_len = ins_len;
		ucs = ntfs_malloc(ucs_len * sizeof(ntfschar));
		/* ntfs_malloc() is expected to have set errno on failure. */
		if (!ucs)
			return -1;
	}
	/* Reset the conversion state before the real conversion pass. */
#ifdef HAVE_MBSINIT
	memset(&mbstate, 0, sizeof(mbstate));
#else
	mbtowc(NULL, NULL, 0);
#endif
	for (i = o = cnt = 0; i < ins_size; i += cnt, o++) {
		/* Reallocate memory if necessary or abort. */
		if (o >= ucs_len) {
			ntfschar *tc;

			/* Never grow (or free) a buffer owned by the caller. */
			if (ucs == *outs) {
				errno = ENAMETOOLONG;
				return -1;
			}
			/*
			 * We will never get here but hey, it's only a bit of
			 * extra code...
			 */
			/* New size in bytes, rounded to a multiple of 64. */
			ucs_len = (ucs_len * sizeof(ntfschar) + 64) & ~63;
			tc = realloc(ucs, ucs_len);
			if (!tc)
				goto err_out;
			ucs = tc;
			/* Back from bytes to Unicode characters. */
			ucs_len /= sizeof(ntfschar);
		}
		/* Convert the multibyte character to a wide character. */
#ifdef HAVE_MBSINIT
		cnt = mbrtowc(&wc, ins + i, ins_size - i, &mbstate);
#else
		cnt = mbtowc(&wc, ins + i, ins_size - i);
#endif
		/* A converted NUL character ends the string. */
		if (!cnt)
			break;
		/* Invalid sequence; errno was set by the C library. */
		if (cnt == -1)
			goto err_out;
		/* E.g. (size_t)-2 from mbrtowc(): incomplete sequence. */
		if (cnt < -1) {
			ntfs_log_trace("Eeek. cnt = %i\n", cnt);
			errno = EINVAL;
			goto err_out;
		}
		/* Make sure we are not overflowing the NTFS Unicode set. */
		if ((unsigned long)wc >= (unsigned long)(1 <<
				(8 * sizeof(ntfschar)))) {
			errno = EILSEQ;
			goto err_out;
		}
		/* Convert the CPU wide character to a LE Unicode character. */
		ucs[o] = cpu_to_le16(wc);
	}
#ifdef HAVE_MBSINIT
	/* Make sure we are back in the initial state. */
	if (!mbsinit(&mbstate)) {
		ntfs_log_trace("Eeek. mbstate not in initial state!\n");
		errno = EILSEQ;
		goto err_out;
	}
#endif
	/*
	 * The loop only guarantees room for the characters it stored, so make
	 * sure the terminator still fits: a caller supplied buffer that is
	 * exactly full is too small, an internally allocated one is grown by
	 * one character.
	 */
	if (o >= ucs_len) {
		ntfschar *tc;

		if (ucs == *outs) {
			errno = ENAMETOOLONG;
			return -1;
		}
		tc = realloc(ucs, ((size_t)o + 1) * sizeof(ntfschar));
		if (!tc)
			goto err_out;
		ucs = tc;
		ucs_len = o + 1;
	}
	/* Now write the NULL character. */
	ucs[o] = cpu_to_le16(L'\0');
	if (*outs != ucs)
		*outs = ucs;
	return o;
err_out:
	/* Only free what was allocated here, and keep errno intact. */
	if (ucs != *outs) {
		int eo = errno;
		free(ucs);
		errno = eo;
	}
	return -1;
}
|