This is the mail archive of the cygwin mailing list for the Cygwin project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

cygwin-1.7 utf8 path support (exp)


The attached patch is for interested developers who want to try out
UTF-8 path conversion support with the upcoming cygwin-1.7.
It now uses the wide-char API, with a MAX_PATH of 32KB.
Do not apply yet.

The cygwin 1.7 gcc suite is not yet stable enough for me to finish
compiling perl, so I couldn't test it. Maybe someone else will have
more luck.
-- 
Reini Urban
http://phpwiki.org/              http://murbreak.at/
diff -bu perl-current/cygwin/cygwin.c.orig perl-current/cygwin/cygwin.c
--- perl-current/cygwin/cygwin.c.orig	2007-12-22 22:38:46.000000000 +0100
+++ perl-current/cygwin/cygwin.c	2008-04-13 14:24:16.875000000 +0200
@@ -10,9 +10,13 @@
 #include <unistd.h>
 #include <process.h>
 #include <sys/cygwin.h>
+#include <cygwin/version.h>
 #include <mntent.h>
 #include <alloca.h>
 #include <dlfcn.h>
+#if (CYGWIN_VERSION_API_MINOR >= 181)
+#include <wchar.h>
+#endif
 
 /*
  * pp_system() implemented via spawn()
@@ -191,7 +195,12 @@
 
     pid = (pid_t)SvIV(ST(0));
 
-    if ((RETVAL = cygwin32_winpid_to_pid(pid)) > 0) {
+#if (CYGWIN_VERSION_API_MINOR >= 181)
+    RETVAL = cygwin_winpid_to_pid(pid);
+#else
+    RETVAL = cygwin32_winpid_to_pid(pid);
+#endif
+    if (RETVAL > 0) {
         XSprePUSH; PUSHi((IV)RETVAL);
         XSRETURN(1);
     }
@@ -205,6 +214,7 @@
     STRLEN len;
     int err;
     char *pathname, *buf;
+    int isutf8 = 0;
 
     if (items < 1 || items > 2)
         Perl_croak(aTHX_ "Usage: Cygwin::win_to_posix_path(pathname, [absolute])");
@@ -215,14 +225,58 @@
 
     if (!len)
 	Perl_croak(aTHX_ "can't convert empty path");
-    buf = (char *) safemalloc (len + 260 + 1001);
+    isutf8 = SvUTF8(ST(0));
 
+#if (CYGWIN_VERSION_API_MINOR >= 181)
+    /* Check utf8 flag and use wide api then.
+       Size calculation: On overflow let cygwin_conv_path calculate the final size.
+     */
+    if (isutf8) {
+	int what = absolute_flag ? CCP_WIN_W_TO_POSIX : CCP_WIN_W_TO_POSIX | CCP_RELATIVE;
+	int wlen = sizeof(wchar_t)*(len + 260 + 1001);
+	wchar_t *wpath = (wchar_t *) safemalloc(sizeof(wchar_t)*len);
+	wchar_t *wbuf = (wchar_t *) safemalloc(wlen);
+	set_locale(LC_CTYPE, "utf8");
+	if (!IN_BYTES) {
+	    mbstate_t mbs;
+	    /* utf8_to_uvuni(pathname, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */
+	    wlen = mbsrtowcs(wpath, (const char**)&pathname, wlen, &mbs);
+	    if (wlen > 0)
+		err = cygwin_conv_path(what, wpath, wbuf, wlen);
+	} else { /* use bytes; assume already ucs-2 encoded bytestream */
+	    err = cygwin_conv_path(what, pathname, wbuf, wlen);
+	}
+	if (err == ENOSPC) { /* our space assumption was wrong, not enough space */
+	    int newlen = cygwin_conv_path(what, wpath, wbuf, 0);
+	    wbuf = (wchar_t *) realloc(&wbuf, newlen);
+	    err = cygwin_conv_path(what, wpath, wbuf, newlen);
+	    wlen = newlen;
+	}
+	/* uvuni_to_utf8(buf, chr) or Encoding::_bytes_to_utf8(sv, "UCS-2BE"); */
+	wlen = wcsrtombs(NULL, (const wchar_t **)&wbuf, wlen, NULL);
+	buf = (char *) safemalloc(wlen+1);
+	wcsrtombs(buf, (const wchar_t **)&wbuf, wlen, NULL);
+    } else {
+	int what = absolute_flag ? CCP_WIN_A_TO_POSIX : CCP_WIN_A_TO_POSIX | CCP_RELATIVE;
+	buf = (char *) safemalloc (len + 260 + 1001);
+	err = cygwin_conv_path(what, pathname, buf, len + 260 + 1001);
+	if (err == ENOSPC) { /* our space assumption was wrong, not enough space */
+	    int newlen = cygwin_conv_path(what, pathname, buf, 0);
+	    buf = (char *) realloc(&buf, newlen);
+	    err = cygwin_conv_path(what, pathname, buf, newlen);
+	}
+    }
+#else
     if (absolute_flag)
 	err = cygwin_conv_to_full_posix_path(pathname, buf);
     else
 	err = cygwin_conv_to_posix_path(pathname, buf);
+#endif
     if (!err) {
 	ST(0) = sv_2mortal(newSVpv(buf, 0));
+	if (isutf8) {
+	    SvUTF8_on(ST(0));
+	}
 	safefree(buf);
        XSRETURN(1);
     } else {
@@ -238,24 +292,71 @@
     STRLEN len;
     int err;
     char *pathname, *buf;
+    int isutf8 = 0;
 
     if (items < 1 || items > 2)
         Perl_croak(aTHX_ "Usage: Cygwin::posix_to_win_path(pathname, [absolute])");
 
-    pathname = SvPV(ST(0), len);
+    pathname = SvPVx(ST(0), len);
     if (items == 2)
 	absolute_flag = SvTRUE(ST(1));
 
     if (!len)
 	Perl_croak(aTHX_ "can't convert empty path");
+    isutf8 = SvUTF8(ST(0));
+#if (CYGWIN_VERSION_API_MINOR >= 181)
+    /* Check utf8 flag and use wide api then.
+       Size calculation: On overflow let cygwin_conv_path calculate the final size.
+     */
+    if (isutf8) {
+	int what = absolute_flag ? CCP_POSIX_TO_WIN_W : CCP_POSIX_TO_WIN_W | CCP_RELATIVE;
+	int wlen = sizeof(wchar_t)*(len + 260 + 1001);
+	wchar_t *wpath = (wchar_t *) safemalloc(sizeof(wchar_t)*len);
+	wchar_t *wbuf = (wchar_t *) safemalloc(wlen);
+	set_locale(LC_CTYPE, "utf8");
+	if (!IN_BYTES) {
+	    mbstate_t mbs;
+	    /* utf8_to_uvuni(pathname, wpath) or Encoding::_utf8_to_bytes(sv, "UCS-2BE"); */
+	    wlen = mbsrtowcs(wpath, (const char**)&pathname, wlen, &mbs);
+	    if (wlen > 0)
+		err = cygwin_conv_path(what, wpath, wbuf, wlen);
+	} else { /* use bytes; assume already ucs-2 encoded bytestream */
+	    err = cygwin_conv_path(what, pathname, wbuf, wlen);
+	}
+	if (err == ENOSPC) { /* our space assumption was wrong, not enough space */
+	    int newlen = cygwin_conv_path(what, wpath, wbuf, 0);
+	    wbuf = (wchar_t *) realloc(&wbuf, newlen);
+	    err = cygwin_conv_path(what, wpath, wbuf, newlen);
+	    wlen = newlen;
+	}
+	/* uvuni_to_utf8(buf, chr) or Encoding::_bytes_to_utf8(sv, "UCS-2BE"); */
+	wlen = wcsrtombs(NULL, (const wchar_t **)&wbuf, wlen, NULL);
+	buf = (char *) safemalloc(wlen+1);
+	wcsrtombs(buf, (const wchar_t **)&wbuf, wlen, NULL);
+    } else {
+	int what = absolute_flag ? CCP_POSIX_TO_WIN_A : CCP_POSIX_TO_WIN_A | CCP_RELATIVE;
+	buf = (char *) safemalloc(len + 260 + 1001);
+	err = cygwin_conv_path(what, pathname, buf, len + 260 + 1001);
+	if (err == ENOSPC) { /* our space assumption was wrong, not enough space */
+	    int newlen = cygwin_conv_path(what, pathname, buf, 0);
+	    buf = (char *) realloc(&buf, newlen);
+	    err = cygwin_conv_path(what, pathname, buf, newlen);
+	}
+    }
+#else
+    if (isutf8)
+	Perl_warn(aTHX_ "can't convert utf8 path");
     buf = (char *) safemalloc(len + 260 + 1001);
-
     if (absolute_flag)
 	err = cygwin_conv_to_full_win32_path(pathname, buf);
     else
 	err = cygwin_conv_to_win32_path(pathname, buf);
+#endif
     if (!err) {
 	ST(0) = sv_2mortal(newSVpv(buf, 0));
+	if (isutf8) {
+	    SvUTF8_on(ST(0));
+	}
 	safefree(buf);
        XSRETURN(1);
     } else {
--
Unsubscribe info:      http://cygwin.com/ml/#unsubscribe-simple
Problem reports:       http://cygwin.com/problems.html
Documentation:         http://cygwin.com/docs.html
FAQ:                   http://cygwin.com/faq/

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]