mirror of
https://github.com/python/cpython.git
synced 2024-11-28 08:20:55 +01:00
Patch #1272, by Christian Heimes and Alexandre Vassalotti.
Changes to make __file__ a proper Unicode object, using the default filesystem encoding. This is a bit tricky because the default filesystem encoding isn't set by the time we import the first modules; at that point we fudge things a bit. This is okay since __file__ isn't really used much except for error reporting. Tested on OSX and Linux only so far.
This commit is contained in:
parent
cdadf242ba
commit
00bc0e0a2d
@ -21,8 +21,8 @@ typedef struct {
|
||||
PyObject *co_freevars; /* tuple of strings (free variable names) */
|
||||
PyObject *co_cellvars; /* tuple of strings (cell variable names) */
|
||||
/* The rest doesn't count for hash/cmp */
|
||||
PyObject *co_filename; /* string (where it was loaded from) */
|
||||
PyObject *co_name; /* string (name, for reference) */
|
||||
PyObject *co_filename; /* unicode (where it was loaded from) */
|
||||
PyObject *co_name; /* unicode (name, for reference) */
|
||||
int co_firstlineno; /* first source line number */
|
||||
PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) */
|
||||
void *co_zombieframe; /* for optimization only (see frameobject.c) */
|
||||
|
@ -154,6 +154,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
||||
# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
|
||||
# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
|
||||
# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
|
||||
# define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
|
||||
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
|
||||
# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
|
||||
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
|
||||
@ -245,6 +246,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
|
||||
# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
|
||||
# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
|
||||
# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
|
||||
# define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
|
||||
# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
|
||||
# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
|
||||
# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
|
||||
@ -641,6 +643,20 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
|
||||
PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
|
||||
PyObject *, const char *);
|
||||
|
||||
/* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
|
||||
|
||||
If the encoding is supported by one of the built-in codecs (i.e., UTF-8,
|
||||
UTF-16, UTF-32, Latin-1 or MBCS), use that codec; otherwise fall back to UTF-8 and replace
|
||||
invalid characters with '?'.
|
||||
|
||||
The function is intended to be used for paths and file names only
|
||||
during the bootstrapping process, when the codecs are not yet set up.
|
||||
*/
|
||||
|
||||
PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
|
||||
const char *s /* encoded string */
|
||||
);
|
||||
|
||||
/* Return a char* holding the UTF-8 encoded value of the
|
||||
Unicode object.
|
||||
|
||||
|
@ -273,6 +273,7 @@ Larry Hastings
|
||||
Shane Hathaway
|
||||
Rycharde Hawkes
|
||||
Jochen Hayek
|
||||
Christian Heimes
|
||||
Thomas Heller
|
||||
Lance Finn Helsten
|
||||
Jonathan Hendry
|
||||
@ -667,6 +668,7 @@ Michael Urman
|
||||
Hector Urtubia
|
||||
Atul Varma
|
||||
Dmitry Vasiliev
|
||||
Alexandre Vassalotti
|
||||
Frank Vercruesse
|
||||
Mike Verdone
|
||||
Jaap Vermeulen
|
||||
|
@ -34,9 +34,9 @@ void _AddTraceback(char *funcname, char *filename, int lineno)
|
||||
PyCodeObject *py_code = 0;
|
||||
PyFrameObject *py_frame = 0;
|
||||
|
||||
py_srcfile = PyString_FromString(filename);
|
||||
py_srcfile = PyUnicode_DecodeFSDefault(filename);
|
||||
if (!py_srcfile) goto bad;
|
||||
py_funcname = PyString_FromString(funcname);
|
||||
py_funcname = PyUnicode_FromString(funcname);
|
||||
if (!py_funcname) goto bad;
|
||||
py_globals = PyDict_New();
|
||||
if (!py_globals) goto bad;
|
||||
|
@ -5370,7 +5370,7 @@ posix_tempnam(PyObject *self, PyObject *args)
|
||||
#endif
|
||||
if (name == NULL)
|
||||
return PyErr_NoMemory();
|
||||
result = PyString_FromString(name);
|
||||
result = PyUnicode_DecodeFSDefault(name);
|
||||
free(name);
|
||||
return result;
|
||||
}
|
||||
@ -5428,7 +5428,7 @@ posix_tmpnam(PyObject *self, PyObject *noargs)
|
||||
Py_XDECREF(err);
|
||||
return NULL;
|
||||
}
|
||||
return PyString_FromString(buffer);
|
||||
return PyUnicode_DecodeFSDefault(buffer);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -232,13 +232,13 @@ getcode(enum HandlerTypes slot, char* func_name, int lineno)
|
||||
code = PyString_FromString("");
|
||||
if (code == NULL)
|
||||
goto failed;
|
||||
name = PyString_FromString(func_name);
|
||||
name = PyUnicode_FromString(func_name);
|
||||
if (name == NULL)
|
||||
goto failed;
|
||||
nulltuple = PyTuple_New(0);
|
||||
if (nulltuple == NULL)
|
||||
goto failed;
|
||||
filename = PyString_FromString(__FILE__);
|
||||
filename = PyUnicode_DecodeFSDefault(__FILE__);
|
||||
handler_info[slot].tb_code =
|
||||
PyCode_New(0, /* argcount */
|
||||
0, /* kwonlyargcount */
|
||||
|
@ -50,6 +50,7 @@ PyCode_New(int argcount, int kwonlyargcount,
|
||||
{
|
||||
PyCodeObject *co;
|
||||
Py_ssize_t i;
|
||||
|
||||
/* Check argument types */
|
||||
if (argcount < 0 || nlocals < 0 ||
|
||||
code == NULL ||
|
||||
@ -58,20 +59,16 @@ PyCode_New(int argcount, int kwonlyargcount,
|
||||
varnames == NULL || !PyTuple_Check(varnames) ||
|
||||
freevars == NULL || !PyTuple_Check(freevars) ||
|
||||
cellvars == NULL || !PyTuple_Check(cellvars) ||
|
||||
name == NULL || (!PyString_Check(name) && !PyUnicode_Check(name)) ||
|
||||
filename == NULL || !PyString_Check(filename) ||
|
||||
name == NULL || !PyUnicode_Check(name) ||
|
||||
filename == NULL || !PyUnicode_Check(filename) ||
|
||||
lnotab == NULL || !PyString_Check(lnotab) ||
|
||||
!PyObject_CheckReadBuffer(code)) {
|
||||
PyErr_BadInternalCall();
|
||||
return NULL;
|
||||
}
|
||||
if (PyString_Check(name)) {
|
||||
name = PyUnicode_FromString(PyString_AS_STRING(name));
|
||||
if (name == NULL)
|
||||
return NULL;
|
||||
} else {
|
||||
Py_INCREF(name);
|
||||
}
|
||||
Py_INCREF(name);
|
||||
Py_INCREF(filename);
|
||||
|
||||
intern_strings(names);
|
||||
intern_strings(varnames);
|
||||
intern_strings(freevars);
|
||||
@ -299,8 +296,8 @@ code_repr(PyCodeObject *co)
|
||||
|
||||
if (co->co_firstlineno != 0)
|
||||
lineno = co->co_firstlineno;
|
||||
if (co->co_filename && PyString_Check(co->co_filename))
|
||||
filename = PyString_AS_STRING(co->co_filename);
|
||||
if (co->co_filename && PyUnicode_Check(co->co_filename))
|
||||
filename = PyUnicode_AsString(co->co_filename);
|
||||
return PyUnicode_FromFormat(
|
||||
"<code object %.100U at %p, file \"%.300s\", line %d>",
|
||||
co->co_name, co, filename, lineno);
|
||||
|
@ -86,12 +86,12 @@ PyModule_GetFilename(PyObject *m)
|
||||
d = ((PyModuleObject *)m)->md_dict;
|
||||
if (d == NULL ||
|
||||
(fileobj = PyDict_GetItemString(d, "__file__")) == NULL ||
|
||||
!PyString_Check(fileobj))
|
||||
!PyUnicode_Check(fileobj))
|
||||
{
|
||||
PyErr_SetString(PyExc_SystemError, "module filename missing");
|
||||
return NULL;
|
||||
}
|
||||
return PyString_AsString(fileobj);
|
||||
return PyUnicode_AsString(fileobj);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -117,7 +117,11 @@ static PyUnicodeObject *unicode_latin1[256];
|
||||
|
||||
/* Default encoding to use and assume when NULL is passed as encoding
|
||||
parameter; it is fixed to "utf-8". Always use the
|
||||
PyUnicode_GetDefaultEncoding() API to access this global. */
|
||||
PyUnicode_GetDefaultEncoding() API to access this global.
|
||||
|
||||
Don't forget to alter Py_FileSystemDefaultEncoding if you change the
|
||||
hard coded default!
|
||||
*/
|
||||
static const char unicode_default_encoding[] = "utf-8";
|
||||
|
||||
Py_UNICODE
|
||||
@ -1231,6 +1235,35 @@ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
|
||||
return v;
|
||||
}
|
||||
|
||||
PyObject*
|
||||
PyUnicode_DecodeFSDefault(const char *s)
|
||||
{
|
||||
Py_ssize_t size = (Py_ssize_t)strlen(s);
|
||||
|
||||
/* During the early bootstrapping process, Py_FileSystemDefaultEncoding
|
||||
can be undefined. If it is case, decode using UTF-8. The following assumes
|
||||
that Py_FileSystemDefaultEncoding is set to a built-in encoding during the
|
||||
bootstrapping process where the codecs aren't ready yet.
|
||||
*/
|
||||
if (Py_FileSystemDefaultEncoding) {
|
||||
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
|
||||
if (strcmp(Py_FileSystemDefaultEncoding, "mbcs")) {
|
||||
return PyUnicode_DecodeMBCS(s, size, "replace");
|
||||
}
|
||||
#elif defined(__APPLE__)
|
||||
if (strcmp(Py_FileSystemDefaultEncoding, "utf-8")) {
|
||||
return PyUnicode_DecodeUTF8(s, size, "replace");
|
||||
}
|
||||
#endif
|
||||
return PyUnicode_Decode(s, size,
|
||||
Py_FileSystemDefaultEncoding,
|
||||
"replace");
|
||||
}
|
||||
else {
|
||||
return PyUnicode_DecodeUTF8(s, size, "replace");
|
||||
}
|
||||
}
|
||||
|
||||
char*
|
||||
PyUnicode_AsStringAndSize(PyObject *unicode, Py_ssize_t *psize)
|
||||
{
|
||||
|
@ -10,6 +10,9 @@
|
||||
|
||||
/* The default encoding used by the platform file system APIs
|
||||
Can remain NULL for all platforms that don't have such a concept
|
||||
|
||||
Don't forget to modify PyUnicode_DecodeFSDefault() if you touch any of the
|
||||
values for Py_FileSystemDefaultEncoding!
|
||||
*/
|
||||
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
|
||||
const char *Py_FileSystemDefaultEncoding = "mbcs";
|
||||
|
@ -767,7 +767,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
|
||||
lltrace = PyDict_GetItemString(f->f_globals, "__lltrace__") != NULL;
|
||||
#endif
|
||||
#if defined(Py_DEBUG) || defined(LLTRACE)
|
||||
filename = PyString_AsString(co->co_filename);
|
||||
filename = PyUnicode_AsString(co->co_filename);
|
||||
#endif
|
||||
|
||||
why = WHY_NOT;
|
||||
@ -2565,7 +2565,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
|
||||
if (argcount > co->co_argcount) {
|
||||
if (!(co->co_flags & CO_VARARGS)) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"%S() takes %s %d "
|
||||
"%U() takes %s %d "
|
||||
"%spositional argument%s (%d given)",
|
||||
co->co_name,
|
||||
defcount ? "at most" : "exactly",
|
||||
@ -2599,7 +2599,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
|
||||
int j;
|
||||
if (keyword == NULL || !PyUnicode_Check(keyword)) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"%S() keywords must be strings",
|
||||
"%U() keywords must be strings",
|
||||
co->co_name);
|
||||
goto fail;
|
||||
}
|
||||
@ -2622,7 +2622,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
|
||||
if (j >= co->co_argcount + co->co_kwonlyargcount) {
|
||||
if (kwdict == NULL) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"%S() got an unexpected "
|
||||
"%U() got an unexpected "
|
||||
"keyword argument '%S'",
|
||||
co->co_name,
|
||||
keyword);
|
||||
@ -2633,7 +2633,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
|
||||
else {
|
||||
if (GETLOCAL(j) != NULL) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"%S() got multiple "
|
||||
"%U() got multiple "
|
||||
"values for keyword "
|
||||
"argument '%S'",
|
||||
co->co_name,
|
||||
@ -2661,7 +2661,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
|
||||
continue;
|
||||
}
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"%S() needs keyword-only argument %S",
|
||||
"%U() needs keyword-only argument %S",
|
||||
co->co_name, name);
|
||||
goto fail;
|
||||
}
|
||||
@ -2671,7 +2671,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
|
||||
for (i = argcount; i < m; i++) {
|
||||
if (GETLOCAL(i) == NULL) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"%S() takes %s %d "
|
||||
"%U() takes %s %d "
|
||||
"%spositional argument%s "
|
||||
"(%d given)",
|
||||
co->co_name,
|
||||
@ -2699,7 +2699,7 @@ PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
|
||||
else {
|
||||
if (argcount > 0 || kwcount > 0) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"%S() takes no arguments (%d given)",
|
||||
"%U() takes no arguments (%d given)",
|
||||
co->co_name,
|
||||
argcount + kwcount);
|
||||
goto fail;
|
||||
|
@ -1247,7 +1247,7 @@ compiler_make_closure(struct compiler *c, PyCodeObject *co, int args)
|
||||
PyObject_REPR(name),
|
||||
PyString_AS_STRING(c->u->u_name),
|
||||
reftype, arg,
|
||||
PyString_AS_STRING(co->co_name),
|
||||
PyUnicode_AsString(co->co_name),
|
||||
PyObject_REPR(co->co_freevars));
|
||||
Py_FatalError("compiler_make_closure()");
|
||||
}
|
||||
@ -4001,7 +4001,7 @@ makecode(struct compiler *c, struct assembler *a)
|
||||
freevars = dict_keys_inorder(c->u->u_freevars, PyTuple_Size(cellvars));
|
||||
if (!freevars)
|
||||
goto error;
|
||||
filename = PyString_FromString(c->c_filename);
|
||||
filename = PyUnicode_DecodeFSDefault(c->c_filename);
|
||||
if (!filename)
|
||||
goto error;
|
||||
|
||||
|
@ -17,7 +17,7 @@ static unsigned char M___hello__[] = {
|
||||
131,1,0,1,100,1,0,83,40,2,0,0,0,117,14,0,
|
||||
0,0,72,101,108,108,111,32,119,111,114,108,100,46,46,46,
|
||||
78,40,1,0,0,0,117,5,0,0,0,112,114,105,110,116,
|
||||
40,0,0,0,0,40,0,0,0,0,40,0,0,0,0,115,
|
||||
40,0,0,0,0,40,0,0,0,0,40,0,0,0,0,117,
|
||||
8,0,0,0,104,101,108,108,111,46,112,121,117,8,0,0,
|
||||
0,60,109,111,100,117,108,101,62,1,0,0,0,115,0,0,
|
||||
0,0,
|
||||
|
@ -74,10 +74,11 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *);
|
||||
3040 (added signature annotations)
|
||||
3050 (print becomes a function)
|
||||
3060 (PEP 3115 metaclass syntax)
|
||||
3070 (PEP 3109 raise changes)
|
||||
3070 (PEP 3109 raise changes)
|
||||
3080 (PEP 3137 make __file__ and __name__ unicode)
|
||||
.
|
||||
*/
|
||||
#define MAGIC (3070 | ((long)'\r'<<16) | ((long)'\n'<<24))
|
||||
#define MAGIC (3080 | ((long)'\r'<<16) | ((long)'\n'<<24))
|
||||
|
||||
/* Magic word as global; note that _PyImport_Init() can change the
|
||||
value of this global to accommodate for alterations of how the
|
||||
@ -652,7 +653,7 @@ PyImport_ExecCodeModuleEx(char *name, PyObject *co, char *pathname)
|
||||
/* Remember the filename as the __file__ attribute */
|
||||
v = NULL;
|
||||
if (pathname != NULL) {
|
||||
v = PyString_FromString(pathname);
|
||||
v = PyUnicode_DecodeFSDefault(pathname);
|
||||
if (v == NULL)
|
||||
PyErr_Clear();
|
||||
}
|
||||
@ -983,7 +984,7 @@ load_package(char *name, char *pathname)
|
||||
PySys_WriteStderr("import %s # directory %s\n",
|
||||
name, pathname);
|
||||
d = PyModule_GetDict(m);
|
||||
file = PyString_FromString(pathname);
|
||||
file = PyUnicode_DecodeFSDefault(pathname);
|
||||
if (file == NULL)
|
||||
goto error;
|
||||
path = Py_BuildValue("[O]", file);
|
||||
|
@ -62,7 +62,9 @@ _PyImport_LoadDynamicModule(char *name, char *pathname, FILE *fp)
|
||||
return NULL;
|
||||
}
|
||||
/* Remember the filename as the __file__ attribute */
|
||||
if (PyModule_AddStringConstant(m, "__file__", pathname) < 0)
|
||||
PyObject *path;
|
||||
path = PyUnicode_DecodeFSDefault(pathname);
|
||||
if (PyModule_AddObject(m, "__file__", path) < 0)
|
||||
PyErr_Clear(); /* Not important enough to report */
|
||||
|
||||
if (_PyImport_FixupExtension(name, pathname) == NULL)
|
||||
|
@ -867,7 +867,8 @@ PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit,
|
||||
return -1;
|
||||
d = PyModule_GetDict(m);
|
||||
if (PyDict_GetItemString(d, "__file__") == NULL) {
|
||||
PyObject *f = PyString_FromString(filename);
|
||||
PyObject *f;
|
||||
f = PyUnicode_DecodeFSDefault(filename);
|
||||
if (f == NULL)
|
||||
return -1;
|
||||
if (PyDict_SetItemString(d, "__file__", f) < 0) {
|
||||
|
@ -229,10 +229,10 @@ tb_printinternal(PyTracebackObject *tb, PyObject *f, int limit)
|
||||
while (tb != NULL && err == 0) {
|
||||
if (depth <= limit) {
|
||||
err = tb_displayline(f,
|
||||
PyString_AsString(
|
||||
PyUnicode_AsString(
|
||||
tb->tb_frame->f_code->co_filename),
|
||||
tb->tb_lineno,
|
||||
PyString_AsString(tb->tb_frame->f_code->co_name));
|
||||
PyUnicode_AsString(tb->tb_frame->f_code->co_name));
|
||||
}
|
||||
depth--;
|
||||
tb = tb->tb_next;
|
||||
|
Loading…
Reference in New Issue
Block a user