Update of /cvsroot/python/python/dist/src/Objects In directory usw-pr-cvs1:/tmp/cvs-serv19540/python/dist/src/Objects Modified Files: fileobject.c Log Message: MS Win32 .readline() speedup, as discussed on Python-Dev. This is a tricky variant that never needs to "search from the right". Also fixed unlikely memory leak in get_line, if string size overflows INT_MAX. Also new std test test_bufio to make sure .readline() works. Index: fileobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v retrieving revision 2.98 retrieving revision 2.99 diff -C2 -r2.98 -r2.99 *** fileobject.c 2001/01/07 20:51:39 2.98 --- fileobject.c 2001/01/07 21:19:34 2.99 *************** *** 245,249 **** return 0 on success, non-zero on failure (with errno set) */ int ! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 _portable_fseek(FILE *fp, fpos_t offset, int whence) #else --- 245,249 ---- return 0 on success, non-zero on failure (with errno set) */ int ! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 _portable_fseek(FILE *fp, fpos_t offset, int whence) #else *************** *** 257,261 **** #elif defined(__BEOS__) return _fseek(fp, offset, whence); ! #elif defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_FPOS_T >= 8 /* lacking a 64-bit capable fseek() (as Win64 does) use a 64-bit capable fsetpos() and tell() to implement fseek()*/ --- 257,261 ---- #elif defined(__BEOS__) return _fseek(fp, offset, whence); ! #elif defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_FPOS_T >= 8 /* lacking a 64-bit capable fseek() (as Win64 does) use a 64-bit capable fsetpos() and tell() to implement fseek()*/ *************** *** 288,292 **** Return -1 on failure with errno set appropriately, current file position on success */ ! 
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 fpos_t #else --- 288,292 ---- Return -1 on failure with errno set appropriately, current file position on success */ ! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 fpos_t #else *************** *** 315,319 **** int whence; int ret; ! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 fpos_t offset, pos; #else --- 315,319 ---- int whence; int ret; ! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 fpos_t offset, pos; #else *************** *** 321,325 **** #endif /* !MS_WIN64 */ PyObject *offobj; ! if (f->f_fp == NULL) return err_closed(); --- 321,325 ---- #endif /* !MS_WIN64 */ PyObject *offobj; ! if (f->f_fp == NULL) return err_closed(); *************** *** 335,339 **** if (PyErr_Occurred()) return NULL; ! Py_BEGIN_ALLOW_THREADS errno = 0; --- 335,339 ---- if (PyErr_Occurred()) return NULL; ! Py_BEGIN_ALLOW_THREADS errno = 0; *************** *** 356,360 **** { int ret; ! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 fpos_t newsize; #else --- 356,360 ---- { int ret; ! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 fpos_t newsize; #else *************** *** 362,366 **** #endif PyObject *newsizeobj; ! if (f->f_fp == NULL) return err_closed(); --- 362,366 ---- #endif PyObject *newsizeobj; ! if (f->f_fp == NULL) return err_closed(); *************** *** 417,421 **** if (ret != 0) goto onioerror; #endif /* !MS_WIN32 */ ! Py_INCREF(Py_None); return Py_None; --- 417,421 ---- if (ret != 0) goto onioerror; #endif /* !MS_WIN32 */ ! Py_INCREF(Py_None); return Py_None; *************** *** 431,435 **** file_tell(PyFileObject *f, PyObject *args) { ! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 fpos_t pos; #else --- 431,435 ---- file_tell(PyFileObject *f, PyObject *args) { ! 
#if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 fpos_t pos; #else *************** *** 471,475 **** { int res; ! if (f->f_fp == NULL) return err_closed(); --- 471,475 ---- { int res; ! if (f->f_fp == NULL) return err_closed(); *************** *** 560,564 **** size_t bytesread, buffersize, chunksize; PyObject *v; ! if (f->f_fp == NULL) return err_closed(); --- 560,564 ---- size_t bytesread, buffersize, chunksize; PyObject *v; ! if (f->f_fp == NULL) return err_closed(); *************** *** 611,615 **** char *ptr; size_t ntodo, ndone, nnow; ! if (f->f_fp == NULL) return err_closed(); --- 611,615 ---- char *ptr; size_t ntodo, ndone, nnow; ! if (f->f_fp == NULL) return err_closed(); *************** *** 635,638 **** --- 635,802 ---- } + /************************************************************************** + Win32 MS routine to get next line. + + Under MSVC 6: + + + MS threadsafe getc is very slow (multiple layers of function calls + before+after each character, to lock+unlock the stream). + + The stream-locking functions are MS-internal -- can't access them + from user code. + + There's nothing Tim could find in the MS C or platform SDK libraries + that can worm around this. + + MS fgets locks/unlocks only once per line; it's the only hook we have. + + So we use fgets for speed(!), despite that it's painful. + + MS realloc is also slow. + + In the usual case, we have one pleasantly small line already sitting in a + stdio buffer, and we optimize heavily for that case. + + CAUTION: This routine cheats, relying on how MSVC 6 works internally. + They seem to be relatively safe cheats, but we should expect this code + to break someday. 
+ **************************************************************************/ + + /* if Win32 and MS's compiler */ + #if defined(MS_WIN32) && defined(_MSC_VER) + #define USE_MS_GETLINE_HACK + #endif + + #ifdef USE_MS_GETLINE_HACK + static PyObject* + ms_getline_hack(FILE *fp) + { + #define INITBUFSIZE 100 + #define INCBUFSIZE 1000 + PyObject* v; /* the string object result */ + size_t total_v_size; /* total # chars in v's buffer */ + char* pvfree; /* address of next free slot */ + char* pvend; /* address one beyond last free slot */ + char* p; /* temp */ + + if (fp->_cnt > 0) { /* HACK: "_cnt" isn't advertised */ + /* optimize for normal case: something sitting in the + * buffer ready to go; avoid thread fiddling & realloc + * if possible + */ + char msbuf[INITBUFSIZE]; + memset(msbuf, '\n', INITBUFSIZE); + p = fgets(msbuf, INITBUFSIZE, fp); + /* since we didn't lock the file, there's no guarantee + * anything was still in the buffer + */ + if (p == NULL) { + clearerr(fp); + if (PyErr_CheckSignals()) + return NULL; + v = PyString_FromStringAndSize("", 0); + return v; + } + /* fgets read *something* */ + p = memchr(msbuf, '\n', INITBUFSIZE); + if (p != NULL) { + /* Did the \n come from fgets or from us? + * Since fgets stops at the first \n, and then + * writes \0, if it's from fgets a \0 must be next. + * But if that's so, it could not have come from us, + * since the \n's we filled the buffer with have only + * more \n's to the right. + */ + pvend = msbuf + INITBUFSIZE; + if (p+1 < pvend && *(p+1) == '\0') { + /* it's from fgets: we win! */ + v = PyString_FromStringAndSize(msbuf, + p - msbuf + 1); + return v; + } + /* Must be from us: fgets didn't fill the buffer + * and didn't find a newline, so it must be the + * last and newline-free line of the file. + */ + assert(p > msbuf && *(p-1) == '\0'); + v = PyString_FromStringAndSize(msbuf, p - msbuf - 1); + return v; + } + /* yuck: fgets overwrote all the newlines, i.e. the entire + * buffer. 
So this line isn't over yet, or maybe it is but + * we're exactly at EOF; in either case, we're tired <wink>. + */ + assert(msbuf[INITBUFSIZE-1] == '\0'); + total_v_size = INITBUFSIZE + INCBUFSIZE; + v = PyString_FromStringAndSize((char*)NULL, + (int)total_v_size); + if (v == NULL) + return v; + /* copy over everything except the last null byte */ + memcpy(BUF(v), msbuf, INITBUFSIZE-1); + pvfree = BUF(v) + INITBUFSIZE - 1; + } + else { + /* The stream isn't ready or isn't buffered. */ + v = PyString_FromStringAndSize((char*)NULL, INITBUFSIZE); + if (v == NULL) + return v; + total_v_size = INITBUFSIZE; + pvfree = BUF(v); + } + + /* Keep reading stuff into v; if it ever ends successfully, break + * after setting p one beyond the end of the line. + */ + for (;;) { + size_t nfree; + + Py_BEGIN_ALLOW_THREADS + pvend = BUF(v) + total_v_size; + nfree = pvend - pvfree; + memset(pvfree, '\n', nfree); + p = fgets(pvfree, nfree, fp); + Py_END_ALLOW_THREADS + + if (p == NULL) { + clearerr(fp); + if (PyErr_CheckSignals()) { + Py_DECREF(v); + return NULL; + } + p = pvfree; + break; + } + /* See the "normal case" comments above for details. 
*/ + p = memchr(pvfree, '\n', nfree); + if (p != NULL) { + if (p+1 < pvend && *(p+1) == '\0') { + /* \n came from fgets */ + ++p; + break; + } + /* \n came from us; last line of file, no newline */ + assert(p > pvfree && *(p-1) == '\0'); + --p; + break; + } + /* expand buffer and try again */ + assert(*(pvend-1) == '\0'); + total_v_size += INCBUFSIZE; + if (total_v_size > INT_MAX) { + PyErr_SetString(PyExc_OverflowError, + "line is longer than a Python string can hold"); + Py_DECREF(v); + return NULL; + } + if (_PyString_Resize(&v, (int)total_v_size) < 0) + return NULL; + /* overwrite the trailing null byte */ + pvfree = BUF(v) + (total_v_size - INCBUFSIZE - 1); + } + if (BUF(v) + total_v_size != p) + _PyString_Resize(&v, p - BUF(v)); + return v; + #undef INITBUFSIZE + #undef INCBUFSIZE + } + #endif /* ifdef USE_MS_GETLINE_HACK */ /* Internal routine to get a line. *************** *** 662,665 **** --- 826,833 ---- PyObject *v; + #ifdef USE_MS_GETLINE_HACK + if (n == 0) + return ms_getline_hack(fp); + #endif n2 = n > 0 ? n : 100; v = PyString_FromStringAndSize((char *)NULL, n2); *************** *** 696,699 **** --- 864,868 ---- PyErr_SetString(PyExc_OverflowError, "line is longer than a Python string can hold"); + Py_DECREF(v); return NULL; } *************** *** 1000,1004 **** const char *buffer; int len; ! if (((f->f_binary && PyObject_AsReadBuffer(v, (const void**)&buffer, --- 1169,1173 ---- const char *buffer; int len; ! if (((f->f_binary && PyObject_AsReadBuffer(v, (const void**)&buffer, *************** *** 1256,1260 **** if (fno == NULL) return -1; ! if (PyInt_Check(fno)) { fd = PyInt_AsLong(fno); --- 1425,1429 ---- if (fno == NULL) return -1; ! if (PyInt_Check(fno)) { fd = PyInt_AsLong(fno);
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4