Update of /cvsroot/python/python/dist/src/Objects In directory usw-pr-cvs1:/tmp/cvs-serv9879/python/dist/src/objects Modified Files: fileobject.c Log Message: Fiddled ms_getline_hack after talking w/ Guido: made clearer that the code duplication is to let us get away without a realloc whenever possible; boosted the init buf size (the cutoff at which we *can* get away without a realloc) from 100 to 200 so that more files can enjoy this boost; and allowed other threads to run in all cases. The last two cost something, but not significantly: in my fat test case, less than a 1% slowdown total. Since my test case has a great many short lines, that's probably the worst slowdown, too. While the logic barely changed, there were lots of edits. This also gets rid of the reference to fp->_cnt, so the last platform assumption being made here is that fgets doesn't overwrite bytes capriciously (== beyond the terminating null byte it must write). Index: fileobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v retrieving revision 2.99 retrieving revision 2.100 diff -C2 -r2.99 -r2.100 *** fileobject.c 2001/01/07 21:19:34 2.99 --- fileobject.c 2001/01/08 00:53:12 2.100 *************** *** 655,661 **** stdio buffer, and we optimize heavily for that case. ! CAUTION: This routine cheats, relying on how MSVC 6 works internally. ! They seem to be relatively safe cheats, but we should expect this code ! to break someday. **************************************************************************/ --- 655,662 ---- stdio buffer, and we optimize heavily for that case. ! CAUTION: This routine cheats, relying on that MSVC 6 fgets doesn't overwrite ! any buffer positions to the right of the terminating null byte. Seems ! unlikely that will change in the future, but ... std test test_bufio should ! catch it if that changes. 
**************************************************************************/ *************** *** 669,673 **** ms_getline_hack(FILE *fp) { ! #define INITBUFSIZE 100 #define INCBUFSIZE 1000 PyObject* v; /* the string object result */ --- 670,684 ---- ms_getline_hack(FILE *fp) { ! /* INITBUFSIZE is the maximum line length that lets us get away with the fast ! * no-realloc path. get_line uses 100 for its initial size, but isn't trying ! * to avoid reallocs. Under MSVC 6, and using files with lines all under 100 ! * chars long, dropping this from 200 to 100 bought less than 1% speedup. ! * Since many kinds of log files have lines exceeding 100 chars, the tiny ! * slowdown from using 200 is more than offset by the large speedup for such ! * log files. ! * INCBUFSIZE is the amount by which we grow the buffer, if INITBUFSIZE isn't ! * enough. It doesn't much matter what this set to. ! */ ! #define INITBUFSIZE 200 #define INCBUFSIZE 1000 PyObject* v; /* the string object result */ *************** *** 676,748 **** char* pvend; /* address one beyond last free slot */ char* p; /* temp */ ! if (fp->_cnt > 0) { /* HACK: "_cnt" isn't advertised */ ! /* optimize for normal case: something sitting in the ! * buffer ready to go; avoid thread fiddling & realloc ! * if possible ! */ ! char msbuf[INITBUFSIZE]; ! memset(msbuf, '\n', INITBUFSIZE); ! p = fgets(msbuf, INITBUFSIZE, fp); ! /* since we didn't lock the file, there's no guarantee ! * anything was still in the buffer */ ! if (p == NULL) { ! clearerr(fp); ! if (PyErr_CheckSignals()) ! return NULL; ! v = PyString_FromStringAndSize("", 0); ! return v; ! } ! /* fgets read *something* */ ! p = memchr(msbuf, '\n', INITBUFSIZE); ! if (p != NULL) { ! /* Did the \n come from fgets or from us? ! * Since fgets stops at the first \n, and then ! * writes \0, if it's from fgets a \0 must be next. ! * But if that's so, it could not have come from us, ! * since the \n's we filled the buffer with have only ! * more \n's to the right. ! */ ! 
pvend = msbuf + INITBUFSIZE; ! if (p+1 < pvend && *(p+1) == '\0') { ! /* it's from fgets: we win! */ ! v = PyString_FromStringAndSize(msbuf, ! p - msbuf + 1); ! return v; ! } ! /* Must be from us: fgets didn't fill the buffer ! * and didn't find a newline, so it must be the ! * last and newline-free line of the file. */ ! assert(p > msbuf && *(p-1) == '\0'); ! v = PyString_FromStringAndSize(msbuf, p - msbuf - 1); return v; } ! /* yuck: fgets overwrote all the newlines, i.e. the entire ! * buffer. So this line isn't over yet, or maybe it is but ! * we're exactly at EOF; in either case, we're tired <wink>. */ ! assert(msbuf[INITBUFSIZE-1] == '\0'); ! total_v_size = INITBUFSIZE + INCBUFSIZE; ! v = PyString_FromStringAndSize((char*)NULL, ! (int)total_v_size); ! if (v == NULL) ! return v; ! /* copy over everything except the last null byte */ ! memcpy(BUF(v), msbuf, INITBUFSIZE-1); ! pvfree = BUF(v) + INITBUFSIZE - 1; } ! else { ! /* The stream isn't ready or isn't buffered. */ ! v = PyString_FromStringAndSize((char*)NULL, INITBUFSIZE); ! if (v == NULL) ! return v; ! total_v_size = INITBUFSIZE; ! pvfree = BUF(v); ! } /* Keep reading stuff into v; if it ever ends successfully, break ! * after setting p one beyond the end of the line. */ for (;;) { --- 687,751 ---- char* pvend; /* address one beyond last free slot */ char* p; /* temp */ + char msbuf[INITBUFSIZE]; ! /* Optimize for normal case: avoid _PyString_Resize if at all ! * possible via first reading into auto msbuf. ! */ ! Py_BEGIN_ALLOW_THREADS ! memset(msbuf, '\n', INITBUFSIZE); ! p = fgets(msbuf, INITBUFSIZE, fp); ! Py_END_ALLOW_THREADS ! ! if (p == NULL) { ! clearerr(fp); ! if (PyErr_CheckSignals()) ! return NULL; ! v = PyString_FromStringAndSize("", 0); ! return v; ! } ! /* fgets read *something* */ ! p = memchr(msbuf, '\n', INITBUFSIZE); ! if (p != NULL) { ! /* Did the \n come from fgets or from us? ! * Since fgets stops at the first \n, and then writes \0, if ! * it's from fgets a \0 must be next. 
But if that's so, it ! * could not have come from us, since the \n's we filled the ! * buffer with have only more \n's to the right. */ ! pvend = msbuf + INITBUFSIZE; ! if (p+1 < pvend && *(p+1) == '\0') { ! /* It's from fgets: we win! In particular, we ! * haven't done any mallocs yet, and can build the ! * final result on the first try. */ ! v = PyString_FromStringAndSize(msbuf, p - msbuf + 1); return v; } ! /* Must be from us: fgets didn't fill the buffer and didn't ! * find a newline, so it must be the last and newline-free ! * line of the file. */ ! assert(p > msbuf && *(p-1) == '\0'); ! v = PyString_FromStringAndSize(msbuf, p - msbuf - 1); ! return v; } ! /* yuck: fgets overwrote all the newlines, i.e. the entire buffer. ! * So this line isn't over yet, or maybe it is but we're exactly at ! *EOF; in either case, we're tired <wink>. ! */ ! assert(msbuf[INITBUFSIZE-1] == '\0'); ! total_v_size = INITBUFSIZE + INCBUFSIZE; ! v = PyString_FromStringAndSize((char*)NULL, ! (int)total_v_size); ! if (v == NULL) ! return v; ! /* copy over everything except the last null byte */ ! memcpy(BUF(v), msbuf, INITBUFSIZE-1); ! pvfree = BUF(v) + INITBUFSIZE - 1; /* Keep reading stuff into v; if it ever ends successfully, break ! * after setting p one beyond the end of the line. The code here is ! * very much like the code above, except reads into v's buffer; see ! * the code above for detailed comments about the logic. */ for (;;) { *************** *** 765,769 **** break; } - /* See the "normal case" comments above for details. */ p = memchr(pvfree, '\n', nfree); if (p != NULL) { --- 768,771 ----
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4