1
+
#!/usr/bin/env python3.3
2
+
"""Example HTTP client using yield-from coroutines (PEP 380).
3
+
4
+
Requires Python 3.3.
5
+
6
+
There are many micro-optimizations possible here, but that's not the point.
7
+
8
+
Some incomplete laundry lists:
9
+
10
+
TODO:
11
+
- Use poll() or better; need to figure out how to keep fds registered.
12
+
- Separate scheduler and event loop.
13
+
- A more varied set of test URLs.
14
+
- A Hg repo.
15
+
- Profiling.
16
+
- Unittests.
17
+
18
+
PATTERNS TO TRY:
19
+
- Wait for all, collate results.
20
+
- Wait for first N that are ready.
21
+
- Wait until some predicate becomes true.
22
+
23
+
FUNCTIONALITY:
24
+
- Connection pool (keep connection open).
25
+
- Chunked encoding (request and response).
26
+
- Pipelining, e.g. zlib (request and response).
27
+
- Automatic encoding/decoding.
28
+
- Thread pool and getaddrinfo() calling.
29
+
- A write() call that isn't a generator.
30
+
"""
31
+
32
+
__author__ = 'Guido van Rossum <guido@python.org>'
33
+
34
+
import collections
35
+
import errno
36
+
import logging
37
+
import re
38
+
import select
39
+
import socket
40
+
import time
41
+
42
+
43
+
class Scheduler:
44
+
45
+
def __init__(self):
46
+
self.runnable = collections.deque()
47
+
self.current = None
48
+
self.readers = {}
49
+
self.writers = {}
50
+
51
+
def run(self, task):
52
+
self.runnable.append(task)
53
+
54
+
def loop(self):
55
+
while self.runnable or self.readers or self.writers:
56
+
self.loop1()
57
+
58
+
def loop1(self):
59
+
## print('loop1')
60
+
while self.runnable:
61
+
self.current = self.runnable.popleft()
62
+
try:
63
+
next(self.current)
64
+
except StopIteration:
65
+
self.current = None
66
+
except Exception:
67
+
self.current = None
68
+
logging.exception('Exception in task')
69
+
else:
70
+
if self.current is not None:
71
+
self.runnable.append(self.current)
72
+
self.current = None
73
+
if self.readers or self.writers:
74
+
# TODO: Schedule timed calls as well.
75
+
# TODO: Use poll() or better.
76
+
t0 = time.time()
77
+
ready_r, ready_w, _ = select.select(self.readers, self.writers, [])
78
+
t1 = time.time()
79
+
## print('select({}, {}) took {:.3f} secs to return {}, {}'
80
+
## .format(list(self.readers), list(self.writers),
81
+
## t1 - t0, ready_r, ready_w))
82
+
for fd in ready_r:
83
+
self.unblock(self.readers.pop(fd))
84
+
for fd in ready_w:
85
+
self.unblock(self.writers.pop(fd))
86
+
87
+
def unblock(self, task):
88
+
assert task
89
+
self.runnable.append(task)
90
+
91
+
def block(self, queue, fd):
92
+
assert isinstance(fd, int)
93
+
assert fd not in queue
94
+
assert self.current is not None
95
+
queue[fd] = self.current
96
+
self.current = None
97
+
98
+
def block_r(self, fd):
99
+
self.block(self.readers, fd)
100
+
101
+
def block_w(self, fd):
102
+
self.block(self.writers, fd)
103
+
104
+
105
+
sched = Scheduler()
106
+
107
+
108
+
class RawReader:
109
+
110
+
def __init__(self, sock):
111
+
self.sock = sock
112
+
113
+
def read(self, n):
114
+
"""Read up to n bytes, blocking at most once."""
115
+
assert n >= 0, n
116
+
sched.block_r(self.sock.fileno())
117
+
yield
118
+
return self.sock.recv(n)
119
+
120
+
121
+
class BufferedReader:
122
+
123
+
def __init__(self, raw, limit=8192):
124
+
self.raw = raw
125
+
self.limit = limit
126
+
self.buffer = b''
127
+
self.eof = False
128
+
129
+
def read(self, n):
130
+
"""Read up to n bytes, blocking at most once."""
131
+
assert n >= 0, n
132
+
if not self.buffer and not self.eof:
133
+
yield from self.fillbuffer(max(n, self.limit))
134
+
return self.getfrombuffer(n)
135
+
136
+
def readexactly(self, n):
137
+
"""Read exactly n bytes, or until EOF."""
138
+
blocks = []
139
+
count = 0
140
+
while n > count:
141
+
block = yield from self.read(n - count)
142
+
blocks.append(block)
143
+
count += len(block)
144
+
return b''.join(blocks)
145
+
146
+
def readline(self):
147
+
"""Read up to newline or limit, whichever comes first."""
148
+
end = self.buffer.find(b'\n') + 1 # Point past newline, or 0.
149
+
while not end and not self.eof and len(self.buffer) < self.limit:
150
+
anchor = len(self.buffer)
151
+
yield from self.fillbuffer(self.limit)
152
+
end = self.buffer.find(b'\n', anchor) + 1
153
+
if not end:
154
+
end = len(self.buffer)
155
+
if end > self.limit:
156
+
end = self.limit
157
+
return self.getfrombuffer(end)
158
+
159
+
def getfrombuffer(self, n):
160
+
"""Read up to n bytes without blocking."""
161
+
if n >= len(self.buffer):
162
+
result, self.buffer = self.buffer, b''
163
+
else:
164
+
result, self.buffer = self.buffer[:n], self.buffer[n:]
165
+
return result
166
+
167
+
def fillbuffer(self, n):
168
+
"""Fill buffer with one (up to) n bytes from raw reader."""
169
+
assert not self.eof, 'fillbuffer called at eof'
170
+
data = yield from self.raw.read(n)
171
+
## print('fillbuffer:', repr(data)[:100])
172
+
if data:
173
+
self.buffer += data
174
+
else:
175
+
self.eof = True
176
+
177
+
178
+
def send(sock, data):
179
+
## print('send:', repr(data))
180
+
while data:
181
+
sched.block_w(sock.fileno())
182
+
yield
183
+
n = sock.send(data)
184
+
assert 0 <= n <= len(data), (n, len(data))
185
+
if n == len(data):
186
+
break
187
+
data = data[n:]
188
+
189
+
190
+
def newsocket():
191
+
sock = socket.socket()
192
+
sock.setblocking(False)
193
+
return sock
194
+
195
+
196
+
def connect(sock, address):
197
+
## print('connect:', address)
198
+
err = sock.connect_ex(address)
199
+
assert err == errno.EINPROGRESS, err
200
+
sched.block_w(sock.fileno())
201
+
yield
202
+
err = sock.connect_ex(address)
203
+
if err == errno.ECONNREFUSED:
204
+
raise IOError('Connection refused')
205
+
if err != errno.EISCONN:
206
+
raise IOError('Connect error %d: %s' % (err, errno.errorcode.get(err)))
207
+
208
+
209
+
def urlfetch(host, port=80, method='GET', path='/',
210
+
body=None, hdrs=None, encoding='utf-8'):
211
+
t0 = time.time()
212
+
# Must pass in an IP address. Later we'll call getaddrinfo()
213
+
# using a thread pool. We'll also support IPv6.
214
+
assert re.match(r'(\d+)(\.\d+)(\.\d+)(\.\d+)\Z', host), repr(host)
215
+
sock = newsocket()
216
+
yield from connect(sock, (host, port))
217
+
yield from send(sock,
218
+
method.encode(encoding) + b' ' +
219
+
path.encode(encoding) + b' HTTP/1.0\r\n')
220
+
if hdrs:
221
+
kwds = dict(hdrs)
222
+
else:
223
+
kwds = {}
224
+
if body is not None:
225
+
kwds['content_length'] = len(body)
226
+
for header, value in kwds.items():
227
+
yield from send(sock,
228
+
header.replace('_', '-').encode(encoding) + b': ' +
229
+
value.encode(encoding) + b'\r\n')
230
+
231
+
yield from send(sock, b'\r\n')
232
+
if body is not None:
233
+
yield from send(sock, body)
234
+
##sock.shutdown(1) # Close the writing end of the socket.
235
+
236
+
# Read HTTP response line.
237
+
raw = RawReader(sock)
238
+
buf = BufferedReader(raw)
239
+
resp = yield from buf.readline()
240
+
## print('resp =', repr(resp))
241
+
m = re.match(br'(?ix) http/(\d\.\d) \s+ (\d\d\d) \s+ ([^\r]*)\r?\n\Z', resp)
242
+
if not m:
243
+
sock.close()
244
+
raise IOError('No valid HTTP response: %r' % response)
245
+
http_version, status, message = m.groups()
246
+
247
+
# Read HTTP headers.
248
+
headers = []
249
+
hdict = {}
250
+
while True:
251
+
line = yield from buf.readline()
252
+
if not line.strip():
253
+
break
254
+
m = re.match(br'([^\s:]+):\s*([^\r]*)\r?\n\Z', line)
255
+
if not m:
256
+
raise IOError('Invalid header: %r' % line)
257
+
header, value = m.groups()
258
+
headers.append((header, value))
259
+
hdict[header.decode(encoding).lower()] = value.decode(encoding)
260
+
261
+
# Read response body.
262
+
content_length = hdict.get('content-length')
263
+
if content_length is not None:
264
+
size = int(content_length) # TODO: Catch errors.
265
+
assert size >= 0, size
266
+
else:
267
+
size = 2**20 # Protective limit (1 MB).
268
+
data = yield from buf.readexactly(size)
269
+
sock.close() # Can this block?
270
+
t1 = time.time()
271
+
## print(http_version, status, message, headers, hdict, len(data))
272
+
print(host, port, path, status, len(data), '{:.3}'.format(t1-t0))
273
+
274
+
275
+
def doit():
276
+
gen1 = urlfetch('127.0.0.1', 8080, path='/', hdrs={'host': 'localhost'})
277
+
gen2 = urlfetch('82.94.164.162', 80, path='/', hdrs={'host': 'python.org'})
278
+
sched.run(gen1)
279
+
sched.run(gen2)
280
+
for x in '123':
281
+
for y in '0123456789':
282
+
g = urlfetch('82.94.164.162', 80,
283
+
path='/{}.{}'.format(x, y),
284
+
hdrs={'host': 'python.org'})
285
+
sched.run(g)
286
+
sched.loop()
287
+
288
+
289
+
def main():
290
+
doit()
291
+
292
+
293
+
if __name__ == '__main__':
294
+
main()
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4