Rewritten the improved keep-alive handler

This commit is contained in:
Miroslav Štampar 2026-06-21 00:39:33 +02:00
parent e1aac02ef2
commit 6d306ba50d
8 changed files with 289 additions and 703 deletions

View file

@ -85,11 +85,11 @@ a65b6e29389b1543f54da6aced3ca4abdcd68cb626ceefc61fb9985bda692251 data/xml/paylo
997556b6170964a64474a2e053abe33cf2cf029fb1acec660d4651cc67a3c7e1 data/xml/payloads/time_blind.xml
40a4878669f318568097719d07dc906a19b8520bc742be3583321fc1e8176089 data/xml/payloads/union_query.xml
ff368554d3320ffa50751e32c903aeec21221f351f3efa573a211081947f69e8 data/xml/queries.xml
e043101194219a2e4c8bc352f0d3a04b87e1c28b1bcd6c13f6d5d1c9e260b653 doc/ARCHITECTURE.md
127799739f9aeabca367027197f3c0240f141303bd7499928ccfa1443bf148c7 doc/ARCHITECTURE.md
0f5a9c84cb57809be8759f483c7d05f54847115e715521ac0ecf390c0aa68465 doc/AUTHORS
ce20a4b452f24a97fde7ec9ed816feee12ac148e1fde5f1722772cc866b12740 doc/CHANGELOG.md
233fb10dff24a2436eb24496db7fadb46659da6745a0d53c744db701188041ef doc/THANKS.md
59697fb4f118a3197f5b3dc9057351797767c8bcc748e0286e3f7ad74ec3afb6 doc/THIRD-PARTY.md
b6fcc489c6eaca2a7d0d031bd04fe28e6790ffe4dfd4bdf055b6dc83b992dc86 doc/THIRD-PARTY.md
2af9b7a8c5f24de68f9b8b1bcf3a7f2b0e55fdb48b6545e1fc8b13f406ac97c2 doc/translations/README-ar-AR.md
c25f7d7f0cc5e13db71994d2b34ada4965e06c87778f1d6c1a103063d25e2c89 doc/translations/README-bg-BG.md
e85c82df1a312d93cd282520388c70ecb48bfe8692644fe8dbbf7d43244cda41 doc/translations/README-bn-BD.md
@ -182,14 +182,14 @@ c03dc585f89642cfd81b087ac2723e3e1bb3bfa8c60e6f5fe58ef3b0113ebfe6 lib/core/data.
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/core/__init__.py
914a13ee21fd610a6153a37cbe50830fcbd1324c7ebc1e7fc206d5e598b0f7ad lib/core/log.py
31690232f12d0590c8cbea7245ded86875f63c078da99673af4ab7451f0fffcb lib/core/optiondict.py
7357efadb3fc8305a1b2a0b1be1915099c5c87bdbe1e95fafcd008043a58039d lib/core/option.py
598f48639bcc7bb665a2adc538e5349999de620ea9ed5a821f89a823dc5fa093 lib/core/option.py
ccc4a717e887652b1fcce073d9409d9c59a3b28548c703a9e453d15845f90cd7 lib/core/patch.py
49c0fa7e3814dfda610d665ee02b12df299b28bc0b6773815b4395514ddf8dec lib/core/profiling.py
03db48f02c3d07a047ddb8fe33a757b6238867352d8ddda2a83e4fec09a98d04 lib/core/readlineng.py
48797d6c34dd9bb8a53f7f3794c85f4288d82a9a1d6be7fcf317d388cb20d4b3 lib/core/replication.py
0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6 lib/core/revision.py
888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c lib/core/session.py
f79f96c5f073b663cc494c57b9641dc41e7ed13a28d5cec62bb9ca8904110d9c lib/core/settings.py
ab38bb42e8e2a7eda7380574f5083e0a65daa154fb345ef3385dbb2c128ed9df lib/core/settings.py
cd5a66deee8963ba8e7e9af3dd36eb5e8127d4d68698811c29e789655f507f82 lib/core/shell.py
bcb5d8090d5e3e0ef2a586ba09ba80eef0c6d51feb0f611ed25299fbb254f725 lib/core/subprocessng.py
70ea3768f1b3062b22d20644df41c86238157ec80dd43da40545c620714273c6 lib/core/target.py
@ -218,6 +218,7 @@ cf019248253a5d7edb7bc474aa020b9e8625d73008a463c56ba2b539d7f2d8ec lib/request/dn
92c81cc31ff4a396723242058fb2152c9e9745f8412d01ea74480b048a53af6c lib/request/httpshandler.py
1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/request/__init__.py
7a0ac2522213e756348fd871a7af74cc963bdc82f9d7ade57be5de42b5bf7cab lib/request/inject.py
ab3afa064a84029ef55804bfcc02e49c7f8f47aad51448b5b3adfda1dd59073e lib/request/keepalive.py
ada4d305d6ce441f79e52ec3f2fc23869ee2fa87c017723e8f3ed0dfa61cdab4 lib/request/methodrequest.py
43a7fdf64e7ba63c6b2d641c9f999a63c12ac23b43b64fedfce4e05b863de568 lib/request/pkihandler.py
b90feeb16e89a844427df42373b0139eb6f6cf3c48ccec32b3e3a3f540c2451e lib/request/rangehandler.py
@ -675,8 +676,6 @@ e5c0b59577c30bb44c781d2f129580eaa003e46dcc4f307f08bc7f15e1555a2e thirdparty/ide
edf23e7105539d700a1ae1bc52436e57e019b345a7d0227e4d85b6353ef535fa thirdparty/identywaf/__init__.py
d846fdc47a11a58da9e463a948200f69265181f3dbc38148bfe4141fade10347 thirdparty/identywaf/LICENSE
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 thirdparty/__init__.py
879d96f2460bc6c79c0db46b5813080841c7403399292ce76fe1dc0a6ed353d8 thirdparty/keepalive/__init__.py
ae394bfae5204dfeffeccc15c356d9bf21708f9e48016681cfb8040ff8857998 thirdparty/keepalive/keepalive.py
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 thirdparty/magic/__init__.py
4d89a52f809c28ce1dc17bb0c00c775475b8ce01c2165942877596a6180a2fd8 thirdparty/magic/magic.py
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 thirdparty/multipart/__init__.py

View file

@ -68,7 +68,7 @@ Identifiers in the codebase are camelCase.
| `data/xml/` | the data-driven engine: `boundaries.xml`, `payloads/*.xml`, `queries.xml`, `errors.xml` |
| `data/` (other) | wordlists/common tables/columns (`txt/`), UDFs (`udf/`), stored procs (`procs/`), shells (`shell/`) |
| `tests/` | stdlib-unittest suite (offline); see section 11 |
| `thirdparty/` | vendored dependencies (six, bottle, keepalive, chardet, ...) - no pip at runtime |
| `thirdparty/` | vendored dependencies (six, bottle, chardet, ...) - no pip at runtime |
| `extra/` | auxiliary tools (e.g. `vulnserver` used by `--vuln-test`) |
---
@ -179,7 +179,7 @@ Enumeration is DBMS-agnostic at the top and specialized underneath:
`lib/request/connect.py` (`Connect.getPage`) is the single HTTP chokepoint. Around it:
protocol handlers (`httpshandler`, `redirecthandler`, `chunkedhandler`, `rangehandler`,
keep-alive via `thirdparty/keepalive`), response processing (`basic.py`), and the
persistent connections via `lib/request/keepalive.py`), response processing (`basic.py`), and the
comparison oracle (`comparison.py`).
**Tamper scripts** (`tamper/`) mutate the payload just before sending to evade WAF/IPS.

View file

@ -46,8 +46,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* The `Chardet` library located under `thirdparty/chardet/`.
Copyright (C) 2008, Mark Pilgrim.
* The `KeepAlive` library located under `thirdparty/keepalive/`.
Copyright (C) 2002-2003, Michael D. Stenner.
* The `MultipartPost` library located under `thirdparty/multipart/`.
Copyright (C) 2006, Will Holcomb.
* The `icmpsh` tool located under `extra/icmpsh/`.

View file

@ -145,6 +145,8 @@ from lib.request.chunkedhandler import ChunkedHandler
from lib.request.connect import Connect as Request
from lib.request.dns import DNSServer
from lib.request.httpshandler import HTTPSHandler
from lib.request.keepalive import HTTPKeepAliveHandler
from lib.request.keepalive import HTTPSKeepAliveHandler
from lib.request.pkihandler import HTTPSPKIAuthHandler
from lib.request.rangehandler import HTTPRangeHandler
from lib.request.redirecthandler import SmartRedirectHandler
@ -154,7 +156,6 @@ from lib.utils.har import HTTPCollectorFactory
from lib.utils.purge import purge
from lib.utils.search import search
from thirdparty import six
from thirdparty.keepalive import keepalive
from thirdparty.multipart import multipartpost
from thirdparty.six.moves import collections_abc as _collections
from thirdparty.six.moves import http_client as _http_client
@ -166,7 +167,8 @@ from xml.etree.ElementTree import ElementTree
authHandler = _urllib.request.BaseHandler()
chunkedHandler = ChunkedHandler()
httpsHandler = HTTPSHandler()
keepAliveHandler = keepalive.HTTPHandler()
keepAliveHandler = HTTPKeepAliveHandler()
keepAliveHandlerHTTPS = HTTPSKeepAliveHandler()
proxyHandler = _urllib.request.ProxyHandler()
redirectHandler = SmartRedirectHandler()
rangeHandler = HTTPRangeHandler()
@ -1250,7 +1252,12 @@ def _setHTTPHandlers():
warnMsg += "with authentication methods"
logger.warning(warnMsg)
else:
# Note: persistent connections for both HTTP and HTTPS; the keep-alive
# HTTPS handler supersedes the regular one (reusing its SSL connection)
if httpsHandler in handlers:
handlers.remove(httpsHandler)
handlers.append(keepAliveHandler)
handlers.append(keepAliveHandlerHTTPS)
opener = _urllib.request.build_opener(*handlers)
opener.addheaders = [] # Note: clearing default "User-Agent: Python-urllib/X.Y"

View file

@ -20,7 +20,7 @@ from lib.core.enums import OS
from thirdparty import six
# sqlmap version (<major>.<minor>.<month>.<monthly commit>)
VERSION = "1.10.6.132"
VERSION = "1.10.6.133"
TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable"
TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34}
VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE)
@ -833,6 +833,12 @@ MAX_CONNECTION_READ_SIZE = 10 * 1024 * 1024
# Maximum response total page size (trimmed if larger)
MAX_CONNECTION_TOTAL_SIZE = 100 * 1024 * 1024
# Maximum number of requests served over a single persistent (Keep-Alive) connection before it is recycled
KEEPALIVE_MAX_REQUESTS = 1000
# Maximum idle time (in seconds) a pooled persistent (Keep-Alive) connection is considered reusable before being recycled
KEEPALIVE_IDLE_TIMEOUT = 30
# For preventing MemoryError exceptions (caused when using large sequences in difflib.SequenceMatcher)
MAX_DIFFLIB_SEQUENCE_LENGTH = 10 * 1024 * 1024

266
lib/request/keepalive.py Normal file
View file

@ -0,0 +1,266 @@
#!/usr/bin/env python
"""
Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
See the file 'LICENSE' for copying permission
"""
import socket
import threading
import time
from lib.core.data import conf
from lib.core.settings import KEEPALIVE_IDLE_TIMEOUT
from lib.core.settings import KEEPALIVE_MAX_REQUESTS
from thirdparty.six.moves import http_client as _http_client
from thirdparty.six.moves import urllib as _urllib
# Note: prior to Python 2.4 it was the HTTP handler's job to decide what to handle
# specially; since 2.4 that belongs to HTTPErrorProcessor, hence everything is passed up
HANDLE_ERRORS = 0
class _ConnectionPool(threading.local):
    """
    Thread-local registry of idle persistent connections.

    As a C{threading.local} subclass, each worker thread sees its own
    C{conns} dictionary, hence a pooled socket is never handed to another
    thread and requests issued under '--threads' can not interleave on the
    same wire (the classic cause of response desynchronization). Reuse within
    one thread is sequential: a connection is put back only after its response
    body has been fully drained (see L{_KeepAliveHandler._instrument}).
    """

    def __init__(self):
        # Note: key -> [connection, request_count, last_used]
        self.conns = dict()
class _KeepAliveHandler(object):
    """
    Scheme-agnostic base of the persistent connection (Keep-Alive) handlers.

    Subclasses are expected to provide C{_scheme} (used for the pool key),
    C{_get_connection(host)} (creates a new low-level connection) and
    C{_send_request(conn, req)} (writes the request to a connection).
    """

    def __init__(self):
        self._pool = _ConnectionPool()

    @staticmethod
    def _discard(conn):
        """
        Closes L{conn} ignoring any error raised while doing so (best effort)
        """

        try:
            conn.close()
        except Exception:
            pass

    def _take(self, key):
        """
        Returns a C{(connection, request_count)} pair for a still usable pooled
        connection for L{key}, or C{(None, 0)} if there is none. The entry is
        removed from the pool either way
        """

        entry = self._pool.conns.pop(key, None)

        if entry is not None:
            conn, count, last = entry

            if (time.time() - last) <= KEEPALIVE_IDLE_TIMEOUT and count < KEEPALIVE_MAX_REQUESTS:
                return conn, count

            # Too old or too heavily used; drop it
            self._discard(conn)

        return None, 0

    def _give_back(self, key, conn, count):
        """
        Stores L{conn} (having served L{count} requests) as the pooled
        connection for L{key}, stamping it with the current time
        """

        previous = self._pool.conns.get(key)
        self._pool.conns[key] = [conn, count, time.time()]

        # Note: only one connection is kept per key; when two responses were in
        # flight on the same thread the displaced connection has to be closed
        # explicitly, otherwise its socket would linger until garbage collected
        if previous is not None and previous[0] is not conn:
            self._discard(previous[0])

    def do_open(self, req):
        """
        Issues L{req} over a pooled connection (if a usable one exists) or over
        a new one, returning the adapted response object

        Raises C{URLError} when no host is given or when the request fails on a
        new connection with a socket/HTTP level error
        """

        # Note: 'selector'/'host' attributes on Python 3 (Request.get_host() was deprecated since
        # 3.3 and removed in 3.12); the get_*() fallbacks are only reachable under Python 2
        host = req.host if hasattr(req, "host") else req.get_host()

        if not host:
            raise _urllib.error.URLError("no host given")

        key = "%s://%s" % (self._scheme, host)
        conn, count = self._take(key)
        response = None

        if conn is not None:
            # A pooled socket may have been closed by the server in the
            # meantime; treat any failure (or a bogus HTTP/0.9 reply, which
            # is httplib's tell-tale for a dead socket) as a stale connection
            try:
                self._send_request(conn, req)
                response = conn.getresponse()

                if response is None or getattr(response, "version", 0) == 9:
                    raise _http_client.HTTPException("stale connection")
            except (socket.error, _http_client.HTTPException):
                self._discard(conn)
                conn = None
                response = None
            except Exception:
                # Unexpected failure; the connection has already left the pool,
                # so close it before letting the exception propagate
                self._discard(conn)
                raise

        if conn is None:
            count = 0

            try:
                conn = self._get_connection(host)
                self._send_request(conn, req)
                response = conn.getresponse()
            except (socket.error, _http_client.HTTPException) as ex:
                if conn is not None:
                    self._discard(conn)
                raise _urllib.error.URLError(ex)
            except Exception:
                if conn is not None:
                    self._discard(conn)
                raise

        count += 1

        # Honor an explicit 'Connection: close' even when L{will_close} wasn't set
        willClose = response.will_close

        if not willClose:
            try:
                headers = getattr(response, "msg", None) or getattr(response, "headers", None)
                value = (headers.get("connection") or headers.get("Connection")) if headers else None

                if value and "close" in value.lower():
                    willClose = True
            except Exception:
                pass

        keep = not willClose and count < KEEPALIVE_MAX_REQUESTS

        self._adapt(response, req.get_full_url())
        self._instrument(response, key, conn, count, keep)

        if response.status == 200 or not HANDLE_ERRORS:
            return response
        else:
            return self.parent.error("http", req, response, response.status, response.reason, response.headers)

    def _adapt(self, response, url):
        """
        Makes a raw httplib response indistinguishable from the object normally
        returned by C{urlopen} (the surface the rest of sqlmap relies on)
        """

        headers = getattr(response, "headers", None)

        if headers is None:
            headers = response.msg  # Python 2: msg holds the parsed headers

        response.url = url
        response.code = response.status
        response.headers = headers

        if not hasattr(response, "info"):
            response.info = lambda headers=headers: headers

        if not hasattr(response, "geturl"):
            response.geturl = lambda url=url: url

        if not hasattr(response, "getcode"):
            response.getcode = lambda response=response: response.status

        # Note: must come last as on Python 3 'msg' initially aliases the headers
        response.msg = response.reason

    def _instrument(self, response, key, conn, count, keep):
        """
        Returns the connection to the pool once (and only once) its body has been
        fully consumed; otherwise the socket is closed. A partially read response
        (e.g. sqlmap hitting a size cap) leaves unread bytes on the wire, so such
        a connection is never reused.
        """

        state = {"handled": False}
        _read = response.read
        _close = response.close

        def drained():
            checker = getattr(response, "isclosed", None)

            if callable(checker):
                try:
                    return checker()
                except Exception:
                    return False

            return getattr(response, "fp", None) is None

        def settle():
            # Once (and only once) the body is fully drained, decide the socket's fate
            if state["handled"] or not drained():
                return

            state["handled"] = True

            if keep:
                self._give_back(key, conn, count)
            else:
                self._discard(conn)

        def read(*args, **kwargs):
            data = _read(*args, **kwargs)
            settle()
            return data

        def close():
            # Note: on Python 2 httplib.read() calls close() itself upon EOF
            _close()
            settle()

            if not state["handled"]:
                # Closed before the body was fully consumed; unsafe to reuse
                state["handled"] = True
                self._discard(conn)

        response.read = read
        response.close = close
class HTTPKeepAliveHandler(_KeepAliveHandler, _urllib.request.HTTPHandler):
    """
    Persistent connection handler for plain HTTP requests
    """

    _scheme = "http"

    def __init__(self):
        # Note: only the keep-alive base is initialized here
        _KeepAliveHandler.__init__(self)

    def http_open(self, req):
        # Entry point looked up by urllib for the 'http' scheme
        return self.do_open(req)

    def _get_connection(self, host):
        # New (not yet pooled) plain HTTP connection
        return _http_client.HTTPConnection(host)

    def _send_request(self, conn, req):
        _sendRequest(conn, req)
class HTTPSKeepAliveHandler(_KeepAliveHandler, _urllib.request.HTTPSHandler):
    """
    Persistent connection handler for HTTPS requests
    """

    _scheme = "https"

    def __init__(self):
        # Note: only the keep-alive base is initialized here
        _KeepAliveHandler.__init__(self)

    def https_open(self, req):
        # Entry point looked up by urllib for the 'https' scheme
        return self.do_open(req)

    def _get_connection(self, host):
        # Note: reuses sqlmap's SSL-negotiating connection (lib/request/httpshandler.py)
        from lib.request.httpshandler import HTTPSConnection
        from lib.request.httpshandler import ssl

        # Falls back to the stock HTTPS connection when that module's 'ssl' is not available
        return HTTPSConnection(host) if ssl else _http_client.HTTPSConnection(host)

    def _send_request(self, conn, req):
        _sendRequest(conn, req)
def _sendRequest(conn, req):
    """
    Writes the request line, the headers and the (optional) body of L{req}
    to the (possibly reused) low-level connection L{conn}
    """

    body = getattr(req, "data", None)
    hasBody = body is not None
    verb = req.get_method() or ("POST" if hasBody else "GET")

    if hasattr(req, "selector"):
        target = req.selector
    else:
        target = req.get_selector()

    try:
        conn.putrequest(verb, target, skip_host=req.has_header("Host"), skip_accept_encoding=req.has_header("Accept-encoding"))

        if hasBody:
            # Defaults for a request body lacking explicit content headers
            if not req.has_header("Content-type"):
                conn.putheader("Content-type", "application/x-www-form-urlencoded")

            if not req.has_header("Content-length"):
                conn.putheader("Content-length", "%d" % len(body))
    except (socket.error, _http_client.HTTPException) as ex:
        raise _urllib.error.URLError(ex)

    # Ask for a persistent connection unless the request says otherwise
    if not req.has_header("Connection"):
        conn.putheader("Connection", "keep-alive")

    for name, content in req.header_items():
        conn.putheader(name, content)

    conn.endheaders()

    if hasBody:
        conn.send(body)

View file

@ -1,19 +0,0 @@
#!/usr/bin/env python
#
# Copyright 2002-2003 Michael D. Stenner
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
pass

View file

@ -1,671 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the
# Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
# This file was part of urlgrabber, a high-level cross-protocol url-grabber
# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
# Copyright 2015 Sergio Fernández
"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
>>> import urllib2
>>> from keepalive import HTTPHandler
>>> keepalive_handler = HTTPHandler()
>>> opener = _urllib.request.build_opener(keepalive_handler)
>>> _urllib.request.install_opener(opener)
>>>
>>> fo = _urllib.request.urlopen('http://www.python.org')
If a connection to a given host is requested, and all of the existing
connections are still in use, another connection will be opened. If
the handler tries to use an existing connection but it fails in some
way, it will be closed and removed from the pool.
To remove the handler, simply re-run build_opener with no arguments, and
install that opener.
You can explicitly close connections by using the close_connection()
method of the returned file-like object (described below) or you can
use the handler methods:
close_connection(host)
close_all()
open_connections()
NOTE: using the close_connection and close_all methods of the handler
should be done with care when using multiple threads.
* there is nothing that prevents another thread from creating new
connections immediately after connections are closed
* no checks are done to prevent in-use connections from being closed
>>> keepalive_handler.close_all()
EXTRA ATTRIBUTES AND METHODS
Upon a status of 200, the object returned has a few additional
attributes and methods, which should not be used if you want to
remain consistent with the normal urllib2-returned objects:
close_connection() - close the connection to the host
readlines() - you know, readlines()
status - the return status (ie 404)
reason - english translation of status (ie 'File not found')
If you want the best of both worlds, use this inside an
AttributeError-catching try:
>>> try: status = fo.status
>>> except AttributeError: status = None
Unfortunately, these are ONLY there if status == 200, so it's not
easy to distinguish between non-200 responses. The reason is that
urllib2 tries to do clever things with error codes 301, 302, 401,
and 407, and it wraps the object upon return.
For python versions earlier than 2.4, you can avoid this fancy error
handling by setting the module-level global HANDLE_ERRORS to zero.
You see, prior to 2.4, it's the HTTP Handler's job to determine what
to handle specially, and what to just pass up. HANDLE_ERRORS == 0
means "pass everything up". In python 2.4, however, this job no
longer belongs to the HTTP Handler and is now done by a NEW handler,
HTTPErrorProcessor. Here's the bottom line:
python version < 2.4
HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
errors
HANDLE_ERRORS == 0 pass everything up, error processing is
left to the calling code
python version >= 2.4
HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
HANDLE_ERRORS == 0 (default) pass everything up, let the
other handlers (specifically,
HTTPErrorProcessor) decide what to do
In practice, setting the variable either way makes little difference
in python 2.4, so for the most consistent behavior across versions,
you probably just want to use the defaults, which will give you
exceptions on errors.
"""
from __future__ import print_function
try:
from thirdparty.six.moves import http_client as _http_client
from thirdparty.six.moves import range as _range
from thirdparty.six.moves import urllib as _urllib
except ImportError:
from six.moves import http_client as _http_client
from six.moves import range as _range
from six.moves import urllib as _urllib
import socket
import threading
DEBUG = None
import sys
if sys.version_info < (2, 4): HANDLE_ERRORS = 1
else: HANDLE_ERRORS = 0
class ConnectionManager:
    """
    Lock-protected registry of open connections.

    The connection manager must be able to:
        * keep track of all existing connections, grouped by host
        * remember which of them are ready (idle) for another request
        * hand out a ready connection for a host, marking it as busy
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._hostmap = {}   # map hosts to a list of connections
        self._connmap = {}   # map connections to host
        self._readymap = {}  # map connection to ready state

    def add(self, host, connection, ready):
        """register <connection> for <host> with the given ready state"""
        with self._lock:
            self._hostmap.setdefault(host, []).append(connection)
            self._connmap[connection] = host
            self._readymap[connection] = ready

    def remove(self, connection):
        """forget <connection>; no error occurs if it is not registered"""
        with self._lock:
            if connection not in self._connmap:
                return

            host = self._connmap.pop(connection)
            del self._readymap[connection]

            try:
                self._hostmap[host].remove(connection)
            except ValueError:
                pass

            # drop the host entry once its last connection is gone
            if not self._hostmap[host]:
                del self._hostmap[host]

    def set_ready(self, connection, ready):
        """update the ready state of a registered connection (unknown ones are ignored)"""
        with self._lock:
            if connection in self._readymap:
                self._readymap[connection] = ready

    def get_ready_conn(self, host):
        """return a ready connection for <host> (marking it busy) or None"""
        # the lock is taken before entering the protected region, so it is
        # never released without having been acquired
        with self._lock:
            for c in self._hostmap.get(host, ()):
                if self._readymap.get(c):
                    self._readymap[c] = 0
                    return c

        return None

    def get_all(self, host=None):
        """return a copy of the connection list for <host>, or a (shallow)
        copy of the whole host -> connections mapping if no host is given"""
        with self._lock:
            if host:
                return list(self._hostmap.get(host, []))
            else:
                return dict(self._hostmap)
class KeepAliveHandler:
    """Scheme-independent part of the keepalive handlers: tracks connections
    in a ConnectionManager and reuses ready ones when opening requests.
    Subclasses must implement _get_connection()."""

    def __init__(self):
        self._cm = ConnectionManager()

    #### Connection Management
    def open_connections(self):
        """return a list of connected hosts and the number of connections
        to each.  [('foo.com:80', 2), ('bar.org', 1)]"""
        return [(host, len(li)) for (host, li) in self._cm.get_all().items()]

    def close_connection(self, host):
        """close connection(s) to <host>
        host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
        no error occurs if there is no connection to that host."""
        for h in self._cm.get_all(host):
            self._cm.remove(h)
            h.close()

    def close_all(self):
        """close all open connections"""
        for host, conns in self._cm.get_all().items():
            for h in conns:
                self._cm.remove(h)
                h.close()

    def _request_closed(self, request, host, connection):
        """tells us that this request is now closed and the
        connection is ready for another request"""
        self._cm.set_ready(connection, 1)

    def _remove_connection(self, host, connection, close=0):
        """forget <connection>, closing it first if <close> is true"""
        if close: connection.close()
        self._cm.remove(connection)

    #### Transaction Execution
    def do_open(self, req):
        """send <req> over a ready connection to its host, or over a new one
        if none works, and return the response.

        socket and httplib level errors are re-raised as URLError."""
        host = req.host
        if not host:
            raise _urllib.error.URLError('no host given')

        try:
            h = self._cm.get_ready_conn(host)
            while h:
                r = self._reuse_connection(h, req, host)

                # if this response is non-None, then it worked and we're
                # done.  Break out, skipping the else block.
                if r: break

                # connection is bad - possibly closed by server
                # discard it and ask for the next free connection
                h.close()
                self._cm.remove(h)
                h = self._cm.get_ready_conn(host)
            else:
                # no (working) free connections were found.  Create a new one.
                h = self._get_connection(host)
                if DEBUG: DEBUG.info("creating new connection to %s (%d)",
                                     host, id(h))
                self._start_transaction(h, req)
                r = h.getresponse()
                # registered as not ready (0) since the response is still in use
                self._cm.add(host, h, 0)
        except (socket.error, _http_client.HTTPException) as err:
            raise _urllib.error.URLError(err)

        if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)

        # treat an explicit 'Connection: close' header as will_close
        if not r.will_close:
            try:
                headers = getattr(r, 'msg', None)
                if headers:
                    c_head = headers.get("connection")
                    if c_head and "close" in c_head.lower():
                        r.will_close = True
            except Exception:
                pass

        # if not a persistent connection, don't try to reuse it
        if r.will_close:
            if DEBUG: DEBUG.info('server will close connection, discarding')
            self._cm.remove(h)
            h.close()

        # attributes stored on the response object for later use
        r._handler = self
        r._host = host
        r._url = req.get_full_url()
        r._connection = h
        r.code = r.status
        r.headers = r.msg

        if r.status == 200 or not HANDLE_ERRORS:
            return r
        else:
            return self.parent.error('http', req, r,
                                     r.status, r.reason, r.headers)

    def _reuse_connection(self, h, req, host):
        """start the transaction with a re-used connection
        return a response object (r) upon success or None on failure.
        This DOES not close or remove bad connections in cases where
        it returns.  However, if an unexpected exception occurs, it
        will close and remove the connection before re-raising.
        """
        try:
            self._start_transaction(h, req)
            r = h.getresponse()
            # note: just because we got something back doesn't mean it
            # worked.  We'll check the version below, too.
        except (socket.error, _http_client.HTTPException):
            r = None
        except Exception:
            # adding this block just in case we've missed
            # something we will still raise the exception, but
            # lets try and close the connection and remove it
            # first.  We previously got into a nasty loop
            # where an exception was uncaught, and so the
            # connection stayed open.  On the next try, the
            # same exception was raised, etc.  The tradeoff is
            # that it's now possible this call will raise
            # a DIFFERENT exception
            if DEBUG: DEBUG.error("unexpected exception - closing " + \
                                  "connection to %s (%d)", host, id(h))
            self._cm.remove(h)
            h.close()
            raise

        if r is None or r.version == 9:
            # httplib falls back to assuming HTTP 0.9 if it gets a
            # bad header back.  This is most likely to happen if
            # the socket has been closed by the server since we
            # last used the connection.
            if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
                                 host, id(h))
            r = None
        else:
            if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))

        return r

    def _start_transaction(self, h, req):
        """write the request line, headers and (if any) body of <req> to the
        connection <h>; errors raised while writing the request line and
        content headers are re-raised as URLError"""
        try:
            if req.data:
                data = req.data
                if hasattr(req, 'selector'):
                    h.putrequest(req.get_method() or 'POST', req.selector, skip_host=req.has_header("Host"), skip_accept_encoding=req.has_header("Accept-encoding"))
                else:
                    h.putrequest(req.get_method() or 'POST', req.get_selector(), skip_host=req.has_header("Host"), skip_accept_encoding=req.has_header("Accept-encoding"))
                if 'Content-type' not in req.headers:
                    h.putheader('Content-type',
                                'application/x-www-form-urlencoded')
                if 'Content-length' not in req.headers:
                    h.putheader('Content-length', '%d' % len(data))
            else:
                if hasattr(req, 'selector'):
                    h.putrequest(req.get_method() or 'GET', req.selector, skip_host=req.has_header("Host"), skip_accept_encoding=req.has_header("Accept-encoding"))
                else:
                    h.putrequest(req.get_method() or 'GET', req.get_selector(), skip_host=req.has_header("Host"), skip_accept_encoding=req.has_header("Accept-encoding"))
        except (socket.error, _http_client.HTTPException) as err:
            raise _urllib.error.URLError(err)

        if 'Connection' not in req.headers:
            h.putheader('Connection', 'keep-alive')

        # opener-wide default headers, unless the request sets the same header
        for args in self.parent.addheaders:
            if args[0] not in req.headers:
                h.putheader(*args)
        for k, v in req.headers.items():
            h.putheader(k, v)
        h.endheaders()
        if req.data:
            h.send(req.data)

    def _get_connection(self, host):
        """create a new connection to <host>; to be provided by subclasses"""
        raise NotImplementedError()
class HTTPHandler(KeepAliveHandler, _urllib.request.HTTPHandler):
    """keepalive handler for the http scheme"""

    def __init__(self):
        KeepAliveHandler.__init__(self)

    def http_open(self, req):
        return self.do_open(req)

    def _get_connection(self, host):
        # this module's HTTPConnection (uses the modified response class)
        return HTTPConnection(host)
class HTTPSHandler(KeepAliveHandler, _urllib.request.HTTPSHandler):
    """keepalive handler for the https scheme; connections come from
    <ssl_factory> if one is given or importable, else from HTTPSConnection"""

    def __init__(self, ssl_factory=None):
        KeepAliveHandler.__init__(self)
        if not ssl_factory:
            # optional dependency: without it self._ssl_factory stays None
            try:
                import sslfactory
                ssl_factory = sslfactory.get_factory()
            except ImportError:
                pass
        self._ssl_factory = ssl_factory

    def https_open(self, req):
        return self.do_open(req)

    def _get_connection(self, host):
        if self._ssl_factory:
            return self._ssl_factory.get_https_connection(host)
        else:
            return HTTPSConnection(host)
class HTTPResponse(_http_client.HTTPResponse):
    """Response class used by the keepalive connections.

    It extends the stock response with
      1) readline() and readlines() methods
      2) a close_connection() method
      3) info() and geturl() methods

    readline() needs a look-ahead buffer (_rbuf): data is pulled in chunks
    of _rbufsize bytes and handed out one line at a time, as the only real
    alternative would be reading one byte at a time. Data that has been
    read can't be put back, so read() first serves whatever is left in the
    buffer before touching the underlying stream (it never adds to the
    buffer itself).
    """

    # the unbuffered read of the stock response class
    _raw_read = _http_client.HTTPResponse.read

    def __init__(self, sock, debuglevel=0, strict=0, method=None):
        extra = {"method": method} if method else {}
        _http_client.HTTPResponse.__init__(self, sock, debuglevel, **extra)

        self.fileno = sock.fileno
        self.code = None
        self._method = method
        self._rbuf = b""
        self._rbufsize = 8096

        # the following four are inserted by the handler later
        self._handler = None
        self._host = None
        self._url = None
        self._connection = None

    def close(self):
        if not self.fp:
            return

        self.fp.close()
        self.fp = None

        if self._handler:
            self._handler._request_closed(self, self._host, self._connection)

    # Note: Patch for Python3 (otherwise, connections won't be reusable)
    def _close_conn(self):
        self.close()

    def close_connection(self):
        self._handler._remove_connection(self._host, self._connection, close=1)
        self.close()

    def info(self):
        return self.headers

    def geturl(self):
        return self._url

    def read(self, amt=None):
        # the _rbuf test is only here for speed, it's not logically necessary
        if self._rbuf and amt is not None:
            buffered = len(self._rbuf)

            if amt <= buffered:
                chunk, self._rbuf = self._rbuf[:amt], self._rbuf[amt:]
                return chunk

            amt -= buffered

        chunk = self._rbuf + self._raw_read(amt)
        self._rbuf = b""
        return chunk

    def readline(self, limit=-1):
        pos = self._rbuf.find(b'\n')

        # keep filling the buffer until a newline shows up, the limit is
        # covered by buffered data or the stream is exhausted
        while pos < 0 and not (0 < limit <= len(self._rbuf)):
            fresh = self._raw_read(self._rbufsize)
            if not fresh:
                break

            pos = fresh.find(b'\n')
            if pos >= 0:
                pos += len(self._rbuf)

            self._rbuf = self._rbuf + fresh

        cut = len(self._rbuf) if pos < 0 else pos + 1

        if 0 <= limit < len(self._rbuf):
            cut = limit

        line, self._rbuf = self._rbuf[:cut], self._rbuf[cut:]
        return line

    def readlines(self, sizehint=0):
        total = 0
        collected = []

        while True:
            line = self.readline()
            if not line:
                break

            collected.append(line)
            total += len(line)

            if sizehint and total >= sizehint:
                break

        return collected
class HTTPConnection(_http_client.HTTPConnection):
    """HTTP connection that builds its responses with ``HTTPResponse``."""

    # swap in the buffering response class
    response_class = HTTPResponse
class HTTPSConnection(_http_client.HTTPSConnection):
    """HTTPS connection that builds its responses with ``HTTPResponse``."""

    # swap in the buffering response class
    response_class = HTTPResponse
#########################################################################
##### TEST FUNCTIONS
#########################################################################
def error_handler(url):
    """Fetch ``url`` with HANDLE_ERRORS switched off and then on.

    A keep-alive opener is installed globally, each fetch reports either
    the response status/reason or the IOError it raised, and finally the
    handler's open connections are printed and closed.

    Raises IOError (re-raised after being printed) when a fetch fails.
    The module-level HANDLE_ERRORS flag is put back to its previous value
    on both the success and the failure path.
    """
    global HANDLE_ERRORS
    orig = HANDLE_ERRORS
    keepalive_handler = HTTPHandler()
    opener = _urllib.request.build_opener(keepalive_handler)
    _urllib.request.install_opener(opener)
    pos = {0: 'off', 1: 'on'}
    try:
        for i in (0, 1):
            print(" fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i))
            HANDLE_ERRORS = i
            try:
                fo = _urllib.request.urlopen(url)
                fo.read()
                fo.close()
                # not every response object exposes status/reason
                try:
                    status, reason = fo.status, fo.reason
                except AttributeError:
                    status, reason = None, None
            except IOError as e:
                print(" EXCEPTION: %s" % e)
                raise
            else:
                print(" status = %s, reason = %s" % (status, reason))
    finally:
        # restore the flag even when a fetch raised, so the temporary
        # setting does not leak out of this function
        HANDLE_ERRORS = orig
    hosts = keepalive_handler.open_connections()
    print("open connections:", hosts)
    keepalive_handler.close_all()
def continuity(url):
    """Print MD5 digests of ``url`` fetched three different ways.

    The body is downloaded with the default opener, with the keep-alive
    opener using read(), and with the keep-alive opener using readline();
    one labelled digest line is printed for each so they can be compared.
    """
    from hashlib import md5
    row = '%25s: %s'

    # reference download through the default opener
    _urllib.request.install_opener(_urllib.request.build_opener())
    response = _urllib.request.urlopen(url)
    body = response.read()
    response.close()
    print(row % ('normal urllib', md5(body).hexdigest()))

    # same download through the keep-alive handler, read in one go
    _urllib.request.install_opener(_urllib.request.build_opener(HTTPHandler()))
    response = _urllib.request.urlopen(url)
    body = response.read()
    response.close()
    print(row % ('keepalive read', md5(body).hexdigest()))

    # and once more, assembled line by line
    response = _urllib.request.urlopen(url)
    pieces = []
    while True:
        line = response.readline()
        if not line:
            break
        pieces.append(line)
    response.close()
    body = b''.join(pieces)
    print(row % ('keepalive readline', md5(body).hexdigest()))
def comp(N, url):
    """Time ``N`` fetches of ``url`` with and without the keep-alive handler.

    Prints both timings (as returned by fetch()) and their ratio.
    """
    print(' making %i connections to:\n %s' % (N, url))

    # baseline run with the default opener
    sys.stdout.write(' first using the normal urllib handlers')
    _urllib.request.install_opener(_urllib.request.build_opener())
    baseline = fetch(N, url)
    print(' TIME: %.3f s' % baseline)

    # second run with the keep-alive handler installed
    sys.stdout.write(' now using the keepalive handler ')
    _urllib.request.install_opener(_urllib.request.build_opener(HTTPHandler()))
    keepalive = fetch(N, url)
    print(' TIME: %.3f s' % keepalive)

    print(' improvement factor: %.2f' % (baseline / keepalive, ))
def fetch(N, url, delay=0):
    """Download ``url`` ``N`` times and return the elapsed wall-clock time.

    When ``delay`` is truthy, that many seconds are slept before every
    fetch except the first.  A warning is printed for each later fetch
    whose body length differs from the first one.
    """
    import time

    sizes = []
    started = time.time()
    for attempt in _range(N):
        if attempt > 0 and delay:
            time.sleep(delay)
        response = _urllib.request.urlopen(url)
        payload = response.read()
        response.close()
        sizes.append(len(payload))
    elapsed = time.time() - started

    # compare every later read against the first one
    for index, size in enumerate(sizes[1:], 1):
        if size != sizes[0]:
            print("WARNING: inconsistent length on read %i: %i" % (index, size))

    return elapsed
def test_timeout(url):
    """Fetch ``url``, wait 20 seconds, fetch again and compare the bodies.

    While the check runs, the module-level DEBUG object is replaced by a
    logger that prints every message; the previous value is restored
    afterwards, including when one of the fetches raises.
    """
    global DEBUG
    dbbackup = DEBUG

    class FakeLogger:
        def debug(self, msg, *args):
            print(msg % args)
        info = warning = error = debug

    DEBUG = FakeLogger()
    try:
        print(" fetching the file to establish a connection")
        fo = _urllib.request.urlopen(url)
        data1 = fo.read()
        fo.close()

        i = 20
        print(" waiting %i seconds for the server to close the connection" % i)
        while i > 0:
            sys.stdout.write('\r %2i' % i)
            sys.stdout.flush()
            time.sleep(1)
            i -= 1
        # NOTE(review): the countdown goes to stdout but this carriage
        # return goes to stderr -- confirm that is intended
        sys.stderr.write('\r')

        print(" fetching the file a second time")
        fo = _urllib.request.urlopen(url)
        data2 = fo.read()
        fo.close()

        if data1 == data2:
            print(' data are identical')
        else:
            print(' ERROR: DATA DIFFER')
    finally:
        # put the real DEBUG back even if a fetch failed, so the printing
        # logger does not stay installed
        DEBUG = dbbackup
def test(url, N=10):
    """Run the error-handling, continuity, speed and timeout checks on ``url``.

    ``N`` is the number of fetches used by the speed comparison.  If the
    error-handler check raises IOError, a message is printed and the
    process exits through ``sys.exit()`` before the remaining checks run.
    """
    print("checking error hander (do this on a non-200)")
    try:
        error_handler(url)
    except IOError:
        print("exiting - exception will prevent further tests")
        sys.exit()

    # remaining checks: a blank line, a banner, then the check itself
    stages = (
        ("performing continuity test (making sure stuff isn't corrupted)",
         lambda: continuity(url)),
        ("performing speed comparison",
         lambda: comp(N, url)),
        ("performing dropped-connection check",
         lambda: test_timeout(url)),
    )
    for banner, run in stages:
        print()
        print(banner)
        run()
if __name__ == '__main__':
    # Script entry point: <integer> is the fetch count for the speed test,
    # <url> is the resource every check is run against.
    # NOTE(review): comp() and test_timeout() use ``sys``/``time`` without
    # importing them, so they appear to rely on these imports -- confirm
    # against the top of the file.
    import time
    import sys
    try:
        N = int(sys.argv[1])
        url = sys.argv[2]
    except (IndexError, ValueError):
        # missing or non-numeric arguments -> usage line; anything else
        # (e.g. KeyboardInterrupt) is no longer swallowed
        print("%s <integer> <url>" % sys.argv[0])
    else:
        test(url, N)