mirror of
https://github.com/sqlmapproject/sqlmap.git
synced 2026-06-11 02:31:51 +00:00
461 lines
22 KiB
Python
461 lines
22 KiB
Python
#!/usr/bin/env python
|
|
|
|
"""
|
|
Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
|
|
See the file 'LICENSE' for copying permission
|
|
"""
|
|
|
|
import codecs
|
|
import gzip
|
|
import io
|
|
import logging
|
|
import re
|
|
import zlib
|
|
|
|
from lib.core.common import Backend
|
|
from lib.core.common import extractErrorMessage
|
|
from lib.core.common import extractRegexResult
|
|
from lib.core.common import filterNone
|
|
from lib.core.common import getPublicTypeMembers
|
|
from lib.core.common import getSafeExString
|
|
from lib.core.common import isListLike
|
|
from lib.core.common import randomStr
|
|
from lib.core.common import readInput
|
|
from lib.core.common import resetCookieJar
|
|
from lib.core.common import singleTimeLogMessage
|
|
from lib.core.common import singleTimeWarnMessage
|
|
from lib.core.common import unArrayizeValue
|
|
from lib.core.convert import decodeHex
|
|
from lib.core.convert import getBytes
|
|
from lib.core.convert import getText
|
|
from lib.core.convert import getUnicode
|
|
from lib.core.data import conf
|
|
from lib.core.data import kb
|
|
from lib.core.data import logger
|
|
from lib.core.decorators import cachedmethod
|
|
from lib.core.decorators import lockedmethod
|
|
from lib.core.dicts import HTML_ENTITIES
|
|
from lib.core.enums import DBMS
|
|
from lib.core.enums import HTTP_HEADER
|
|
from lib.core.enums import PLACE
|
|
from lib.core.exception import SqlmapCompressionException
|
|
from lib.core.settings import BLOCKED_IP_REGEX
|
|
from lib.core.settings import DEFAULT_COOKIE_DELIMITER
|
|
from lib.core.settings import EVENTVALIDATION_REGEX
|
|
from lib.core.settings import HEURISTIC_PAGE_SIZE_THRESHOLD
|
|
from lib.core.settings import IDENTYWAF_PARSE_COUNT_LIMIT
|
|
from lib.core.settings import IDENTYWAF_PARSE_PAGE_LIMIT
|
|
from lib.core.settings import MAX_CONNECTION_TOTAL_SIZE
|
|
from lib.core.settings import META_CHARSET_REGEX
|
|
from lib.core.settings import PARSE_HEADERS_LIMIT
|
|
from lib.core.settings import PRINTABLE_BYTES
|
|
from lib.core.settings import SELECT_FROM_TABLE_REGEX
|
|
from lib.core.settings import UNICODE_ENCODING
|
|
from lib.core.settings import VIEWSTATE_REGEX
|
|
from lib.parse.headers import headersParser
|
|
from lib.parse.html import htmlParser
|
|
from thirdparty import six
|
|
from thirdparty.chardet import detect
|
|
from thirdparty.identywaf import identYwaf
|
|
from thirdparty.odict import OrderedDict
|
|
from thirdparty.six import unichr as _unichr
|
|
from thirdparty.six.moves import http_client as _http_client
|
|
|
|
@lockedmethod
def forgeHeaders(items=None, base=None):
    """
    Prepare HTTP Cookie, HTTP User-Agent and HTTP Referer headers to use when performing
    the HTTP requests

    @param items: optional dictionary of header name/value pairs applied on top of the
    base headers (entries having None as a value are removed from that dictionary in place)
    @param base: optional base headers used in place of conf.httpHeaders
    @return: OrderedDict holding the resulting header name/value pairs
    """

    items = items or {}

    # Note: None valued entries are dropped from the passed dictionary itself
    for name in list(items.keys()):
        if items[name] is None:
            del items[name]

    merged = OrderedDict(conf.httpHeaders if base is None else base)
    merged.update(items.items())

    class _str(str):
        # str subclass whose capitalize() and title() return the value unchanged
        def capitalize(self):
            return _str(self)

        def title(self):
            return _str(self)

    headers = OrderedDict()
    for key, value in merged.items():
        success = False

        # an already stored header with the same (case-insensitive) name is replaced
        for existing in headers:
            if existing.upper() == key.upper():
                del headers[existing]
                break

        # names not being among public members of HTTP_HEADER are stored as _str instances
        if key.upper() not in (_.upper() for _ in getPublicTypeMembers(HTTP_HEADER, True)):
            try:
                headers[_str(key)] = value  # dirty hack for http://bugs.python.org/issue12455
            except UnicodeEncodeError:      # don't do the hack on non-ASCII header names (they have to be properly encoded later on)
                pass
            else:
                success = True

        if not success:
            key = '-'.join(_.capitalize() for _ in key.split('-'))
            headers[key] = value

    if conf.cj:
        if HTTP_HEADER.COOKIE in headers:
            for cookie in conf.cj:
                # skip cookies explicitly bound to a domain not being a suffix of the target hostname
                if cookie is None or cookie.domain_specified and not (conf.hostname or "").endswith(cookie.domain):
                    continue

                if ("%s=" % getUnicode(cookie.name)) in getUnicode(headers[HTTP_HEADER.COOKIE]):
                    if conf.loadCookies:
                        conf.httpHeaders = filterNone((item if item[0] != HTTP_HEADER.COOKIE else None) for item in conf.httpHeaders)
                    elif kb.mergeCookies is None:
                        message = "you provided a HTTP %s header value, while " % HTTP_HEADER.COOKIE
                        message += "target URL provides its own cookies within "
                        message += "HTTP %s header which intersect with yours. " % HTTP_HEADER.SET_COOKIE
                        message += "Do you want to merge them in further requests? [Y/n] "

                        kb.mergeCookies = readInput(message, default='Y', boolean=True)

                    if kb.mergeCookies and kb.injection.place != PLACE.COOKIE:
                        # replaces the value of the current cookie inside of the given cookie string
                        def _(value):
                            return re.sub(r"(?i)\b%s=[^%s]+" % (re.escape(getUnicode(cookie.name)), conf.cookieDel or DEFAULT_COOKIE_DELIMITER), ("%s=%s" % (getUnicode(cookie.name), getUnicode(cookie.value))).replace('\\', r'\\'), value)

                        headers[HTTP_HEADER.COOKIE] = _(headers[HTTP_HEADER.COOKIE])

                        if PLACE.COOKIE in conf.parameters:
                            conf.parameters[PLACE.COOKIE] = _(conf.parameters[PLACE.COOKIE])

                        conf.httpHeaders = [(item[0], item[1] if item[0] != HTTP_HEADER.COOKIE else _(item[1])) for item in conf.httpHeaders]

                elif not kb.testMode:
                    # cookie (from the jar) not being provided by the user is appended
                    headers[HTTP_HEADER.COOKIE] += "%s %s=%s" % (conf.cookieDel or DEFAULT_COOKIE_DELIMITER, getUnicode(cookie.name), getUnicode(cookie.value))

        if kb.testMode and not any((conf.csrfToken, conf.safeUrl)):
            resetCookieJar(conf.cj)

    return headers
|
|
|
|
def parseResponse(page, headers, status=None):
    """
    @param page: the page to parse to feed the knowledge base htmlFp
    (back-end DBMS fingerprint based upon DBMS error messages return
    through the web application) list and absFilePaths (absolute file
    paths) set.
    @param headers: response headers handed over to headersParser (skipped if empty)
    @param status: optional status which (if set) is put in front of the page,
    separated with an empty line, before it is handed over to htmlParser
    """

    if headers:
        headersParser(headers)

    if page:
        htmlParser(page if not status else "%s\n\n%s" % (status, page))
|
|
|
|
@cachedmethod
def checkCharEncoding(encoding, warn=True):
    """
    Checks encoding name, repairs common misspellings and adjusts to
    proper namings used in codecs module

    @param encoding: encoding name (binary, text or list-like value)
    @param warn: whether to log a warning for an encoding that can't be used for decoding
    @return: adjusted encoding name, None if it is unknown/unusable, or the
    given value itself if it is empty

    >>> checkCharEncoding('iso-8858', False)
    'iso8859-1'
    >>> checkCharEncoding('en_us', False)
    'utf8'
    """

    if isinstance(encoding, six.binary_type):
        encoding = getUnicode(encoding)

    if isListLike(encoding):
        encoding = unArrayizeValue(encoding)

    if encoding:
        encoding = encoding.lower()
    else:
        return encoding

    # Reference: http://www.destructor.de/charsets/index.htm
    translate = {"windows-874": "iso-8859-11", "utf-8859-1": "utf8", "en_us": "utf8", "macintosh": "iso-8859-1", "euc_tw": "big5_tw", "th": "tis-620", "unicode": "utf8", "utc8": "utf8", "ebcdic": "ebcdic-cp-be", "iso-8859": "iso8859-1", "iso-8859-0": "iso8859-1", "ansi": "ascii", "gbk2312": "gbk", "windows-31j": "cp932", "en": "us"}

    # only the part in front of the first found delimiter is used
    for delimiter in (';', ',', '('):
        if delimiter in encoding:
            encoding = encoding[:encoding.find(delimiter)].strip()

    encoding = encoding.replace("&quot;", "")

    # popular typos/errors
    if "8858" in encoding:
        encoding = encoding.replace("8858", "8859")  # iso-8858 -> iso-8859
    elif "8559" in encoding:
        encoding = encoding.replace("8559", "8859")  # iso-8559 -> iso-8859
    elif "8895" in encoding:
        encoding = encoding.replace("8895", "8859")  # iso-8895 -> iso-8859
    elif "5889" in encoding:
        encoding = encoding.replace("5889", "8859")  # iso-5889 -> iso-8859
    elif "5589" in encoding:
        encoding = encoding.replace("5589", "8859")  # iso-5589 -> iso-8859
    elif "2313" in encoding:
        encoding = encoding.replace("2313", "2312")  # gb2313 -> gb2312
    elif encoding.startswith("x-"):
        encoding = encoding[len("x-"):]              # x-euc-kr -> euc-kr / x-mac-turkish -> mac-turkish
    elif "windows-cp" in encoding:
        encoding = encoding.replace("windows-cp", "windows")  # windows-cp-1254 -> windows-1254

    # name adjustment for compatibility
    # Note: find(...) > 0 matches only when the name is found past the very beginning of the value
    if encoding.startswith("8859"):
        encoding = "iso-%s" % encoding
    elif encoding.startswith("cp-"):
        encoding = "cp%s" % encoding[3:]
    elif encoding.startswith("euc-"):
        encoding = "euc_%s" % encoding[4:]
    elif encoding.startswith("windows") and not encoding.startswith("windows-"):
        encoding = "windows-%s" % encoding[7:]
    elif encoding.find("iso-88") > 0:
        encoding = encoding[encoding.find("iso-88"):]
    elif encoding.startswith("is0-"):
        encoding = "iso%s" % encoding[4:]
    elif encoding.find("ascii") > 0:
        encoding = "ascii"
    elif encoding.find("utf8") > 0:
        encoding = "utf8"
    elif encoding.find("utf-8") > 0:
        encoding = "utf-8"

    # Reference: http://philip.html5.org/data/charsets-2.html
    if encoding in translate:
        encoding = translate[encoding]
    elif encoding in ("null", "{charset}", "charset", "*") or not re.search(r"\w", encoding):
        return None

    # Reference: http://www.iana.org/assignments/character-sets
    # Reference: http://docs.python.org/library/codecs.html
    try:
        codecs.lookup(encoding)
    except Exception:
        encoding = None

    if encoding:
        # trial decoding of a random value as the final check of usability
        try:
            six.text_type(getBytes(randomStr()), encoding)
        except Exception:
            if warn:
                warnMsg = "invalid web page charset '%s'" % encoding
                singleTimeLogMessage(warnMsg, logging.WARN, encoding)
            encoding = None

    return encoding
|
|
|
|
@lockedmethod
def getHeuristicCharEncoding(page):
    """
    Returns page encoding charset detected by usage of heuristics

    Reference: https://chardet.readthedocs.io/en/latest/usage.html

    @param page: page content (only the first HEURISTIC_PAGE_SIZE_THRESHOLD items are inspected)
    @return: value found under the "encoding" key of the result of detect()

    >>> getHeuristicCharEncoding(b"<html></html>")
    'ascii'
    >>> getHeuristicCharEncoding(b'<!DOCTYPE html><html><head><meta charset="windows-1251"><title>\\xd2\\xe5\\xf1\\xf2</title></head><body>\\xc2 \\xf1\\xee\\xee\\xf2\\xe2\\xe5\\xf2\\xf1\\xf2\\xe2\\xe8\\xe8 \\xf1 \\xef\\xf0\\xe8\\xed\\xf6\\xe8\\xef\\xe0\\xec\\xe8 \\xf0\\xe0\\xe1\\xee\\xf2\\xfb \\xf3\\xf2\\xe8\\xeb\\xe8\\xf2\\xfb \\xe0\\xe2\\xf2\\xee\\xec\\xe0\\xf2\\xe8\\xf7\\xe5\\xf1\\xea\\xee\\xe3\\xee \\xee\\xef\\xf0\\xe5\\xe4\\xe5\\xeb\\xe5\\xed\\xe8\\xff \\xea\\xee\\xe4\\xe8\\xf0\\xee\\xe2\\xea\\xe8, \\xed\\xe0\\xec \\xf2\\xf0\\xe5\\xe1\\xf3\\xe5\\xf2\\xf1\\xff \\xef\\xf0\\xe5\\xe4\\xee\\xf1\\xf2\\xe0\\xe2\\xe8\\xf2\\xfc \\xe7\\xed\\xe0\\xf7\\xe8\\xf2\\xe5\\xeb\\xfc\\xed\\xee \\xe1\\xee\\xeb\\xe5\\xe5 \\xe4\\xeb\\xe8\\xed\\xed\\xfb\\xe9 \\xf4\\xf0\\xe0\\xe3\\xec\\xe5\\xed\\xf2 \\xf2\\xe5\\xea\\xf1\\xf2\\xe0 \\xed\\xe0 \\xf0\\xf3\\xf1\\xf1\\xea\\xee\\xec \\xff\\xe7\\xfb\\xea\\xe5. \\xdd\\xf2\\xee \\xed\\xe5\\xee\\xb1\\xf5\\xee\\xe4\\xe8\\xec\\xee \\xe4\\xeb\\xff \\xf2\\xee\\xe3\\xee, \\xf7\\xf2\\xee\\xf1\\xfb \\xf1\\xf2\\xe0\\xf2\\xe8\\xf1\\xf2\\xe8\\xf7\\xe5\\xf1\\xea\\xe8\\xe9 \\xe0\\xed\\xe0\\xeb\\xe8\\xe7\\xe0\\xf2\\xee\\xf0 \\xf7\\xe0\\xf1\\xf2\\xee\\xf2\\xed\\xee\\xf1\\xf2\\xe8 \\xf1\\xe8\\xec\\xe2\\xee\\xeb\\xee\\xe2 \\xe8 \\xe4\\xe2\\xf3\\xf5\\xe1\\xf3\\xea\\xe2\\xe5\\xed\\xed\\xfb\\xf5 \\xf1\\xee\\xf7\\xe5\\xf2\\xe0\\xed\\xe8\\xb9 \\xf1\\xec\\xee\\xe3 \\xf1 \\xe2\\xfb\\xf1\\xee\\xea\\xee\\xb9 \\xf1\\xf2\\xe5\\xef\\xe5\\xed\\xfc\\xf2 \\xf3\\xe2\\xe5\\xf0\\xe5\\xed\\xed\\xee\\xf1\\xf2\\xe8 \\xe7\\xe0\\xf4\\xe8\\xea\\xf1\\xe8\\xf0\\xee\\xe2\\xe0\\xf2\\xfc \\xe8\\xec\\xe5\\xed\\xed\\xee \\xf1\\xf2\\xe0\\xed\\xe4\\xe0\\xf0\\xf2 Windows-1251, \\xe0 \\xed\\xe5 MacCyrillic \\xe8\\xeb\\xe8 ISO-8859-5. \\xd0\\xf3\\xf1\\xf1\\xea\\xe8\\xb9 \\xff\\xe7\\xfb\\xea \\xee\\xe1\\xbb\\xe0\\xe4\\xe0\\xe5\\xf2 \\xf3\\xed\\xe8\\xea\\xe0\\xeb\\xfc\\xed\\xfb\\xec \\xf0\\xe0\\xf1\\xef\\xf0\\xe5\\xe4\\xe5\\xeb\\xe5\\xed\\xe8\\xe5\\xec \\xe3\\xeb\\xe0\\xf1\\xed\\xfb\\xf5 \\xe8 \\xf1\\xee\\xe3\\xeb\\xe0\\xf1\\xed\\xfb\\xf5 \\xe1\\xf3\\xea\\xe2, \\xf2\\xe0\\xea\\xe8\\xf5 \\xea\\xe0\\xea \\xee, \\xe5, \\xe0, \\xe8, \\xed, \\xf2, \\xea\\xee\\xf2\\xee\\xf0\\xfb\\xe5 \\xe2 \\xf0\\xe0\\xe7\\xed\\xfb\\xf5 \\xea\\xee\\xe4\\xee\\xe2\\xfb\\xf5 \\xf1\\xf2\\xf0\\xe0\\xed\\xe8\\xf6\\xe0\\xf5 \\xe7\\xe0\\xed\\xe8\\xec\\xe0\\xf3\\xf2 \\xf1\\xee\\xe2\\xe5\\xf0\\xf8\\xe5\\xed\\xed\\xee \\xf0\\xe0\\xe7\\xed\\xfb\\xe5 \\xef\\xee\\xf7\\xe8\\xf6\\xe8\\xe8 \\xe2 \\xf2\\xe0\\xe1\\xeb\\xe8\\xf6\\xe5 \\xe1\\xe0\\xb9\\xf2\\xee\\xe2. \\xca\\xee\\xe3\\xe4\\xe0 \\xf2\\xe5\\xea\\xf1\\xf2\\xe0 \\xf1\\xf2\\xe0\\xed\\xee\\xe2\\xe8\\xf2\\xf1\\xff \\xe4\\xee\\xf1\\xf2\\xe0\\xf2\\xee\\xf7\\xed\\xee \\xec\\xed\\xee\\xe3\\xee, \\xe2\\xe5\\xf0\\xee\\xff\\xf2\\xed\\xee\\xf1\\xf2\\xfc \\xee\\xf8\\xe8\\xe1\\xea\\xe8 \\xf1\\xed\\xe8\\xe6\\xe0\\xe5\\xf2\\xf1\\xff \\xef\\xf0\\xe0\\xea\\xf2\\xe8\\xf7\\xe5\\xf1\\xea\\xe8 \\xe4\\xee \\xed\\xf3\\xeb\\xff. \\xcc\\xfb \\xe4\\xee\\xe1\\xe0\\xe2\\xeb\\xff\\xe5\\xec \\xe5\\xf9\\xe5 \\xed\\xe5\\xf1\\xea\\xee\\xeb\\xfc\\xea\\xee \\xef\\xf0\\xe5\\xe4\\xeb\\xee\\xe6\\xe5\\xed\\xe8\\xb9, \\xf7\\xf2\\xee\\xf1\\xfb \\xf0\\xe0\\xf1\\xf8\\xe8\\xf0\\xe8\\xf2\\xfc \\xe2\\xfb\\xe1\\xee\\xf0\\xea\\xf3 \\xe4\\xe0\\xed\\xed\\xfb\\xf5 \\xe4\\xeb\\xff \\xea\\xee\\xf0\\xf0\\xe5\\xea\\xf2\\xed\\xee\\xe3\\xee \\xf2\\xe5\\xf1\\xf2\\xe8\\xf0\\xee\\xe2\\xe0\\xed\\xe8\\xff \\xe2\\xe0\\xf8\\xe5\\xb9 \\xe1\\xe8\\xe1\\xeb\\xe8\\xee\\xf2\\xe5\\xea\\xe8 \\xe2 \\xf1\\xf0\\xe5\\xe4\\xe5 Python.</body></html>')
    'windows-1251'
    """

    # previously detected results are reused (keyed by length and hash of the whole page)
    key = (len(page), hash(page))
    retVal = kb.cache.encoding.get(key)
    if retVal is None:
        retVal = detect(page[:HEURISTIC_PAGE_SIZE_THRESHOLD])["encoding"]
        kb.cache.encoding[key] = retVal

    # Note: message is logged only if the detected charset matches UNICODE_ENCODING (ignoring case and '-')
    if retVal and retVal.lower().replace('-', "") == UNICODE_ENCODING.lower().replace('-', ""):
        infoMsg = "heuristics detected web page charset '%s'" % retVal
        singleTimeLogMessage(infoMsg, logging.INFO, retVal)

    return retVal
|
|
|
|
def decodePage(page, contentEncoding, contentType, percentDecode=True):
    """
    Decode compressed/charset HTTP response

    @param page: raw response body
    @param contentEncoding: value of HTTP Content-Encoding header ("gzip", "x-gzip"
    and "deflate" are causing the decompression)
    @param contentType: value of HTTP Content-Type header (checked for "charset=" and "text/")
    @param percentDecode: whether the %XX sequences should be decoded too
    @return: decoded page (None if the compressed content has been received while
    kb.pageCompress is turned off)
    @raise SqlmapCompressionException: if the decompression fails (or the result is larger
    than MAX_CONNECTION_TOTAL_SIZE), unless the b"<html" can be found inside of the page

    >>> getText(decodePage(b"<html>foo&amp;bar</html>", None, "text/html; charset=utf-8"))
    '<html>foo&bar</html>'
    >>> getText(decodePage(b"&#x9;", None, "text/html; charset=utf-8"))
    '\\t'
    """

    if not page or (conf.nullConnection and len(page) < 2):
        return getUnicode(page)

    contentEncoding = getText(contentEncoding).lower() if contentEncoding else ""
    contentType = getText(contentType).lower() if contentType else ""

    if contentEncoding in ("gzip", "x-gzip", "deflate"):
        if not kb.pageCompress:
            return None

        try:
            if contentEncoding == "deflate":
                obj = zlib.decompressobj(-15)
                page = obj.decompress(page, MAX_CONNECTION_TOTAL_SIZE + 1)

                # catch the deflate bomb before flush() forcefully expands it into RAM
                if len(page) > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")

                page += obj.flush()
                if len(page) > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")
            else:
                data = gzip.GzipFile("", "rb", 9, io.BytesIO(page))
                page = data.read(MAX_CONNECTION_TOTAL_SIZE + 1)
                if len(page) > MAX_CONNECTION_TOTAL_SIZE:
                    raise Exception("size too large")
        except Exception as ex:
            if b"<html" not in page:  # in some cases, invalid "Content-Encoding" appears for plain HTML (should be ignored)
                errMsg = "detected invalid data for declared content "
                errMsg += "encoding '%s' ('%s')" % (contentEncoding, getSafeExString(ex))
                singleTimeLogMessage(errMsg, logging.ERROR)

                warnMsg = "turning off page compression"
                singleTimeWarnMessage(warnMsg)

                kb.pageCompress = False
                raise SqlmapCompressionException

    if not conf.encoding:
        httpCharset, metaCharset = None, None

        # Reference: http://stackoverflow.com/questions/1020892/python-urllib2-read-to-unicode
        if contentType.find("charset=") != -1:
            httpCharset = checkCharEncoding(contentType.split("charset=")[-1])

        metaCharset = checkCharEncoding(extractRegexResult(META_CHARSET_REGEX, page))

        # declared charset is used if only one of two has been found, if both are the same or if the (binary) page is made solely out of PRINTABLE_BYTES
        if (any((httpCharset, metaCharset)) and (not all((httpCharset, metaCharset)) or isinstance(page, six.binary_type) and all(_ in PRINTABLE_BYTES for _ in page))) or (httpCharset == metaCharset and all((httpCharset, metaCharset))):
            kb.pageEncoding = httpCharset or metaCharset  # Reference: http://bytes.com/topic/html-css/answers/154758-http-equiv-vs-true-header-has-precedence
            debugMsg = "declared web page charset '%s'" % kb.pageEncoding
            singleTimeLogMessage(debugMsg, logging.DEBUG, debugMsg)
        else:
            kb.pageEncoding = None
    else:
        kb.pageEncoding = conf.encoding

    # can't do for all responses because we need to support binary files too
    if isinstance(page, six.binary_type) and "text/" in contentType:
        if not kb.disableHtmlDecoding:
            # e.g. &#x9;&#195;&#235;&#224;&#226;&#224;
            if b"&#" in page:
                page = re.sub(b"&#x([0-9a-f]{1,2});", lambda _: decodeHex(_.group(1) if len(_.group(1)) == 2 else b"0%s" % _.group(1)), page)
                page = re.sub(b"&#(\\d{1,3});", lambda _: six.int2byte(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)

            # e.g. %20%28%29
            if percentDecode:
                if b"%" in page:
                    page = re.sub(b"%([0-9a-f]{2})", lambda _: decodeHex(_.group(1)), page)
                    page = re.sub(b"%([0-9A-F]{2})", lambda _: decodeHex(_.group(1)), page)  # Note: %DeepSee_SQL in CACHE

            # e.g. &amp;
            page = re.sub(b"&([^;]+);", lambda _: six.int2byte(HTML_ENTITIES[getText(_.group(1))]) if HTML_ENTITIES.get(getText(_.group(1)), 256) < 256 else _.group(0), page)

            kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))

            if (kb.pageEncoding or "").lower() == "utf-8-sig":
                kb.pageEncoding = "utf-8"
                if page and page.startswith(b"\xef\xbb\xbf"):  # Reference: https://docs.python.org/2/library/codecs.html (Note: noticed problems when "utf-8-sig" is left to Python for handling)
                    page = page[3:]

            page = getUnicode(page, kb.pageEncoding)

            # e.g. &#8217;&#8230;&#8482;
            if "&#" in page:
                def _(match):
                    retVal = match.group(0)
                    try:
                        retVal = _unichr(int(match.group(1)))
                    except (ValueError, OverflowError):
                        pass
                    return retVal
                page = re.sub(r"&#(\d+);", _, page)

            # e.g. &zeta;
            page = re.sub(r"&([^;]+);", lambda _: _unichr(HTML_ENTITIES[_.group(1)]) if HTML_ENTITIES.get(_.group(1), 0) > 255 else _.group(0), page)
        else:
            page = getUnicode(page, kb.pageEncoding)

    return page
|
|
|
|
def processResponse(page, responseHeaders, code=None, status=None):
    """
    Processes the HTTP response by feeding it to the parsers and by checking it for
    signs of WAF/IPS, browser verification, CAPTCHA and blocking by the target server,
    along with the adjustment of POST parameters matched by EVENTVALIDATION_REGEX and
    VIEWSTATE_REGEX

    @param page: response body (empty value is treated as "")
    @param responseHeaders: response headers
    @param code: optional HTTP code (used in the raw response given to identYwaf)
    @param status: optional status (used in parsing and in the raw response given to identYwaf)
    """

    kb.processResponseCounter += 1
    page = page or ""

    # Note: headers are parsed only for the first PARSE_HEADERS_LIMIT responses
    parseResponse(page, responseHeaders if kb.processResponseCounter < PARSE_HEADERS_LIMIT else None, status)

    if not kb.tableFrom and Backend.getIdentifiedDbms() in (DBMS.ACCESS,):
        kb.tableFrom = extractRegexResult(SELECT_FROM_TABLE_REGEX, page)
    else:
        kb.tableFrom = None

    if conf.parseErrors:
        msg = extractErrorMessage(page)

        if msg:
            logger.warning("parsed DBMS error message: '%s'" % msg.rstrip('.'))

    if not conf.skipWaf and kb.processResponseCounter < IDENTYWAF_PARSE_COUNT_LIMIT:
        rawResponse = "%s %s %s\n%s\n%s" % (_http_client.HTTPConnection._http_vsn_str, code or "", status or "", "".join(getUnicode(responseHeaders.headers if responseHeaders else [])), page[:IDENTYWAF_PARSE_PAGE_LIMIT] if not kb.checkWafMode else page[:HEURISTIC_PAGE_SIZE_THRESHOLD])

        with kb.locks.identYwaf:
            identYwaf.non_blind.clear()
            try:
                if identYwaf.non_blind_check(rawResponse, silent=True):
                    for waf in set(identYwaf.non_blind):
                        if waf not in kb.identifiedWafs:
                            kb.identifiedWafs.add(waf)
                            errMsg = "WAF/IPS identified as '%s'" % identYwaf.format_name(waf)
                            singleTimeLogMessage(errMsg, logging.CRITICAL)
            except Exception as ex:
                singleTimeWarnMessage("internal error occurred in WAF/IPS detection ('%s')" % getSafeExString(ex))

    if kb.originalPage is None:
        for regex in (EVENTVALIDATION_REGEX, VIEWSTATE_REGEX):
            match = re.search(regex, page)
            if match and PLACE.POST in conf.parameters:
                name, value = match.groups()
                if PLACE.POST in conf.paramDict and name in conf.paramDict[PLACE.POST]:
                    if conf.paramDict[PLACE.POST][name] in page:
                        continue
                    else:
                        msg = "do you want to automatically adjust the value of '%s'? [y/N]" % name

                        if not readInput(msg, default='N', boolean=True):
                            continue

                        conf.paramDict[PLACE.POST][name] = value
                conf.parameters[PLACE.POST] = re.sub(r"(?i)(%s=)[^&]+" % re.escape(name), r"\g<1>%s" % value.replace('\\', r'\\'), conf.parameters[PLACE.POST])

    if not kb.browserVerification and re.search(r"(?i)browser.?verification", page or ""):
        kb.browserVerification = True
        warnMsg = "potential browser verification protection mechanism detected"
        if re.search(r"(?i)CloudFlare", page):
            warnMsg += " (CloudFlare)"
        singleTimeWarnMessage(warnMsg)

    if not kb.captchaDetected and re.search(r"(?i)captcha", page or ""):
        # "captcha" has to be found either inside of a form or inside of a meta refresh tag
        for match in re.finditer(r"(?si)<form.+?</form>", page):
            if re.search(r"(?i)captcha", match.group(0)):
                kb.captchaDetected = True
                break

        if re.search(r"<meta[^>]+\brefresh\b[^>]+\bcaptcha\b", page):
            kb.captchaDetected = True

        if kb.captchaDetected:
            warnMsg = "potential CAPTCHA protection mechanism detected"
            if re.search(r"(?i)<title>[^<]*CloudFlare", page):
                warnMsg += " (CloudFlare)"
            singleTimeWarnMessage(warnMsg)

    if re.search(BLOCKED_IP_REGEX, page):
        warnMsg = "it appears that you have been blocked by the target server"
        singleTimeWarnMessage(warnMsg)
|