diff --git a/data/txt/sha256sums.txt b/data/txt/sha256sums.txt index 2b0de81f4..c71f09fc9 100644 --- a/data/txt/sha256sums.txt +++ b/data/txt/sha256sums.txt @@ -162,13 +162,13 @@ df768bcb9838dc6c46dab9b4a877056cb4742bd6cfaaf438c4a3712c5cc0d264 extra/shutils/ 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 extra/vulnserver/__init__.py 617cec1b731e0baacafa6f58c2f56a85b6128d1416627cc1b2f61519c8539a2e extra/vulnserver/vulnserver.py a2bf70d7f87c3a4e0675c0bad54119a4e04efa6ea2730a8338d5aebcd995630e lib/controller/action.py -9137a8f7368496c84b21944f6b94c28004d3a2a849ac9c8e0b20e294e4c4a93a lib/controller/checks.py +f4fb3839e5accd1b58b34226e4b26f5079d9696e24d335d37d870cd5e62d1e80 lib/controller/checks.py 666935b658074dc9c42153622b75d4ec7bfe56fbe0742de827a5d30a1a0f9d96 lib/controller/controller.py d69e84f1648cdb907f5d2dd454f03874a4613752b07867510145d51d84b3c56f lib/controller/handler.py 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/controller/__init__.py 9c5764c92ce536d1f0f96200359ee5ef1f37f9128769bf990cb77f1d1f8e17b1 lib/core/agent.py c51c33501cc905586a9aaac93b06f2ac6f71628d032a7dc39fd0ef05d7ee3856 lib/core/bigarray.py -122767794156afa41b19baa706ad4c124eef6eaf73ed8fd208d8f634e97e82eb lib/core/common.py +d143df718fbaacb617b6046c73cf4e47932e1a25928a4e1ecb87ea77a3b154ed lib/core/common.py 8f1272487e1adfcc8c755a2f56f0c6d21eac5e685a73a9a159482f9dc9142bc5 lib/core/compat.py a683d0ad9ba543587382c4903d28db610ae20394fcf9045a68b2ab54a39381ae lib/core/convert.py c03dc585f89642cfd81b087ac2723e3e1bb3bfa8c60e6f5fe58ef3b0113ebfe6 lib/core/data.py @@ -182,14 +182,14 @@ f8de57606325456928e46ae2896f5f8bbec9ad18b1c644b492a566fa992216f6 lib/core/decor 1966ca704961fb987ab757f0a4afddbf841d1a880631b701487c75cef63d60c3 lib/core/__init__.py 914a13ee21fd610a6153a37cbe50830fcbd1324c7ebc1e7fc206d5e598b0f7ad lib/core/log.py 5a576f802f1298d0aa357e766ae6502fa53cacbbe0b1d328b7410a8b20a885b2 lib/core/optiondict.py 
-e033b20a0f7821797a10f4bf4235723f38c7db551c611fbb713faa621b123c4a lib/core/option.py +98d3d61278794705c7039e40fab66a626e8d6ab765383c5379cec7a066b09301 lib/core/option.py 21b2b1745107c211fc7593923a3da7a808d40763c00091c28de5f7c129bcf3bc lib/core/patch.py 49c0fa7e3814dfda610d665ee02b12df299b28bc0b6773815b4395514ddf8dec lib/core/profiling.py 0c36a65b6237732eb001d333f80f0c58c088ff01ae80cf07e4dcc6da2a806364 lib/core/readlineng.py 9bf174058f15d14e24e94f9aaf42df045119d3617c6c54bd2f3af79b462f331d lib/core/replication.py 0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6 lib/core/revision.py 888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c lib/core/session.py -098e5d86a0da05d4be5f5ed5371083954be2369abce57fda4bd906d12e1f8870 lib/core/settings.py +a2fb281b59c4526613f22fc0e994b68db91c1263db415aa86002ec4e20773639 lib/core/settings.py c7804223319e18eb0b8e2cbf0a8b6896d1cefb7b0b1a2e9f1cf826a8a3b56750 lib/core/shell.py a2e98a94b231432736d6b304fc75525c8b5fdb4768c418387c5b4c1a610dad64 lib/core/subprocessng.py 19f1e3c5e3ba703d28d510cd7a9ab8284d5fbe9df5ce7e77c86e5931571364b7 lib/core/target.py @@ -211,7 +211,7 @@ c2f34e27578742e729c2fa9c1d4f0a0d8f8f7f4cf0fc14c62ec817a260c71dec lib/parse/site 1be3da334411657461421b8a26a0f2ff28e1af1e28f1e963c6c92768f9b0847c lib/request/basicauthhandler.py 369484a2999d29f49bf839a329d1686ed94f6ea27c695e027fe08c8da51f30a3 lib/request/basic.py bc61bc944b81a7670884f82231033a6ac703324b34b071c9834886a92e249d0e lib/request/chunkedhandler.py -d4bb0869b03602a0c8f9e0e0fd217753f14ddadf848fc9f3c65a74d03feb9958 lib/request/comparison.py +9c0dccc1cee66d38478aaf75a7c513d0d136d50a90b15fed146faa1653899fe1 lib/request/comparison.py 729e07a2ca6b1d83563e9c6dc5a884d1b664c1764be06776ea93bde305164f0c lib/request/connect.py 8e06682280fce062eef6174351bfebcb6040e19976acff9dc7b3699779783498 lib/request/direct.py a6b37b436838caeb197fea858d0a39fadbff4736256e741b5fcec1f28fcf1ce0 lib/request/dns.py diff --git a/lib/controller/checks.py 
b/lib/controller/checks.py index 4589599de..6a7043cc9 100644 --- a/lib/controller/checks.py +++ b/lib/controller/checks.py @@ -16,6 +16,7 @@ from extra.beep.beep import beep from lib.core.agent import agent from lib.core.common import Backend from lib.core.common import extractRegexResult +from lib.core.common import extractStructuralTokens from lib.core.common import extractTextTagContent from lib.core.common import filterNone from lib.core.common import findDynamicContent @@ -1390,7 +1391,26 @@ def checkStability(): raise SqlmapNoneDataException(errMsg) else: - checkDynamicContent(firstPage, secondPage) + # Before engaging the (lossy) dynamic-content removal / '--text-only' escalation, check + # whether the page is structurally stable (identical tag/class/id skeleton across the two + # requests) despite differing text. If so, base the comparison on that value-free structure + # so that dynamic content (e.g. per-render result rows) does not mask an injection. This is + # the HTML counterpart of the structure-aware JSON comparison + if firstPage and secondPage and extractStructuralTokens(firstPage) == extractStructuralTokens(secondPage): + kb.pageStructurallyStable = True + + if kb.nullConnection: + debugMsg = "turning off NULL connection " + debugMsg += "support because of structural page comparison" + logger.debug(debugMsg) + + kb.nullConnection = None + + infoMsg = "target URL content is not byte-stable but structurally stable; sqlmap " + infoMsg += "will base the page comparison on the page structure" + logger.info(infoMsg) + else: + checkDynamicContent(firstPage, secondPage) return kb.pageStable diff --git a/lib/core/common.py b/lib/core/common.py index a8eca14ad..937064d70 100644 --- a/lib/core/common.py +++ b/lib/core/common.py @@ -176,6 +176,9 @@ from lib.core.settings import REPLACEMENT_MARKER from lib.core.settings import SENSITIVE_DATA_REGEX from lib.core.settings import SENSITIVE_OPTIONS from lib.core.settings import STDIN_PIPE_DASH +from 
def extractStructuralTokens(page):
    """
    Returns a set of value-free structural tokens (tag names and class/id attribute hooks) of a
    (HTML) page, discarding all textual content. Used for structure-aware page comparison when the
    page is byte-unstable but structurally stable (e.g. dynamic result rows in a fixed layout), so
    that dynamic text does not perturb the comparison while a structural change (e.g. a results
    table appearing or disappearing) still does. HTML counterpart of jsonMinimize()

    Produced tokens have the form "tag:<name>", "cls:<name>.<class>" and "id:<name>#<id>" (tag
    names are lower-cased, class/id values are kept as-is). Tags carrying a reflected payload value
    are dropped altogether, as are HTML comments and the content of script/style elements (their
    bodies are not markup and could otherwise yield bogus tokens). Empty (or None) page yields an
    empty set

    >>> sorted(extractStructuralTokens(u'<div class="a b" id="g"><span>x</span></div>')) == [u'cls:div.a', u'cls:div.b', u'id:div#g', u'tag:div', u'tag:span']
    True
    >>> extractStructuralTokens(u'<table><tr><td>1</td></tr></table>') == set([u'tag:table', u'tag:tr', u'tag:td'])
    True
    >>> extractStructuralTokens(u'<p>a</p><script>if (a<b) x="<i>"</script><!-- <div id="x"> -->') == set([u'tag:p'])
    True
    >>> extractStructuralTokens(u'') == set()
    True
    """

    page = page or ""

    # Tags embedding a reflected (payload) value differ per request by design, hence they are
    # left out of the skeleton (marker is escaped so it is always matched literally)
    if REFLECTED_VALUE_MARKER in page:
        page = re.sub(r"(?i)<[^>]*%s[^>]*>" % re.escape(REFLECTED_VALUE_MARKER), " ", page)

    # Comments and script/style bodies are not part of the layout and can contain '<' sequences
    # that would be mistaken for tags
    page = re.sub(r"(?si)<!--.*?-->|<script\b.*?</script\s*>|<style\b.*?</style\s*>", " ", page)

    retVal = set()

    for match in re.finditer(STRUCTURAL_TAG_REGEX, page):
        tag = match.group(1).lower()
        attrs = match.group(2) or ""
        retVal.add("tag:%s" % tag)

        # Note: groups 1-3 hold the double-quoted, single-quoted and unquoted attribute value
        for _ in re.finditer(STRUCTURAL_CLASS_REGEX, attrs):
            for value in (_.group(1) or _.group(2) or _.group(3) or "").split():
                retVal.add("cls:%s.%s" % (tag, value))

        for _ in re.finditer(STRUCTURAL_ID_REGEX, attrs):
            value = (_.group(1) or _.group(2) or _.group(3) or "").strip()
            if value:
                retVal.add("id:%s#%s" % (tag, value))

    return retVal
-VERSION = "1.10.6.198" +VERSION = "1.10.6.199" TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable" TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34} VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE) @@ -180,6 +180,13 @@ DUMMY_SEARCH_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64; rv:141.0) Gecko/20100 # Regular expression used for extracting content from "textual" tags TEXT_TAG_REGEX = r"(?si)<(abbr|acronym|b|blockquote|br|center|cite|code|dt|em|font|h[1-6]|i|li|p|pre|q|strong|sub|sup|td|th|title|tt|u)(?!\w).*?>(?P<result>[^<]+)" +# Regular expressions used for extracting a value-free structural skeleton of a (HTML) page (tag +# names and class/id attribute hooks), for structure-aware comparison of pages whose textual +# content is dynamic but whose layout is stable +STRUCTURAL_TAG_REGEX = r"(?si)<\s*([a-z][a-z0-9]*)((?:\s+[^<>]*)?)/?>" +STRUCTURAL_CLASS_REGEX = r"""(?si)\bclass\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'<>]+))""" +STRUCTURAL_ID_REGEX = r"""(?si)\bid\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'<>]+))""" + # Regular expression used for recognition of IP addresses IP_ADDRESS_REGEX = r"\b(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\b" diff --git a/lib/request/comparison.py b/lib/request/comparison.py index 1206e6814..e32782973 100644 --- a/lib/request/comparison.py +++ b/lib/request/comparison.py @@ -10,6 +10,7 @@ from __future__ import division import re from lib.core.common import extractRegexResult +from lib.core.common import extractStructuralTokens from lib.core.common import getFilteredPageContent from lib.core.common import jsonMinimize from lib.core.common import listToStrValue @@ -177,6 +178,15 @@ def _comparison(page, headers, code, getRatioValue, pageLength): seq1 = jsonMinimize(kb.pageTemplate) seq2 = jsonMinimize(rawPage) + # Structure-aware comparison for a 
structurally-stable (but byte-unstable) HTML page: + # compare the value-free tag/class/id skeleton so dynamic text does not perturb the ratio + # while a structural change (e.g. a results table appearing/disappearing) still does + if seq1 is None and kb.pageStructurallyStable and not (conf.titles or conf.textOnly or kb.nullConnection): + _ = "\n".join(sorted(extractStructuralTokens(kb.pageTemplate))) + if _: # only engage when the page actually exposes structure (HTML tags); tagless content falls back to text + seq1 = _ + seq2 = "\n".join(sorted(extractStructuralTokens(rawPage))) + if seq1 is None or seq2 is None: if conf.titles: seq1 = extractRegexResult(HTML_TITLE_REGEX, seqMatcher.a)