sqlmap/lib/utils/keysetdump.py
Miroslav Štampar 497d3772bd
Some checks are pending
/ build (macos-latest, 3.8) (push) Waiting to run
/ build (ubuntu-latest, pypy-2.7) (push) Waiting to run
/ build (windows-latest, 3.14) (push) Waiting to run
Adding keyset (seek) pagination for faster blind table dumps
2026-06-20 00:00:40 +02:00

312 lines
11 KiB
Python

#!/usr/bin/env python
"""
Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
See the file 'LICENSE' for copying permission
"""
import re
from lib.core.agent import agent
from lib.core.bigarray import BigArray
from lib.core.common import Backend
from lib.core.common import isNoneValue
from lib.core.common import singleTimeWarnMessage
from lib.core.common import unArrayizeValue
from lib.core.common import unsafeSQLIdentificatorNaming
from lib.core.compat import xrange
from lib.core.convert import getConsoleLength
from lib.core.convert import getUnicode
from lib.core.data import conf
from lib.core.data import logger
from lib.core.data import queries
from lib.core.dicts import DUMP_REPLACEMENTS
from lib.core.enums import CHARSET_TYPE
from lib.core.enums import DBMS
from lib.core.enums import EXPECTED
from lib.core.settings import NULL
from lib.core.unescaper import unescaper
from lib.request import inject
from lib.utils.safe2bin import safechardecode
# Back-end DBMSes whose dump table reference is schema/database-qualified (db.table).
# _tableRef() prefixes the table name with conf.db for these whenever conf.db is set.
# Note: for MSSQL the table identifier already carries its schema (e.g. dbo.users), so the
# plain db.table form yields the correct db.schema.table (e.g. [master].dbo.users).
# Oracle is not listed here: _tableRef() handles it separately, upper-casing both parts.
KEYSET_SCHEMA_QUALIFIED = (DBMS.MYSQL, DBMS.PGSQL, DBMS.CRATEDB, DBMS.MSSQL, DBMS.H2, DBMS.HSQLDB)
def _tableRef(tbl):
    """
    Returns the table reference to place in the keyset dump queries: the upper-cased
    'DB.TABLE' form for Oracle, the plain 'db.table' form for the back-ends listed in
    KEYSET_SCHEMA_QUALIFIED and the bare table name otherwise (or when conf.db is not set).
    """

    identified = Backend.getIdentifiedDbms()

    if conf.db:
        if identified == DBMS.ORACLE:
            return "%s.%s" % (conf.db.upper(), tbl.upper())

        if identified in KEYSET_SCHEMA_QUALIFIED:
            return "%s.%s" % (conf.db, tbl)

    return tbl
def keysetSupported():
    """
    Tells whether keyset (seek) pagination can be attempted against the identified
    back-end DBMS: its dump_table node has to declare the blind 'keyset_next' query
    together with a cursor source, i.e. either a blind 'rowid' pseudo-column or a
    'primary_key' catalog lookup
    """

    node = queries[Backend.getIdentifiedDbms()].dump_table
    blindNode = node.blind

    if "keyset_next" not in blindNode:
        return False

    return "rowid" in blindNode or "primary_key" in node
def _integerCursor(tbl, cursor):
    """
    Probes every cursor column with the 'keyset_first' query and returns True only if
    each retrieved value looks like a (possibly negative) integer.

    Integer keys are the only ones accepted because _embed() emits them as bare numeric
    literals, so the comparison is numeric and agrees with MIN/ORDER BY. String (and even
    decimal) keys would be escaped to a binary/hex literal whose order can differ from
    MIN's collation and silently skip rows; rejecting them here makes the caller fall
    back to the OFFSET dump.
    """

    blindNode = queries[Backend.getIdentifiedDbms()].dump_table.blind
    tableRef = _tableRef(tbl)

    for keyColumn in cursor:
        probe = blindNode.keyset_first % (agent.preprocessField(tbl, keyColumn), tableRef)
        lowest = unArrayizeValue(inject.getValue(agent.whereQuery(probe)))

        # an empty/NULL result (e.g. empty table) does not disqualify the column; the
        # walk simply yields no rows in that case
        if isNoneValue(lowest):
            continue

        if not re.match(r"\A-?[0-9]+\Z", getUnicode(lowest).strip()):
            return False

    return True
def resolveKeysetCursor(tbl, colList):
    """
    Resolves the column(s) usable as a keyset (seek) pagination cursor for the table.

    The declared row-id pseudo-column is preferred when the back-end DBMS has one;
    otherwise the primary key (single-column, or composite when 'keyset_ordered' is
    declared) is looked up in the catalog and mapped onto the names found in colList.

    Returns a list of column names, or None when keyset pagination is not supported, no
    usable primary key is found, a key column is not among the dumped columns or the key
    is not integer-valued.
    """

    if not keysetSupported():
        return None

    node = queries[Backend.getIdentifiedDbms()].dump_table
    blindNode = node.blind

    # preferred source: a declared physical row-id pseudo-column
    if "rowid" in blindNode:
        return [blindNode.rowid]

    # fallback source: the primary key resolved from the catalog
    pk = node.primary_key

    # Note: schema/table are string literals in the catalog lookups, hence the unquoted
    # (identifier-unescaped) names (the dump queries keep the quoted form)
    dbName = unsafeSQLIdentificatorNaming(conf.db)
    tblName = unsafeSQLIdentificatorNaming(tbl)

    # Note: catalog (schema) lookups deliberately bypass whereQuery() so that the
    # data-row filter from --where is never appended to them
    rawCount = inject.getValue(pk.count % (dbName, tblName), expected=EXPECTED.INT, charsetType=CHARSET_TYPE.DIGITS)

    try:
        keyColumns = int(rawCount)
    except (ValueError, TypeError):
        return None

    # no primary key at all, or a composite one without the ordered keyset form
    if keyColumns < 1 or (keyColumns > 1 and "keyset_ordered" not in blindNode):
        return None

    resolved = []

    for position in xrange(keyColumns):
        name = unArrayizeValue(inject.getValue(pk.query % (dbName, tblName, position)))

        if not name:
            return None

        # case-insensitive mapping of the catalog name onto the dumped column name
        matched = next((_ for _ in colList if _ and _.lower() == name.lower()), None)

        if matched is None:
            return None

        resolved.append(matched)

    # only integer cursors are kept: a string key's escaped-literal comparison may order
    # differently than MIN/ORDER BY and silently skip rows (such keys fall back to OFFSET)
    return resolved if _integerCursor(tbl, resolved) else None
def _lit(value):
"""
Type-correct SQL literal for a cursor value: a bare numeric literal for numeric keys
(so the index is still used and the comparison is numeric), otherwise the DBMS-escaped
(e.g. 0x.. hex) form for string keys. Both forms are self-contained (no surrounding quotes).
"""
if value is not None and re.match(r"\A-?[0-9]+\Z", value):
return value
return unescaper.escape(value, False)
def _embed(template, value, *fixed):
    """
    Fills a single-column keyset query template: the 'fixed' arguments take the leading
    placeholders and the cursor value, rendered by _lit(), takes the trailing one. Quotes
    around a '%s' placeholder are dropped first, as the rendered literal needs none.
    """

    unquoted = template.replace("'%s'", "%s")
    arguments = fixed + (_lit(value),)

    return unquoted % arguments
def _dumpSingle(tbl, colList, count, cursor, tableRef, entries, lengths):
    """
    Walks the table along a single-column cursor, one row per iteration.

    For every produced row one value per column of colList is appended to 'entries' and
    the widest console length seen per column is kept in 'lengths'. Both dictionaries are
    modified in place and nothing is returned.

    'tbl' is the table name, 'tableRef' its reference as used in the queries, 'count' the
    total number of rows and 'cursor' the name of the key column. The --start/--stop
    window (conf.limitStart/conf.limitStop) decides how many rows are produced and at
    which row the walk begins.
    """

    blind = queries[Backend.getIdentifiedDbms()].dump_table.blind
    field = agent.preprocessField(tbl, cursor)

    # number of rows to produce for the requested --start/--stop window
    if conf.limitStart and conf.limitStop:
        target = max(0, conf.limitStop - conf.limitStart + 1)
    elif conf.limitStop:
        target = conf.limitStop
    elif conf.limitStart:
        target = max(0, count - conf.limitStart + 1)
    else:
        target = count

    pivotValue = None
    skip = 0

    if conf.limitStart and conf.limitStart > 1:
        if "keyset_seed" in blind:
            # hybrid: a single OFFSET jump to seed the cursor just before --start, then pure keyset
            query = agent.whereQuery(blind.keyset_seed % (field, tableRef, field, conf.limitStart - 2))
            seed = unArrayizeValue(inject.getValue(query))

            if isNoneValue(seed) or seed == NULL:
                return

            pivotValue = safechardecode(seed)
        else:
            # no seed query is declared for this DBMS: honour --start by walking the cursor
            # over the leading rows and discarding them (same approach as the composite
            # walk), instead of silently dumping the table from its first row
            skip = conf.limitStart - 1

    produced = 0

    while produced < target:
        if pivotValue is None:
            query = blind.keyset_first % (field, tableRef)
        else:
            query = _embed(blind.keyset_next, pivotValue, field, tableRef, field)

        query = agent.whereQuery(query)
        value = unArrayizeValue(inject.getValue(query))

        # no (more) rows past the current cursor
        if isNoneValue(value) or value == NULL:
            break

        value = safechardecode(value)

        # safety latch against a non-advancing cursor (e.g. encoding edge cases)
        if value == pivotValue:
            singleTimeWarnMessage("keyset cursor stopped advancing prematurely")
            break

        pivotValue = value

        # row lies before --start: only the cursor is advanced, no column is fetched
        if skip > 0:
            skip -= 1
            continue

        for column in colList:
            if column == cursor:
                # the cursor value has already been retrieved while advancing
                colValue = pivotValue
            else:
                query = _embed(blind.keyset_by, pivotValue, agent.preprocessField(tbl, column), tableRef, field)
                query = agent.whereQuery(query)
                colValue = unArrayizeValue(inject.getValue(query, dump=True))

            colValue = "" if isNoneValue(colValue) else colValue
            lengths[column] = max(lengths[column], getConsoleLength(DUMP_REPLACEMENTS.get(getUnicode(colValue), getUnicode(colValue))))
            entries[column].append(colValue)

        produced += 1
def _dumpComposite(tbl, colList, count, cursorCols, tableRef, entries, lengths):
    """
    Walks the table along a composite (multi-column) cursor, one row per iteration.

    The key tuple of the next row is retrieved column by column with the 'keyset_ordered'
    query, after which every non-key column is fetched with an equality on the whole
    tuple. Rows lying before --start are walked over but not stored.

    For every produced row one value per column of colList is appended to 'entries' and
    the widest console length seen per column is kept in 'lengths'. Both dictionaries are
    modified in place and nothing is returned.
    """

    blind = queries[Backend.getIdentifiedDbms()].dump_table.blind
    fields = [agent.preprocessField(tbl, _) for _ in cursorCols]
    orderExpr = ','.join(fields)
    startSkip = (conf.limitStart - 1) if conf.limitStart else 0

    # number of rows to produce for the requested --start/--stop window
    if conf.limitStart and conf.limitStop:
        target = max(0, conf.limitStop - conf.limitStart + 1)
    elif conf.limitStop:
        target = conf.limitStop
    elif conf.limitStart:
        target = max(0, count - conf.limitStart + 1)
    else:
        target = count

    prev = None
    produced = 0
    seen = 0

    while produced < target and seen < count:
        if prev is None:
            condition = "1=1"
        else:
            # ANSI row-value (tuple) comparison advances the composite cursor lexicographically
            condition = "(%s)>(%s)" % (orderExpr, ','.join(_lit(_) for _ in prev))

        tup = []
        for field in fields:
            query = agent.whereQuery(blind.keyset_ordered % (field, tableRef, condition, orderExpr))
            value = unArrayizeValue(inject.getValue(query))
            # the NULL marker is treated as "no row", consistently with the single-column walk
            tup.append(None if isNoneValue(value) or value == NULL else safechardecode(value))

        missing = [isNoneValue(_) for _ in tup]

        # no (more) rows past the current cursor
        if all(missing):
            break

        # a partially retrieved key can serve neither as the next cursor nor as the equality
        # filter (its empty component would end up inside the generated conditions), hence
        # the walk is stopped rather than continued with a corrupted cursor
        if any(missing):
            singleTimeWarnMessage("keyset cursor could not be fully retrieved")
            break

        # safety latch against a non-advancing cursor
        if prev is not None and tup == prev:
            singleTimeWarnMessage("keyset cursor stopped advancing prematurely")
            break

        prev = tup
        seen += 1

        # row lies before --start: only the cursor is advanced, no column is fetched
        if seen <= startSkip:
            continue

        equals = " AND ".join("%s=%s" % (field, _lit(value)) for field, value in zip(fields, tup))

        for column in colList:
            if column in cursorCols:
                # key values have already been retrieved while advancing
                colValue = tup[cursorCols.index(column)]
            else:
                query = agent.whereQuery(blind.keyset_where % (agent.preprocessField(tbl, column), tableRef, equals))
                colValue = unArrayizeValue(inject.getValue(query, dump=True))

            colValue = "" if isNoneValue(colValue) else colValue
            lengths[column] = max(lengths[column], getConsoleLength(DUMP_REPLACEMENTS.get(getUnicode(colValue), getUnicode(colValue))))
            entries[column].append(colValue)

        produced += 1
def keysetDumpTable(tbl, colList, count, cursor):
    """
    Dumps a table row by row through keyset (seek) pagination over 'cursor', a list of
    one or more indexed key columns.

    Each next row is located with a >/row-value comparison against the previous cursor
    (index range scan) while the remaining columns are read with an exact equality on the
    cursor (index point seek); hence no row is skipped via OFFSET and no per-row ORDER BY
    filesort is needed. For single-column cursors a deep --start costs a single OFFSET
    "seed" jump, the rest of the walk being pure keyset.

    Returns the (entries, lengths) pair: the retrieved values and the widest console
    length, both keyed by column name.
    """

    tableRef = _tableRef(tbl)

    lengths = dict((_, 0) for _ in colList)
    entries = dict((_, BigArray()) for _ in colList)

    if len(cursor) != 1:
        _dumpComposite(tbl, colList, count, cursor, tableRef, entries, lengths)
    else:
        _dumpSingle(tbl, colList, count, cursor[0], tableRef, entries, lengths)

    # the first dumped column stands for the number of retrieved rows
    retrieved = 0
    if colList and colList[0] in entries:
        retrieved = len(entries[colList[0]])

    debugMsg = "keyset pagination retrieved %d row(s) for table '%s'" % (retrieved, unsafeSQLIdentificatorNaming(tbl))
    logger.debug(debugMsg)

    return entries, lengths