sqlmap/lib/utils/keysetdump.py

#!/usr/bin/env python

"""
Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
See the file 'LICENSE' for copying permission
"""

import re

from lib.core.agent import agent
from lib.core.bigarray import BigArray
from lib.core.common import Backend
from lib.core.common import isNoneValue
from lib.core.common import singleTimeWarnMessage
from lib.core.common import unArrayizeValue
from lib.core.common import unsafeSQLIdentificatorNaming
from lib.core.compat import xrange
from lib.core.convert import getConsoleLength
from lib.core.convert import getUnicode
from lib.core.data import conf
from lib.core.data import logger
from lib.core.data import queries
from lib.core.dicts import DUMP_REPLACEMENTS
from lib.core.enums import CHARSET_TYPE
from lib.core.enums import DBMS
from lib.core.enums import EXPECTED
from lib.core.settings import NULL
from lib.core.unescaper import unescaper
from lib.request import inject
from lib.utils.safe2bin import safechardecode

# Back-end DBMSes for which the dumped table is referenced in the schema/database-qualified
# form (db.table). Note: on MSSQL the table identifier already includes its schema
# (e.g. dbo.users), hence the plain db.table form results in the proper db.schema.table
# reference (e.g. [master].dbo.users).
KEYSET_SCHEMA_QUALIFIED = (
    DBMS.MYSQL,
    DBMS.PGSQL,
    DBMS.CRATEDB,
    DBMS.MSSQL,
    DBMS.H2,
    DBMS.HSQLDB,
)

def _tableRef(tbl):
    """
    Returns the table reference used inside of the keyset dump queries: upper-cased
    DB.TABLE for Oracle, db.table for the back-ends listed in KEYSET_SCHEMA_QUALIFIED and
    the bare table name in all other cases (including when no database has been set)
    """

    identified = Backend.getIdentifiedDbms()

    if not conf.db:
        return tbl

    if identified == DBMS.ORACLE:
        return "%s.%s" % (conf.db.upper(), tbl.upper())

    if identified in KEYSET_SCHEMA_QUALIFIED:
        return "%s.%s" % (conf.db, tbl)

    return tbl

def keysetSupported():
    """
    Tells if keyset (seek) pagination can be used against the identified back-end DBMS,
    i.e. its blind dump queries declare 'keyset_next' together with a cursor source
    (either a blind 'rowid' pseudo-column or a 'primary_key' catalog lookup)
    """

    node = queries[Backend.getIdentifiedDbms()].dump_table

    if "keyset_next" not in node.blind:
        return False

    return "rowid" in node.blind or "primary_key" in node

def _integerCursor(tbl, cursor):
    """
    Checks (by probing MIN(column) of each cursor column) that the cursor is made of
    integer values only.

    Integer keys are emitted by _embed() as bare numeric literals, so the comparison is a
    numeric one and agrees with MIN/ORDER BY. String (and also decimal) keys would end up
    as an escaped binary/hex literal, which may sort differently than MIN's collation and
    silently skip rows; those are refused here so that the caller falls back to the OFFSET
    based dump.
    """

    blind = queries[Backend.getIdentifiedDbms()].dump_table.blind
    ref = _tableRef(tbl)

    for column in cursor:
        probe = agent.whereQuery(blind.keyset_first % (agent.preprocessField(tbl, column), ref))
        minimum = unArrayizeValue(inject.getValue(probe))

        # Note: an empty/NULL MIN (e.g. empty table) does not disqualify the column, as
        # the walk will then simply yield no rows
        if isNoneValue(minimum):
            continue

        if not re.match(r"\A-?[0-9]+\Z", getUnicode(minimum).strip()):
            return False

    return True

def resolveKeysetCursor(tbl, colList):
    """
    Resolves the column(s) usable as a cursor for keyset (seek) pagination of the given
    table. A declared row-id pseudo-column is preferred; otherwise the primary key (single
    or composite) is looked up in the catalog and mapped onto the names in 'colList'.

    Returns a list of column names, or None when keyset pagination is not supported, no
    usable primary key has been found, a key column is missing from 'colList' or the key
    is not an integer one.
    """

    if not keysetSupported():
        return None

    dumpNode = queries[Backend.getIdentifiedDbms()].dump_table

    # first choice: the declared row-id pseudo-column
    if "rowid" in dumpNode.blind:
        return [dumpNode.blind.rowid]

    # second choice: the primary key resolved from the catalog
    pkNode = dumpNode.primary_key

    # Note: catalog lookups take the schema/table as string literals, hence the
    # identifier-unescaped names (dump queries themselves keep the quoted form)
    unsafeDb = unsafeSQLIdentificatorNaming(conf.db)
    unsafeTbl = unsafeSQLIdentificatorNaming(tbl)

    # Note: whereQuery() is deliberately not applied to the catalog lookups, as the
    # --where filter concerns the data rows only
    retVal = inject.getValue(pkNode.count % (unsafeDb, unsafeTbl), expected=EXPECTED.INT, charsetType=CHARSET_TYPE.DIGITS)

    try:
        keyLength = int(retVal)
    except (ValueError, TypeError):
        return None

    # no primary key at all, or a composite one without the ordered (row-value) keyset form
    if keyLength < 1 or (keyLength > 1 and "keyset_ordered" not in dumpNode.blind):
        return None

    cursor = []

    for position in xrange(keyLength):
        keyColumn = unArrayizeValue(inject.getValue(pkNode.query % (unsafeDb, unsafeTbl, position)))

        if not keyColumn:
            return None

        # case-insensitive mapping of the catalog name onto the name used in the dump
        found = next((_ for _ in colList if _ and _.lower() == keyColumn.lower()), None)

        if found is None:
            return None

        cursor.append(found)

    # Note: only integer cursors are accepted, as the escaped literal of a string key may
    # compare differently than MIN/ORDER BY and silently skip rows (OFFSET fallback then)
    return cursor if _integerCursor(tbl, cursor) else None

def _lit(value):
    """
    Type-correct SQL literal for a cursor value: a bare numeric literal for numeric keys
    (so the index is still used and the comparison is numeric), otherwise the DBMS-escaped
    (e.g. 0x.. hex) form for string keys. Both forms are self-contained (no surrounding quotes).
    """

    if value is not None and re.match(r"\A-?[0-9]+\Z", value):
        return value
    return unescaper.escape(value, False)

def _embed(template, value, *fixed):
    """
    Renders a single-column keyset template: the 'fixed' arguments fill the leading
    placeholders and the literal form of the cursor 'value' fills the last one. Quotes
    around placeholders are dropped first, as the literal is already self-contained.
    """

    unquoted = template.replace("'%s'", "%s")
    arguments = fixed + (_lit(value),)

    return unquoted % arguments

def _dumpSingle(tbl, colList, count, cursor, tableRef, entries, lengths):
    """
    Walks the table along a single-column keyset cursor and stores the retrieved rows.

    For every visited cursor value the remaining columns of 'colList' are fetched with the
    'keyset_by' query; values are appended to 'entries[column]' and the widest console
    representation seen so far is kept in 'lengths[column]' (both updated in place, nothing
    is returned). 'count' is the number of table rows and 'tableRef' the table reference
    used inside of the queries.

    The --start/--stop window (conf.limitStart/conf.limitStop) is honoured: when the
    back-end declares a 'keyset_seed' query, a single OFFSET jump positions the cursor just
    before --start; otherwise the rows preceding --start are walked over and discarded, so
    that the dump still begins at the requested row.
    """

    blind = queries[Backend.getIdentifiedDbms()].dump_table.blind
    field = agent.preprocessField(tbl, cursor)

    # number of rows to emit for the requested --start/--stop window
    if conf.limitStart and conf.limitStop:
        target = max(0, conf.limitStop - conf.limitStart + 1)
    elif conf.limitStop:
        target = conf.limitStop
    elif conf.limitStart:
        target = max(0, count - conf.limitStart + 1)
    else:
        target = count

    pivotValue = None
    skip = 0

    if conf.limitStart and conf.limitStart > 1:
        if "keyset_seed" in blind:
            # hybrid: a single OFFSET jump to seed the cursor just before --start, then pure keyset
            query = agent.whereQuery(blind.keyset_seed % (field, tableRef, field, conf.limitStart - 2))
            seed = unArrayizeValue(inject.getValue(query))

            if isNoneValue(seed) or seed == NULL:
                return

            pivotValue = safechardecode(seed)
        else:
            # no seed query declared: advance the cursor over the rows preceding --start
            # without dumping them (otherwise the window would wrongly begin at the first row)
            skip = conf.limitStart - 1

    produced = 0

    while produced < target:
        if pivotValue is None:
            query = blind.keyset_first % (field, tableRef)
        else:
            query = _embed(blind.keyset_next, pivotValue, field, tableRef, field)

        query = agent.whereQuery(query)
        value = unArrayizeValue(inject.getValue(query))

        # no further cursor value means that the end of the table has been reached
        if isNoneValue(value) or value == NULL:
            break

        value = safechardecode(value)

        # safety latch against a non-advancing cursor (e.g. encoding edge cases)
        if value == pivotValue:
            singleTimeWarnMessage("keyset cursor stopped advancing prematurely")
            break

        pivotValue = value

        # row located before --start: only the cursor moves forward
        if skip > 0:
            skip -= 1
            continue

        for column in colList:
            if column == cursor:
                # the cursor value itself has already been retrieved
                colValue = pivotValue
            else:
                query = _embed(blind.keyset_by, pivotValue, agent.preprocessField(tbl, column), tableRef, field)
                query = agent.whereQuery(query)
                colValue = unArrayizeValue(inject.getValue(query, dump=True))

            colValue = "" if isNoneValue(colValue) else colValue
            lengths[column] = max(lengths[column], getConsoleLength(DUMP_REPLACEMENTS.get(getUnicode(colValue), getUnicode(colValue))))
            entries[column].append(colValue)

        produced += 1

def _dumpComposite(tbl, colList, count, cursorCols, tableRef, entries, lengths):
    """
    Walks the table along a composite (multi-column) keyset cursor and stores the
    retrieved rows.

    Each step fetches the next key tuple (one 'keyset_ordered' query per key column, using
    a row-value comparison against the previous tuple) and then the remaining columns of
    'colList' with a 'keyset_where' query matching the whole key. Values are appended to
    'entries[column]' and the widest console representation seen so far is kept in
    'lengths[column]' (both updated in place, nothing is returned). 'count' is the number
    of table rows and bounds the number of visited rows.

    The --start/--stop window (conf.limitStart/conf.limitStop) is honoured by walking over
    (and discarding) the rows preceding --start.
    """

    blind = queries[Backend.getIdentifiedDbms()].dump_table.blind
    fields = [agent.preprocessField(tbl, _) for _ in cursorCols]
    orderExpr = ','.join(fields)

    # number of leading rows to walk over and number of rows to emit
    startSkip = (conf.limitStart - 1) if conf.limitStart else 0
    if conf.limitStart and conf.limitStop:
        target = max(0, conf.limitStop - conf.limitStart + 1)
    elif conf.limitStop:
        target = conf.limitStop
    elif conf.limitStart:
        target = max(0, count - conf.limitStart + 1)
    else:
        target = count

    prev = None
    produced = 0
    seen = 0

    while produced < target and seen < count:
        if prev is None:
            condition = "1=1"
        else:
            # ANSI row-value (tuple) comparison advances the composite cursor lexicographically
            condition = "(%s)>(%s)" % (orderExpr, ','.join(_lit(_) for _ in prev))

        tup = []
        for field in fields:
            query = agent.whereQuery(blind.keyset_ordered % (field, tableRef, condition, orderExpr))
            value = unArrayizeValue(inject.getValue(query))

            # Note: the NULL marker is treated as "no value" (same as in the single-column
            # walk), as key components are integers and can never legitimately be NULL
            tup.append(None if isNoneValue(value) or value == NULL else safechardecode(value))

        missing = [isNoneValue(_) for _ in tup]

        # nothing retrieved at all means that the end of the table has been reached
        if all(missing):
            break

        # a partially retrieved key can't be turned into a valid comparison/equality, so
        # the walk is stopped rather than continued with a malformed cursor
        if any(missing):
            singleTimeWarnMessage("keyset cursor could not be fully retrieved")
            break

        # safety latch against a non-advancing cursor
        if prev is not None and tup == prev:
            singleTimeWarnMessage("keyset cursor stopped advancing prematurely")
            break

        prev = tup
        seen += 1

        # row located before --start: only the cursor moves forward
        if seen <= startSkip:
            continue

        equals = " AND ".join("%s=%s" % (field, _lit(value)) for field, value in zip(fields, tup))

        for column in colList:
            if column in cursorCols:
                # key columns have already been retrieved as part of the cursor
                colValue = tup[cursorCols.index(column)]
            else:
                query = agent.whereQuery(blind.keyset_where % (agent.preprocessField(tbl, column), tableRef, equals))
                colValue = unArrayizeValue(inject.getValue(query, dump=True))

            colValue = "" if isNoneValue(colValue) else colValue
            lengths[column] = max(lengths[column], getConsoleLength(DUMP_REPLACEMENTS.get(getUnicode(colValue), getUnicode(colValue))))
            entries[column].append(colValue)

        produced += 1

def keysetDumpTable(tbl, colList, count, cursor):
    """
    Dumps the table row by row with keyset (seek) pagination over 'cursor' (list of one or
    more indexed key columns). The following row is located with a >/row-value comparison
    against the previous cursor value (index range scan), while the rest of the columns
    are retrieved with an exact match on the cursor (index point seek) - hence, rows are
    neither skipped over with OFFSET nor is a per-row ORDER BY filesort required. With a
    single-column cursor a deep --start is reached with one OFFSET "seed" jump, the walk
    being pure keyset afterwards.

    Returns the tuple (entries, lengths), both being dictionaries keyed by column name.
    """

    tableRef = _tableRef(tbl)

    lengths = dict((column, 0) for column in colList)
    entries = dict((column, BigArray()) for column in colList)

    if len(cursor) != 1:
        _dumpComposite(tbl, colList, count, cursor, tableRef, entries, lengths)
    else:
        _dumpSingle(tbl, colList, count, cursor[0], tableRef, entries, lengths)

    retrieved = len(entries[colList[0]]) if colList and colList[0] in entries else 0
    logger.debug("keyset pagination retrieved %d row(s) for table '%s'" % (retrieved, unsafeSQLIdentificatorNaming(tbl)))

    return entries, lengths