#!/usr/bin/env python
"""
Copyright (c) 2006-2026 sqlmap developers (https://sqlmap.org)
See the file 'LICENSE' for copying permission
Request-body injection-point handling:
- recognition regexes (REAL, imported from settings) classify JSON/JSON_LIKE/XML/PLAIN
- JSON/XML injection-point marking preserves every value (mirrors target.py)
- HPP transform reconstructs the original SQL after ASP comma-join
"""
import os
import re
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from _testutils import bootstrap
bootstrap()
from lib.core.settings import (JSON_RECOGNITION_REGEX, JSON_LIKE_RECOGNITION_REGEX,
XML_RECOGNITION_REGEX, PAYLOAD_DELIMITER,
CUSTOM_INJECTION_MARK_CHAR)
# The real source marks injection points with kb.customInjectionMark, which defaults to
# CUSTOM_INJECTION_MARK_CHAR ('*'). Tie the test's mark char to the source constant so a
# change there is reflected here too.
MARK = CUSTOM_INJECTION_MARK_CHAR
def classify(d):
if re.search(JSON_RECOGNITION_REGEX, d):
return "JSON"
if re.search(JSON_LIKE_RECOGNITION_REGEX, d):
return "JSON_LIKE"
if re.search(XML_RECOGNITION_REGEX, d):
return "XML"
return "PLAIN"
def _drive_request_marking(body):
"""Run sqlmap's REAL request-body injection-point marking on `body`.
Approach (a): drive the genuine code path in lib.core.target._setRequestParams()
(the same function the CLI uses) with a minimal conf/kb state, a POST body, and
readInput auto-answering 'Y'. The marking regexes (target.py:159-215) run against
`conf.data`; the fully-marked string is the snapshot of conf.data carrying the most
injection marks, captured BEFORE the later strip (target.py:~348) removes them.
Returns (fully_marked_data, kb.postHint). A regression in the source marking regexes
changes this output and breaks the asserting tests.
"""
import lib.core.target as target
from lib.core.data import conf, kb
from lib.core.enums import HTTPMETHOD
snapshots = []
base = type(conf)
orig_setitem = base.__setitem__
def _record(self, key, value):
if key == "data" and isinstance(value, str):
snapshots.append(value)
orig_setitem(self, key, value)
orig_readInput = target.readInput
target.readInput = lambda *a, **k: 'Y'
base.__setitem__ = _record
try:
conf.parameters = {}
conf.paramDict = {}
conf.direct = False
conf.method = HTTPMETHOD.POST
conf.url = "http://test.invalid/"
conf.cookie = None
conf.httpHeaders = []
conf.testParameter = None
conf.forms = None
conf.crawlDepth = None
kb.processUserMarks = None
kb.postHint = None
kb.customInjectionMark = MARK
kb.testOnlyCustom = False
conf.data = body
target._setRequestParams()
postHint = kb.postHint
finally:
base.__setitem__ = orig_setitem
target.readInput = orig_readInput
fully_marked = max(snapshots, key=lambda s: s.count(MARK))
return fully_marked, postHint
class TestRecognitionRegexes(unittest.TestCase):
CASES = [
('{"id":1}', "JSON"),
('{"a":"b"}', "JSON"),
('{"n":1,"m":"s"}', "JSON"),
('[{"id":1}]', "JSON"),
('[{"id":1},{"id":2}]', "JSON"),
("{'a':'b'}", "JSON_LIKE"),
("1", "XML"),
("1
", "XML"),
("v", "XML"),
("id=1&x=2", "PLAIN"),
("just text", "PLAIN"),
]
def test_classification(self):
for body, expected in self.CASES:
self.assertEqual(classify(body), expected, msg="classify(%r)" % body)
class TestJsonMarking(unittest.TestCase):
# Approach (a): exercises the REAL JSON injection-point marking in
# lib.core.target._setRequestParams() (target.py:159-162) via _drive_request_marking().
# No source logic is copied into the test; a regression in the source regexes fails it.
@staticmethod
def mark(data):
marked, postHint = _drive_request_marking(data)
assert postHint == "JSON", "expected JSON postHint, got %r for %r" % (postHint, data)
return marked
CASES = [
('{"id":1}', '{"id":1*}'),
('{"name":"abc"}', '{"name":"abc*"}'),
('{"a":{"b":"1"}}', '{"a":{"b":"1*"}}'),
('{"empty":""}', '{"empty":"*"}'),
('{"b":true,"n":null}', '{"b":true*,"n":null*}'),
('{"a":"x","b":"y"}', '{"a":"x*","b":"y*"}'),
('{"url":"http://h:8080/p"}', '{"url":"http://h:8080/p*"}'),
]
def test_cases(self):
for inp, expected in self.CASES:
self.assertEqual(self.mark(inp), expected, msg="mark(%r)" % inp)
def test_value_preserved_property(self):
# marking must not delete/garble the original value characters
for inp, _ in self.CASES:
out = self.mark(inp)
self.assertEqual(out.replace(MARK, ""), inp, msg="marking altered %r" % inp)
class TestXmlMarking(unittest.TestCase):
# Approach (a): exercises the REAL SOAP/XML injection-point marking in
# lib.core.target._setRequestParams() (target.py:215) via _drive_request_marking().
# A regression in the source XML regex fails this test.
def mark(self, data):
from lib.core.enums import POST_HINT
marked, postHint = _drive_request_marking(data)
self.assertIn(postHint, (POST_HINT.XML, POST_HINT.SOAP),
msg="expected XML/SOAP postHint, got %r for %r" % (postHint, data))
return marked
CASES = [
("x", "x*"),
('x', 'x*'),
("bob5", "bob*5*"),
("v", "v*"),
("1
", "1*
"),
]
def test_cases(self):
for inp, expected in self.CASES:
self.assertEqual(self.mark(inp), expected, msg="xmlmark(%r)" % inp)
def _drive_hpp(payload, name="id"):
"""Run sqlmap's REAL HTTP-parameter-pollution payload reconstruction on `payload`.
Approach (a): drive the genuine HPP block inside lib.request.connect.Connect.queryPage()
(connect.py:1168-1192) -- the same method the engine uses to issue every request -- with
conf.hpp enabled and a GET value carrying the payload between PAYLOAD_DELIMITERs.
conf.skipUrlEncode is set so the unencoded splitter branch runs (matching the pinned
expected strings). queryPage's network call (agent.removePayloadDelimiters, invoked
immediately AFTER the HPP block) is hijacked to capture the transformed `value` and abort
before any I/O; the payload is then extracted from between the delimiters. A regression in
the source HPP logic changes this output and breaks the asserting tests.
"""
from lib.core.data import conf, kb
from lib.core.enums import PLACE
from lib.core.agent import agent
from lib.request.connect import Connect
class _Sentinel(Exception):
pass
captured = {}
def _capture(value):
captured["value"] = value
raise _Sentinel()
orig_remove = agent.removePayloadDelimiters
agent.removePayloadDelimiters = _capture
try:
conf.direct = False
conf.hpp = True
conf.method = "GET"
conf.paramDel = None
conf.skipUrlEncode = True
conf.url = "http://test.invalid/page.asp?%s=1" % name
kb.postUrlEncode = False
kb.tamperFunctions = []
kb.postSpaceToPlus = False
value = "%s=%s%s%s" % (name, PAYLOAD_DELIMITER, payload, PAYLOAD_DELIMITER)
try:
Connect.queryPage(value=value, place=PLACE.GET, disableTampering=True)
except _Sentinel:
pass
finally:
agent.removePayloadDelimiters = orig_remove
_ = re.escape(PAYLOAD_DELIMITER)
return re.search(r"(?s)%s(?P.*?)%s" % (_, _), captured["value"]).group("result")
class TestHppReconstruction(unittest.TestCase):
# Approach (a): drives the REAL HPP reconstruction (connect.py:1168-1192) via _drive_hpp().
def hpp(self, payload, name="id"):
return _drive_hpp(payload, name)
# Exact transform outputs (verified live against an ASP-style join). We pin the produced
# string rather than "reconstruct the SQL", because reconstruction depends on the SQL parser
# treating /* */ as a token separator (1/*,*/AND -> "1 AND"), which a string compare can't model.
CASES = [
("1", "1"),
("1 AND 2=2", "1/*&id=*/AND/*&id=*/2=2"),
("1 AND 'a'='a'", "1/*&id=*/AND/*&id=*/'a'='a'"),
]
def test_exact_outputs(self):
for payload, expected in self.CASES:
self.assertEqual(self.hpp(payload), expected, msg="hpp(%r)" % payload)
def test_balanced_comments(self):
# every /* must have a matching */ (no dangling comment bridge)
for payload in ["1 UNION SELECT a,b", "1 AND 2=2 OR 3=3", "x y z"]:
out = self.hpp(payload)
self.assertEqual(out.count("/*"), out.count("*/"), msg="unbalanced comments for %r" % payload)
if __name__ == "__main__":
unittest.main(verbosity=2)