ok
Direktori : /opt/imunify360/venv/lib/python3.11/site-packages/im360/subsys/ |
Current File : //opt/imunify360/venv/lib/python3.11/site-packages/im360/subsys/modsec_audit_log.py |
"""
SecAuditLog parser
"""
import json
import ipaddress
import os
import os.path
import posixpath
import re
import urllib.parse
from abc import ABCMeta, abstractmethod
from contextlib import suppress
from copy import copy
from http.cookies import CookieError, SimpleCookie
from itertools import product
from logging import getLogger
from typing import Optional, Tuple
from urllib.parse import parse_qs

from defence360agent.utils.common import DAY, rate_limit
from defence360agent.utils import USER_IDENTITY_FIELD, user_identity
from im360.contracts.config import ModsecSensor

ANOMALITY_SCORE_FIELDS = (
    "inbound_anomality_score",
    "outbound_anomality_score",
)
SEVERITY, ADVANCED, HEADERS = "severity", "advanced", "headers"
URI, HTTP_METHOD, FORM_PARAMS, QUERY_PARAMS, ATTACKERS_IP, STATUS_CODE = (
    "uri",
    "http_method",
    "form",
    "query",
    "attackers_ip",
    "status_code",
)
ENGINE_MODE = "engine_mode"

# fields to pass from ModSecurity to Imunify sensor socket
PICK_SECAUDITLOG_FIELDS = (
    "User-Agent",
    "Host",
    ATTACKERS_IP,
    "transaction_id",
    "rule",
    "msg",
    "message",
    "access_denied",
    SEVERITY,
    "ver",
    "tag",
    "Producer",
    "modsec_version",
    "vendor",
    STATUS_CODE,
    ENGINE_MODE,
    ADVANCED,
) + ANOMALITY_SCORE_FIELDS

# _COOKIE is the first sensitive header name; the second one is only
# needed as a member of _SENSITIVE_HEADERS.
_COOKIE, _ = _SENSITIVE_HEADERS = ("cookie", "authorization")

# captures the score kind (Inbound/Outbound) and the first number after it
_PARSE_SCORE_REGEX = re.compile(
    r"(Inbound|Outbound) Anomaly Score.*?(\d+)", re.IGNORECASE
)

logger = getLogger(__name__)
# logger.exception wrapped with rate_limit(period=DAY)
throttled_log_exception = rate_limit(period=DAY)(logger.exception)


class ParseError(RuntimeError):
    """
    log as logger.exception(*e.args) to avoid sentry duplicates
    """

    pass


class _AmbiguousSeverity(RuntimeError):
    """
    to log as warning
    """

    pass


class MalformedFileError(RuntimeError):
    """
    log as logger.exception(*e.args) to avoid sentry duplicates
    """

    pass


class _MiscDataNotInteresting(Exception):
    """Raised by section parsers for a line that carries no usable data."""

    pass


class _SerialLogSectionParser(metaclass=ABCMeta):
    """
    Base class for parsers of a single section of a serial audit log.

    Subclasses set _IS_APPLICABLE_SINCE_REGEX to a compiled pattern that
    matches the section boundary line (e.g. "--2172df61-A--") and implement
    parse_name_value_tokens() for the lines inside that section.
    """

    _IS_APPLICABLE_SINCE_REGEX = None

    @classmethod
    def is_applicable_since(cls, line):
        """
        :param str line: audit log line
        :return bool: True if *line* is the boundary that opens the section
            handled by this parser
        """
        assert cls._IS_APPLICABLE_SINCE_REGEX, "regex should be implemented"
        return cls._IS_APPLICABLE_SINCE_REGEX.match(line) is not None

    @classmethod
    @abstractmethod
    def parse_name_value_tokens(cls, line):
        """
        :return: name, value tokens
        :raise ValueError: if cannot parse string
        """
        pass


class _SectionAParser(_SerialLogSectionParser):
    """Section A: yields the client address and the transaction id."""

    _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}A--$")
    # "[timestamp] <id> <ip> ..."
    _PARSE_IP_REGEX = re.compile(r"^\[.*?\] (?P<id>\S+) (?P<ip>\S+)")

    @classmethod
    def parse_name_value_tokens(cls, line):
        """
        :param str line: section A line
        :return: generator of ("attackers_ip", str) and
            ("transaction_id", str) pairs
        :raise _MiscDataNotInteresting: if the line does not match the
            expected layout or the address token is not a valid IPv4/IPv6
            address
        """
        match = cls._PARSE_IP_REGEX.match(line)
        if not match:
            logger.warning(
                "--section-A--: cannot parse this line: %s", repr(line)
            )
            raise _MiscDataNotInteresting()

        ip_token = match.group("ip")
        try:
            ipv4_or_ipv6 = ipaddress.ip_address(ip_token)
        except ValueError:
            # report the token that failed validation: group(1) is the
            # transaction id, not the address
            logger.warning(
                "--section-A--: cannot use %s for IPv4 or IPv6 address",
                ip_token,
            )
            raise _MiscDataNotInteresting()

        yield ATTACKERS_IP, str(ipv4_or_ipv6)
        yield "transaction_id", match.group("id")


class _SectionBParser(_SerialLogSectionParser):
    """Section B: request line and request headers."""

    _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}B--$")
    _PARSE_NAME_VALUE = re.compile(r"^(\S*): ?(.*)$")

    @classmethod
    def parse_name_value_tokens(cls, line):
        """
        For a "Name: value" line yields (HEADERS, [name, value]) when
        obfuscate_if_sensitive_and_required() reports the header as
        required. Any other line is treated as a request line and yields
        HTTP_METHOD and URI, plus QUERY_PARAMS when
        ModsecSensor.SEND_ADDITIONAL_DATA is set.

        :param str line: section B line
        :raise _MiscDataNotInteresting: if a non-header line has fewer
            than two whitespace separated tokens
        """
        match = cls._PARSE_NAME_VALUE.match(line)
        if match:
            # supply None instead of an empty string
            name, value = match.group(1), match.group(2) or None
            (
                maybe_obfuscated_value,
                is_required,
            ) = obfuscate_if_sensitive_and_required(name, value)
            if is_required:
                yield HEADERS, [name, maybe_obfuscated_value]
        else:
            try:
                # GET /a/c/getName/?param1=abs HTTP/1.1 HTTP/1.1
                method, uri, *_ = line.split()
                if method.isupper() and os.path.isabs(uri):
                    yield HTTP_METHOD, method
                    parsed = urllib.parse.urlparse(uri)
                    yield URI, parsed.path
                    if ModsecSensor.SEND_ADDITIONAL_DATA:
                        yield QUERY_PARAMS, obfuscate(parsed.query)
            except ValueError:
                raise _MiscDataNotInteresting()


class _SectionCParser(_SerialLogSectionParser):
    """Section C: request body."""

    _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}C--$")

    @classmethod
    def parse_name_value_tokens(cls, line):
        """
        Yields (FORM_PARAMS, obfuscate(line)) only when
        ModsecSensor.SEND_ADDITIONAL_DATA is set; otherwise yields nothing.

        :param str line: section C line
        """
        if ModsecSensor.SEND_ADDITIONAL_DATA:
            yield FORM_PARAMS, obfuscate(line)
class _SectionFParser(_SerialLogSectionParser): _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}F--$") _STATUS_CODE_REGEX = re.compile(r"^HTTP/\d\.\d (\d+).*$") @classmethod def parse_name_value_tokens(cls, line): match = cls._STATUS_CODE_REGEX.match(line) if not match: raise _MiscDataNotInteresting() else: yield STATUS_CODE, match.group(1) class _BaseSectionHParser(_SerialLogSectionParser): _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}H--$") _PARSE_MESSAGE_APACHE_ERROR = re.compile( r"([^=]*) (ModSecurity):\s+([^=]*)" ) _PARSE_MESSAGE_REGEX = re.compile(r'(\[\w+ ".*?"\])') _PARSE_ACCESS_DENIED_CODE = re.compile(r"Access denied with code \d+") _PARSE_MESSAGE_TOKENS_REGEX = re.compile(r'\[(\w+) "(.*?)"\]') _INCOMPLETE_MESSAGE_REGEX = re.compile(r'(\[\w+ ".*?$)') @classmethod def parse_name_value_tokens(cls, line): name, value = cls._parse_name_value(line) # ModSecurity name used for modsec version 3.x if name in ["Message", "ModSecurity"]: return name, *cls._parse_message(value, line) if name == "Apache-Error": return cls._parse_apache_err(value, line) # note: ModSecurity v3 with `SecAuditLogFormat Native` has not # `Engine-Mode` (secrules_engine) field currently if name == "Engine-Mode": name = ENGINE_MODE value = value.strip('"') # remove surrounding double quotes return name, value @classmethod def _parse_name_value(cls, s): """ :raise ValueError: if cannot parse string """ name, value = s.split(maxsplit=1) if name[-1] != ":": raise ValueError('Expecting sort of "Name: value"') else: # chop colon character name = name[:-1] return name, value @classmethod def _parse_apache_err(cls, msgval, _): match = cls._PARSE_MESSAGE_APACHE_ERROR.match(msgval) if match: _, name, modsec = match.groups() return name, *cls._parse_message(modsec, "") else: # Apache-Error line dosn't have ModSecurity part raise _MiscDataNotInteresting() @classmethod def _parse_message(cls, msgval, audit_log_line): """ Parse string with format [key "val"] [key "val"] ... 
[key "val"] to dictionary {"key": "val", "key": "val", ...} In case when there is uncompleted value part e.g. '[key "val' with lost '"]' in the end it also returns incomplete part as string '[key "val' """ tokens_list = cls._PARSE_MESSAGE_REGEX.findall(msgval) tokens = " ".join(tokens_list) msgtxt = msgval for token in tokens_list: msgtxt = msgtxt.replace(token, "").strip() incomplete_part = cls._INCOMPLETE_MESSAGE_REGEX.findall(msgtxt) for token in incomplete_part: msgtxt = msgtxt.replace(token, "").strip() result = dict(message=msgtxt) match = cls._PARSE_ACCESS_DENIED_CODE.search(msgtxt) if match: result["access_denied"] = True for token in cls._PARSE_MESSAGE_TOKENS_REGEX.finditer(tokens): name, text = token.groups() existing = result.get(name) if existing is not None: if name == "severity": logger.warning("Ambiguous severity: %r", audit_log_line) raise _AmbiguousSeverity() if not isinstance(existing, list): # convert multiple token entries into a list existing = result[name] = [existing] existing.append(text) elif name == "tag": # 'tag' is always a list result[name] = [text] elif name == "id": # "rule_id" to be consistent with other plugins result["rule"] = text if text is None: throttled_log_exception( "rule field is None, modsec message: %s", msgval ) else: result[name] = text if name == "msg": anomality_score_items = get_anomality_score_items(text) result.update(anomality_score_items) return result, *incomplete_part class _SectionHParser(_BaseSectionHParser): """ _BaseSectionHParser + DEF-2617 workaround """ _INCOMPLETE_RESULT = { "name": "", "value": dict(), "incomplete_part": "", "count": 0, } _MAX_INCOMPLETE_MESSAGE = 4 def __init__(self): self._open_matched_string = False self._incomplete_result = copy(self._INCOMPLETE_RESULT) def parse_name_value_tokens(self, line): """ In case with correct line e.g. 
Name: Message [key "val"] [key "val"] [key "val"] if there is no data in self._incomplete_result: returns result of _BaseSectionHParser.parse_name_value_tokens else returns name, value of self._incomplete_result and name, value of current line it is possible since this function is generator. In case with incomplete line e.g. Name: Message [key "val"] [key "val"] [key "val if there is no data in self._incomplete_result: collect name, value and incomplete part values that was got from _BaseSectionHParser.parse_name_value_tokens to self._incomplete_result and wait next line. returns empty list else: does the same but returns name, value of self._incomplete_result In case with non 'name: value' format: if there is no data in self._incomplete_result: raises else: Concatenate incomplete result of previous iteration with current line and parse it by _BaseSectionHParser._parse_message. if there is still incomplete line: it updates current self._incomplete_result and returns empty list else: returns name, value from collected data """ result = [] try: _res = super().parse_name_value_tokens(line) except ValueError: _count = self._incomplete_result["count"] if not _count or _count > self._MAX_INCOMPLETE_MESSAGE: if not self._open_matched_string: raise else: raise _MiscDataNotInteresting() _res = self._parse_message( self._incomplete_result["incomplete_part"] + line, line ) _res[0].pop("message") if len(_res) == 1: self._incomplete_result["value"].update(_res[0]) name = self._incomplete_result["name"] value = self._incomplete_result["value"] result.append((name, value)) self._incomplete_result = copy(self._INCOMPLETE_RESULT) else: self._incomplete_result["value"].update(_res[0]) self._incomplete_result["incomplete_part"] = _res[1] self._incomplete_result["count"] += 1 else: # If line was successfully parsed if self._incomplete_result["count"]: _name = self._incomplete_result["name"] _value = self._incomplete_result["value"] _value["message"] += 
self._incomplete_result["incomplete_part"] result.append((_name, _value)) if len(_res) == 3: name, value, incomplete = _res[0], _res[1], _res[2] if name == "Message" and "[MatchedString" in line: value["message"] += incomplete result.append((name, value)) self._incomplete_result = copy(self._INCOMPLETE_RESULT) else: self._incomplete_result["name"] = name self._incomplete_result["value"] = value self._incomplete_result["incomplete_part"] = incomplete self._incomplete_result["count"] = 1 else: name, value = _res[0], _res[1] result.append((name, value)) self._incomplete_result = copy(self._INCOMPLETE_RESULT) self._open_matched_string = ( name == "Message" and "[MatchedString" in line ) finally: for name, value in result: yield name, value class _SectionZParser(_SerialLogSectionParser): _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}Z--$") @classmethod def parse_name_value_tokens(cls, line): raise NotImplementedError("Expecting this method never be called.") class _SectionNotInterestedInParser(_SerialLogSectionParser): _IS_APPLICABLE_SINCE_REGEX = re.compile(r"^-{2,3}\w+-{1,3}[^ABHZ]--$") @classmethod def parse_name_value_tokens(cls, line): raise _MiscDataNotInteresting() class Parser(metaclass=ABCMeta): # all of ascii table from 32 to 126 (printable ascii chars) but, '%' _SAFE_UNQUOTED = "".join( chr(c) for c in range( 32, # off-by-1 obviously 126 + 1, ) if chr(c) != "%" ) @abstractmethod def feed(self, bytes_): """ :param bytes_: audit log line :return dict: for complete result and None for incomplete. See unit tests for result dict fields. 
:raise ParseError: """ pass def flush(self): pass @classmethod def _adaptive_decode_bytes(cls, bytes_): """ apply fallback on UnicodeDecodeError """ try: return cls.decode_bytes(bytes_) except UnicodeDecodeError: return urllib.parse.quote_from_bytes( bytes_, safe=cls._SAFE_UNQUOTED ) @staticmethod def decode_bytes(bytes_): """ because cannot mock 'read-only' bstr.decode() attr """ return bytes_.decode() class _RevolverParser(Parser): # skip first batch of errors if Parsers starts parsing # in the middle of serial log on agent restart _SKIP_FIRST_NUM_ERRORS = 11 _available_parsers = [] def __init__(self, audit_logdir_path=None): """ :param str audit_logdir_path: audit log dir path for ModSecurity concurrent mode """ self._audit_logdir_path = audit_logdir_path self._revolve() self._skip_first_num_errors = self._SKIP_FIRST_NUM_ERRORS def feed(self, bytes_): try: result = self._feed_impl(bytes_) except ParseError: if self._skip_first_num_errors > 0: # skip "first num errors" self._skip_first_num_errors -= 1 return None else: raise else: if bytes_.strip(): # starting from the first non empty and succefully processed # token we expect no errors self._skip_first_num_errors = 0 return result def flush(self): return self._current_parser.flush() def _feed_impl(self, bytes_): for attempt in range(1, len(self._available_parsers) + 1): try: return self._current_parser.feed(bytes_) except ParseError as e: if attempt < len(self._available_parsers): # give the next parser a chance... 
self._revolve() else: # raise on last attempt raise ParseError( "Neither of available parsers is capable " "to parse this: %r ", bytes_, ) from e def _revolve(self): """Exchange parser to the next""" assert ( len(self._available_parsers) >= 2 ), "Count of available_parsers should not be less than 2" logger.info( "Swap %s<->%s", self._available_parsers[0].__name__, self._available_parsers[1].__name__, ) self._available_parsers = ( self._available_parsers[1:] + self._available_parsers[:1] ) # use blank parser state cls = self._available_parsers[0] parser_kwargs = dict() if self._audit_logdir_path is not None: parser_kwargs["audit_logdir_path"] = self._audit_logdir_path self._current_parser = cls(**parser_kwargs) class _JsonLogParserBase(Parser): def __init__(self, **_): self._accumulating_result = {} self._accumulating_lines = [] def parse_json_data(self, data: dict) -> dict: raise NotImplementedError() def feed(self, bytes_): line = self._adaptive_decode_bytes(bytes_.rstrip()) if line: try: self._accumulating_lines.append(line) self._accumulating_result = self.parse_json_data( json.loads(line) ) return self.flush() except _MiscDataNotInteresting: return None except Exception as e: raise ParseError( "Error occurs while parse json line %r, reason: %r" % (line, e) ) from e def flush(self): try: return mk_modsec_indicent_list( self._accumulating_result, debug_ctx=self._accumulating_lines ) finally: self._accumulating_result = {} self._accumulating_lines = [] class _JsonLogParserv2(_JsonLogParserBase): """Parse ModSecurity v2 json log entries.""" def parse_json_data(self, data: dict) -> dict: """ The contents of the log entry can be seen here https://github.com/SpiderLabs/ModSecurity/blob/v2/master/apache2/msc_logging.c#L644 Optional fields may be omitted due to SecAuditLogParts setting. 
Expected data (some unused keys are omitted): {'transaction': { 'time': str, 'transaction_id': str, 'remote_address': str, 'remote_port': int, 'local_address': str, 'local_port': int }, 'request': { 'request_line': str, # optional field 'body': [], # optional field 'headers': {}, # optional field }, 'response': { 'protocol': str, # optional field 'body': str, # optional field 'headers': {}, # optional field 'status': int, # optional field }, 'audit_data': { 'producer': str or [], # optional field 'messages': [], # optional field 'error_messages': [], # optional field 'engine_mode': str, # optional field } } """ # noqa: E501 expected_data_keys = { "transaction", "request", "response", "audit_data", } if not expected_data_keys.issubset(data.keys()): raise ParseError("Not expected json data for ModSecurity v2") result = {} transaction = data["transaction"] additional_data_is_required = ModsecSensor.SEND_ADDITIONAL_DATA # section A try: result[ATTACKERS_IP] = str( ipaddress.ip_address(transaction["remote_address"]) ) except ValueError: logger.warning( "Cannot use %s for IPv4 or IPv6 address", transaction["remote_address"], ) raise _MiscDataNotInteresting() result["transaction_id"] = transaction.get("transaction_id", "-") # section B result[HEADERS] = [] for header, value in data["request"].get("headers", {}).items(): ( maybe_obfuscated_value, is_required, ) = obfuscate_if_sensitive_and_required(header, value) if is_required: result[HEADERS].append([header, maybe_obfuscated_value]) if data["request"].get("request_line"): # GET /a/c/getName/?param1=abs HTTP/1.1 method, uri, *_ = data["request"]["request_line"].split() if method.isupper() and os.path.isabs(uri): result[HTTP_METHOD] = method parsed = urllib.parse.urlparse(uri) result[URI] = parsed.path if additional_data_is_required: result[QUERY_PARAMS] = obfuscate(parsed.query) # section C if data["request"].get("body") and additional_data_is_required: result[FORM_PARAMS] = obfuscate("".join(data["request"]["body"])) # 
section F if data["response"].get("status"): result[STATUS_CODE] = data["response"]["status"] # section H if data["audit_data"].get("producer"): if isinstance(data["audit_data"]["producer"], str): version, _ = get_producer_data(data["audit_data"]["producer"]) result["modsec_version"] = version else: modsec_full_name, *vendors = data["audit_data"]["producer"] version, _ = get_producer_data(modsec_full_name) result["vendor"] = vendors result["modsec_version"] = version if data["audit_data"].get("engine_mode"): result[ENGINE_MODE] = data["audit_data"]["engine_mode"] if data["audit_data"].get("messages"): result["MessageList"] = [ _SectionHParser._parse_message(msg, "")[0] for msg in data["audit_data"]["messages"] ] return result class _JsonLogParserv3(_JsonLogParserBase): """Parse ModSecurity v3 json log entries.""" def _to_lower_keys(self, obj): """ Assume that *obj* is json serializeble. """ if isinstance(obj, dict): # Leave the headers as is return { k.lower(): ( self._to_lower_keys(v) if k.lower() != "headers" else v ) for k, v in obj.items() } elif isinstance(obj, list): return [self._to_lower_keys(item) for item in obj] else: return obj def parse_json_data(self, data: dict) -> dict: """ Parse log entry data for JSON SecAuditLogFormat. https://github.com/SpiderLabs/ModSecurity/wiki/Reference-Manual-(v2.x)#SecAuditLogFormat The contents of the log entry can be seen here https://github.com/SpiderLabs/ModSecurity/blob/v3/master/src/transaction.cc#L1622 Optional fields may be omitted due to SecAuditLogParts setting. 
Expected data: {'transaction': { 'client_ip': str, 'time_stamp': str, 'server_id': str, 'client_port': int, 'host_ip': str, 'host_port': int, 'unique_id': str, 'request': { 'http_version': str, 'method': str, 'uri': str, 'body': str, # optional field 'headers': {}, # optional field }, 'response': { 'body': str, # optional field 'headers': {}, # optional field 'http_code': int, }, # optional fields below 'producer': { 'components': [], 'connector': str, 'modsecurity': str, 'secrules_engine': str, }, 'messages': [{'details': {'accuracy': str, 'data': str, 'file': str 'lineNumber': str, 'match': str, 'maturity': str, 'reference': str, 'rev': '1', 'ruleId': str, 'severity': str, 'tags': [], 'ver': str}, 'message': str}]}} """ # noqa: E501 error_data = { "error": "ModSecurity was not compiled with JSON support." } if data == error_data: logger.warning( "SecAuditLogFormat set to JSON, but " "ModSecurity was not compiled with JSON support." ) raise _MiscDataNotInteresting() expected_data_keys = {"transaction"} if data.keys() != expected_data_keys: raise ParseError("Not expected json data for ModSecurity v3") # to avoid possible key format changes in ModSecurity # convert dict keys to lowercase data = self._to_lower_keys(data) result = {} transaction = data["transaction"] additional_data_is_required = ModsecSensor.SEND_ADDITIONAL_DATA # section A try: result[ATTACKERS_IP] = str( ipaddress.ip_address(transaction["client_ip"]) ) except ValueError: logger.warning( "Cannot use %s for IPv4 or IPv6 address", transaction["client_ip"], ) raise _MiscDataNotInteresting() # looks like there could be both string and int values, # so let's make it a string result["transaction_id"] = str(transaction.get("unique_id", "-")) # section B result[HEADERS] = [] for header, value in transaction["request"].get("headers", {}).items(): ( maybe_obfuscated_value, is_required, ) = obfuscate_if_sensitive_and_required(header, value) if is_required: result[HEADERS].append([header, 
maybe_obfuscated_value]) result[HTTP_METHOD] = transaction["request"]["method"] parsed = urllib.parse.urlparse(transaction["request"]["uri"]) result[URI] = parsed.path if additional_data_is_required: result[QUERY_PARAMS] = obfuscate(parsed.query) # section C if transaction["request"].get("body") and additional_data_is_required: result[FORM_PARAMS] = obfuscate(transaction["request"]["body"]) # section F result[STATUS_CODE] = transaction["response"]["http_code"] # section H if transaction.get("producer"): result["vendor"] = transaction["producer"]["components"] version, _ = get_producer_data( transaction["producer"]["modsecurity"] ) result["modsec_version"] = version result[ENGINE_MODE] = transaction["producer"]["secrules_engine"] if transaction.get("messages"): result["MessageList"] = [] for msg in transaction["messages"]: message = msg["details"] message["rule"] = str(message.pop("ruleid")) message["msg"] = ( msg["msg"] if "msg" in msg else msg.get("message") ) # Headers might be in lowercase e.g.: # tests/core/fixtures/test_modsec_audit_log/ # test_parser_log_sample-13_json_v3 # test_parser_log_sample-29_pretty_json_litespeed headers = transaction["request"]["headers"] hostname = headers.get("Host") or headers.get("host") message["hostname"] = hostname message["access_denied"] = result["status_code"] == 403 if message.get("msg"): message.update(get_anomality_score_items(message["msg"])) result["MessageList"].append(message) return result class _JsonPrettyLogParser(_JsonLogParserv3): def feed(self, bytes_): line = self._adaptive_decode_bytes(bytes_.rstrip()) if line: if line == "{": if self._accumulating_lines: self._accumulating_lines = [] raise ParseError( "Error occurs while parse json line %r, " "not empty line buffer on start new json" ) else: if not self._accumulating_lines: raise ParseError( "Error occurs while parse json line %r, " "empty line buffer in a middle of json dict" ) self._accumulating_lines.append(line) if line == "}": try: 
self._accumulating_result = self.parse_json_data( json.loads("".join(self._accumulating_lines)) ) return self.flush() except _MiscDataNotInteresting: return None except Exception as e: raise ParseError( "Error occurs while parse json line %r, reason: %r" % (line, e) ) from e class _NativeLogParser(Parser): """ audit log parser state machine Parse log entry ModSecurity 2 Data for Native audit log format. https://github.com/SpiderLabs/ModSecurity/wiki/ModSecurity-2-Data-Formats#Audit_Log More about ModSecurity log message format https://gerrit.cloudlinux.com/plugins/gitiles/defence360/+/refs/changes/73/111973/1/opt/DEF-21076-multiline-modsec-header-field-alert/README.txt Expected data: --2172df61-A-- [02/Feb/2018:11:49:22 +0000] WnRQQvAURie7wq9bOfH25AAAAAE ::1 57480 ::1 80 --2172df61-B-- POST /1/request?x=yxz&z=xy HTTP/1.1 User-Agent: Nessus Host: localhost Accept: */* cOOkIe: ABC=abc;SESSIONID=hash256 Content-Length: 9 Content-Type: application/x-www-form-urlencoded --2172df61-C-- bc=d123&cd=e&bc=a0 --2172df61-F-- HTTP/1.1 404 Not Found Accept-Ranges: bytes Transfer-Encoding: chunked Content-Type: text/html --2172df61-H-- Message: Warning. Matched phrase "nessus" at REQUEST_HEADERS:User-Agent. [file "/etc/apache2/conf.d/modsec_vendor_configs/imunify360_full_apache/103_Global_Agents.conf"] [line "17"] [id "210801"] [rev "2"] [msg "COMODO WAF: Request Indicates a Security Scanner Scanned the Site||localhost|F|2"] [data "nessus"] [severity "CRITICAL"] [tag "CWAF"] [tag "Agents"] Message: Warning. Pattern match "(?i:(?:^(?:microsoft url|user-Agent|www\\.weblogs\\.com|(?:jakart|vi)a|(google|i{0,1}explorer{0,1}\\.exe|(ms){0,1}ie( [0-9.]{1,}){0,1} {0,1}(compatible( browser){0,1}){0,1})$)|\\bdatacha0s\\b|; widows|\\\\r|a(?: href=|d(?:sarobot|vanced email extractor ..." at REQUEST_HEADERS:User-Agent. 
[file "/etc/apache2/conf.d/modsec_vendor_configs/imunify360_full_apache/103_Global_Agents.conf"] [line "29"] [id "210831"] [rev "2"] [msg "COMODO WAF: Rogue web site crawler||localhost|F|4"] [data "Nessus"] [severity "WARNING"] [tag "CWAF"] [tag "Agents"] Message: Warning. Operator GE matched 5 at TX:incoming_points. [file "/etc/apache2/conf.d/modsec_vendor_configs/imunify360_full_apache/122_Outgoing_FiltersEnd.conf"] [line "35"] [id "214930"] [rev "1"] [msg "COMODO WAF: Inbound Points Exceeded|Total Incoming Points: 8|localhost|F|2"] [severity "CRITICAL"] [tag "CWAF"] [tag "FiltersEnd"] Apache-Handler: default-handler Stopwatch: 1517572162346260 98908 (- - -) Stopwatch2: 1517572162346260 98908; combined=33129, p1=737, p2=32228, p3=0, p4=0, p5=163, sr=0, sw=1, l=0, gc=0 Producer: ModSecurity for Apache/2.9.2 (http://www.modsecurity.org/); CWAF_Apache. Server: Apache Engine-Mode: "DETECTION_ONLY" --2172df61-Z-- """ # noqa: E501 _AVAILABLE_SUBPARSERS = ( _SectionAParser, _SectionBParser, _SectionCParser, _SectionFParser, _SectionHParser, _SectionZParser, _SectionNotInterestedInParser, ) def __init__(self, **_): self._state_parser = None self._accumulating_result = {} self._accumulating_lines = [] def feed(self, bytes_): self._accumulating_lines.append(bytes_) return self._feed_impl(bytes_) def _feed_impl(self, bytes_): line = self._adaptive_decode_bytes(bytes_.rstrip()) if not line: return None try: # peek next available parser next_parser = next( parser for parser in self._AVAILABLE_SUBPARSERS if parser.is_applicable_since(line) ) if next_parser is _SectionZParser: return self.flush() except StopIteration: if self._state_parser is None: raise ParseError("No parser for line %r", line) else: self._state_parser = next_parser() # waiting for the next feed() return None name_value_tokens = self.parse_name_value_tokens(line) for name, value in name_value_tokens: # continue accumulating result if name == "Message": self._accumulating_result.setdefault("MessageList", 
[]).append( value ) # save meta data from ModSecurity itself # if Message data was not found before elif ( name == "ModSecurity" and "MessageList" not in self._accumulating_result ): self._accumulating_result.setdefault("MessageList", []).append( value ) elif name == HEADERS: self._accumulating_result.setdefault(HEADERS, []).append(value) elif name == "Producer": self._parse_producer_filed(value) else: self._accumulating_result[name] = value return None def _parse_producer_filed(self, line): modsec_ver, vendors = get_producer_data(line) if modsec_ver is not None: self._accumulating_result["modsec_version"] = modsec_ver if vendors is not None: self._accumulating_result["vendor"] = vendors def parse_name_value_tokens(self, line): """ Gets the result of parse_name_value_tokens and returns it as a list If ValueError excepted while message processing raises ParseError In cases when we should not interrupt parse process returns empty list. """ try: return list(self._state_parser.parse_name_value_tokens(line)) except ValueError as e: raise ParseError(str(e)) from e except (_MiscDataNotInteresting, _AmbiguousSeverity): # continue accumulating result return [] def flush(self): try: return mk_modsec_indicent_list( self._accumulating_result, debug_ctx=self._accumulating_lines ) finally: self._state_parser = None self._accumulating_result = {} self._accumulating_lines = [] class SerialLogParser(_RevolverParser): _SKIP_FIRST_NUM_ERRORS = 0 # don't skip errors _available_parsers = [ _JsonLogParserv2, _JsonLogParserv3, _NativeLogParser, _JsonPrettyLogParser, ] @classmethod def parse_file(cls, filepath): """do-it-all style""" parser = cls() with open(filepath, "rb") as filestream: for lineno, bytes_ in enumerate(filestream): try: result = parser.feed(bytes_) if result is not None: # then we are done return result except ParseError as e: # return file:line in exception for better debug experience raise MalformedFileError( "Error in audit log file %r line %d", filepath, # use line 1 
# NOTE(review): this collapsed line opens with the tail of a definition that
# begins above the visible chunk.  It cannot be re-indented reliably without
# its head, so its text is carried over verbatim:
#     as base (most text editors do): lineno + 1, ) from e
#     # return at least something
#     logger.error( "Incomplete audit log file %r: %r", filepath,
#     parser._current_parser._accumulating_lines, ) return parser.flush()


class ConcurrentLogParser(Parser):
    """
    Parser for ModSecurity concurrent audit logging.

    feed() expects a line that mentions the path of an audit log file and
    returns whatever SerialLogParser.parse_file() extracts from that file.
    """

    def __init__(self, audit_logdir_path):
        """
        :param str audit_logdir_path: audit log dir path for ModSecurity
            concurrent mode
        """
        self._audit_logdir_path = audit_logdir_path

    def _normconcat(self, token):
        """
        Prefix *token* with the audit log dir.

        :param str token: is expected to start with posixpath.sep
        """
        return self._audit_logdir_path + token

    def _directadmin_concat(self, token):
        """
        Like _normconcat(), but the first path component of *token* is
        repeated as an extra directory level between the audit log dir
        and *token*.

        :param str token: is expected to start with posixpath.sep
        """
        first_component = token.split(posixpath.sep)[1]
        return (
            self._audit_logdir_path + posixpath.sep + first_component + token
        )

    def feed(self, bytes_):
        """
        Find an audit log file referenced in *bytes_* and parse it.

        Whitespace separated tokens (with surrounding "[]" stripped) are
        examined from last to first.  Every absolute path among them is
        tried below the audit log dir (both layouts) and then as is.

        :param bytes bytes_: a line received from ModSecurity
        :return: result of SerialLogParser.parse_file() for the first
            non-empty file found
        :raise ParseError: if no token leads to a non-empty file
        """
        try:
            str_ = bytes_.decode()
        except UnicodeDecodeError:
            # nothing to look at: fall through to ParseError below
            tokens = []
        else:
            tokens = [t.strip("[]") for t in reversed(str_.split())]

        concat_funs = (self._normconcat, self._directadmin_concat)
        for token in filter(os.path.isabs, tokens):
            for concat_fun in concat_funs:
                log_path = None
                try:
                    token_path = concat_fun(token)
                    if os.path.isfile(token_path):
                        # Modsecurity v2
                        log_path = token_path
                    elif os.path.isfile(token):
                        # Modsecurity v3
                        log_path = token
                except (UnicodeEncodeError, ValueError):
                    # the token cannot be used as a path: try the next
                    # candidate, ParseError is raised if none is left
                    continue
                # lazy %-args: the message is only built when DEBUG is on
                logger.debug(
                    "os.path.isfile(%r) = %r",
                    (token_path, token),
                    bool(log_path),
                )
                with suppress(FileNotFoundError):
                    # handle race condition: the file may vanish between
                    # the isfile() check and reading it
                    if log_path and os.path.getsize(log_path) > 0:
                        return SerialLogParser.parse_file(log_path)
        raise ParseError("No audit log found in %r", bytes_)


class RevolverParser(_RevolverParser):
    # NOTE(review): how the list is consumed is defined by _RevolverParser,
    # which is outside this chunk
    _available_parsers = [SerialLogParser, ConcurrentLogParser]


class _IncidentFixupList:
    """
    Normalisation steps applied to the raw incidents built by
    mk_modsec_indicent_list().
    """

    class InvalidIncident(RuntimeError):
        """
        For incidents we cannot use data from, e.g.
        "Message: Rule processing failed."
        """

    # map modsec severity string <-> ossec modsec severity int,
    # github.com/SpiderLabs/ModSecurity/wiki/Reference-Manual#severity
    _SEVERITY_LEVELS = {
        # Severe attack - No chances of false positives.
        # Immediate attention is necessary.
        "EMERGENCY": 0,
        # High importance security event.
        "ALERT": 1,
        # Multiple user generated errors
        "CRITICAL": 2,
        # First time seen
        "ERROR": 3,
        # System low priority error
        "WARNING": 4,
        # Successful/Authorized events
        "NOTICE": 5,
        # System low priority notification
        "INFO": 6,
        # - None -
        "DEBUG": 7,
    }

    @classmethod
    def apply(cls, incidents, debug_ctx):
        """
        Yield a fixed-up copy of every usable incident.

        An incident without "msg" is only kept when its "message" is a
        string containing an unparsed "[msg " token; otherwise it is
        dropped.

        :param incidents: iterable of incident dicts
        :param debug_ctx: accepted for the callers' sake, not used by
            the fix-ups themselves
        """
        for raw_incident in incidents:
            incident = cls._fixup_camel_case(raw_incident)
            try:
                cls._fixup_msg_inplace(incident)
                cls._fixup_host_tag_inplace(incident)
                cls._fixup_severity_inplace(incident)
                cls._fixup_useragent_inplace(incident)
            except cls.InvalidIncident:
                # "message" may be absent or None here; that must not
                # abort processing of the remaining incidents
                message = incident.get("message")
                if not (isinstance(message, str) and "[msg " in message):
                    continue
                cls._fixup_unparsed_message(incident)
            yield incident

    @classmethod
    def _fixup_unparsed_message(cls, incident):
        """
        Salvage an incident whose "[msg ...]" token was left unparsed:
        tag it "noshow", set severity 7 and keep only the text that
        follows "[msg" as the message.
        """
        incident["tag"] = ["noshow"]
        incident["severity"] = 7
        incident["message"] = incident["message"].partition("[msg")[2]

    @classmethod
    def _fixup_camel_case(cls, incident):
        """
        make "Host" be in the same case as "msg", "rule", etc.

        :return dict: a new dict with lower-cased keys
        """
        return {k.lower(): v for k, v in incident.items()}

    @classmethod
    def _fixup_msg_inplace(cls, incident):
        """
        Rename "msg" to "message" and derive "name" from it.

        :raise InvalidIncident: if the incident has no "msg"
        """
        if incident.get("msg") is None:
            raise cls.InvalidIncident()
        # to avoid KeyError: 'message' in sensor_incident_aggregate.py
        msg = incident.pop("msg")
        incident["message"] = msg
        # Try to extract 'constant' part as name
        incident["name"] = msg.split("||", maxsplit=1)[0]

    @classmethod
    def _fixup_useragent_inplace(cls, incident):
        """
        fixup 'user-agent' to 'user_agent' to match agent sqlitedb naming
        """
        if "user-agent" in incident:
            incident["user_agent"] = incident.pop("user-agent")

    @classmethod
    def _fixup_host_tag_inplace(cls, incident):
        """
        remove "Host: %hostname%" field duplicate in tag
        """
        tags = incident.get("tag")
        if not tags:
            # no tags (or an empty/None value): nothing to filter
            return
        headers = incident.get(ADVANCED, {}).get(HEADERS, [])
        host = next(
            (value for header, value in headers if header == "Host"),
            None,
        )
        if host:
            duplicate = "Host: %s" % host
            incident["tag"] = [tag for tag in tags if tag != duplicate]

    @classmethod
    def _fixup_severity_inplace(cls, incident):
        """
        map modsec severity string <-> ossec modsec severity int,
        incident table expects int for severity and also
        consistent severity level is good for ML.

        Numeric strings are converted with int(); severity names are
        looked up case-insensitively.  An unknown name is logged and
        left untouched.
        """
        severity = incident.get("severity")
        if severity is None:
            # this will be OK with imunify360.db
            return

        try:
            incident["severity"] = int(severity)  # for modsec 3.x
            return
        except ValueError:
            pass

        name = severity.upper() if isinstance(severity, str) else severity
        try:
            incident["severity"] = cls._SEVERITY_LEVELS[name]
        except KeyError:
            logger.error(
                "Cannot measure severity level for %s literal in %s plugin",
                repr(severity),
                repr(ModsecSensor.PLUGIN_ID),
            )


def mk_modsec_indicent_list(top_level_tokens, debug_ctx=None):
    """
    unroll modsec audit log into incident list

    One incident is built per entry of top_level_tokens["MessageList"];
    without that key the result is empty.

    :param top_level_tokens: see how the unit tests describe this data
        structure
    :param debug_ctx: to be shown in sentry incident
    :return list:
    """
    raw_incidents = []
    if "MessageList" in top_level_tokens:
        user_id = user_identity(
            top_level_tokens.get(ATTACKERS_IP, ""),
            dict(top_level_tokens.get(HEADERS, [])),
        )
        for message_enclosing_tokens in top_level_tokens["MessageList"]:
            incident = {
                "method": "INCIDENT",
                "plugin_id": ModsecSensor.PLUGIN_ID,
            }
            # We lookup PICK_SECAUDITLOG_FIELDS fields
            # first in "Message: [name value] [name value]" tokens
            # then in "Host: %name%", "User-Agent: %ua" top level fields
            incident.update(
                {
                    field: (
                        message_enclosing_tokens.get(field)
                        or top_level_tokens.get(field)
                    )
                    for field in PICK_SECAUDITLOG_FIELDS
                    if field in message_enclosing_tokens
                    or field in top_level_tokens
                }
            )

            # generate `advanced` section
            advanced = {HEADERS: top_level_tokens.get(HEADERS, [])}
            for field in (URI, HTTP_METHOD):
                if top_level_tokens.get(field):
                    advanced[field] = top_level_tokens[field]
            incident[ADVANCED] = advanced

            incident[USER_IDENTITY_FIELD] = user_id
            raw_incidents.append(incident)

    return list(_IncidentFixupList.apply(raw_incidents, debug_ctx))
# NOTE(review): this collapsed line opens with the tail of a definition that
# begins on the preceding line.  It cannot stand on its own as code, so its
# text is carried over verbatim:
#     incident.update({USER_IDENTITY_FIELD: user_id})
#     raw_incidents.append(incident)
#     result = [*_IncidentFixupList.apply(raw_incidents, debug_ctx)]
#     return result


def get_anomality_score_items(msg: str) -> dict:
    """
    Extract inbound/outbound anomaly scores mentioned in *msg*.

    :return dict: "<inbound|outbound>_anomality_score" -> the matched
        digits, kept as a string; the last match wins for each key
    """
    result = {}
    for score_type, value in _PARSE_SCORE_REGEX.findall(msg):
        key = "%s_anomality_score" % score_type.lower()
        assert key in ANOMALITY_SCORE_FIELDS, (
            "invalid anomality score key %s detected" % key
        )
        result[key] = value
    return result


def obfuscate_if_sensitive_and_required(
    name: str, value: str
) -> Tuple[Optional[str], bool]:
    """
    Return value/obfuscated value if header is (not) sensitive
    and whether it is required.

    The header counts as required when obfuscation left it unchanged or
    when ModsecSensor.SEND_ADDITIONAL_DATA is set.
    """
    result_value = obfuscate_item_if_sensitive(name, value)
    is_required = result_value == value or ModsecSensor.SEND_ADDITIONAL_DATA
    return (result_value, is_required)


def obfuscate_item_if_sensitive(name, value) -> Optional[str]:
    """
    If header name is 'authorization' or 'cookie', then obfuscate it
    so as not to disclose sensitive client info.

    The header name is matched case-insensitively; any other header value
    is returned as is.
    """
    name = name.lower()
    if name == _COOKIE:
        return obfuscate_cookie(value)
    if name in _SENSITIVE_HEADERS:
        return obfuscate_item(value)
    return value


def obfuscate_cookie(cookie):
    """
    Obfuscate every value of a Cookie header.

    :return: list of [name, obfuscated value] pairs sorted by name, or
        the error text if SimpleCookie raises CookieError
    """
    try:
        sc = SimpleCookie(cookie)
    except CookieError as e:
        return str(e)
    return [[k, obfuscate_item(v.value)] for k, v in sorted(sc.items())]


def _obfuscate_items(data: dict):
    """
    Obfuscate every item of every value list in *data*.

    Keys whose value list is empty are left out of the result.
    """
    return {
        k: [obfuscate_item(item) for item in items]
        for k, items in data.items()
        if items
    }


def obfuscate(query):
    """Parse a query string with parse_qs() and obfuscate all its values."""
    return _obfuscate_items(parse_qs(query))


# characters replaced with a readable placeholder by obfuscate_item()
_OBFUSCATED_SPECIAL_CHARS = {
    " ": "[space]",
    "\t": "[tab]",
    "\n": "[LF]",
    "\r": "[CR]",
    "\x00": "[NULL]",
}

# placeholder label and character test, in the order they are tried
_OBFUSCATED_CHAR_CLASSES = (
    ("chr", str.isalpha),
    ("digit", str.isnumeric),
)


def obfuscate_item(item: Optional[str]) -> Optional[str]:
    """
    Replace the content of *item* with a description of its shape.

    Runs of letters become "[chr]" / "[chr]{N}", runs of numeric
    characters become "[digit]" / "[digit]{N}", space, tab, LF, CR and
    NUL become named placeholders, any other character is kept.

    NOTE: the value is not url-decoded here.

    :return: the obfuscated string; empty or None input is returned as is
    """
    if not item:
        # nothing to obfuscate
        return item

    obf_buff = []
    pos, length = 0, len(item)
    while pos < length:
        char = item[pos]
        placeholder = _OBFUSCATED_SPECIAL_CHARS.get(char)
        if placeholder is not None:
            obf_buff.append(placeholder)
            pos += 1
            continue
        for label, belongs in _OBFUSCATED_CHAR_CLASSES:
            if not belongs(char):
                continue
            # the run continues while the SAME test holds
            end_pos = pos + 1
            while end_pos < length and belongs(item[end_pos]):
                end_pos += 1
            run_length = end_pos - pos
            if run_length <= 1:
                obf_buff.append("[%s]" % label)
            else:
                obf_buff.append("[%s]{%d}" % (label, run_length))
            pos = end_pos
            break
        else:
            # neither special, letter nor numeric: keep as is
            obf_buff.append(char)
            pos += 1
    return "".join(obf_buff)


# every component may have several digits, e.g. "3.0.12"
_MODSEC_VERSION_REGEX = re.compile(r"\d+\.\d+\.\d+")


def get_producer_data(line: str) -> tuple:
    """
    Split a "Producer" value into ModSecurity version and vendors.

    Examples of *line*:
        ModSecurity for Apache/2.9.0 (http://www.modsecurity.org/);
        CWAF_Apache.

        ModSecurity for Apache/2.5.5

    :return tuple: (version, vendors); version is the first "X.Y.Z" found
        before the first ";" or None; vendors is the list of ";" separated
        parts that follow, stripped of ".", "|" and spaces, or None if
        there is no ";"
    """
    version, *rest = line.split(";")
    vendors = [vendor.strip(".| ") for vendor in rest] if rest else None
    match = _MODSEC_VERSION_REGEX.search(version)
    modsec_ver = match.group() if match else None
    return modsec_ver, vendors