Source code for feedr.formatters
# TODO: (FEAT) support additional apache formats (http://ossec-docs.readthedocs.org/en/latest/log_samples/apache/apache.html) # NOQA
# TODO: (FEAT) maybe consolidate Json and Custom formatters to the same class, to let other custom formatters use them both for each type implemented # NOQA
# TODO: (FEAT) implement Tomcat Formatter
import random
from abc import abstractmethod, ABCMeta
from faker import Factory
from format_mappings import InHouseFaker
import json
import uuid
DEFAULT_CUSTOM_FORMAT = [
'current_date_time', ' ', 'uuid', ' ', 'level', ': ',
'module', ' - ', 'free_email'
]
DEFAULT_CUSTOM_DATA = {
'current_date_time': '$RAND',
'uuid': [str(uuid.uuid1()) for i in xrange(100)],
'level': ['WARNING', 'ERROR', 'DEBUG', 'INFO', 'CRITICAL'],
'module': ['module1', 'module2'],
'free_email': '$RAND',
}
DEFAULT_JSONIFY = True
[docs]def fake_data(data_type):
"""returns fake data for the data type requested.
will try and get fake data from the local format mappings.
if fake data for the specific data type wasn't found, it will try and use
fake-factory to fake the data.
:param string data_type: the type of data to fake.
:rtype: string
"""
fake = Factory.create()
try:
return getattr(InHouseFaker(), 'default')(data_type)
except KeyError:
if hasattr(InHouseFaker, data_type):
return getattr(InHouseFaker(), data_type)()
elif hasattr(fake, data_type):
return str(getattr(fake, data_type)())
# except AttributeError:
print('cannot randomize data for {0}. run "feedr list fake" '
'to print a list of possible types.'.format(data_type))
raise RuntimeError('cannot randomize data type {0}'.format(
data_type))
[docs]class BaseFormatter(object):
"""base class for all formatters
"""
__metaclass__ = ABCMeta
@abstractmethod
def __init__(self, config):
return
@abstractmethod
[docs] def f(self, config, name):
"""retrieves configuration keys
if config wasn't supplied or a key doesn't exist, returns $RAND
which will initiate data faking
"""
return config['data'].get(name, '$RAND') if config else '$RAND'
[docs]class CustomFormatter(BaseFormatter):
"""returns a generated log string in a custom format
this is also a formatter other formatters
can rely on to generate application specific logs.
see the ApacheAccessFormatter class for reference.
for every item in the format list, if an item in the data dict
corresponds with it and the field's data equals "$RAND", use faker
to fake an item for it. else, choose one item from the list randomly.
if there no item in the data to correspond with the format, it will
just append to format's field name to the log.
example:
.. code-block:: python
'CustomFormatter': {
'format': ['name', ' - ', 'level'],
'data': {
'name': $RAND,
'level': ['ERROR', 'DEBUG', 'INFO', 'CRITICAL'],
}
}
the output of the above example might be:
.. code-block:: python
Sally Fields - ERROR
or
Jason Banks - DEBUG
or
Danny Milwee - ERROR
or
...
"""
def __init__(self, config):
self.format = config.get('format', DEFAULT_CUSTOM_FORMAT)
self.data = config.get('data', DEFAULT_CUSTOM_DATA)
[docs] def generate_data(self):
"""this will generate a message according to `self.format`
with data from `self.data`.
all fields in the data dict will be iterated over and matched to
the items in the format list. if a match is found and $RAND is set
in one of the fields, random data will be generated for that field.
If not, data will be chosen from the list.
If no match is found, the explicit item in the format list will be
appended.
example:
.. code-block:: python
format = ['Mr. ' 'first_name', 'last_name']
data = {
'first_name': ['Jason, Josh]',
'last_name': '$RAND'
}
the output of the above example might be:
.. code-block:: python
'Mr. Jason Williams'
or
'Mr. Josh Brolin'
or
'Mr. Jason Bananas'
...
"""
log = ''
# iterate over the format
for field_name in self.format:
# for each field in the data dictionary
for field, data in self.data.items():
# if the field name exists in the format and the data
if field_name == field:
# and rand is set
if data == '$RAND':
# fake the data
log += fake_data(field_name)
else:
# else choose randomly from the field
log += random.choice(self.data[field_name])
# if the field doesn't exist in the data, only in the format
if field_name not in self.data.keys():
# we'll assume that the field name in the format itself
# should be appended to the log.
log += field_name
return log
[docs]class JsonFormatter(BaseFormatter):
"""generates log strings in json format (or leave as dict)
all fields in the data dict will be iterated over.
if $RAND is set in one of the fields, random data will be generated
for that field. If not, data will be chosen from the list.
example:
.. code-block:: python
'JsonFormatter': {
'data': {
'date_time': '$RAND',
'level': ['ERROR', 'DEBUG'],
'address': '$RAND',
}
},
the output of the above example might be:
.. code-block:: python
{"date_time": "2006-11-05 13:31:09", "name": "Miss Nona Breitenberg DVM", "level": "ERROR"} # NOQA
or
{"date_time": "1985-01-20 11:41:16", "name": "Almeda Lindgren", "level": "DEBUG"} # NOQA
or
{"date_time": "1973-05-21 01:06:04", "name": "Jase Heaney", "level": "DEBUG"} # NOQA
or
...
"""
def __init__(self, config):
self.data = config.get('data', DEFAULT_CUSTOM_DATA)
self.jsonify = config.get('jsonify', DEFAULT_JSONIFY)
self.stringify = config.get('stringify', False)
[docs] def generate_data(self):
# TODO: (FEAT) support randomizing data fields in Json formatter
log = {}
for field, data in self.data.items():
if data == '$RAND':
log[field] = fake_data(field)
else:
log[field] = random.choice(data)
if self.jsonify:
log = json.dumps(log)
if self.stringify:
log = str(log)
return log
[docs]class ApacheAccessFormatter(CustomFormatter):
"""returns an apache-access-log like string
you can easily construct new formatters by inheriting the custom formatter.
all you have to do is specify the format and the data. a helper
method `f` is supplied in the `BaseFormatter` Class that will allow you to
retrieve basic formatter configuration for your fields.
"""
# 192.168.72.177 - - [22/Dec/2002:23:32:19 -0400] "GET /search.php HTTP/1.1" 400 1997 www.yahoo.com "-" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; ...)" "-" # NOQA
# %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-) # NOQA
def __init__(self, config):
self.format = [
'ipv4', ' - - [', 'current_day_of_month', '/',
'current_month_name_short', '/',
'current_year', ':', 'current_time', ' ',
'current_time_zone_number', '] "', 'http_verbs',
' /', 'uri_path', ' ', 'http_versions', '" ',
'http_error_codes', ' ', 'random_int'
]
self.data = {
'ipv4': self.f(config, 'ipv4'),
'current_day_of_month': self.f(config, 'current_day_of_month'),
'current_month_name_short': self.f(config, 'current_month_name_short'), # NOQA
'current_year': self.f(config, 'current_year'),
'current_time': self.f(config, 'current_time'),
'current_time_zone_number': self.f(config, 'current_time_zone_number'), # NOQA
'http_versions': self.f(config, 'http_versions'),
'http_verbs': self.f(config, 'http_verbs'),
'uri_path': self.f(config, 'uri_path'),
'http_error_codes': self.f(config, 'http_error_codes'),
'random_int': self.f(config, 'random_int'),
}
[docs]class ApacheAccessExFormatter(CustomFormatter):
"""returns an apache-extended-access-log like string"""
# http://httpd.apache.org/docs/2.2/logs.html
# 127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326 # NOQA
# 192.168.2.20 - - [28/Jul/2006:10:27:10 -0300] "GET /cgi-bin/try/ HTTP/1.0" 200 3395 # NOQA
# %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "(?:%{WORD:verb} %{NOTSPACE:request}(?: HTTP/%{NUMBER:httpversion})?|%{DATA:rawrequest})" %{NUMBER:response} (?:%{NUMBER:bytes}|-) # NOQA
def __init__(self, config):
self.format = [
'ipv4', ' - - [', 'current_day_of_month', '/',
'current_month_name_short', '/',
'current_year', ':', 'current_time', ' ',
'current_time_zone_number', '] "', 'http_verbs',
' /', 'uri_path', ' ', 'http_versions', '" ',
'http_error_codes', ' ', 'random_int', ' "',
'uri', '" "', 'user_agent', '"'
]
self.data = {
'ipv4': self.f(config, 'ipv4'),
'current_day_of_month': self.f(config, 'current_day_of_month'),
'current_month_name_short': self.f(config, 'current_month_name_short'), # NOQA
'current_year': self.f(config, 'current_year'),
'current_time': self.f(config, 'current_time'),
'current_time_zone_number': self.f(config, 'current_time_zone_number'), # NOQA
'http_versions': self.f(config, 'http_versions'),
'http_verbs': self.f(config, 'http_verbs'),
'uri_path': self.f(config, 'uri_path'),
'http_error_codes': self.f(config, 'http_error_codes'),
'random_int': self.f(config, 'random_int'),
'uri': self.f(config, 'uri'),
'user_agent': self.f(config, 'user_agent'),
}
[docs]class ApacheErrorFormatter(CustomFormatter):
"""returns an apache-error-log like string"""
# [Fri Dec 16 01:46:23 2005] [error] [client 1.2.3.4] Directory index forbidden by rule: /home/test/ # NOQA
# [Mon Dec 19 23:02:01 2005] [error] [client 1.2.3.4] user test: authentication failure for "/~dcid/test1": Password Mismatch # NOQA
def __init__(self, config):
self.format = [
'[', 'current_day_of_week_short', ' ', 'current_month_name_short',
' ',
'current_day_of_month', ' ', 'current_time', ' ', 'current_year',
'] [',
'syslog_error_levels_lower', '] [client ', 'ipv4', '] ',
'catch_phrase'
]
self.data = {
'current_day_of_week_short': self.f(config, 'current_day_of_week_short'), # NOQA
'ipv4': self.f(config, 'ipv4'),
'current_day_of_month': self.f(config, 'current_day_of_month'),
'current_month_name_short': self.f(config, 'current_month_name_short'), # NOQA
'current_year': self.f(config, 'current_year'),
'current_time': self.f(config, 'current_time'),
'catch_phrase': self.f(config, 'catch_phrase'),
'syslog_error_levels_lower': self.f(config, 'syslog_error_levels_lower'), # NOQA
}