0x00000001


Performance Metrics & Splunk

Install & configure collectd

yum install -y epel-release
yum install -y collectd policycoreutils-python
setsebool -P collectd_tcp_network_connect 1
semanage permissive -a collectd_t
service collectd restart


/etc/collectd.conf

FQDNLookup false
LoadPlugin logfile
LoadPlugin cpu
LoadPlugin df
LoadPlugin disk
LoadPlugin interface
LoadPlugin load
LoadPlugin memory
LoadPlugin processes
LoadPlugin protocols
LoadPlugin swap
LoadPlugin tcpconns
#LoadPlugin thermal
LoadPlugin uptime

<Plugin memory>
  ValuesAbsolute true
  ValuesPercentage true
</Plugin>

<Plugin swap>
  ReportByDevice true
  ValuesPercentage true
</Plugin>

<Plugin df>
  ReportByDevice true
  ValuesPercentage true
</Plugin>

<Plugin logfile>
  LogLevel info
  File "/var/log/collectd.log"
  Timestamp true
  PrintSeverity false
</Plugin>

<Plugin load>
  ReportRelative true
</Plugin>

<Plugin processes>
  ProcessMatch "all" "(.*)"
</Plugin>

<Plugin cpu>
  ValuesPercentage true
</Plugin>

<Plugin interface>
  Interface "lo"
  IgnoreSelected false
  ReportInactive true
  UniqueName false
</Plugin>

<Plugin protocols>
  Value "/^Tcp:/"
  IgnoreSelected false
</Plugin>

<Plugin tcpconns>
  ListeningPorts true
  AllPortsSummary true
</Plugin>

Collectd to Splunk HEC

Workflow

[System] -> [Splunk HF HEC] -> [Splunk Indexer]

System Configuration

/etc/collectd.d/hec.conf

LoadPlugin write_http

<Plugin write_http>
  <Node "HEC">
    URL "https://<splunk-hec>:8088/services/collector/raw"
    Header "Authorization: Splunk <token>"
    Format "JSON"
    VerifyPeer false
    VerifyHost false
    Metrics true
    StoreRates true
  </Node>
</Plugin>

HF HEC Configuration

inputs.conf

[http]
port = 8088
disabled = 0

[http://collectd]
token = <guid>
disabled=0
index=collectd
source=collectd token
sourcetype=collectd_http
outputgroup = INDEXER

outputs.conf

[indexAndForward]
index = false

[tcpout:INDEXER]
server = <indexer_ip>:<indexer_port>
useACK=false
maxQueueSize = 50MB

Indexer Configuration

inputs.conf

[splunktcp://9997]
index =  collectd

indexes.conf

[collectd]
datatype   = metric
homePath   = $SPLUNK_DB/collectd/db
coldPath   = $SPLUNK_DB/collectd/colddb
thawedPath = $SPLUNK_DB/collectd/thaweddb


Collectd to Splunk TCP

Workflow

[System] -> [Splunk HF TCP] -> [Splunk Indexer]


System Configuration

/etc/collectd.d/tcp.conf

LoadPlugin write_graphite

<Plugin write_graphite>
  <Carbon>
    Host "<splunk-hf>"
    Port "9997"
    Protocol "tcp"
  </Carbon>
</Plugin>

HF TCP Configuration

inputs.conf

[tcp://9997]
index = collectd
sourcetype = graphite_collectd
_TCP_ROUTING = INDEXER

outputs.conf

[indexAndForward]
index = false

[tcpout:INDEXER]
server = <indexer_ip>:<indexer_port>
useACK=false
maxQueueSize = 50MB

Indexer Configuration

inputs.conf

[splunktcp://9997]
index =  collectd

indexes.conf

[collectd]
homePath   = $SPLUNK_DB/collectd/db
coldPath   = $SPLUNK_DB/collectd/colddb
thawedPath = $SPLUNK_DB/collectd/thaweddb

props.conf

[graphite_collectd]
TIME_PREFIX = ^.+\..+\..+\s.+\s
EXTRACT-metric_value = ^(?P<host>[^\.]+)[^\.\n]*\.(?P<object>[^\.]+)\.(?P<metric>[^ ]+)\s+(?P<value>[^ ]+)
EXTRACT-metric_kv = ^(?P<host>[^\.]+)[^\.\n]*\.(?P<object>[^\.]+)\.(?P<_KEY_1>[^ ]+)\s+(?P<_VAL_1>[^ ]+)
SHOULD_LINEMERGE=false
TRANSFORMS-mask1= mask-collectd1
TRANSFORMS-mask2= mask-collectd2
TRANSFORMS-mask3= mask-collectd3
TRANSFORMS-mask4= mask-collectd4
TRANSFORMS-mask5= mask-collectd5
TRANSFORMS-mask6= mask-collectd6
TRANSFORMS-mask7= mask-collectd7
TRANSFORMS-mask8= mask-collectd8
TRANSFORMS-mask9= mask-collectd9
EVAL-host = replace(host,"_",".")

transforms.conf

[mask-collectd1]
REGEX = ^([^_]+)\..+\..+\s.+\s.+
DEST_KEY = MetaData:Host
FORMAT =  host::$1

[mask-collectd2]
REGEX = ^([^_]+)_([^_.]+)\..+\..+\s.+\s.+
DEST_KEY = MetaData:Host
FORMAT =  host::$2.$1

[mask-collectd3]
REGEX = ^([^_]+)_([^_]+)_([^_.]+)\..+\..+\s.+\s.+
DEST_KEY = MetaData:Host
FORMAT =  host::$3.$2.$1

[mask-collectd4]
REGEX = ^([^_]+)_([^_]+)_([^_]+)_([^_.]+)\..+\..+\s.+\s.+
DEST_KEY = MetaData:Host
FORMAT =  host::$4.$3.$2.$1

[mask-collectd5]
REGEX = ^([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_.]+)\..+\..+\s.+\s.+
DEST_KEY = MetaData:Host
FORMAT =  host::$5.$4.$3.$2.$1

[mask-collectd6]
REGEX = ^([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_.]+)\..+\..+\s.+\s.+
DEST_KEY = MetaData:Host
FORMAT =  host::$6.$5.$4.$3.$2.$1

[mask-collectd7]
REGEX = ^([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_.]+)\..+\..+\s.+\s.+
DEST_KEY = MetaData:Host
FORMAT =  host::$7.$6.$5.$4.$3.$2.$1

[mask-collectd8]
REGEX = ^([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_.]+)\..+\..+\s.+\s.+
DEST_KEY = MetaData:Host
FORMAT =  host::$8.$7.$6.$5.$4.$3.$2.$1

[mask-collectd9]
REGEX = ^([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_]+)_([^_.]+)\..+\..+\s.+\s.+
DEST_KEY = MetaData:Host
FORMAT =  host::$9.$8.$7.$6.$5.$4.$3.$2.$1


Bonus: Collectd to Splunk CSV monitor

Workflow

[System] -> [Splunk HF Monitor] -> [Splunk Indexer]

collectd csv plugin (/usr/local/lib/collectd/python/python_csv_plugin.py)

import collectd

# File that write() appends its CSV rows to.
# NOTE(review): "METRCIS" looks like a typo for "METRICS"; the name is left
# unchanged here because write() refers to it by this spelling.
COLLECTD_METRCIS_LOG = "/var/log/collectd-metrics.log"

def config(ObjConfiguration):
    """Configuration callback: emits a debug message and nothing else.

    ObjConfiguration is accepted but never read.
    """
    collectd.debug("Configuring")

def init():
    """Initialization callback: emits a debug message and nothing else."""
    collectd.debug("Initialization")

def write(vl, data=None):
    """Append one CSV line per entry of ``vl.values`` to the metrics log.

    Every line has the form ``time,host,plugin,type,plugin_instance,value``
    followed by a newline. ``data`` is accepted but not used.
    """
    # Format all lines first, so nothing is opened if formatting fails.
    lines = []
    for sample in vl.values:
        fields = (vl.time, vl.host, vl.plugin,
                  vl.type, vl.plugin_instance, sample)
        lines.append("%s,%s,%s,%s,%s,%s\n" % fields)

    # The file is opened (and created if missing) even when there are no lines.
    with open(COLLECTD_METRCIS_LOG, 'a+') as log:
        log.writelines(lines)

# Register this module's config, init and write functions with collectd.
collectd.register_config(config)
collectd.register_init(init)
collectd.register_write(write)

/etc/collectd.d/csv.conf

LoadPlugin python

<Plugin python>
  ModulePath "/usr/local/lib/collectd/python"
  LogTraces true
  Interactive false
  Import python_csv_plugin
  <Module python_csv_plugin>
  </Module>
</Plugin>

Log Sample (/var/log/collectd-metrics.log):

1515962589.16,system,processes,ps_rss,collectd,12378112.0
1515962589.16,system,processes,ps_vm,collectd,1015365632.0
1515962589.16,system,processes,ps_stacksize,collectd,2176.0
1515962589.16,system,processes,ps_cputime,collectd,600000
1515962589.16,system,processes,ps_cputime,collectd,1150000
1515962589.16,system,processes,ps_code,collectd,15937536.0
1515962589.16,system,processes,ps_pagefaults,collectd,11078
1515962589.16,system,processes,ps_pagefaults,collectd,0
1515962589.16,system,processes,io_octets,collectd,2785618
1515962589.16,system,processes,io_octets,collectd,503948


Refs