#!/usr/local/bin/php
<?php

/*
 * Gaby Vanhegan <gaby@vanhegan.net> 20060411
 *
 * Munge processed logs from flowd and put them into a database structure,
 * the same as used by pmacctd, so we can generate nice graphs from them.
 * This involves using a 5-minute time slice to cut the data up a bit, to
 * hopefully keep the volume down a bit.
 *
 */

/*
Sample log lines:

FLOW recv_time 2006-04-12T12:00:42.10691282 proto 17 tcpflags 00 tos 00 agent [127.0.0.1] src [195.224.72.148]:9157 dst [195.224.72.2]:53 packets 1 octets 75 
FLOW recv_time 2006-04-12T12:00:42.10691282 proto 17 tcpflags 00 tos 00 agent [127.0.0.1] src [195.224.72.2]:53 dst [195.224.72.148]:9157 packets 1 octets 131 
FLOW recv_time 2006-04-12T12:00:42.10691282 proto 17 tcpflags 00 tos 00 agent [127.0.0.1] src [195.224.72.148]:17883 dst [195.224.72.2]:53 packets 1 octets 70 
FLOW recv_time 2006-04-12T12:00:42.10691282 proto 17 tcpflags 00 tos 00 agent [127.0.0.1] src [195.224.72.2]:53 dst [195.224.72.148]:17883 packets 1 octets 126 

*/

// Working folder for this script; the cache file is kept in here.
$BASE = '/home/whoever/path/to/file';

// Folder that holds the RRD files.
$BASE_RRD = '/var/symon/rrds/localhost';

// The flowd log that new entries are read from.
$LOG = '/path/to/flowd.log';

// Scratch file the log is moved to while it is being read.
$LOG_TMP = tempnam( '/tmp', 'rrd-flowd' );

// Entries that are held back for a later run are stored here.
// The script must be able to write to this file.
$CACHE = $BASE . '/flowd.cache';

// The IP address traffic is counted for.  It is what tells inbound
// from outbound traffic, and internal from external connections.
$MY_IP = '1.2.3.4';

// Service groups and the port numbers each one covers, as
//
//	"name"	=> array( 1, 2, 3, 4, 5, etc ),
//
// The name picks the rrd file: a group called 'rpc' is stored in
// $BASE_RRD/flowd_rpc.rrd
//
// The create script already builds an rrd for every group listed
// here, so a group added below needs its rrd file created as well.
$services = array(
	"smtp"	=> array( "25",  "587" ),
	"pop"	=> array( "110", "995" ),
	"imap"	=> array( "143", "993" ),
	"ssh"	=> array( "22" ),
	"http"	=> array( "80",  "443" ),
	"ftp"	=> array_merge( array( "21" ), range( 57000, 58000 ) ),
	"mysql"	=> array( "3306" ),
	"dns"	=> array( "53" ),
	"im"	=> array_merge(
		array( "5060", "5190", "5297", "5298", "5353", "5678" ),
		range( 16384, 16403 ),
		array( "5222", "5333" )
	),
);

/* --- === Nothing to edit below this line === --- */

// Print a message, one timestamped line of output per line of input.
// Output is switched off as shipped: the function returns false before
// printing anything.  Set $debug to true to see the messages.
function log_msg ( $input ) {

	$debug	= false;
	if ( ! $debug ) {
		return false;
	}

	foreach ( explode( "\n", trim( $input ) ) as $line ) {
		$when	= date( "Y-m-d H:i:s" );
		print "[$when] $line\n";
	}
}

// Lookup table from the protocol number in a log line to its name
$protocols = array(
	"1"		=> "ICMP",
	"2"		=> "IGMP",
	"6"		=> "TCP",
	"17"	=> "UDP",
);

// Turn the service table inside out: port number => service name.
// A port listed under more than one service ends up with the last one.
$service_by_port = array();
foreach ( $services as $service_name => $port_list ) {
	foreach ( $port_list as $port_number ) {
		$service_by_port[ $port_number ] = $service_name;
	}
}

// Traffic totals, filled in while the cache is processed further down:
// $lists[ service name ][ time slice ][ "ext_in" / "ext_out" / "int_in" / "int_out" ]
$lists		= array();

// Move the log file out of active duty
if ( file_exists( $LOG ) ) {

	// The rename replaces the empty placeholder that tempnam() created
	if ( rename( $LOG, $LOG_TMP ) ) {

		// Append the logfile to end of the cache
		$fh				= fopen( $LOG_TMP, "r" );
		$cache			= fopen( $CACHE,   "a" );
		$cache_write	= 0;

		if ( $fh !== false && $cache !== false ) {

			// Test against false explicitly, so a line PHP treats as
			// falsy (a final "0" without a newline) cannot end the copy early
			while ( ( $line = fgets( $fh ) ) !== false ) {
				$line	= trim( $line );
				fputs( $cache, "$line\n" );
				$cache_write++;
			}

			log_msg( "Wrote: $cache_write lines to $CACHE from $LOG_TMP" );

			// Close both files
			fclose( $cache );
			fclose( $fh    );

			// Everything is in the cache now, clear the temp logfile
			unlink( $LOG_TMP );
		}
		else {

			// Close whichever of the two did open
			if ( $fh !== false ) {
				fclose( $fh );
			}
			if ( $cache !== false ) {
				fclose( $cache );
			}

			// The temp file holds the only copy of these entries, so it
			// is deliberately left in place instead of being deleted
			error_log( "Could not copy $LOG_TMP into $CACHE, leaving $LOG_TMP in place" );
		}
	}
	else {

		error_log( "Could not move $LOG to $LOG_TMP" );

		// The log was not moved, so $LOG_TMP is still the empty placeholder
		if ( is_string( $LOG_TMP ) && file_exists( $LOG_TMP ) ) {
			unlink( $LOG_TMP );
		}
	}
}
else {

	log_msg( "No new data to cache" );

	// tempnam() creates its file straight away; without this an empty
	// file is left behind on every run that finds no log
	if ( is_string( $LOG_TMP ) && file_exists( $LOG_TMP ) ) {
		unlink( $LOG_TMP );
	}
}

// Split a flowd endpoint such as "[195.224.72.2]:53" into array( ip, port ).
// The address is taken from between the brackets, so addresses that contain
// colons themselves (IPv6) come out whole.  Anything not in that shape is
// handled the old way: brackets stripped, then split on the first colon.
// Both elements are always present; a missing port comes back as "".
function flow_endpoint_parts ( $endpoint ) {

	if ( preg_match( '/^\[(.*)\]:(.*)$/', $endpoint, $parts ) ) {
		return array( $parts[1], $parts[2] );
	}

	$bare	= str_replace( array( "[", "]" ), "", $endpoint );
	return array_pad( explode( ":", $bare, 2 ), 2, "" );
}

// Now open up our composite file.  A missing cache only means there is
// nothing to process yet.  A cache that exists but cannot be read is fatal:
// carrying on would let the rewrite of the cache further down empty it.
$fh				= false;
if ( file_exists( $CACHE ) ) {
	$fh	= fopen( $CACHE, "r" );
	if ( $fh === false ) {
		error_log( "Cannot read $CACHE, stopping so that it is not overwritten" );
		exit( 1 );
	}
}
else {
	log_msg( "No cache file at $CACHE yet" );
}

// What is the time the script was run?
$right_now		= time();

// What is the cut off time for entries to process?  This is the start of
// the 5-minute slice that contains "five minutes ago".
$cut_off_time	= ( $right_now - 300 );
$cut_off_time	= ( $cut_off_time - ( $cut_off_time % 300 ) );

// Lines that we should cache instead of processing now
$new_cache		= array();

// How many FLOW lines have been handled (only used by the debug line below)
$count			= 0;

// Now going through our log data...
if ( $fh !== false ) {

	while ( ( $line = fgets( $fh ) ) !== false ) {

		$line	= trim( $line );

		// Only FLOW lines are of interest.  preg_match() is used because
		// eregi() was removed in PHP 7.
		if ( ! preg_match( '/^FLOW .+/i', $line ) ) {
			continue;
		}

		$matches	= explode( " ", $line );

		/*
		[0] => FLOW
		[1] => recv_time
		[2] => 2006-04-12T12:00:42.10691282
		[3] => proto
		[4] => 17
		[5] => tcpflags
		[6] => 00
		[7] => tos
		[8] => 00
		[9] => agent
		[10] => [127.0.0.1]
		[11] => src
		[12] => [195.224.72.2]:53
		[13] => dst
		[14] => [195.224.72.148]:17883
		[15] => packets
		[16] => 1
		[17] => octets
		[18] => 126
		*/

		// A truncated line does not have all of the fields above
		if ( count( $matches ) < 19 ) {
			log_msg( "Skipping short line: $line" );
			continue;
		}

		$count++;
		//if ( ( $count % 500 ) == 0 ) { print "Handled: $count\n"; }

		// "2006-04-12T12:00:42.10691282" becomes "2006-04-12 12:00:42":
		// the T turns into a space and the fractional seconds are dropped
		$stamp_parts	= explode( ".", str_replace( "T", " ", $matches[2] ), 2 );
		$the_time		= strtotime( $stamp_parts[0] );

		// strtotime() reports failure as false (-1 on very old PHP)
		if ( $the_time === false || $the_time === -1 ) {
			log_msg( "Skipping line with unreadable time: $line" );
			continue;
		}

		// Round down to the start of the 5-minute slice
		$the_time	= $the_time - ( $the_time % 300 );

		list( $src_ip, $src_port )	= flow_endpoint_parts( $matches[12] );
		list( $dst_ip, $dst_port )	= flow_endpoint_parts( $matches[18 - 4] );

		$bytes		= sprintf( "%u", $matches[18] );

		// If the time of this entry is at or after the cut off time, we'll
		// keep this entry in the cache for processing next time
		if ( $the_time >= $cut_off_time ) {
			array_push( $new_cache, $line );
			continue;
		}

		// Otherwise, we'll process it now.
		//
		// There are two types of connection, inbound and outbound.
		//
		// Inbound connections are connections that originate from
		// the outside world, eg an incoming request for a web page.
		// These are easily identified by ourselves being the
		// target IP, and the destination port being that of a known
		// service we run.  The response usually goes over the same
		// connection, so will have us as the source IP, and the same
		// service as the source port.
		//
		// Outbound connections are originated from the machine, such
		// as Gaby being ssh'd in and wget'ing a file from a website,
		// or scp'ing a file from the machine to another.  These
		// connections have ourselves as the source IP, but a known
		// service as the destination port.  The response will come to
		// our IP, but will originate from the same known port on
		// the remote machine.
		//
		// This allows us to differentiate between traffic that the
		// world generates, and traffic that we generate.

		$ext_bytes_in	= 0;
		$int_bytes_in	= 0;

		$ext_bytes_out	= 0;
		$int_bytes_out	= 0;

		// Is the port known?  An unknown port gives an empty name.
		$src_service	= isset( $service_by_port[ $src_port ] ) ? $service_by_port[ $src_port ] : "";
		$dst_service	= isset( $service_by_port[ $dst_port ] ) ? $service_by_port[ $dst_port ] : "";

		// If we were the source of this connection
		if ( $src_ip == $MY_IP ) {

			// If we're the source, on port X, it's an external reply
			if ( $src_service !== "" ) {
				$ext_bytes_out	+= $bytes;
			}

			// If we're the source, to port X, it's an internal new connection
			if ( $dst_service !== "" ) {
				$int_bytes_out	+= $bytes;
			}
		}

		// If we're the destination of this connection
		if ( $dst_ip == $MY_IP ) {

			// If we're the dest from port X, it's an internal reply
			if ( $src_service !== "" ) {
				$int_bytes_in	+= $bytes;
			}

			// If we're the dest to port X, it's an external new connection
			if ( $dst_service !== "" ) {
				$ext_bytes_in	+= $bytes;
			}
		}

		// Which list?  A recognised source port wins over a recognised
		// destination port.
		if ( $src_service !== "" ) {
			$list	= $src_service;
		}
		else if ( $dst_service !== "" ) {
			$list	= $dst_service;
		}

		// No recognised service, we'll take it to the misc list
		// and just assume it's all external traffic.
		// NOTE(review): the same byte count goes into both ext_in and
		// ext_out here, whatever the direction - confirm this is intended.
		else {
			$list			= "misc";
			$ext_bytes_in	= $bytes;
			$ext_bytes_out	= $bytes;
			$int_bytes_in	= 0;
			$int_bytes_out	= 0;
		}

		// Store for this list, and again for the all-list summary
		foreach ( array( $list, "all" ) as $target ) {

			// Start a slice at zero the first time it is seen, so the
			// additions below never read a missing entry
			if ( ! isset( $lists[ $target ][ $the_time ] ) ) {
				$lists[ $target ][ $the_time ]	= array(
					"ext_in"	=> 0,
					"ext_out"	=> 0,
					"int_in"	=> 0,
					"int_out"	=> 0,
				);
			}

			$lists[ $target ][ $the_time ][ "ext_in" ]	+= $ext_bytes_in;
			$lists[ $target ][ $the_time ][ "ext_out" ]	+= $ext_bytes_out;
			$lists[ $target ][ $the_time ][ "int_in" ]	+= $int_bytes_in;
			$lists[ $target ][ $the_time ][ "int_out" ]	+= $int_bytes_out;
		}
	}

	fclose( $fh );
}

// Now write out the cache file again.  It is truncated first, so that
// afterwards it holds only the lines in $new_cache.
$cache			= fopen( $CACHE, "w" );
$cache_write	= 0;

if ( $cache !== false ) {

	foreach ( $new_cache as $line ) {
		$line		= trim( $line );
		fputs( $cache, "$line\n" );
		$cache_write++;
	}

	fclose( $cache );
	log_msg( "Wrote: $cache_write lines to $CACHE" );
}
else {

	// Say so loudly: these lines exist nowhere else once the script ends
	error_log( "Could not rewrite $CACHE, " . count( $new_cache ) . " pending lines were not saved" );
}

// Feed the totals collected for each list into that list's rrd file
foreach ( $lists as $list => $data ) {

	$rrd_file		= "flowd_" . $list . ".rrd";
	$rrd_file		= "$BASE_RRD/$rrd_file";

	if ( ! file_exists( $rrd_file ) ) {
		log_msg( "$rrd_file does not exist!" );
		continue;
	}

	log_msg( "Now handling: $list -> $rrd_file" );

	// The path is quoted for the shell, so a folder name containing
	// spaces or shell characters cannot break the command apart
	$command_base	= "/usr/local/bin/rrdtool update " . escapeshellarg( $rrd_file );

	// Sort out data in ascending date please
	ksort( $data );

	// One "time:ext_in:ext_out:int_in:int_out" argument per time slice
	$updates		= array();
	foreach ( $data as $time => $values ) {
		$updates[]	= sprintf(
			"%s:%u:%u:%u:%u",
			$time,
			$values["ext_in"],
			$values["ext_out"],
			$values["int_in"],
			$values["int_out"]
		);
	}

	// Hand the slices over in batches of at most 30, in date order, to
	// keep each command line to a sensible length
	foreach ( array_chunk( $updates, 30 ) as $batch ) {

		$command	= $command_base . " " . implode( " ", $batch );
		log_msg( "Running: $command" );

		// shell_exec() gives null when the command prints nothing; the
		// cast keeps trim() from being handed a null
		print trim( (string) shell_exec( $command ) );
	}
}

?>