#!/usr/bin/perl
# 
# spamlog.pl
#
# usage: spamlog.pl [-n] [-t|-a]
#
# -n: do not write and save byte offset, i.e. test mode.
# -t: include all of Today's maillog.
# -a: include the entire maillog.
#
# Otherwise, only data since the last invocation is processed.
#
# Description: Extracts spam entries from the Postfix log and formats them.
#
# Purpose: The purpose is to make it easy to identify false positive SPAM
# rejects from Postfix logfiles. This script writes 2 files, one with simple,
# minimal formatting of the most important elements of each maillog entry, and
# the other just dumps the raw log contents (adds a newline between each) for
# the same entries. The summary file grabs the from= and tries to get a
# snippet of why it was rejected, and then fit that within 72 chars or so for
# readability.
# 
# Requires: Works only with Postfix logs.
#
# See also: spamcheck.sh (which can mail results bypassing Postfix to avoid 
# the sad :( situation where the logfile contents trigger a SPAM filter).
#
# Hal Burgiss <hal@foobox.net>  Fri 08/23/02 08:53:08 PM
#
###################################################################

use Getopt::Std;

## process switches and arguments
# getopts records every recognised flag (-n, -t, -a) as a key of %args.
getopts("nta", \%args);

# File that carries the byte offset from one run to the next.
$offset_file = "/var/tmp/spam.offset";

$log="/var/log/maillog";            # Postfix log that is scanned
$raw="/var/tmp/spam.dump";          # raw copy of every reject entry
$summary="/var/tmp/spam.summary";   # one formatted line per reject entry
$strlen=50;                         # max chars kept of the reject reason
$offset=0;                          # start of log unless an offset is loaded

# Command line option values.
$no_update=0;             # -n flag, do not update file offset
$all=0;                   # -a flag, all of log
$today=0;                 # -t flag, all of today only
#$debug=1;

if (defined $args{n}) {
 $no_update=1;
 print "no updating, test mode only\n" if $debug;
}

if (defined $args{t}) {
 print "doing today only\n" if $debug;
 $today=1;
 # Scalar localtime looks like "Fri Aug 23 20:53:08 2002"; keep only the
 # "Mon DD" part so it can be matched against the start of a log line.
 # Days 1-9 are padded with an extra space ("Sep  9"), exactly as in the
 # syslog timestamp, so the pattern must accept more than one space there;
 # with a single space the substitution failed and -t matched nothing on
 # the first nine days of each month.
 $date=localtime;
 $date =~ s/^\w+ (\w+ +\d+) .*/$1/;
 print "Looking for date: $date\n" if $debug;
}

# -a wins over -t when both are given.
if (defined $args{a}) {
 $all=1;
 $today=0;
 print "doing all of $log\n" if $debug;
}

print "\n" if $debug;

# Open the log for reading and (re)create both report files.  Three-argument
# open keeps the mode separate from the file name, and $! reports why an
# open failed.
open(LOG, "<", $log) || die "Cannot open $log: $!";
open(RAW, ">", $raw) || die "Cannot open $raw: $!";
open(SPAM, ">", $summary) || die "Cannot open $summary: $!";

# Get the saved file offset from where we left off last time.  A missing or
# unreadable offset file is not an error; $offset is simply left unchanged.
if (open(FD, "<", $offset_file)) {
   while (<FD>) {
     # Keep just the digits, not the whole line with its trailing newline.
     $offset=$1 if (/(\d+)/);
   }
   close FD;
}

# Current size in bytes of the log.  The stat is done on the open handle so
# the size always belongs to the file that is about to be read.
$size = (stat(LOG))[7];
defined $size || die "Cannot stat $log: $!";

# if -a or -t option is invoked, we need to check the whole log.
$offset=0 if ($all || $today);

# if the saved offset is larger than the log, the logfile has been rotated
$offset=0 if ($offset > $size);

# Whence 0 means "relative to the start of the file".  The literal is used
# because the SEEK_SET constant is only available after importing it from
# Fcntl, which this script does not do.
seek(LOG,$offset,0) || die "Cannot seek to $offset in $log: $!";

# Read the log from the current position.  Every Postfix reject entry is
# copied verbatim to RAW and condensed into one line on SPAM.
while (<LOG>) {

  # With -t only entries stamped with today's date are of interest.
  if ($today) {
    next unless /^$date/;
  }

  # "reject:" is the Postfix marker for a spam filter hit; skip the rest.
  next unless /reject:/;

  # Raw dump: the entry as logged, followed by a blank separator line.
  print RAW "$_\n";

  # First column: the from= value, cut to 25 chars and padded to 26.
  # Nothing is printed for this column when the entry has no from=/to= pair.
  printf SPAM "%-26s", substr($1, 0, 25) if /from=(.*) to=/;

  # Second column: a short reason, at most $strlen chars.  For RCPT rejects
  # this is the client plus the text after the reply code; for header/body
  # rejects it is the text following that keyword.  Anything else gets an
  # empty reason, so the line is still terminated.
  my $reason = "";
  if (/RCPT from (.*): (5\d\d|4\d\d|Helo ).*: (.*) from=/) {
    $reason = substr("$1 $3", 0, $strlen);
  }
  elsif (/reject: (header|body) (.*) from=/) {
    $reason = substr($2, 0, $strlen);
  }
  print SPAM "$reason\n";

}

# Save the resume point for the next run (skipped in -n test mode).
# The current read position is stored rather than the size taken before the
# scan: the loop reads to end-of-file, so entries appended while it ran have
# already been processed and would be reported a second time if only the
# earlier size were saved.
if (! $no_update) {
  $resume = tell(LOG);
  $resume = $size if ($resume < 0);   # tell returns -1 on failure
  if (open(FD, ">", $offset_file)) {
    print FD $resume;
    close(FD) || warn "Cannot write $offset_file: $!";
  } else {
    # Without the offset the next run starts over; say so instead of
    # failing silently.
    warn "Cannot save offset to $offset_file: $!";
  }
}

close LOG;
# Buffered write errors only show up at close, so check the report files.
close(RAW) || warn "Cannot close $raw: $!";
close(SPAM) || warn "Cannot close $summary: $!";

# delete tmp files.
#unlink $raw;

#--- eof spamlog.pl
