********************************************************************;
* read in syslog files ;
* ------------------------------------------------------------------;
* ------------------------------------------------------------------;
* Split the syslog records read from fileref r3 into two data sets: ;
*   mail - one row per record whose rectype is sm-mta               ;
*   spam - one row per record whose rectype is mimede               ;
* Records with any other rectype are read but never output.         ;
* ------------------------------------------------------------------;
data mail (drop=id id2 id3 rectype spamscore)
     spam (keep=squid spamscore);
  infile r3 pad missover dlm=' ,';

  /* Comments inside the LENGTH statement must use the block form:   */
  /* an asterisk comment is only recognised at a statement boundary. */
  length mon     $3
         day     3
         time    $8
         host    $30
         rectype $6     /* first 6 characters of the program tag     */
         id      $2
         id2     $2
         id3     $2
         squid   $18    /* SQuID = SendMail Queue ID #               */
         to      $40    /* Destination email address                 */
         ;

  * --------------------------------------------------;
  * Read the common leading fields and hold the line  ;
  * so the record type can select the rest of the     ;
  * layout.                                           ;
  * --------------------------------------------------;
  input mon day time host rectype @;

  if rectype = "sm-mta" then do;
    * --------------------------------------------------;
    * $char informat reads in entire string, including  ;
    * blanks and punctuation.                           ;
    * --------------------------------------------------;
    input id id2 id3 squid @ 'to=' to $char50.;

    * --------------------------------------------------;
    * Remove a trailing colon from squid                ;
    * (e.g. k3I400V6014162:).  Guarded so that a value  ;
    * without a colon is kept unchanged instead of      ;
    * calling substr with a length of -1.               ;
    * --------------------------------------------------;
    if indexc(squid, ':') > 1 then
      squid = substr(squid, 1, indexc(squid, ':') - 1);

    /* ------------------------------------------------ */
    /* Make 'to' lowercase, remove any blanks, and keep  */
    /* only the text before the first comma, i.e. the    */
    /* first forwarding destination.                     */
    /* Illustrative input (original example was lost):   */
    /*   to=First Last <fl@user.edu>,other@user.edu      */
    /* ------------------------------------------------ */
    to = lowcase(compress(scan(to, 1, ',')));

    /* ------------------------------------------------ */
    /* Extract an address enclosed in angle brackets,    */
    /* e.g. to=<fl@user.edu> becomes fl@user.edu.        */
    /* NOTE(review): this statement was garbled in the   */
    /* source and has been reconstructed to mirror the   */
    /* colon handling below - confirm against a clean    */
    /* copy of the program.                              */
    /* ------------------------------------------------ */
    if index(to, '<') > 0 then
      to = scan(substr(to, index(to, '<') + 1), 1, '>');

    /* ------------------------------------------------ */
    /* Keep only the text to the right of a colon, up to */
    /* any closing bracket (illustrative:                */
    /* prefix:fl@user.edu> becomes fl@user.edu).         */
    /* ------------------------------------------------ */
    if index(to, ':') > 0 then
      to = scan(substr(to, index(to, ':') + 1), 1, '>');

    output mail; /* legitimate email */
  end;
  else if rectype = "mimede" then do;
    * --------------------------------------------------;
    * Jump to the text after MDLOG, to read the queue   ;
    * id, then to the text after spammish, to read the  ;
    * spam score.                                       ;
    * --------------------------------------------------;
    input @ 'MDLOG,' squid
          @ 'spammish,' spamscore;
    output spam;
  end;
run;