# <@LICENSE>
# Copyright 2004 Apache Software Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# -------------------------------------------------------
# ImageInfo Plugin for SpamAssassin
# Version: 0.7
# Current Home: http://www.rulesemporium.com/plugins.htm#imageinfo
# Created: 2006-08-02
# Modified: 2007-01-17
# By: Dallas Engelken
#
# Changes:
#   0.7 - added image_name_regex to allow pattern matching on the image name
#       - added support for image/pjpeg content types (progressive jpeg)
#       - updated imageinfo.cf with a few sample rules for using image_name_regex()
#   0.6 - fixed dems_ bug in image_size_range_
#   0.5 - added image_named and image_to_text_ratio
#   0.4 - added image_size_exact and image_size_range
#   0.3 - added jpeg support
#   0.2 - optimized by theo
#   0.1 - added gif/png support
#
# Files:
#   ImageInfo.pm (plugin)  - http://www.rulesemporium.com/plugins/ImageInfo.pm
#   imageinfo.cf (ruleset) - http://www.rulesemporium.com/plugins/imageinfo.cf
#
# Install:
#   1) place ruleset in your local config dir
#   2) place plugin in your plugins dir
#   3) add to init.pre (or v310.pre) the following line
#        loadplugin Mail::SpamAssassin::Plugin::ImageInfo
#      or if not in plugin dir..
#        loadplugin Mail::SpamAssassin::Plugin::ImageInfo /path/to/plugin
#   4) restart spamd (if necessary)
#
# Usage:
#  image_count()
#
#     body RULENAME  eval:image_count(<type>,<min>,[max])
#        type: 'all','gif','png', or 'jpeg'
#        min: required, message contains at least this
#             many images
#        max: optional, if specified, message must not
#             contain more than this number of images
#
#  image_count() examples
#
#     body ONE_IMAGE            eval:image_count('all',1,1)
#     body ONE_OR_MORE_IMAGES   eval:image_count('all',1)
#     body ONE_PNG              eval:image_count('png',1,1)
#     body TWO_GIFS             eval:image_count('gif',2,2)
#     body MANY_JPEGS           eval:image_count('jpeg',5)
#
#  pixel_coverage()
#
#     body RULENAME  eval:pixel_coverage(<type>,<min>,[max])
#        type: 'all','gif','png', or 'jpeg'
#        min: required, message contains at least this
#             much pixel area
#        max: optional, if specified, message must not
#             contain more than this much pixel area
#
#  pixel_coverage() examples
#
#     body LARGE_IMAGE_AREA  eval:pixel_coverage('all',150000)   # catches any images that are 150k pixels sq. or higher
#     body SMALL_GIF_AREA    eval:pixel_coverage('gif',1,40000)  # catches only gifs that are 1 to 40k pixels sq.
#
#  image_name_regex()
#
#     body RULENAME  eval:image_name_regex(<regex>)
#        regex: full quoted regexp, see examples below
#
#  image_name_regex() examples
#
#     body CG_DOUBLEDOT_GIF  eval:image_name_regex('/^\w{2,9}\.\.gif$/i')  # catches double dot gifs  abcd..gif
#
#  See the ruleset for more examples that aren't documented here.
#
#  New functions added in v0.5+ need some documentation here.  Or just
#  see .cf for sample rules.
#
# -------------------------------------------------------

package Mail::SpamAssassin::Plugin::ImageInfo;

use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Logger;
use strict;
use warnings;
use bytes;

use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);

# constructor: register the eval rules
#
# Called as Mail::SpamAssassin::Plugin::ImageInfo->new($mailsaobject);
# returns the blessed plugin object.
sub new {
  my $class = shift;
  my $mailsaobject = shift;

  # some boilerplate...
  $class = ref($class) || $class;
  my $self = $class->SUPER::new($mailsaobject);
  bless ($self, $class);

  # every name here is a sub in this package usable as eval:NAME(...) in rules
  foreach my $rule (qw(
    image_count
    pixel_coverage
    image_size_exact
    image_size_range
    image_named
    image_name_regex
    image_to_text_ratio
  )) {
    $self->register_eval_rule ($rule);
  }

  return $self;
}

# -----------------------------------------

# Record one successfully parsed image in $pms->{imageinfo}.
#
#   $pms     per-message state hash (results go under ->{imageinfo})
#   $part    the MIME part; only ->{'name'} is read here
#   $type    'gif', 'png' or 'jpeg' (selects the pc_/dems_ buckets)
#   $height, $width  parsed dimensions; nothing is recorded unless both
#                    are non-zero
#   $note    optional text appended to the debug line
sub _record_image {
  my ($pms, $part, $type, $height, $width, $note) = @_;

  return unless ($height && $width);

  my $area = $width * $height;
  my $name = $part->{'name'} ? $part->{'name'} : '';

  $pms->{imageinfo}->{"pc_$type"} += $area;
  $pms->{imageinfo}->{"dems_$type"}->{"${height}x${width}"} = 1;
  $pms->{imageinfo}->{names_all}->{$name} = 1 if $part->{'name'};

  dbg("imageinfo: $type image ".$name." is $height x $width pixels ($area pixels sq.)".
      (defined $note ? $note : ''));
}

# Per-format header parsers, keyed by image subtype.  Each takes
# ($pms, $part), extracts the pixel dimensions from the decoded part and
# records them via _record_image(); invalid or truncated data is skipped.
my %get_details = (
  'gif' => sub {
    my ($pms, $part) = @_;

    # 6 byte signature + 7 byte logical screen descriptor
    my $header = $part->decode(13);
    return unless defined $header;

    # make sure this is actually a valid gif..
    return unless $header =~ s/^GIF(8[79]a)//;

    # little-endian width and height, then the packed flags byte
    my ($width, $height, $packed) = unpack("vvC", $header);
    return unless ($height && $width);

    # low 3 bits of the packed byte give the color table size exponent;
    # treat a missing byte (truncated data) as 0 instead of warning
    my $color_table_size = 1 << ((($packed || 0) & 0x07) + 1);

    # for future enhancements
    #my $global_color_table = $packed & 0x80;
    #my $has_global_color_table = $global_color_table ? 1 : 0;
    #my $sorted_colors = ($packed & 0x08)?1:0;
    #my $resolution = ((($packed & 0x70) >> 4) + 1);

    _record_image($pms, $part, 'gif', $height, $width,
                  ", with $color_table_size color table");
  },

  'png' => sub {
    my ($pms, $part) = @_;
    my $data = $part->decode();

    return unless defined $data;
    return unless (substr($data, 0, 8) eq "\x89PNG\x0d\x0a\x1a\x0a");

    my $datalen = length $data;
    my $pos = 8;
    my $chunksize = 8;
    my ($width, $height) = ( 0, 0 );

    # a chunk is: 4 byte data length, 4 byte type, data, 4 byte CRC.
    # stop as soon as a complete chunk header no longer fits.
    while ($pos + $chunksize <= $datalen) {
      my ($len, $type) = unpack("Na4", substr($data, $pos, $chunksize));
      $pos += $chunksize;

      last if $type eq "IEND";  # end of png image.

      if ($type eq "IHDR" && $len == 13) {
        # width and height are the first 8 bytes of the IHDR data; the
        # remaining fields and the CRC are not needed here
        last if ($pos + 8 > $datalen);
        ($width, $height) = unpack("NN", substr($data, $pos, 8));
        last;
      }

      # skip this chunk's data and CRC so the next read lands on a
      # chunk header rather than in the middle of chunk data
      $pos += $len + 4;
    }

    _record_image($pms, $part, 'png', $height, $width);
  },

  'jpeg' => sub {
    my ($pms, $part) = @_;
    my $data = $part->decode();

    return unless defined $data;
    return unless (substr($data, 0, 2) eq "\xFF\xD8");  # SOI marker

    my $datalen = length $data;
    my $pos = 2;
    my $chunksize = 4;
    my ($height, $width) = ( 0, 0 );

    # a segment is: 0xFF, marker byte, 2 byte big-endian length (which
    # counts the length field itself), then length-2 bytes of data
    while ($pos + $chunksize <= $datalen) {
      my ($xx, $mark, $len) = unpack("CCn", substr($data, $pos, $chunksize));
      last if ($xx != 0xFF);
      last if ($mark == 0xDA || $mark == 0xD9);  # start of scan / end of image
      last if ($len < 2);
      $pos += $chunksize;

      my $block = substr($data, $pos, $len - 2);

      # start-of-frame markers (0xC0-0xCF except 0xC4, 0xC8 and 0xCC)
      # carry: precision, height, width, component count
      if ( ($mark >= 0xC0 && $mark <= 0xC3) ||
           ($mark >= 0xC5 && $mark <= 0xC7) ||
           ($mark >= 0xC9 && $mark <= 0xCB) ||
           ($mark >= 0xCD && $mark <= 0xCF) ) {
        (undef, $height, $width) = unpack("Cnn", $block);
        last;
      }

      # advance by what was really there, in case the data is truncated
      $pos += length($block);
    }

    _record_image($pms, $part, 'jpeg', $height, $width);
  },
);

# Scan the message for base64-encoded gif/png/jpeg parts and fill in
# $pms->{imageinfo}:
#   pc_<type>     summed pixel area          (also pc_all)
#   count_<type>  number of parts examined   (also count_all)
#   dems_<type>   { "HxW" => 1 } per image   (also dems_all)
#   names_all     { name => 1 } per named image
sub _get_images {
  my ($self,$pms) = @_;

  foreach my $type ( 'all', keys %get_details ) {
    $pms->{'imageinfo'}->{"pc_$type"} = 0;
    $pms->{'imageinfo'}->{"count_$type"} = 0;
  }

  # pjpeg (progressive jpeg) is accepted and counted as jpeg, as the
  # 0.7 changelog entry describes
  foreach my $p ($pms->{msg}->find_parts(qr@^image/(?:gif|png|p?jpeg)$@, 1)) {
    # make sure its base64 encoded.  default the header before lc() so
    # a part without the header does not trigger an undef warning.
    my $cte = lc($p->get_header('content-transfer-encoding') || '');
    next if ($cte !~ /^base64$/);

    my ($type) = $p->{'type'} =~ m@/(\w+)$@;
    next unless defined $type;
    $type = 'jpeg' if $type eq 'pjpeg';

    if (exists $get_details{$type}) {
      $get_details{$type}->($pms,$p);
      $pms->{'imageinfo'}->{"count_$type"} ++;
    }
  }

  foreach my $name ( keys %{$pms->{'imageinfo'}->{"names_all"}} ) {
    dbg("imageinfo: image name $name found");
  }

  # roll the per-type totals up into the 'all' buckets
  foreach my $type ( keys %get_details ) {
    $pms->{'imageinfo'}->{'pc_all'} += $pms->{'imageinfo'}->{"pc_$type"};
    $pms->{'imageinfo'}->{'count_all'} += $pms->{'imageinfo'}->{"count_$type"};
    foreach my $dem ( keys %{$pms->{'imageinfo'}->{"dems_$type"}} ) {
      dbg("imageinfo: adding $dem to dems_all");
      $pms->{'imageinfo'}->{'dems_all'}->{$dem} = 1;
    }
  }
}

# -----------------------------------------

# eval:image_named(<name>)
# Hits (returns 1) when an image with exactly this name was found.
# Returns nothing when <name> is missing, 0 otherwise.
sub image_named {
  my ($self,$pms,$body,$name) = @_;
  return unless (defined $name);

  # make sure we have image data read in.
  if (!exists $pms->{'imageinfo'}) {
    $self->_get_images($pms);
  }

  return 0 unless (exists $pms->{'imageinfo'}->{"names_all"});
  return 1 if (exists $pms->{'imageinfo'}->{"names_all"}->{$name});
  return 0;
}

# -----------------------------------------

# eval:image_name_regex(<regex>)
# <regex> is a full quoted match expression such as '/^\w+\.\.gif$/i'.
# Hits (returns 1) when any recorded image name matches it.
# Returns nothing when <regex> is missing, 0 otherwise (including when
# the regex does not compile).
sub image_name_regex {
  my ($self,$pms,$body,$re) = @_;
  return unless (defined $re);

  # make sure we have image data read in.
  if (!exists $pms->{'imageinfo'}) {
    $self->_get_images($pms);
  }

  return 0 unless (exists $pms->{'imageinfo'}->{"names_all"});

  # Build the matcher from the rule's regex text ONLY.  The image name
  # comes from the message and is sender-controlled, so it must never be
  # spliced into evaluated code; it is passed to the matcher as data.
  # NOTE(review): $re itself is still string-eval'ed.  It is the rule
  # argument (configuration), not message content, but it is executed
  # as Perl, so rule files using this eval must be trusted.
  my $matcher = eval 'sub { $_[0] =~ ' . $re . ' }';
  if (!$matcher) {
    dbg("imageinfo: error in regex $re - $@");
    return 0;
  }

  foreach my $name (keys %{$pms->{'imageinfo'}->{"names_all"}}) {
    dbg("imageinfo: checking image named $name against regex $re");

    my $hit = eval { $matcher->($name) };
    dbg("imageinfo: error in regex $re - $@") if $@;

    if ($hit) {
      dbg("imageinfo: image_name_regex hit on $name");
      return 1;
    }
  }
  return 0;
}

# -----------------------------------------

# eval:image_count(<type>,<min>,[max])
# Hits when the number of <type> images is at least <min> and, if <max>
# is given, at most <max>.  Returns nothing when <type> or <min> is
# missing.
sub image_count {
  my ($self,$pms,$body,$type,$min,$max) = @_;
  return unless (defined $type && defined $min);

  # make sure we have image data read in.
  if (!exists $pms->{'imageinfo'}) {
    $self->_get_images($pms);
  }

  # dbg("imageinfo: count: $min, ".($max ? $max:'').", $type, ".$pms->{'imageinfo'}->{"count_$type"});
  return result_check($min, $max, $pms->{'imageinfo'}->{"count_$type"});
}

# -----------------------------------------

# eval:pixel_coverage(<type>,<min>,[max])
# Hits when the summed pixel area of <type> images is at least <min>
# and, if <max> is given, at most <max>.  Returns nothing when <type>
# or <min> is missing.
sub pixel_coverage {
  my ($self,$pms,$body,$type,$min,$max) = @_;
  return unless (defined $type && defined $min);

  # make sure we have image data read in.
  if (!exists $pms->{'imageinfo'}) {
    $self->_get_images($pms);
  }

  # dbg("imageinfo: pc_$type: $min, ".($max ? $max:'').", $type, ".$pms->{'imageinfo'}->{"pc_$type"});
  return result_check($min, $max, $pms->{'imageinfo'}->{"pc_$type"});
}

# -----------------------------------------

# eval:image_to_text_ratio(<type>,<min>,<max>)
# Computes (length of body text) / (pixel area of <type> images) and
# hits when min <= ratio < max.  Returns nothing when any argument is
# missing, 0 when there is no text or no pixel area.
sub image_to_text_ratio {
  my ($self,$pms,$body,$type,$min,$max) = @_;
  return unless (defined $type && defined $min && defined $max);

  # make sure we have image data read in.
  if (!exists $pms->{'imageinfo'}) {
    $self->_get_images($pms);
  }

  # depending on how you call this eval (body vs rawbody),
  # the $textlen will differ.
  my $textlen = length(join('',@$body));

  return 0 unless ( $textlen > 0 && exists $pms->{'imageinfo'}->{"pc_$type"} && $pms->{'imageinfo'}->{"pc_$type"} > 0);

  my $ratio = $textlen / $pms->{'imageinfo'}->{"pc_$type"};
  dbg("imageinfo: image ratio=$ratio, min=$min max=$max");
  return result_check($min, $max, $ratio, 1);
}

# -----------------------------------------

# eval:image_size_exact(<type>,<height>,<width>)
# Hits when a <type> image with exactly these dimensions was found.
# Returns nothing when any argument is missing, 0 otherwise.
sub image_size_exact {
  my ($self,$pms,$body,$type,$height,$width) = @_;
  return unless (defined $type && defined $height && defined $width);

  # make sure we have image data read in.
  if (!exists $pms->{'imageinfo'}) {
    $self->_get_images($pms);
  }

  return 0 unless (exists $pms->{'imageinfo'}->{"dems_$type"});
  return 1 if (exists $pms->{'imageinfo'}->{"dems_$type"}->{"${height}x${width}"});
  return 0;
}

# -----------------------------------------

# eval:image_size_range(<type>,<minh>,<minw>,[maxh],[maxw])
# Hits when some <type> image is at least minh x minw and, where given,
# no larger than maxh / maxw.  Returns nothing when a required argument
# is missing or no <type> dimensions were recorded, 0 when none match.
sub image_size_range {
  my ($self,$pms,$body,$type,$minh,$minw,$maxh,$maxw) = @_;
  return unless (defined $type && defined $minh && defined $minw);

  # make sure we have image data read in.
  if (!exists $pms->{'imageinfo'}) {
    $self->_get_images($pms);
  }

  return unless (exists $pms->{'imageinfo'}->{"dems_$type"});

  # keys are stored as "HEIGHTxWIDTH"
  foreach my $dem ( keys %{$pms->{'imageinfo'}->{"dems_$type"}}) {
    my ($h,$w) = split(/x/,$dem);
    next if ($h < $minh);  # height less than min height
    next if ($w < $minw);  # width less than min width
    next if (defined $maxh && $h > $maxh);  # height more than max height
    next if (defined $maxw && $w > $maxw);  # width more than max width

    # if we make it here, we have a match
    return 1;
  }

  return 0;
}

# -----------------------------------------

# Range test shared by the eval rules.
#
#   $min         lower bound (inclusive)
#   $max         upper bound (inclusive), or undef for no upper bound
#   $value       the value to test; undef never matches
#   $nomaxequal  when true, the upper bound becomes exclusive
#
# Returns 1 when $value is in range, 0 otherwise.
sub result_check {
  my ($min, $max, $value, $nomaxequal) = @_;
  return 0 unless defined $value;
  return 0 if ($value < $min);
  return 0 if (defined $max && $value > $max);
  # only compare against $max when there is one
  return 0 if ($nomaxequal && defined $max && $value == $max);
  return 1;
}

# -----------------------------------------

1;