# Copyright (c) 1994 Regents of the University of California. # All rights reserved. # $Id: momemail.pl,v 1.7 1994/08/10 10:18:29 fielding Exp $ # --------------------------------------------------------------------------- # momemail: A package for building and sending an e-mail message based on # the results from a World-Wide Web traversal. The message # contains pointers to the "interesting" parts of the web -- # for MOMspider, this means broken or redirected links, # recently changed documents, and soon-to-be-expired documents. # # This software has been developed by Roy Fielding as # part of the Arcadia project at the University of California, Irvine. # # Redistribution and use in source and binary forms are permitted, # subject to the restriction noted below, provided that the above # copyright notice and this paragraph and the following paragraphs are # duplicated in all such forms and that any documentation, advertising # materials, and other materials related to such distribution and use # acknowledge that the software was developed in part by the University of # California, Irvine. The name of the University may not be used to # endorse or promote products derived from this software without # specific prior written permission. THIS SOFTWARE IS PROVIDED ``AS IS'' # AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT # LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE. # # Use of this software in any way or in any form, source or binary, # is not allowed in any country which prohibits disclaimers of any # implied warranties of merchantability or fitness for a particular # purpose or any disclaimers of a similar nature. 
#
# IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY
# FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
# ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION
# (INCLUDING, BUT NOT LIMITED TO, LOST PROFITS) EVEN IF THE UNIVERSITY
# OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# If you have any suggestions, bug reports, fixes, or enhancements,
# send them to the author, Roy Fielding.
# ---------------------------------------------------------------------------
require "wwwdates.pl";
require "wwwerror.pl";
require "momconfig.pl";
require "momhistory.pl";

package momemail;

# ==========================================================================
# Package defaults, taken from momconfig.pl

# The program version string is mandatory: loading this file dies if
# momconfig.pl did not supply a true value for it.
$Version = $momconfig'Version;
die "Program has no Version information, stopped" unless $Version;

# sendmailCommand names the sendmail binary.  The program is expected to
# take the recipient addresses as command-line arguments and to read the
# remaining headers and the message text from stdin.  When momconfig.pl
# does not set it, /usr/lib/sendmail is used.
$sendmailCommand = $momconfig'sendmailCommand;
$sendmailCommand = "/usr/lib/sendmail" unless $sendmailCommand;

# ==========================================================================
# ==========================================================================
# clearstatus(): Forget the state of the current mailing process.
#
#    Undefines the active task name, the start time, the four reporting
#    options, and the four tables of collected nodes.  Takes no arguments.
#
sub clearstatus
{
    # Assigning the empty list leaves every scalar on the left undefined.
    ($Active, $Started,
     $MailBroken, $MailRedirected, $MailChanged, $MailExpired) = ();

    undef %BrokenNodes;
    undef %RedirectedNodes;
    undef %ChangedNodes;
    undef %ExpiredNodes;
}

# ==========================================================================
# start(): Set up the collection process for an e-mail message at the start
#          of an infostructure traversal.
#
#    $taskname  -> Name of the infostructure being traversed; it is quoted
#                  in the body of the message
#    $dobroken  -> True if broken links should be reported
#    $doredir   -> True if redirected links should be reported
#    $dochanged -> Number of days; documents modified within that many days
#                  are reported (false disables the check)
#    $doexpired -> Number of days; documents expiring within that many days
#                  are reported (false disables the check)
#
#    If a previous message was started but never ended, a warning is
#    printed on STDERR and the collected state is discarded.
#
sub start
{
    local($taskname, $dobroken, $doredir, $dochanged, $doexpired) = @_;

    if ($Active)
    {
        print STDERR "Warning: email start occurs before prior email ended\n";
        &clearstatus;
    }

    $Active         = $taskname;
    $Started        = time;

    $MailBroken     = $dobroken;
    $MailRedirected = $doredir;
    $MailChanged    = $dochanged || 0;
    $MailExpired    = $doexpired || 0;

    # The day counts are converted to seconds (86400 seconds per day).
    $ChangeWindow   = $MailChanged * 86400;
    $ExpireWindow   = $MailExpired * 86400;

    %BrokenNodes     = ();
    %RedirectedNodes = ();
    %ChangedNodes    = ();
    %ExpiredNodes    = ();

    $Broken     = 0;
    $Redirected = 0;
    $Changed    = 0;
    $Expired    = 0;
}


# ==========================================================================
# tested(): Caller signals that the given node has been tested as part of
#           the current traversal process.
#
#    $node -> The momhistory index number for the affected URL
#
#    The node is recorded (at most once per table) as broken, redirected,
#    recently changed and/or soon to expire, according to the options given
#    to start().  A node recorded as broken or redirected is not examined
#    for modification or expiry dates.  Avoided nodes are ignored.
#
sub tested
{
    local($node) = @_;

    return unless ($MailBroken || $MailRedirected ||
                   $MailChanged || $MailExpired);
    return if (&momhistory'was_avoided($node));

    # %headers has to be a package variable (local, not lexical) because it
    # is handed to momhistory'recall as a typeglob.
    local($url, %headers, $response, $lmd, $lmt, $expd, $expt);

    $url      = &momhistory'get_url($node);
    %headers  = ();
    $response = &momhistory'recall($node, *headers);

    if ($MailBroken &&
        ($momhistory'WhatToDo{$response} == $momhistory'DO_broken))
    {
        return if defined($BrokenNodes{$node});
        $BrokenNodes{$node} = $url;
        ++$Broken;
    }
    elsif ($MailRedirected &&
           ($momhistory'WhatToDo{$response} == $momhistory'DO_redirect))
    {
        return if defined($RedirectedNodes{$node});
        $RedirectedNodes{$node} = $url;
        ++$Redirected;
    }
    else
    {
        # Both windows are measured from the start of the traversal, the
        # same reference time end() uses for the "Changed Since" and
        # "Will expire before" headings, so that every listed document
        # actually satisfies the heading it appears under.
        if ($MailChanged && $ChangeWindow && !defined($ChangedNodes{$node}))
        {
            $lmd = $headers{'last-modified'};
            if (defined($lmd) &&
                ($lmt = &wwwdates'get_gmtime($lmd)) &&
                (($lmt + $ChangeWindow) >= $Started))
            {
                $ChangedNodes{$node} = $url;
                ++$Changed;
            }
        }

        if ($MailExpired && $ExpireWindow && !defined($ExpiredNodes{$node}))
        {
            $expd = $headers{'expires'};
            if (defined($expd) &&
                ($expt = &wwwdates'get_gmtime($expd)) &&
                (($expt - $ExpireWindow) <= $Started))
            {
                $ExpiredNodes{$node} = $url;
                ++$Expired;
            }
        }
    }
}


# ==========================================================================
# write_urls(): Private helper for end().  Writes the given URLs to the
#               open EMAIL handle, one per line in angle brackets, followed
#               by a blank line.  The URLs are sorted so that the report
#               does not depend on hash ordering.
#
sub write_urls
{
    local(@urls) = @_;
    local($url);

    foreach $url (sort @urls)
    {
        print EMAIL '<', $url, ">\n";
    }
    print EMAIL "\n";
}


# ==========================================================================
# end(): Write and send the e-mail message if there was something worth
#        reporting.
#
#    $mailto -> Recipient address(es), passed on the sendmail command line
#    $title  -> Text for the Subject header
#    $idxurl -> URL of the index document, quoted at the end of the message
#
#    Does nothing if start() has not been called.  In every other case the
#    package status is cleared before returning.  Failures to start or to
#    finish the mailer are reported on STDERR.
#
sub end
{
    local($mailto, $title, $idxurl) = @_;
    local($timestr, $replyto);

    return unless $Active;          # Don't do anything if email not started

    if (!($Broken || $Redirected || $Changed || $Expired))
    {
        &clearstatus;               # Don't send anything if nothing to send
        return;
    }

    # NOTE(review): the command and the addresses are interpolated into a
    # shell pipeline, so both must come from a trusted configuration.
    if (!open(EMAIL, "| $sendmailCommand $mailto"))
    {
        print STDERR "Unable to pipe email to $sendmailCommand: $!\n";
        &clearstatus;               # Reset the package status
        return;
    }

    $timestr = &wwwdates'wtime($Started, '');
    $replyto = &www'get_def_header('http', 'From');

    # The blank line after the Reply-To header separates the mail headers
    # from the message body.
    print EMAIL <<"EOF";
Subject: $title
Reply-To: $replyto

This message was automatically generated by $Version
after a web traversal on $timestr

The following parts of the $Active infostructure may need inspection:

EOF

    if ($Broken)
    {
        print EMAIL "Broken Links:\n";
        &write_urls(values(%BrokenNodes));
    }

    if ($Redirected)
    {
        print EMAIL "Redirected Links:\n";
        &write_urls(values(%RedirectedNodes));
    }

    if ($Changed)
    {
        print EMAIL "Changed Since ",
                    &wwwdates'wtime(($Started - $ChangeWindow), ''), ":\n";
        &write_urls(values(%ChangedNodes));
    }

    if ($Expired)
    {
        print EMAIL "Will expire before ",
                    &wwwdates'wtime(($Started + $ExpireWindow), ''), ":\n";
        &write_urls(values(%ExpiredNodes));
    }

    print EMAIL "For more information, see the index at\n",
                '<', $idxurl, ">\n\n";

    # Closing a piped handle waits for the mailer and returns false when a
    # system call fails or the mailer exits with a non-zero status; $! is 0
    # when the exit status ($?) is the only problem.
    if (!close(EMAIL))
    {
        print STDERR "Warning: $sendmailCommand did not complete successfully",
                     ($! ? ": $!" : " (exit status $?)"), "\n";
    }

    &clearstatus;                   # Reset the package status
}

# ==========================================================================

1;