#!/usr/bin/perl
#
#
#
# aub: assemble usenet binaries
#
#  Mark Stantz, stantz@sgi.com, stantz@sierra.stanford.edu
#
#  aub v1.0 1/92	Basic functionality only.  Buggy.
#      v1.1 3/92	Cleaned up, documented, released, added functionality.
#      v2.0 8/92.	Major rewrite, numerous enhancements, speed hacks.
#			Now requires a formal configuration file.
#
# This code is offered as-is.  Anyone is welcome to make improvements, 
# provided that my notice of authorship is retained.  I accept no 
# responsibility for loss or damage caused by this program, nor do I accept
# responsibility for supporting it.
#
# Most of the documentation for this program is self-contained.
#
# 	aub -m			Prints the short form of the documentation
#	aub -M			Prints the long form
#	aub -C 			Lists changes made since previous version
#
#
# DEPENDENCIES:
#
# /bin/echo			-- &find_pg will look for these for you if
# /usr/bin/sum			    they aren't in the usual place.  Though
# /bin/cat			    I don't know of any *NIX so bizarre...
# /bin/mv
# /bin/hostname
# which 			-- These had better be in your path;
# whereis			    I'm not looking for them.  But they
#				    only help you if your other stuff isn't
#				    in the standard place.

# Locate the external programs aub shells out to.  Each variable starts out
# as the conventional path; if nothing executable is found there, &find_pg
# is asked to search for the program by name.
#
# NOTE(review): for echo, sum, cat and mv the return value of &find_pg is
# discarded, while for hostname it is assigned.  &find_pg is defined outside
# this part of the file -- presumably it sets the upper-cased global itself
# as a side effect; confirm, otherwise $ECHO/$SUM/$CAT/$MV keep pointing at
# a non-executable path.
$ECHO = "/bin/echo"; &find_pg("echo", 0) unless (-x $ECHO);
$SUM = "/usr/bin/sum";   &find_pg("sum", 0)  unless (-x $SUM);
$CAT = "/bin/cat";   &find_pg("cat", 0)  unless (-x $CAT);
$MV = "/bin/mv";     &find_pg("mv", 0)   unless (-x $MV);
$HOSTNAME = "/bin/hostname"; $HOSTNAME = &find_pg("hostname", 0) unless
  (-x $HOSTNAME);

# CONSTANTS: 	most likely you want to leave these alone.  
#
#
# $aub_rcfile			Keeps track of what groups we assemble 
#				binaries in, and what articles there we've
#				not yet resolved.
#
# $aub_lock			A lockfile, used to ensure that only one
#				instance of aub runs at a time.
#
# $aub_tmp			A temporary file, used in the construction
#				of $aub_rcfile
#
# $general 			Magic cookie for accessing general aub
#				parameters.  Must be an invalid newsgroup
#				name.
#
# $aub_dir			If the AUBDIR environment variable is set, it
#				will override the general (but not specific)
#				aub directory specified in the configuration
#				file.
# 
# $aub_desc			Specifies the general description file.
#
# $aub_hook			Specifies the general hook program.
#
# $mini_aub_config		Unqualified name of our configuration file.
#
# $aub_config			Qualified name of our configuration file.
#
#
# $timeout_interval		How long we'll wait for a response from an 
# 				NNTP server before deciding it's died on us.
#
# $version			Version number of this program.
#
# $last_version			Version number of previous release.
# 
# $temp_decode_file		Another temporary file; binaries are actually
#				assembled here before being moved into an
#				aub directory.
#
# $nntpserver_file		A place aub can find the name of the NNTP 
# 				server it's supposed to use.  This is ignored
#				if the user's NNTPSERVER environment variable
#				is set, or if an NNTP server is specified in
#				the configuration file.
#
# $obsolete_init		Name of obsolete v1.1 pre-warp-drive proto-
#				configuration file.
#
# @sigs				List of signals that might interrupt our 
#				progress.
#
# @extn_hints			Extensions which help us recognize binary
#				images in subject lines.

# Per-user state files; all of them live directly in the home directory.
$aub_rcfile         = "$ENV{'HOME'}/.aubrc";
$aub_lock           = "$ENV{'HOME'}/.aub_lock";
$aub_tmp            = "$ENV{'HOME'}/.aubtmp";

# "." can never be a newsgroup name, so it is used as the key under which
# the general (non group-specific) settings are stored.  The environment
# supplies the initial general directory, description file and hook.
$general            = ".";
($aub_dir{$general}, $aub_desc{$general}, $aub_hook{$general}) =
  @ENV{"AUBDIR", "AUBDESC", "AUBHOOK"};

# Configuration file: bare name, and the same name under the home directory.
$mini_aub_config    = ".aubconf";
$aub_config         = "$ENV{'HOME'}/$mini_aub_config";

$timeout_interval   = 60;			# Seconds to wait on the server
$version            = "2.0.5";
$last_version       = "1.1";
$temp_decode_file   = "/var/tmp/aub.decode" . $$;	# One per process id
$nntpserver_file    = "/usr/local/lib/rn/nntpserver";
$obsolete_init      = "$ENV{'HOME'}/.aubinit";

# Signals for which the main program installs its clean-up handler.
@sigs               = qw(HUP INT QUIT ILL TRAP IOT EMT FPE
                         BUS SEGV SYS TERM USR1 USR2 XCPU
                         XFSZ PIPE);

# File name extensions that mark a word in a subject line as the name of
# a binary.
@extn_hints         = qw(.gif .jpg .jpeg .gl .zip .au .zoo
                         .exe .dl .snd .mpg .mpeg .tiff .lzh
                         .wav);

#
# GLOBAL variables
#
#
# There are zillions of 'em, and I don't have the energy to document them all
# just now.  If you see something that's not explicitly defined as local(),
# assume it's global.
#

#
# Main program  -- Setup code.
#

#
# Parse arguments.  If only documentation is requested, deal with it ASAP
# and bail out.
#
# I probably should have an argument that lets you specify an alternate 
# configuration file, but if I do that, there'll be some bozo who won't
# realize I've broken compatibility with the v1.1 configuration file and
# will just use the argument to point v2.0 at his obsolete .aubinit file.
# Then I'll get mail when it doesn't work.  I don't want to deal with that,
# so I'm delaying the command line option until v2.2.  (Almost certainly
# v2.1 will be bug fixes for v2.0.)
#

# Perl 5 compatibility change made by Laurent VALLEE on 7 December 1994:
# the socket library needed for NNTP access is loaded here, at compile
# time, before anything else runs.
BEGIN {
  require "Socket.pm";
  import Socket;
}

# Parse the command line; give up right away if the options are bad.

exit(1) unless (&Getopts("cd:nCMm"));

# Requests for documentation are served first.  None of these subroutines
# will return.

if ($opt_M) { &long_manual; }
if ($opt_m) { &short_manual; }
if ($opt_C) { &changes; }

# Route every signal listed in @sigs to 'handler', so that we get a chance
# to clean up when we are interrupted unexpectedly.

@SIG{@sigs} = ('handler') x @sigs;

# First guess at the NNTP server.  A non-empty NNTPSERVER environment
# variable names the server outright.  Failing that, if $nntpserver_file is
# readable, the first line of its contents is remembered in $defserver as
# a hint.

if ($ENV{"NNTPSERVER"}) {
  $server = $ENV{"NNTPSERVER"};
} elsif (-r $nntpserver_file) {
  $defserver = `$CAT $nntpserver_file 2>/dev/null`;
  chop($defserver);
  if ($defserver =~ m/^([^\n]+)\n/) {		# More than one line?
    $defserver = $1;				#  Keep only the first
  }
}

# Read the user's configuration file.

&load_config_file;

#
# At this point, we should know for certain whether or not we're using
# disk-based news access or NNTP access.  If we're going to use NNTP,
# we have to have figured out what our server is.
#
# Historical note: NNTP-based access needs the &AF_INET and &SOCK_STREAM
# subroutines defined.  Before the Perl 5 port these came from
# sys/socket.ph, a perl include file that may be missing if no-one has run
# h2ph on the system, and we only tried to load it when we were running
# NNTP-based, so as not to depend on it needlessly.  Socket.pm is now
# loaded unconditionally near the top of the main program, which is why
# the sys/socket.ph check below is commented out.
#

if (!$spooldir) {

# No spool directory is configured, so articles have to come from an NNTP
# server -- and by now we must know which one.

  if (!$server) {
    &abort("Can't find an NNTP server; please define your NNTPSERVER " .
           "environment variable\nor specify an NNTP server or disk based " .
           "news access in your configuration file.");
  }

# Modified the 7th of december 1994 by Laurent VALLEE
# in purpose of compatibility with Perl 5 :
#
# no need to load the socket library because it has already been done
# at the beginning of the script. So comment the lines...

#  if ($opt_d > 1) {
#    print "Searching library directories:";
#    foreach $libr (@INC) {
#      print " $libr";
#    }
#    print " for sys/socket.ph...\n";
#  } 

#  &need_to_run_h2ph unless (eval("require <sys/socket.ph>"));

  if ($opt_d) {
    print "Using NNTP-based news access; server is $server\n";
  }

# Look up the port of the remote NNTP service (119 is standard).

  ($name, $junk, $port, $junk) = getservbyname("nntp", "tcp");
  if (!$name) {
    &abort("Can't find port number for NNTP service");
  }

# Connect to the server, note the fact for the cleanup routines, prepare
# the socket for line I/O and swallow the server's greeting banner.

  &connect_tcp(SOCKET, $server, $port, 1);	# Connect or bust
  $connected_to_server++;
  &setup_socket_io(SOCKET);
  &get_nntp_header(SOCKET);
} else {

# Articles are read straight out of the news spool.

  if ($opt_d) {
    print "Using disk-based news access; spool directory is $spooldir\n";
  }
}

#
# Make sure we're the only instance of aub running for this user (actually,
# for the set of all users having the same home directory as this user.)
#

&get_lock();

#
# Groups used to be pre-validated here.  That code was removed for speed;
# groups are now validated on the fly as the main loop reaches them.
#

# Read back what the previous run recorded about each group.
&load_aub_rcfile();

#
# Main Loop -- iterate over groups we're interested in decoding binaries from
#

foreach (@Groups) {
  $main_loop_group = $_;			# Isn't this ugly?

# Each group may have its own unpack directory, description file, hook and
# postprocessor list; anything a group does not define falls back on the
# general setting.  Binaries are created relative to the unpack directory,
# so we make it our working directory.

  $aubdir = ($aub_dir{$_}) ? $aub_dir{$_} : $aub_dir{$general};
  if (!chdir($aubdir)) {
    &warn("Could not cd to directory $aubdir...skipping group $_");
    next;
  }
  print "Assembling binaries from $_\n" if ($opt_d);
  print " Unpacking into directory $aubdir\n" if ($opt_d > 1);

  $aub_desc = ($aub_desc{$_}) ? $aub_desc{$_} : $aub_desc{$general};
  close(DESC);					# Can't hurt if not open 
   
  if (($aub_desc) && (!open(DESC, ">> $aub_desc"))) {
    &warn("Could not open (to append) description file $aub_desc...\n" .
          "...Skipping group $_");
    next;
  }

  $aub_hook = ($aub_hook{$_}) ? $aub_hook{$_} : $aub_hook{$general};

  @aub_pp = split(" ", ($postprocessor{$_} ? $postprocessor{$_} 
			                   : $postprocessor{$general}));

# Get the first and last article numbers in the group we're interested in.

  ($first, $last) = &set_group($_);
  if ((!$first) && (!$last)) {			# This group is bogus
    $_ = "";					# Don't checkpoint this
    next;
  } 

# We'll need to know next time what the last article we saw this time around
# was.

# Look at the list of unresolved articles for this group which was loaded
# from $aub_rcfile.  Any unresolved articles that are no longer stored on
# the nntp server must be discarded.  We load all other articles into memory.

  print "Previously unresolved articles: $unresolved_list{$_}\n" if ($opt_d);
  foreach (split(" ", $unresolved_list{$_})) {
    &load_article($_) unless ($_ < $first);
  } 
  $unresolved_list{$_} = "";

# We've also loaded from $aub_rcfile the article number of the last article
# we saw in this group the last time we ran.  If for some reason this has
# wound up undefined (it shouldn't be), or if the first article in the
# newsgroup is now bigger than the last article we've seen (newsgroup has
# been reset), we initialize this value.
#
# Note that when a new group is loaded for the first time, this is set to
# zero, which is most likely smaller than the first article in the group.
#

  $last_article_seen{$_} = $first - 1 
    if ((!$last_article_seen{$_}) || ($last_article_seen{$_} < $first));

  print "Last seen $last_article_seen{$_}, first $first, last $last\n"
    if ($opt_d);

# Load all of the new articles in the group into image queues

  &load_article(++$last_article_seen{$_}) 
    while ($last_article_seen{$_} < $last);

# We use the concept of 'pictures' (ie, binary images) and pieces of 
# pictures while naming our variables, but really we're just putting together
# the pieces of binary files...
#
# Each image queue (image queues are built by &load_article; see the
# discussion there) potentially contains a picture.
#
# For each potential picture, extract the name of the picture and the number
# of pieces that are known to be in it.  Build an array, @pairs, containing
# N items, where N is the number of pieces of the picture that we actually
# have, and where each item contains (encoded) the article number of the ith 
# piece.

  foreach $picture (keys %image_queues) {
    print "Image $picture..." if ($opt_d);
    ($picture_name, $picture_pieces) = split(":", $picture);
    @pairs = split("%", $image_queues{$picture});

# If we don't yet have all of the pieces of the picture, then this picture
# is unresolved.  We want to keep track of the pieces we do have in 
# a list, which we'll use later in creating our new $aub_rcfile.  We write
# out the information about unused pieces of images to our $aub_tmp file.

    if ($#pairs+1 < $picture_pieces) {		# Don't have all pieces yet
      print "incomplete\nNoting unresolved article(s) " if ($opt_d); 
      foreach $pair (@pairs) {
        ($piece, $article_no) = split(";", $pair);
        $unresolved_list{$_} = join(" ", $unresolved_list{$_}, $article_no);
        print "$article_no " if ($opt_d);
      }
      print "\n" if ($opt_d);
      next;
    }

# We've found a new, complete binary image.  Build a list of all of the
# article numbers in it, then pass this list to the &assemble_picture code,
# which creates the binary.

    undef @articles_this_picture;
    print "Complete\n" if ($opt_d);
    foreach $pair (@pairs) {
      ($piece, @articles_this_picture[++$#articles_this_picture]) = 
        split(";", $pair);
    }

# If the user has stuck a hook into aub, now's the time to ask it whether or
# not it really wants us to do the decoding.  The hook is handed the subject
# line of the first piece on its standard input; a zero exit status means
# "decode", anything else means "skip".
#
# The subject line is text from an arbitrary article, so it must never be
# interpreted by the shell.  It is passed inside single quotes, with every
# embedded single quote rewritten as '\'' (close quote, escaped quote,
# reopen quote).  Inside double quotes, as was done previously, a subject
# containing `...` or $(...) would have been executed as a command.

    if ($aub_hook) {
      $sl = &get_subj_line(@articles_this_picture[0]);

      ($hook_subject = $sl) =~ s/'/'\\''/g;
      `$ECHO '$hook_subject' 2>/dev/null | $aub_hook >/dev/null 2>/dev/null`;

      $hook_result = $?;
      print (($hook_result) ? "Hook $aub_hook returned FALSE; not decoding\n"
			   : "Hook $aub_hook returned TRUE; decoding\n")
        if ($opt_d);
    } else {
      $hook_result = 0;
    }

    if (!$hook_result) {
      (($newsgroupdir = $_) =~ y/\./\//) if ($spooldir);
      &assemble_picture(@articles_this_picture);

# Invoke a postprocessor, if necessary.  @aub_pp is a flat list of
# (pattern, postprocessor name) pairs; the first pattern that matches the
# end of the binary's name (case not significant) selects the postprocessor.

      @pp = @aub_pp;
      while ($#pp > -1) {
        if ($real_name =~ m/$pp[0]$/i) {
	  &post_process($real_name, $postprocessor_def{$pp[1]});
	  last;
        } else {
	  shift @pp;
	  shift @pp;
        }
      }
    }
  }

# Checkpoint the $aub_rcfile file.  Blow away our current %image_queues 
# array, which could well mess us up the next time around the loop.

  &checkpoint;
  undef %image_queues;	
}

#
# Cleanup code -- leave things in an orderly state
#

# Close the description file left over from the last group (if that group
# had one), tidy up, and report success.
if ($aub_desc) {
  close(DESC);
}
&cleanup;
exit(0);

#
# Subroutines -- the first bunch handle decoding images (recognition code)
#
# Things have gotten pretty ugly in here lately, with global variables
# in use all over the place...
#
# If you are hacking around in here improving aub's ability to recognize
# binary images, and you come up with something clever, let me know.
# Except, I'm not interested in hacks that modify &get_subj_line to scan
# the _body_ of the article to look for lines of the form 'begin X NNN' --
# you have to assume that people have some amount of common sense.
#

sub load_article {
#
# Examine one article and, if its subject line looks like a piece of a
# multi-part binary posting, file the article in the matching image queue.
#
# Argument:	the number of the article to examine.
# Result:	nothing useful is returned; the effect is the entry made in
#		%image_queues by &insert_q.
#
# Articles we ignore:
#
#  -- Articles with no subject line
#  -- Articles whose subject begins with "Re:"
#  -- Articles whose subject has no part indicator (see below)
#
# A part indicator is one of the following (all white space is optional,
# case is not significant, N is any number):
#
#	- N of N
#	- N fo N			(bad typist not using posting software)
#	- N / N
#	- N | N				(dyslexic poster?)
#	- N \ N				(dyslexic poster?)
#
# The first number is taken as the piece number and the second as the
# total number of pieces.  Both are run through &pad (per the original
# notes, this pads them with zeros) so that piece "02 of 3" and piece
# "2 of 3" are treated as the same thing.
#
# The name of the image only has to be the same for every piece of one
# posting and different from the names of other postings.  By default it is
# everything in the subject that precedes the part indicator.  If the
# subject contains a word that includes one of the extensions in
# @extn_hints (".gif", ".jpg", ...), the text of that word up to and
# including the extension is used instead.
#
# The extension is matched literally.  It used to be interpolated into the
# pattern as-is, so that its leading "." matched any character; ".au" then
# matched inside words like "beautiful", and unrelated postings sharing
# such a word were given the same name and lumped into one queue.
#
# Reposts get "reposted." stuck in front of the name so that their pieces
# are not mixed up with those of the original posting.  Colons in the name
# are mapped to "X", because ":" separates the name from the piece count in
# the name of an image queue ("PICTURE_NAME:NUMBER_OF_PIECES_IN_PICTURE").
#
# Within a queue the piece number serves as the priority and the article
# number as the data, so reading a queue front to back yields the article
# numbers in piece order.  There are some more smarts in &insert_q.
#

  local($article) = @_[0];
  local($sl, $hint, $piece, $pieces, $name);

  $sl = &get_subj_line($article);
  print " Got article $sl\n" if ($opt_d > 1);

  return if (($sl eq "") || ($sl =~ m/^\s*Re:/i));
  $sl =~ y/A-Z/a-z/;

  return unless (($sl =~ m/^(.*\D)(\d+)\s*o\s*f\s*(\d+)/i) ||
                 ($sl =~ m/^(.*\D)(\d+)\s*f\s*o\s*(\d+)/i) ||
                 ($sl =~ m/^(.*\D)(\d+)\s*\/\s*(\d+)/) ||
                 ($sl =~ m/^(.*\D)(\d+)\s*\|\s*(\d+)/) ||
                 ($sl =~ m/^(.*\D)(\d+)\s*\\\s*(\d+)/));

  $piece  = &pad($2);
  $pieces = &pad($3);
  $name = $1;					# Default: text before "N of N"

  foreach $hint (@extn_hints) {
    if ($sl =~ m/\S+\Q$hint\E/) {		# \Q...\E: match "." literally
      $name = $&;
      last;					# First hint in the list wins
    }
  } 

  $name = "reposted.$name" if ($sl =~ m/repost/i);
  $name =~ s/:/X/g;				# Avoid nasty, subtle bug

  print " Recognized piece $piece of $pieces, binary $name\n" if ($opt_d > 1);
  &insert_q(join(":", $name, $pieces), $piece, $article);
}

  
sub assemble_picture {
#
# Given the list of article numbers which make up an image, in piece order,
# produce the complete, uudecoded file in the current directory.
#
# Arguments:	the article numbers of the pieces, first piece first.
# Result:	nothing useful is returned.  On success the binary exists
#		under the name left in the global $real_name, and the
#		description text gathered from the first article has been
#		appended to DESC (when $aub_desc is set).
#
# Every line of every article is fed through the state machine in
# &process_line, which makes sure that only uuencoded data is decoded, not
# news envelope or text or .sig garbage that the poster may have included.
# &process_line also opens DECODE on $temp_decode_file and sets $real_name
# when it sees the "begin" line.
#
# The state machine is neither immediately obvious nor completely
# documented, and it is easy to break.  This code trades some readability
# for speed, since it does a lot of talking to the NNTP server / reading of
# news files.
#
# We take care not to generate images which exactly match previously
# generated images with similar names.
#

  local($state, $last_piece, $rawfile);

#
# If we're in catch up mode, we don't want to do much.  We will go as far
# as to list the article numbers we would have assembled had we been really
# decoding articles.  This information can be very useful during debugging.
#

  if ($opt_c) {
    return unless ($opt_d);
    print "Catch-up mode...skipping assembly of articles ";
    foreach (@_) {
      print "$_ ";
    } 
    print "\n";
    return;
  }

# Yes, all of these are global...  ($state and $last_piece are local() to
# this subroutine, which still leaves them visible to &process_line.)

  $state = 0;					# Initialize state machine
  $last_piece = 0;				# Set for last piece of image
  $desc_text  = "";				# Text describing this image
  $desc_lines = 0;				# Running count $desc_text lines
  $real_name = "";				# Name of binary actually made
  $possible_duplicate = "";			# This is a global

  print " Assembling articles " if ($opt_d > 1);
  if (!$spooldir) {				# ***Using NNTP***

    foreach (@_) {				# For each piece...
      print "$_ " if ($opt_d > 1);
      $state = 2 if ($state);			# Once past state 0, each new
						#  piece starts in state 2
      $last_piece++ if ($_ eq $_[$#_]);		# Set on last piece of image

      &putline(SOCKET, "body $_");		# Ask for article text
      $data = &getline(SOCKET);			# Get header response

      if ($data !~ m/^222\s/) {
        &warn("Could not get body of article $_");
        close(DECODE);				# Abandon ship
        unlink($temp_decode_file);
        return;					# Perhaps it just got expired
      }

      while (1) {  				# Not infinite, just fast
        $data = &getline(SOCKET);		# Get a line of the article
        last if ($last_nntp_line);		# Last line this article
        next if ($state == 3);			# Dump remainder of article
        $state = &process_line($data, $state);	# One step thru state machine
      } 
    }
  } else {					# ***Using raw disk files***

    foreach (@_) {				# For each piece
      print "$_ " if ($opt_d > 1);
      $state = 2 if ($state);			# Same rule as for NNTP
      $last_piece++ if ($_ eq $_[$#_]);		# Set on last piece of image
      $rawfile = "$spooldir/$newsgroupdir/$_";	# Filename of the article

      if (!open(RAWNEWS, "< $rawfile")) {	# Can't read the article?
        &warn("Could not open $rawfile for reading");
        close(DECODE);
        close(RAWNEWS);
        unlink($temp_decode_file);
        return;
      }

      do {					# Discard the news envelope,
        chop($data = <RAWNEWS>);		#  up to the first blank line
        print "  Discard: $data\n" if ($opt_d > 2); 
      } while ($data !~ m/^\s*$/);

      foreach $data (<RAWNEWS>) {
        last if ($state == 3);
        chop $data;
        print "  Read: $data\n" if ($opt_d > 2);
        $state = &process_line($data, $state);
      }
      close(RAWNEWS);
    }
  }
  close(DECODE);				# Temporary file

# $real_name is only ever set by &process_line when it finds the "begin"
# line.  If it is still empty, none of the articles contained uuencoded
# data: nothing was decoded, no temporary file was created, and there is
# nothing to rename or describe.  (We used to go on and run mv with an
# empty target name, and append the gathered text to the description file
# even though no binary had been produced.)

  if ($real_name eq "") {
    print "\nNo \"begin\" line found; nothing decoded\n" if ($opt_d);
    return;
  }

# &process_line sets $possible_duplicate to the name we collided with and
# appends "+" to $real_name until it is unused.  Compare what we decoded
# with every existing file along that chain of names ("name", "name+",
# ...); if any is identical, discard the new copy.

  if ($possible_duplicate) {
    while ($possible_duplicate ne $real_name) {
      if (&identical($temp_decode_file, $possible_duplicate)) {
        print "\n" if ($opt_d > 1);
        print "Suppressing $real_name; would duplicate $possible_duplicate\n"
	  if ($opt_d);
	unlink($temp_decode_file);
        $suppressed++;
        return;
      }
      $possible_duplicate .= "+";
    }
  }

  `$MV '$temp_decode_file' '$real_name' >/dev/null 2>&1`;
  print DESC $desc_text if ($aub_desc);		# Print only if we decode
  print "\nActual image name is $real_name\n" if ($opt_d > 1);
}


sub process_line {
#
# One step of the state machine that picks the uuencoded data out of the
# articles making up a binary.
#
# Arguments:	a line of article text (no trailing newline), and the
#		current state.
# Returns:	the new state.
#
# Decoded data is written to DECODE, which is opened on $temp_decode_file
# when the "begin" line is seen.  The "begin" line also sets the globals
# $real_name and (if a file of that name already exists)
# $possible_duplicate.  Text seen before the "begin" line is gathered in
# $desc_text when $aub_desc is set.  $last_piece is read to tell whether
# the current article is the last piece.
#
# States are tested in order of how often they occur, to make things go
# fast...
#
#  STATE		MEANING
#
#    0			Processing first article, "begin" line not yet seen.
#    1			Processing a not-last article, data lines should
#			begin with "M"
#    2			Processing non-first article, scanning for 
#			continuation of binary data (M-lines)
#    3			Through with an article; further data from this
#			article should be discarded.
#    4			Processing last article, data lines should be copied
#			until 'end' seen.
#
# State 2 contains a hack for comp.binaries.os2, where postings carry lines
# beginning with 'Minimum-OS2-Version:' before the uuencoded data continues
# in pieces 2+ of the binary.  The leading 'M' made us expect uuencoded
# data to follow immediately, and we dropped into state 3 when it didn't.
# We can't simply dump anything beginning with /M\S+: /, since that is
# perfectly legal uuencoded data.
# 

  local($data, $state) = @_;
  local($begin, $mode, $rest);
  local($dashes) = "-------";

  if ($state == 1) {			# State 1 == most common state
    return 3 unless ($data =~ m/^M/);	# End of binary data this piece
    print DECODE unpack("u", "$data\n");# This is uuencoded data; decode it
    return 1;				# Expect to copy more binary data
  }

  if ($state == 4) {			# State 4 == 2nd most common state
    return 3 if ($data =~ m/^end/);	# Discontinue decoding when 'end' seen
    print DECODE unpack("u", "$data\n");# Decode this data, except for 'end'
    return 4;				# Continue this state until 'end'
  }
    
  if (!$state) {			# State 0 == 3rd most common state
    if ($data !~ m/^begin [0-9]/) {	# Looking for "begin"; not seen yet?
      return 0 if ($data =~ m/\-\-\-/); # HACK -- keep ugliness out of $aub_desc
      return 0 if ($desc_lines++ > 60); # HACK -- some morons post sh archives
      chop $desc_text if ($spooldir);	# HACK -- dump trailing \n when non-NNTP
      $desc_text = join("\n", $desc_text, $data) if ($aub_desc);
      return 0;				# Copy to desc file if $aub_desc defn.
    }

    ($begin, $mode, $real_name, $rest) = split(/\s+/, $data, 4);

# Sanity check binary names...we don't like things posted with pathname
# components in the name, weird characters, names beginning with ".", or 
# binaries named "." or ".."
#
# Only letters, digits and + = _ : ; . , - survive.  The "-" has to come
# last in the tr list to be taken literally.  The list used to read
# "[A-Z][a-z][0-9]+-=...", in which "+-=" is a range running from "+" to
# "=" (letting "/" and "<" through), and the brackets stood for themselves.
# All leading dots are removed, not just the first, and only after the
# other characters have been weeded out, so that no dot-file can result.

    $real_name = $1 if ($real_name =~ m/\/([^\/]+)$/);
    $real_name =~ tr/A-Za-z0-9+=_:;.,-//cd;
    $real_name =~ s/^\.+//;
    $real_name = "Mangled" if (($real_name eq ".") || ($real_name eq "..") ||
			       ($real_name eq ""));

# We don't like mode zero binaries.  Judge the mode by its octal value, so
# that "000" is caught as well as "0".

    $mode = 644 if (!oct($mode));

    if (-e $real_name) {
      $possible_duplicate = $real_name;	# Would collide with this...
      $real_name .= "+" while (-e $real_name);
    }

    $desc_text = 			# Stuff group, name into desc info
      "$dashes <$main_loop_group:$real_name> $dashes\n$desc_text\n\n"
      if ($aub_desc);

    unlink($temp_decode_file);
    (open(DECODE, "> $temp_decode_file")) || 
      &abort("Could not open temporary file $temp_decode_file for writing");

# The mode comes straight from the posting.  Honour the permission bits
# only; never hand out setuid, setgid or sticky bits on a poster's say-so.

    chmod(oct($mode) & 0777, $temp_decode_file);

    return 1 if (!$last_piece);		# If 1-N-1, -> state 1, else -> state 4
    return 4;
  }

  if ($state == 2) {			# Looking for beginning of >1st piece
    return 2 unless ($data =~ m/^M/);	# Haven't found it yet.

    return 2 if (length($data) < 61);	# Want uuencoded data, not just anything
    return 2 if ($data =~ m/^Minimum-OS2-Version:/); 	# What next?  
    print DECODE unpack("u", "$data\n");# Found it, need to decode it
    return 1 if (!$last_piece);		# Not last piece -> state 1
    return 4;				# Copy last piece data from state 4.
  }

# We should not be called when in state 3; all other states are undefined.
  &abort("Logic error in state machine");
}


sub insert_q {
#
# Insert an item into an image queue, keeping the queue ordered by
# priority.
#
# Arguments:	the name of the queue (a key of %image_queues), the
#		priority of the item, and the item itself.
# Result:	$image_queues{name} is updated; nothing useful is returned.
#
# A queue is just a string.  Each entry is stored as "$priority;$item",
# and entries are separated by "%" characters.  Since our items are made
# entirely of numbers, we don't need to worry about queue data that
# happens to contain one of the delimiters.
#
# If the queue already holds an entry with the same priority, the new item
# replaces it instead of being added.  There should be one and only one
# piece P of Q in any given binary, and since the caller works through the
# articles in increasing article number, we assume that the later article
# supersedes the earlier one.  This is only a heuristic -- a larger article
# number does not have to mean a more recent article -- but it helps a lot
# more often than it hurts.
#
# The arguments and the loop index are local(); they used to be assigned
# to the globals $queue, $priority, $item and $i, overwriting whatever the
# rest of the program had in variables of those names.
#

  local($queue, $priority, $item) = @_;
  local(@a_queue) = split("%", $image_queues{$queue});
  local($i);

# Find the first entry whose priority is not smaller than ours.  An entry
# reads "priority;item"; in a numeric comparison only its leading digits
# (the priority) count.  If there is no such entry, $i ends up one past
# the end of the queue.

  for ($i=0; $i <= $#a_queue; $i++) {
    last if $a_queue[$i] >= $priority;
  }

  if ($a_queue[$i] =~ m/^${priority};/) {	# Same piece seen before
    $a_queue[$i] = "$priority;$item";
    print " Replacing piece $priority of $queue with (presumably) newer data\n" 
      if ($opt_d> 1);
  } else {
    splice(@a_queue, $i, 0, "$priority;$item");
  }

  $image_queues{$queue} = join("%", @a_queue);
}


#
# More subroutines -- These deal with accessing news articles.
#

sub get_subj_line {
#
# Return the subject of the specified article, without the "Subject:"
# part.
#
# Argument:	the article number (the last argument is used).
# Returns:	the text of the subject line, or "" if the article cannot be
#		read or has no subject header.
#
# With disk-based access ($spooldir set) the article file
# $spooldir/$newsgroupdir/<number> is read directly.  Only the headers are
# searched -- reading stops at the first blank line -- and the header name
# must start the line.  (An unanchored match over the whole article used to
# be fooled by headers like "X-Original-Subject:" and, for articles
# lacking a subject, by body text containing "Subject:".)
#
# With NNTP access, the first call finds out whether the server understands
# the XHDR enhancement by trying it; the answer is remembered in the global
# $xhdr_supported.  After that, XHDR is used when it is supported, and the
# full headers are fetched with HEAD otherwise.
#
# NOTE(review): the original comment says XHDR is not used when "the NOHXDR
# keyword appeared in the configuration file".  Nothing in this subroutine
# looks at the configuration; presumably the configuration loader presets
# $have_gotten_subj_line_before to suppress the probe -- confirm.
#

  local($article_no) = pop(@_);
  local($sub_line) = "";
  local($resp);

# If $spooldir is defined, we're accessing articles directly instead of 
# using the NNTP protocol.

  if ($spooldir) {
    return "" unless (open(SUBJECT, "$spooldir/$newsgroupdir/$article_no"));
    while (!eof(SUBJECT)) {
      $sub_line = <SUBJECT>;
      last if ($sub_line =~ m/^\s*$/);		# Blank line: end of headers
      next unless ($sub_line =~ m/^Subject:\s*(.*)$/);
      $sub_line = $1;
      close(SUBJECT);
      return $sub_line;
    } 
    close(SUBJECT);
    return "";
  }

# At this point we know we're using NNTP.  See whether or not we've already
# decided if XHDR works or not.  If we haven't decided yet, we decide now.

  if (!$have_gotten_subj_line_before) {		# Another global variable
    $have_gotten_subj_line_before++;
    &putline(SOCKET, "xhdr subject $article_no");
    $resp = &getline(SOCKET);

    if ($resp =~ m/^221\s/) {			# XHDR is supported!
      $xhdr_supported++;			# Yup, this is global too
      do {
        $resp = &getline(SOCKET);		# Data's pending, so we use it
        $sub_line = $1 if ($resp =~ m/^\d+\s+(.*)$/);
      } until ($last_nntp_line);		# Same test as the loop below
      return $sub_line;				# There's the answer
    } 
  }

# How we make the NNTP query for the subject line depends on whether or not
# we can use XHDR.

  &putline(SOCKET, ($xhdr_supported) ? "xhdr subject $article_no" 
				     : "head $article_no");
  $resp = &getline(SOCKET);
  return "" unless ($resp =~ m/^221\s/);	# Not the expected response 

# An XHDR reply line reads "<article number> <subject>"; a HEAD reply is
# the complete set of headers.  Either way we read up to the terminating
# "." line, so that nothing is left pending on the socket.

  do {
    $resp = &getline(SOCKET);
    if ($xhdr_supported) {
      $sub_line = $1 if ($resp =~ m/^\d+\s+(.*)$/);
    } else {
      $sub_line = $1 if ($resp =~ m/^Subject:\s*(.*)$/);
    }
  } until ($last_nntp_line);

  return $sub_line;
}


sub set_group {
#
# Select the group we're interested in and find out which articles it
# holds.
#
# Argument:	the name of the newsgroup.
# Returns:	a list (f,l), where f is the first available article in the
#		group and l is the last.  (0,0) indicates that we've run
#		into a problem (a warning has been issued), or that a spool
#		directory holds no articles.
#
# With disk-based access, the group's directory under $spooldir is scanned
# for files with all-numeric names, and the smallest and largest numbers
# are returned.  $newsgroupdir (the group name with "." turned into "/")
# is set as a side-effect.
#
# With NNTP access, the 'GROUP' command is sent to the server, which makes
# the group the current one, and the article numbers are taken from the
# server's reply.
#

  local($group_to_examine) = @_[0];
  local($data, $file);
  local($min) = "first_time";
  local($max) = 0;

  if ($spooldir) {				# Disk based spool data
    ($newsgroupdir = $group_to_examine) =~ y/\./\//;
    if (!opendir(DIRECTORY, "$spooldir/$newsgroupdir")) {
        &warn((-d "$spooldir/$newsgroupdir") ? 
              "Cannot open $spooldir/$newsgroupdir; skipping it." :
              "Invalid group: $group_to_examine...ignoring");
        return (0,0);
    }

    foreach $file (readdir(DIRECTORY)) {
      next if (($file eq ".") || ($file eq ".."));
      next unless ($file =~ m/^\d+$/);		# Articles have numeric names
      
      $min = $file if (($min eq "first_time") || ($file < $min));
      $max = $file if ($file > $max);
    }

# A handle from opendir has to be released with closedir.  close() works on
# the file handle of the same name and left the directory open, one more
# for every group processed.

    closedir(DIRECTORY);

    return ($min eq "first_time" ? 0 : $min, $max);
  }

  &putline(SOCKET,"group $group_to_examine");
  $data = &getline(SOCKET);

  if ($data !~ m/^211\s/) {			# No such group
    &warn("Invalid group: $group_to_examine...ignoring");
    return (0,0);
  }

# The reply reads "211 <count> <first> <last> <group>".  If it cannot be
# picked apart, say so and report a problem, instead of returning whatever
# $1 and $2 happen to hold from an earlier match.

  if ($data =~ m/^211\s+\d+\s+(\d+)\s+(\d+)\s/) {
    return ($1,$2);
  }

  &warn("Unexpected reply to GROUP $group_to_examine: $data...ignoring");
  return (0,0);
}


sub get_nntp_header {
#
# Read the greeting banner the NNTP server sends when we connect, and make
# sure the server is willing to talk to us.  Only used for NNTP access.
#
# Argument:	the socket (the last argument is used).
# Returns normally when the banner starts with 200 or 201; otherwise the
# program is aborted.
#

  local($sock) = pop(@_);
  local($line) = &getline($sock);

# A banner mentioning INN gets a "mode reader" command; the reply to it is
# read and thrown away.  (Code by mwe@dfw.net; approved by mfs, 2/95.)

  if ($line =~ m/INN/) {
    &putline($sock, "mode reader");
    &getline($sock);
  }

  return if ($line =~ m/^20[01]\s/);		# 200 or 201: we're welcome

  if ($line =~ m/^400\s/) {
    &abort("Remote nntp service is too busy to talk to us now.");
  }
  &abort("Remote nntp service doesn't look like nntp service to me.");
}


sub setup_socket_io {
#
# Turn off output buffering on the socket we're handed (the last argument),
# and install the handler that fires when the server keeps us waiting for
# too long.
#
# There's no separate initialization section in this program, which is
# why the alarm handler gets installed here.
#

  local($sockname) = $_[$#_];

  $SIG{"ALRM"} = 'getline_timeout';

# Classic idiom: select the socket, set $| on it, then re-select whatever
# was the default output filehandle before.

  select((select($sockname), $| = 1)[0]);
}


sub getline {
#
# Get a line of data from a socket.  This code is used only if our news
# access is NNTP-based.
#
# Argument: the socket to read from (last argument passed).
#
# Returns the next line sent by the server, without its trailing CR/LF and
# with NNTP's doubling of a leading "." undone.
#
# Globals:
#   $socket_stuff     buffer of data received but not yet handed out
#   $last_nntp_line   set true when the line is the lone "." that ends a
#                     multi-line response, false otherwise
#   $timeout_flag     reset to 0 on every call
#   $timeout_interval seconds we're prepared to wait for a complete line
#
# We abort if the server goes quiet for too long (via SIGALRM), if the
# connection is closed or fails, or if a line isn't CR/LF terminated.
#

  local($sockname) = pop(@_);
  local($data) = "";
  local($inp, $got);

  $timeout_flag = 0;				# Not a local variable
  alarm($timeout_interval);			# Can't wait forever.

  while ($socket_stuff !~ m/\n/) {		# No complete line in buffer
    $inp = "";
    $got = recv($sockname, $inp, 256, 0);	# Need more data

# recv hands back undef on error, and an empty buffer once the server has
# closed the connection.  Without this check we'd spin here at full speed
# until the alarm went off -- or forever, if no timeout is in force.

    if (!defined($got) || (length($inp) == 0)) {
      alarm(0);
      &abort("Connection to NNTP server was lost");
    }
    $socket_stuff .= $inp;			# Append to our buffer.
  }
  alarm(0);					# Got data.  Shut off alarm.

  &abort("Line from server was too long") if    # Still not end of line?
    ($socket_stuff !~ m/\r\n/);
  
  ($data, $socket_stuff) = split("\r\n", $socket_stuff, 2);
  $last_nntp_line = ($data eq "\.");		# Can't confuse with ".."
  $data =~ s?\.?? if ($data =~ m/^\.\./);	# NNTP doubles leading "."

  print "  Received: $data\n" if ($opt_d > 2);
  return $data;					
}


sub getline_timeout {
#
# Installed as the SIGALRM handler by &setup_socket_io.  If the server has
# stopped talking to us there is nothing sensible left to do but give up.
#

  local($complaint) =
    "NNTP server not responding after $timeout_interval seconds.";

  &abort($complaint);
}


sub putline {
#
# Write one line to a socket, adding the CR/LF terminator.
#
# The last two arguments are the socket and the text to send, in that
# order.  At debugging levels above 2 the text is echoed to the default
# output as well.
#

  local($sockname, $line) = @_[$#_ - 1, $#_];

  print $sockname $line . "\r\n";
  ($opt_d > 2) && print "  Sent: $line\n";
} 


sub connect_tcp {
#
# Connect to a tcp port on some host.  This code is useful in more places
# than just in aub.
#
# Arguments, in order:
#
#   socket	filehandle to open the connection on
#   server	name of the host to connect to
#   port	number of the tcp port on that host
#   e		error policy: 0 = return on error, >0 = abort on error
#
# Returns 1 once the connection is established.  On failure we either abort
# (e true) or return e, which is then necessarily false.  Previously success
# also produced a false value, so a caller passing e == 0 had no way of
# telling the two outcomes apart.
#
# $name, $ouraddr, $theiraddr, $proto, $us and $them are assigned as globals,
# as they always have been.
#

  local($e) = pop(@_);	      # 0=return on err, >0 = print error, abort on err
  local($port) = pop(@_);     # port to connect to
  local($server) = pop(@_);   # name of server to connect to
  local($sockname) = pop(@_); # socket to use

  local($packing_template) = "S n a4 x8";	# family, port, address, padding
  local($protocol) = "tcp";
  local($thishost, $problem, $junk);

  $thishost = `$HOSTNAME`; chop $thishost;

# Figure out our address...
  ($name, $junk, $junk, $junk, $ouraddr) = gethostbyname($thishost);
  if ($name eq "") {
    $problem="Can't get address of this host (\"$thishost\")";
    &abort($problem) if $e;
    return $e;
  }
 
# And the address of the host we want to connect to
  ($name, $junk, $junk, $junk, $theiraddr) = gethostbyname($server);
  if ($name eq "") {
    $problem = "Can't find address of server $server";
    &abort($problem) if $e;
    return $e;
  }

# Get the number of the protocol we're to use
  ($name, $junk, $proto) = getprotobyname($protocol);
  if ($name eq "") {
    $problem="Unrecognized protocol: $protocol";
    &abort($problem) if $e;
    return $e;
  }

# $us isn't used by this routine (we never bind); it is still computed
# because it has always been left behind as a global.
  $us = pack($packing_template, &AF_INET, 0, $ouraddr);
  $them = pack($packing_template, &AF_INET, $port, $theiraddr);

# Get a socket filehandle
  if (!(socket($sockname, &AF_INET, &SOCK_STREAM, $proto))) {
    $problem="Could not create socket";
    &abort($problem) if $e;
    return $e;
  }

  if (!connect($sockname, $them)) {
    $problem="Could not connect to server";
    &abort($problem) if $e;
    close($sockname);		# Don't leave a dead socket lying around
    return $e;
  }

  return 1;
}


#
# Subroutines -- dedicated to supporting postprocessors
#

sub post_process {
#
# Invoke postprocessor command $cmd on $file.
#
# Before the command is run, every unescaped "$h", "$t" and "$f" in it is
# replaced by the head, the tail and the whole of $file respectively, where
# head and tail are whatever comes before and after the first "." in the
# name.  "\$h", "\$t" and "\$f" are passed on as literal "$h", "$t", "$f".
#
# All placeholders are expanded in a single left-to-right pass.  That way
# one at the very start of the command, or directly following another
# placeholder, is expanded too, and text we've just substituted in is never
# rescanned for further placeholders.
#
# Returns whatever the command wrote to standard output.
#
# The error checking here is pretty minimal...
#
# NOTE(review): the file name is pasted into a shell command unquoted, and
# where these names come from is decided by callers outside this routine.
# Quoting can't be added here without breaking existing DEFine commands that
# already put their own quotes around \$f, so this is flagged, not changed.
#

  local($file, $cmd) = @_;
  local($head, $tail);
  local(%expansion);

  if ($file =~ m/\./) {
    $head = $`;					# Though $' and $` look similar
    $tail = $';					#  one's forward, one's backward
  } else {
    $head = $file;				# If no "." in image name, 
    $tail = "";					#  there is no tail
  }

  %expansion = ("h", $head, "t", $tail, "f", $file);

# $1 is the optional escaping backslash, $2 the placeholder letter.
  $cmd =~ s/(\\?)\$([htf])/($1 ne "") ? "\$$2" : $expansion{$2}/ge;

  print "Postprocessing: $cmd\n" if ($opt_d);
  
  `$cmd`;
}


#
# Subroutines -- dedicated to loading and parsing configuration files and 
#		 files maintained by aub.
#

sub load_config_file {
#
# Load the configuration file, which is now line-oriented.
#
# We provide the user with some helpful hints if we run into problems opening
# or processing his configuration file.
#
# $current_group is a global variable used to keep track of which group 
# current keywords are to be applied to.
#
# &parse_line does most of the work; we also use it to verify that anything
# from the environment we'll be using is legitimate.
#
# Once the file has been read we check that at least one group was named,
# that every group has somewhere to put its binaries, and that we know where
# news is to come from; $server is settled here as well.  Finally the entries
# of @extn_hints are regexp-escaped.
#

  local($cmd, $rest, $grp);

  if (!open(CONFIG, $aub_config)) {
    if (-f $obsolete_init) {
      print <<"EOF";

        Hi there.  It looks like you're running aub version $version for
the first time.

        The format of aub's configuration file has changed since the
version of aub you last ran.  You need to create a new one.  It's very
easy, and you can get all the details from this program.  You'll want 
to call your new configuration file \$HOME/$mini_aub_config.
 
        You can print the short form of the documentation by executing
'aub -m', and the long form by executing 'aub -M'.  Also, you can read
about changes made since the last version of aub by executing 'aub -C'.

	If you yet read the new documentation for aub v$version, it would really
be a good idea to go over the whole thing again.  Many things have changed.

                                                Mark Stantz

EOF
      exit(1);
    }

    &abort("Couldn't open your configuration file \"$aub_config\".\n" .
           "Execute 'aub -m' (short form) or 'aub -M' (long form) if you " .
           "need help\nfiguring out how to create a configuration file");
  }

# Validate any information which might be given to us via the environment.
# Each value is handed over whole: &parse_line does its own splitting, and
# pre-splitting here would push the words of a multi-word value into the
# wrong parameters.

  $current_group = $general;
  &parse_line("directory", $aub_dir{$general}, 
              "environment variable \$AUBDIR") if ($aub_dir{$general});
  &parse_line("desc", $aub_desc{$general}, 
              "environment variable \$AUBDESC") if ($aub_desc{$general});
  &parse_line("hook", $aub_hook{$general}, 
              "environment variable \$AUBHOOK") if ($aub_hook{$general});

# Load and validate the configuration file

  foreach (<CONFIG>) {
    s/\n$//;                                    # Trim newline, if there is one
    s/^#.*$//;                                  # Remove comments...
    s/([^\\])#.*$/$1/;                          #  keeping the char before '#'
    s/\\#/#/g;					# Unescape escaped comments
    next if m/^\s*$/;                           # Skip blank lines
    s/^\s+//;                                   # Drop all leading and trailing
    s/\s+$//;                                   #  white space

    ($cmd, $rest) = split(/\s+/, $_, 2);
    &parse_line($cmd, $rest, "configuration file $aub_config");
  }
  close(CONFIG);

# Make sure that certain things we need to know have been defined

  &conf_err("configuration file $aub_config", 
            "Configuration file doesn't specify any groups to access")
    if ($#Groups < 0);

  if (!$aub_dir{$general}) {
    foreach $grp (@Groups) {
      next if ($aub_dir{$grp});

      &conf_err("configuration file $aub_config", 
                "No directory to put decoded binaries in specified.\n Please " .
                "set the AUBDIR environment variable or modify your " .
                "configuration file");
    }
  }

# Use configuration file NNTP server if specified.  Otherwise use NNTP 
# environment variable.  Otherwise use deduced NNTP server, if any.
# All of this is ignored if $spooldir is ever set, in which case disk-based
# access will be used.

  $server = $nntpserver if ($nntpserver);
  $server = $defserver unless ($server);

  if (($server eq "") && ($spooldir eq "")) {
    &conf_err("configuration file $aub_config", 
              "No news spool directory or NNTP server specified or " .
              "locatable.\n Please specify where to access news in the " .
              "configuration file, or by\n setting the NNTPSERVER " . 
	      "environment variable");
  }

  foreach (@extn_hints) {			# dispel regexp magic (Tom
    s/(\W)/\\$1/g;				#  Christiansen's term)
  }
}


sub parse_line {
#
# Act on a single configuration directive.
#
#   $cmd	the keyword
#   $args	everything that followed the keyword
#   $err_type	where the directive came from; only used in error messages
#
# Keywords are recognised by prefix, without regard to case, and are tried
# in the order they appear below.  Each branch checks its arguments with
# &conf_err (which does not return), records the setting, and returns.
# Settings recorded per group go under $current_group; for most of them the
# first value recorded wins.
#

  local($cmd, $args, $err_type) = @_;
  local(@args) = split(/\s+/, $args);
  local($extn, $group, $pp_name, $pp_cmd);
  local($argc) = scalar(@args);			# Number of arguments given

  if ($cmd =~ m/^dir/i) {			# Directory keyword
    $argc ||
      &conf_err($err_type, "No directory specified with DIRectory keyword");
    ($argc > 1) &&
      &conf_err($err_type,
                "Too many arguments specified with DIRectory keyword");

    (-d $args) ||
      &conf_err($err_type, "Supposed directory $args is not a directory");
    (-w $args) ||
      &conf_err($err_type, "Directory $args is not writable");
    (-x $args) ||
      &conf_err($err_type, "Directory $args is not searchable (executable)");

    $aub_dir{$current_group} || ($aub_dir{$current_group} = $args);
    return;
  }
  elsif ($cmd =~ m/^desc/i) {			# Description file argument
    $argc ||
      &conf_err($err_type,
                "No description file specified with DESCription keyword");
    ($argc > 1) &&
      &conf_err($err_type,
                "Too many arguments specified with DESCription keyword");

    open(TEST, ">> $args") ||			# Must be able to append to it
      &conf_err($err_type, "Could not append to description file $args");
    close(TEST);

    $aub_desc{$current_group} || ($aub_desc{$current_group} = $args);
    return;
  }
  elsif ($cmd =~ m/^hook/i) {			# Hook program
    $argc ||
      &conf_err($err_type, "No hook program specified with HOOK keyword");
    ($argc > 1) &&
      &conf_err($err_type, "Too many arguments specified with HOOK keyword");

    $args = &find_pg($args, 1);
    (-f $args) ||
      &conf_err($err_type,
             "Supposed hook program $args nonexistent or not a plain file");
    (-x $args) ||
      &conf_err($err_type, "Supposed hook program $args is not executable");

    $aub_hook{$current_group} || ($aub_hook{$current_group} = $args);
    return;
  }
  elsif ($cmd =~ m/^def/i) {			# Postprocessor definition
    $argc ||
      &conf_err($err_type, "No postprocessor named with DEFine keyword");

    $args[0] =~ tr/A-Z/a-z/;			# Names are case-insensitive
    ($argc == 1) &&
      &conf_err($err_type,
                "No command associated with postprocessor $args[0]");

    ($pp_name, $pp_cmd) = split(/\s+/, $args, 2);	# Command kept verbatim
    $pp_name =~ tr/A-Z/a-z/;
    $postprocessor_def{$pp_name} = $pp_cmd;
    return;
  }
  elsif ($cmd =~ m/^post/i) {			# Postprocessor usage
    $argc ||
      &conf_err($err_type, "No postprocessor named with POSTprocess keyword");

    $pp_name = shift(@args);			# What's left are extensions
    $pp_name =~ tr/A-Z/a-z/;
    $postprocessor_def{$pp_name} ||
      &conf_err($err_type, "Postprocessor $pp_name used while undefined");
    ($argc == 1) &&
      &conf_err($err_type, "Postprocessor $pp_name applied to no extensions");

    foreach $extn (@args) {
      $extn =~ s/(\W)/\\$1/g;			# Dispel regexp magic
      $postprocessor{$current_group} .= "$extn $pp_name ";
    }
    return;
  }
  elsif ($cmd =~ m/^rec/i) {			# Recognize an extension
    $argc ||
      &conf_err($err_type, "No extensions named with RECognize keyword");

    foreach $extn (@args) {			# We'll dispel regexps later
      grep(($_ eq $extn), @extn_hints) || push(@extn_hints, $extn);
    }
    return;
  }
  elsif ($cmd =~ m/^noxhdr/i) {			# NOXHDR keyword
    ($argc == 0) ||
      &conf_err($err_type, "Argument specified with NOXHDR keyword");

    $have_gotten_subj_line_before++;		# Don't bother trying XHDR
    return;
  }
  elsif ($cmd =~ m/^spool/i) {			# Spool directory
    $argc ||
      &conf_err($err_type, "No argument specified with SPOOL keyword");
    ($argc > 1) &&
      &conf_err($err_type, "Too many arguments specified with SPOOL keyword");

    $nntpserver &&
      &conf_err($err_type, "Cannot both NNTP and spooled news files as input");

    (-d $args) ||
      &conf_err($err_type, "Supposed spool directory $args is not a directory");
    (-x $args) ||
      &conf_err($err_type,
                "Spool directory $args is not searchable (executable)");

    $spooldir || ($spooldir = $args);
    return;
  }
  elsif ($cmd =~ m/^nntp/i) {			# NNTP server
    $argc ||
      &conf_err($err_type, "No argument specified with NNTPserver keyword");
    ($argc > 1) &&
      &conf_err($err_type,
                "Too many arguments specified with NNTPserver keyword");

    $nntpserver || ($nntpserver = $args);	# We'll validate this later
    return;
  }
  elsif ($cmd =~ m/^debug/i) {			# Turn on debugging
    $argc ||
      &conf_err($err_type, "No argument specified with DEBUG keyword");
    ($argc > 1) &&
      &conf_err($err_type, "Too many arguments specified with DEBUG keyword");

    ($args =~ m/^\d+$/) ||
      &conf_err($err_type, "Debug level is not a number");

    $opt_d || ($opt_d = $args);			# An existing true level wins
    return;
  }
  elsif ($cmd =~ m/^group/i) {			# A group, or group list
    $argc ||
      &conf_err($err_type, "No argument specified with GROUP keyword");

    foreach $group (@args) {			# Lower-cases @args in place
      $group =~ tr/A-Z/a-z/;
      grep(($_ eq $group), @Groups) || push(@Groups, $group);
    }
    $current_group = $args[$#args];		# Last one named becomes current
    return;
  }
  else {
    &conf_err($err_type, "Unrecognized keyword: $cmd");
  }
}


sub conf_err {
#
# Report a configuration problem and give up.
#
# The first argument says where the offending setting came from, the second
# what is wrong with it.  We never return: the process exits with status 1.
#

  local($where, $what) = @_;

  print "Error in " . $where . ":\n " . $what . ".\n";
  exit(1);
}
    
sub load_aub_rcfile {
#
# Read $aub_rcfile, the record of our progress left behind by earlier runs.
#
# The file is a series of "group:last-article" lines, each of which may be
# followed by lines holding the number of one unresolved article in that
# group.  From these we fill in %last_article_seen and %unresolved_list
# (a space-separated string per group) and append every group named in the
# file to @Groups_known.  Anything else in the file is a fatal error.
#
# If there is no such file yet, every group in @Groups simply starts out
# with nothing seen and nothing unresolved.
#

  local($group) = "";
  local($entry);

  if (! -e $aub_rcfile) {				# First run, presumably
    foreach $entry (@Groups) {
      if ($entry ne "") {
        $last_article_seen{$entry} = 0;			# Nothing seen yet
        $unresolved_list{$entry} = "";			# No unresolved articles
      }
    }
    return;
  }

  open(RCFILE, $aub_rcfile) || &abort("Can't open $aub_rcfile");
  chop(@Rc = <RCFILE>);
  close(RCFILE);

  foreach $entry (@Rc) {
    if ($entry =~ m/^([^:]+):(.+)$/) {			# Start of a new group
      $group = $1;
      $last_article_seen{$group} = $2;
      push(@Groups_known, $group);
    } else {						# An unresolved article
      if (($group eq "") || ($entry !~ m/^\d+$/)) {
        &abort("$aub_rcfile mangled...please fix or discard");
      }
      $unresolved_list{$group} = join(" ", $unresolved_list{$group}, $entry);
    }
  }
}
    

sub checkpoint {
#
# We've just processed all of the new articles in a given group.  We want
# to create a new $aub_rcfile in case someone decides to interrupt us, so
# that all our pointers won't be lost.
#
# When aub is run with the -n option, all checkpointing is suppressed and
# the $aub_rcfile is never modified.  This is useful if you want to 'peek 
# ahead' and see what you'll be getting later -- not something you probably
# want to do very often, but it's useful in debugging...especially in 
# conjunction with -c.
#
# Anything in @Groups_known -- that is, anything that was in the $aub_rcfile
# when it was first read in -- will be copied to all subsequent versions
# of the $aub_rcfile.  However, any invalid groups introduced by new 
# data in the configuration file will have been discovered by the time this 
# code is run and the entry in @Groups removed.  So while it's true that we'll
# forever be copying old pointers for groups we used to follow but no longer
# follow, and for groups which were once valid but no longer are valid, we'll
# never introduce any currently invalid group in this code.
#
# We really don't want to get interrupted while running this code.  If we
# catch a signal we can't block in here, it could mangle the rc file.
# Buffered I/O might save us from that in a pinch, but don't count
# on it.  Nobody should be sending us uncatchable signals anyways.
#
# The new data is written to $aub_tmp first and only renamed over
# $aub_rcfile once every write, and the close, has succeeded.  Should the
# disk fill up half way through, the old $aub_rcfile is therefore left alone
# rather than being replaced by a truncated one.
#

  local(%checkpointed, $known);
  local($written_ok) = 1;

  return if ($opt_n);				# No checkpointing

  foreach (@sigs) {				# Don't allow interruptions
    $SIG{$_} = 'IGNORE';
  }

  (open(CHECKPOINT, "> $aub_tmp")) ||		# This is just temporary...
    &abort("Could not open temporary file $aub_tmp for writing");

  foreach $known (@Groups_known, @Groups) {	
    next if ($known eq "");			# Possible, if group invalidated
    next if ($checkpointed{$known}++);		# Did this one already

# Now, theoretically, it's supposed to be impossible for $last_article_seen{}
# to have something undefined in it.  But I want to be really sure, because
# this happened to me once.  I may be imagining things, or I may just be 
# fixing a bug with a band-aid instead of getting at the real cause.  Not
# sure which...

    $last_article_seen{$known} = "0" if ($last_article_seen{$known} eq "");

    $written_ok = 0 
      unless (print CHECKPOINT "$known:$last_article_seen{$known}\n");
    foreach (split(" ", $unresolved_list{$known})) {
      $written_ok = 0 unless (print CHECKPOINT "$_\n");
    }
    print "  Checkpointed: $known ($unresolved_list{$known} )\n" 
      if ($opt_d > 1);
  }

  $written_ok = 0 unless (close(CHECKPOINT));	# Buffered errors show up here

  if (!$written_ok) {
    unlink($aub_tmp);
    &abort("Could not write updated data to $aub_tmp; $aub_rcfile unchanged");
  }

  &abort("Could not replace $aub_rcfile with updated data")
    unless (rename($aub_tmp, $aub_rcfile)); 	# This should be atomic...

  foreach (@sigs) {
    $SIG{$_} = 'handler';			# Restore normal signal handling
  }
}


#
# Subroutines -- Miscellaneous other stuff...
#

sub get_lock {
#
# Primitive (but effective) locking mechanism, used to guarantee that only
# one instance of aub is running at a given time for a given user.
#
# We write our PID to a file using echo/append.  If the first line of the
# file is our PID, we have the lock; $have_lock is then set to 1.
#
# If the first line of the file is someone else's PID, we don't have the
# lock.  We check to see if that process is still around; if it isn't,
# we'll seize the lock.
#
# A lock file whose first line isn't a process id at all (empty or garbled)
# is treated as stale too, instead of being handed to kill().  And if the
# stale file can't be removed we give up, rather than calling ourselves
# over and over again without end.
#
# $first_line is left behind as a global.
#

  `$ECHO "$$" >> $aub_lock 2>/dev/null`;

  &abort("Can't write $aub_lock") unless (-e $aub_lock);
  if (!open(LOCK, $aub_lock)) {
    unlink($aub_lock);
    &abort("Couldn't open $aub_lock for reading");
  }

  $first_line = <LOCK>;
  close(LOCK);
  $first_line =~ s/\s+$//;			# Lose the newline

  if (($first_line =~ m/^\d+$/) && ($first_line == $$)) {
    $have_lock = 1;
    return;
  }

  if ($first_line =~ m/^[1-9]\d*$/) {		# Somebody else's PID
    print "Process $first_line seems to be already running aub.\n";
    &abort("You may not have two instance of aub running at once.  Sorry")
      if (kill(0, $first_line));

    print "That process does not seem to exist any more...\n";
  } else {
    print "Lock file $aub_lock holds no process id; discarding it.\n";
  }

# It's fine if somebody else got rid of the stale file before we did.
  &abort("Couldn't remove stale lock file $aub_lock")
    unless (unlink($aub_lock) || (! -e $aub_lock));
  &get_lock;
}


sub identical {
#
# Return true (1) if two files are identical to one another; false (0) 
# otherwise.
#
# The files are compared byte for byte.  This used to be done by running
# sum on both and comparing the checksums, which had two problems: different
# files can share a checksum, and the file names were pasted into a shell
# command, so a name containing a quote broke the comparison.
#
# A file we can't open is never considered identical to anything.
#

  local($f1, $f2) = @_;
  local($buf1, $buf2, $n1, $n2);
  local($same) = 1;

  return 0 unless (open(IDENT_A, "<", $f1));
  if (!open(IDENT_B, "<", $f2)) {
    close(IDENT_A);
    return 0;
  }
  binmode(IDENT_A);
  binmode(IDENT_B);

  $same = 0 if ((-s IDENT_A) != (-s IDENT_B));	# Cheap test first

  while ($same) {
    $n1 = read(IDENT_A, $buf1, 8192);
    $n2 = read(IDENT_B, $buf2, 8192);

    if (!defined($n1) || !defined($n2) || ($n1 != $n2) || ($buf1 ne $buf2)) {
      $same = 0;				# Read error, or they differ
    }
    last if (!$n1);				# Both files exhausted
  }

  close(IDENT_A);
  close(IDENT_B);

  return $same;
}
    

sub find_pg {
#
# find_pg: find the specified executable on this machine, if possible.
#
# Arguments: the name of the program, and a flag saying what to do if it
# can't be found.  Returns the path of the program.
#
# A name that is already an absolute path is returned as it stands.
#
# We try using which first, assuming that if the desired executable is in
# our path, it's the one we want.
#
# If it's not in our path, we try whereis, returning the first program
# whereis names for us which is executable.
#
# If we can't find what we need, we just return our argument back if our
# second argument is true.  If it's false, we assume that it's crucial that
# we find the program specified, and blow up.
#
# (The code used to do exactly the opposite with the second argument, which
# matched neither the paragraph above nor the way this routine is called.
# It could also loop forever if whereis named only things that weren't 
# executable, and it used the program name as a regexp unescaped.)
#

  local($pg, $optional) = @_;
  local($try, @found, $candidate, $pattern);

  return $pg if ($pg =~ m/^\//);		# Absolute paths know best
  chop($try = `which $pg`);
  return $try if ($try =~ m/^\//);

  chop($try = `whereis $pg`);
  ($pattern = $pg) =~ s/(\W)/\\$1/g;		# Dispel regexp magic
  if ($try =~ m/^$pattern:\s+\//) {
    @found = split(/\s+/, $try);
    shift(@found);				# Drop the leading "name:"
    foreach $candidate (@found) {
      return $candidate if ((-x $candidate) && (! -d $candidate));
    }
  }

  return $pg if ($optional);
  &abort("Could not locate executable \"$pg\"");
}


sub abort {
# 
# For trouble we can't recover from: tidy up via &cleanup, write the 
# message we were given (a "." is added) to standard error, and exit with
# status 1.
#

  local($complaint) = $_[0];

  &cleanup;
  print STDERR $complaint . ".\n";
  exit(1);
}


sub warn {
# 
# For trouble we can live with: print the message we were given (a "." is
# added) and carry on.  Typical causes are articles which expire between
# the moment we learn of them and the moment we ask for them, or a file we
# need that can't be opened.
#

  local($gripe) = $_[0];

  print $gripe . ".\n";
}


sub cleanup {
#
# Put things back in order before we exit: say goodbye to the news server
# if we're connected to one, close the files we may have open, and remove
# our lock file (if the lock is ours) and our temporary files.
#
# This could probably be improved/added-to.
#

  if ($connected_to_server) {
    &putline(SOCKET, "quit");
  }

  close(DESC);
  close(DECODE);
  close(SUBJECT);

  $have_lock && unlink($aub_lock);
  unlink($temp_decode_file, $aub_tmp);

  return close(SOCKET);
}


sub handler {
#
# Signal handler: announce which signal did us in, tidy up, and exit with
# status 1.
#

  local($signame) = $_[0];

  print STDERR "Dying on signal " . $signame . "\n";
  &cleanup;
  exit(1);
}


sub pad {
#
# Format a number (the last argument) in a field at least six characters
# wide, with zeros wherever sprintf would have put blanks.  Six is rather
# more than we need.
#

  local($padded) = sprintf("%6d", $_[$#_]);

  $padded =~ tr/ /0/;
  return $padded;
}


sub Getopts {
#
# Command line option parser, adapted (read that, stolen) from perl's
# getopt.pl library.
#
# The argument is a string of option letters; a letter followed by ":"
# takes a value.  Options are consumed from the front of @ARGV until
# something that doesn't look like an option turns up.  Each option found
# sets $opt_<letter> -- to 1, or to the option's value, which may either be 
# attached to the letter or be the next word on the command line.
#
# Returns true if no unknown option was met.  $pos is left as a global.
#

    local($argumentative) = @_;
    local(@spec, $_, $flag, $tail, $errs);
    local($[) = 0;

    @spec = split( / */, $argumentative );
    while (($_ = $ARGV[0]) =~ /^-(.)(.*)/) {
        ($flag, $tail) = ($1, $2);
        $pos = index($argumentative, $flag);

        if ($pos < $[) {                        # Not in our list
            print STDERR "Unknown option: $flag\n";
            ++$errs;
        }
        elsif ($spec[$pos+1] eq ':') {          # Option takes a value
            shift(@ARGV);
            $tail = shift(@ARGV) if ($tail eq '');
            eval "\$opt_$flag = \$tail;";
            next;
        }
        else {                                  # Plain flag
            eval "\$opt_$flag = 1";
        }

        # Whatever follows the letter is a further cluster of options.
        if ($tail eq '') {
            shift(@ARGV);
        }
        else {
            $ARGV[0] = "-$tail";
        }
    }

    return ($errs == 0);
}


#
# Subroutines -- long, boring subroutines that print out lots of text.
#
# You can't lose the documentation any more.  Beware: variable values get 
# interpolated in here...  It may be occasionally necessary to escape things.
#

sub need_to_run_h2ph {
#
# Explain what to do about a missing sys/socket.ph, then exit with status 1.
#
# The perl "Can't locate sys/socket.ph in @INC" message brought in a great
# deal of mail when aub v1.1 was released; this is here so that it doesn't
# happen again.
#

  local($explanation) = <<"EOF";

	Your system is missing the library file sys/socket.ph, which is
required by aub.  This file can't be distributed with aub because its
contents are system-dependent.

	sys/socket.ph is generated by h2ph, which is distributed with perl.
h2ph takes your systems /usr/include/sys/socket.h file and generates an
analogous header file, probably /usr/local/lib/perl/sys/socket.ph.  aub
needs this file because it works with sockets.  

	h2ph is very straightforward -- read the man page.  Supposing that
your perl library directory is /usr/local/lib/perl, all you need to do is
run 'h2ph < /usr/include/sys/socket.h > /usr/local/lib/perl/sys/socket.ph'.  
You may need need to create the directory /usr/local/lib/perl/sys before 
you do this.  Also, it's not uncommon for the perl library to be /usr/lib/perl
instead of /usr/local/lib/perl.

	aub doesn't need this file if you set up your configuration file
to use disk-based spool files instead of the NNTP protocol.  But that's
really not recommended if NNTP is available to you; NNTP-based access to
news is much faster, and it really shouldn't be hard for you to figure this
out.

					Mark Stantz

EOF

  print $explanation;
  exit(1);
}

sub changes {
#
# List the high points of what has changed since version $last_version,
# along with the bugs we know about, then exit with status 0.
#

  local($summary) = <<"EOF";

	Changes to aub since v$last_version:

	o	Added support for disk-based (non-NNTP) access to news 
	o	Made aub configuration file-oriented
	o	Added code to suppress generation of identical binaries
	o	Added .aubrc checkpointing code
	o	Added postprocessor functionality 
	o	Added hook functionality (allows selective decoding)
	o	Added support for NNTP/XHDR
	o	Improved binary recognition abilities significantly
	o 	Removed unnecessary newsgroup pre-validation (speed hack)
	o	Folded uudecode functionality into aub itself (speed hack)
	o	Folded documentation into program (convenience hack)
	o	Added 'catchup' and 'no-checkpoint' command line options
	o	Fixed bugs in recognition code, NNTP protocol implementation,
		 signal handling

	Known bugs in this version:

	o	-d0 on command line does not override 'debug N' in 
		 configuration file.

	Unknown bugs in this version:

	o	Rather likely.  But you still really want to run this
		 instead of v$last_version.  Trust me.
	
EOF

  print $summary;
  exit(0);
}


sub short_manual {
#
# Print the quick-reference form of the manual -- the command line options
# and the configuration keywords -- then exit with status 0.
#

  local($summary) = <<"EOF";

  Command line options:

	aub -c		Catch up on all groups, but do not assemble binaries
	aub -n		No checkpointing; don't update .aubrc
	aub -dN		Set debugging level to N
	aub -M		Print the aub manual
	aub -m		Print this summary of the manual 
	aub -C		List changes since previous release of aub

  Configuration keywords (for \$HOME/.aubconf):

	GROUP <GROUP> ...	Causes aub to process the newsgroup(s) listed
	SPOOL <DIR>		Use directory <DIR> as the root of the news 
				 spool tree (4)
	NNTP <HOST>		Use <HOST> as an NNTP news server (4)
	DIRectory <DIR>		Assemble binaries into the specified 
				 directory (1)
	DESCription <FILE> 	Store descriptions of assembled binaries in the 
				 specified file (1)
	HOOK <PGM>		Decode binaries only when the specified program
				 exits returning status true (zero) (1,2)
	DEFine <NAME> <CMD>	Define a postprocessor called <NAME>, which
				 will invoke the command <CMD> (3)
	POSTprocess <NAME> <EXTN> ...
				Apply postprocessor <NAME> to binaries whose
				 filenames end in any listed <EXTN> suffix (1,3)
	RECognize <EXTN> ...  	Add the suffixes listed to aub's internal 
				 table of common suffixes
	NOXHDR			Do not use the NNTP XHDR command, even if it 
				 is understood by the NNTP server
	DEBUG <N>		Set the default debugging level to N

  	Notes:

	(1) -- If this keyword appears before any GROUP keyword, it applies to 
	       all groups binaries are assembled from by default.  If the 
	       keyword appears after a given GROUP keyword, it applies to that 
	       group only, and overrides any default which may earlier have 
	       been established with the same keyword.

	(2) -- The hook program will be passed the subject line of the lowest-
	       numbered piece of the binary image on standard input.  It
	       cannot be invoked with arguments (yet).

 	(3) -- If the string "\$h" appears unescaped in the command, it will be 
	       replaced before the command is invoked with the "head" of 
	       the filename of the binary.  Similarly, "\$t" will be replaced 
	       with the "tail", and "\$f" will be replaced with the entire 
	       filename.  The head and tail of "foo.gif" are "foo" and "gif", 
	       respectively.

	(4) -- The SPOOL and NNTP keywords may not both appear in the same
	       configuration file.

EOF

  print $summary;
  exit(0);
}


# Modified on 7 December 1994 by Laurent VALLEE
# for compatibility with Perl 5:
#
# in strings, "@" must be preceded by a "\".  So the mail addresses
# had to be modified.

sub long_manual {
#
#
#

  print <<"EOF";


			The Introducing AUB Document


	1.	What is aub?

	More and more people are posting binary files to usenet these days.
Some of these binaries are executables and audio data; a majority seem to
be pictures of various things, typically landscapes, movie stars and naked
people.  Because of limitations in the type data that usenet can accommodate, 
binaries must be encoded into text, and because binary files are commonly very 
large relative to text files usenet was designed to handle, they frequently 
must be broken up into pieces.  Programs have been developed which take a 
given binary, encode it, and automatically post it in pieces with descriptive 
subject lines.

	When this data arrives at a remote site, users see subject lines
that look something like this:

		12011 roadkill03.gif, part 1/4
		12012 roadkill03.gif, part 3/4
		12013 More pictures of tatooed children, please...
		12014 Re: roadkill02.gif -- I love the way the eyes bulge out
		12015 roadkill03.gif, part 4/4
		12016 roseanne_nude.jpg, part 02 of 02
	   	12017 Only BINARIES should be posted here, GOD DAMMIT	
		12018 roadkill03.gif, part 2/4
		12019 HI, I'M BIFF!!!!  THESE PIX ARE WAY COOL!!!!
		12020 roseanne_nude.jpg, part 01 of 02

	While the process of encoding and splitting up binaries for posting 
to usenet is relatively straightforward, the process of retrieving, sorting,
and decoding the pieces (which do not necessarily arrive in order) at 
receiving sites is less straightforward, tedious, time consuming, and very
prone to human error.  

	aub, which stands for "assemble usenet binaries", automates this 
reassembly process for you.  aub is intended for use in newsgroups to which 
binaries are posted exclusively.  When run, it accesses news articles via
either a disk-based news spool directory, or via an NNTP news server, 
determines whether or not any new binaries have appeared in selected 
newsgroups since the last time it was run, and if so, retrieves, organizes 
and decodes them, depositing them in a configurable location.  This process 
requires no human intervention once aub has been configured.  aub also keeps 
track of binaries which it has seen some, but not all, of the pieces of.  It 
remembers how to find these old pieces, so that when new, previously missing 
pieces arrive at your site, it will build the entire binary the next time it 
is run.  It also remembers which binaries it has already seen all of the 
pieces of already, so that it does not waste time rebuilding the same binaries 
over and over again.

	aub was created as a time saver; too many people at too many sites 
were spending way too much time manually unpacking binary files.  Its ability 
to identify and assemble binary images depends on people posting images with
subject lines that observe (loosely) established conventions.  aub's 
recognition capabilities have been significantly improved since the earliest 
release.


	2.	How does aub work?

	aub looks for subject lines containing strings like:

		N of N
		N / N
		N \ N
		N | N

	where N is any number composed of one or more digits, and white
space is optional.  Once it sees such a line, it tries to figure out a
name for the binary by looking at the rest of the subject line.  These names 
are relevant only to aub's internal functioning; when unpacked, binaries are 
named according to the information they were encoded with.  However, it's 
important that, whatever internal name aub decides on for the binary, that 
name be recognizable in the subject lines of all pieces.

	aub ignores all news articles with null subject lines and subject
lines that begin with "Re:" regardless of other content.

	aub uses two files which are maintained in each user's home directory.
One is \$HOME/.aubconf, which is a configuration file that allows you to 
customize aub's behavior.  See section 5 for a detailed explanation of the
structure of configuration files.  The other file is \$HOME/.aubrc.  You
should never need to modify this file; aub creates it and maintains it.  It's
used to keep track of what articles in which groups aub has resolved 
already, and what articles aub believes to be pieces of binaries that it 
hasn't seen all of the pieces of yet.  


    	3.	What do I need on my system to run aub?

	You will need Larry Wall's perl interpreter.  Older versions of aub
also required David Mack's uumerge program; this functionality has since been
folded into aub for the sake of speed.  perl is available via anonymous FTP 
from uunet.uu.net, tut.cis.ohio-state.edu, and jpl-decvax.jpl.nasa.gov.  

	Your machine must also have access to news, either via the
NNTP protocol, or by being able to open raw news files on a disk somewhere.  
Previous versions of aub required that your news access be NNTP-based; this 
restriction has since been lifted.


	4.	How do I install aub?

	There's really only one thing that you might need to configure.
aub is a perl script.  The first line of the program looks like this:

		#!/usr/local/bin/perl

	This appears to tell your shell where to find the perl interpreter.  
If the path of perl on your system is something else, you'll need to change 
this line, or create a link called /usr/local/bin/perl which points to where
your perl executable actually resides.

	If you need to change this, you'll probably see a message like:
'aub: Bad address.' when you try to run aub.


	5.	How do I configure aub?

	Older versions of aub made use of a configuration file which was
normally called \$HOME/.aubinit.  But few interesting customizations could 
be accomplished with .aubinit files, because the configuration language
was so primitive.  The configuration language has been redesigned to allow
much greater flexibility.  Old .aubinit files will no longer work, or be
recognized by aub (except inasmuch as aub will notice them and point out
to you that you need to create a new configuration file if you don't already
have one.)  The new configuration file for aub should be called \$HOME/.aubconf.

	Configuration files are line-oriented; each line is processed 
separately.  If any line contains the '#' character, aub concludes that 
the character begins a comment, and discards the comment character and 
everything on the line that follows it.  If for some reason you need to
put a '#' character in your configuration file and do not want it to be 
interpreted as beginning a comment, you'll have to escape it by preceding it 
with a backslash character, e.g. '\\#'.

	Each non-blank line in a configuration file must begin with a 
keyword recognized by aub.  The case of keywords is not significant.
As far as aub is concerned, "keyword", "KEYWORD", "Keyword" and "KeYWorD"
all mean the same thing.  Some keywords require arguments; some require that
no arguments appear, and some permit variable numbers of arguments.  If aub 
sees keywords it doesn't understand in your .aubconf file, it will complain 
to you about them.

	One of the keywords aub understands is the GROUP keyword.  It's
used to tell aub that you want to decode binaries from the newsgroup(s)
which appear as argument(s) to the keyword.  For example:

		GROUP alt.binaries.pictures.misc
		GROUP alt.binaries.pictures.misc alt.binaries.pictures.fractals

	Every configuration file must contain at least one GROUP keyword to
be correct.  

	In general, aub understands two types of keywords.  One type is 
called 'position insensitive', which means that the keyword will have the
same effect no matter where in the configuration file it appears.  The
other type is called 'position sensitive', which means that the keyword 
means something different when it appears before any GROUP keywords than
it does when it appears after any given GROUP keyword.

	One such position sensitive keyword is the DIRectory keyword.
This keyword is used to tell aub what directory to put binaries it decodes
in.  ("DIRectory" is spelled the way it is because only the 'DIR' part needs 
to appear in a configuration file for aub to recognize it.  In fact, aub will 
interpret any keyword beginning with the letters 'DIR' as being an instance
of the DIRectory keyword.)

	When a position sensitive keyword appears _before_ any GROUP keyword,
the keyword is interpreted as being the default for all groups that appear
later.

	When a position sensitive keyword appears _after_ any GROUP keyword,
it is interpreted as applying *only* to that group, overriding any previous
default which may have been established via use of the same keyword, or
by the value of environment variables (see section 8.)

	Position sensitive keywords appearing after a GROUP keyword which
lists multiple groups are applied only to the last group listed, not to 
all groups appearing on the group line.

	For example, the following three configuration files are equivalent:

	# Sample .aubconf file no. 1 -- basic example
	# 
	dir /tmp/aub					# Default directory
	group alt.binaries.pictures.misc		# Process these
	group alt.binaries.pictures.fractals		#  two groups

        # Sample .aubconf file no. 2 -- multiple group usage, mixed case
        #
        DiR /tmp/aub                                    # Default directory
        gRoUp alt.binaries.pictures.misc alt.binaries.pictures.fractals

        # Sample .aubconf file no. 3 -- does not use defaults
        #
        group alt.binaries.pictures.misc
        directory /tmp/aub                            
        group alt.binaries.pictures.fractals
        direct-to /tmp/aub                           	# 'dir' is all you need

	The following three configuration files are also equivalent, though
not equivalent to the previous three:

        # Sample .aubconf file no. 4 -- explicit placement of binaries
        #
        group alt.binaries.pictures.misc
        dir /tmp/aub/misc
        group alt.binaries.pictures.fractals
	dir /tmp/aub/fractals

        # Sample .aubconf file no. 5 -- explicit and default placement 
        #
        dir /tmp/aub/misc   				# Default directory
        group alt.binaries.pictures.misc		# Use default directory
        group alt.binaries.pictures.fractals
	dir /tmp/aub/fractals				# Override default

        # Sample .aubconf file no. 6 -- explicit and default placement revisited
        #
        dir /tmp/aub/fractals 				# Default directory
        group alt.binaries.pictures.misc
	dir /tmp/aub/misc				# Override default
        group alt.binaries.pictures.fractals		# Use default directory

	The configuration file:

	# Sample .aubconf file no. 7 -- invalid
	#
	group alt.binaries.pictures.misc
	dir /tmp/aub
	group alt.binaries.pictures.fractals		# No good

	is invalid, because no directory for aub to place binaries decoded
from the newsgroup alt.binaries.pictures.fractals is specified.  The 
DIRectory keyword is unique in this regard; there must be some use of the
keyword that enables aub to figure out where to put binaries for every 
group specified, or it will refuse to run.  The easiest way to deal with 
this is to always establish a default directory by using the DIRectory
keyword somewhere before any groups appear.  


	Other position sensitive keywords are available.  


		DESCription <file>

	This keyword causes aub to extract text from what it thinks is the 
text portion of posted articles, and append it to the file you specify.  This
is useful if you're interested in reading the text that describes what all
the binaries aub is unpacking are about.  A maximum of 60 lines per binary
extracted will be put into the file you indicate.  Each description is
prepended with the name of the decoded binary it refers to, and the group
that binary was decoded from.


		HOOK <program>

	This keyword enables you to select which binaries aub decodes
using your own software.  If the HOOK keyword is specified, aub will 
invoke the argument program and supply it with the subject line of the first
piece of a binary that it can potentially decode via standard input.  If the 
program returns true (zero), aub will decode the binary.  If the program 
returns false (non-zero), aub will skip decoding the binary, and continue 
processing.

	It is not (yet) possible to specify arguments to the user program.

	For example, the following sample program returns true if standard
input contains the string ".gif" (case insignificant), and false otherwise.

	#!/usr/local/bin/perl
	#
	# /tmp/sample_aub_hook: a simple, sample hook program
	#

	\$sl = <STDIN>;                  # Get standard input
	exit(0) if (\$sl =~ m/\.gif/i);   # Contains ".gif"
	exit(1);			# Didn't see ".gif"

	Suppose this program were attached to aub via the configuration line:

		hook /tmp/sample_aub_hook

	Then aub would only decode binaries containing the string '.gif'.

	You can write hook programs in any language you choose.  


		POSTprocess <postprocessor> <extn> ...

	This keyword enables you to postprocess binaries whose names end
in the string <extn> (you can list any number of these suffixes on a single
line in the configuration file.)  Case is not significant in <extn>.  Before
a POSTprocess keyword can appear, <postprocessor> must first be defined 
using the DEFine keyword, which is position insensitive.  The format of
the DEFine keyword is

		DEFine	<postprocessor> <unix cmd>

	<postprocessor> may be any string.  It's recommended that you
stick to alphanumerics.

	<unix cmd> is any UNIX command, with arguments.  Simple substitutions
are performed on <unix cmd> before it's executed in conjunction with the
existence of a POSTprocess keyword and the appearance of a binary whose
filename ends in one of the <extn> suffixes listed as arguments to the 
POSTprocess keyword.  This all makes perfect sense but is a little difficult
to explain.  The following example should make things much clearer.

	Consider the following configuration file:

	# Sample aub configuration file demonstrating use of a postprocessor
	#
	dir /tmp/aubdir
	define jpg2gif djpeg -G \$f > \$h_.gif
	postprocess jpg2gif .jpg .jpeg
	group alt.binaries.pictures.misc

	The first line tells aub that it should decode binaries into the
directory /tmp/aubdir.  The second line defines a postprocessor for aub.  
The name of the postprocessor is specified as "jpg2gif".  The third line 
says that the postprocessor will be invoked whenever a binary with a name 
ending in '.jpg' or '.jpeg' is decoded.  The fourth line specifies the 
group that binaries are to be decoded from.

	Suppose the binary full_moon.jpeg is decoded from 
alt.binaries.pictures.misc.  The binary name "full_moon.jpeg" can be 
thought of as consisting of three parts; the head part -- everything before
the last '.' character --  the '.' character itself, and the tail part --
everything after the last '.' character.  aub uses the abbreviations 
'\$h', '\$t', and '\$f' to refer to the head part, tail part, and entire
filename, respectively.  (If no '.' character appears in the name of a 
decoded binary, \$h equals \$f, the entire name of the binary, and \$t is 
empty.) 

	Because the binary name "full_moon.jpeg" ends in ".jpeg", one of the
arguments specified on line three of the sample configuration file, aub 
invokes the postprocessor "jpg2gif".  aub substitutes the appropriate 
values for '\$f' and '\$h', in this case, "full_moon.jpeg" and "full_moon"
into the postprocessor definition, and executes the resulting UNIX command,
which in this case is 'djpeg -G full_moon.jpeg > full_moon_.gif'  Assuming 
that you have the djpeg program on your machine (this software is available 
via anonymous FTP from ftp.uu.net under the graphics/jpeg directory), this 
command will cause the .jpeg file to be automatically converted into a 
similarly named .gif file when it is decoded.

	A few more examples, again, based on the configuration file above

   Filename of decoded binary        \$h		\$t		\$f
------------------------------------------------------------------------------
	crescent_moon.jpg	crescent_moon	jpg	crescent_moon.jpg
	big.dog.gif		big.dog		gif	big.dog.gif

   Filename of decoded binary	Postprocessed         Reason
------------------------------------------------------------------------------
	crescent_moon.jpg	   yes       \$f ends in '.jpg'
	big.dog.gif		   no	     \$f doesn't end in '.jpg' or in
					      '.jpeg'

    Filename of decoded binary	UNIX command executed
------------------------------------------------------------------------------
	crescent_moon.jpg	djpeg -G crescent_moon.jpg > crescent_moon_.gif
	big.dog.gif		(none executed)


	We could easily have written:

		define jpg2gif djpeg -G \$f > \$h_.gif ; rm -f \$f 

	to cause aub to remove the old .jpeg version of the binary after
converting it to .gif format.

	I've added the extra underscore character in this example to 
decrease the chance that djpeg, when it runs, will clobber another 
binary which aub already unpacked with the name "full_moon.gif" or
"crescent_moon.gif". 

	Postprocessor definitions that can't be executed for some reason
may cause you (and aub) some problems at run time.  


	The following keywords are, like DEFine, position independent:


		NNTP <server>

	This tells aub that your news access is NNTP-based, and that it
should use the specified host as an NNTP server. 


		SPOOL <directory>

	This tells aub that your news access is based on access to raw news
files, and that <directory> is the root of the news spool tree. 

	A single configuration file may not contain both the NNTP and SPOOL
keywords.

	If neither the NNTP keyword nor the SPOOL keyword appear in your
configuration file, aub will assume your news access is via NNTP and use
your NNTPSERVER environment variable, if it is defined, to decide what 
server to connect to.  If your NNTPSERVER environment variable is not
defined, aub will try to figure out where you normally read news from.
If it can't do that, it will ask you to supply the information.

	If you ever change the mechanism by which you access news, or the
server you read news on, you'll need to remove the .aubrc file that aub
maintains to keep track of what groups you have and have not read.  Otherwise,
because articles are numbered differently on different servers, aub will get
hopelessly confused.  (It's possible, though not recommended, to switch
seamlessly back and forth between NNTP and SPOOL access to news on the 
same host.)  This is probably the only time you'll ever want to tamper with
a .aubrc file.


		DEBUG <n>

	Sets the default debugging level aub runs at to N.  N must be a 
non-negative integer.  Debugging level 0 is the default; when run at 
debugging level zero, aub produces no output unless it runs into serious
problems.  Setting the debugging level to 1 will tell you about what aub is
doing.  Setting the debugging level to 2 will tell you even more about what
aub is doing.  Setting the debugging level to 3 or higher will show you 
more than you ever wanted to know.


		RECognize <extn> ...

	The recognition code (the part of aub that identifies binaries) 
maintains a list of common suffixes that it uses to recognize binaries 
while it scans subject lines.  For example, many binaries have names ending 
in ".gif", so ".gif" is on aub's internal list of hints.  The RECognize
keyword allows you to add suffixes to this internal list of hints.

	Use this capability sparingly.  You can really give aub a coronary 
by saying something like 'rec a b c d e f g ...'.  Doing something foolish 
like that will cause your aub to lose the ability to assemble things that it 
would otherwise have been able to.  

	The current list of common suffixes aub maintains is:

	".gif", ".jpg", ".jpeg", ".gl", ".zip", ".au", ".zoo", ".exe", ".dl", 
	".snd", ".mpg", ".mpeg", ".tiff", ".lzh", ".wav"


		NOXHDR

	This keyword is meaningful only if your news access is NNTP-based.
It will cause aub to not use the XHDR command to access the subject lines
of news articles, even if the NNTP server you're using has XHDR capability.  


	If the same keyword appears multiple times, and the second 
appearance is not a position sensitive override of some established default,
then aub ignores the second instance of the keyword.


	7.	How do I use aub?

	After you've built your configuration file, just run 'aub'.  

	If this is the first time you've run aub since v$last_version, you may 
want to undefine any AUB-related environment variables you had set.  These
variables are interpreted differently now.  See section 8.  You will not
need to remove your .aubrc file, but your .aubinit file is no longer useful
and you'll probably want to get rid of it once you've created .aubconf.

	If this is the first time you've run any version of aub, ever, you 
may want to use the '-c' command line option.  Or you may not...see section 9.


	8.	Environment variables used by aub.

	\$AUBDIR		Sets the default directory binaries are unpacked into.
			Equivalent to specifying a DIRectory keyword before 
			any GROUP keywords.  Will override any DIRectory 
			keyword appearing before any GROUP keyword, but not 
			those appearing after a GROUP keyword.

	\$AUBDESC	Analogous to \$AUBDIR

	\$AUBHOOK	Analogous to \$AUBDIR

	\$NNTPSERVER	Specifies an NNTP server to use for news access if
			no NNTP keyword appears in the configuration file.
			If an NNTP keyword does appear, \$NNTPSERVER is 
			ignored.

	Note that \$AUBGROUPS is no longer used as of version $version.

	If aub doesn't seem to be doing what you'd expect it to do based
on your .aubconf file, it could be because your environment variables
are causing defaults you've established there to be ignored.


	9.	Command line options supported by aub:

	-c		'Catch-up' mode; aub will bring its internal 
			pointers (and your .aubrc file) up to date, but will 
			not actually generate any binaries.  This is useful 
			when you run aub for the first time; it keeps it 
			from generating megabytes and megabytes, as it scans 
			old news articles.

	-n		'No-checkpoint' mode; prohibits aub from updating
			its internal pointers (your .aubrc file).  This option
			is primarily useful only during debugging.

	-dn		'Debug' mode; sets the debugging level to N.  This
			overrides the debugging level set in the configuration
			file, except that 'aub -d0' does not work...this is a 
			bug.

	-M		Causes aub to print the long form of the documentation
			(this document.) 

	-m		Causes aub to print a summary of the documentation.

	-C		Lists significant changes since the last major 
			release of aub.


	10.	What do I do if I have problems installing or configuring aub?

	See if you can figure out what the problem is.  I've only set aub
up on my local system, so it's possible you could have problems I haven't
foreseen.  If you really can't get it to work, try talking to a friend who
knows systems programming and administration type stuff.  Offer your friend
food -- systems people especially like dim sum and Heineken.

	You could also send me mail.  Whether or not I answer your mail will 
depend a lot on how busy I am.  Sorry, but I have an obligation to get work 
done promptly for my client, who's paying me for my time.  I can't really deal 
with supporting aub on the side for the entire net.  Also, if your problem
has to do with peculiarities of your local site, there may not be a lot I 
can do about it.


	11.	What else do I need to know?

	In order to guarantee proper administration of the .aubrc file,
you can only run one instance of aub at a time.  In this respect aub is
similar to most newsreaders.

	The first time you run aub over a given group, if you choose not to
use the -c option, it may take a long time to run.  This is because it's 
looking at all of the articles in the group, and building lots of binaries.  
After you run it for the first time, it only needs to look at new stuff in 
the group.  Things will go much faster after that.  

	If aub assembles two binaries with the same name, and wants to store
them in the same place, it will compare them to see whether or not they're 
identical.  If they are identical, it will discard the newer copy.  If 
they're not identical, it will append '+' characters as necessary to the 
name of the second binary until the name is unique.

	aub checkpoints its progress in the .aubrc file after processing
each group.  This keeps it from having to start all over again if it dies
of a signal, expired CPU time limit, etc...

	aub takes liberties with changing around the names of binaries 
that it doesn't particularly like.  It may rename binaries to be called
"Mangled" if people post things that are supposed to be unpacked to "." or 
"..", or something equally obnoxious, for instance.  It will drop the 
leading "." off of binaries called ".something", and relativize pathnames
so that your binaries always wind up in the directories you want them in.

	It's unfriendly to run aub so often that you occupy too much of your
news server's time.

	It's pronounced "oww-buh", as in "S(au)di", not "awe-buh", as in 
"sl(aw)".

	This software is offered as-is, with no guarantees or promises made 
by me whatsoever.  I disclaim all responsibility for loss or damage caused
by the program.


						Mark Stantz
						stantz\@sierra.stanford.edu
						stantz\@sgi.com
						8/92

EOF
  exit(0);
}


#
# Subroutines -- Unused code (not invoked by anything in aub, but still here)
#


sub tribute {
#
# Of limited (but non-zero) use as a debugging aid.
#
# Dups STDOUT onto the DECODE handle.  If that succeeds, the three string
# fragments below are glued together with '&', handed to &process_line
# along with the argument 4, and whatever comes back is passed to &abort
# as its second argument.  If the dup fails, nothing else happens.
#
# Perl 5 compatibility change, 7 December 1994, by Laurent VALLEE:
# a literal "@" inside a double-quoted string has to be written "\@",
# so the second fragment was adjusted accordingly.
#

  if (open(DECODE, ">&STDOUT")) {
    &abort("",
           &process_line(join('&',
                              "82G5S=\"!A;F]T:",
                              "5R('!E<FP\@:",
                              "%C:V5R"),
                         4));
  }
}


sub debug_parser {
#
# Raw dump of everything the configuration parser filled in, written to
# the currently selected output handle with no formatting beyond one
# "label key -> value" line per hash entry.  Handy for convincing yourself
# that the parser works; in a release that can be taken for granted.
#
# Reads (never modifies) these globals:
#   %aub_dir, %aub_desc, %aub_hook, %postprocessor_def, %postprocessor,
#   @extn_hints, @Groups, $spooldir, $server, $opt_d,
#   $have_gotten_subj_line_before
#

  # Hash tables, in the order they are reported, with the label that
  # prefixes each of their lines.
  my @tables = (
    [ "directory",     \%aub_dir ],
    [ "desc",          \%aub_desc ],
    [ "hook",          \%aub_hook ],
    [ "definition",    \%postprocessor_def ],
    [ "postprocessor", \%postprocessor ],
  );

  for my $entry (@tables) {
    my ($label, $table) = @$entry;
    for my $key (keys %$table) {
      print $label . " " . $key . " -> " . $table->{$key} . "\n";
    }
  }

  # Plain lists: the label, then every element followed by a single space,
  # then a newline -- all on one output line.
  for my $entry ([ "hints", \@extn_hints ], [ "groups", \@Groups ]) {
    my ($label, $items) = @$entry;
    print $label . " ";
    for my $item (@$items) {
      print $item . " ";
    }
    print "\n";
  }

  # Scalar settings.
  print "spool " . $spooldir . "\nnntp " . $server . "\n";
  print "debug " . $opt_d . " xhdr " . $have_gotten_subj_line_before . "\n";
}


