#!/home/gschwarz/PerlMod/bin/perl -w ####################################################### ## Program extracts out the specified columns which ## ## are delimited by a blank, tab, or an &. The last ## ## arguement should be the machine readable file. ## ## Results will be written to standard output (i.e ## ## the screen) but can be redirected to a file by ## ## appending a '> newfile' at the end of the command.## ## ## ## There are a few other flags that do other useful ## ## things. Use ## ## ## ## extractcols.pl -help ## ## ## ## for information on these flags and their syntax. ## ## ## ## Written by Greg Schwarz (AAS Journals staff ## ## scientist) on 12/13/00 ## ####################################################### use strict; my $metadata = 'off'; # flag to mark the column info my $endmetadata = 'no'; # flag to mark the metadata end my $printheader = 'no'; # flag to print the metadata info my $printlatex = 'no'; # flag to print data in LaTeX style my $delimiter = ' '; # default delimiter to use my $metacntflg = 0; # counts # of metadata lines my $i = 0; # generic counter my $ict = 0; # user columns counter my $j; # generic counter my @col; # array containing user columns my @colstr; # array containing user start positions my @colend; # array containing user end positions my @label; # array containing user labels my $comma; # location of the commas in -c flag my $numcom; # number of commas in -c flag my $extractcol; # extracted col (goes into $alldata) my $tmp; # temp variable for the -d flag my $alldata=''; # long string containing all data values my $colhead; # string containing the colhead info my $ccs=''; # a 'cc...c' string with $numcom+1 'c's my $title='UNKNOWN'; # string containing the table title ## THIS PORTION DETERMINES THE USER FLAGS ## Search through the options and extract all of ## the ones that start with a '-'. The last one ## will be interpreted as the filename. while ($ARGV[0] =~ m/^-/ ) { $_ = shift(@ARGV); ## print out the options if -help is set if(m/^-help/) { print "\nextractcols.pl\n\n"; print "Program extracts any number of columns (and in any order) out\n"; print "of a machine readable table using the information in the metaheader.\n"; print "Use \'extractcols.pl -[options] MRfile > newfile\' to capture the\n"; print "output to the file \'newfile\' where \'MRfile\' is the \n"; print "machine-readable table you want to extract data columns from.\n"; print "\n"; print "NOTE YOU MUST CHANGE THE FIRST LINE IN THIS CODE TO POINT TO \n"; print "THE LOCAL VERSION OF PERL ON YOUR MACHINE FOR THIS TO WORK!\n"; print "\n"; print "The options flags that are currently available are: \n"; print "\n"; print " -c#,#,#,...,# which extracts out the columns numbers given after\n"; print " the -c flag. Each column number needs to be seperated\n"; print " by a comma.\n"; print "\n"; print " -d(tab|amp) specifies the type of delimiter used to seperate the \n"; print " columns. Use tab for a TAB delimiter and amp for &s.\n"; print "\n"; print " -latex outputs the data in a LaTeX/AASTex style table. \n"; print "\n"; print " -p print out the metadata header only\n"; print "\n"; print " -help print this help message and quit.\n"; print "\n"; print "The last arguement has to be the machine readable file otherwise you\n"; print "will get a message like this:\n\n"; } ## Quit the program if the last variable starts with a '-' if (defined($ARGV[0])) { } else { die "You have not supplied a filename! \nUse: extractcols.pl -[options] file\n"; } ## Print out a LaTeX/AASTeX table if the -latex flag is set ## Also set $delimiter to &s so the table is correct. if(m/^-latex/) { $printlatex = 'yes'; $delimiter = " \& "; } ## Print only the metadata header if the -p flag is set if(m/^-p/) { $printheader = 'yes'; } ## grab the paramters of the -d flag if(m/^-d(.*)/) { $tmp = $1; ## If 'tab' then use tabs or if 'amp' then ## use &, otherwise quit the program if($tmp eq 'tab' or $tmp eq 'amp') { if($tmp eq 'tab') { $delimiter = "\t"; } if($tmp eq 'amp') { $delimiter = " \& "; } } else { die "\nI do not know what delimiter $tmp is!\nUse -damp or -dtab.\n\n"; } } ## grab all of the columns after the -c flag if(m/^-c(.*)/ ) { $_ = $1; $numcom = tr/,//; ## Check to see if there is only one column requested ## If so you only need to extract out the number after "c" if($numcom < 1) { $col[0] = $_; } ## Check to see if there are 2 columns requested ## If so grab the number between the comma if($numcom eq 1) { m/(.*),(.*)/; $col[0] = $1; $col[1] = $2; } ## If multiple columns are requested then use the ## index and substr commands to extract the various ## columns if($numcom > 1) { ## Read in the number before the first comma and ## place the remainder in the $_ string $comma = index($_,','); $col[0] = substr($_,0,$comma); $_ = substr($_,$comma+1); ## Read in all the numbers until the last comma for($i=1; $i<$numcom; $i++) { $comma = index($_,','); $col[$i] = substr($_,0,$comma); $_ = substr($_,$comma+1); } ## Read in the last number column $col[$i] = $_; } } } ## THIS PORTION READS IN THE META-DATA HEADER. ## open the file and read until you reach the end ## of the first metadata flag. open(FILE, "$ARGV[0]"); while( defined($_ = ) and $endmetadata eq 'no' ) { ## Find the table title and store it if(m/^Table: (.+)$/) { $title=$1; } ## If -p has been set print the line. if($printheader eq 'yes') { print $_; } ## If you encounter the '--------' flag turn ## the $metadata header keyword off. if(m/^-{60}/) { $metadata = 'off'; $metacntflg++; if($metacntflg eq 3) { $endmetadata = 'yes'; } } ## If the $metadata keyword is turned on and ## you can match numbers around a '-' in the 5th ## column or 5 blanks and then numbers, you have ## found another column. Insert $ in $label if needed. if($metadata eq 'on') { if(m/^\s(..\d)-(..\d)\s+\S+\s+\S+\s+(\S+)/) { $colstr[$ict] = $1; $colend[$ict] = $2; $label[$ict] = $3; $label[$ict] =~ s/\_(\S+)/\$\_{$1}\$/g; $ict++; } if(m/^\s{5}(..\d)\s+\S+\s+\S+\s+(\S+)/) { $colstr[$ict] = $1; $colend[$ict] = $1; $label[$ict] = $2; $label[$ict] =~ s/\_(\S+)/\$\_{$1}\$/g; $ict++; } } ## When you find the ' 1-' then you have found ## the first column of information (assuming the ## $metadata flag is off) so turn on the $metadata ## keyword and increment the counter. This portion ## needs to be last so you don't loop the first ## column twice (here and the section above!). ## Insert $ in $label if needed. if(m/^\s{3}1-(..\d)\s+\S+\s+\S+\s+(\S+)/ and $metadata eq 'off') { $colstr[$ict] = 1; $colend[$ict] = $1; $label[$ict] = $2; $label[$ict] =~ s/\_(\S+)/\$\_{$1}\$/g; $metadata = 'on'; $ict++; } } ## Once you have extracted the metadata info you ## still need to see if there are any resulting ## Notes. If there are keep reading until you ## get to the next '------' line. Print the data ## if the -p flag is set! if(m/^Note /) { if($printheader eq 'yes') { print $_; } until (m/^-{60}/) { $_ = ; if($printheader eq 'yes') { print $_; } } $_ = ; } ## THIS PORTION READS THE DATA IN THE MACHINE READABLE TABLE. ## Extract and print the columns the user requested using ## the set delimiter if the -p flag wasn't set. Use the ## substr to cut out the columns and then glue it back together ## in the correct order in $alldata. if($printheader ne 'yes') { while(defined($_)) { for($i=0;$i<$numcom+1;$i++) { $extractcol = substr($_,$colstr[$col[$i]-1]-1, $colend[$col[$i]-1]-$colstr[$col[$i]-1]+1); $alldata=$alldata.$extractcol.$delimiter; } $alldata=$alldata."\n"; ## Read the next line $_ = ; } } close FILE; ## If the -latex flag has been set print out the LaTeX/AASTeX ## flags before and after $alldata. If the flag has not been ## set then just print the data if ($printlatex eq 'yes') { ## Run a loop to get the right number of columns and 'Label's for($i=0;$i<$numcom;$i++) { $ccs=$ccs.'c'; $colhead=$colhead."\\colhead{$label[$col[$i]-1]} \&\n"; } $ccs=$ccs.'c'; $colhead=$colhead."\\colhead{$label[$col[$i]-1]} \n"; print "\\documentclass{aastex}\n"; print "\\begin{document}\n\n"; print "\\begin{deluxetable}{$ccs}\n"; print "\\tablecaption{$title}\n"; print "\\tablehead{\n"; print $colhead; print "}\n"; print "\\startdata\n"; $alldata =~ s/\& \n/ \\\\ \n/msg; print $alldata; print "\\enddata\n"; print "\\end{deluxetable} \n\n"; print "\\end{document}\n"; } else { print $alldata; }