#!/usr/bin/perl
# FQreadnames2posmap.pl - convert posmaps from UIDs to Read names for FASTQ 
# files in Celera Assembler/CABOG output
#
# Usage: FQreadnames2posmap.pl <.gkpStore.fastqUIDmap> <.posmap.frg*> \
#        [<.posmap.frg*>]
#
# Overwrites the posmap files, but saves the original as .posmap.frg???.bak
#

################################################################################
# This software is "United States Government Work" under the terms of
# the United States Copyright Act.  It was written as part of the authors'
# official duties for the United States Government and thus cannot be
# copyrighted.  This software is freely available to the public for
# use without a copyright notice.  Restrictions cannot be placed on its present
#     or future use. 
#
# Although all reasonable efforts have been taken to ensure the accuracy and
# reliability of the software and data, the National Human Genome Research
# Institute (NHGRI) and the U.S. Government does not and cannot warrant the
# performance or results that may be obtained by using this software or data.
# NHGRI and the U.S. Government disclaims all warranties as to performance,
# merchantability or fitness for any particular purpose. 
#
# In any work or product derived from this material, proper attribution of the
# authors as the source of the software or data should be made, using "Arjun
# Prasad, NHGRI Genome Technology Branch" as the citation. 
################################################################################

use strict;
use warnings;


# Main script: load the UID -> read name table from the first argument, then
# rewrite every posmap file named on the command line so that the leading UID
# of each line is replaced by the corresponding read name.  Each original file
# is kept next to the rewritten one as "<file>.bak".

comment_usage() if (@ARGV < 2);

# Keep the path separately from the handle so error messages can name the file.
my $readnamemap_file = shift(@ARGV);
my $readnamemap_fh   = Open($readnamemap_file);
my @posmap_files     = @ARGV;

# Quick check of the posmap files so we can fail early if they don't exist.
# This has to be map, not grep: grep would hand back the file names themselves
# and the "not found" text would never reach the user.
my @errors = map { (-e $_) ? () : "File $_ not found" } @posmap_files;
comment_usage(@errors) if (@errors);

# First read in the UID to read name mapping.  Fields 0 and 2 of every line are
# a UID and its read name; lines with six fields carry a second UID/name pair
# in fields 3 and 5 (presumably the mate of a paired read -- confirm against
# the fastqUIDmap format).
my %uid2readname;
while (my $map_line = <$readnamemap_fh>) {
    chomp($map_line);
    my @F = split(/\s/, $map_line);
    next if (@F < 3);    # blank or truncated line: nothing usable on it
    $uid2readname{$F[0]} = $F[2];
    if (@F >= 6) {
        $uid2readname{$F[3]} = $F[5];
    }
}
close($readnamemap_fh);

# Now fix the posmap files.
foreach my $filename (@posmap_files) {
    my $old_filename = "$filename.bak";
    rename $filename, $old_filename
        or die "Couldn't rename $filename to $old_filename: $!";
    my $old_posmap = Open($old_filename);
    my $posmap = Open($filename, 'w');
    while (my $posmap_line = <$old_posmap>) {
        # Only lines that begin with a numeric UID are translated; anything
        # else is copied through unchanged.
        if ($posmap_line =~ /^(\d+)/) {
            my $uid = $1;    # copy at once, before any other match can reset $1
            my $readname = $uid2readname{$uid};
            # Validate before touching the line, and test definedness rather
            # than truth so that a read literally named "0" is accepted.
            if (! defined $readname) {
                die "ERROR: UID $uid not found in $readnamemap_file. On line $. of $old_filename";
            }
            substr($posmap_line, 0, length($uid), $readname);
        }
        print {$posmap} $posmap_line;
    }
    close($old_posmap);
    # Buffered write errors (e.g. a full disk) only surface at close.
    close($posmap) or die "Couldn't finish writing $filename: $!";
}

# Open($file [, $mode])
#
# Opens $file and returns a lexical filehandle for it.
#   $file - path of the file to open; always taken literally
#   $mode - optional; 'w' opens the file for writing (truncating it), anything
#           else, or no mode at all, opens it for reading
# Dies with the system error message if the file cannot be opened.
sub Open {
    my ($file, $mode) = @_;
    my $fh;
    if (defined $mode and $mode eq 'w') {
        open($fh, '>', $file) or die "Couldn't open $file for writing: $!";
    } else {
        # Three-argument open: with the two-argument form a name with leading
        # or trailing whitespace, or one starting with '>' or ending in '|',
        # would be interpreted instead of being opened as-is.
        open($fh, '<', $file) or die "Couldn't open $file: $!";
    }
    return $fh;
}

# comment_usage(@messages)
#
# Prints this script's own header comment as the usage text: the first line of
# the source file ($0) is skipped, then every following line is printed with
# its leading "#" (and one optional space) removed, stopping at the first line
# that does not start with "#".  After that one "ERROR: ..." line is printed
# per message, and the script exits with status 1.  Never returns.
sub comment_usage {
    # Work on copies: @_ aliases the caller's values, which may be read-only
    # (e.g. a literal string), so they must not be modified in place.
    my @messages = @_;

    open(my $source, '<', $0) or die "Couldn't read source ($0): $!";
    scalar(<$source>);    # discard the first line (the shebang)
    while (my $text = <$source>) {
        last unless ($text =~ s/^# ?//);
        print $text;
    }
    close($source);

    foreach my $message (@messages) {
        chomp($message);
        print "ERROR: $message\n";
    }
    exit(1);
}
    
