use strict; use diagnostics; use warnings; my \$pt; my \$ct; my \$i; my \$j; my \$debug_distance = 0; open (PT, "<\$ARGV[0]"); \$pt = ; close (PT); open (CT, "<\$ARGV[1]"); \$ct = ; close (CT); if (defined \$ARGV[2]) { \$debug_distance = \$ARGV[2]; } if (length(\$pt) != length (\$ct)) { print "The pt and ct are different lengths, aborting\n"; } my \$position = 0; my \$len = length (\$pt); my %map; my %distance; while (\$position < \$len) { my \$p = substr (\$pt, \$position, 1); my \$c = substr (\$ct, \$position, 1); if ((\$p ne "?") && (\$c ne "?")) { if (defined \$map{\$p . \$c}) { # We encountered this pair already, calculate the distance my \$d = sprintf "%5d", \$position - \$map{\$p . \$c}; if (defined \$distance { \$d }) { ++\$distance { \$d }; } else { \$distance { \$d } = 1; } if (\$d == \$debug_distance) { # Display all \$position values for the debug distance printf "DEBUG: distance %7d occurred at positions %7d and %7d\n", \$debug_distance, \$map{\$p . \$c}, \$position; } } \$map {\$p . \$c} = \$position; } ++\$position; } printf "\n\n"; print "pt/ct identity distances not encountered\n"; print "----------------------------------------\n"; my \$lim = \$len < 1000 ? \$len : 1000; for (my \$i=1; \$i<\$lim; \$i++) { my \$key = sprintf "%5d", \$i; if (!(defined \$distance{\$key})) { printf "\tDistance of %5d not encountered\n", \$i; } } print "\n"; print "pt/ct identity distances and counts\n"; print "-----------------------------------\n"; foreach my \$key (sort keys %distance) { printf "\t%05d\t%05d\n", \$key, \$distance{\$key}; }