#! /usr/local/bin/perl -w
# expansionsFromUMLS.pl - reads a list of abbreviations and queries the
# UMLS LRABR table to identify the possible expansions
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see
# <https://www.gnu.org/licenses/>.
# Read command line options
use Getopt::Std;
# Parse the switches: -h prints the usage notes, -u <file> names the LRABR
# table.  Both a missing -u and an explicit -h end in usage(), which dies.
my %options;
getopts("hu:", \%options);

usage() if exists $options{h};
usage() unless exists $options{u};

# The LRABR file must already exist on disk before we go any further
my $umls_abbrev_file = $options{u};
die("LRABR file ($umls_abbrev_file) does not exist\n")
    unless -e $umls_abbrev_file;
# Collect the target abbreviations, one per non-empty input line, from the
# files left on the command line (or standard input when none are given).
# The hash is used purely as a set; the stored value is a placeholder.
my %abbreviations;
while (defined(my $line = <>)) {
    # .+ stops at the newline, so blank lines are skipped and the
    # captured text carries no line terminator
    next unless $line =~ /^(.+)/;
    $abbreviations{$1} = "true";
}
# Read through the LRABR data file and find expansions for the target
# abbreviations.  Records are pipe-delimited: the abbreviation is taken from
# the second field and its expansion from the fifth.  The result is a
# two-level hash, abbreviation -> expansion -> "true", so duplicate
# abbreviation/expansion pairs collapse into one entry.
my %expansions = ();
open(my $abbrev_fh, '<', $umls_abbrev_file)
    or die "Can't open $umls_abbrev_file for reading: $!\n";
# Read the file one record at a time; an empty while() condition would
# loop forever without ever consuming the handle.
while (my $record = <$abbrev_fh>) {
    chomp $record;
    my @fields = split(/\|/, $record);

    # Skip blank or truncated records that carry no expansion field
    next if @fields < 5;

    my ($abbrev, $expansion) = @fields[1, 4];
    if (exists $abbreviations{$abbrev}) {
        $expansions{$abbrev}{$expansion} = "true";
    }
}
close($abbrev_fh);
# Print out data file in appropriate format: for every abbreviation, one
# line per expansion of the form
#     <abbreviation> M<n> "<expansion>"
# followed by a blank line separating it from the next abbreviation.
# Both levels are sorted so the M<n> labels are assigned identically on
# every run; unsorted hash keys come back in an unpredictable order.
foreach my $abbrev (sort keys %expansions) {
    my $counter = 1;
    foreach my $expansion (sort keys %{ $expansions{$abbrev} }) {
        print "$abbrev M".$counter." \"$expansion\"\n";
        $counter++;
    }
    print "\n";
}
# sub usage
# Prints out usage notes and terminates the program: the notes are passed
# to die(), so they go to STDERR and the sub never returns.
# The synopsis shows the abbreviation list as a positional argument (it is
# read with <>, i.e. from the remaining command-line files or standard
# input); -h is a plain flag and takes no value.
sub usage {
    die("$0 -u umls_data_file [-h] [abbreviation_file ...]\n"
        . "\t-u umls_data_file\tlocation of LRABR UMLS table\n"
        . "\t-h\t\t\tprints out these usage notes\n"
        . "\tabbreviation_file\tlist of abbreviations, one per line (default: standard input)\n");
} # sub usage