#!/usr/local/bin/perl
# Construct a database of WebMO formulas, charges, and multiplicities.
#
# For every file in the current directory whose name ends in "mol":
#   1. read the mol file and count how many atoms of each element it has;
#   2. read the companion html file (same name, "mol" suffix replaced by
#      "html") and extract the ion charge, multiplicity, and header text;
#   3. print one tab-separated record to STDOUT:
#        C H N O F Cl Br P S counts, formula, charge, multiplicity,
#        header, html file name.
#
# Example inputs used during development: allyl-.mol, CH3AsH2.mol, BF3.mol
use strict;
use warnings;

# Elements that get their own count column and lead the formula, in order.
my @LEADING_ELEMENTS = qw(C H N O F Cl Br P S);

# Run only when executed as a script, so the helpers can be loaded alone.
main() unless caller;

# Entry point: scan the current directory and print one record per mol file.
# Dies if the directory, a mol file, or its companion html file cannot be
# opened.
sub main {
    opendir(my $dir, '.') or die "Can't open directory\n";
    my @filenames = readdir($dir);
    closedir($dir);

    for my $mol_file (@filenames) {
        next unless $mol_file =~ /mol$/;

        open(my $mol, '<', $mol_file)
            or die "Can't open mol file $mol_file";
        my $count = read_element_counts($mol);
        close($mol);

        # Anchor at the end so only the suffix is rewritten; an unanchored
        # s/mol/html/ would turn "molecule.mol" into "htmlecule.mol".
        (my $html_file = $mol_file) =~ s/mol$/html/;

        open(my $htm, '<', $html_file)
            or die "Can't open html file $html_file";
        my ($charge, $multiplicity, $head) = read_html_summary($htm);
        close($htm);

        print join("\t",
            (map { $count->{$_} || 0 } @LEADING_ELEMENTS),
            build_formula($count),
            $charge, $multiplicity, $head, $html_file), "\n";
    }
    return;
}

# Normalise an element symbol to its first character followed by the
# lower-cased second character, so "CL" and "Cl" are counted together.
# Anything past the second character is dropped.
sub normalize_symbol {
    my ($symbol) = @_;
    return substr($symbol, 0, 1) . lc(substr($symbol, 1, 1));
}

# Count atoms per element in a mol file.
#   $fh     - readable handle positioned at the start of the file
#   returns - hash ref mapping normalised element symbol => atom count
# Layout read here: three header lines, then a counts line whose first
# whitespace-separated field is the atom count, then one line per atom with
# the element symbol as the fourth whitespace-separated field.
# NOTE(review): fields are split on whitespace; a fixed-width counts line
# whose fields run together would be misread - confirm against real files.
sub read_element_counts {
    my ($fh) = @_;
    my %count;

    # Skip the three header lines that precede the counts line.
    for my $header_line (1 .. 3) {
        defined(my $skipped = <$fh>) or return \%count;
    }

    my $counts_line = <$fh>;
    return \%count unless defined $counts_line;
    my ($atoms) = split ' ', $counts_line;
    return \%count unless defined $atoms && $atoms =~ /^\d+$/;

    # Bounded loop: never reads past the atom block, even when the count is
    # zero or the file is truncated.
    for my $atom_index (1 .. $atoms) {
        my $line = <$fh>;
        last unless defined $line;
        my (undef, undef, undef, $element) = split ' ', $line;
        next unless defined $element;
        $count{ normalize_symbol($element) }++;
    }
    return \%count;
}

# Build the formula string from element counts.
#   $count  - hash ref of element symbol => count
#   returns - "Sym n " repeated for each element with a non-zero count:
#             first C H N O F Cl Br P S in that order, then all remaining
#             elements sorted by symbol (each entry keeps its trailing space)
sub build_formula {
    my ($count) = @_;
    my %is_leading = map { $_ => 1 } @LEADING_ELEMENTS;
    my @others     = grep { !$is_leading{$_} } keys %$count;
    my @order      = (@LEADING_ELEMENTS, sort @others);
    return join '', map { "$_ $count->{$_} " } grep { $count->{$_} } @order;
}

# Extract charge, multiplicity, and header text from a WebMO html page.
#   $fh     - readable handle positioned at the start of the file
#   returns - ($charge, $multiplicity, $head); defaults are (0, 1, '')
# Reading stops at the first line containing "Tell".
# NOTE(review): the <BODY...> and <H2> patterns are reconstructed; confirm
# them against an actual WebMO page.
sub read_html_summary {
    my ($fh) = @_;
    my ($charge, $multiplicity, $head) = (0, 1, '');

    while (my $line = <$fh>) {
        last if $line =~ /Tell/;

        if ($line =~ /BODY/) {
            ($head = $line) =~ s/<BODY[^>]*>//;
            $head =~ s/<H2>//;
            $head =~ s/<\/H2>//;
            $head =~ s/\r?\n\z//;    # strip the line ending only
        }

        # Capture the value that follows each phrase. This works whether the
        # two sentences share a line or not, and does not depend on $charge,
        # which the old branch tested while it was still always zero.
        if ($line =~ /The ion charge is\s*([^.\s]+)/) {
            $charge = $1 if $1;
        }
        if ($line =~ /The multiplicity is\s*([^.\s]+)/) {
            $multiplicity = $1 if $1;
        }
    }
    return ($charge, $multiplicity, $head);
}

1;