# Spider Python Library
# Copyright (C) 2006 Health Research Inc.
#
# HEALTH RESEARCH INCORPORATED (HRI),
# ONE UNIVERSITY PLACE, RENSSELAER, NY 12144-3455
#
# Email: spider@wadsworth.org
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
import os, re, time
import struct, sys
from commands import getoutput
from types import *
WRITE_SPIREOUT = "WRITE_SPIREOUT" # environmental variable for Spire
def fileReadLines(filename):
"read a text file, return a list of lines"
try:
fp = open(filename,'r')
B = fp.readlines()
fp.close()
return B
except IOError, e:
print 'Unable to open file \n' + filename, e
return None
def fileWriteLines(filename, lines, append=0, mode='w'):
"write a list of lines to a text file"
mode = 'w'
if append != 0 or mode != 'w':
mode = 'a'
fp = open(filename, mode)
if type(lines) == type("string"):
fp.write(lines)
elif type(lines) == type(["list"]):
fp.writelines(lines)
fp.close()
###################################################################
#
# Reading and writing SPIDER document files
#
# readdoc
# writedoc
# nowisthetime, makeDocfileHeader, fixHeaders : functiones used by writedoc
# Usage: readdoc(file, column=2) or readdoc(file, columns=[2,3])
#
# Of they following 3 keywords (column, columns, lines), only one may be
# used in a call to readdoc. If none are used, then the default is "column=1"
#
# If column=int, returns that column as a list.
# If columns=[i,j,k], returns a tuple of lists.
# If lines=[i,j,k], returns a dictionary, each entry is a list of line values.
#
# Version 1.2 'keys' keyword is deprecated, use lines='all' instead
# If keys=0, returns a list of column values
# (column 1 = 1st Spider doc file data column).
# If keys=1, returns a dictionary indexed by keys, uses column keyword.
# If keys='all', returns dictionary of all values, ignores column keyword.
# Only works for Spider Version 11.0 format, with spaces between columns.
def readdoc(filename, column=1, columns=None, line=None, lines=None, keys=0):
"Read a SPIDER document file; return a list or a dictionary"
# first, figure out the which keyword (columns, line, lines) is used
columnlist = None
linelist = None
if lines != None:
if isListorTuple(lines):
linelist = lines
elif lines == 'all':
linelist = 'all'
else:
ci = checkInteger(lines)
if ci != None:
linelist = [ci]
else:
print "%s is not a valid value for 'lines' keyword" % str(lines)
return None # lines != None but can't parse it
elif line != None:
# try the 'line' keyword
ci = checkInteger(line)
if ci != None:
linelist = [ci]
elif isListorTuple(line):
linelist = line
else:
print "%s is not a valid value for 'line' keyword" % str(line)
return None
elif columns != None:
if isListorTuple(columns):
columnlist = columns
else:
ci = checkInteger(columns)
if ci != None:
columnlist = [ci]
else:
return None # columns != None but can't parse it
else:
# try the column keyword
ci = checkInteger(column)
if ci != None:
columnlist = [ci]
elif isListorTuple(column):
columnlist = column
else:
return None
# read the file data
B = fileReadLines(filename)
if B == None:
return None
# if keys=0, return a list of column values (or tuple of lists)
if keys == 0 and columnlist != None:
if len(columnlist) == 1:
# only asking for 1 column
if columnlist[0] != 0: col = columnlist[0] + 1 # first Spider data column
else: col = 0 # key column
C = []
for line in B:
d = line.split()
try: int(d[0])
except: continue
try:
C.append(d[col])
except:
ncols = int(d[1])
if col > ncols:
if len(C) > 0:
print "readdoc: column %d has no data at line %s" % (column,d[0])
del(B)
return map(float,C)
else:
print "column=%d, %s has only %d columns" % (column, filename, ncols)
del(B)
return None
del(B)
return map(float,C)
else:
# need to get multiple columns
ncols = len(columnlist)
listlist = []
for c in columnlist:
listlist.append([]) # a list of empty lists
for line in B:
d = line.split()
try: int(d[0])
except: continue
for i in range(ncols):
col = columnlist[i]
if col != 0: col = col + 1 # get past SPIDER's second column
try:
f = float(d[col])
listlist[i].append(f)
except:
pass
del(B)
return tuple(listlist) # convert it to a tuple
# return a dictionary of line values
elif keys == 0 and linelist != None:
D = {}
if linelist == 'all':
linedata = []
for bline in B:
s = bline.split()
try: bkey = int(s[0])
except: continue
linedata = map(float, s[2:])
D[bkey] = linedata
return D
else: # a subset of lines
for bline in B:
s = bline.split()
try: bkey = int(s[0])
except: continue
if bkey in linelist:
linedata = map(float, s[2:])
D[bkey] = linedata
idx = linelist.index(bkey) # remove key from linelist
del(linelist[idx])
if len(linelist) == 0:
return D
# if keys != 0, return a dictionary
else:
D = {}
if keys == 'all':
# each dictionary key points to list of all values in that line
for line in B:
d = line.split()
try: int(d[0])
except: continue
key = int(d[0])
ncols = int(d[1]) + 1
data = map(float,d[2:ncols+1])
D[key] = data
else:
# dictionary contains a list of requested columns
clist = []
for col in columnlist:
if col == 0: clist.append(col)
else: clist.append(col+1)
ncols = len(clist)
for line in B:
d = line.split()
try: int(d[0])
except: continue
key = int(d[0])
data = []
for col in clist:
data.append(float(d[col]))
D[key] = data
del(B)
return D
# end readdoc -------------------------------------------------
# included for compatibility
def readSpiderDocFile(filename, col_list=None):
"Read a SPIDER document file; return a dictionary"
return readdoc(filename, keys='all')
def numberOfColumns(docfile):
" find the number of columns in a doc file"
B = fileReadLines(docfile)
for line in B:
line = line.strip()
if line == "" or line[0] == ";": continue
d = line.split()
if len(d) > 1:
try:
ncol = int(d[1])
return ncol
except:
pass
return 0
onespace = re.compile("\S+ \S+")
def combineHeaderSpaces(g,s,ncolumns):
" g is list, s is string. Convert ['HAS', 'SPACE'] -> ['HAS SPACE']"
m = onespace.search(s)
# if an item with a space is found AND there are more headers than columns..
if m and len(g) > ncolumns:
start,end = m.span()
substring = s[start:end]
pp = substring.split()
s1 = pp[0]
s2 = pp[1]
n = len(g)
idx = -1
for i in range(n-1):
if g[i] == s1 and g[i+1] == s2:
idx = i
break
if idx > -1:
# collapse list items i,i+1 to one item with a space
h = g[:idx] + [g[idx] + ' ' + g[idx+1]]
if n > idx+1:
h = h + g[idx+2:]
g = combineHeaderSpaces(h,s[end:],ncolumns)
return g
else:
return g
else:
return g
def getDocfileHeaders(filename, output='list'):
" tries to get the column headers from a doc file"
try:
fp = open(filename,'r')
except:
print 'unable to open %s' % (filename)
return ""
headers = []
if output == "string":
headers = ""
max = 5
i = 1
while 1:
s = fp.readline()
if not s:
break
if len(s) > 0 and s[1] == ';':
x = s.split()
if len(x) == 5 and len(x[0]) == 8 and x[0][4] == '/' and x[2] == 'AT':
# it's ;spi/dat 22-AUG-2008 AT 15:17:59 file.dat
continue
if len(x) > 1:
if output == "string":
return s[2:].rstrip()
g = x[1:] # g is list, s is string
# some headers have a space, so 2 items need to be combined
ncol = numberOfColumns(filename)
headers = combineHeaderSpaces(g,s.rstrip(), ncol)
break
i += 1
if i > max:
break
fp.close()
return headers
#########################################################################
# functions for writedoc
# returns e.g., ('16-OCT-03', '13:08:16', '031016130816')
def nowisthetime():
"return current time as tuple of 3 strings: (date, time, ID)"
tt = time.localtime(time.time())
# localtime return format: (2003, 10, 16, 12, 48, 30, 3, 289, 1)
#t = string.split(time.asctime(tt))
t = time.asctime(tt).split()
# asctime return format: 'Thu Oct 16 12:50:17 2003'
mo = t[1].upper()
day = t[2]
if len(day) < 2: day = '0' + day
timestr = t[3]
yr = t[4]
datestr = "%s-%s-%s" % (day, mo, yr)
yr = yr[-2:]
# this is just to get the month as a number
d = map(str,tt) # stringify all numbers in the tuple
mon = d[1]
if len(mon) < 2: mon = '0' + mon
#(h,m,s) = string.split(timestr,':')
(h,m,s) = timestr.split(':')
idstr = "%s%s%s%s%s%s" % (yr,mon,day,h,m,s)
return (datestr, timestr, idstr)
def makeDocfileHeader(filename, batext=None):
"create the comment line used at the top of SPIDER document files"
filename = os.path.basename(filename)
fn, ext = os.path.splitext(filename)
ext = ext[1:]
if batext == None:
batext = 'spl' # Spider Python Library
date,time,idstr = nowisthetime()
h = " ;%s/%s %s AT %s %s\n" % (batext,ext,date,time,filename)
return h
def fixHeaders(headers):
"make all headers 11 characters in width; return doc string"
w = 11
docstr = " ; / "
for h in headers:
d = len(h)
if d > w:
h = h[:w]
docstr += h.rjust(w+1)
docstr += "\n"
return docstr
###################################################################
#
# type-checking functions
def checkInteger(x):
" returns int if input can be converted to an integer, else None"
try:
i = int(x) # works for 5, 5.0, '5'
return i
except:
try:
i = int(float(x)) # works for '5.0'
return i
except:
return None
return None
def isListorTuple(x):
"returns 1 if input is a list or a tuple"
if isinstance(x, ListType) or isinstance(x, TupleType) : return 1
else: return 0
def isDictionary(d):
"returns 1 if input is a Python dictionary"
if isinstance(d, DictType): return 1
else: return 0
def isListofLists(d):
"returns 1 if input is a list, and 1st item in input is also a list"
"actually works for tuples as well. Only checks 1st element "
if not isListorTuple(d):
return 0
if len(d) < 1:
return 0
if isListorTuple(d[0]):
return 1
else:
return 0
def getLastDocfileKey(docfile):
"return the last key of a doc file"
if not os.path.exists(docfile):
return None
cmd = 'tail %s' % docfile
res = getoutput(cmd)
s = res.split("\n")
s.reverse()
for line in s:
if len(line) > 1 and line[1] != ";":
ss = line.split()
try:
i = int(ss[0])
return i
except:
pass
return None
# --------------------------------------------------------------
# writedoc
# Data can be organized as columns or line. Call to writedoc should
# use EITHER columns OR lines.
# Data must be integer or float. (they can be in string format)
# columns: a list of lists; each doc file column is a list
# lines : a list of lists; each doc file line is a list (w/o key)
# headers: a list of strings
#
# todo: check if columns have different lengths
def writedoc(filename, columns=None, lines=None, headers=None, keys=None, mode='w'):
"write data to a file in SPIDER document file format"
if not isListofLists(columns) and not isListofLists(lines):
if isDictionary(columns):
return writeSpiderDocFile(filename, columns, headers=headers, mode=mode)
else:
print "writedoc: columns or lines must be a list of lists"
return
"filename must have data extension"
try:
fp = open(filename, mode)
except:
print "Unable to open %s for writing." % filename
return
# write Spider doc file header
lastkey = None
_APPEND = 0
if mode == 'w':
hdr = makeDocfileHeader(os.path.basename(filename))
fp.write(hdr)
elif mode == 'a':
_APPEND = 1
try:
lastkey = getLastDocfileKey(filename)
except:
pass
# write column headings
if headers != None and type(headers) == type(["list"]):
fp.write(fixHeaders(headers))
datalines = []
# write data columns
if columns != None:
ncol = len(columns) # number of columns
n = len(columns[0]) # length of 1st column (assumes all have same length)
if keys == None:
if lastkey == None:
keys = range(1,n+1)
else:
keys = range(lastkey+1, lastkey+n+1)
for i in range(n):
dstr = "%5d %2d" % (int(keys[i]), int(ncol))
for j in range(ncol):
dstr += " %11g" % float(columns[j][i])
datalines.append(dstr+"\n")
# write data lines
elif lines != None:
n = len(lines) # number of lines
if keys == None:
if lastkey == None:
keys = range(1,n+1)
else:
keys = range(lastkey+1, lastkey+n+1)
for i in range(n):
line = lines[i]
ncol = len(line) # number of columns
dstr = "%5d %2d" % (int(keys[i]), ncol)
for item in line:
dstr += " %11g" % float(item)
datalines.append(dstr+"\n")
fp.writelines(datalines)
fp.close()
if not _APPEND: # i.e. it's a new doc file
writeSpireoutFile(filename)
#included for compatibiity:
# data must be in dictionary form: D[key] = [list of column values]
def writeSpiderDocFile(filename, data, headers=None, append=0, mode='w'):
"write data (in dictionary form) to a file in SPIDER document file format"
if append > 0: mode = 'a'
if mode == 'a': _APPEND = 1
else: _APPEND = 0
if not isDictionary(data):
# if it's not a dictionary, see if it's a list of lists
if isListofLists(data):
return writedoc(filename, columns=data, headers=headers, mode=mode)
else:
return 0
try:
fp = open(filename, mode)
except:
print "unable to open %s for writing" % filename
return 0
# write Spider doc file header
hdr = makeDocfileHeader(os.path.basename(filename))
fp.write(hdr)
# and any column headings
if headers != None and type(headers) == type(["list"]):
fp.write(fixHeaders(headers))
# write data
keys = data.keys()
keys.sort()
if len(keys) > 0:
firstkey = keys[0]
if firstkey in data:
v1 = data[firstkey]
else:
print "writeSpiderDocFile: key not found in data"
else:
print "writeSpiderDocFile: empty data dictionary"
return 0
if isinstance(v1, ListType):
for key in keys:
values = data[key]
n = len(values)
h = "%5d %2d " % (int(key),int(n))
for value in values:
h += " %11g " % (float(value))
fp.write(h+"\n")
else:
# it's supposed to be a list! But if it's not..
for key in keys:
value = data[key]
try:
f = float(value)
except:
print "writedoc: unable to convert %s" % str(value)
print "writedoc: Dictionary elements must be lists!"
return 0
h = "%5d %2d %11g\n" % (int(key), 1, f)
fp.write(h)
fp.close()
if not _APPEND: # i.e. it's a new doc file
writeSpireoutFile(filename)
return 1
def writeSpireoutFile(filename):
# this section added Aug 2006, for Spire compatibility
if WRITE_SPIREOUT in os.environ:
spireout = os.environ[WRITE_SPIREOUT]
if os.path.exists(spireout):
fp = open(spireout, mode='a')
else:
fp = open(spireout, mode='w')
date, time, id = nowisthetime()
fn = os.path.basename(filename)
s = " %s AT %s OPENED NEW DOC FILE: %s\n" % (date, time, fn)
fp.write(s)
fp.close()
###################################################################
# convenience functions
#
def list2int(a):
" converts a list of strings to integers"
return map(int, map(float,a))
def list2float(a):
" converts a list of strings to floats"
return map(float,a)
# given ("mic****", 89) returns "mic0089"
# Substitutes the first set of asterisks it finds (i.e. leftmost).
# If number of *'s too small for number, filename is extended.
# asterisks can be replaced by another char
def makeFilename(filename, n, char='*'):
"substitutes asterisks for number: ('mic****', 89) returns 'mic0089'"
try:
n = int(float(n))
except:
print "makeFilename: unable to convert %s to integer" % str(n)
number = str(n)
re_findchar = re.compile('[%s]+' % char)
a = re_findchar.search(filename)
if not a:
print "makeFilename: no '%s' found in %s" % (char, filename)
return ""
(start,end) = a.span()
sp = (end - start)
num = number.zfill(sp) # pad the numeric string with zeroes out to req. length
f = filename[:start] + num + filename[end:]
return f
re_asterisk = re.compile('[*]+')
# old version of makeFilename included for compatibility
def makeSpiderFilename(filename, n):
"substitutes asterisks for number: ('mic****', 89) returns 'mic0089'"
try:
n = int(float(n))
except:
print "makeSpiderFilename: unable to convert %s to integer" % str(n)
number = str(n)
a = re_asterisk.search(filename)
if not a:
#print "makeSpiderFilename: no asterisks in %s" % filename
return filename
(start,end) = a.span()
sp = (end - start)
num = number.zfill(sp) # pad the numeric string with zeroes out to req. length
f = filename[:start] + num + filename[end:]
return f
###################################################################
# File number routines:
# filenumber(filename) returns an integer
# getfilenumber(filename) returns string with leading zeroes
# name2template(filename) given 'mic021.dat', returns 'mic***.dat'
# template2filename(template, n) given (pic***.dat, 3) returns pic003.dat
# numberlist2string(nlist) converts [1,2,3,4,6,8] -> '1-4,6,8'
# range2list(nrange) given string '1-4', outputs list [1,2,3,4]
#re_nums = re.compile('\d+\D?') # ints followed by one non-int char
def filenumber(file):
"returns file number (integer nearest the file extension)"
if len(file) == 0: return None
n = getfilenumber(file)
if n:
return int(n)
else:
return None
def getfilenumber(filename):
"returns file number as a string with leading zeroes "
filename = os.path.basename(filename)
fname,ext = os.path.splitext(filename)
numstr = ""
f = list(fname)
f.reverse()
done = 0
for ch in f:
if not done:
try:
int(ch)
numstr = ch + numstr
except:
if numstr != "":
done = 1
return numstr
def name2template(filename, all=0):
" given 'mic021.dat' --> returns mic***.dat "
" by default, only replaces number nearest extension. all !=0 replaces all"
if len(filename) == 0: return ""
path, basename = os.path.split(filename)
fname,ext = os.path.splitext(basename)
newfn = ""
f = list(fname)
f.reverse()
if all:
for ch in f:
try:
int(ch)
newch = '*'
except:
newch = ch
newfn = newch + newfn
else:
found = 0
for ch in f:
if not found:
try:
int(ch)
newch = '*'
except:
newch = ch
if newfn and newfn[0] == '*':
found = 1
else:
newch = ch
newfn = newch + newfn
fname = os.path.join(path,newfn) + ext
return fname
# template should have asterisks, num can be int or a numbered filename.
# Like makeSpiderFilename, but it can accept a filename instead of a number.
def template2filename(template, n=0): #numfile=None, n=None):
"replaces asterisks with number: (pic***.dat, doc003.dat) returns pic003.dat"
if type(n) == type(1):
pass
elif type(n) == type("string"):
n = filenumber(n)
else:
print "template2filename: unable to parse input"
return ""
nstars = template.count("*")
if nstars == 0:
return template
if len(str(n)) > nstars:
print "template2filename: **** Warning number larger than template"
numstr = str(n).zfill(nstars)
sts = "*" * nstars
filename = template.replace(sts,numstr)
return filename
# list2range: hyphenates runs of consecutive intgers.
# input: list [1,2,3,6,7,8,9,10,11,17]
# output: list with strings ['1-3', '6-11', '17']
def list2range(f):
" input is a list of integers "
fn = []
for item in f:
try:
fn.append(int(item))
except:
print "list2range: unable to convert %s to integer" % str(item)
return []
if len(fn) < 1: return []
fn.sort()
N = []
p = fn[0]
start = p
n = p
fn = fn[1:]
while len(fn) > 0:
n = fn[0]
if n == p+1:
p = n
elif n == p:
print "list2range: Warning, %s occurs more than once" % str(n)
p = n
else:
if start != p:
if p == start+1: # don't create two-item ranges (eg, '1-2')
N.append(str(start)) ; N.append(str(p))
else:
next = "%s-%s" % (str(start),str(p))
N.append(next)
else:
N.append(str(start))
start = n
p = n
fn = fn[1:]
if start != n:
if n == start+1:
N.append(str(start)) ; N.append(str(n))
else:
next = "%s-%s" % (str(start),str(n))
N.append(next)
else:
N.append(str(start))
return N
# input: list of strings (output from list2range)
# output: string of concatenated items
def range2string(n):
" converts ['1-4','7'] to '1-4,7' "
if n == None or len(n) == 0:
return ""
s = ""
for item in n:
s += item + ','
s = s[:-1] # remove trailing comma
return s
def numberlist2string(numberlist):
" a list of integers is converted to a string, "
" hyphenating consecutive numbers. [1,2,3,4] -> '1-4' "
d = list2range(numberlist)
return range2string(d)
def range2list(numberstring):
"input string: '1-4' -> output list: [1,2,3,4] "
if numberstring == "" or None:
return []
L = numberstring.split(',')
K = []
for item in L:
if item.find('-') > -1:
xlist = item.split('-')
start = int(xlist[0])
end = int(xlist[-1])
for i in range(start,end+1):
K.append(i)
else:
K.append(int(item))
return K
###################################################################
#
# Checking file types
#
# istextfile() boolean
# isSpiderDocFile() boolean
# isSpiderImage() boolean
# isSpiderBin() returns "image","volume","Fourier" or 0
# ------ text file functions -------------
noNumbers = re.compile("[^\d^\s^\.^\+^\-Ee]")
text_characters = "".join(map(chr, range(32, 127)) + list("\n\r\t\b"))
from string import maketrans
_null_trans = maketrans("", "")
# Applications importing this file should call istextfile, not istext,
# which is used internally.
# istextfile returns 1 for text, 0 for binary, 0 for error (not found?)
# pdf test added, cos they can get either answer.
def istextfile(filename, blocksize = 512):
"returns 1 if input is a text file (pdf's and zero-length files are binary)"
if os.path.isdir(filename):
return 0
name,ext = os.path.splitext(filename)
if ext.lower() == ".pdf":
return 0
try:
res = istext(open(filename).read(blocksize))
return res
except:
return 0
def istext(s):
"returns 1 if input is a text file (pdf's and zero-length files are binary)"
if "\0" in s:
return 0
if not s: # Empty files
return 1
# Get the non-text characters (maps a character to itself then
# use the 'remove' option to get rid of the text characters.)
t = s.translate(_null_trans, text_characters)
# If more than 30% non-text characters, then
# this is considered a binary file
ratio = float(len(t)) / float(len(s))
if ratio > 0.30:
return 0
return 1
# Quits as soon as it gets a good data line, i.e.,
# int1 int2 [floats], where no. floats = int2
def isSpiderDocfile(file):
"returns 1 if input is a SPIDER document file"
try:
fp = open(file, 'r')
except:
print 'unable to open %s' % (file)
return 0
comments = 0
isDoc = 0
blank = 0
while 1:
s = fp.readline()
if s == "": # only EOF should return blank
break
if len(s) > 2 and s[0] == " " and s[1] == ';': # Spider comment line
continue
if noNumbers.match(s): # if find any nondigits, +, _ etc
isDoc = 0
break
ss = s.split()
# test for new format: nums divided by blanks, 1st value is an int,
try:
i = int(ss[0])
# and there are N data columns, where N = s[1]
n = int(ss[1])
if len(ss[2:]) >= n:
try:
float(ss[2]) # we'll just test one
isDoc = 1
except:
isDoc = 0
break # then it's new (SPIDER 11.0 Feb 2004)
except:
pass
# see if it's the older fixed column format
if len(s) < 13:
isDoc = 0
break
try:
key = int(s[0:6]) # 1st 6 chars are key
n = int(s[6]) # 7th char is N
f = float(s[7:13]) # see if there's 1 good data value
isDoc = 1
break
except:
isDoc = 0
break
fp.close()
return isDoc
def stripComment(line, strip=1):
"removes all text after and including the 1st semicolon in a string"
n = line.find(";")
if n > -1:
line = line[:n].rstrip()
if strip:
line = line.strip()
return line
re_hdr = re.compile('END +BATCH +HEADER')
re_reg = re.compile('[xX][0-9][0-9] *=') # "x11 =" patterns
re_nam = re.compile('\[[ a-zA-Z0-9_-]+\] *=') # "[symbol] =" patterns
re_sym = re.compile('\?[ \w]+\?') # ?text? patterns
# top line of procedures
re_reglist = re.compile('([xX][0-9][0-9])(, *[xX][0-9][0-9])*') # x11,x12,x13
re_namlist = re.compile('(\[[ a-zA-Z0-9_-]+\])(, *\[[ a-zA-Z0-9_-]+\])*')
re_nam1 = re.compile('(\[[ a-zA-Z0-9_-]+\])') # named reg in proc hdr
# This is not really dependable - there are just too many variants to
# catch them all. Plus it may return with false positives.
def isSpiderBatchfile(file):
"returns 1 if input is a SPIDER batch file"
""" only checks first few lines of text.
Returns 1 if finds any of the following:
; --- End batch header ---
"FR G/L" pattern followed by [symbol] on next line
"x11=" register assignment pattern
"[symbol]=" named register assignment
Returns 2 if it thinks its a procedure, with the first line:
[x11,x12]
([ang-step],[ang-limit],[radius])
again, there are too many variants to catch them all
"""
#B = fileReadLines(file) takes too long for huge text files
#if B == None or len(B) == 0:
# return 0
max = 40
B =[]
fp = open(file,'r')
for i in range(max):
try:
B.append(fp.readline())
except:
pass
fp.close()
nlines = len(B)
if nlines < 40:
max = nlines
for i in range(max):
line = B[i]
if not line:
return 0
if line.find("RESULTS FILE FLUSHED") > -1:
return 0
line = line.strip()
line = line.upper()
if len(line) < 2: continue
# test if 1st line is a procedure call
if (line[0]=='[' and line[-1]=="]") or (line[0]=='(' and line[-1]==")"):
if re_reglist.match(line[1:-1]):
return 2
if re_namlist.match(line[1:-1]):
return 2
cmd = ""
if len(line) > 3:
cmd = line[0:4]
if len(line) == 0:
continue
elif re_hdr.search(line):
#print "hdr: " + line
return 1
# comment check must come after header check
elif line[0] == ";":
if line.find('SPIDER') > -1: # a comment with the word 'spider'?
return 1
else:
continue
elif re_reg.match(line):
#print "reg: " + line
return 1
elif re_nam.match(line):
#print "nam: " + line
return 1
elif re_sym.match(line):
#print "sym: " + line
return 1
elif cmd == "FR G" or cmd == "FR L":
#print "FR: " + line
return 1
elif line == "FR":
nextline = B[i+1].strip()
if re_sym.match(nextline):
return 2
return 0
def isSpiderProcedurefile(file):
"returns 1 if input is a SPIDER procedure file"
if isSpiderBatchfile(file) == 2:
return 1
else:
return 0
# ------ binary file functions -------------
def isInt(f):
"returns 1 if input is an integer"
try:
i = int(f)
if f-i == 0: return 1
else: return 0
except:
return 0
iforms = [1,3,-11,-12,-21,-22]
# returns header tuple, if t is a valid Spider header,
# otherwise returns 0
def isSpiderHeader(t):
"returns tuple of values from a valid SPIDER header, else 0"
h = (99,) + t # add 1 value so can use spider header index start=1
# header values 1,2,5,12,13,22,23 should be integers
for i in [1,2,5,12,13,22,23]:
if not isInt(h[i]): return 0
# check iform
iform = int(h[5])
if not iform in iforms: return 0
# check other header values
labrec = int(h[13]) # no. records in file header
labbyt = int(h[22]) # total no. of bytes in header
lenbyt = int(h[23]) # record length in bytes
#print "labrec = %d, labbyt = %d, lenbyt = %d" % (labrec,labbyt,lenbyt)
if labbyt != (labrec * lenbyt): return 0
# looks like a valid header
return h
# returns "image","volume","Fourier" or 0
def isSpiderBin(filename):
"returns nonzero value if input is a SPIDER binary file"
if not os.path.exists(filename):
return 0
minsize = 27 * 4 # 27 floating points
if os.path.getsize(filename) < minsize:
return 0
try:
fp = open(filename,'rb')
f = fp.read(minsize) # read 27 * 4 bytes
fp.close()
except:
return 0
bigendian = 1
t = struct.unpack('>27f',f) # try big-endian first
hdr = isSpiderHeader(t)
if hdr == 0:
bigendian = 0
t = struct.unpack('<27f',f) # little-endian
hdr = isSpiderHeader(t)
if hdr == 0:
return 0
iform = int(hdr[5])
if iform == 1:
istack = hdr[24]
if istack == 0:
return "image"
else:
return "stack"
elif iform == 3:
return "volume"
elif iform in [-11,-12,-21,-22]:
return "Fourier"
else:
return 0
def isSpiderImage(file):
"returns 1 if input is a SPIDER 2D image"
if isSpiderBin(file) == "image": return 1
else: return 0
def isSpiderVolume(file):
"returns 1 if input is a SPIDER 3D volume"
if isSpiderBin(file) == "volume": return 1
else: return 0
def isSpiderStack(file):
"returns 1 if input is a SPIDER stack file"
if isSpiderBin(file) == "stack": return 1
else: return 0
###################################################################
#
# Utilities for finding and testing SPIDER
def testSpider(spider):
"returns 1 if input is a working path to SPIDER"
file = 'test6637'
ext = ".bat"
filename = file + ext
fp = open(filename, 'w')
fp.write("en d\n")
fp.close()
spicmd = "%s bat/dat @%s" % (spider, file)
success = 0
output = getoutput(spicmd)
if output.find('Results file') > 0:
success = 1
log = "LOG" + ext
if os.path.exists(log):
os.remove(log)
os.remove(filename)
return success
def programExists(prog):
"a wrapper for os.path.exists that won't crash"
try:
if os.path.exists(prog): return 1
else: return 0
except:
return 0
def findProgram(prog):
"Use the Unix 'which' command to find a program"
if os.name != 'posix':
print 'not a posix system: no "which" command?'
cmd = 'which %s' % prog
out = getoutput(cmd)
# output from 'which' command may contain newlines and spaces
if out.find(os.linesep) > -1:
lines = out.split(os.linesep)
for line in lines:
if line.find(" ") > -1:
d = line.split()
for item in d:
if programExists(item):
return item
else:
if programExists(line):
return line
elif out.find(" ") > -1:
d = out.split()
print d
for item in d:
if programExists(item):
return item
else:
if programExists(out):
return out
# failure
return ""
def findSpider():
"returns path to SPIDER, or else an empty string"
spider = findProgram('spider')
if spider != "" and testSpider(spider):
return spider
else:
return ""
def runSpider(spider, batch, dataext):
bat, batext = os.path.splitext(batch)
batext = batext[1:]
if dataext[0] == '.':
dataext = dataext[1:]
cmd = "%s %s/%s @%s" % (spider, batext, dataext, bat)
out = getoutput(cmd)
return out
###################################################################
#
# Reading and writing the SPIDER header
#
SpiderHeaderDict = { 1 : 'nslice ', 2 : 'nrow ', 3 : 'irec ', 4 : 'nhistrec ',
5 : 'iform ', 6 : 'imami ', 7 : 'fmax ', 8 : 'fmin ',
9 : 'av ', 10 : 'sig ', 11 : 'ihist ', 12 : 'nsam ',
13 : 'labrec ', 14 : 'iangle ', 15 : 'phi ', 16 : 'theta ',
17 : 'gamma ',18 : 'xoff ',19 : 'yoff ',20 : 'zoff ',
21 : 'scale ', 22 : 'labbyt ', 23 : 'lenbyt ', 24 : 'istack ',
25 : 'NOTUSED ', 26 : 'maxim ', 27 : 'imgnum ', 28 : 'lastindx ',
29 : 'unused ', 30 : 'unused ', 31 : 'Kangle ', 32 : 'phi1 ',
33 : 'theta1 ', 34 : 'psi1 ', 35 : 'phi2 ', 36 : 'theta2 ',
37 : 'psi2 '}
# item[0] (bigendian flag) is not part of the Spider header. It is added
# so that SPIDER indices (starting with 1) may be used.
# hdr is the array returned by getSpiderHeader.
class SpiderHeaderClass:
def __init__(self, hdr):
self.header = hdr
self.hdrlen = len(hdr)
self.bigendian = hdr[0]
for i in range(1, self.hdrlen):
if i in SpiderHeaderDict:
name = SpiderHeaderDict[i]
if name in ['NOTUSED', 'unused']:
continue
val = hdr[i]
s = "self.%s = %f" % (name, val)
exec(s)
if self.hdrlen > 9:
self.avg = hdr[9] # alternate access format
if self.hdrlen > 31:
self.kangle = hdr[31]
# Create a SPIDER header for binary files
def makeSpiderHeader(dims):
" dims must be (nsam, nrow), or (nsam, nrow, nslice) "
if len(dims) == 2:
nsam, nrow = dims[0], dims[1]
nslice = 1.0
iform = 1.0
isVolume = 0
elif len(dims) == 3:
nsam, nrow, nslice = dims[0], dims[1], dims[2]
iform = 3.0
isVolume = 1
else:
return []
lenbyt = nsam * 4 # There are labrec records in the header
labrec = 1024 / lenbyt
if 1024%lenbyt != 0: labrec += 1
labbyt = labrec * lenbyt
hdr = []
nvalues = labbyt / 4
for i in range(nvalues):
hdr.append(0.0)
if len(hdr) < 23:
return []
# NB these are Fortran indices
hdr[1] = float(nslice) # nslice (=1 for an image)
hdr[2] = float(nrow) # number of rows per slice
hdr[5] = iform # iform for 2D image
hdr[12] = float(nsam) # number of pixels per line
hdr[13] = float(labrec) # number of records in file header
hdr[22] = float(labbyt) # total number of bytes in header
hdr[23] = float(lenbyt) # record length in bytes
# adjust for Fortran indexing
hdr = hdr[1:]
hdr.append(0.0)
# pack binary data into a string
hdrstr = []
for v in hdr:
hdrstr.append(struct.pack('f',v))
return hdrstr
def getSpiderHeader(filename, n=27):
" returns first n numbers, with Spider indices (starting at 1)"
" if n = 'all', returns entire header "
if not os.path.exists(filename):
return 0
getall = 0
if not isInt(n):
n = 27
getall = 1
nwords = n * 4 # no. floating point words
if os.path.getsize(filename) < nwords:
return 0
try:
fp = open(filename,'rb')
f = fp.read(nwords) # read 27 * 4 bytes
fp.close()
except:
return 0
bigendian = 1
bigformat = '>%df' % n
t = struct.unpack(bigformat,f) # try big-endian first
hdr = isSpiderHeader(t)
if hdr == 0:
bigendian = 0
littleformat = "<%df" % n
t = struct.unpack(littleformat,f) # little-endian
hdr = isSpiderHeader(t)
if hdr == 0:
return 0
else:
# check if user requested the entire header
if getall:
labbyt = int(hdr[22]) # total no. of bytes in header
hdr = getSpiderHeader(filename, n=labbyt)
hdr = list(hdr)
hdr[0] = bigendian
return hdr
# returns [type, (dimensions), and if there are any,(stats) ]
# where type = "image","volume","Fourier","stack" (but only for image stacks)
# or returns 0
def spiderInfo(filename):
if not os.path.exists(filename):
return 0
type = isSpiderBin(filename)
if type == 0:
return 0
hdr = getSpiderHeader(filename) # header with Spider indices (starting at 1)
info = getSpiderInfo(hdr)
return [type] + info
# return [ (dimensions), (stats) ]
def getSpiderInfo(h):
" assumes its a valid header "
#h = (99,) + t
nsam = int(h[12])
nrow = int(h[2])
nslice = int(h[1])
iform = int(h[5])
dim2D = [1, -11, -12]
dim3D = [3, -21, -22]
if iform in dim3D:
dims = (nsam, nrow, nslice)
else:
dims = (nsam, nrow)
imami = int(h[6])
if imami != 0:
max = float(h[7])
min = float(h[8])
avg = float(h[9])
std = float(h[10])
stats = (max, min, avg, std)
return [dims, stats]
else:
return [dims]