#!/usr/bin/env python
#altered Dec 09 al for migration to /usr8
"""
Read the Single Particle Reconstruction web page:
/usr8/spider/docs/techs/recon/mr.html
Generate an object containing the subdirectories and their batch files.
Expects each section to begin with:
"These procedures should be run in the Directory directory."
Subdirectories need to have the format:
"Subdirectory(ies): dir1, dir2"
Loads anything in a link in the 'newprogs' directory.
---
Usage:
D = read_spr_page.getproject()
returns a dictionary D where
D.keys() = [project directories]
D[dir] = ( [procs], [subdirs] )
A special key 'dirlist' has an ordered list of directories.
"""
import os,string,sys
import re
# Matches "Subdirectory" / "subdirectories" (either case of the leading S)
# followed by optional spaces, a colon, optional spaces and at least one more
# character, i.e. the "Subdirectory(ies): dir1, dir2" form from the module
# docstring.
re_subdirs = re.compile("[sS]ubdirector(y|ies) *: *.+")
# Pattern that readSPRpage uses to recognise the start of a new directory
# heading.
# NOTE(review): this statement is truncated in this copy of the file -- the
# pattern text and the closing parenthesis are missing, so the module cannot be
# parsed as it stands. Names used further down (default_page, source_dir,
# batdir, batext, get_ahref) are not defined anywhere in the visible file either
# and were presumably lost at this same point; restore them from the original.
re_spire = re.compile("' "
def getSubdirectories(line):
    """Return the subdirectory names listed after "subdirectories" in line.

    Everything following the first occurrence of the word "subdirectories"
    is taken, every "=" and every "-->" is removed, and the remainder is
    split on commas. Each name is stripped of surrounding whitespace.

    line -- text of one directory heading (may span several joined lines).

    Returns a list of names; an empty list when the keyword is absent or
    when nothing is listed after it. For example
    "directory=xxx subdirectories=a, b -->" gives ["a", "b"].
    """
    subs = "subdirectories"
    start = line.find(subs)
    if start < 0:
        return []
    listed = line[start + len(subs):]
    listed = listed.replace("=", "").replace("-->", "")
    names = []
    for piece in listed.split(","):
        piece = piece.strip()
        # An empty list ("subdirectories= -->") or a stray comma would
        # otherwise produce "" as a subdirectory name.
        if piece:
            names.append(piece)
    return names
def getDirectory(line):
    """Return the first whitespace-delimited token after the first "=" in line.

    line -- text of one directory heading, e.g. "directory=xxx ...".

    Returns the token ("xxx" in the example), or "" when the line has no "="
    or when only whitespace follows it.
    """
    eq = line.find("=")
    if eq < 0:
        return ""
    tokens = line[eq + 1:].split()
    # Nothing after the "=": report "no directory" instead of raising
    # IndexError, so the caller's empty-string check can handle it.
    if not tokens:
        return ""
    return tokens[0]
def get_batfile(line):
    """Return the file-name part of the link target found in line.

    The link target is whatever get_ahref(line) returns; any leading
    directory components are dropped.
    """
    return os.path.basename(get_ahref(line))
def readSPRpage(filename=None, dirobj=None):
" returns dictionary w/ D[dirname] = list of batch files"
# Parse one web page and collect, per project directory, the batch files and
# subdirectories it lists.
#
# filename -- page to read; default_page is used when None.
# dirobj   -- dictionary from an earlier call to extend; a new one is created
#             when None.
# Returns D, where D[dirname] is the tuple (procs, subs) -- two lists, not the
# bare list the one-line docstring suggests -- and D['dirlist'] is the list of
# directory names in the order they were first stored.
#
# NOTE(review): indentation was lost in this copy of the file. The comments
# below describe the nesting the statements appear to need; confirm against
# the original before re-indenting.
if filename == None:
filename = default_page
# The whole page is read into memory as a list of lines.
fp = open(filename,'r')
B = fp.readlines()
fp.close()
# get batch files for the top level project directory
# NOTE(review): the next line is damaged. It looks like two assignments fused
# together: the value of `dirstring` and the start of a second statement
# (presumably `endstring = ...`, which the loop below reads) are missing, and
# the string literal is unterminated. `dirstring` is only used for `lendstr`,
# which is never read afterwards.
dirstring = "' # don't bother after this point
# Start from an empty result, or continue filling the caller's dictionary.
if dirobj == None:
D = {}
dirlist = [] # for retaining the order of the directories
else:
D = dirobj
if D.has_key('dirlist'):
dirlist = D['dirlist']
else:
dirlist = []
D['dirlist'] = dirlist
lendstr = len(dirstring)
# i indexes the current line; s holds its text (or several joined lines while
# a heading is being collected).
# NOTE(review): B[0] raises IndexError when the file is empty.
i = 0
n = len(B)
s = B[i]
# procs/subs accumulate entries for the directory currently being read.
procs = []; oldprocs = []
subs=[]; oldsubs = []
dirname = ""
# Scan until a line contains endstring or the last line is reached. Because of
# the `i < n-1` test the final line of the file is loaded into s but never
# examined.
while string.find(s,endstring) < 0 and i < n-1:
a = re_spire.search(s)
# found a new directory heading
if a:
# NOTE(review): nothing stops this inner loop at the end of B, so a heading
# with no closing "-->" raises IndexError.
while string.find(s, "-->") < 0: # keep reading lines til end comment
i += 1
s = s + B[i]
newdirname = getDirectory(s)
if newdirname == "":
# NOTE(review): `d` is not assigned anywhere in this function as shown, so
# this message would raise NameError instead of printing; `s` looks like the
# intended value.
print "read_spr_page.py: unable to get directory from %s" % str(d)
break
newsubs = getSubdirectories(s)
# Before switching to the new heading, store what was gathered for the
# previous directory. Entries already in D for that name are placed first and
# concatenated as-is (no duplicate check here, unlike the final merge after
# the loop).
if dirname != "":
if D.has_key(dirname):
oldprocs, oldsubs = D[dirname]
else:
oldprocs, oldsubs = [],[]
procs = oldprocs + procs
subs = oldsubs + subs
D[dirname] = (procs, subs)
if dirname not in dirlist:
dirlist.append(dirname)
# Begin collecting for the directory named in this heading.
dirname = newdirname
subs = newsubs
procs = []
# look for procedures
# A line mentioning batdir is treated as a link to a procedure file; it is
# kept when its extension is in batext and it has not been seen yet for this
# directory.
elif string.find(s,batdir) > -1:
proc = get_batfile(s)
fname,ext = os.path.splitext(proc)
if ext in batext and proc not in procs:
procs.append(proc)
# Advance to the next line.
i = i + 1
s = B[i]
# end while
# Store the last directory. Here previously stored entries are appended after
# the new ones, and only when not already present.
if dirname != "":
if D.has_key(dirname):
oldprocs, oldsubs = D[dirname]
for p in oldprocs:
if p not in procs:
procs.append(p)
for s in oldsubs:
if s not in subs:
subs.append(s)
D[dirname] = (procs, subs) # finish up the last set
if dirname not in dirlist:
dirlist.append(dirname)
D['dirlist'] = dirlist
return D
def getproject(webpages=None):
    """Read one or more web pages and return the combined project dictionary.

    webpages -- a single path, a list of paths, or None. With None the
                pages "mr.html" and "refine.html" in source_dir are read.

    Returns the dictionary built up by calling readSPRpage on each page in
    turn, every call extending the result of the previous one.

    Side effect: the working directory is changed to the directory of each
    page before it is read (behaviour kept from the original; presumably so
    that relative links in the page resolve -- confirm).
    """
    if webpages is None:
        webpages = [os.path.join(source_dir, "mr.html"),
                    os.path.join(source_dir, "refine.html")]
    elif isinstance(webpages, str):
        webpages = [webpages]

    # Anchor relative paths to the directory we started in *before* any
    # chdir happens. Otherwise a bare file name makes os.chdir("") fail, and
    # a relative path stops pointing at the page once the working directory
    # has moved. os.path.join leaves absolute paths untouched.
    start_dir = os.getcwd()
    pages = [os.path.join(start_dir, page) for page in webpages]

    D = {}
    for webpage in pages:
        webdir = os.path.split(webpage)[0]
        os.chdir(webdir)
        D = readSPRpage(webpage, D)
    return D
if __name__ == '__main__':
    # Command-line use: read the default pages and print each directory
    # followed by its procedure files and its subdirectories.
    project = getproject()
    ordered_dirs = project['dirlist']
    print(ordered_dirs)
    for dirname in ordered_dirs:
        print(dirname)
        batch_files, subdirectories = project[dirname]
        for batch_file in batch_files:
            print(" " + batch_file)
        for subdirectory in subdirectories:
            print(" " + subdirectory + "/")