#!/usr/bin/env python
# altered Dec 09 al for migration to /usr8
"""
Read the Single Particle Reconstruction web page:
    /usr8/spider/docs/techs/recon/mr.html
Generate an object containing the subdirectories and their batch files.

Expects each section to begin with:
 "These procedures should be run in the Directory directory."
Subdirectories need to have the format:
 "Subdirectory(ies): dir1, dir2"
Loads anything in a link in the 'newprogs' directory.

Section headings are located with the ``re_spire`` pattern and are read up
to the closing "-->"; the directory name and the optional "subdirectories"
list are taken from that heading text (see getDirectory and
getSubdirectories).
---
Usage:
    D = read_spr_page.getproject()
returns a dictionary D where
    D.keys() = [project directories]
    D[dir] = ( [procs], [subdirs] )
A special key 'dirlist' has an ordered list of directories.
"""

import os
import re
import string
import sys

re_subdirs = re.compile("[sS]ubdirector(y|ies) *: *.+")

# ---------------------------------------------------------------------------
# NOTE(review): in the copy of this file that was reviewed, the text between
# the opening of the `re_spire` pattern and the body of getSubdirectories()
# was missing (it looks as if HTML comments were stripped from the source).
# The definitions in this section -- re_spire, source_dir, default_page,
# batdir, batext, re_href and get_ahref -- are reconstructions based on how
# the surviving code uses them.  Confirm every value against the original
# file before relying on it.
# ---------------------------------------------------------------------------

# Matches the start of a section heading comment.
# NOTE(review): pattern reconstructed -- presumably "<!-- SPIRE ..."; confirm.
re_spire = re.compile("<!-- *SPIRE")

# Directory holding the web pages (path taken from the module docstring).
# NOTE(review): reconstructed -- confirm.
source_dir = "/usr8/spider/docs/techs/recon"

# Page read by readSPRpage() when no filename is given.
# NOTE(review): reconstructed from the docstring -- confirm.
default_page = os.path.join(source_dir, "mr.html")

# A line is treated as a procedure link when it contains this text.
# NOTE(review): reconstructed from the docstring ("... a link in the
# 'newprogs' directory") -- confirm.
batdir = "newprogs"

# File extensions accepted as batch files.
# NOTE(review): only ".bat" is evidenced by the surviving code (the
# get_batfile docstring); the original list was lost and may have held more
# extensions -- restore it from the original file.
batext = [".bat"]

# NOTE(review): reconstructed helper pattern for get_ahref() -- confirm.
re_href = re.compile(r"""href\s*=\s*["']?([^"'\s>]+)""", re.IGNORECASE)

try:
    _STRING_TYPES = (basestring,)   # Python 2
except NameError:
    _STRING_TYPES = (str,)          # Python 3


def get_ahref(line):
    """Return the target of the first href attribute in `line`, or "".

    NOTE(review): the original implementation was lost; this version is
    inferred from get_batfile(), which only needs the link target as a
    path-like string.  Confirm against the original file.
    """
    match = re_href.search(line)
    if match is None:
        return ""
    return match.group(1)


def getSubdirectories(line):
    """Return the subdirectory names listed in a section heading.

    Looks for the word "subdirectories" in `line`; everything after it is
    cleaned of "=" and "-->" and split on commas.  Returns [] when the word
    is absent.  Blank entries are dropped.
    """
    subs = "subdirectories"
    start = line.find(subs)
    if start < 0:
        return []
    rest = line[start + len(subs):]
    rest = rest.replace("=", "").replace("-->", "")
    names = [name.strip() for name in rest.split(",")]
    return [name for name in names if name]


def getDirectory(line):
    """Return the directory name from a section heading, or "" if none.

    The name is the first whitespace-delimited token after the first "="
    in `line`.  A "-->" glued to the token (heading closed without a
    space) is removed.  Returns "" when there is no "=" or nothing
    follows it.
    """
    eq = line.find("=")
    if eq < 0:
        return ""
    tokens = line[eq + 1:].split()
    if not tokens:
        return ""
    return tokens[0].replace("-->", "")


def get_batfile(line):
    """Get x.bat from a line of text: the base name of the link target."""
    return os.path.basename(get_ahref(line))


def _merge_unique(first, second):
    """Return a new list: `first`, then items of `second` not yet present."""
    merged = list(first)
    for item in second:
        if item not in merged:
            merged.append(item)
    return merged


def readSPRpage(filename=None, dirobj=None):
    """Return a dictionary with D[dirname] = (batch files, subdirectories).

    filename -- page to read; `default_page` when None.
    dirobj   -- dictionary from an earlier call to extend in place, or None
                to start a new one.

    D['dirlist'] holds the directory names in the order first seen.
    When a directory already has an entry (from `dirobj` or from an earlier
    section of the same page) the lists are merged without duplicates.
    Raises whatever open() raises if the page cannot be read.
    """
    if filename is None:
        filename = default_page
    with open(filename, 'r') as fp:
        lines = fp.readlines()

    # NOTE(review): the original sentinel text was lost (it appears to have
    # been an HTML comment); this value is a placeholder -- confirm.  If it
    # never matches, the whole page is simply scanned to the end.
    # (The original also set an unused local `dirstring`; it is omitted.)
    endstring = "<!-- SPIRE end -->"    # don't bother after this point

    if dirobj is None:
        D = {}
        dirlist = []    # for retaining the order of the directories
    else:
        D = dirobj
        # Reuse the caller's list object so ordering carries across pages.
        dirlist = D.setdefault('dirlist', [])

    procs = []
    subs = []
    dirname = ""
    n = len(lines)
    i = 0
    while i < n:
        s = lines[i]
        if endstring in s:
            break

        if re_spire.search(s):
            # Found a new directory heading; it may span several lines, so
            # keep reading until the comment closes (or the file ends).
            while "-->" not in s and i < n - 1:
                i += 1
                s = s + lines[i]
            newdirname = getDirectory(s)
            if newdirname == "":
                print("read_spr_page.py: unable to get directory from %s"
                      % str(s))
                break
            newsubs = getSubdirectories(s)

            # Store the section just finished; earlier entries come first.
            if dirname != "":
                oldprocs, oldsubs = D.get(dirname, ([], []))
                D[dirname] = (_merge_unique(oldprocs, procs),
                              _merge_unique(oldsubs, subs))
                if dirname not in dirlist:
                    dirlist.append(dirname)
            dirname = newdirname
            subs = newsubs
            procs = []

        elif batdir in s:
            # Look for procedures: a link into the batch-file directory.
            proc = get_batfile(s)
            ext = os.path.splitext(proc)[1]
            if ext in batext and proc not in procs:
                procs.append(proc)

        i += 1

    # Finish up the last set; here the new entries come first, as before.
    if dirname != "":
        oldprocs, oldsubs = D.get(dirname, ([], []))
        D[dirname] = (_merge_unique(procs, oldprocs),
                      _merge_unique(subs, oldsubs))
        if dirname not in dirlist:
            dirlist.append(dirname)
    D['dirlist'] = dirlist
    return D


def getproject(webpages=None):
    """Read one or more web pages and return the combined dictionary.

    webpages -- a single path, a list of paths, or None for "mr.html" and
                "refine.html" in `source_dir`.

    Side effect: the working directory is changed to the directory of each
    page as it is read (kept from the original behaviour).
    """
    if webpages is None:
        webpages = [os.path.join(source_dir, "mr.html"),
                    os.path.join(source_dir, "refine.html")]
    elif isinstance(webpages, _STRING_TYPES):
        webpages = [webpages]

    # Resolve every path before the first chdir so that relative paths and
    # bare file names keep pointing at the intended files.
    webpages = [os.path.abspath(page) for page in webpages]

    D = {}
    for webpage in webpages:
        os.chdir(os.path.dirname(webpage))
        D = readSPRpage(webpage, D)
    return D


if __name__ == '__main__':
    D = getproject()
    dirs = D['dirlist']
    print(dirs)
    for dirname in dirs:
        print(dirname)
        procs, subdirs = D[dirname]
        for proc in procs:
            print("   " + proc)
        for sub in subdirs:
            print("   " + sub + "/")