Package MolKit :: Module protein
[hide private]
[frames] | no frames]

Source Code for Module MolKit.protein

   1  ############################################################################# 
   2  # 
   3  # Author: Michel F. SANNER 
   4  # 
   5  # Copyright: M. Sanner TSRI 2000 
   6  # 
   7  ############################################################################# 
   8   
   9  # 
  10  # $Header: /opt/cvs/python/packages/share1.5/MolKit/protein.py,v 1.42 2007/05/17 22:03:53 sargis Exp $ 
  11  # 
  12  # $Id: protein.py,v 1.42 2007/05/17 22:03:53 sargis Exp $ 
  13  # 
  14   
  15  """ 
  16  This Module implements the classes Residue, Chain and Protein. 
  17   
  18  Residue and Chain are TreeNode objects similarly to the Atom object. 
  19   
  20  Protein is a specialization of the Molecule class to represent a protein 
  21  in a 4 level tree (from leafs to root: atoms, residues, chains, molecule) 
  22  """ 
  23   
  24  from MolKit.tree import TreeNode, TreeNodeSet, TreeNodeSetSelector 
  25  from MolKit.molecule import Molecule, MoleculeSet, Atom, AtomSet, Bond 
  26  import re 
  27  from types import NoneType, StringType, IntType, TupleType, FloatType, ListType 
  28  from string import split, upper, find 
  29  from Numeric import sum 
  30  global bhtreeFlag 
  31  try: 
  32      import bhtree 
  33      bhtreeFlag = 1 
  34  except: 
  35      bhtreeFlag = 0 
  36   
  37  # FIXME had to overwrite isBelow because of the Protein vs Molecule pb 
38 -class ProteinMolecule(Molecule):
39 ## def __del__(self): 40 ## self.__dict__.clear() 41 ## Molecule.__del__(self) 42
43 - def findType(self, _type, uniq=0):
44 if self.setClass is MoleculeSet and _type is Protein: 45 n = ProteinSet([self]) 46 elif self.setClass is ProteinSet and _type is Molecule: 47 n = MoleculeSet([self,]) 48 else: 49 n = self.setClass([self,]) 50 if n.elementType == _type: return n 51 result = n.findType(_type, uniq=uniq) 52 return result
53
54 - def isBelow(self, Klass):
55 if Klass is Molecule: Klass=Protein 56 return Molecule.isBelow(self, Klass)
57 58 ####################################################################### 59 ## RESIDUES ## 60 ####################################################################### 61
62 -class ResidueSet(TreeNodeSet):
63 """Class to extend a TreeNodeSet with residue specific methods""" 64
65 - def __init__(self, objects=None, stringRepr=None, comments="", keywords=[]):
66 TreeNodeSet.__init__(self, objects, Residue, stringRepr, comments, keywords)
67 68 69 # def get(self, selectionString, selector=None, sets=None, 70 # caseSensitive=True, escapeCharacters=False): 71 # if selector is None: 72 # selector = ResidueSetSelector() 73 # selector.caseSensitive = caseSensitive 74 # selector.escapeCharacters = escapeCharacters 75 # #results, msg = selector.select(self, selectionString, sets, 76 # selectionStringRepr = str(selectionString) 77 # results = selector.processListItem(self, selectionString, sets) 78 # selectionStringRepr = '&' + selectionStringRepr 79 # results.setStringRepr(selectionStringRepr) 80 # return results 81 82
83 - def getSelector(self):
84 if self.selector==None: 85 self.selector = ResidueSetSelector() 86 return self.selector
87 88 ## def get(self, selectionString, selector=None, sets=None, 89 ## caseSensitive=True, escapeCharacters=False, 90 ## returnMsg=False): 91 ## """ 92 ## selects from self.data, objects of self.elementType specified by selectionString 93 ## """ 94 ## #print "RS: get: selectionString=", selectionString 95 96 ## if selector is None: 97 ## selector = ResidueSetSelector() 98 ## selector.caseSensitive = caseSensitive 99 ## selector.escapeCharacters = escapeCharacters 100 ## selectionStringRepr = '&' + str(selectionString) 101 ## if type(selectionString)==StringType: 102 ## result, msg = selector.select(self, selectionString) 103 ## result = self.ReturnType(result) 104 ## result.setStringRepr(selectionStringRepr) 105 ## if returnMsg: result = (result, msg) 106 ## return result 107 ## #if selector.select(self, selectionString)[0] is None: 108 ## # return self.ReturnType([]) 109 ## #else: 110 ## # return selector.select(self, selectionString)[0] 111 ## elif callable(selectionString): 112 ## result = filter(selectionString, self.data) 113 ## if len(result)==len(self.data): 114 ## return self 115 ## else: 116 ## result = self.ReturnType(result) 117 ## result.setStringRepr(selectionStringRepr) 118 ## return result 119 ## else: 120 ## raise RuntimeError("argument has to be a function or a string") 121 122 123
124 -class Residue(ProteinMolecule):
125 """Class to represent an amino acide. Inherits from tree element""" 126 127 _bbname = ['N', 'CA', 'C', 'O'] 128 _bbtype = ['N', 'C', 'C', 'O'] 129 130 131 132 ## def __del__(self): 133 ## self.__dict__.clear() 134 ## ProteinMolecule.__del__(self) 135 136
137 - def __init__(self, type='UNK', number=-1, icode='', parent=None, 138 elementType=Atom, list=None, childrenName='atoms', 139 setClass=ResidueSet, 140 childrenSetClass=AtomSet, top=None, childIndex=None, 141 assignUniqIndex=1):
142 """Residue constructor. 143 Arguments: 144 type (string) 145 number (integer or string) 146 icode (1character) insertion code 147 optional parent (instance of a TreeNode) 148 optional elementType (instance of class inheriting from TreeNode)""" 149 name = type+str(number)+icode 150 Molecule.__init__(self, name, parent, elementType, 151 list, childrenName, setClass, childrenSetClass, top, 152 childIndex, assignUniqIndex) 153 self.type = type 154 self.number = number 155 self.icode = icode 156 self.psi = None # not calculated 157 self.phi = None # not calculated
158
159 - def getPsi(self):
160 """ compute PSI N(i),CA(i),C(i),N(i+1) """ 161 if self.getNext() is not None: 162 from mglutil.math.torsion import torsion 163 names = self.atoms.name 164 idx=names.index('N') ; at1 = self.atoms[idx] 165 idx=names.index('CA'); at2 = self.atoms[idx] 166 idx=names.index('C') ; at3 = self.atoms[idx] 167 nextResidueAtoms = self.getNext().atoms 168 names = nextResidueAtoms.name 169 idx= names.index('N') 170 at4 = nextResidueAtoms[idx] 171 172 self.psi = torsion(at1.coords, at2.coords, 173 at3.coords, at4.coords) 174 175 return self.psi
176
177 - def getPhi(self):
178 """ compute PHI C(i-1),N(i),CA(i),c(i) """ 179 if self.getPrevious() is not None: 180 from mglutil.math.torsion import torsion 181 prevResidueAtoms = self.getPrevious().atoms 182 names = prevResidueAtoms.name 183 idx= names.index('C') 184 at1 = prevResidueAtoms[idx] 185 186 names = self.atoms.name 187 idx=names.index('N') ; at2 = self.atoms[idx] 188 idx=names.index('CA'); at3 = self.atoms[idx] 189 idx=names.index('C') ; at4 = self.atoms[idx] 190 191 self.phi = torsion(at1.coords, at2.coords, 192 at3.coords, at4.coords) 193 return self.phi
194 195
196 - def buildBondsByDistance(self):
197 """Build bonds between atoms inside this residue, based on distance 198 WARNING this is a n^2 process, should only be used for small 199 molecules like residues""" 200 201 if self.hasBonds: return 202 # atoms = findType(Atom) 203 atoms = self.children 204 if not self.hasBonds: 205 self.buildBondsByDistanceOnAtoms(atoms) 206 self.hasBonds = 1 207 return len(atoms)
208 209
210 - def backbone(self):
211 """Return atomset containing backbone atoms""" 212 213 bb = self.get(lambda x, name=self._bbname,type=self._bbtype: 214 x.name in name and x.element in type) 215 if bb is None: return AtomSet( [] ) 216 else: return bb
217 218
219 - def sidechain(self):
220 """Return atomset containing sidechain atoms""" 221 222 return self.atoms - self.backbone()
223
224 - def getAtmsAndCoords(self, atmNames):
225 """ 226 Function returning the coords of all the atoms of the given atom name 227 or None if one is not in the atoms residues 228 """ 229 childNames = self.childByName.keys() 230 check = map(lambda x, cn = childNames: x in cn, atmNames) 231 # all the given names should be atoms of the residue 232 if 0 in check: 233 return 0, None 234 coords = [] 235 # short cut to coords.append 236 coordsappend = coords.append 237 cName = self.childByName 238 for name in atmNames: 239 atm = cName[name] 240 if atm.alternate: coordsappend(atm.getAverageCoords()) 241 else: 242 coordsappend(atm.coords) 243 return 1, coords
244 245 246
247 -class ResidueSetSelector(TreeNodeSetSelector):
248 249 residueList = {} # name: ['ala','arg'] 250 residueList['std']=['ala', 'ALA','arg', 'ARG','asn', 'ASN', 'asp', 'ASP', 251 'cys', 'CYS','gln', 'GLN','glu', 'GLU', 'gly', 'GLY', 252 'his', 'HIS','ile', 'ILE', 'leu', 'LEU','lys', 'LYS', 253 'met', 'MET', 'phe', 'PHE','pro', 'PRO','ser', 'SER', 254 'thr', 'THR','trp', 'TRP','tyr', 'TYR','val', 'VAL'] 255 residueList['acidic']=['ASP','asp','glu','GLU'] 256 residueList['acyclic']=['ala','ALA','arg','ARG','asn','ASN','asp','ASP','cys','CYS','glu','GLU','gln','GLN','gly','GLY','ile','ILE','leu','LEU','lys','LYS','met','MET','ser','SER','thr','THR','val','VAL'] 257 residueList['aliphatic']=['ala','ALA','gly','GLY','ile','ILE','leu','LEU','val','VAL'] 258 residueList['aromatic']=['his','HIS','phe','PHE','trp','TRP','tyr','TYR'] 259 residueList['basic']=['arg','ARG','his','HIS','lys','LYS'] 260 residueList['buried']=['ala','ALA','cys','CYS','ile','ILE','leu','LEU','met','MET','phe','PHE','trp','TRP','val','VAL'] 261 residueList['charged']=['arg','ARG','asp','ASP','glu','GLU','his','HIS','lys','LYS'] 262 residueList['cyclic']=['his','HIS','phe','PHE','pro','PRO','trp','TRP','tyr','TYR'] 263 residueList['hydrophobic']=['ala','ALA','gly','GLY','ile','ILE','leu','LEU','met','MET','phe','PHE','pro','PRO','trp','TRP','tyr','TYR','val','VAL'] 264 residueList['large']=['arg','ARG','glu','GLU','gln','GLN','his','HIS','ile','ILE','leu','LEU','lys','LYS','met','MET','phe','PHE','trp','TRP','tyr','TYR'] 265 residueList['medium']=['asn','ASN','asp','ASP','cys','CYS','pro','PRO','thr','THR','val','VAL'] 266 residueList['negative']=['asp','ASP','glu','GLU'] 267 residueList['neutral']=['ala','ALA','asn','ASN','cys','CYS','gln','GLN','gly','GLY','his','HIS','ile','ILE','leu','LEU','met','MET','phe','PHE','pro','PRO','ser','SER','thr','THR','trp','TRP','tyr','TYR','val','VAL'] 268 residueList['polar']=['arg','ARG','asn','ASN','asp','ASP','cys','CYS','glu','GLU','gln','GLN','his','HIS','lys','LYS','ser','SER','thr','THR'] 269 residueList['positive']=['arg','ARG','his','HIS','lys','LYS'] 270 residueList['small']=['ala','ALA','gly','GLY','ser','SER'] 271 residueList['surface']=['arg','ARG','asn','ASN','asp','ASP','glu','GLU','gln','GLN','gly','GLY','his','HIS','lys','LYS','pro','PRO','ser','SER','thr','THR','tyr','TYR'] 272 273 #dictionary mapping from residue types to 1 char identifier 274 r_keyD = {} 275 r_keyD['ALA']='A' 276 r_keyD['ARG']='R' 277 r_keyD['ASN']='N' 278 r_keyD['ASP']='D' 279 r_keyD['ASX']='B' 280 r_keyD['CYS']='C' 281 r_keyD['GLU']='E' 282 r_keyD['GLN']='Q' 283 r_keyD['GLX']='Z' 284 r_keyD['GLY']='G' 285 r_keyD['HIS']='H' 286 r_keyD['ILE']='I' 287 r_keyD['LEU']='L' 288 r_keyD['LYS']='K' 289 r_keyD['MET']='M' 290 r_keyD['PHE']='F' 291 r_keyD['PRO']='P' 292 r_keyD['SER']='S' 293 r_keyD['THR']='T' 294 r_keyD['TRP']='W' 295 r_keyD['TYR']='Y' 296 r_keyD['VAL']='V' 297 #FIX THIS HACK 298 r_keyD['XAA']='X' #from www.ensembl.org/Docs/Pdoc/bioperl-live/Bio/SeqUtils.html 299 r_keyD['SEL']='U' 300 r_keys = r_keyD.values() 301 302
303 - def __init__(self):
304 TreeNodeSetSelector.__init__(self) 305 self.level = ResidueSet
306 307
308 - def matchSequence(self, nodes, item):
309 result= ResidueSet() 310 #print 'seeking ', item, ':' 311 parents = nodes.parent.uniq() 312 for ch in parents: 313 res_nodes = ch.residues 314 n_str = "" 315 for r in res_nodes: 316 #J is not currently used for any res type so 317 #use it for residues whose type is outside the dict 318 n_str = n_str + self.r_keyD.get(r.type,'J') 319 #n_str = n_str + r_keyD[r.type] 320 #print 'in chain ', ch.name, '->', n_str 321 ind1 = find(n_str, item) 322 if ind1==-1: 323 continue 324 #return None 325 result.extend(res_nodes[ind1:ind1+len(item)]) 326 for i in range(ind1+1, len(res_nodes)): 327 ind1 = find(n_str[i], item) 328 if ind1>-1: 329 result.extend(res_nodes[ind1:ind1+len(item)]) 330 if not len(result): 331 return None 332 return result
333 334 335 #FOR RESIDUES:
336 - def getRange(self, nodes, item):
337 if len(nodes)<2: 338 return None 339 levItList=split(item, '-') 340 if len(levItList)!=2: return None 341 #if levItList[0][0]=='#' or levItList[1][0]=='#': 342 if levItList[0][0]=='#' and levItList[1][0]=='#': 343 return self.getResidueRelRange(nodes, item) 344 firstNodes = self.processListItem(nodes, levItList[0]) 345 lastNodes = self.processListItem(nodes, levItList[1]) 346 if firstNodes and lastNodes: 347 return self.rangeMatch(nodes, firstNodes[0],lastNodes[-1]) 348 else: 349 return None
350 351
352 - def getResidueRelRange(self, nodes, item):
353 #this needs to be done on a PER CHAIN basis: 354 levItList = split(item, '-') 355 selNodes = None 356 parentNodes = ChainSet(nodes.parent.uniq()) 357 for par in parentNodes: 358 nds = ResidueSet(filter(lambda x, par=par: x.parent==par, nodes)) 359 if len(nds)<2: continue 360 firstNodes = self.processListItem(nds, levItList[0]) 361 lastNodes = self.processListItem(nds, levItList[1]) 362 if firstNodes and lastNodes: 363 newNodes = self.rangeMatch(nds,firstNodes[0],lastNodes[-1]) 364 if newNodes: 365 if selNodes: selNodes = selNodes + newNodes 366 else: selNodes = newNodes 367 else: continue 368 return selNodes
369 370
371 - def processListItem(self, nodes, item, sets=None):
372 373 # check for pre-defined filtering lists 374 if item in self.residueList.keys(): 375 names = self.residueList[item] 376 newNodes = filter(lambda x, names=names, nodes=nodes: 377 x.type in names, nodes) 378 return self.level(newNodes) 379 380 elif self.testSequence(item): 381 # detect ambiguous residue sequences such as ASP vs Alanine-Serine-Proline 382 # and all char in 'ARNDCEQGHILKMFPSTWYV' 383 #print "testSequence: item=", item 384 newNodes = self.matchSequence(nodes, item) 385 #newNodes = FD['resSeq'](item, nodes) 386 return newNodes 387 388 else: 389 return TreeNodeSetSelector.processListItem(self, nodes, item, sets=sets)
390 391
392 - def testR(self,c):
393 #t = 'ARNDCEQGHILKMFPSTWYV' 394 return c in self.r_keys
395 #return c in r_keyD.values() 396 #return c in t 397 398
399 - def testSequence(self, item):
400 import Numeric 401 try: 402 ans = Numeric.add.reduce(map(self.testR,item))==len(item) 403 except: 404 ans = 0 405 return ans
406 407
408 - def regexp(self, nodes, item):
409 #use self.procFunction to build a regexp 410 #Residue strings match to 'type', so use 'objectsFromStringField' 411 if not self.caseSensitive: 412 if self.escapeCharacters: 413 item = self.processStringcIWEC(item) 414 #newNodes = self.processStringcIWEC(item) 415 else: 416 item = self.processStringcI(item) 417 #newNodes = self.processStringcI(item) 418 else: 419 item = self.processStringcS(item) 420 #newNodes = self.processStringcS(item) 421 try: 422 #to match 13 to residue whose number is '13' 423 #t = int(item[0]) 424 t = int(item) 425 return self.level(nodes.objectsFromString(item, 'number')) 426 #return self.level(nodes.objectsFromStringField(item, 'number')) 427 except ValueError: 428 #if not a number, 429 #do something like match 'THR1' to residue whose name is 'THR1' 430 return self.level(nodes.objectsFromString(item))
431 #return nodes.ReturnType(nodes.objectsFromStringField(reItem, 'type')) 432 433 434 ####################################################################### 435 ## CHAINS ## 436 ####################################################################### 437
438 -class ChainSet(TreeNodeSet):
439 """Class to extend a TreeNodeSet with Chain specific methods""" 440
441 - def __init__(self, objects=None, stringRepr=None, comments="", keywords=[]):
442 TreeNodeSet.__init__(self, objects, Chain, stringRepr, comments, keywords)
443 444 445 # def get(self, selectionString, selector=None, sets=None, 446 # caseSensitive=True, escapeCharacters=False): 447 # if selector is None: 448 # selector = ChainSetSelector() 449 # selector.caseSensitive = caseSensitive 450 # selector.escapeCharacters = escapeCharacters 451 # #results, msg = selector.select(self, selectionString, sets, 452 # selectionStringRepr = str(selectionString) 453 # results = selector.processListItem(self, selectionString, sets) 454 # selectionStringRepr = '&' + selectionStringRepr 455 # results.setStringRepr(selectionStringRepr) 456 # return results 457
458 - def getSelector(self):
459 if self.selector==None: 460 self.selector = ChainSetSelector() 461 return self.selector
462 463 ## def get(self, selectionString, selector=None, sets=None, 464 ## caseSensitive=True, escapeCharacters=False, 465 ## returnMsg=False): 466 ## """ 467 ## selects from self.data, objects of self.elementType specified by selectionString 468 ## """ 469 ## #print "CS: get: selectionString=", selectionString 470 ## if selector is None: 471 ## selector = ChainSetSelector() 472 ## selector.caseSensitive = caseSensitive 473 ## selector.escapeCharacters = escapeCharacters 474 ## selectionStringRepr = '&' + str(selectionString) 475 ## if type(selectionString)==StringType: 476 ## result, msg = selector.select(self, selectionString) 477 ## result = self.ReturnType(result) 478 ## result.setStringRepr(selectionStringRepr) 479 ## if returnMsg: result = (result, msg) 480 ## return result 481 ## elif callable(selectionString): 482 ## result = filter(selectionString, self.data) 483 ## if len(result)==len(self.data): 484 ## return self 485 ## else: 486 ## result = self.ReturnType(result) 487 ## result.setStringRepr(selectionStringRepr) 488 ## return result 489 ## else: 490 ## raise RuntimeError("argument has to be a function or a string") 491 492 493
494 -class Chain(ProteinMolecule):
495 """Class to represent chains or residues. Inherits from tree element""" 496 497 498 ## def __del__(self): 499 ## self.__dict__.clear() 500 ## ProteinMolecule.__del__(self) 501 502 503
504 - def __init__(self, id=None, parent=None, elementType=Residue, list=None, 505 childrenName='residues', setClass=ChainSet, 506 childrenSetClass=ResidueSet, top=None, childIndex=None, 507 assignUniqIndex=1):
508 """Chain constructor. 509 Arguments: 510 id (string) 511 optional parent (instance of a TreeNode) 512 optional elementType (instance of class inheriting from TreeNode)""" 513 514 Molecule.__init__(self, str(id), parent, elementType, list, 515 childrenName, setClass, childrenSetClass, top, 516 childIndex, assignUniqIndex) 517 self.id = id 518 self.hasBonds = 0 # 1 for bondsByDistance is supported 519 self.gaps = [] # list to store tuple (Res, Res) define the edge of a gap
520
521 - def shortestDist(self, atoms1, atoms2):
522 min = 99999.9 523 for a1 in atoms1: 524 c1 = a1.coords 525 for a2 in atoms2: 526 c2 = a2.coords 527 diff = (c1[0]-c2[0], c1[1]-c2[1], c1[2]-c2[2]) 528 d2 = (diff[0]*diff[0] + diff[1]*diff[1] + diff[2]*diff[2]) 529 if d2 < min: 530 min = d2 531 as1 = a1 532 as2 = a2 533 return as1, as2, min
534
535 - def connectResidues(self, res1, res2, cut_off = 1.85):
536 """ Connect residues based on distance""" 537 self.bbDisconnectedAfter = [] 538 #cut_off2 = cut_off*cut_off 539 #diff = [0,0,0] 540 p = 0 541 # Get the C atom of the first residue res1 : 542 c = res1.atoms.get(lambda x: split(x.name)[0]=='C') 543 544 if c is None or len(c) == 0: c = res1.atoms.get(lambda x: 545 split(x.name)[0]=='O3*') 546 # Get the N atom of the second Residue only if the first residue 547 # has a C atom. 548 if not c is None and len(c) != 0: 549 # get c coords 550 cx, cy, cz = c[0].coords 551 cov_radc = c[0].bondOrderRadius 552 553 n = res2.atoms.get(lambda x: split(x.name)[0]=='N') 554 if n is None or len(n) == 0: 555 n = res2.atoms.get(lambda x:split(x.name)[0]=='P') 556 if n is not None and len(n)!=0: 557 nx, ny,nz = n[0].coords 558 cov_radsum = (cov_radc + n[0].bondOrderRadius)*1.1 559 diffx = cx-nx 560 diffy = cy-ny 561 diffz = cz-nz 562 d = (diffx*diffx) + (diffy*diffy) + (diffz*diffz) 563 if d<cov_radsum*cov_radsum: 564 resConnectionFound = 1 565 atom1 = c[0] 566 atom2 = n[0] 567 if not atom1.isBonded(atom2): 568 Bond( atom1, atom2, origin='BuiltByDistance', check=0 ) 569 else: 570 resConnectionFound = 0 571 self.bbDisconnectedAfter.append(res1) 572 else: # N not found 573 resConnectionFound = 0 574 else: # C not found 575 resConnectionFound = 0 576 577 if not resConnectionFound: 578 at1,at2,dist2 = self.shortestDist(res1.atoms, 579 res2.atoms) 580 cov_radsum = (at1.bondOrderRadius + at2.bondOrderRadius) *1.1 581 if dist2<cov_radsum *cov_radsum: 582 if not at1.isBonded(at2): 583 Bond( at1, at2, origin='BuiltByDistance', check=0 ) 584 else: 585 self.bbDisconnectedAfter.append(res1)
586
587 - def buildBondsByDistance(self, cut_off=1.85):
588 """Build bonds between atoms inside this chain, based on distance""" 589 if self.hasBonds: return 590 for i in xrange(len(self.residues)): 591 res = self.residues[i] 592 length = res.buildBondsByDistance() 593 if upper(res.type) in ['HOH', 'DOD']: continue 594 # Now we try to connect residues together. 595 if i < len(self.residues)-1: 596 self.connectResidues(res, self.residues[i+1], cut_off) 597 self.hasBonds = 1
598 599
600 - def isDna(self):
601 """ checks if the chain is DNA or not.""" 602 AAlist = ['G','C','U','A', 'T'] 603 #typical names are " G" 604 nas = [x for x in self.residues.type if len(x.strip())==1] 605 if len(nas): 606 for residue in self.residues: 607 if len(residue.type) > 2: 608 if residue.type[2] == 'P' or residue.type[2] == 'p': 609 residue.type = residue.type[0] 610 else: 611 residue.type = residue.type[2] 612 613 self.dnaRes = filter(lambda x: x.type.strip() in AAlist, self.residues) 614 if len(self.dnaRes) == len(self.residues): 615 self.isDNA = True 616 return True 617 else: 618 self.isDNA = False 619 return False
620
621 - def isProteic(self):
622 """ checks if the chain is proteic or not.""" 623 AAlist = ['ALA','ARG','ASN','ASP','CYS','GLU','GLN','GLY','HIS','ILE', 624 'LEU','LYS','MET','PHE','PRO','SER','THR', 'TRP','TYR','VAL'] 625 self.AARes = filter(lambda x: x.type in AAlist, self.residues) 626 if len(self.AARes) == len(self.residues): 627 return True 628 else: 629 return False
630 631
632 - def isHetatmChain(self):
633 """ checks if is whole chain of hetatms """ 634 n = filter(lambda x: not x.hetatm, self.residues.atoms) 635 if n: return 0 636 else: return 1
637 638
639 - def secondaryStructure(self, ssBuilder):
640 """ create a secondarystructureset. If secondarystructureset can't be 641 obtained, none is created and a warning is printed.""" 642 from MolKit.getsecondarystructure import GetSecondaryStructure 643 assert isinstance(ssBuilder, GetSecondaryStructure) 644 ## ss = ssBuilder.createSSNodesForChain(self) 645 ssBuilder.createSSNodesForChain(self) 646 if not self.secondarystructureset: 647 delattr(self, 'secondarystructureset')
648 649
650 -class ChainSetSelector(TreeNodeSetSelector):
651 #what else to do here?
652 - def __init__(self):
653 TreeNodeSetSelector.__init__(self) 654 self.level = ChainSet
655 656
657 - def processListItem(self, nodes, item, sets=None):
658 # special treatment for items 'proteic' and 'dna' 659 if item =='proteic': 660 newNodes = filter(lambda x, nodes=nodes: 661 x.isProteic(), nodes) 662 return self.level(newNodes) 663 elif item == 'dna': 664 newNodes = filter(lambda x, nodes=nodes: 665 x.isDna(), nodes) 666 return self.level(newNodes) 667 668 else: 669 return TreeNodeSetSelector.processListItem(self, nodes, item, 670 sets=sets)
671 672
673 - def regexp(self, nodes, item):
674 #use self.procFunction to build a regexp 675 #Chain strings match to 'id', so use 'objectsFromStringField' 676 if not self.caseSensitive: 677 if self.escapeCharacters: 678 item = self.processStringcIWEC(item) 679 #newNodes = self.processStringcIWEC(item) 680 else: 681 item = self.processStringcI(item) 682 #newNodes = self.processStringcI(item) 683 else: 684 item = self.processStringcS(item) 685 return self.level(nodes.objectsFromString(item, 'id'))
686 #return self.level(nodes.objectsFromStringField(item, 'id')) 687 688 689 690 691 692 ####################################################################### 693 ## PROTEIN ## 694 ####################################################################### 695 from MolKit.molecule import MoleculeSet 696
697 -class ProteinSet(MoleculeSet):
698 """Class to extend a TreeNodeSet with molecule specific methods""" 699
700 - def __init__(self, objects=None, stringRepr=None, comments="", keywords=[]):
701 MoleculeSet.__init__(self, objects, stringRepr, comments=comments, 702 keywords=keywords) 703 self.elementType = Protein
704 705 706 # def get(self, selectionString, selector=None, sets=None, 707 # caseSensitive=True, escapeCharacters=False): 708 # if selector is None: 709 # selector = ProteinSetSelector() 710 # selector.caseSensitive = caseSensitive 711 # selector.escapeCharacters = escapeCharacters 712 # #results, msg = selector.select(self, selectionString, sets, 713 # selectionStringRepr = str(selectionString) 714 # results = selector.processListItem(self, selectionString, sets) 715 # selectionStringRepr = '&' + selectionStringRepr 716 # results.setStringRepr(selectionStringRepr) 717 # return results 718
719 - def getSelector(self):
720 if self.selector==None: 721 self.selector = ProteinSetSelector() 722 return self.selector
723 724 725 ## def get(self, selectionString, selector=None, sets=None, 726 ## caseSensitive=True, escapeCharacters=False, 727 ## returnMsg=False): 728 ## """ 729 ## returns from self.data, objects of self.elementType specified by selectionString 730 ## """ 731 ## #print "PS: get: selectionString=", selectionString 732 ## selectionStringRepr = '&' + str(selectionString) 733 ## if selector is None: 734 ## selector = ProteinSetSelector() 735 ## selector.caseSensitive = caseSensitive 736 ## selector.escapeCharacters = escapeCharacters 737 ## if type(selectionString)==StringType: 738 ## result, msg = selector.select(self, selectionString) 739 ## result = self.ReturnType(result) 740 ## result.setStringRepr(selectionStringRepr) 741 ## if returnMsg: result = (result, msg) 742 ## return result 743 ## #if selector.select(self, selectionString)[0] is None: 744 ## # return self.ReturnType([]) 745 ## #else: 746 ## # return selector.select(self, selectionString)[0] 747 ## elif callable(selectionString): 748 ## result = filter(selectionString, self.data) 749 ## if len(result)==len(self.data): 750 ## return self 751 ## else: 752 ## result = self.ReturnType(result) 753 ## result.setStringRepr(selectionStringRepr) 754 ## return result 755 ## else: 756 ## raise RuntimeError("argument has to be a function or a string") 757 758
759 -class Protein(ProteinMolecule):
760 """Class to represent a protein. 761 A protein is a hierarchical structure made of chains, residues and atoms. 762 By definition a Protein is a list of chains (inheritence from TreeNode) 763 For efficiency reasons the protein also stores a list of residues 764 and atoms 765 766 Read methods are provided to handle various PDB file format flavors 767 """ 768
769 - def __init__(self, name='NoName', parent=None, elementType=Chain, 770 list=None, childrenName='chains', setClass=ProteinSet, 771 childrenSetClass=ChainSet, top=None, childIndex=None, 772 assignUniqIndex=1):
773 """Protein constructor. 774 Arguments: 775 name (string) 776 optional parent (instance of a TreeNode) 777 optional elementType (instance of class inheriting from TreeNode, 778 defaults to Chain)""" 779 780 Molecule.__init__(self, name=name, parent=parent, 781 elementType=elementType, list=list, 782 childrenName=childrenName, 783 setClass=setClass, 784 childrenSetClass=childrenSetClass, top=top, 785 childIndex=childIndex, assignUniqIndex=assignUniqIndex) 786 self.bondsflag = 0 787 self.hasSS = [] 788 self.hasBonds = 0 # 1 for bondsByDistance is supported
789
790 - def copy(self, newname=None):
791 """copy makes a new Protein instance with 'newname' and 792 other protein level parameters from self. Next,self.allAtoms is copied 793 atom by atom. First: '_fit_atom_into_tree', which uses the same 794 logic as pdbParser, builds up new instances of residues and chains 795 as necessary. Then: _copy_atom_attr copies the remaining 796 String, Int, Float, None, List and Tuple attributes into new atom 797 instances. The new molecule is returned by copy. 798 NB: subsequently the two copies can be visualized: 799 copy2=mv.Mols[0].copy() 800 mv.addMolecule(copy2) 801 mv.GUI.VIEWER.TransformRootOnly( yesno=0) 802 mv.GUI.VIEWER.currentObject=copy2.geomContainer.geoms['master'] 803 then mouse movements would move only copy2, the new object """ 804 805 if not newname: newname = self.name + "_copy" 806 newmol=Protein(name=newname, parent=self.parent, 807 elementType=self.elementType, childrenName=self.childrenName, 808 setClass=self.setClass, childrenSetClass=self.childrenSetClass, 809 top=self.top) 810 newmol.curChain=Chain() 811 newmol.curRes=Residue() 812 newmol.allAtoms= AtomSet() 813 newmol.parser = self.parser 814 for at in self.allAtoms: 815 self._fit_atom_into_tree(newmol, at) 816 newmol.buildBondsByDistance() 817 return newmol
818
819 - def _fit_atom_into_tree(self,newmol,at):
820 chainID = at.parent.parent.id 821 if chainID != newmol.curChain.id: 822 newmol.curChain = Chain(chainID, newmol, top=newmol) 823 resName = at.parent.name 824 resNum =at.parent.number 825 if resName != newmol.curRes.name or resNum !=newmol.curRes.number: 826 newmol.curRes=Residue(resName[:3], resNum, newmol.curChain, 827 top=newmol) 828 newat=Atom(at.name, newmol.curRes, at.element, top=newmol) 829 self._copy_atom_attr(newat,at) 830 newmol.allAtoms.append(newat)
831
832 - def _copy_atom_attr(self, newat, at):
833 for item in at.__dict__.items(): 834 if type(item[1]) in [NoneType,StringType, 835 IntType,FloatType]: 836 exec('newat.%s=item[1]' %item[0]) 837 if type(item[1]) in [ListType, TupleType]: 838 exec('newat.%s=item[1][:]' %item[0])
839 840
841 - def buildBondsByDistance(self, cut_off=1.85):
842 """Build bonds between atoms inside this chain, based on distance""" 843 if self.hasBonds: return 844 845 if bhtreeFlag: 846 for c in self.chains: 847 c.buildBondsBhtree() 848 c.hasBonds = 1 849 c.residues.hasBonds = 1 850 self.hasBonds=1 851 else: 852 for c in self.chains: 853 c.buildBondsByDistance(cut_off) 854 c.hasBonds = 1 855 c.residues.hasBonds = 1 856 self.hasBonds = 1 857 858 self.bondsflag = 1
859 860
861 - def secondaryStructure(self,ssBuilder) :
862 863 from MolKit.getsecondarystructure import GetSecondaryStructure 864 assert isinstance(ssBuilder, GetSecondaryStructure) 865 866 for c in self.chains: 867 if c.isHetatmChain(): continue 868 elif not ssBuilder.ssDataForMol.has_key(c.id): continue 869 else: 870 c.secondaryStructure(ssBuilder)
871 872
873 - def secondaryStructureFromFile(self):
874 """Function which a crate an instance of 875 GetSecondaryStructureFromFile to add the secondarystructurelevel 876 to the molecule hierarchy""" 877 from MolKit.getsecondarystructure import GetSecondaryStructureFromFile 878 ssBuilder = GetSecondaryStructureFromFile(self) 879 self.secondaryStructure(ssBuilder) 880 self.hasSS = ['From File']
881
883 """Function which a creat an instance of 884 GetSecondaryStructureFromStride to add the secondarystructurelevel 885 to the molecule hierarchy.""" 886 from MolKit.getsecondarystructure import GetSecondaryStructureFromStride 887 ssBuilder = GetSecondaryStructureFromStride(self) 888 self.secondaryStructure(ssBuilder) 889 self.hasSS = ['From Stride']
890 891
892 -class ProteinSetSelector(TreeNodeSetSelector):
893 #what else to do here?
894 - def __init__(self):
895 TreeNodeSetSelector.__init__(self) 896 self.level = ProteinSet
897 898 899 900 901 ############################################################################# 902 # 903 # section defining objects for protein's secondary structure 904 # 905 ############################################################################# 906
907 -class SecondaryStructureSet(TreeNodeSet):
908 """class to represent a set of secondary structure elements 909 typically for a protein's chain""" 910
911 - def __init__(self, objects=None, stringRepr=None, comments='', keywords=[]):
912 TreeNodeSet.__init__(self, objects, SecondaryStructure, stringRepr, 913 comments, keywords)
914
915 - def __repr__(self):
916 if len(self.data): 917 ob = self.data[0] 918 return '<%s instance> holding %d %s' %( 919 self.__class__.__name__, len(self.data), 920 ob.__class__.__bases__[0].__name__) 921 else: 922 return '<%s instance> empty' %(self.__class__.__name__)
923 924
925 -class SecondaryStructure(TreeNode):
926 """Base class to represent a Secondary Structure element such as Helix, 927 Sheet, etc...""" 928 ## def __del__(self): 929 ## print 'free %s'%self.__class__, self.name 930 ## Tree.__del__() 931 ## print self.__class__._numberOfDeletedNodes
932 - def __init__(self, chain=None, structureType=None, index=None, 933 start=None, end=None, parent=None, 934 elementType=Residue, list=None, 935 childrenName='residues', 936 setClass=SecondaryStructureSet, 937 childrenSetClass=ResidueSet, top=None, 938 childIndex=None, assignUniqIndex=1, createNewLevel=1):
939 940 TreeNode.__init__(self,structureType+str(index), parent, 941 elementType, list, childrenName, setClass, 942 childrenSetClass, top, childIndex, assignUniqIndex) 943 self.index = index 944 self.structureType = structureType 945 self.start = start 946 self.end = end 947 self.chain = chain 948 self.children = self.chain.residues.get(start.name+'-'+end.name) 949 if createNewLevel: 950 self.createNewLevel() 951 self.residues = self.children
952
953 - def createNewLevel(self):
954 955 if hasattr(self.chain,'secondarystructureset'): 956 for i in self.children: 957 i.secondarystructure = self
958
959 -class Helix(SecondaryStructure):
960 """Class to represent an helix inherits from SecondaryStructure.""" 961
962 - def __init__(self, chain=None, index=None, 963 start=None, end=None, parent=None, 964 list=None, top=None, createNewLevel=True, 965 helClass=1, comment=None ):
966 967 """ 968 optional argument: 969 chain -- Chain instance to which the secondary structure 970 belongs to 971 index -- Helix index 972 start -- N-terminal residue of the helix 973 end -- last residue of the helix 974 parent 975 list 976 top 977 createNewLevel -- Boolean flag to specify whether or not to create a 978 new level in the tree representation of the 979 molecule for the SS. 980 helClass -- Helix class number (PDB record 39-40) 981 1 (default) Right-handed alpha 982 2 Right-handed omega 983 3 Right-handed pi 984 4 Right-handed gamma 985 5 Right-handed 310 986 6 Left-handed alpha 987 7 Left-handed omega 988 8 Left-handed gamma 989 9 27 ribbon/helix 990 10 Polyproline 991 992 comment -- String describing the helix (PDB record 41-70 993 """ 994 self.comment = comment 995 # self.helDescr: 996 # key: PDB helClass or Stride HelClass 997 # value: {'helType':type of Helix, 'helDir': direction of the helix} 998 # the direction of the helix can either be Right-handed, Left-handed 999 # or None for unknown 1000 self.helDescr = {1:{'helType':'alpha', 1001 'helDir':'Right-handed'}, 1002 2:{'helType':'omega', 1003 'helDir':'Right-handed'}, 1004 3:{'helType':'pi', 1005 'helDir':'Right-handed pi'}, 1006 4:{'helType':'gamma', 1007 'helDir':'Right-handed'}, 1008 5:{'helType':'310', 1009 'helDir':'Right-handed'}, 1010 6:{'helType':'alpha', 1011 'helDir':'Left-handed'}, 1012 7:{'helType':'omega', 1013 'helDir':'Left-handed'}, 1014 8:{'helType':'gamma', 1015 'helDir':'Left-handed'}, 1016 9:{'helType':'27 ribbon.helix', 1017 'helDir':None}, 1018 10:{'helType':'Polyproline', 1019 'helDir':None}, 1020 'H':{'helType':'alpha', 1021 'helDir':None}, 1022 'G':{'helType':'310', 1023 'helDir':None}, 1024 'I':{'helType':'pi', 1025 'helDir':None}} 1026 1027 if helClass is None or not self.helDescr.has_key(helClass): 1028 self.helType = None 1029 self.helDir = None 1030 else: 1031 self.helType = self.helDescr[helClass]['helType'] 1032 self.helDir = self.helDescr[helClass]['helDir'] 1033 1034 # This is needed to be able to write out the Helix information 1035 # in a PDB file 1036 if helClass in xrange(1,10): self.helClass = helClass 1037 elif helClass == 'H': self.helClass = 1 1038 elif helClass == 'G': self.helClass = 5 1039 elif helClass == 'I': self.helClass = 3 1040 else: self.helClass = 1 1041 1042 1043 SecondaryStructure.__init__(self, chain=chain, 1044 structureType='Helix', index=index, 1045 start=start,end=end, parent=parent, 1046 elementType=Residue, list=list, 1047 childrenName='residues', 1048 setClass=SecondaryStructureSet, 1049 childrenSetClass=ResidueSet, 1050 top=top, createNewLevel=createNewLevel)
1051
1052 -class Strand(SecondaryStructure):
1053 """Class to represent a sheet inherits from SecondaryStructure.""" 1054
1055 - def __init__(self, chain=None, index=None, 1056 start=None, end=None, parent=None, 1057 list=None,top=None,createNewLevel=1, 1058 nbStrand=None, sense=None):
1059 """ 1060 optional argument: 1061 chain -- Chain instance to which the secondary structure 1062 belongs to 1063 index -- Helix index 1064 start -- N-terminal residue of the strand 1065 end -- last residue of the strand 1066 parent 1067 list 1068 top 1069 createNewLevel -- Boolean flag to specify whether or not to create a 1070 new level in the tree representation of the molecule 1071 for the SS. 1072 nbStrand -- Number of strand in the sheet None if not known 1073 sense -- Sense of strand with respect to previous strand in 1074 the sheet 1075 0 if first strand 1076 -1 if anti-parallel 1077 1 if parallel 1078 None if not known (from stride or MOL2 files 1079 1080 """ 1081 self.nbStrand = nbStrand 1082 self.sense = sense 1083 1084 SecondaryStructure.__init__(self, chain=chain, 1085 structureType='Strand', index=index, 1086 start=start,end=end, parent=parent, 1087 elementType=Residue, list=list, 1088 childrenName='residues', 1089 setClass=SecondaryStructureSet, 1090 childrenSetClass=ResidueSet, 1091 top=top, createNewLevel=createNewLevel)
1092 1093
1094 -class Turn(SecondaryStructure):
1095 """Class to represent a turn inherits from SecondaryStructure."""
1096 - def __init__(self, chain=None, index=None, 1097 start=None, end=None, parent=None, 1098 list=None, top=None, createNewLevel=1, 1099 comment=None):
1100 1101 """ 1102 optional argument: 1103 chain -- Chain instance to which the secondary structure 1104 belongs to 1105 index -- Helix index 1106 start -- N-terminal residue of the strand 1107 end -- last residue of the strand 1108 parent 1109 list 1110 top 1111 createNewLevel -- Boolean flag to specify whether or not to create 1112 a new level in the tree representation of the 1113 molecule for the SS. 1114 comment -- String describing the turn 1115 """ 1116 self.comment = comment 1117 SecondaryStructure.__init__(self, chain=chain, 1118 structureType='Turn', index=index, 1119 start=start,end=end, parent=parent, 1120 elementType=Residue, list=list, 1121 childrenName='residues', 1122 setClass=SecondaryStructureSet, 1123 childrenSetClass=ResidueSet, 1124 top=top, createNewLevel=createNewLevel)
1125 1126
1127 -class Coil(SecondaryStructure):
1128 """Class to represent a coil inherits from SecondaryStructure.""" 1129
1130 - def __init__(self, chain=None, index=None, 1131 start=None, end=None, parent=None, 1132 list=None, 1133 structureType='Coil', 1134 top=None, 1135 createNewLevel=1):
1136 SecondaryStructure.__init__(self, chain=chain, 1137 structureType=structureType, index=index, 1138 start=start,end=end, parent=parent, 1139 elementType=Residue, list=list, 1140 childrenName='residues', 1141 setClass=SecondaryStructureSet, 1142 childrenSetClass=ResidueSet, 1143 top=top, createNewLevel=createNewLevel)
1144 1145
1146 -def test_secondaryStructure():
1147 from MolKit.pdbParser import PdbParser 1148 from MolKit.protein import Protein 1149 print 'create an object Protein crn' 1150 crn = Protein() 1151 print 'read the pdb file' 1152 crn.read('/tsri/pdb/struct/1crn.pdb', PdbParser()) 1153 print 'create an object secondarystructureSet for each chain of crn' 1154 crn.getSS() 1155 print 'create the geometries for each structures of crn' 1156 extrudestructure = [] 1157 for c in range(len(crn.chains)): 1158 for i in range(len(crn.chains[c].secondarystructureset)): 1159 extrudestructure.append(crn.chains[c].secondarystructureset[i].extrudeSS())
1160