In [1]:
from rdkit import Chem from rdkit.Chem import Draw, AllChem from rdkit.Chem.Draw import IPythonConsole import gzip, numpy
In [2]:
from rdkit.Chem import PyMol v = PyMol.MolViewer()MD simulation of ligand in water¶
In [3]:
# zinc 0903464 smi_zinc = 'Cc1cc2c(c(c1)OCC(=O)N3Cc4c(c5ccccc5[nH]4)C[C@H]3C(=O)OC)c6c(c(=O)o2)CCC6' ref1 = Chem.MolFromSmiles(smi_zinc) ref1
Out[3]:
In [4]:
# load pdb ref1 = Chem.MolFromSmiles('Cc1cc2c(c(c1)OCC(=O)N3Cc4c(c5ccccc5[nH]4)C[C@H]3C(=O)OC)c6c(c(=O)o2)CCC6') ref1 = AllChem.AddHs(ref1, addCoords=True) mol1 = Chem.MolFromPDBFile('ligand_zinc_start.pdb', removeHs=False) mol1 = AllChem.AssignBondOrdersFromTemplate(ref1, mol1) mol1.RemoveAllConformers() mol1
Out[4]:
In [5]:
from rdkit.Chem import rdConformerParser cids1 = rdConformerParser.AddConformersFromGromosTrajectory(mol1, 'ligand_zinc_gath.trc') print len(cids1)
In [6]:
from rdkit.Chem import rdMolAlign for i in range(1, mol1.GetNumConformers()): pyO3A = rdMolAlign.GetO3A(mol1, mol1, prbCid=i, refCid=0) pyO3A.Align()
In [7]:
v.DeleteAll() for cid in cids1: #[:10]: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
Out[8]:
RMSD (first conformer as reference)¶In [9]:
rmsd1 = [] for cid in cids1[1:]: rmsd1.append(AllChem.GetConformerRMS(mol1, 0, cid, prealigned=True))
In [10]:
plt.xlabel('RMSD') plt.ylabel('count') hist(rmsd1) plt.show()
In [11]:
cids12 = [0] cids12 += [cid for cid,r in zip(cids1[1:], rmsd1) if r >= 3.0] print len(cids12)
In [12]:
v.DeleteAll() for cid in cids12: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
Out[13]:
In [14]:
rmsdmat1 = AllChem.GetConformerRMSMatrix(mol1, prealigned=True)Diversity picking (10 most diverse conformers)¶
In [15]:
from rdkit import SimDivFilters mmp = SimDivFilters.MaxMinPicker() dids1 = mmp.Pick(numpy.array(rmsdmat1),mol1.GetNumConformers(),10) print [d for d in dids1]
[37, 75, 61, 81, 10, 44, 84, 59, 18, 3]
In [16]:
v.DeleteAll() for cid in dids1: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
Out[17]:
In [18]:
from rdkit.ML.Cluster import Butina clusters1 = Butina.ClusterData(rmsdmat1, mol1.GetNumConformers(), 1.0, isDistData=True, reordering=True) print len(clusters1)
In [19]:
plt.xlabel('cluster index') plt.ylabel('number of members') bar(range(1,len(clusters1)+1), [len(c) for c in clusters1]) plt.show()
In [20]:
clids1 = [] for c in clusters1: if len(c) > 1: clids1.append(c[0]) print clids1
[70, 21, 93, 2, 40, 64, 59, 39, 36]
In [21]:
v.DeleteAll() for cid in clids1: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
Out[22]:
Shape Tanimoto (first conformer as reference)¶In [23]:
tani1 = [] for cid in cids1[1:]: tani1.append(AllChem.ShapeTanimotoDist(mol1, mol1, confId1=0, confId2=cid))
In [24]:
plt.xlabel('Shape Tanimoto Distance') plt.ylabel('count') hist(tani1) plt.show()
In [25]:
cids13 = [0] cids13 += [cid for cid,t in zip(cids1[1:], tani1) if t > 0.65] print len(cids13)
In [26]:
v.DeleteAll() for cid in cids13: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
Out[27]:
Shape Tanimoto distance matrix¶In [28]:
distmat1 = [] for i in range(1, mol1.GetNumConformers()): tmp = [] for j in range(i): tmp.append(AllChem.ShapeTanimotoDist(mol1, mol1, confId1=i, confId2=j)) distmat1.extend(tmp)Diversity picking (10 most diverse conformers)¶
In [29]:
mmp = SimDivFilters.MaxMinPicker() tdids1 = mmp.Pick(numpy.array(distmat1),mol1.GetNumConformers(),10) print [d for d in tdids1]
[37, 81, 75, 61, 10, 73, 59, 7, 11, 44]Shape-Tanimoto-based clustering¶
In [30]:
tclusters1 = Butina.ClusterData(distmat1, mol1.GetNumConformers(), 0.3, isDistData=True, reordering=True) print len(tclusters1)
In [31]:
plt.xlabel('cluster index') plt.ylabel('number of members') bar(range(1,len(tclusters1)+1), [len(c) for c in tclusters1]) plt.show()
In [32]:
tclids1 = [] for c in tclusters1: if len(c) > 1: tclids1.append(c[0]) print tclids1
[30, 99, 93, 15, 87, 40, 3, 57, 74, 20, 98, 80, 38, 8]
In [33]:
v.DeleteAll() for cid in tclids1: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
Out[34]:
3D pharmacophore fingerprints¶ Tanimoto similarity of 3D pharmacophore fingerprints¶In [35]:
from rdkit.Chem.Pharm2D import Gobbi_Pharm2D, Generate factory = Gobbi_Pharm2D.factory pharm3D = [] for cid in cids1: dm = Chem.Get3DDistanceMatrix(mol1, confId=cid) pharm3D.append(Generate.Gen2DFingerprint(mol1, factory, dMat=dm))
In [36]:
from rdkit import DataStructs # similarity distribution sims1 = [] p1 = pharm3D[0] # first conformer as reference for p in pharm3D[1:]: sims1.append(DataStructs.TanimotoSimilarity(p1, p))
In [37]:
plt.xlabel('Tanimoto similarity') plt.ylabel('number of conformers') hist(sims1) plt.show()
In [38]:
sids1 = [0] sids1 += [cid for cid,s in zip(cids1[1:], sims1) if s < 0.4] print len(sids1) print sids1
11 [0, 64, 73, 76, 79, 83, 84, 87, 88, 95, 98]3D-Pharmacophore-based clustering¶
In [39]:
tanimat1 = [] for i in range(1, mol1.GetNumConformers()): tanimat1.extend(DataStructs.BulkTanimotoSimilarity(pharm3D[i], pharm3D[:i], returnDistance=True))
In [40]:
tclusters12 = Butina.ClusterData(tanimat1, mol1.GetNumConformers(), 0.3, isDistData=True, reordering=True) print len(tclusters12)
In [41]:
plt.xlabel('cluster index') plt.ylabel('number of members') bar(range(1,len(tclusters12)+1), [len(c) for c in tclusters12]) plt.show()
In [42]:
tclids12 = [] for c in tclusters12: if len(c) > 1: tclids12.append(c[0]) print tclids12
[13, 90, 51, 87, 82, 98, 76, 56, 99, 77, 61, 11, 10]
In [43]:
v.DeleteAll() for cid in tclids12: v.ShowMol(mol1, confId=cid, name='conf_'+str(cid), showOnly=False)
Out[44]:
MD simulations of ligand in binding pocket¶In [45]:
smi_pdb = 'C[NH+]1CCN(CCOc2cc3ncnc(Nc4c5c(ccc4Cl)OCO5)c3c(OC3CCOCC3)c2)CC1' mol2 = Chem.MolFromSmiles(smi_pdb) mol2
Out[45]:
In [46]:
# PDB 2H8H mol2 = Chem.MolFromMolFile('lig_2H8H.sdf', removeHs=False) mol2.RemoveAllConformers() mol2
Out[46]:
In [48]:
cids2 = rdConformerParser.AddConformersFromAmberTrajectory(mol2, 'ligand.trx') print len(cids2)
In [49]:
for i in range(1, mol2.GetNumConformers()): pyO3A = rdMolAlign.GetO3A(mol2, mol2, prbCid=i, refCid=0) pyO3A.Align()
In [72]:
v.DeleteAll() for cid in cids2: v.ShowMol(mol2, confId=cid, name='conf_'+str(cid), showOnly=False)
Out[73]:
RMSD (first conformer as reference)¶In [57]:
rmsd2 = [] for cid in cids2[1:]: rmsd2.append(AllChem.GetConformerRMS(mol2, 0, cid, prealigned=True))
In [58]:
plt.xlabel('RMSD') plt.ylabel('count') hist(rmsd2) plt.show()
In [59]:
cids22 = [0] cids22 += [cid for cid,r in zip(cids2[1:], rmsd2) if r >= 1.0] print len(cids22)
In [74]:
v.DeleteAll() for cid in cids22: v.ShowMol(mol2, confId=cid, name='conf_'+str(cid), showOnly=False)
Out[75]:
In [62]:
rmsdmat2 = AllChem.GetConformerRMSMatrix(mol2, prealigned=True)Diversity picking (10 most diverse conformers)¶
In [63]:
mmp = SimDivFilters.MaxMinPicker() dids2 = mmp.Pick(numpy.array(rmsdmat2), mol2.GetNumConformers(), 10) print [d for d in dids2]
[37, 23, 11, 62, 28, 77, 61, 49, 44, 33]
In [76]:
v.DeleteAll() for cid in dids2: v.ShowMol(mol2, confId=cid, name='conf_'+str(cid), showOnly=False)
Out[77]:
In [66]:
clusters2 = Butina.ClusterData(rmsdmat2, mol2.GetNumConformers(), 0.5, isDistData=True, reordering=True) print len(clusters2)
In [67]:
plt.xlabel('cluster index') plt.ylabel('number of members') bar(range(1,len(clusters2)+1), [len(c) for c in clusters2]) plt.show()
In [68]:
clids2 = [] for c in clusters2: if len(c) > 1: clids2.append(c[0]) print clids2
[37, 47, 14, 83, 80, 18, 69, 45, 84, 59]
In [78]:
v.DeleteAll() for cid in clids2: v.ShowMol(mol2, confId=cid, name='conf_'+str(cid), showOnly=False)
Out[79]:
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4