# Source code for menten_gcn.decorators.rosetta

import math

from menten_gcn.decorators.base import *
from menten_gcn.wrappers import RosettaPoseWrapper

try:
    from pyrosetta import rosetta
    from pyrosetta.rosetta.core.scoring import *
except Exception:
    # PyRosetta is an optional dependency at import time: if it cannot be
    # loaded, leave ``rosetta`` as None so this module can still be imported.
    # Catch Exception rather than BaseException so that KeyboardInterrupt and
    # SystemExit raised during the (slow) import are not silently swallowed.
    rosetta = None

#from pyrosetta import rosetta
# Caller needs to call init()

# import scipy  # Jump
from scipy.spatial.transform import Rotation as R

# Convention: All decorators must start with "Rosetta". This allows us to
# standardize them later while still maintaining backwards compatibility


class RosettaResidueSelectorDecorator(Decorator):

    """
    Takes a user-provided residue selector and labels each residue with a 1 or 0 accordingly.

    - 1 Node Feature
    - 0 Edge Features

    Parameters
    ----------
    selector: ResidueSelector
        This residue selector will be applied to the Rosetta pose
    description: str
        This is the string that will label this feature in the final summary. Not technically required but highly recommended
    """

    def __init__(self, selector, description: str):
        assert isinstance(selector, rosetta.core.select.residue_selector.ResidueSelector)
        assert isinstance(description, str)
        self.selector = selector
        self.description = description
        # Cache key derived from this instance's id so that two selector
        # decorators used together do not read each other's cached selection.
        self.unique_key = str(id(self)) + "_selection"

    def get_version_name(self):
        """Return the name identifying this decorator."""
        return "RosettaResidueSelectorDecorator"

    def _get_selection(self, wrapped_pose):
        """Apply the stored selector to the wrapped pose and return the result."""
        assert isinstance(wrapped_pose, RosettaPoseWrapper)
        pose = wrapped_pose.pose
        return self.selector.apply(pose)

    def cache_data(self, wrapped_pose, dict_cache):
        """Store the selection in ``dict_cache`` unless it is already there."""
        if self.unique_key not in dict_cache:
            dict_cache[self.unique_key] = self._get_selection(wrapped_pose)

    def n_node_features(self):
        """Return the number of node features (always 1)."""
        return 1

    def calc_node_features(self, wrapped_pose, resid, dict_cache=None):
        """
        Return ``[1.0]`` if ``resid`` is selected and ``[0.0]`` otherwise.

        When ``dict_cache`` is None the selector is applied to the pose on
        every call; otherwise the selection stored by ``cache_data`` is used
        and must already be present in the cache.
        """
        if dict_cache is None:
            selection = self._get_selection(wrapped_pose)
        else:
            assert self.unique_key in dict_cache
            selection = dict_cache[self.unique_key]

        return [1.0 if selection[resid] else 0.0]

    def describe_node_features(self):
        """Return a one-element list describing the node feature."""
        return [
            "1.0 if the residue is selected by the residue selector, 0.0 otherwise. "
            + "User defined definition of the residue selector and how to reproduce it: "
            + self.description,
        ]


class RosettaResidueSelectorFromXML(RosettaResidueSelectorDecorator):
    """
    Takes a user-provided residue selector via XML and labels each residue with a 1 or 0 accordingly.

    - 1 Node Feature
    - 0 Edge Features

    Parameters
    ----------
    xml_str: str
        XML snippet that defines the selector
    res_sele_name: str
        The name of the selector within the snippet
    """

    # Useful resource: https://www.programmersought.com/article/87461668890/
    def __init__(self, xml_str: str, res_sele_name: str):
        # Parse the snippet, pull out the named selector, and hand it to the
        # parent class together with a description that records both the
        # selector name and the full XML so the feature can be reproduced.
        xml = rosetta.protocols.rosetta_scripts.XmlObjects.create_from_string(xml_str)
        selector = xml.get_residue_selector(res_sele_name)
        description = "Took the residue selector named " + res_sele_name + " from this XML: " + xml_str
        super().__init__(selector, description=description)


class RosettaHBondDecorator_v0(Decorator):
    """
    Labels each edge with counts of the hydrogen bonds between its two residues.

    - 0 Node Features
    - 1 Edge Feature if bb_only, otherwise 5

    Parameters
    ----------
    sfxn: ScoreFunction
        Stored as ``self.sfxn``. Rosetta's default score function is used if this is None.
        NOTE(review): nothing in this class reads ``self.sfxn`` afterwards; the
        HBondSet is built from the pose alone.
    bb_only: bool
        Only report the backbone-backbone hbond count.
        Reduces the number of features from 5 down to 1
    """

    def __init__(self, sfxn=None, bb_only: bool = False):
        # Key under which the HBondSet is stored in the shared cache dict.
        self.key = "hbset"
        self.bb_only = bb_only
        if sfxn is None:
            self.sfxn = rosetta.core.scoring.get_score_function()
        else:
            self.sfxn = sfxn

    def get_version_name(self):
        """Return the name identifying this decorator."""
        return "RosettaHBondDecorator_v0"

    @staticmethod
    def _build_hbset(wrapped_pose):
        """Build a fresh HBondSet from the pose held by ``wrapped_pose``."""
        assert isinstance(wrapped_pose, RosettaPoseWrapper)
        # The second argument is presumably Rosetta's bb_only flag - TODO confirm.
        return rosetta.core.scoring.hbonds.HBondSet(wrapped_pose.pose, False)

    def cache_data(self, wrapped_pose, dict_cache):
        """Store an HBondSet for the pose in ``dict_cache`` unless one is already there."""
        if self.key in dict_cache:
            assert isinstance(dict_cache[self.key], rosetta.core.scoring.hbonds.HBondSet)
        else:
            dict_cache[self.key] = self._build_hbset(wrapped_pose)

    def n_node_features(self):
        """Return the number of node features (always 0)."""
        return 0

    def n_edge_features(self):
        """Return 1 when ``bb_only`` is set, otherwise 5."""
        return 1 if self.bb_only else 5

    def calc_edge_features(self, wrapped_pose, resid1, resid2, dict_cache=None):
        """
        Count the hydrogen bonds between ``resid1`` and ``resid2``.

        Returns a pair ``(f_12, f_21)`` of feature lists, one per edge
        direction. The first three counts (bb-bb, bb-sc, sc-sc) are the same
        in both directions; the donor/acceptor counts are swapped in ``f_21``.
        With ``bb_only`` set, each list holds only the bb-bb count.

        When ``dict_cache`` is None the HBondSet is rebuilt from the pose;
        otherwise the one stored by ``cache_data`` is used and must be present.
        """
        if dict_cache is None:
            hbset = self._build_hbset(wrapped_pose)
        else:
            assert self.key in dict_cache
            hbset = dict_cache.get(self.key)
        assert hbset is not None

        n_bb_bb = 0
        n_bb_sc = 0
        n_sc_sc = 0
        n_don1 = 0  # n_don2 = n_acc1
        n_acc1 = 0  # n_acc2 = n_don1

        # Either orientation of the residue pair counts as a match.
        wanted_pairs = ((resid1, resid2), (resid2, resid1))

        for hbond in hbset.residue_hbonds(resid1, False):
            don_res = hbond.don_res()
            acc_res = hbond.acc_res()
            if (don_res, acc_res) not in wanted_pairs:
                continue

            if resid1 == don_res:
                n_don1 += 1
            else:
                n_acc1 += 1

            acc_is_bb = hbond.acc_atm_is_backbone()
            don_is_bb = hbond.don_hatm_is_backbone()
            if acc_is_bb and don_is_bb:
                n_bb_bb += 1
            elif acc_is_bb or don_is_bb:
                n_bb_sc += 1
            else:
                n_sc_sc += 1

        if self.bb_only:
            return [n_bb_bb], [n_bb_bb]

        f_12 = [n_bb_bb, n_bb_sc, n_sc_sc, n_don1, n_acc1]
        f_21 = [n_bb_bb, n_bb_sc, n_sc_sc, n_acc1, n_don1]

        return f_12, f_21

    def describe_edge_features(self):
        """Return one description per edge feature, in feature order."""
        alldesc = [
            "Total number of backbone-backbone hbonds (symmetric)",
            "Total number of backbone-sidechain hbonds (symmetric)",
            "Total number of sidechain-sidechain hbonds (symmetric)",
            "Number of hbonds in which the first residue is the donor (asymmetric)",
            "Number of hbonds in which the first residue is the acceptor (asymmetric)",
        ]
        if self.bb_only:
            return [alldesc[0]]
        return alldesc


class _RosettaOnebodyEnergies_v0(Decorator):
    """
    Base class that labels each node with one-body energies from a score function.

    With ``individual`` unset, a single feature is produced: the energy map
    dotted with the score function's weights. With ``individual`` set, one
    feature per score type is produced, read directly from the energy map
    (no weights are applied in this class). Subclasses must provide
    ``get_version_name``.
    """

    def __init__(self, sfxn, individual: bool = False, score_types=None):
        self.sfxn = sfxn
        self.ind = individual

        # ``self.terms`` only exists in per-term mode.
        if not individual:
            return
        self.terms = sfxn.get_nonzero_weighted_scoretypes() if score_types is None else score_types

    def get_version_name(self):
        raise NotImplementedError  # Child class needs to define this

    def n_edge_features(self):
        """Return the number of edge features (always 0)."""
        return 0

    def n_node_features(self):
        """Return one feature per term in per-term mode, otherwise 1."""
        return len(self.terms) if self.ind else 1

    def calc_node_features(self, wrapped_pose, resid, dict_cache=None):
        """Return the list of one-body energy features for residue ``resid``."""
        assert isinstance(wrapped_pose, RosettaPoseWrapper)
        pose = wrapped_pose.pose
        self.sfxn.setup_for_scoring(pose)
        emap = rosetta.core.scoring.EMapVector()

        # Accumulate all three one-body evaluations into the same energy map.
        evaluators = (
            self.sfxn.eval_ci_1b,
            self.sfxn.eval_cd_1b,
            self.sfxn.eval_intrares_energy,
        )
        for evaluate in evaluators:
            evaluate(pose.residue(resid), pose, emap)

        if not self.ind:
            return [emap.dot(self.sfxn.weights())]
        return [emap[term] for term in self.terms]

    def describe_node_features(self):
        """Return one description per node feature, in feature order."""
        if not self.ind:
            return ["Sum of all Rosetta onebody energies using " + self.get_version_name()]
        return [str(term) + " onebody term using " + self.get_version_name()
                for term in self.terms]


class _RosettaTwobodyEnergies_v0(Decorator):
    """
    Base class that labels each edge with two-body energies from a score function.

    With ``individual`` unset, a single feature is produced: the energy map
    dotted with the score function's weights. With ``individual`` set, one
    feature per score type is produced, read directly from the energy map
    (no weights are applied in this class). The same features are reported
    for both edge directions. Subclasses must provide ``get_version_name``.
    """

    def __init__(self, sfxn, individual: bool = False, score_types=None):
        self.sfxn = sfxn
        self.ind = individual

        # ``self.terms`` only exists in per-term mode.
        if not individual:
            return
        self.terms = sfxn.get_nonzero_weighted_scoretypes() if score_types is None else score_types

    def get_version_name(self):
        raise NotImplementedError  # Child class needs to define this

    def n_node_features(self):
        """Return the number of node features (always 0)."""
        return 0

    def n_edge_features(self):
        """Return one feature per term in per-term mode, otherwise 1."""
        return len(self.terms) if self.ind else 1

    def calc_edge_features(self, wrapped_pose, resid1, resid2, dict_cache=None):
        """
        Return the two-body energy features for the residue pair.

        The result is a pair whose two entries are the very same list object,
        since the features are symmetric.
        """
        assert isinstance(wrapped_pose, RosettaPoseWrapper)
        pose = wrapped_pose.pose
        self.sfxn.setup_for_scoring(pose)
        emap = rosetta.core.scoring.EMapVector()

        # Accumulate both two-body evaluations into the same energy map.
        for evaluate in (self.sfxn.eval_ci_2b, self.sfxn.eval_cd_2b):
            evaluate(pose.residue(resid1), pose.residue(resid2), pose, emap)

        if self.ind:
            values = [emap[term] for term in self.terms]
        else:
            values = [emap.dot(self.sfxn.weights())]
        return values, values

    def describe_edge_features(self):
        """Return one description per edge feature, in feature order."""
        if not self.ind:
            return ["Sum of all Rosetta twobody energies using " + self.get_version_name() + " (symmetric)"]
        return [str(term) + " twobody term using " + self.get_version_name() + " (symmetric)"
                for term in self.terms]


class RosettaJumpDecorator(Decorator):

    """
    Measures the translational and rotational relationships between all residue pairs.
    This uses internal coordinate frames so it is agnostic to the global coordinate system.
    You can move/rotate your protein around and these will stay the same.

    - 0 Node Features
    - 6-12 Edge Features

    Parameters
    ----------
    use_nm: bool
        If true, measure distance in nanometers.
        Otherwise (default) use Angstroms.
    rottype: str
        How do you want to represent the rotational degrees of freedom?
        Options are "euler" (default), "euler_sincos", "matrix",
        "quat", "rotvec", and "rotvec_sincos".
    """

    # Number of edge features for each supported ``rottype``: three
    # translation values plus the size of the rotation encoding.
    _EDGE_FEATURE_COUNTS = {
        "euler": 6,
        "rotvec": 6,
        "quat": 7,
        "euler_sincos": 9,
        "rotvec_sincos": 9,
        "matrix": 12,
    }

    def __init__(self, use_nm: bool = False, rottype: str = "euler"):
        assert rottype in tuple(self._EDGE_FEATURE_COUNTS), rottype
        self.rottype = rottype
        self.use_nm = use_nm

    def get_version_name(self):
        """Return the name identifying this decorator."""
        return "RosettaJumpDecorator"

    def n_node_features(self):
        """Return the number of node features (always 0)."""
        return 0

    def n_edge_features(self):
        """Return the number of edge features for the configured ``rottype``."""
        count = self._EDGE_FEATURE_COUNTS.get(self.rottype)
        assert count is not None, self.rottype
        return count

    def jump_to_vec(self, jump):
        """
        Flatten a Rosetta jump into a list of numbers.

        The first three values are the translation (divided by 10 when
        ``use_nm`` is set); the remaining values encode the rotation in the
        representation selected by ``rottype``.
        """
        trans = jump.get_translation()
        if self.use_nm:
            # Build a new list instead of dividing in place, so the vector
            # object returned by the jump is never modified.
            trans = [coord / 10.0 for coord in trans]

        rot_mat = jump.get_rotation()
        rot = R.from_matrix(rot_mat)

        vec = list(trans)

        if self.rottype == "euler":
            vec.extend(rot.as_euler('xyz', degrees=False))
        elif self.rottype == "euler_sincos":
            # Each Euler angle contributes its sine followed by its cosine.
            for angle in rot.as_euler('xyz', degrees=False):
                vec.extend((math.sin(angle), math.cos(angle)))
        elif self.rottype == "rotvec":
            vec.extend(rot.as_rotvec())
        elif self.rottype == "rotvec_sincos":
            # Each rotation-vector component contributes its sine then cosine.
            for component in rot.as_rotvec():
                vec.extend((math.sin(component), math.cos(component)))
        elif self.rottype == "quat":
            vec.extend(rot.as_quat())
        elif self.rottype == "matrix":
            # Append the three rows of the jump's own rotation matrix.
            for i in range(0, 3):
                vec.extend(rot_mat[i])

        return vec

    def calc_edge_features(self, wrapped_pose, resid1, resid2, dict_cache=None):
        """
        Return ``(f_ij, f_ji)``: the flattened jump from residue ``resid1`` to
        ``resid2`` and the flattened jump in the opposite direction.
        """
        assert isinstance(wrapped_pose, RosettaPoseWrapper)
        pose = wrapped_pose.pose
        make_stub = rosetta.protocols.hotspot_hashing.StubGenerator.residueStubOrientFrame
        stub1 = make_stub(pose.residue(resid1))
        stub2 = make_stub(pose.residue(resid2))
        jump_ij = rosetta.core.kinematics.Jump(stub1, stub2)
        jump_ji = rosetta.core.kinematics.Jump(stub2, stub1)

        f_ij = self.jump_to_vec(jump_ij)
        f_ji = self.jump_to_vec(jump_ji)

        return f_ij, f_ji

    def describe_edge_features(self):
        """Return one description per edge feature, in feature order."""
        if self.use_nm:
            d_units = "nanometers"
        else:
            d_units = "Angstroms"

        return ["Value #{} for the Rosetta jump. Distances are measured in {}".format(i, d_units)
                for i in range(0, self.n_edge_features())]


class RosettaHBondDecorator(RosettaHBondDecorator_v0):
    """
    Labels each edge with counts of the hydrogen bonds between its two residues:
    backbone-backbone, backbone-sidechain, sidechain-sidechain, and the number
    of hbonds in which the first residue is the donor or the acceptor.

    - 0 Node Features
    - 1-5 Edge Features (depending on bb_only)

    Parameters
    ----------
    sfxn: ScoreFunction
        Score function stored on the decorator.
        We will use Rosetta's default if this is None
    bb_only: bool
        Only consider backbone-backbone hbonds.
        Reduces the number of features from 5 down to 1
    """


class Rosetta_Ref2015_OneBodyEneriges(_RosettaOnebodyEnergies_v0):
    """
    Label each node with its Rosetta one-body energy

    - 1 - 20-ish Node Features
    - 0 Edge Features

    Parameters
    ----------
    individual: bool
        If true, list the score for each term individually.
        Otherwise sum them all into one value.
    score_types: list of ScoreTypes
        Only use these score types.
        None (default) includes all default types.
        Note - this only applies if individual == True
    """

    # NOTE: the misspelling "Eneriges" is part of the public class name and of
    # the version string below, so it is kept for backward compatibility.

    def __init__(self, individual: bool = False, score_types=None):
        sfxn = rosetta.core.scoring.ScoreFunctionFactory.create_score_function("ref2015.wts")
        super().__init__(sfxn=sfxn, individual=individual, score_types=score_types)

    def get_version_name(self):
        """Return the name identifying this decorator."""
        return "Rosetta_Ref2015_OneBodyEneriges"


class Rosetta_Ref2015_TwoBodyEneriges(_RosettaTwobodyEnergies_v0):
    """
    Label each edge with its Rosetta two-body energy

    - 0 Node Features
    - 1 - 20-ish Edge Features

    Parameters
    ----------
    individual: bool
        If true, list the score for each term individually.
        Otherwise sum them all into one value.
    score_types: list of ScoreTypes
        Only use these score types.
        None (default) includes all default types.
        Note - this only applies if individual == True
    """

    # NOTE: the misspelling "Eneriges" is part of the public class name and of
    # the version string below, so it is kept for backward compatibility.

    def __init__(self, individual: bool = False, score_types=None):
        sfxn = rosetta.core.scoring.ScoreFunctionFactory.create_score_function("ref2015.wts")
        super().__init__(sfxn=sfxn, individual=individual, score_types=score_types)

    def get_version_name(self):
        """Return the name identifying this decorator."""
        return "Rosetta_Ref2015_TwoBodyEneriges"


class Ref2015Decorator(CombinedDecorator):

    """
    Meta-decorator that combines Rosetta_Ref2015_OneBodyEneriges and Rosetta_Ref2015_TwoBodyEneriges

    - 1 - 20-ish Node Features
    - 1 - 20-ish Edge Features

    Parameters
    ----------
    individual: bool
        If true, list the score for each term individually.
        Otherwise sum them all into one value.
    score_types: list of ScoreTypes
        Only use these score types.
        None (default) includes all default types.
        Note - this only applies if individual == True
    """

    def __init__(self, individual: bool = False, score_types=None):
        # The same settings are forwarded to both the node (one-body) and the
        # edge (two-body) decorator.
        decorators = [
            Rosetta_Ref2015_OneBodyEneriges(individual=individual, score_types=score_types),
            Rosetta_Ref2015_TwoBodyEneriges(individual=individual, score_types=score_types),
        ]
        super().__init__(decorators)

    def get_version_name(self):
        """Return the name identifying this decorator."""
        return "Ref2015Decorator"


class AbbreviatedRef2015Decorator_v0(CombinedDecorator):

    """
    Meta-decorator that trims the ref2015 feature set down to a hand-picked
    list of score types: one list for the nodes (one-body decorator) and one
    for the edges (two-body decorator). The intent is to keep one-body types
    off the edges and two-body types off the nodes, but be warned that the
    lists below were assembled by hand.

    - 15 Node Features
    - 10 Edge Features
    """

    def __init__(self):
        # Score types reported per node (15 entries).
        node_terms = [
            fa_atr, fa_rep, fa_sol,
            fa_intra_rep, fa_intra_sol_xover4,
            lk_ball_wtd, fa_elec, pro_close,
            hbond_bb_sc, omega, fa_dun, p_aa_pp,
            yhh_planarity, ref, rama_prepro,
        ]
        # Score types reported per edge (10 entries).
        edge_terms = [
            fa_atr, fa_rep, fa_sol,
            lk_ball_wtd, fa_elec,
            hbond_sr_bb, hbond_lr_bb,
            hbond_bb_sc, hbond_sc,
            dslf_fa13,
        ]

        node_decorator = Rosetta_Ref2015_OneBodyEneriges(individual=True, score_types=node_terms)
        edge_decorator = Rosetta_Ref2015_TwoBodyEneriges(individual=True, score_types=edge_terms)
        CombinedDecorator.__init__(self, [node_decorator, edge_decorator])

    def get_version_name(self):
        return "AbbreviatedRef2015Decorator_v0"