Source code for pyrosetta_help.alphafold.retrieval

# Public names of this module, i.e. what ``from ... import *`` exposes.
__all__ = ['pose_from_alphafold2', 'get_alphafold2_error', 'reshape_errors']

from typing import (Union, Dict, List)

import numpy as np
import pyrosetta
import requests


def pose_from_alphafold2(uniprot: str) -> pyrosetta.Pose:
    """
    Download an AlphaFold2 model from the EBI AlphaFold server and load it as a pose.

    The file requested is ``AF-{uniprot}-F1-model_v2.pdb``.

    :param uniprot: uniprot id ("accession"), not gene name or uniprot name.
    :return: a pose built from the PDB block returned by the server.
    :raises requests.HTTPError: if the server replies with an error status
        (e.g. there is no such file for the given accession).
    :raises requests.Timeout: if the server does not respond in time.
    """
    # An explicit timeout is required: without one, requests waits forever
    # on a server that accepts the connection but never answers.
    reply = requests.get(f'https://alphafold.ebi.ac.uk/files/AF-{uniprot}-F1-model_v2.pdb',
                         timeout=30)
    reply.raise_for_status()
    pose = pyrosetta.Pose()
    # pose_from_pdbstring fills the pose passed as first argument in place.
    pyrosetta.rosetta.core.import_pose.pose_from_pdbstring(pose, reply.text)
    return pose
# ==== error-based methods =====================================================
def get_alphafold2_error(uniprot: str, reshaped=True) -> Union[np.ndarray, list]:
    """
    Returns the distances errors either as numpy matrix (``reshaped=True``)
    or as the weird format from AF2-EBI
    —see ``help(pyrosetta_help.alphafold.retrieval.reshape_errors)`` for more.

    Remember that the matrix is zero indexed and that these values are in Ångström
    and are not pLDDT, which are stored as b-factors.

    :param uniprot: uniprot accession used to build the file name
        ``AF-{uniprot}-F1-predicted_aligned_error_v2.json``.
    :param reshaped: if true, the parsed JSON is passed through ``reshape_errors``;
        otherwise the parsed JSON is returned as is.
    :return: the error matrix (``reshaped=True``) or the parsed JSON reply.
    :raises requests.HTTPError: if the server replies with an error status.
    :raises requests.Timeout: if the server does not respond in time.
    """
    # Example of such a file (older v1 naming):
    # https://alphafold.ebi.ac.uk/files/AF-Q00341-F1-predicted_aligned_error_v1.json
    # An explicit timeout is required: without one, requests waits forever
    # on a server that accepts the connection but never answers.
    reply = requests.get(f'https://alphafold.ebi.ac.uk/files/AF-{uniprot}-F1-predicted_aligned_error_v2.json',
                         timeout=30)
    reply.raise_for_status()
    errors = reply.json()
    return reshape_errors(errors) if reshaped else errors
def reshape_errors(errors: List[Dict[str, list]]) -> np.ndarray:
    """
    Convert the AF2-EBI error JSON into a square matrix of distance errors.

    The JSON from AF2 has a single element list. The sole element is a dictionary
    with keys 'residue1', 'residue2' and 'distance', three parallel lists.
    This method returns a matrix of distances placed according to the stated
    residue indices: ``matrix[residue1 - 1, residue2 - 1] = distance``.
    This is rather unlikely to differ from a regular reshape...
    but no chances are taken by assuming the entries are always sorted.

    :param errors: the parsed JSON, only ``errors[0]`` is read.
    :return: a float matrix of side ``int(sqrt(number of distances))``;
        cells for which no distance was given are NaN.
    :raises IndexError: if a residue index falls outside of that matrix.
    """
    entry = errors[0]
    n_residues = int(np.sqrt(len(entry['distance'])))
    # Start from all-NaN so that unfilled cells are distinguishable from a zero error.
    error_matrix = np.full((n_residues, n_residues), np.nan)
    # The residue indices in the JSON are one-based, the matrix is zero-based.
    # dtype is forced so that empty lists still yield valid (integer) index arrays.
    rows = np.asarray(entry['residue1'], dtype=int) - 1
    cols = np.asarray(entry['residue2'], dtype=int) - 1
    # Single vectorised assignment instead of a Python loop over every residue pair.
    error_matrix[rows, cols] = np.asarray(entry['distance'], dtype=float)
    return error_matrix