"""Process a protein tensor dict with the non-ensembled transforms.

The transforms are selected by the data configuration and returned by
`nonensembled_map_fns`. The transforms named for this pipeline are:
correct_msa_restypes, add_distillation_flag, cast_64bit_ints,
squeeze_features, randomly_replace_msa_with_unknown, make_seq_mask,
make_msa_mask, make_hhblits_profile, make_random_crop_to_size_seed,
fix_templates_aatype, make_template_mask, make_pseudo_beta,
make_atom14_masks.
"""
import copy
import tensorflow.compat.v1 as tf
import pickle
import numpy as np
import ml_collections

# Placeholder tokens that stand in for input-dependent dimension sizes in the
# per-feature shape lists of the configuration.
NUM_RES = 'num residues placeholder'
NUM_MSA_SEQ = 'msa placeholder'
NUM_EXTRA_SEQ = 'extra msa placeholder'
NUM_TEMPLATES = 'num templates placeholder'

# Data-pipeline and model configuration. Only `CONFIG.data` is consumed by the
# transforms in this file.
CONFIG = ml_collections.ConfigDict({
    'data': {
        'common': {
            'masked_msa': {
                'profile_prob': 0.1,
                'same_prob': 0.1,
                'uniform_prob': 0.1,
            },
            'max_extra_msa': 1024,
            'msa_cluster_features': True,
            'num_recycle': 3,
            'reduce_msa_clusters_by_max_templates': False,
            'resample_msa_in_recycling': True,
            'template_features': [
                'template_all_atom_positions', 'template_sum_probs',
                'template_aatype', 'template_all_atom_masks',
                'template_domain_names',
            ],
            'unsupervised_features': [
                'aatype', 'residue_index', 'sequence', 'msa', 'domain_name',
                'num_alignments', 'seq_length', 'between_segment_residues',
                'deletion_matrix',
            ],
            'use_templates': False,
        },
        'eval': {
            'feat': {
                'aatype': [NUM_RES],
                'all_atom_mask': [NUM_RES, None],
                'all_atom_positions': [NUM_RES, None, None],
                'alt_chi_angles': [NUM_RES, None],
                'atom14_alt_gt_exists': [NUM_RES, None],
                'atom14_alt_gt_positions': [NUM_RES, None, None],
                'atom14_atom_exists': [NUM_RES, None],
                'atom14_atom_is_ambiguous': [NUM_RES, None],
                'atom14_gt_exists': [NUM_RES, None],
                'atom14_gt_positions': [NUM_RES, None, None],
                'atom37_atom_exists': [NUM_RES, None],
                'backbone_affine_mask': [NUM_RES],
                'backbone_affine_tensor': [NUM_RES, None],
                'bert_mask': [NUM_MSA_SEQ, NUM_RES],
                'chi_angles': [NUM_RES, None],
                'chi_mask': [NUM_RES, None],
                'extra_deletion_value': [NUM_EXTRA_SEQ, NUM_RES],
                'extra_has_deletion': [NUM_EXTRA_SEQ, NUM_RES],
                'extra_msa': [NUM_EXTRA_SEQ, NUM_RES],
                'extra_msa_mask': [NUM_EXTRA_SEQ, NUM_RES],
                'extra_msa_row_mask': [NUM_EXTRA_SEQ],
                'is_distillation': [],
                'msa_feat': [NUM_MSA_SEQ, NUM_RES, None],
                'msa_mask': [NUM_MSA_SEQ, NUM_RES],
                'msa_row_mask': [NUM_MSA_SEQ],
                'pseudo_beta': [NUM_RES, None],
                'pseudo_beta_mask': [NUM_RES],
                'random_crop_to_size_seed': [None],
                'residue_index': [NUM_RES],
                'residx_atom14_to_atom37': [NUM_RES, None],
                'residx_atom37_to_atom14': [NUM_RES, None],
                'resolution': [],
                'rigidgroups_alt_gt_frames': [NUM_RES, None, None],
                'rigidgroups_group_exists': [NUM_RES, None],
                'rigidgroups_group_is_ambiguous': [NUM_RES, None],
                'rigidgroups_gt_exists': [NUM_RES, None],
                'rigidgroups_gt_frames': [NUM_RES, None, None],
                'seq_length': [],
                'seq_mask': [NUM_RES],
                'target_feat': [NUM_RES, None],
                'template_aatype': [NUM_TEMPLATES, NUM_RES],
                'template_all_atom_masks': [NUM_TEMPLATES, NUM_RES, None],
                'template_all_atom_positions': [
                    NUM_TEMPLATES, NUM_RES, None, None],
                'template_backbone_affine_mask': [NUM_TEMPLATES, NUM_RES],
                'template_backbone_affine_tensor': [
                    NUM_TEMPLATES, NUM_RES, None],
                'template_mask': [NUM_TEMPLATES],
                'template_pseudo_beta': [NUM_TEMPLATES, NUM_RES, None],
                'template_pseudo_beta_mask': [NUM_TEMPLATES, NUM_RES],
                'template_sum_probs': [NUM_TEMPLATES, None],
                'true_msa': [NUM_MSA_SEQ, NUM_RES]
            },
            'fixed_size': True,
            'subsample_templates': False,  # We want top templates.
            'masked_msa_replace_fraction': 0.15,
            'max_msa_clusters': 512,
            'max_templates': 4,
            'num_ensemble': 1,
        },
    },
    'model': {
        'embeddings_and_evoformer': {
            'evoformer_num_block': 48,
            'evoformer': {
                'msa_row_attention_with_pair_bias': {
                    'dropout_rate': 0.15,
                    'gating': True,
                    'num_head': 8,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'msa_column_attention': {
                    'dropout_rate': 0.0,
                    'gating': True,
                    'num_head': 8,
                    'orientation': 'per_column',
                    'shared_dropout': True
                },
                'msa_transition': {
                    'dropout_rate': 0.0,
                    'num_intermediate_factor': 4,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'outer_product_mean': {
                    'first': False,
                    'chunk_size': 128,
                    'dropout_rate': 0.0,
                    'num_outer_channel': 32,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'triangle_attention_starting_node': {
                    'dropout_rate': 0.25,
                    'gating': True,
                    'num_head': 4,
                    'orientation': 'per_row',
                    'shared_dropout': True
                },
                'triangle_attention_ending_node': {
                    'dropout_rate': 0.25,
                    'gating': True,
                    'num_head': 4,
                    'orientation': 'per_column',
                    'shared_dropout': True
                },
                'triangle_multiplication_outgoing': {
                    'dropout_rate': 0.25,
                    'equation': 'ikc,jkc->ijc',
                    'num_intermediate_channel': 128,
                    'orientation': 'per_row',
                    'shared_dropout': True,
                    'fuse_projection_weights': False,
                },
                'triangle_multiplication_incoming': {
                    'dropout_rate': 0.25,
                    'equation': 'kjc,kic->ijc',
                    'num_intermediate_channel': 128,
                    'orientation': 'per_row',
                    'shared_dropout': True,
                    'fuse_projection_weights': False,
                },
                'pair_transition': {
                    'dropout_rate': 0.0,
                    'num_intermediate_factor': 4,
                    'orientation': 'per_row',
                    'shared_dropout': True
                }
            },
            'extra_msa_channel': 64,
            'extra_msa_stack_num_block': 4,
            'max_relative_feature': 32,
            'msa_channel': 256,
            'pair_channel': 128,
            'prev_pos': {
                'min_bin': 3.25,
                'max_bin': 20.75,
                'num_bins': 15
            },
            'recycle_features': True,
            'recycle_pos': True,
            'seq_channel': 384,
            'template': {
                'attention': {
                    'gating': False,
                    'key_dim': 64,
                    'num_head': 4,
                    'value_dim': 64
                },
                'dgram_features': {
                    'min_bin': 3.25,
                    'max_bin': 50.75,
                    'num_bins': 39
                },
                'embed_torsion_angles': False,
                'enabled': False,
                'template_pair_stack': {
                    'num_block': 2,
                    'triangle_attention_starting_node': {
                        'dropout_rate': 0.25,
                        'gating': True,
                        'key_dim': 64,
                        'num_head': 4,
                        'orientation': 'per_row',
                        'shared_dropout': True,
                        'value_dim': 64
                    },
                    'triangle_attention_ending_node': {
                        'dropout_rate': 0.25,
                        'gating': True,
                        'key_dim': 64,
                        'num_head': 4,
                        'orientation': 'per_column',
                        'shared_dropout': True,
                        'value_dim': 64
                    },
                    'triangle_multiplication_outgoing': {
                        'dropout_rate': 0.25,
                        'equation': 'ikc,jkc->ijc',
                        'num_intermediate_channel': 64,
                        'orientation': 'per_row',
                        'shared_dropout': True,
                        'fuse_projection_weights': False,
                    },
                    'triangle_multiplication_incoming': {
                        'dropout_rate': 0.25,
                        'equation': 'kjc,kic->ijc',
                        'num_intermediate_channel': 64,
                        'orientation': 'per_row',
                        'shared_dropout': True,
                        'fuse_projection_weights': False,
                    },
                    'pair_transition': {
                        'dropout_rate': 0.0,
                        'num_intermediate_factor': 2,
                        'orientation': 'per_row',
                        'shared_dropout': True
                    }
                },
                'max_templates': 4,
                'subbatch_size': 128,
                'use_template_unit_vector': False,
            }
        },
        'global_config': {
            'deterministic': False,
            'multimer_mode': False,
            'subbatch_size': 4,
            'use_remat': False,
            'zero_init': True,
            'eval_dropout': False,
        },
        'heads': {
            'distogram': {
                'first_break': 2.3125,
                'last_break': 21.6875,
                'num_bins': 64,
                'weight': 0.3
            },
            'predicted_aligned_error': {
                # `num_bins - 1` bins uniformly space the
                # [0, max_error_bin A] range.
                # The final bin covers [max_error_bin A, +infty]
                # 31A gives bins with 0.5A width.
                'max_error_bin': 31.,
                'num_bins': 64,
                'num_channels': 128,
                'filter_by_resolution': True,
                'min_resolution': 0.1,
                'max_resolution': 3.0,
                'weight': 0.0,
            },
            'experimentally_resolved': {
                'filter_by_resolution': True,
                'max_resolution': 3.0,
                'min_resolution': 0.1,
                'weight': 0.01
            },
            'structure_module': {
                'num_layer': 8,
                'fape': {
                    'clamp_distance': 10.0,
                    'clamp_type': 'relu',
                    'loss_unit_distance': 10.0
                },
                'angle_norm_weight': 0.01,
                'chi_weight': 0.5,
                'clash_overlap_tolerance': 1.5,
                'compute_in_graph_metrics': True,
                'dropout': 0.1,
                'num_channel': 384,
                'num_head': 12,
                'num_layer_in_transition': 3,
                'num_point_qk': 4,
                'num_point_v': 8,
                'num_scalar_qk': 16,
                'num_scalar_v': 16,
                'position_scale': 10.0,
                'sidechain': {
                    'atom_clamp_distance': 10.0,
                    'num_channel': 128,
                    'num_residual_block': 2,
                    'weight_frac': 0.5,
                    'length_scale': 10.,
                },
                'structural_violation_loss_weight': 1.0,
                'violation_tolerance_factor': 12.0,
                'weight': 1.0
            },
            'predicted_lddt': {
                'filter_by_resolution': True,
                'max_resolution': 3.0,
                'min_resolution': 0.1,
                'num_bins': 50,
                'num_channels': 128,
                'weight': 0.01
            },
            'masked_msa': {
                'num_output': 23,
                'weight': 2.0
            },
        },
        'num_recycle': 3,
        'resample_msa_in_recycling': True
    },
})

data_config = CONFIG.data

# One-letter residue codes; a residue's position in this list is its index.
restypes = [
    'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P',
    'S', 'T', 'W', 'Y', 'V'
]

# Partial inversion of HHBLITS_AA_TO_ID.
ID_TO_HHBLITS_AA = {
    0: 'A',
    1: 'C',  # Also U.
    2: 'D',  # Also B.
    3: 'E',  # Also Z.
    4: 'F',
    5: 'G',
    6: 'H',
    7: 'I',
    8: 'K',
    9: 'L',
    10: 'M',
    11: 'N',
    12: 'P',
    13: 'Q',
    14: 'R',
    15: 'S',
    16: 'T',
    17: 'V',
    18: 'W',
    19: 'Y',
    20: 'X',  # Includes J and O.
    21: '-',
}

restypes_with_x_and_gap = restypes + ['X', '-']

# Entry i is the position in `restypes_with_x_and_gap` of HHblits residue id i.
MAP_HHBLITS_AATYPE_TO_OUR_AATYPE = tuple(
    restypes_with_x_and_gap.index(ID_TO_HHBLITS_AA[i])
    for i in range(len(restypes_with_x_and_gap)))

# Atom names per residue in the 14-slot layout; '' marks an unused slot.
restype_name_to_atom14_names = {
    'ALA': ['N', 'CA', 'C', 'O', 'CB', '', '', '', '', '', '', '', '', ''],
    'ARG': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2', '', '', ''],
    'ASN': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'ND2', '', '', '', '', '', ''],
    'ASP': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'OD2', '', '', '', '', '', ''],
    'CYS': ['N', 'CA', 'C', 'O', 'CB', 'SG', '', '', '', '', '', '', '', ''],
    'GLN': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'NE2', '', '', '', '', ''],
    'GLU': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'OE2', '', '', '', '', ''],
    'GLY': ['N', 'CA', 'C', 'O', '', '', '', '', '', '', '', '', '', ''],
    'HIS': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'ND1', 'CD2', 'CE1', 'NE2', '', '', '', ''],
    'ILE': ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', 'CD1', '', '', '', '', '', ''],
    'LEU': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', '', '', '', '', '', ''],
    'LYS': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'CE', 'NZ', '', '', '', '', ''],
    'MET': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'SD', 'CE', '', '', '', '', '', ''],
    'PHE': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', '', '', ''],
    'PRO': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', '', '', '', '', '', '', ''],
    'SER': ['N', 'CA', 'C', 'O', 'CB', 'OG', '', '', '', '', '', '', '', ''],
    'THR': ['N', 'CA', 'C', 'O', 'CB', 'OG1', 'CG2', '', '', '', '', '', '', ''],
    'TRP': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'NE1', 'CE2', 'CE3', 'CZ2', 'CZ3', 'CH2'],
    'TYR': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'OH', '', ''],
    'VAL': ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', '', '', '', '', '', '', ''],
    'UNK': ['', '', '', '', '', '', '', '', '', '', '', '', '', ''],
}

restype_1to3 = {
    'A': 'ALA',
    'R': 'ARG',
    'N': 'ASN',
    'D': 'ASP',
    'C': 'CYS',
    'Q': 'GLN',
    'E': 'GLU',
    'G': 'GLY',
    'H': 'HIS',
    'I': 'ILE',
    'L': 'LEU',
    'K': 'LYS',
    'M': 'MET',
    'F': 'PHE',
    'P': 'PRO',
    'S': 'SER',
    'T': 'THR',
    'W': 'TRP',
    'Y': 'TYR',
    'V': 'VAL',
}

# The 37 atom names; an atom's position in this list is its atom37 index.
atom_types = [
    'N', 'CA', 'C', 'CB', 'O', 'CG', 'CG1', 'CG2', 'OG', 'OG1', 'SG', 'CD',
    'CD1', 'CD2', 'ND1', 'ND2', 'OD1', 'OD2', 'SD', 'CE', 'CE1', 'CE2', 'CE3',
    'NE', 'NE1', 'NE2', 'OE1', 'OE2', 'CH2', 'NH1', 'NH2', 'OH', 'CZ', 'CZ2',
    'CZ3', 'NZ', 'OXT'
]
atom_order = {atom_type: i for i, atom_type in enumerate(atom_types)}

# Atoms present in each residue type, keyed by three-letter residue name.
residue_atoms = {
    'ALA': ['C', 'CA', 'CB', 'N', 'O'],
    'ARG': ['C', 'CA', 'CB', 'CG', 'CD', 'CZ', 'N', 'NE', 'O', 'NH1', 'NH2'],
    'ASP': ['C', 'CA', 'CB', 'CG', 'N', 'O', 'OD1', 'OD2'],
    'ASN': ['C', 'CA', 'CB', 'CG', 'N', 'ND2', 'O', 'OD1'],
    'CYS': ['C', 'CA', 'CB', 'N', 'O', 'SG'],
    'GLU': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'O', 'OE1', 'OE2'],
    'GLN': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'NE2', 'O', 'OE1'],
    'GLY': ['C', 'CA', 'N', 'O'],
    'HIS': ['C', 'CA', 'CB', 'CG', 'CD2', 'CE1', 'N', 'ND1', 'NE2', 'O'],
    'ILE': ['C', 'CA', 'CB', 'CG1', 'CG2', 'CD1', 'N', 'O'],
    'LEU': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'N', 'O'],
    'LYS': ['C', 'CA', 'CB', 'CG', 'CD', 'CE', 'N', 'NZ', 'O'],
    'MET': ['C', 'CA', 'CB', 'CG', 'CE', 'N', 'O', 'SD'],
    'PHE': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'N', 'O'],
    'PRO': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'O'],
    'SER': ['C', 'CA', 'CB', 'N', 'O', 'OG'],
    'THR': ['C', 'CA', 'CB', 'CG2', 'N', 'O', 'OG1'],
    'TRP': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE2', 'CE3', 'CZ2', 'CZ3',
            'CH2', 'N', 'NE1', 'O'],
    'TYR': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'N', 'O',
            'OH'],
    'VAL': ['C', 'CA', 'CB', 'CG1', 'CG2', 'N', 'O']
}


def shape_list(x):
    """Return list of dimensions of a tensor, statically where possible.

    Like `x.shape.as_list()` but with tensors instead of `None`s.

    Args:
        x: A tensor.

    Returns:
        A list with length equal to the rank of the tensor. The n-th element
        of the list is an integer when that dimension is statically known,
        otherwise it is the n-th element of `tf.shape(x)`. If the rank itself
        is unknown, `tf.shape(x)` is returned instead of a list.
    """
    x = tf.convert_to_tensor(x)

    # If unknown rank, return dynamic shape
    if x.get_shape().dims is None:
        return tf.shape(x)

    static = x.get_shape().as_list()
    shape = tf.shape(x)

    # Fall back to the dynamic dimension only where the static one is unknown.
    return [shape[i] if dim is None else dim for i, dim in enumerate(static)]


def data_transforms_curry1(f):
    """Supply all arguments but the first.

    Args:
        f: A function whose first positional argument is the value to be
            transformed.

    Returns:
        A function `fc(*args, **kwargs)` that returns a one-argument callable
        `x -> f(x, *args, **kwargs)`.
    """

    def fc(*args, **kwargs):
        return lambda x: f(x, *args, **kwargs)

    return fc


def correct_msa_restypes(protein):
    """Correct MSA restype to have the same order as residue_constants.

    Remaps the ids in `protein['msa']` through
    `MAP_HHBLITS_AATYPE_TO_OUR_AATYPE` and applies the matching permutation to
    the last axis of every feature whose key contains 'profile'.

    Args:
        protein: Feature dict; modified in place.

    Returns:
        The same `protein` dict.
    """
    new_order_list = MAP_HHBLITS_AATYPE_TO_OUR_AATYPE
    new_order = tf.constant(new_order_list, dtype=protein['msa'].dtype)
    protein['msa'] = tf.gather(new_order, protein['msa'], axis=0)

    perm_matrix = np.zeros((22, 22), dtype=np.float32)
    perm_matrix[range(len(new_order_list)), new_order_list] = 1.

    for k in protein:
        if 'profile' in k:  # Include both hhblits and psiblast profiles
            num_dim = protein[k].shape.as_list()[-1]
            assert num_dim in [20, 21, 22], (
                'num_dim for %s out of expected range: %s' % (k, num_dim))
            protein[k] = tf.tensordot(
                protein[k], perm_matrix[:num_dim, :num_dim], 1)
    return protein


@data_transforms_curry1
def add_distillation_flag(protein, distillation):
    """Store `distillation` as a scalar float32 under 'is_distillation'."""
    protein['is_distillation'] = tf.constant(
        float(distillation), shape=[], dtype=tf.float32)
    return protein


def cast_64bit_ints(protein):
    """Cast every int64 feature in `protein` to int32, in place."""
    for k, v in protein.items():
        if v.dtype == tf.int64:
            protein[k] = tf.cast(v, tf.int32)
    return protein


def squeeze_features(protein):
    """Remove singleton and repeated dimensions in protein features.

    'aatype' is replaced by its argmax over the last axis. The listed features
    lose their last axis when it is statically of size 1, and 'seq_length' and
    'num_alignments' are reduced to their first element.
    """
    protein['aatype'] = tf.argmax(
        protein['aatype'], axis=-1, output_type=tf.int32)
    for k in [
            'domain_name', 'msa', 'num_alignments', 'seq_length', 'sequence',
            'superfamily', 'deletion_matrix', 'resolution',
            'between_segment_residues', 'residue_index',
            'template_all_atom_masks']:
        if k in protein:
            final_dim = shape_list(protein[k])[-1]
            # Only squeeze when the size is known statically to be 1.
            if isinstance(final_dim, int) and final_dim == 1:
                protein[k] = tf.squeeze(protein[k], axis=-1)

    for k in ['seq_length', 'num_alignments']:
        if k in protein:
            protein[k] = protein[k][0]  # Remove fake sequence dimension
    return protein


@data_transforms_curry1
def randomly_replace_msa_with_unknown(protein, replace_proportion):
    """Replace a proportion of the MSA with 'X'.

    Gap positions in 'msa' are never replaced. 'aatype' is masked with an
    independent random draw using the same proportion.
    """
    msa_mask = (tf.random.uniform(shape_list(protein['msa'])) <
                replace_proportion)
    x_idx = 20
    gap_idx = 21
    msa_mask = tf.logical_and(msa_mask, protein['msa'] != gap_idx)
    protein['msa'] = tf.where(msa_mask,
                              tf.ones_like(protein['msa']) * x_idx,
                              protein['msa'])
    aatype_mask = (
        tf.random.uniform(shape_list(protein['aatype'])) <
        replace_proportion)

    protein['aatype'] = tf.where(aatype_mask,
                                 tf.ones_like(protein['aatype']) * x_idx,
                                 protein['aatype'])
    return protein


def make_seq_mask(protein):
    """Add 'seq_mask': float32 ones with the shape of 'aatype'."""
    protein['seq_mask'] = tf.ones(
        shape_list(protein['aatype']), dtype=tf.float32)
    return protein


def make_msa_mask(protein):
    """Mask features are all ones, but will later be zero-padded."""
    protein['msa_mask'] = tf.ones(
        shape_list(protein['msa']), dtype=tf.float32)
    protein['msa_row_mask'] = tf.ones(
        shape_list(protein['msa'])[0], dtype=tf.float32)
    return protein


def make_hhblits_profile(protein):
    """Compute the HHblits MSA profile if not already present."""
    if 'hhblits_profile' in protein:
        return protein

    # Compute the profile for every residue (over all MSA sequences).
    protein['hhblits_profile'] = tf.reduce_mean(
        tf.one_hot(protein['msa'], 22), axis=0)
    return protein


class SeedMaker(object):
    """Return unique seeds.

    Each call returns the current counter value and then increments it.
    """

    def __init__(self, initial_seed=0):
        self.next_seed = initial_seed

    def __call__(self):
        i = self.next_seed
        self.next_seed += 1
        return i


seed_maker = SeedMaker()


def make_random_seed():
    """Return a random int32 tensor of shape [2], seeded from `seed_maker`."""
    return tf.random.uniform([2],
                             tf.int32.min,
                             tf.int32.max,
                             tf.int32,
                             seed=seed_maker())


def make_random_crop_to_size_seed(protein):
    """Random seed for cropping residues and templates."""
    protein['random_crop_to_size_seed'] = make_random_seed()
    return protein


def make_atom14_masks(protein):
    """Construct denser atom positions (14 dimensions instead of 37).

    Adds 'atom14_atom_exists', 'residx_atom14_to_atom37',
    'residx_atom37_to_atom14' and 'atom37_atom_exists' to `protein`, each
    gathered from a per-residue-type table using `protein['aatype']`.
    """
    restype_atom14_to_atom37 = []  # mapping (restype, atom14) --> atom37
    restype_atom37_to_atom14 = []  # mapping (restype, atom37) --> atom14
    restype_atom14_mask = []

    for rt in restypes:
        atom_names = restype_name_to_atom14_names[restype_1to3[rt]]

        restype_atom14_to_atom37.append([
            (atom_order[name] if name else 0)
            for name in atom_names
        ])

        atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)}
        restype_atom37_to_atom14.append([
            (atom_name_to_idx14[name] if name in atom_name_to_idx14 else 0)
            for name in atom_types
        ])

        restype_atom14_mask.append(
            [(1. if name else 0.) for name in atom_names])

    # Add dummy mapping for restype 'UNK'
    restype_atom14_to_atom37.append([0] * 14)
    restype_atom37_to_atom14.append([0] * 37)
    restype_atom14_mask.append([0.] * 14)

    restype_atom14_to_atom37 = np.array(
        restype_atom14_to_atom37, dtype=np.int32)
    restype_atom37_to_atom14 = np.array(
        restype_atom37_to_atom14, dtype=np.int32)
    restype_atom14_mask = np.array(restype_atom14_mask, dtype=np.float32)

    # create the mapping for (residx, atom14) --> atom37, i.e. an array
    # with shape (num_res, 14) containing the atom37 indices for this protein
    residx_atom14_to_atom37 = tf.gather(restype_atom14_to_atom37,
                                        protein['aatype'])
    residx_atom14_mask = tf.gather(restype_atom14_mask,
                                   protein['aatype'])

    protein['atom14_atom_exists'] = residx_atom14_mask
    protein['residx_atom14_to_atom37'] = residx_atom14_to_atom37

    # create the gather indices for mapping back
    residx_atom37_to_atom14 = tf.gather(restype_atom37_to_atom14,
                                        protein['aatype'])
    protein['residx_atom37_to_atom14'] = residx_atom37_to_atom14

    # create the corresponding mask
    restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
    for restype, restype_letter in enumerate(restypes):
        restype_name = restype_1to3[restype_letter]
        atom_names = residue_atoms[restype_name]
        for atom_name in atom_names:
            atom_type = atom_order[atom_name]
            restype_atom37_mask[restype, atom_type] = 1

    residx_atom37_mask = tf.gather(restype_atom37_mask,
                                   protein['aatype'])
    protein['atom37_atom_exists'] = residx_atom37_mask

    return protein


def nonensembled_map_fns(data_config):
    """Input pipeline functions which are not ensembled.

    Args:
        data_config: Config object exposing `common.use_templates`.

    Returns:
        A list of one-argument transforms to apply to a protein feature dict,
        in order.
    """
    common_cfg = data_config.common

    map_fns = [
        correct_msa_restypes,
        add_distillation_flag(False),
        cast_64bit_ints,
        squeeze_features,
        # Keep to not disrupt RNG.
        randomly_replace_msa_with_unknown(0.0),
        make_seq_mask,
        make_msa_mask,
        # Compute the HHblits profile if it's not set. This has to be run before
        # sampling the MSA.
        make_hhblits_profile,
        make_random_crop_to_size_seed,
    ]
    if common_cfg.use_templates:
        # NOTE(review): fix_templates_aatype, make_template_mask and
        # make_pseudo_beta are not defined in the visible source; this branch
        # raises NameError unless they are provided elsewhere. It is only
        # reached when `use_templates` is True.
        map_fns.extend([
            fix_templates_aatype,
            make_template_mask,
            make_pseudo_beta('template_')
        ])
    map_fns.extend([
        make_atom14_masks,
    ])

    return map_fns


@data_transforms_curry1
def compose(x, fs):
    """Apply each function in `fs` to `x` in order and return the result.

    Because of the currying decorator, call it as `compose(fs)(x)`.
    """
    for f in fs:
        x = f(x)
    return x


# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open("Human_HBB_tensor_dict.pkl",'rb') as f:
    Human_HBB_tensor_dict = pickle.load(f)

# Work on a copy so the loaded dict is left untouched by the transforms.
protein = copy.deepcopy(Human_HBB_tensor_dict)
# Report the features present before processing.
print(f"protein特征数:{len(protein)}")
print(f"protein特征:{protein.keys()}")

eval_cfg = data_config.eval
# Apply every non-ensembled transform to the feature dict, in order.
protein = compose(nonensembled_map_fns(data_config))(protein)

# Report the features present after processing.
print(f"nonensembled函数处理后protein特征数:{len(protein)}")
print(f"nonensembled函数处理后protein特征:{protein.keys()}")
print(protein)

# Save the processed feature dict.
with open("Human_HBB_tensor_dict_nonensembled.pkl",'wb') as f:
    pickle.dump(protein, f)