From 1052a8024f413de6dad91c50b5addfa35ec1f18e Mon Sep 17 00:00:00 2001
From: lab-pc
Date: Thu, 16 Mar 2023 15:38:48 +0800
Subject: [PATCH] --

---
 input.py | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 train.py |  6 +++---
 2 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/input.py b/input.py
index c8267be..a61f1ac 100644
--- a/input.py
+++ b/input.py
@@ -194,3 +194,59 @@ def get_data(dataset):
     feas['num_features'] = num_features
     feas['num_nodes'] = num_nodes
     return feas
+
+def get_data_by_fold(dataset):
+    # Load data
+    # adj, features, y_test, tx, ty, test_maks, true_labels = load_data(data_name)
+    adj, features, y_test, tx, ty, test_maks, true_labels = load_data(dataset)  # datasets: e, ic, gpcr, nr, luo
+
+    # Store original adjacency matrix (without diagonal entries) for later
+    adj_orig = adj
+    # Remove the diagonal entries
+    adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
+    adj_orig.eliminate_zeros()
+
+    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
+    adj = adj_train
+    adj_dense = adj.toarray()
+
+    # Some preprocessing
+    adj_norm = preprocess_graph(adj)
+
+    num_nodes = adj.shape[0]
+    features_dense = features.tocoo().toarray()
+
+    features = sparse_to_tuple(features.tocoo())
+    # num_features is the feature dimensionality
+    num_features = features[2][1]
+    # features_nonzero is the number of non-zero feature entries
+    features_nonzero = features[1].shape[0]
+
+    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
+    norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2)
+
+    adj_label = adj_train + sp.eye(adj_train.shape[0])
+    adj_label = sparse_to_tuple(adj_label)
+    items = [
+        adj, num_features, num_nodes, features_nonzero,
+        pos_weight, norm, adj_norm, adj_label,
+        features, true_labels, train_edges, val_edges,
+        val_edges_false, test_edges, test_edges_false, adj_orig, features_dense, adj_dense
+    ]
+
+    feas = {}
+
+    print('num_features is:', num_features)
+    print('num_nodes is:', num_nodes)
+    print('features_nonzero is:', features_nonzero)
+    print('pos_weight is:', pos_weight)
+    print('norm is:', norm)
+
+    for item in items:
+        # item_name = [k for k, v in locals().iteritems() if v == item][0]
+        feas[retrieve_name(item)] = item
+
+    feas['num_features'] = num_features
+    feas['num_nodes'] = num_nodes
+    return feas
+
diff --git a/train.py b/train.py
index 2f2fa23..2476e26 100644
--- a/train.py
+++ b/train.py
@@ -19,13 +19,13 @@ def parse_args():
     parser.add_argument('--hidden2', type=int, default=32, help='Number of neurons in hidden layer 2.')
     parser.add_argument('--hidden3', type=int, default=64, help='Number of neurons in hidden layer 3.')
     parser.add_argument('--learning_rate', type=float, default=.6 * 0.001, help='Learning rate')
-    parser.add_argument('--discriminator_learning_rate', type=float, default=0.001, help='Discriminator learning rate')
+    parser.add_argument('--discriminator_learning_rate', type=float, default=0.0001, help='Discriminator learning rate')  # luo: 0.0001; other datasets: 0.001
     parser.add_argument('--epoch', type=int, default=250, help='Number of training epochs')
     parser.add_argument('--seed', type=int, default=50, help='Used to shuffle the dataset')
     parser.add_argument('--features', type=int, default=1, help='Whether (1) or not (0) to use features')
     parser.add_argument('--dropout', type=float, default=0., help='Dropout rate (1 - keep probability).')
     parser.add_argument('--weight_decay', type=float, default=0., help='Weight for L2 loss on embedding matrix.')
-    parser.add_argument('--dataset', type=str, default='e', help='Dataset to use')
+    parser.add_argument('--dataset', type=str, default='luo', help='Dataset to use')
     args = parser.parse_args()
     return args
@@ -40,7 +40,7 @@ if __name__ == "__main__":
     # DPP sampling and PCA dimensionality reduction
     DPP = FiniteDPP('correlation', **{'K': feas['adj'].toarray()})
     pca = PCA(n_components=settings.hidden2)
-    DPP.sample_exact_k_dpp(size=21)  # e: 21, ic: 6, gpcr: 3
+    DPP.sample_exact_k_dpp(size=20)  # e: 21, ic: 6, gpcr: 3
     index = DPP.list_of_samples[0]
     feature_sample = feas['features_dense']
     feature_sample = pca.fit_transform(feature_sample)
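
Reviewer note, not part of the patch: a minimal sketch of how the new get_data_by_fold loader would be driven from train.py, mirroring the existing get_data call site. The call site below is an assumption based on the defaults changed above (dataset 'luo'); only get_data_by_fold itself comes from this patch.

    from input import get_data_by_fold

    # Assumed call: same dataset keys that load_data accepts (e, ic, gpcr, nr, luo).
    feas = get_data_by_fold('luo')

    # The returned dict is keyed by variable name via retrieve_name(), e.g.:
    adj_norm = feas['adj_norm']          # normalized adjacency from preprocess_graph
    adj_label = feas['adj_label']        # adj_train + I, the reconstruction targets
    train_edges = feas['train_edges']    # per-fold split produced by mask_test_edges
    val_edges, val_edges_false = feas['val_edges'], feas['val_edges_false']

    # pos_weight upweights the scarce positive (edge) entries in the
    # reconstruction loss; norm rescales the loss over the full matrix.
    pos_weight, norm = feas['pos_weight'], feas['norm']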