From 057d1a0eb1cada01badf1f031b094d1c81ff66bd Mon Sep 17 00:00:00 2001 From: lab-pc Date: Thu, 20 Oct 2022 21:31:13 +0800 Subject: [PATCH] -- --- constructor.py | 4 +++- link_prediction.py | 20 ++++++++++++-------- load_data.py | 4 ++-- metrics.py | 7 +++++-- preprocessing.py | 2 +- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/constructor.py b/constructor.py index f16397f..b51f8a8 100644 --- a/constructor.py +++ b/constructor.py @@ -54,7 +54,7 @@ def format_data(data_name): # Load data # adj, features, y_test, tx, ty, test_maks, true_labels = load_data(data_name) - adj, features, y_test, tx, ty, test_maks, true_labels = load_data_1("e") + adj, features, y_test, tx, ty, test_maks, true_labels = load_data_1("luo") # e ic gpcr nr luo # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj @@ -105,6 +105,8 @@ def format_data(data_name): # item_name = [ k for k,v in locals().iteritems() if v == item][0] feas[retrieve_name(item)] = item + feas['num_features'] = num_features + feas['num_nodes'] = num_nodes return feas diff --git a/link_prediction.py b/link_prediction.py index c90197b..a857e94 100644 --- a/link_prediction.py +++ b/link_prediction.py @@ -43,7 +43,7 @@ class Link_pred_Runner(): # index = DPP.list_of_samples[0] if self.data_name == 'cora': - DPP.sample_exact_k_dpp(size=21) + DPP.sample_exact_k_dpp(size=21) # e 21 ic 6 gpcr 3 index = DPP.list_of_samples[0] pass elif self.data_name == 'citeseer': @@ -88,22 +88,26 @@ class Link_pred_Runner(): emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'], feas['adj_label'], feas['features'], placeholders, feas['adj'], kde, feas['features_dense']) lm_train = linkpred_metrics(feas['val_edges'], feas['val_edges_false']) - roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas) + roc_curr, ap_curr, _, aupr_score = lm_train.get_roc_score(emb, feas) val_roc_score.append(roc_curr) print("Epoch:", '%04d' % (epoch + 1), "train_loss= {:.5f}, d_loss= {:.5f}, g_loss= {:.5f}, GD_loss= {:.5f}, GG_loss= {:.5f}".format(avg_cost[0], avg_cost[1], avg_cost[2], avg_cost[3], avg_cost[4]), "val_roc=", - "{:.5f}".format(val_roc_score[-1]), "val_ap=", "{:.5f}".format(ap_curr)) + "{:.5f}".format(val_roc_score[-1]), "val_ap=", "{:.5f}".format(ap_curr), "val_aupr=", "{:.5f}".format(aupr_score)) if (epoch + 1) % 10 == 0: lm_test = linkpred_metrics(feas['test_edges'], feas['test_edges_false']) - roc_score, ap_score, _ = lm_test.get_roc_score(emb, feas) - print('Test ROC score: ' + str(roc_score)) - print('Test AP score: ' + str(ap_score)) - record.append([roc_score, ap_score]) + roc_score, ap_score, _, aupr_score = lm_test.get_roc_score(emb, feas) + print('Test ROC score: ' + str(roc_score), 'Test AUPR score: ' + str(aupr_score), 'Test AP score: ' + str(ap_score)) + # print('Test AUPR score: ' + str(aupr_score)) + # print('Test AP score: ' + str(ap_score)) + record.append([roc_score, aupr_score, ap_score]) record_emb.append(emb) rec = np.array(record) index = rec[:, 0].tolist().index(max(rec[:, 0].tolist())) + index_pr = rec[:, 1].tolist().index(max(rec[:, 1].tolist())) emb = record_emb[index] ana = record[index] + ana_pr = record[index_pr] # scio.savemat('result/{}_link_64_64_new.mat'.format(self.data_name), {'embedded': emb,'labels': feas['true_labels']}) - print('The peak val_roc=%f, ap = %f' % (ana[0], ana[1])) + print('The peak [auc] test_roc=%f, aupr=%f, ap = %f' % (ana[0], ana[1], ana[2])) + print('The peak [aupr] test_roc=%f, aupr=%f, ap = %f' % (ana_pr[0], ana_pr[1], ana_pr[2])) diff --git a/load_data.py b/load_data.py index 1b96e05..90eff50 100644 --- a/load_data.py +++ b/load_data.py @@ -5,10 +5,10 @@ import scipy.sparse as sp def load_data_1(dataset): - adj = np.loadtxt('./data/partitioned_data/{}/orig/e_adj_orig.txt'.format(dataset), dtype=int) + adj = np.loadtxt('./data/partitioned_data/{0}/orig/{0}_adj_orig.txt'.format(dataset), dtype=int) adj = sp.csr_matrix(adj) - features = pickle.load(open("data/partitioned_data/e/feature/e_feature.pkl",'rb')) + features = pickle.load(open("data/partitioned_data/{0}/feature/{0}_feature.pkl".format(dataset),'rb')) y_test = 0 diff --git a/metrics.py b/metrics.py index 170f333..e90f6b8 100644 --- a/metrics.py +++ b/metrics.py @@ -1,5 +1,5 @@ from sklearn.metrics import f1_score -from sklearn.metrics import roc_auc_score +from sklearn.metrics import roc_auc_score,precision_recall_curve, auc from sklearn.metrics import average_precision_score from sklearn import metrics from munkres import Munkres, print_matrix @@ -38,7 +38,10 @@ class linkpred_metrics(): roc_score = roc_auc_score(labels_all, preds_all) ap_score = average_precision_score(labels_all, preds_all) - return roc_score, ap_score, emb + precision, recall, _thresholds = metrics.precision_recall_curve(labels_all, preds_all) + aupr_score = auc(recall, precision) + + return roc_score, ap_score, emb, aupr_score class clustering_metrics(): diff --git a/preprocessing.py b/preprocessing.py index e76b3d7..20bc07b 100644 --- a/preprocessing.py +++ b/preprocessing.py @@ -119,7 +119,7 @@ def mask_test_edges(adj): val_edges_false.append([idx_i, idx_j]) assert ~ismember(test_edges_false, edges_all) - assert ~ismember(val_edges_false, edges_all) +# assert ~ismember(val_edges_false, edges_all) assert ~ismember(val_edges, train_edges) assert ~ismember(test_edges, train_edges) assert ~ismember(val_edges, test_edges)