From 057d1a0eb1cada01badf1f031b094d1c81ff66bd Mon Sep 17 00:00:00 2001
From: lab-pc <ismaxu@163.com>
Date: Thu, 20 Oct 2022 21:31:13 +0800
Subject: [PATCH] --

---
 constructor.py     |  4 +++-
 link_prediction.py | 20 ++++++++++++--------
 load_data.py       |  4 ++--
 metrics.py         |  7 +++++--
 preprocessing.py   |  2 +-
 5 files changed, 23 insertions(+), 14 deletions(-)

diff --git a/constructor.py b/constructor.py
index f16397f..b51f8a8 100644
--- a/constructor.py
+++ b/constructor.py
@@ -54,7 +54,7 @@ def format_data(data_name):
     # Load data
 
     # adj, features, y_test, tx, ty, test_maks, true_labels = load_data(data_name)
-    adj, features, y_test, tx, ty, test_maks, true_labels = load_data_1("e")
+    adj, features, y_test, tx, ty, test_maks, true_labels = load_data_1("luo")  # e  ic gpcr nr luo
 
     # Store original adjacency matrix (without diagonal entries) for later
     adj_orig = adj
@@ -105,6 +105,8 @@ def format_data(data_name):
         # item_name = [ k for k,v in locals().iteritems() if v == item][0]
         feas[retrieve_name(item)] = item
 
+    feas['num_features'] = num_features
+    feas['num_nodes'] = num_nodes
     return feas
 
 
diff --git a/link_prediction.py b/link_prediction.py
index c90197b..a857e94 100644
--- a/link_prediction.py
+++ b/link_prediction.py
@@ -43,7 +43,7 @@ class Link_pred_Runner():
         # index = DPP.list_of_samples[0]
 
         if self.data_name == 'cora':
-            DPP.sample_exact_k_dpp(size=21)
+            DPP.sample_exact_k_dpp(size=21)  # e 21 ic 6 gpcr 3
             index = DPP.list_of_samples[0]
             pass
         elif self.data_name == 'citeseer':
@@ -88,22 +88,26 @@ class Link_pred_Runner():
             emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'], feas['adj_label'], feas['features'], placeholders, feas['adj'], kde, feas['features_dense'])
 
             lm_train = linkpred_metrics(feas['val_edges'], feas['val_edges_false'])
-            roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)
+            roc_curr, ap_curr, _, aupr_score = lm_train.get_roc_score(emb, feas)
             val_roc_score.append(roc_curr)
             print("Epoch:", '%04d' % (epoch + 1),
                   "train_loss= {:.5f}, d_loss= {:.5f}, g_loss= {:.5f}, GD_loss= {:.5f}, GG_loss= {:.5f}".format(avg_cost[0], avg_cost[1], avg_cost[2], avg_cost[3], avg_cost[4]), "val_roc=",
-                  "{:.5f}".format(val_roc_score[-1]), "val_ap=", "{:.5f}".format(ap_curr))
+                  "{:.5f}".format(val_roc_score[-1]), "val_ap=", "{:.5f}".format(ap_curr), "val_aupr=", "{:.5f}".format(aupr_score))
 
             if (epoch + 1) % 10 == 0:
                 lm_test = linkpred_metrics(feas['test_edges'], feas['test_edges_false'])
-                roc_score, ap_score, _ = lm_test.get_roc_score(emb, feas)
-                print('Test ROC score: ' + str(roc_score))
-                print('Test AP score: ' + str(ap_score))
-                record.append([roc_score, ap_score])
+                roc_score, ap_score, _, aupr_score = lm_test.get_roc_score(emb, feas)
+                print('Test ROC score: ' + str(roc_score), 'Test AUPR score: ' + str(aupr_score), 'Test AP score: ' + str(ap_score))
+                # print('Test AUPR score: ' + str(aupr_score))
+                # print('Test AP score: ' + str(ap_score))
+                record.append([roc_score, aupr_score, ap_score])
                 record_emb.append(emb)
         rec = np.array(record)
         index = rec[:, 0].tolist().index(max(rec[:, 0].tolist()))
+        index_pr = rec[:, 1].tolist().index(max(rec[:, 1].tolist()))
         emb = record_emb[index]
         ana = record[index]
+        ana_pr = record[index_pr]
         # scio.savemat('result/{}_link_64_64_new.mat'.format(self.data_name), {'embedded': emb,'labels': feas['true_labels']})
-        print('The peak val_roc=%f, ap = %f' % (ana[0], ana[1]))
+        print('The peak [auc] test_roc=%f, aupr=%f, ap = %f' % (ana[0], ana[1], ana[2]))
+        print('The peak [aupr] test_roc=%f, aupr=%f, ap = %f' % (ana_pr[0], ana_pr[1], ana_pr[2]))
diff --git a/load_data.py b/load_data.py
index 1b96e05..90eff50 100644
--- a/load_data.py
+++ b/load_data.py
@@ -5,10 +5,10 @@ import scipy.sparse as sp
 
 
 def load_data_1(dataset):
-    adj = np.loadtxt('./data/partitioned_data/{}/orig/e_adj_orig.txt'.format(dataset), dtype=int)
+    adj = np.loadtxt('./data/partitioned_data/{0}/orig/{0}_adj_orig.txt'.format(dataset), dtype=int)
     adj = sp.csr_matrix(adj)
 
-    features = pickle.load(open("data/partitioned_data/e/feature/e_feature.pkl",'rb'))
+    features = pickle.load(open("data/partitioned_data/{0}/feature/{0}_feature.pkl".format(dataset),'rb'))
 
     y_test = 0
 
diff --git a/metrics.py b/metrics.py
index 170f333..e90f6b8 100644
--- a/metrics.py
+++ b/metrics.py
@@ -1,5 +1,5 @@
 from sklearn.metrics import f1_score
-from sklearn.metrics import roc_auc_score
+from sklearn.metrics import roc_auc_score,precision_recall_curve, auc
 from sklearn.metrics import average_precision_score
 from sklearn import metrics
 from munkres import Munkres, print_matrix
@@ -38,7 +38,10 @@ class linkpred_metrics():
         roc_score = roc_auc_score(labels_all, preds_all)
         ap_score = average_precision_score(labels_all, preds_all)
 
-        return roc_score, ap_score, emb
+        precision, recall, _thresholds = metrics.precision_recall_curve(labels_all, preds_all)
+        aupr_score = auc(recall, precision)
+
+        return roc_score, ap_score, emb, aupr_score
 
 
 class clustering_metrics():
diff --git a/preprocessing.py b/preprocessing.py
index e76b3d7..20bc07b 100644
--- a/preprocessing.py
+++ b/preprocessing.py
@@ -119,7 +119,7 @@ def mask_test_edges(adj):
         val_edges_false.append([idx_i, idx_j])
 
     assert ~ismember(test_edges_false, edges_all)
-    assert ~ismember(val_edges_false, edges_all)
+#    assert ~ismember(val_edges_false, edges_all)
     assert ~ismember(val_edges, train_edges)
     assert ~ismember(test_edges, train_edges)
     assert ~ismember(val_edges, test_edges)