--

2 years ago · 057d1a0eb1
parent df9704e595
commit 057d1a0eb1
5 changed files with 23 additions and 14 deletions
--- a/constructor.py
+++ b/constructor.py
@@ -54,7 +54,7 @@ def format_data(data_name):
    # Load data

    # adj, features, y_test, tx, ty, test_maks, true_labels = load_data(data_name)
-    adj, features, y_test, tx, ty, test_maks, true_labels = load_data_1("e")
+    adj, features, y_test, tx, ty, test_maks, true_labels = load_data_1("luo")  # e  ic gpcr nr luo

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
@@ -105,6 +105,8 @@ def format_data(data_name):
        # item_name = [ k for k,v in locals().iteritems() if v == item][0]
        feas[retrieve_name(item)] = item

+    feas['num_features'] = num_features
+    feas['num_nodes'] = num_nodes
    return feas


--- a/link_prediction.py
+++ b/link_prediction.py
@@ -43,7 +43,7 @@ class Link_pred_Runner():
        # index = DPP.list_of_samples[0]

        if self.data_name == 'cora':
-            DPP.sample_exact_k_dpp(size=21)
+            DPP.sample_exact_k_dpp(size=21)  # e 21 ic 6 gpcr 3
            index = DPP.list_of_samples[0]
            pass
        elif self.data_name == 'citeseer':
@@ -88,22 +88,26 @@ class Link_pred_Runner():
            emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'], feas['adj_label'], feas['features'], placeholders, feas['adj'], kde, feas['features_dense'])

            lm_train = linkpred_metrics(feas['val_edges'], feas['val_edges_false'])
-            roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)
+            roc_curr, ap_curr, _, aupr_score = lm_train.get_roc_score(emb, feas)
            val_roc_score.append(roc_curr)
            print("Epoch:", '%04d' % (epoch + 1),
                  "train_loss= {:.5f}, d_loss= {:.5f}, g_loss= {:.5f}, GD_loss= {:.5f}, GG_loss= {:.5f}".format(avg_cost[0], avg_cost[1], avg_cost[2], avg_cost[3], avg_cost[4]), "val_roc=",
-                  "{:.5f}".format(val_roc_score[-1]), "val_ap=", "{:.5f}".format(ap_curr))
+                  "{:.5f}".format(val_roc_score[-1]), "val_ap=", "{:.5f}".format(ap_curr), "val_aupr=", "{:.5f}".format(aupr_score))

            if (epoch + 1) % 10 == 0:
                lm_test = linkpred_metrics(feas['test_edges'], feas['test_edges_false'])
-                roc_score, ap_score, _ = lm_test.get_roc_score(emb, feas)
-                print('Test ROC score: ' + str(roc_score))
-                print('Test AP score: ' + str(ap_score))
-                record.append([roc_score, ap_score])
+                roc_score, ap_score, _, aupr_score = lm_test.get_roc_score(emb, feas)
+                print('Test ROC score: ' + str(roc_score), 'Test AUPR score: ' + str(aupr_score), 'Test AP score: ' + str(ap_score))
+                # print('Test AUPR score: ' + str(aupr_score))
+                # print('Test AP score: ' + str(ap_score))
+                record.append([roc_score, aupr_score, ap_score])
                record_emb.append(emb)
        rec = np.array(record)
        index = rec[:, 0].tolist().index(max(rec[:, 0].tolist()))
+        index_pr = rec[:, 1].tolist().index(max(rec[:, 1].tolist()))
        emb = record_emb[index]
        ana = record[index]
+        ana_pr = record[index_pr]
        # scio.savemat('result/{}_link_64_64_new.mat'.format(self.data_name), {'embedded': emb,'labels': feas['true_labels']})
-        print('The peak val_roc=%f, ap = %f' % (ana[0], ana[1]))
+        print('The peak [auc] test_roc=%f, aupr=%f, ap = %f' % (ana[0], ana[1], ana[2]))
+        print('The peak [aupr] test_roc=%f, aupr=%f, ap = %f' % (ana_pr[0], ana_pr[1], ana_pr[2]))
--- a/load_data.py
+++ b/load_data.py
@@ -5,10 +5,10 @@ import scipy.sparse as sp


 def load_data_1(dataset):
-    adj = np.loadtxt('./data/partitioned_data/{}/orig/e_adj_orig.txt'.format(dataset), dtype=int)
+    adj = np.loadtxt('./data/partitioned_data/{0}/orig/{0}_adj_orig.txt'.format(dataset), dtype=int)
    adj = sp.csr_matrix(adj)

-    features = pickle.load(open("data/partitioned_data/e/feature/e_feature.pkl",'rb'))
+    features = pickle.load(open("data/partitioned_data/{0}/feature/{0}_feature.pkl".format(dataset),'rb'))

    y_test = 0

--- a/metrics.py
+++ b/metrics.py
@@ -1,5 +1,5 @@
 from sklearn.metrics import f1_score
-from sklearn.metrics import roc_auc_score
+from sklearn.metrics import roc_auc_score,precision_recall_curve, auc
 from sklearn.metrics import average_precision_score
 from sklearn import metrics
 from munkres import Munkres, print_matrix
@@ -38,7 +38,10 @@ class linkpred_metrics():
        roc_score = roc_auc_score(labels_all, preds_all)
        ap_score = average_precision_score(labels_all, preds_all)

-        return roc_score, ap_score, emb
+        precision, recall, _thresholds = metrics.precision_recall_curve(labels_all, preds_all)
+        aupr_score = auc(recall, precision)
+
+        return roc_score, ap_score, emb, aupr_score


 class clustering_metrics():
--- a/preprocessing.py
+++ b/preprocessing.py
@@ -119,7 +119,7 @@ def mask_test_edges(adj):
        val_edges_false.append([idx_i, idx_j])

    assert ~ismember(test_edges_false, edges_all)
-    assert ~ismember(val_edges_false, edges_all)
+#    assert ~ismember(val_edges_false, edges_all)
    assert ~ismember(val_edges, train_edges)
    assert ~ismember(test_edges, train_edges)
    assert ~ismember(val_edges, test_edges)