更新src

2 years ago · f772462815
parent 3547377388
commit f772462815
12 changed files with 1242 additions and 0 deletions
--- a/init.py
+++ b/init.py
@ -0,0 +1,2 @@
 from __future__ import print_function
 from __future__ import division
--- a/constructor.py
+++ b/constructor.py
@ -0,0 +1,191 @@
 import tensorflow as tf
 import numpy as np
 from model import GCN, Generator_z2g, Discriminator, D_graph
 from optimizer import OptimizerAE, OptimizerCycle
 import scipy.sparse as sp
 from input_data import load_data
 import inspect
 from preprocessing import preprocess_graph, sparse_to_tuple, mask_test_edges, construct_feed_dict
 # Handle on TensorFlow's command-line flag module; FLAGS gives attribute access
 # to the flag values read in this module (FLAGS.hidden2, FLAGS.features,
 # FLAGS.dropout). The flags themselves are not defined in this file --
 # presumably in the settings module; confirm.
 flags = tf.app.flags
 FLAGS = flags.FLAGS
 def get_placeholder(adj, num_features):
    """Create the dictionary of TensorFlow placeholders used by the models.

    Only ``adj.shape[0]`` is read from ``adj``; it fixes the first dimension
    of the two dense placeholders.

    When feeding a ``tf.sparse_placeholder``, supply either an
    ``(indices, values, shape)`` tuple or a ``tf.SparseTensorValue``.

    Returns:
        dict with the keys 'features', 'features_dense', 'adj', 'adj_orig',
        'dropout' and 'real_distribution'.
    """
    row_count = adj.shape[0]
    holders = {}
    holders['features'] = tf.sparse_placeholder(tf.float32)
    # NOTE(review): this placeholder is created with the graph name
    # 'real_distribution', the same name requested for the last entry below.
    # It looks like a copy/paste slip; it is kept so tensor names stay stable.
    holders['features_dense'] = tf.placeholder(
        tf.float32, shape=[row_count, num_features], name='real_distribution')
    holders['adj'] = tf.sparse_placeholder(tf.float32)
    holders['adj_orig'] = tf.sparse_placeholder(tf.float32)
    # Dropout defaults to 0 when it is not fed explicitly.
    holders['dropout'] = tf.placeholder_with_default(0., shape=())
    holders['real_distribution'] = tf.placeholder(
        dtype=tf.float32, shape=[row_count, FLAGS.hidden2], name='real_distribution')
    return holders
 def get_model(model_str, placeholders, num_features, num_nodes, features_nonzero):
    """Build the discriminators and the generator model(s) for ``model_str``.

    The latent discriminator and the graph discriminator are always created
    and applied to the 'real_distribution' and 'features_dense' placeholders.
    A ``GCN`` encoder is created for 'arga_ae' and 'DBGAN'; the latent-to-graph
    generator ``Generator_z2g`` is created for 'DBGAN' only.

    ``num_nodes`` is accepted for call-site compatibility but is not used.

    Returns:
        (d_real, discriminator, model, model_z2g, D_Graph, GD_real). ``model``
        is None for an unrecognised ``model_str``; ``model_z2g`` is None
        unless ``model_str == 'DBGAN'``.
    """
    # Build the computation graph for the two discriminators first.
    discriminator = Discriminator()
    D_Graph = D_graph(num_features)
    d_real = discriminator.construct(placeholders['real_distribution'])
    GD_real = D_Graph.construct(placeholders['features_dense'])
    model = None
    # Fix: previously only bound in the 'DBGAN' branch, so every other
    # model_str crashed with UnboundLocalError at the return statement.
    model_z2g = None
    if model_str == 'arga_ae':
        model = GCN(placeholders, num_features, features_nonzero)
    elif model_str == 'DBGAN':
        model = GCN(placeholders, num_features, features_nonzero)
        model_z2g = Generator_z2g(placeholders, num_features, features_nonzero)
    return d_real, discriminator, model, model_z2g, D_Graph, GD_real
 def format_data(data_name):
    """Load a dataset and derive every quantity the training code needs.

    The graph edges are split with ``mask_test_edges``; from that point on
    ``adj`` refers to the training adjacency matrix.

    The result dictionary is built explicitly. It used to be assembled by
    looking every value up in the caller's frame by object identity
    (``retrieve_name``), which is fragile when two names share one object
    and listed ``features_dense`` twice.

    Returns:
        dict with the keys 'adj', 'num_features', 'num_nodes',
        'features_nonzero', 'pos_weight', 'norm', 'adj_norm', 'adj_label',
        'features', 'true_labels', 'train_edges', 'val_edges',
        'val_edges_false', 'test_edges', 'test_edges_false', 'adj_orig',
        'features_dense' and 'adj_dense'.
    """
    # Load data; only the adjacency, the features and the labels are used here.
    adj, features, _, _, _, _, true_labels = load_data(data_name)
    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj_orig.eliminate_zeros()
    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
    adj = adj_train
    adj_dense = adj.toarray()
    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless
    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    num_nodes = adj.shape[0]
    features_dense = features.tocoo().toarray()
    features = sparse_to_tuple(features.tocoo())
    # Feature dimensionality; assumes sparse_to_tuple returns
    # (coords, values, shape) -- TODO confirm against preprocessing.py.
    num_features = features[2][1]
    # Number of stored (non-zero) feature entries.
    features_nonzero = features[1].shape[0]
    total_entries = adj.shape[0] * adj.shape[0]
    edge_total = adj.sum()
    # Ratio of zero to non-zero entries of the training adjacency.
    pos_weight = float(total_entries - edge_total) / edge_total
    norm = total_entries / float((total_entries - edge_total) * 2)
    # Reconstruction target: training adjacency plus self-loops.
    adj_label = sparse_to_tuple(adj_train + sp.eye(adj_train.shape[0]))
    print('num_features is:', num_features)
    print('num_nodes is:', num_nodes)
    print('features_nonzero is:', features_nonzero)
    print('pos_weight is:', pos_weight)
    print('norm is:', norm)
    feas = {
        'adj': adj,
        'num_features': num_features,
        'num_nodes': num_nodes,
        'features_nonzero': features_nonzero,
        'pos_weight': pos_weight,
        'norm': norm,
        'adj_norm': adj_norm,
        'adj_label': adj_label,
        'features': features,
        'true_labels': true_labels,
        'train_edges': train_edges,
        'val_edges': val_edges,
        'val_edges_false': val_edges_false,
        'test_edges': test_edges,
        'test_edges_false': test_edges_false,
        'adj_orig': adj_orig,
        'features_dense': features_dense,
        'adj_dense': adj_dense,
    }
    return feas
 def get_optimizer(model_str, model, model_z2g, D_Graph, discriminator, placeholders, pos_weight, norm, d_real, num_nodes, GD_real):
    """Wire the model outputs into the optimizer object for ``model_str``.

    'arga_ae' constructs the encoder once and returns an ``OptimizerAE``.
    'DBGAN' constructs both generators, applies both discriminators to the
    generated samples, computes the discriminator gradients at randomly
    interpolated inputs and returns an ``OptimizerCycle``.

    The statement order is kept exactly as before because it determines the
    order in which graph ops and variables are created.

    ``num_nodes`` is accepted for call-site compatibility but is not used.

    Raises:
        ValueError: if ``model_str`` is neither 'arga_ae' nor 'DBGAN'
            (this used to surface as an UnboundLocalError on ``opt``).
    """
    if model_str == 'arga_ae':
        output = model.construct()
        embeddings = output[0]
        reconstructions = output[1]
        d_fake = discriminator.construct(embeddings, reuse=True)
        # NOTE(review): the visible part of OptimizerAE reads a name
        # `gradient` that is not passed here -- confirm this branch runs.
        opt = OptimizerAE(preds=reconstructions,
                          labels=tf.reshape(tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                                                      validate_indices=False), [-1]),
                          pos_weight=pos_weight,
                          norm=norm,
                          d_real=d_real,
                          d_fake=d_fake)
    elif model_str == 'DBGAN':
        # Latent -> graph generator: index 0 is its output, index 1 its hidden layer.
        z2g = model_z2g.construct()
        hidden = z2g[1]
        z2g = z2g[0]
        # Run the encoder's second layer on the generator's hidden activations.
        preds_z2g = model.construct(hidden=hidden, reuse=True)[0]
        # Graph -> latent encoder on the real features.
        g2z = model.construct()
        embeddings = g2z[0]
        reconstructions = g2z[1]
        d_fake = discriminator.construct(embeddings, reuse=True)
        GD_fake = D_Graph.construct(z2g, reuse=True)
        # Gradient of the latent discriminator at a random interpolation
        # between prior samples and embeddings (presumably used for a
        # gradient penalty inside OptimizerCycle -- not visible here).
        epsilon = tf.random_uniform(shape=[1], minval=0.0, maxval=1.0)
        interpolated_input = epsilon * placeholders['real_distribution'] + (1 - epsilon) * embeddings
        gradient = tf.gradients(discriminator.construct(interpolated_input, reuse=True), [interpolated_input])[0]
        # Same construction for the graph discriminator on dense features.
        epsilon = tf.random_uniform(shape=[1], minval=0.0, maxval=1.0)
        interpolated_input = epsilon * placeholders['features_dense'] + (1 - epsilon) * z2g
        gradient_z = tf.gradients(D_Graph.construct(interpolated_input, reuse=True), [interpolated_input])[0]
        opt = OptimizerCycle(preds=reconstructions,
                             labels=tf.reshape(tf.sparse_tensor_to_dense(placeholders['adj_orig'],
                                                                         validate_indices=False), [-1]),
                             pos_weight=pos_weight,
                             norm=norm,
                             d_real=d_real,
                             d_fake=d_fake,
                             GD_real=GD_real,
                             GD_fake=GD_fake,
                             preds_z2g=preds_z2g,
                             labels_z2g=placeholders['real_distribution'],
                             preds_cycle=model_z2g.construct(embeddings, reuse=True)[0],
                             labels_cycle=placeholders['features_dense'],
                             gradient=gradient,
                             gradient_z=gradient_z)
    else:
        # Fix: fail with a clear message instead of an unbound `opt`.
        raise ValueError("Unknown model_str: {!r} (expected 'arga_ae' or 'DBGAN')".format(model_str))
    return opt
 def update(model, opt, sess, adj_norm, adj_label, features, placeholders, adj, distribution, adj_dense):
    """Run one training round and return the current embeddings and losses.

    One round is five reconstruction steps (``opt.opt_op``) followed by one
    step each of the latent generator, the latent discriminator, the graph
    discriminator and the graph generator, all with the same feed dict.

    ``adj_dense`` is fed into the 'features_dense' placeholder.
    ``distribution`` must provide ``sample(n)``; its output is fed into the
    'real_distribution' placeholder.

    Returns:
        (emb, avg_cost) where ``emb`` is the evaluated ``model.z_mean`` and
        ``avg_cost`` is [reconstruct_loss, d_loss, g_loss, GD_loss, GG_loss].
    """
    # Construct feed dictionary
    feed_dict = construct_feed_dict(adj_norm, adj_label, features, placeholders)
    feed_dict.update({placeholders['dropout']: FLAGS.dropout})
    feed_dict.update({placeholders['features_dense']: adj_dense})
    # NOTE(review): this overwrites the FLAGS.dropout value set two lines up,
    # so every run below is fed dropout 0 -- confirm that is intended.
    feed_dict.update({placeholders['dropout']: 0})
    # NOTE(review): the Gaussian draw is discarded by the next assignment; it
    # still advances NumPy's global RNG, so it is left in place.
    z_real_dist = np.random.randn(adj.shape[0], FLAGS.hidden2)
    z_real_dist = distribution.sample(adj.shape[0])
    feed_dict.update({placeholders['real_distribution']: z_real_dist})
    # Five reconstruction steps; only the last loss value is reported.
    for j in range(5):
        _, reconstruct_loss = sess.run([opt.opt_op, opt.cost], feed_dict=feed_dict)
    g_loss, _ = sess.run([opt.generator_loss, opt.generator_optimizer], feed_dict=feed_dict)
    d_loss, _ = sess.run([opt.dc_loss, opt.discriminator_optimizer], feed_dict=feed_dict)
    GD_loss, _ = sess.run([opt.GD_loss, opt.discriminator_optimizer_z2g], feed_dict=feed_dict)
    GG_loss, _ = sess.run([opt.generator_loss_z2g, opt.generator_optimizer_z2g], feed_dict=feed_dict)
    # GD_loss = sess.run(opt.GD_loss, feed_dict=feed_dict)
    # GG_loss = sess.run(opt.generator_loss_z2g, feed_dict=feed_dict)
    # g_loss = sess.run(opt.generator_loss, feed_dict=feed_dict)
    # d_loss = sess.run(opt.dc_loss, feed_dict=feed_dict)
    # Embeddings are evaluated after the parameter updates above.
    emb = sess.run(model.z_mean, feed_dict=feed_dict)
    avg_cost = [reconstruct_loss, d_loss, g_loss, GD_loss, GG_loss]
    return emb, avg_cost
 def retrieve_name(var):
    """Return the first local name in the caller's frame bound to ``var``.

    Matching is by object identity. The full list of matching names is
    printed before the first one is returned; an IndexError is raised when
    the caller has no local bound to ``var``.
    """
    caller_locals = inspect.currentframe().f_back.f_locals
    matches = []
    for local_name, local_value in caller_locals.items():
        if local_value is var:
            matches.append(local_name)
    print(matches)
    return matches[0]
--- a/initializations.py
+++ b/initializations.py
@ -0,0 +1,11 @@
 import tensorflow as tf
 import numpy as np
 def weight_variable_glorot(input_dim, output_dim, name=""):
    """Return a ``tf.Variable`` of shape [input_dim, output_dim] initialised
    with the Glorot & Bengio (AISTATS 2010) uniform scheme.

    Values are drawn uniformly from [-r, r] with
    r = sqrt(6 / (input_dim + output_dim)).
    """
    bound = np.sqrt(6.0 / (input_dim + output_dim))
    start_values = tf.random_uniform(
        [input_dim, output_dim],
        minval=-bound,
        maxval=bound,
        dtype=tf.float32)
    weights = tf.Variable(start_values, name=name)
    return weights
--- a/input_data.py
+++ b/input_data.py
@ -0,0 +1,122 @@
 import numpy as np
 import pickle as pkl
 import networkx as nx
 import scipy.sparse as sp
 import sys
 def parse_index_file(filename):
    """Read a text file holding one integer per line and return them as a list.

    The file is closed deterministically (it used to be left open), and
    blank lines -- such as a trailing empty line -- are skipped instead of
    crashing ``int()``.

    Raises:
        ValueError: if a non-blank line is not a valid integer.
    """
    with open(filename) as index_file:
        return [int(line) for line in index_file if line.strip()]
 def sample_mask(idx, l):
    """Create a boolean mask of length ``l`` that is True at positions ``idx``.

    Uses the builtin ``bool`` dtype: the ``np.bool`` alias used previously
    was deprecated in NumPy 1.20 and removed in 1.24.
    """
    mask = np.zeros(l, dtype=bool)
    mask[idx] = True
    return mask
 def load_data(dataset):
    """Load one dataset from the ``data/ind.<dataset>.*`` files.

    Expected contents of the pickled files (as described by the original
    comments; not verified here):
      x     => feature vectors of the training instances
               (e.g. scipy.sparse.csr.csr_matrix)
      tx    => feature vectors of the test instances
      allx  => feature vectors of labelled and unlabelled training
               instances, a superset of x
      y     => one-hot labels of the training instances (numpy.ndarray)
      ty    => one-hot labels of the test instances
      ally  => one-hot labels for the instances in allx
      graph => {index: [index_of_neighbor_nodes]}
    ``data/ind.<dataset>.test.index`` lists the ids of the test instances.

    Returns:
        (adj, features, y_test, tx, ty, test_mask, labels) where ``labels``
        is the argmax over the one-hot label matrix.
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for name in names:
        # SECURITY: pickle can execute arbitrary code while loading; only use
        # dataset files that come from a trusted source.
        with open("data/ind.{}.{}".format(dataset, name), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))
    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset))
    test_idx_range = np.sort(test_idx_reorder)
    if dataset == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended
    features = sp.vstack((allx, tx)).tolil()
    # Put the test rows back into the order given by the index file.
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]
    # Only the test split is returned, so the train/validation masks and
    # label matrices that used to be computed here are no longer built.
    test_mask = sample_mask(test_idx_range.tolist(), labels.shape[0])
    y_test = np.zeros(labels.shape)
    y_test[test_mask, :] = labels[test_mask, :]
    return adj, features, y_test, tx, ty, test_mask, np.argmax(labels,1)
 def load_alldata(dataset_str):
    """Load one dataset together with its train/validation/test splits.

    Reads the pickled ``data/ind.<dataset_str>.*`` files and the
    ``data/ind.<dataset_str>.test.index`` file.

    The pickle files are opened in binary mode inside ``with`` blocks and
    decoded with ``encoding='latin1'`` on Python 3. They used to be opened
    in text mode and never closed, which fails on Python 3.

    Returns:
        (adj, features, y_train, y_val, y_test, train_mask, val_mask,
        test_mask, labels) where ``labels`` is the argmax over the one-hot
        label matrix. The validation split is the 500 rows following the
        ``len(y)`` training rows.
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for name in names:
        # SECURITY: pickle can execute arbitrary code while loading; only use
        # dataset files that come from a trusted source.
        with open("data/ind.{}.{}".format(dataset_str, name), 'rb') as f:
            if sys.version_info > (3, 0):
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))
    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)
    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended
    features = sp.vstack((allx, tx)).tolil()
    # Put the test rows back into the order given by the index file.
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]
    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y)+500)
    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])
    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]
    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask, np.argmax(labels, 1)
--- a/layers.py
+++ b/layers.py
@ -0,0 +1,158 @@
 from initializations import *
 import tensorflow as tf
 # Handle on TensorFlow's command-line flag module and its parsed values.
 flags = tf.app.flags
 FLAGS = flags.FLAGS
 # Module-wide counter table: layer-name prefix -> last ID handed out.
 _LAYER_UIDS = {}
 def get_layer_uid(layer_name=''):
    """Return the next unique integer ID for ``layer_name``.

    The first request for a given name yields 1; every later request for
    the same name yields the previous value plus one.
    """
    next_uid = _LAYER_UIDS.get(layer_name, 0) + 1
    _LAYER_UIDS[layer_name] = next_uid
    return next_uid
 def dropout_sparse(x, keep_prob, num_nonzero_elems):
    """
    Dropout for sparse tensors. Currently fails for very large sparse tensors (>1M elements)
    x: sparse input tensor
    keep_prob: probability of keeping each stored element; kept elements
        are rescaled by 1 / keep_prob
    num_nonzero_elems: number of non-zero (stored) elements in x
    """
    # floor(keep_prob + U[0, 1)) is 1 with probability keep_prob, else 0.
    uniform_noise = tf.random_uniform([num_nonzero_elems])
    keep_mask = tf.cast(tf.floor(keep_prob + uniform_noise), dtype=tf.bool)
    retained = tf.sparse_retain(x, keep_mask)
    rescale = 1./keep_prob
    return retained * rescale
 class Layer(object):
    """Common base class of all layers.

    # Properties
        name: String used for the layer's name scope. When it is not given,
            it is derived from the lowercase class name and a unique ID.
        vars: dict holding the layer's variables (empty in the base class).
        logging: Boolean flag taken from the ``logging`` keyword.
        issparse: Boolean, False in the base class.
    # Methods
        _call(inputs): The layer's computation (identity in the base class).
        __call__(inputs): Runs _call() inside ``tf.name_scope(self.name)``.
    """
    def __init__(self, **kwargs):
        permitted = {'name', 'logging'}
        for key in kwargs:
            assert key in permitted, 'Invalid keyword argument: ' + key
        chosen_name = kwargs.get('name')
        if not chosen_name:
            # Fall back to "<classname>_<uid>".
            prefix = self.__class__.__name__.lower()
            chosen_name = prefix + '_' + str(get_layer_uid(prefix))
        self.name = chosen_name
        self.vars = {}
        self.logging = kwargs.get('logging', False)
        self.issparse = False
    def _call(self, inputs):
        return inputs
    def __call__(self, inputs):
        with tf.name_scope(self.name):
            return self._call(inputs)
 class GraphConvolution(Layer):
    """Graph convolution on dense inputs for an undirected graph without edge labels.

    Computes ``act(adj @ (dropout(inputs) @ weights))`` where ``adj`` is a
    sparse tensor and ``dropout`` is the drop rate (keep probability is
    ``1 - dropout``).
    """
    def __init__(self, input_dim, output_dim, adj, dropout=0., act=tf.nn.relu, **kwargs):
        super(GraphConvolution, self).__init__(**kwargs)
        self.adj = adj
        self.act = act
        self.dropout = dropout
        with tf.variable_scope(self.name + '_vars'):
            self.vars['weights'] = weight_variable_glorot(input_dim, output_dim, name="weights")
    def _call(self, inputs):
        keep_prob = 1-self.dropout
        dropped = tf.nn.dropout(inputs, keep_prob)
        projected = tf.matmul(dropped, self.vars['weights'])
        propagated = tf.sparse_tensor_dense_matmul(self.adj, projected)
        return self.act(propagated)
 class GraphConvolutionSparse(Layer):
    """
    Graph convolution layer for sparse inputs.

    Same computation as the dense layer, except that the input is a sparse
    tensor: dropout is applied with ``dropout_sparse`` (which needs
    ``features_nonzero``, the number of stored input entries) and the weight
    product uses a sparse-dense matmul.
    """
    def __init__(self, input_dim, output_dim, adj, features_nonzero, dropout=0., act=tf.nn.relu, **kwargs):
        super(GraphConvolutionSparse, self).__init__(**kwargs)
        self.issparse = True
        self.features_nonzero = features_nonzero
        self.adj = adj
        self.act = act
        self.dropout = dropout
        with tf.variable_scope(self.name + '_vars'):
            self.vars['weights'] = weight_variable_glorot(input_dim, output_dim, name="weights")
    def _call(self, inputs):
        keep_prob = 1-self.dropout
        dropped = dropout_sparse(inputs, keep_prob, self.features_nonzero)
        projected = tf.sparse_tensor_dense_matmul(dropped, self.vars['weights'])
        propagated = tf.sparse_tensor_dense_matmul(self.adj, projected)
        return self.act(propagated)
 class InnerProductDecoder(Layer):
    """Decoder layer for link prediction.

    ``input_dim`` is accepted but not used by the layer.
    """
    def __init__(self, input_dim, dropout=0., act=tf.nn.sigmoid, **kwargs):
        super(InnerProductDecoder, self).__init__(**kwargs)
        self.act = act
        self.dropout = dropout
    def _call(self, inputs):
        """
        The decoder is just the (dropped-out) input multiplied by its own
        transpose, flattened to one dimension and passed through ``act``.
        """
        z = tf.nn.dropout(inputs, 1-self.dropout)
        z_t = tf.transpose(z)
        scores = tf.matmul(z, z_t)
        flat_scores = tf.reshape(scores, [-1])
        return self.act(flat_scores)
 class GraphConvolution_z2g(Layer):
    """Basic graph convolution layer for undirected graph without edge labels.

    Computes ``act(adj @ (dropout(inputs) @ weights))`` on dense inputs.

    Two defects of the previous version are fixed:
      * ``__init__`` called ``super(GraphConvolution, self)``; this class
        does not derive from ``GraphConvolution``, so instantiation raised
        TypeError.
      * a second ``_call`` definition shadowed the dense one and read
        ``self.features_nonzero``, which this class never sets. The dead
        duplicate is removed and the dense implementation is kept, matching
        the constructor signature and the docstring.
    """
    def __init__(self, input_dim, output_dim, adj, dropout=0., act=tf.nn.relu, **kwargs):
        super(GraphConvolution_z2g, self).__init__(**kwargs)
        with tf.variable_scope(self.name + '_vars'):
            self.vars['weights'] = weight_variable_glorot(input_dim, output_dim, name="weights")
        self.dropout = dropout
        self.adj = adj
        self.act = act
    def _call(self, inputs):
        x = inputs
        # `dropout` is the drop rate; tf.nn.dropout takes the keep probability.
        x = tf.nn.dropout(x, 1-self.dropout)
        x = tf.matmul(x, self.vars['weights'])
        x = tf.sparse_tensor_dense_matmul(self.adj, x)
        outputs = self.act(x)
        return outputs
--- a/link_prediction.py
+++ b/link_prediction.py
@ -0,0 +1,110 @@
 from __future__ import division
 from __future__ import print_function
 import os
 # Select the CUDA devices visible to TensorFlow; this is set before the
 # tensorflow import below.
 # NOTE(review): the previous comment said "Train on CPU (hide GPU) due to
 # memory constraints", but this value lists device ids rather than hiding
 # them, and the leading space inside the string looks unintentional -- confirm.
 os.environ['CUDA_VISIBLE_DEVICES'] = " 0,4,2,3"
 import tensorflow as tf
 import settings
 from constructor import get_placeholder, get_model, format_data, get_optimizer, update
 from metrics import linkpred_metrics
 from sklearn.neighbors import KernelDensity
 from dppy.finite_dpps import FiniteDPP
 from sklearn.decomposition import PCA
 import numpy as np
 import scipy.io as scio
 # Settings: handle on TensorFlow's command-line flags. FLAGS.hidden2 is read
 # below as the PCA output dimensionality.
 flags = tf.app.flags
 FLAGS = flags.FLAGS
 class Link_pred_Runner():
    """Train a model for link prediction and save the best test embeddings.

    ``settings`` must provide 'data_name', 'iterations' and 'model'.
    """
    def __init__(self, settings):
        self.data_name = settings['data_name']
        self.iteration = settings['iterations']
        self.model = settings['model']
    def erun(self):
        """Run the whole experiment.

        Steps: format the data, fit a kernel density estimate on the
        PCA-compressed features of a set of anchor nodes (sampled with a
        k-DPP for 'cora', hard-coded for 'citeseer' and 'pubmed'), build the
        model and optimizer, train for ``self.iteration`` epochs, and write
        the embeddings of the recorded epoch with the highest test ROC to
        ``result/<data_name>_link_64_64_new.mat``.

        Test metrics are recorded every 10th epoch only. When no epoch was
        recorded (fewer than 10 iterations) nothing is saved; this used to
        crash with an IndexError.

        Raises:
            ValueError: if ``self.data_name`` has no anchor-node definition
                (previously an unbound ``index`` NameError).
        """
        model_str = self.model
        # formatted data
        feas = format_data(self.data_name)
        # Define placeholders; only adj.shape is used from the adjacency.
        placeholders = get_placeholder(feas['adj'], feas['num_features'])
        # Anchor nodes for the mixture density built from DPP samples and
        # kernel density estimation.
        pca = PCA(n_components=FLAGS.hidden2)
        if self.data_name == 'cora':
            # The DPP is only needed here, so the dense adjacency kernel is
            # no longer materialised for the other datasets.
            DPP = FiniteDPP('correlation', **{'K': feas['adj'].toarray()})
            DPP.sample_exact_k_dpp(size=21)
            index = DPP.list_of_samples[0]
        elif self.data_name == 'citeseer':
            index = np.array([1782, 741, 3258, 3189, 3112, 2524, 2895, 1780, 1100, 2735, 1318,
                              2944, 1825, 18, 987, 2564, 463, 6, 3173, 701, 1901, 2349,
                              2786, 2412, 646, 2626, 2648, 1793, 432, 538, 1729, 1217, 1397,
                              1932, 2850, 458, 2129, 702, 2934, 2030, 2882, 1393, 308, 1271,
                              1106, 2688, 629, 1145, 3251, 1903, 1004, 1149, 1385, 285, 858,
                              2977, 844, 335, 532, 404, 3174, 528])
        elif self.data_name == 'pubmed':
            index = np.array([842, 3338, 5712, 17511, 10801, 2714, 6970, 13296, 5466,
                              2230])
        else:
            raise ValueError("No anchor nodes defined for dataset {!r} "
                             "(expected 'cora', 'citeseer' or 'pubmed')".format(self.data_name))
        feature_sample = feas['features_dense']
        feature_sample = pca.fit_transform(feature_sample)
        featuresCompress = np.array([feature_sample[i] for i in index])
        kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)
        # construct model
        d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(model_str, placeholders, feas['num_features'], feas['num_nodes'], feas['features_nonzero'])
        # Optimizer
        opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph, discriminator, placeholders, feas['pos_weight'], feas['norm'], d_real, feas['num_nodes'], GD_real)
        # Initialize session
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        val_roc_score = []
        record = []
        record_emb = []
        # Train model
        for epoch in range(self.iteration):
            emb, avg_cost = update(ae_model, opt, sess, feas['adj_norm'], feas['adj_label'], feas['features'], placeholders, feas['adj'], kde, feas['features_dense'])
            lm_train = linkpred_metrics(feas['val_edges'], feas['val_edges_false'])
            roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)
            val_roc_score.append(roc_curr)
            print("Epoch:", '%04d' % (epoch + 1),
                  "train_loss= {:.5f}, d_loss= {:.5f}, g_loss= {:.5f}, GD_loss= {:.5f}, GG_loss= {:.5f}".format(avg_cost[0], avg_cost[1], avg_cost[2], avg_cost[3], avg_cost[4]), "val_roc=",
                  "{:.5f}".format(val_roc_score[-1]), "val_ap=", "{:.5f}".format(ap_curr))
            if (epoch + 1) % 10 == 0:
                lm_test = linkpred_metrics(feas['test_edges'], feas['test_edges_false'])
                roc_score, ap_score, _ = lm_test.get_roc_score(emb, feas)
                print('Test ROC score: ' + str(roc_score))
                print('Test AP score: ' + str(ap_score))
                record.append([roc_score, ap_score])
                record_emb.append(emb)
        if not record:
            print('No test scores were recorded (test evaluation runs every 10 epochs); nothing saved.')
            return
        # First recorded epoch with the highest test ROC.
        best = max(range(len(record)), key=lambda i: record[i][0])
        emb = record_emb[best]
        ana = record[best]
        # Make sure the output directory exists so a full training run is not
        # lost at the very last step.
        if not os.path.isdir('result'):
            os.makedirs('result')
        scio.savemat('result/{}_link_64_64_new.mat'.format(self.data_name), {'embedded': emb,
                                                                             'labels': feas['true_labels']})
        # NOTE(review): the values printed here are the recorded *test* ROC/AP,
        # although the message says val_roc; text kept for log compatibility.
        print('The peak val_roc=%f, ap = %f' % (ana[0], ana[1]))
--- a/metrics.py
+++ b/metrics.py
@ -0,0 +1,108 @@
 from sklearn.metrics import f1_score
 from sklearn.metrics import roc_auc_score
 from sklearn.metrics import average_precision_score
 from sklearn import metrics
 from munkres import Munkres, print_matrix
 import numpy as np
 class linkpred_metrics():
    """ROC-AUC / average-precision evaluation of link prediction.

    ``edges_pos`` and ``edges_neg`` are iterables of node-index pairs for
    existing and non-existing edges respectively.
    """
    def __init__(self, edges_pos, edges_neg):
        self.edges_pos = edges_pos
        self.edges_neg = edges_neg
    def get_roc_score(self, emb, feas):
        """Score the stored edges with ``sigmoid(emb @ emb.T)``.

        ``feas`` is accepted for call-site compatibility; the per-edge
        lookups into ``feas['adj_orig']`` that were computed before were
        never used and have been removed.

        Returns:
            (roc_score, ap_score, emb)
        """
        def sigmoid(x):
            return 1 / (1 + np.exp(-x))
        # Predict on test set of edges
        adj_rec = np.dot(emb, emb.T)
        preds = [sigmoid(adj_rec[e[0], e[1]]) for e in self.edges_pos]
        preds_neg = [sigmoid(adj_rec[e[0], e[1]]) for e in self.edges_neg]
        preds_all = np.hstack([preds, preds_neg])
        # Fix: the zero labels must match the number of *negative* edges
        # (previously len(preds) was used for both halves).
        labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
        roc_score = roc_auc_score(labels_all, preds_all)
        ap_score = average_precision_score(labels_all, preds_all)
        return roc_score, ap_score, emb
 class clustering_metrics():
    """Clustering quality metrics for predicted labels against true labels."""
    def __init__(self, true_label, predict_label):
        self.true_label = true_label
        self.pred_label = predict_label
    def clusteringAcc(self):
        """Match predicted clusters to true classes and score the result.

        The predicted cluster ids are mapped onto the true class ids with
        the Munkres (Hungarian) algorithm on the negated co-occurrence
        counts; the metrics are then computed on the remapped labels.

        Returns:
            (acc, f1_macro, precision_macro, recall_macro, f1_micro,
            precision_micro, recall_micro), or the integer 0 (after printing
            a message) when the number of distinct predicted labels differs
            from the number of distinct true labels.
        """
        # best mapping between true_label and predict label
        l1 = list(set(self.true_label))
        numclass1 = len(l1)
        l2 = list(set(self.pred_label))
        numclass2 = len(l2)
        if numclass1 != numclass2:
            print('Class Not equal, Error!!!!')
            return 0
        cost = np.zeros((numclass1, numclass2), dtype=int)
        for i, c1 in enumerate(l1):
            mps = [i1 for i1, e1 in enumerate(self.true_label) if e1 == c1]
            for j, c2 in enumerate(l2):
                cost[i][j] = sum(1 for i1 in mps if self.pred_label[i1] == c2)
        # match two clustering results by Munkres algorithm (it minimises,
        # hence the negated counts)
        m = Munkres()
        indexes = m.compute((-cost).tolist())
        # get the match results
        new_predict = np.zeros(len(self.pred_label))
        for i, c in enumerate(l1):
            # corresponding label in l2:
            c2 = l2[indexes[i][1]]
            # ai is the index with label==c2 in the pred_label list
            ai = [ind for ind, elm in enumerate(self.pred_label) if elm == c2]
            new_predict[ai] = c
        acc = metrics.accuracy_score(self.true_label, new_predict)
        f1_macro = metrics.f1_score(self.true_label, new_predict, average='macro')
        precision_macro = metrics.precision_score(self.true_label, new_predict, average='macro')
        recall_macro = metrics.recall_score(self.true_label, new_predict, average='macro')
        f1_micro = metrics.f1_score(self.true_label, new_predict, average='micro')
        precision_micro = metrics.precision_score(self.true_label, new_predict, average='micro')
        recall_micro = metrics.recall_score(self.true_label, new_predict, average='micro')
        return acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro
    def evaluationClusterModelFromLabel(self):
        """Compute all metrics, print them and append them to 'recoder.txt'.

        When ``clusteringAcc`` reports a class-count mismatch (returns 0),
        the accuracy/F1/precision/recall values are reported as 0.0; this
        used to crash while unpacking the scalar.

        Returns:
            (acc, nmi, adjscore)
        """
        nmi = metrics.normalized_mutual_info_score(self.true_label, self.pred_label)
        adjscore = metrics.adjusted_rand_score(self.true_label, self.pred_label)
        scores = self.clusteringAcc()
        if not isinstance(scores, tuple):
            scores = (0.0,) * 7
        acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro = scores
        summary = 'ACC=%f, f1_macro=%f, precision_macro=%f, recall_macro=%f, f1_micro=%f, precision_micro=%f, recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore)
        print(summary)
        # `with` closes the log file even if a write fails.
        with open('recoder.txt', 'a') as fh:
            fh.write(summary)
            fh.write('\r\n')
        return acc, nmi, adjscore
--- a/model.py
+++ b/model.py
@ -0,0 +1,200 @@
 from layers import GraphConvolution, GraphConvolutionSparse, InnerProductDecoder
 import tensorflow as tf
 # Handle on TensorFlow's command-line flags; the layer widths FLAGS.hidden1,
 # FLAGS.hidden2 and FLAGS.hidden3 are read by the models below.
 flags = tf.app.flags
 FLAGS = flags.FLAGS
 class Model(object):
    """Base class for models.

    Accepts the optional keywords ``name`` (defaults to the lowercase class
    name) and ``logging`` (defaults to False); any other keyword fails an
    assertion. Subclasses implement ``_build``.
    """
    def __init__(self, **kwargs):
        permitted = {'name', 'logging'}
        for key in kwargs:
            assert key in permitted, 'Invalid keyword argument: ' + key
        self.name = kwargs.get('name') or self.__class__.__name__.lower()
        self.logging = kwargs.get('logging', False)
        self.vars = {}
    def _build(self):
        raise NotImplementedError
    def build(self):
        """ Wrapper for _build() """
        with tf.variable_scope(self.name):
            self._build()
        # Collect the global variables created under this model's scope.
        scoped = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
        self.vars = dict((v.name, v) for v in scoped)
    def fit(self):
        pass
    def predict(self):
        pass
 class GCN(Model):
    """Two-layer graph-convolutional encoder followed by an inner-product decoder.

    Constructor arguments:
        placeholders: dict providing 'features' (the sparse input), 'adj'
            (the adjacency matrix) and 'dropout'.
        num_features: number of input features, i.e. the input dimensionality.
        features_nonzero: number of non-zero feature entries.
    """
    def __init__(self, placeholders, num_features, features_nonzero, **kwargs):
        super(GCN, self).__init__(**kwargs)
        self.inputs = placeholders['features']
        self.input_dim = num_features
        self.features_nonzero = features_nonzero
        self.adj = placeholders['adj']
        self.dropout = placeholders['dropout']
    def construct(self, inputs = None, hidden = None, reuse = False):
        """Build the encoder/decoder ops.

        Args:
            inputs: sparse input tensor; defaults to the 'features' placeholder.
            hidden: optional tensor fed to the second layer instead of the
                first layer's output.
            reuse: passed to the 'Encoder' variable scope.

        Returns:
            (z_mean, reconstructions)

        NOTE(review): the layer weights are created with ``tf.Variable``
        (via weight_variable_glorot), which ``variable_scope(reuse=True)``
        does not affect, so each call appears to create fresh weights --
        confirm whether sharing was intended.
        """
        # Fix: compare with `is None`; `== None` on a tensor is an equality
        # operation, not an identity check.
        if inputs is None:
            inputs = self.inputs
        with tf.variable_scope('Encoder', reuse=reuse):
            self.hidden1 = GraphConvolutionSparse(input_dim=self.input_dim,
                                                  output_dim=FLAGS.hidden1,
                                                  adj=self.adj,
                                                  features_nonzero = self.features_nonzero,
                                                  act=tf.nn.relu,
                                                  dropout=self.dropout,
                                                  logging=self.logging,
                                                  name='e_dense_1')(inputs)
            # Not consumed below; kept so the set and order of graph ops is unchanged.
            self.noise = gaussian_noise_layer(self.hidden1, 0.1)
            if hidden is None:
                hidden = self.hidden1
            self.embeddings = GraphConvolution(input_dim=FLAGS.hidden1,
                                           output_dim=FLAGS.hidden2,
                                           adj=self.adj,
                                           act=lambda x: x,
                                           dropout=self.dropout,
                                           logging=self.logging,
                                           name='e_dense_2')(hidden)
            self.z_mean = self.embeddings
            self.reconstructions = InnerProductDecoder(input_dim=FLAGS.hidden2,
                                          act=lambda x: x,
                                          logging=self.logging)(self.embeddings)
            return self.z_mean, self.reconstructions
 class Generator_z2g(Model):
    """Two-layer graph-convolutional generator from latent codes to feature space.

    Constructor arguments:
        placeholders: dict providing 'real_distribution' (the default
            input), 'adj' (the adjacency matrix) and 'dropout'.
        num_features: output dimensionality of the second layer.
        features_nonzero: number of non-zero feature entries (stored only).
    """
    def __init__(self, placeholders, num_features, features_nonzero, **kwargs):
        super(Generator_z2g, self).__init__(**kwargs)
        self.inputs = placeholders['real_distribution']
        self.input_dim = num_features
        self.features_nonzero = features_nonzero
        self.adj = placeholders['adj']
        self.dropout = placeholders['dropout']
    def construct(self, inputs = None, reuse = False):
        """Build the generator ops.

        Args:
            inputs: input tensor; defaults to the 'real_distribution'
                placeholder.
            reuse: passed to the 'Decoder' variable scope.

        Returns:
            (z_mean, hidden1): the second layer's output and the first
            layer's activations.
        """
        # Fix: compare with `is None`; `== None` on a tensor is an equality
        # operation, not an identity check.
        if inputs is None:
            inputs = self.inputs
        with tf.variable_scope('Decoder', reuse=reuse):
            self.hidden1 = GraphConvolution(input_dim=FLAGS.hidden2,
                                                  output_dim=FLAGS.hidden1,
                                                  adj=self.adj,
                                                  act=tf.nn.relu,
                                                  dropout=self.dropout,
                                                  logging=self.logging,
                                                  name='GG_dense_1')(inputs)
            self.embeddings = GraphConvolution(input_dim=FLAGS.hidden1,
                                           output_dim=self.input_dim,
                                           adj=self.adj,
                                           act=lambda x: x,
                                           dropout=self.dropout,
                                           logging=self.logging,
                                           name='GG_dense_2')(self.hidden1)
        self.z_mean = self.embeddings
        return self.z_mean,self.hidden1
 def dense(x, n1, n2, name):
    """
    Fully connected layer ``x @ weights + bias``.
    :param x: input tensor to the dense layer
    :param n1: no. of input neurons
    :param n2: no. of output neurons
    :param name: variable scope name holding the layer's "weights" and "bias"
    :return: result of the affine map (last dimension n2)
    """
    with tf.variable_scope(name, reuse=None):
        # The graph-level seed is re-set on every call, as before.
        tf.set_random_seed(1)
        kernel_init = tf.random_normal_initializer(mean=0., stddev=0.01)
        kernel = tf.get_variable("weights", shape=[n1, n2], initializer=kernel_init)
        offset_init = tf.constant_initializer(0.0)
        offset = tf.get_variable("bias", shape=[n2], initializer=offset_init)
        product = tf.matmul(x, kernel)
        return tf.add(product, offset, name='matmul')
 class D_graph(Model):
    """Three-layer dense discriminator applied to rows of width ``num_features``."""
    def __init__(self, num_features, **kwargs):
        super(D_graph, self).__init__(**kwargs)
        self.num_features = num_features
        self.act = tf.nn.relu
    def construct(self, inputs, reuse = False):
        """Return the discriminator output for ``inputs``.

        Original note (translated): the input is a graph's adjacency with
        each column treated as a channel, so there are num_nodes channels.
        NOTE(review): the call sites visible in this project pass the dense
        feature matrix and the first layer expects ``num_features`` inputs
        -- confirm which description is current.
        """
        with tf.variable_scope('D_Graph'):
            if reuse:
                tf.get_variable_scope().reuse_variables()
            # Layer widths: num_features -> 512 -> 128 -> 1.
            first = dense(inputs, self.num_features, 512, name='GD_den1')
            first = tf.nn.relu(first)
            second = dense(first, 512, 128, name='GD_den2')
            second = tf.nn.relu(second)
            logits = dense(second, 128, 1, name='GD_output')
            return logits
 def gaussian_noise_layer(input_layer, std):
    """Return ``input_layer`` plus zero-mean Gaussian noise of standard deviation ``std``."""
    noise_shape = tf.shape(input_layer)
    perturbation = tf.random_normal(
        shape=noise_shape, mean=0.0, stddev=std, dtype=tf.float32)
    return input_layer + perturbation
 class Discriminator(Model):
    """Three-layer dense discriminator on latent codes of width FLAGS.hidden2."""
    def __init__(self, **kwargs):
        super(Discriminator, self).__init__(**kwargs)
        self.act = tf.nn.relu
    def construct(self, inputs, reuse = False):
        """Return the discriminator output for ``inputs``.

        Layer widths: FLAGS.hidden2 -> FLAGS.hidden3 -> FLAGS.hidden1 -> 1.
        With ``reuse=True`` the variables of the 'Discriminator' scope are
        reused instead of being created.
        """
        with tf.variable_scope('Discriminator'):
            if reuse:
                tf.get_variable_scope().reuse_variables()
            tf.set_random_seed(1)
            first = dense(inputs, FLAGS.hidden2, FLAGS.hidden3, name='dc_den1')
            first = tf.nn.relu(first)
            second = dense(first, FLAGS.hidden3, FLAGS.hidden1, name='dc_den2')
            second = tf.nn.relu(second)
            logits = dense(second, FLAGS.hidden1, 1, name='dc_output')
            return logits
--- a/optimizer.py
+++ b/optimizer.py
@ -0,0 +1,138 @@
 import tensorflow as tf
 flags = tf.app.flags
 FLAGS = flags.FLAGS
 class OptimizerAE(object):
    """Losses and training ops for the adversarially regularised auto-encoder.

    After construction the instance exposes:
      * ``dc_loss_real`` / ``dc_loss_fake`` / ``dc_loss``: discriminator losses,
      * ``cost``: weighted cross-entropy reconstruction loss,
      * ``generator_loss``: adversarial generator loss plus ``cost``,
      * ``discriminator_optimizer`` / ``generator_optimizer``: adversarial
        training ops,
      * ``optimizer`` / ``opt_op`` / ``grads_vars``: a separate Adam optimiser
        that minimises ``cost`` alone.
    """
    def __init__(self, preds, labels, pos_weight, norm, d_real, d_fake, gradient=None):
        """Build all loss tensors and training ops in the default graph.

        Args:
            preds: reconstruction logits.
            labels: reconstruction targets, same shape as ``preds``.
            pos_weight: weight applied to positive targets in the
                reconstruction cross-entropy.
            norm: scalar factor applied to the mean reconstruction loss.
            d_real: discriminator output on samples from the prior.
            d_fake: discriminator output on the encoder's embeddings.
            gradient: optional tensor used for a gradient-penalty term on the
                discriminator loss. The original code read an undefined name
                ``gradient`` here (NameError on construction); it is now an
                optional argument and the penalty is skipped when it is None.
                NOTE(review): presumably the discriminator's gradient w.r.t.
                its input, rank >= 2 -- confirm with the caller.
        """
        self.real = d_real
        # Discriminator loss (score-difference form: push real scores up and
        # fake scores down). The sigmoid cross-entropy variant was disabled in
        # the original code.
        self.dc_loss_real = - tf.reduce_mean(self.real)
        self.dc_loss_fake = tf.reduce_mean(d_fake)
        self.dc_loss = self.dc_loss_fake + self.dc_loss_real
        if gradient is not None:
            # Penalise deviation of the root-mean-square of `gradient`
            # (taken over axes 0 and 1) from 1.
            GP_loss = tf.reduce_mean(tf.square(tf.sqrt(tf.reduce_mean(tf.square(gradient), axis = [0, 1])) - 1))
            self.dc_loss = self.dc_loss + GP_loss
        # Generator loss: the generator wants fake scores to be high.
        generator_loss = -self.dc_loss_fake
        # pos_weight scales the cost of errors on positive targets relative to
        # negative ones, trading recall against precision.
        self.cost = norm * tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(logits=preds, targets=labels, pos_weight=pos_weight))
        self.generator_loss = generator_loss + self.cost
        # Variables are assigned to each player by substring match on their name.
        all_variables = tf.trainable_variables()
        dc_var = [var for var in all_variables if 'dc_' in var.name]
        en_var = [var for var in all_variables if 'e_' in var.name]
        with tf.variable_scope(tf.get_variable_scope()):
            self.discriminator_optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
                                                             beta1=0.9, name='adam1').minimize(self.dc_loss, var_list=dc_var)
            # The generator step also uses FLAGS.discriminator_learning_rate.
            self.generator_optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
                                                         beta1=0.9, name='adam2').minimize(self.generator_loss, var_list=en_var)
        # Besides the adversarial updates, the reconstruction cost alone is
        # optimised by a separate Adam optimiser; the training loop decides
        # when this op is run.
        self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)  # Adam Optimizer
        self.opt_op = self.optimizer.minimize(self.cost)
        self.grads_vars = self.optimizer.compute_gradients(self.cost)
 class OptimizerCycle(object):
    """Losses and training ops for the model with two discriminators and a cycle cost.

    After construction the instance exposes:
      * ``dc_loss_real`` / ``dc_loss_fake`` / ``dc_loss``: sigmoid
        cross-entropy losses of the discriminator fed with ``d_real``/``d_fake``,
      * ``GD_loss_real`` / ``GD_loss_fake`` / ``GD_loss``: the same for the
        discriminator fed with ``GD_real``/``GD_fake``,
      * ``cost``: weighted cross-entropy reconstruction loss plus the squared
        cycle error,
      * ``generator_loss`` / ``generator_loss_z2g``: generator objectives,
      * four adversarial training ops and ``optimizer`` / ``opt_op``, which
        minimise ``cost`` alone.

    ``gradient`` and ``gradient_z`` are accepted but only referenced by the
    disabled gradient-penalty lines; ``preds_z2g``/``labels_z2g`` only feed the
    local ``cost_z2g``, which no active loss uses.
    """
    def __init__(self, preds, labels, pos_weight, norm, d_real, d_fake, GD_real, GD_fake, preds_z2g, labels_z2g, preds_cycle,labels_cycle,gradient, gradient_z):
        preds_sub = preds
        labels_sub = labels
        self.real = d_real
        # Discriminator loss: real samples are labelled 1, fake samples 0.
        self.dc_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(self.real), logits=self.real,name='dclreal'))
        # Disabled score-difference alternative:
        #self.dc_loss_real = - tf.reduce_mean(self.real)
        self.dc_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(d_fake), logits=d_fake,name='dcfake'))
        #self.dc_loss_fake = tf.reduce_mean(d_fake)
        # Disabled gradient-penalty terms (the only users of `gradient` / `gradient_z`):
        #GP_loss = tf.reduce_mean(tf.square(tf.sqrt(tf.reduce_mean(tf.square(gradient), axis = [0, 1])) - 1))
        #GP_loss_z = tf.reduce_mean(tf.square(tf.sqrt(tf.reduce_mean(tf.square(gradient_z), axis = [0, 1])) - 1))
        #self.dc_loss = self.dc_loss_fake + self.dc_loss_real + 10.0 * GP_loss
        # Same real/fake cross-entropy for the second discriminator (GD_*).
        self.GD_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(GD_real), logits=GD_real,name='GD_real'))
        #self.GD_loss_real = - tf.reduce_mean(GD_real)
        self.GD_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(GD_fake), logits=GD_fake,name='GD_fake'))
        #self.GD_loss_fake = tf.reduce_mean(GD_fake)
        self.dc_loss = self.dc_loss_fake + self.dc_loss_real
        self.GD_loss = self.GD_loss_fake + self.GD_loss_real
        # Generator losses: each generator wants its fake samples labelled 1.
        generator_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(d_fake), logits=d_fake, name='gl'))
        #generator_loss = -self.dc_loss_fake
        generator_loss_z2g = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(GD_fake), logits=GD_fake, name='G_z2g'))
        #generator_loss_z2g = -self.GD_loss_fake
        # pos_weight scales the cost of errors on positive targets relative to
        # negative ones, trading recall against precision.
        self.cost = norm * tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(logits=preds_sub, targets=labels_sub, pos_weight=pos_weight))
        # Mean squared cycle-reconstruction error, scaled by `norm`.
        cost_cycle = norm * tf.reduce_mean(tf.square(preds_cycle - labels_cycle))
        # NOTE: cost_z2g is built but not added to any active loss below.
        cost_z2g = norm * tf.reduce_mean(tf.square(preds_z2g-labels_z2g))
        #with tf.device("/gpu:1"):
        #self.cost = 0.00001*self.cost + cost_cycle #for citseer cluster
        self.cost = self.cost + cost_cycle
        self.generator_loss = generator_loss + self.cost
        self.generator_loss_z2g = generator_loss_z2g
        # Variables are assigned to each player by substring match on their name.
        all_variables = tf.trainable_variables()
        dc_var = [var for var in all_variables if 'dc_' in var.name]
        en_var = [var for var in all_variables if 'e_' in var.name]
        GG_var = [var for var in all_variables if 'GG' in var.name]
        GD_var = [var for var in all_variables if 'GD' in var.name]
        # All four adversarial optimisers use FLAGS.discriminator_learning_rate;
        # the names 'adam1'/'adam2' are each used twice.
        with tf.variable_scope(tf.get_variable_scope()):
            self.discriminator_optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
                                                             beta1=0.9, name='adam1').minimize(self.dc_loss, var_list=dc_var) #minimize(dc_loss_real, var_list=dc_var)
            self.generator_optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
                                                         beta1=0.9, name='adam2').minimize(self.generator_loss, var_list=en_var)
            self.discriminator_optimizer_z2g = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
                                                             beta1=0.9, name='adam1').minimize(self.GD_loss, var_list=GD_var)
            self.generator_optimizer_z2g = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
                                                         beta1=0.9, name='adam2').minimize(self.generator_loss_z2g, var_list=GG_var)
        # Besides the adversarial updates, `self.cost` alone is optimised by a
        # separate Adam optimiser (no var_list is passed); the training loop
        # decides when this op is run.
        self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)  # Adam Optimizer
        self.opt_op = self.optimizer.minimize(self.cost)
            #self.grads_vars = self.optimizer.compute_gradients(self.cost)
        # Disabled: separate optimisation of cost_z2g.
        #self.optimizer_z2g = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)  # Adam Optimizer
        #self.opt_op_z2g = self.optimizer.minimize(cost_z2g)
        #self.grads_vars_z2g = self.optimizer.compute_gradients(cost_z2g)
--- a/preprocessing.py
+++ b/preprocessing.py
@ -0,0 +1,138 @@
 import numpy as np
 import scipy.sparse as sp
 def sparse_to_tuple(sparse_mx):
    """Return ``(coords, values, shape)`` for a scipy sparse matrix.

    ``coords`` is an (nnz, 2) array of ``[row, col]`` pairs, ``values`` the
    matching stored entries and ``shape`` the matrix shape. Matrices that are
    not already in COO format are converted first.
    """
    coo = sparse_mx if sp.isspmatrix_coo(sparse_mx) else sparse_mx.tocoo()
    indices = np.vstack((coo.row, coo.col)).T
    return indices, coo.data, coo.shape
 def preprocess_graph(adj):
    """Add self-loops to ``adj``, degree-normalise it and return it in tuple form.

    With A~ = A + I and D the diagonal matrix of row sums of A~, the result is
    (A~ D^-1/2)^T D^-1/2, i.e. D^-1/2 A~ D^-1/2 when A is symmetric. The
    normalised matrix is returned as ``sparse_to_tuple`` output:
    ``(coords, values, shape)``.
    """
    adj = sp.coo_matrix(adj)
    # A~ = A + I: every node gets a self-connection.
    adj_self_loops = adj + sp.eye(adj.shape[0])
    # Row sums of A~ are the node degrees (including the self-loop).
    degrees = np.array(adj_self_loops.sum(1))
    d_inv_sqrt = sp.diags(np.power(degrees, -0.5).flatten())
    # Scale columns, transpose, scale columns again.
    right_scaled = adj_self_loops.dot(d_inv_sqrt)
    normalized = right_scaled.transpose().dot(d_inv_sqrt).tocoo()
    return sparse_to_tuple(normalized)
 def construct_feed_dict(adj_normalized, adj, features, placeholders):
    """Map the 'features', 'adj' and 'adj_orig' placeholders to their values.

    Returns a new dict keyed by ``placeholders['features']``,
    ``placeholders['adj']`` and ``placeholders['adj_orig']`` (inserted in that
    order) holding ``features``, ``adj_normalized`` and ``adj`` respectively.
    """
    return {
        placeholders['features']: features,
        placeholders['adj']: adj_normalized,
        placeholders['adj_orig']: adj,
    }
 def mask_test_edges(adj):
    """Split the edges of ``adj`` into train / validation / test sets with negatives.

    The diagonal is removed and the upper triangle is used as the list of
    (undirected) edges. A random 5% of those edges is held out for validation
    and a further 10% for testing; the rest form the training graph. For each
    held-out set an equally sized list of node pairs that are not edges of
    ``adj`` is sampled as negative examples.

    Splits are drawn from NumPy's global random state, so they differ between
    runs unless ``np.random`` is seeded.

    Args:
        adj: scipy sparse adjacency matrix.
            NOTE(review): assumed symmetric (undirected graph) -- confirm
            against the data loader.

    Returns:
        Tuple ``(adj_train, train_edges, val_edges, val_edges_false,
        test_edges, test_edges_false)``. ``adj_train`` is a symmetric sparse
        matrix built from the training edges; the ``*_edges`` arrays have
        shape (k, 2) and hold each edge in one direction only; the
        ``*_false`` values are lists of ``[i, j]`` pairs.
    """
    def as_edge_array(edge_list):
        # Normalise a single [i, j] pair, a list of pairs, or an empty list to
        # shape (k, 2) so the broadcasting in ismember always works.
        return np.asarray(edge_list).reshape(-1, 2)

    def ismember(a, b, tol=5):
        # True if any row of `a` equals (to `tol` decimals) any row of `b`.
        a = as_edge_array(a)
        b = as_edge_array(b)
        rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
        return bool(np.any(rows_close))

    def sample_false_edges(count, is_true_edge):
        # Draw `count` distinct node pairs (in either direction) for which
        # `is_true_edge(i, j)` is false and i != j.
        num_nodes = adj.shape[0]
        sampled = []
        while len(sampled) < count:
            idx_i = np.random.randint(0, num_nodes)
            idx_j = np.random.randint(0, num_nodes)
            if idx_i == idx_j:
                continue
            if is_true_edge(idx_i, idx_j):
                continue
            if ismember([idx_j, idx_i], sampled) or ismember([idx_i, idx_j], sampled):
                continue
            sampled.append([idx_i, idx_j])
        return sampled

    # Remove diagonal elements and drop the explicit zeros this leaves behind.
    adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj.eliminate_zeros()
    # Check that the diagonal is empty (without densifying the whole matrix).
    assert adj.diagonal().sum() == 0

    # `edges` holds each undirected edge once (upper triangle only);
    # `edges_all` holds every stored entry, i.e. both directions.
    edges = sparse_to_tuple(sp.triu(adj))[0]
    edges_all = sparse_to_tuple(adj)[0]

    # Hold out 10% of the edges for testing and 5% (1/20) for validation.
    num_test = int(np.floor(edges.shape[0] / 10.))
    num_val = int(np.floor(edges.shape[0] / 20.))

    all_edge_idx = list(range(edges.shape[0]))
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
    test_edges = edges[test_edge_idx]
    val_edges = edges[val_edge_idx]
    # hstack yields a float array when either list is empty; np.delete needs
    # integer indices, so cast explicitly.
    held_out_idx = np.hstack([test_edge_idx, val_edge_idx]).astype(int)
    train_edges = np.delete(edges, held_out_idx, axis=0)

    # Negative test pairs: not present in the full graph.
    test_edges_false = sample_false_edges(
        len(test_edges),
        lambda i, j: ismember([i, j], edges_all))
    # Negative validation pairs: checked against the FULL edge set. Checking
    # only train and validation edges (as before) could pick a real test edge
    # and trip the sanity assertion below.
    val_edges_false = sample_false_edges(
        len(val_edges),
        lambda i, j: ismember([i, j], edges_all) or ismember([j, i], edges_all))

    # Sanity checks on the split.
    assert not ismember(test_edges_false, edges_all)
    assert not ismember(val_edges_false, edges_all)
    assert not ismember(val_edges, train_edges)
    assert not ismember(test_edges, train_edges)
    assert not ismember(val_edges, test_edges)

    # Re-build the (symmetric) adjacency matrix from the training edges only.
    data = np.ones(train_edges.shape[0])
    adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape)
    adj_train = adj_train + adj_train.T

    # NOTE: these edge lists only contain single direction of edge!
    return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false
--- a/run.py
+++ b/run.py
@ -0,0 +1,15 @@
 import settings
 from link_prediction import Link_pred_Runner
 # Entry script: choose a dataset / model / task, build the run settings and
 # start the matching runner.
 dataname = 'cora'       # 'cora' or 'citeseer' or 'pubmed'
 model = 'DBGAN'          # 'arga_ae' or 'DBGAN'
 task = 'link_prediction'
 # Keep the result under its own name so the imported `settings` module is not
 # rebound to a dict.
 run_settings = settings.get_settings(dataname, model, task)
 if task == 'link_prediction':
    runner = Link_pred_Runner(run_settings)
 else:
    # Only the link-prediction runner is imported by this script; fail clearly
    # instead of hitting a NameError on `runner` below.
    raise ValueError('unsupported task for this script: %r' % (task,))
 runner.erun()
--- a/settings.py
+++ b/settings.py
@ -0,0 +1,49 @@
 import tensorflow as tf
 import numpy as np
 # Command-line flags shared by the whole project (read elsewhere through FLAGS).
 flags = tf.app.flags
 FLAGS = flags.FLAGS
 flags.DEFINE_integer('hidden3', 64, 'Number of units in hidden layer 3.')
 flags.DEFINE_integer('discriminator_out', 0, 'discriminator_out.')
 flags.DEFINE_float('discriminator_learning_rate', 0.001, 'Initial learning rate.')
 flags.DEFINE_float('learning_rate', .6*0.001, 'Initial learning rate.')
 flags.DEFINE_integer('hidden1', 32, 'Number of units in hidden layer 1.')#64 for Citeseer and Pubmed
 flags.DEFINE_integer('hidden2', 32, 'Number of units in hidden layer 2.')#64 for Citeseer and Pubmed
 flags.DEFINE_float('weight_decay', 0., 'Weight for L2 loss on embedding matrix.')
 flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).')
 flags.DEFINE_integer('features', 1, 'Whether to use features (1) or not (0).')
 # NOTE: this flag is not the value used for seeding below; the module-level
 # `seed = 7` is.
 flags.DEFINE_integer('seed', 50, 'seed for fixing the results.')
 # Base iteration count; get_settings multiplies it by 4 for link prediction.
 flags.DEFINE_integer('iterations', 60, 'number of iterations.')
 '''
 infor: number of clusters 
 '''
 # Maps dataset name -> number of clusters (used for the clustering task).
 infor = {'cora': 7, 'citeseer': 6, 'pubmed':3}
 '''
 We did not set any seed when we conducted the experiments described in the paper;
 We set a seed here to steadily reveal better performance of ARGA
 '''
 # Seed NumPy and TensorFlow (graph-level seed) at import time.
 seed = 7
 np.random.seed(seed)
 tf.set_random_seed(seed)
 def get_settings(dataname, model, task):
    """Return the run-settings dict for a dataset / model / task combination.

    Args:
        dataname: dataset name; 'citeseer', 'cora' and 'pubmed' are the known
            ones. Any other name prints an error message; for the clustering
            task it is additionally rejected because no cluster count is known.
        model: model identifier, stored unchanged under key 'model'.
        task: 'clustering' or 'link_prediction'.

    Returns:
        dict with keys 'data_name', 'iterations' and 'model', plus
        'clustering_num' for the clustering task. Link prediction runs for
        four times ``FLAGS.iterations``.

    Raises:
        ValueError: if ``task`` is not supported, or if ``task`` is
            'clustering' and ``dataname`` has no entry in ``infor``.
            (Previously these cases printed a message and then failed with
            UnboundLocalError / KeyError.)
    """
    if dataname not in ('citeseer', 'cora', 'pubmed'):
        print('error: wrong data set name')
    if task == 'clustering':
        if dataname not in infor:
            raise ValueError('no cluster count known for data set %r' % (dataname,))
        result = {
            'data_name': dataname,
            'iterations': FLAGS.iterations,
            'clustering_num': infor[dataname],
            'model': model,
        }
    elif task == 'link_prediction':
        iterations = 4 * FLAGS.iterations
        print('epoch is', iterations)
        result = {'data_name': dataname, 'iterations': iterations, 'model': model}
    else:
        print('error: wrong task name')
        raise ValueError('wrong task name: %r' % (task,))
    return result
		`@ -0,0 +1,2 @@`
							`from __future__ import print_function`
							`from __future__ import division`