format code

main
lab-pc 2 years ago
parent c91e3cf0f4
commit 55bb0f3d59
  1. 3
      constructor.py
  2. 1
      initializations.py
  3. 16
      input_data.py
  4. 19
      layers.py
  5. 6
      link_prediction.py
  6. 11
      metrics.py
  7. 84
      model.py
  8. 83
      optimizer.py
  9. 26
      preprocessing.py
  10. 7
      run.py
  11. 1
      settings.py

@ -99,7 +99,7 @@ def format_data(data_name):
print('norm is:', norm)
for item in items:
#item_name = [ k for k,v in locals().iteritems() if v == item][0]
# item_name = [ k for k,v in locals().iteritems() if v == item][0]
feas[retrieve_name(item)] = item
return feas
@ -188,4 +188,3 @@ def retrieve_name(var):
callers_local_vars = inspect.currentframe().f_back.f_locals.items()
print([var_name for var_name, var_val in callers_local_vars if var_val is var])
return [var_name for var_name, var_val in callers_local_vars if var_val is var][0]

@ -1,6 +1,7 @@
import tensorflow as tf
import numpy as np
def weight_variable_glorot(input_dim, output_dim, name=""):
"""Create a weight variable with Glorot & Bengio (AISTATS 2010)
initialization.

@ -11,12 +11,14 @@ def parse_index_file(filename):
index.append(int(line.strip()))
return index
def sample_mask(idx, l):
    """Create a boolean mask of length ``l`` that is True at positions ``idx``.

    Args:
        idx: index or sequence of indices to mark as True.
        l: total length of the mask.

    Returns:
        np.ndarray of shape (l,) and dtype bool.
    """
    mask = np.zeros(l)
    mask[idx] = 1
    # np.bool was a deprecated alias removed in NumPy 1.24; builtin bool is equivalent.
    return np.array(mask, dtype=bool)
def load_data(dataset):
# load the data: x, tx, allx, graph
# x => 训练实例的特征向量,如scipy.sparse.csr.csr_matrix类的实例
@ -42,9 +44,9 @@ def load_data(dataset):
if dataset == 'citeseer':
# Fix citeseer dataset (there are some isolated nodes in the graph)
# Find isolated nodes, add them as zero-vecs into the right position、
test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
tx_extended[test_idx_range-min(test_idx_range), :] = tx
tx_extended[test_idx_range - min(test_idx_range), :] = tx
tx = tx_extended
ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
ty_extended[test_idx_range - min(test_idx_range), :] = ty
@ -72,7 +74,7 @@ def load_data(dataset):
y_val[val_mask, :] = labels[val_mask, :]
y_test[test_mask, :] = labels[test_mask, :]
return adj, features, y_test, tx, ty, test_mask, np.argmax(labels,1)
return adj, features, y_test, tx, ty, test_mask, np.argmax(labels, 1)
def load_alldata(dataset_str):
@ -89,12 +91,12 @@ def load_alldata(dataset_str):
if dataset_str == 'citeseer':
# Fix citeseer dataset (there are some isolated nodes in the graph)
# Find isolated nodes, add them as zero-vecs into the right position
test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder) + 1)
tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
tx_extended[test_idx_range-min(test_idx_range), :] = tx
tx_extended[test_idx_range - min(test_idx_range), :] = tx
tx = tx_extended
ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
ty_extended[test_idx_range-min(test_idx_range), :] = ty
ty_extended[test_idx_range - min(test_idx_range), :] = ty
ty = ty_extended
features = sp.vstack((allx, tx)).tolil()
@ -106,7 +108,7 @@ def load_alldata(dataset_str):
idx_test = test_idx_range.tolist()
idx_train = range(len(y))
idx_val = range(len(y), len(y)+500)
idx_val = range(len(y), len(y) + 500)
train_mask = sample_mask(idx_train, labels.shape[0])
val_mask = sample_mask(idx_val, labels.shape[0])

@ -32,7 +32,7 @@ def dropout_sparse(x, keep_prob, num_nonzero_elems):
random_tensor += tf.random_uniform(noise_shape)
dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool)
pre_out = tf.sparse_retain(x, dropout_mask)
return pre_out * (1./keep_prob)
return pre_out * (1. / keep_prob)
class Layer(object):
@ -46,6 +46,7 @@ class Layer(object):
(i.e. takes input, returns output)
__call__(inputs): Wrapper for _call()
"""
def __init__(self, **kwargs):
allowed_kwargs = {'name', 'logging'}
for kwarg in kwargs.keys():
@ -71,6 +72,7 @@ class Layer(object):
class GraphConvolution(Layer):
"""Basic graph convolution layer for undirected graph without edge labels."""
def __init__(self, input_dim, output_dim, adj, dropout=0., act=tf.nn.relu, **kwargs):
super(GraphConvolution, self).__init__(**kwargs)
with tf.variable_scope(self.name + '_vars'):
@ -81,7 +83,7 @@ class GraphConvolution(Layer):
def _call(self, inputs):
x = inputs
x = tf.nn.dropout(x, 1-self.dropout)
x = tf.nn.dropout(x, 1 - self.dropout)
x = tf.matmul(x, self.vars['weights'])
x = tf.sparse_tensor_dense_matmul(self.adj, x)
outputs = self.act(x)
@ -93,6 +95,7 @@ class GraphConvolutionSparse(Layer):
Graph convolution layer for sparse inputs.
多了一个features_nonzero
"""
def __init__(self, input_dim, output_dim, adj, features_nonzero, dropout=0., act=tf.nn.relu, **kwargs):
super(GraphConvolutionSparse, self).__init__(**kwargs)
with tf.variable_scope(self.name + '_vars'):
@ -105,7 +108,7 @@ class GraphConvolutionSparse(Layer):
def _call(self, inputs):
x = inputs
x = dropout_sparse(x, 1-self.dropout, self.features_nonzero)
x = dropout_sparse(x, 1 - self.dropout, self.features_nonzero)
x = tf.sparse_tensor_dense_matmul(x, self.vars['weights'])
x = tf.sparse_tensor_dense_matmul(self.adj, x)
outputs = self.act(x)
@ -114,6 +117,7 @@ class GraphConvolutionSparse(Layer):
class InnerProductDecoder(Layer):
"""Decoder model layer for link prediction."""
def __init__(self, input_dim, dropout=0., act=tf.nn.sigmoid, **kwargs):
super(InnerProductDecoder, self).__init__(**kwargs)
self.dropout = dropout
@ -123,15 +127,17 @@ class InnerProductDecoder(Layer):
"""
这个decoder部分实际上就只是input的转置再乘input
"""
inputs = tf.nn.dropout(inputs, 1-self.dropout)
inputs = tf.nn.dropout(inputs, 1 - self.dropout)
x = tf.transpose(inputs)
x = tf.matmul(inputs, x)
x = tf.reshape(x, [-1])
outputs = self.act(x)
return outputs
class GraphConvolution_z2g(Layer):
"""Basic graph convolution layer for undirected graph without edge labels."""
def __init__(self, input_dim, output_dim, adj, dropout=0., act=tf.nn.relu, **kwargs):
super(GraphConvolution, self).__init__(**kwargs)
with tf.variable_scope(self.name + '_vars'):
@ -142,16 +148,15 @@ class GraphConvolution_z2g(Layer):
def _call(self, inputs):
x = inputs
x = tf.nn.dropout(x, 1-self.dropout)
x = tf.nn.dropout(x, 1 - self.dropout)
x = tf.matmul(x, self.vars['weights'])
x = tf.sparse_tensor_dense_matmul(self.adj, x)
outputs = self.act(x)
return outputs
def _call(self, inputs):
x = inputs
x = dropout_sparse(x, 1-self.dropout, self.features_nonzero)
x = dropout_sparse(x, 1 - self.dropout, self.features_nonzero)
x = tf.sparse_tensor_dense_matmul(x, self.vars['weights'])
x = tf.sparse_tensor_dense_matmul(self.adj, x)
outputs = self.act(x)

@ -66,7 +66,8 @@ class Link_pred_Runner():
kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)
# construct model
d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(model_str, placeholders, feas['num_features'], feas['num_nodes'], feas['features_nonzero'])
d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(model_str, placeholders, feas['num_features'], feas['num_nodes'],
feas['features_nonzero'])
# Optimizer
opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph, discriminator, placeholders, feas['pos_weight'], feas['norm'], d_real, feas['num_nodes'], GD_real)
@ -91,7 +92,8 @@ class Link_pred_Runner():
roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)
val_roc_score.append(roc_curr)
print("Epoch:", '%04d' % (epoch + 1),
"train_loss= {:.5f}, d_loss= {:.5f}, g_loss= {:.5f}, GD_loss= {:.5f}, GG_loss= {:.5f}".format(avg_cost[0], avg_cost[1], avg_cost[2], avg_cost[3], avg_cost[4]), "val_roc=",
"train_loss= {:.5f}, d_loss= {:.5f}, g_loss= {:.5f}, GD_loss= {:.5f}, GG_loss= {:.5f}".format(avg_cost[0], avg_cost[1], avg_cost[2], avg_cost[3],
avg_cost[4]), "val_roc=",
"{:.5f}".format(val_roc_score[-1]), "val_ap=", "{:.5f}".format(ap_curr))
if (epoch + 1) % 10 == 0:

@ -5,6 +5,7 @@ from sklearn import metrics
from munkres import Munkres, print_matrix
import numpy as np
class linkpred_metrics():
def __init__(self, edges_pos, edges_neg):
self.edges_pos = edges_pos
@ -45,7 +46,6 @@ class clustering_metrics():
self.true_label = true_label
self.pred_label = predict_label
def clusteringAcc(self):
# best mapping between true_label and predict label
l1 = list(set(self.true_label))
@ -94,15 +94,16 @@ class clustering_metrics():
nmi = metrics.normalized_mutual_info_score(self.true_label, self.pred_label)
adjscore = metrics.adjusted_rand_score(self.true_label, self.pred_label)
acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro = self.clusteringAcc()
print('ACC=%f, f1_macro=%f, precision_macro=%f, recall_macro=%f, f1_micro=%f, precision_micro=%f, recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore))
print('ACC=%f, f1_macro=%f, precision_macro=%f, recall_macro=%f, f1_micro=%f, precision_micro=%f, recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (
acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore))
fh = open('recoder.txt', 'a')
fh.write('ACC=%f, f1_macro=%f, precision_macro=%f, recall_macro=%f, f1_micro=%f, precision_micro=%f, recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore) )
fh.write('ACC=%f, f1_macro=%f, precision_macro=%f, recall_macro=%f, f1_micro=%f, precision_micro=%f, recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (
acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore))
fh.write('\r\n')
fh.flush()
fh.close()
return acc, nmi, adjscore

@ -4,6 +4,7 @@ import tensorflow as tf
flags = tf.app.flags
FLAGS = flags.FLAGS
class Model(object):
def __init__(self, **kwargs):
allowed_kwargs = {'name', 'logging'}
@ -56,43 +57,39 @@ class GCN(Model):
self.adj = placeholders['adj']
self.dropout = placeholders['dropout']
def construct(self, inputs = None, hidden = None, reuse = False):
if inputs == None :
def construct(self, inputs=None, hidden=None, reuse=False):
if inputs == None:
inputs = self.inputs
with tf.variable_scope('Encoder', reuse=reuse):
self.hidden1 = GraphConvolutionSparse(input_dim=self.input_dim,
output_dim=FLAGS.hidden1,
adj=self.adj,
features_nonzero = self.features_nonzero,
features_nonzero=self.features_nonzero,
act=tf.nn.relu,
dropout=self.dropout,
logging=self.logging,
name='e_dense_1')(inputs)
self.noise = gaussian_noise_layer(self.hidden1, 0.1)
if hidden == None:
hidden = self.hidden1
self.embeddings = GraphConvolution(input_dim=FLAGS.hidden1,
output_dim=FLAGS.hidden2,
adj=self.adj,
act=lambda x: x,
dropout=self.dropout,
logging=self.logging,
name='e_dense_2')(hidden)
output_dim=FLAGS.hidden2,
adj=self.adj,
act=lambda x: x,
dropout=self.dropout,
logging=self.logging,
name='e_dense_2')(hidden)
self.z_mean = self.embeddings
self.reconstructions = InnerProductDecoder(input_dim=FLAGS.hidden2,
act=lambda x: x,
logging=self.logging)(self.embeddings)
act=lambda x: x,
logging=self.logging)(self.embeddings)
return self.z_mean, self.reconstructions
class Generator_z2g(Model):
def __init__(self, placeholders, num_features, features_nonzero, **kwargs):
super(Generator_z2g, self).__init__(**kwargs)
@ -110,33 +107,28 @@ class Generator_z2g(Model):
self.adj = placeholders['adj']
self.dropout = placeholders['dropout']
def construct(self, inputs = None, reuse = False):
def construct(self, inputs=None, reuse=False):
if inputs == None:
inputs = self.inputs
with tf.variable_scope('Decoder', reuse=reuse):
self.hidden1 = GraphConvolution(input_dim=FLAGS.hidden2,
output_dim=FLAGS.hidden1,
adj=self.adj,
act=tf.nn.relu,
dropout=self.dropout,
logging=self.logging,
name='GG_dense_1')(inputs)
output_dim=FLAGS.hidden1,
adj=self.adj,
act=tf.nn.relu,
dropout=self.dropout,
logging=self.logging,
name='GG_dense_1')(inputs)
self.embeddings = GraphConvolution(input_dim=FLAGS.hidden1,
output_dim=self.input_dim,
adj=self.adj,
act=lambda x: x,
dropout=self.dropout,
logging=self.logging,
name='GG_dense_2')(self.hidden1)
output_dim=self.input_dim,
adj=self.adj,
act=lambda x: x,
dropout=self.dropout,
logging=self.logging,
name='GG_dense_2')(self.hidden1)
self.z_mean = self.embeddings
return self.z_mean,self.hidden1
return self.z_mean, self.hidden1
def dense(x, n1, n2, name):
@ -165,21 +157,23 @@ class D_graph(Model):
self.act = tf.nn.relu
self.num_features = num_features
def construct(self, inputs, reuse = False):
def construct(self, inputs, reuse=False):
# input是一张Graph的adj,把每一列当成一个通道,所以input的通道数是num_nodes
with tf.variable_scope('D_Graph'):
if reuse:
tf.get_variable_scope().reuse_variables()
# np.random.seed(1)
#tf.set_random_seed(1)
dc_den1 = tf.nn.relu(dense(inputs, self.num_features, 512, name='GD_den1'))#(bs,num_nodes,512)
dc_den2 = tf.nn.relu(dense(dc_den1, 512, 128, name='GD_den2'))#(bs, num_nodes, 128)
output = dense(dc_den2, 128, 1, name='GD_output')#(bs,num_nodes,1)
# tf.set_random_seed(1)
dc_den1 = tf.nn.relu(dense(inputs, self.num_features, 512, name='GD_den1')) # (bs,num_nodes,512)
dc_den2 = tf.nn.relu(dense(dc_den1, 512, 128, name='GD_den2')) # (bs, num_nodes, 128)
output = dense(dc_den2, 128, 1, name='GD_output') # (bs,num_nodes,1)
return output
def gaussian_noise_layer(input_layer, std):
    """Add zero-mean Gaussian noise with standard deviation ``std`` to a tensor.

    Used as a simple corruption/regularization step on hidden representations.
    Noise is resampled on every graph execution (tf.random_normal, TF1 API).
    """
    noise = tf.random_normal(shape=tf.shape(input_layer), mean=0.0, stddev=std, dtype=tf.float32)
    return input_layer + noise
return input_layer + noise
class Discriminator(Model):
def __init__(self, **kwargs):
@ -187,7 +181,7 @@ class Discriminator(Model):
self.act = tf.nn.relu
def construct(self, inputs, reuse = False):
def construct(self, inputs, reuse=False):
# with tf.name_scope('Discriminator'):
with tf.variable_scope('Discriminator'):
if reuse:
@ -197,4 +191,4 @@ class Discriminator(Model):
dc_den1 = tf.nn.relu(dense(inputs, FLAGS.hidden2, FLAGS.hidden3, name='dc_den1'))
dc_den2 = tf.nn.relu(dense(dc_den1, FLAGS.hidden3, FLAGS.hidden1, name='dc_den2'))
output = dense(dc_den2, FLAGS.hidden1, 1, name='dc_output')
return output
return output

@ -12,40 +12,37 @@ class OptimizerAE(object):
self.real = d_real
# Discrimminator Loss
#self.dc_loss_real = tf.reduce_mean(
# self.dc_loss_real = tf.reduce_mean(
# tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(self.real), logits=self.real,name='dclreal'))
self.dc_loss_real = - tf.reduce_mean(self.real)
#self.dc_loss_fake = tf.reduce_mean(
# self.dc_loss_fake = tf.reduce_mean(
# tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(d_fake), logits=d_fake,name='dcfake'))
self.dc_loss_fake = tf.reduce_mean(d_fake)
GP_loss = tf.reduce_mean(tf.square(tf.sqrt(tf.reduce_mean(tf.square(gradient), axis = [0, 1])) - 1))
GP_loss = tf.reduce_mean(tf.square(tf.sqrt(tf.reduce_mean(tf.square(gradient), axis=[0, 1])) - 1))
self.dc_loss = self.dc_loss_fake + self.dc_loss_real + GP_loss
#self.dc_loss = self.dc_loss_fake + self.dc_loss_real
# self.dc_loss = self.dc_loss_fake + self.dc_loss_real
# Generator loss
#generator_loss = tf.reduce_mean(
# generator_loss = tf.reduce_mean(
# tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(d_fake), logits=d_fake, name='gl'))
generator_loss = -self.dc_loss_fake
# pos_weight,允许人们通过向上或向下加权相对于负误差的正误差的成本来权衡召回率和精确度
self.cost = norm * tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(logits=preds_sub, targets=labels_sub, pos_weight=pos_weight))
self.generator_loss = generator_loss + self.cost
all_variables = tf.trainable_variables()
dc_var = [var for var in all_variables if 'dc_' in var.name]
en_var = [var for var in all_variables if 'e_' in var.name]
with tf.variable_scope(tf.get_variable_scope()):
self.discriminator_optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
beta1=0.9, name='adam1').minimize(self.dc_loss, var_list=dc_var) #minimize(dc_loss_real, var_list=dc_var)
beta1=0.9, name='adam1').minimize(self.dc_loss,
var_list=dc_var) # minimize(dc_loss_real, var_list=dc_var)
self.generator_optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
beta1=0.9, name='adam2').minimize(self.generator_loss, var_list=en_var)
beta1=0.9, name='adam2').minimize(self.generator_loss, var_list=en_var)
# 值得注意的是,这个地方,除了对抗优化之外,
# 还单纯用cost损失又优化了一遍,
@ -56,8 +53,7 @@ class OptimizerAE(object):
class OptimizerCycle(object):
def __init__(self, preds, labels, pos_weight, norm, d_real, d_fake, GD_real, GD_fake, preds_z2g, labels_z2g, preds_cycle,labels_cycle,gradient, gradient_z):
def __init__(self, preds, labels, pos_weight, norm, d_real, d_fake, GD_real, GD_fake, preds_z2g, labels_z2g, preds_cycle, labels_cycle, gradient, gradient_z):
preds_sub = preds
labels_sub = labels
@ -65,74 +61,71 @@ class OptimizerCycle(object):
# Discrimminator Loss
self.dc_loss_real = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(self.real), logits=self.real,name='dclreal'))
#self.dc_loss_real = - tf.reduce_mean(self.real)
tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(self.real), logits=self.real, name='dclreal'))
# self.dc_loss_real = - tf.reduce_mean(self.real)
self.dc_loss_fake = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(d_fake), logits=d_fake,name='dcfake'))
#self.dc_loss_fake = tf.reduce_mean(d_fake)
#GP_loss = tf.reduce_mean(tf.square(tf.sqrt(tf.reduce_mean(tf.square(gradient), axis = [0, 1])) - 1))
#GP_loss_z = tf.reduce_mean(tf.square(tf.sqrt(tf.reduce_mean(tf.square(gradient_z), axis = [0, 1])) - 1))
#self.dc_loss = self.dc_loss_fake + self.dc_loss_real + 10.0 * GP_loss
tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(d_fake), logits=d_fake, name='dcfake'))
# self.dc_loss_fake = tf.reduce_mean(d_fake)
# GP_loss = tf.reduce_mean(tf.square(tf.sqrt(tf.reduce_mean(tf.square(gradient), axis = [0, 1])) - 1))
# GP_loss_z = tf.reduce_mean(tf.square(tf.sqrt(tf.reduce_mean(tf.square(gradient_z), axis = [0, 1])) - 1))
# self.dc_loss = self.dc_loss_fake + self.dc_loss_real + 10.0 * GP_loss
self.GD_loss_real = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(GD_real), logits=GD_real,name='GD_real'))
#self.GD_loss_real = - tf.reduce_mean(GD_real)
tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(GD_real), logits=GD_real, name='GD_real'))
# self.GD_loss_real = - tf.reduce_mean(GD_real)
self.GD_loss_fake = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(GD_fake), logits=GD_fake,name='GD_fake'))
#self.GD_loss_fake = tf.reduce_mean(GD_fake)
tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(GD_fake), logits=GD_fake, name='GD_fake'))
# self.GD_loss_fake = tf.reduce_mean(GD_fake)
self.dc_loss = self.dc_loss_fake + self.dc_loss_real
self.GD_loss = self.GD_loss_fake + self.GD_loss_real
# Generator loss
generator_loss = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(d_fake), logits=d_fake, name='gl'))
#generator_loss = -self.dc_loss_fake
# generator_loss = -self.dc_loss_fake
generator_loss_z2g = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(GD_fake), logits=GD_fake, name='G_z2g'))
#generator_loss_z2g = -self.GD_loss_fake
# generator_loss_z2g = -self.GD_loss_fake
# pos_weight,允许人们通过向上或向下加权相对于负误差的正误差的成本来权衡召回率和精确度
self.cost = norm * tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(logits=preds_sub, targets=labels_sub, pos_weight=pos_weight))
cost_cycle = norm * tf.reduce_mean(tf.square(preds_cycle - labels_cycle))
cost_z2g = norm * tf.reduce_mean(tf.square(preds_z2g-labels_z2g))
#with tf.device("/gpu:1"):
#self.cost = 0.00001*self.cost + cost_cycle #for citseer cluster
cost_z2g = norm * tf.reduce_mean(tf.square(preds_z2g - labels_z2g))
# with tf.device("/gpu:1"):
# self.cost = 0.00001*self.cost + cost_cycle #for citseer cluster
self.cost = self.cost + cost_cycle
self.generator_loss = generator_loss + self.cost
self.generator_loss_z2g = generator_loss_z2g
all_variables = tf.trainable_variables()
dc_var = [var for var in all_variables if 'dc_' in var.name]
en_var = [var for var in all_variables if 'e_' in var.name]
GG_var = [var for var in all_variables if 'GG' in var.name]
GD_var = [var for var in all_variables if 'GD' in var.name]
with tf.variable_scope(tf.get_variable_scope()):
self.discriminator_optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
beta1=0.9, name='adam1').minimize(self.dc_loss, var_list=dc_var) #minimize(dc_loss_real, var_list=dc_var)
beta1=0.9, name='adam1').minimize(self.dc_loss,
var_list=dc_var) # minimize(dc_loss_real, var_list=dc_var)
self.generator_optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
beta1=0.9, name='adam2').minimize(self.generator_loss, var_list=en_var)
beta1=0.9, name='adam2').minimize(self.generator_loss, var_list=en_var)
self.discriminator_optimizer_z2g = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
beta1=0.9, name='adam1').minimize(self.GD_loss, var_list=GD_var)
beta1=0.9, name='adam1').minimize(self.GD_loss, var_list=GD_var)
self.generator_optimizer_z2g = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
beta1=0.9, name='adam2').minimize(self.generator_loss_z2g, var_list=GG_var)
beta1=0.9, name='adam2').minimize(self.generator_loss_z2g, var_list=GG_var)
# 值得注意的是,这个地方,除了对抗优化之外,
# 还单纯用cost损失又优化了一遍,
# 待会儿看训练的时候注意看是在哪部分进行的这部分优化操作
self.optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) # Adam Optimizer
self.opt_op = self.optimizer.minimize(self.cost)
#self.grads_vars = self.optimizer.compute_gradients(self.cost)
#self.optimizer_z2g = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) # Adam Optimizer
#self.opt_op_z2g = self.optimizer.minimize(cost_z2g)
#self.grads_vars_z2g = self.optimizer.compute_gradients(cost_z2g)
# self.grads_vars = self.optimizer.compute_gradients(self.cost)
# self.optimizer_z2g = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) # Adam Optimizer
# self.opt_op_z2g = self.optimizer.minimize(cost_z2g)
# self.grads_vars_z2g = self.optimizer.compute_gradients(cost_z2g)

@ -3,7 +3,7 @@ import scipy.sparse as sp
def sparse_to_tuple(sparse_mx):
#判断是否是coo_matrix,不是的话就转成coo_matrix
# 判断是否是coo_matrix,不是的话就转成coo_matrix
if not sp.isspmatrix_coo(sparse_mx):
sparse_mx = sparse_mx.tocoo()
coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
@ -13,22 +13,22 @@ def sparse_to_tuple(sparse_mx):
def preprocess_graph(adj):
    """Symmetrically normalize an adjacency matrix for GCN-style propagation.

    Computes D^(-1/2) (A + I) D^(-1/2), where D is the degree matrix of
    A + I (self-loops added), and returns it in the tuple format produced
    by sparse_to_tuple (coords, values, shape).

    NOTE(review): the diff view contained both the old and reformatted copy of
    several statements; duplicates are removed here (they were idempotent, so
    behavior is unchanged).
    """
    adj = sp.coo_matrix(adj)
    # A* = A + I: add self-connections to the adjacency matrix.
    adj_ = adj + sp.eye(adj.shape[0])
    # Row sums of A* give the node degrees.
    rowsum = np.array(adj_.sum(1))
    # sp.diags builds the diagonal matrix D^(-1/2) from the degree vector.
    degree_mat_inv_sqrt = sp.diags(np.power(rowsum, -0.5).flatten())
    # Symmetric normalization: D^(-1/2) A* D^(-1/2).
    adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt).tocoo()
    return sparse_to_tuple(adj_normalized)
def construct_feed_dict(adj_normalized, adj, features, placeholders):
# construct feed dictionary
# .update()用法就是将()内的字段增加到dict当中
feed_dict = dict()#创建一个空字典
feed_dict = dict() # 创建一个空字典
feed_dict.update({placeholders['features']: features})
feed_dict.update({placeholders['adj']: adj_normalized})
feed_dict.update({placeholders['adj_orig']: adj})
@ -44,7 +44,7 @@ def mask_test_edges(adj):
# .diagonal()就是提取对角线元素
# Remove diagonal elements删除对角线元素
adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
#把零元素都消除掉
# 把零元素都消除掉
adj.eliminate_zeros()
# Check that diag is zero:
# np.diag(matrix)即提取matrix的对角线元素,todense() like toarray(),区别是一个是将存储方式由稀疏矩阵转成正常矩阵,另一个是转成array
@ -78,8 +78,8 @@ def mask_test_edges(adj):
# 该函数的作用就是判断a元素是否存在于b集合中
rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
return np.any(rows_close)
#return (np.all(np.any(rows_close, axis=-1), axis=-1) and
#np.all(np.any(rows_close, axis=0), axis=0))
# return (np.all(np.any(rows_close, axis=-1), axis=-1) and
# np.all(np.any(rows_close, axis=0), axis=0))
# test_edges_false是去生成一些本来就不存在的edges
test_edges_false = []
@ -134,5 +134,3 @@ def mask_test_edges(adj):
# NOTE: these edge lists only contain single direction of edge!
return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false

@ -2,9 +2,9 @@ import settings
from link_prediction import Link_pred_Runner
dataname = 'cora' # 'cora' or 'citeseer' or 'pubmed'
model = 'DBGAN' # 'arga_ae' or 'DBGAN'
task = 'link_prediction'
dataname = 'cora' # 'cora' or 'citeseer' or 'pubmed'
model = 'DBGAN' # 'arga_ae' or 'DBGAN'
task = 'link_prediction'
settings = settings.get_settings(dataname, model, task)
@ -12,4 +12,3 @@ if task == 'link_prediction':
runner = Link_pred_Runner(settings)
runner.erun()

@ -29,6 +29,7 @@ seed = 7
np.random.seed(seed)
tf.set_random_seed(seed)
def get_settings(dataname, model, task):
if dataname != 'citeseer' and dataname != 'cora' and dataname != 'pubmed':
print('error: wrong data set name')

Loading…
Cancel
Save