format code

main
lab-pc 2 years ago
parent c91e3cf0f4
commit 55bb0f3d59
1. constructor.py (1 change)
2. initializations.py (1 change)
3. input_data.py (2 changes)
4. layers.py (7 changes)
5. link_prediction.py (6 changes)
6. metrics.py (9 changes)
7. model.py (12 changes)
8. optimizer.py (15 changes)
9. preprocessing.py (2 changes)
10. run.py (1 change)
11. settings.py (1 change)

constructor.py

@@ -188,4 +188,3 @@ def retrieve_name(var):
     callers_local_vars = inspect.currentframe().f_back.f_locals.items()
     print([var_name for var_name, var_val in callers_local_vars if var_val is var])
     return [var_name for var_name, var_val in callers_local_vars if var_val is var][0]
-

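A note on the constructor.py hunk above: retrieve_name recovers a variable's name by scanning the caller's frame locals for an object with the same identity, so it only works when the value is bound to a local name (the first match wins). A minimal usage sketch, with an illustrative variable name:

    import inspect

    def retrieve_name(var):
        # scan the caller's local bindings for this exact object
        callers_local_vars = inspect.currentframe().f_back.f_locals.items()
        return [var_name for var_name, var_val in callers_local_vars if var_val is var][0]

    embeddings = [0.1, 0.2, 0.3]
    print(retrieve_name(embeddings))  # -> 'embeddings'
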
initializations.py

@@ -1,6 +1,7 @@
 import tensorflow as tf
 import numpy as np

+
 def weight_variable_glorot(input_dim, output_dim, name=""):
     """Create a weight variable with Glorot & Bengio (AISTATS 2010)
     initialization.

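The initializations.py hunk only shows the head of weight_variable_glorot. For reference, a sketch of the standard Glorot-uniform construction the docstring points to, in the TF1 style of this codebase (the body is an assumption, not copied from the repo):

    import numpy as np
    import tensorflow as tf

    def weight_variable_glorot(input_dim, output_dim, name=""):
        # uniform on [-limit, limit] with limit = sqrt(6 / (fan_in + fan_out))
        init_range = np.sqrt(6.0 / (input_dim + output_dim))
        initial = tf.random_uniform([input_dim, output_dim],
                                    minval=-init_range, maxval=init_range,
                                    dtype=tf.float32)
        return tf.Variable(initial, name=name)
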
input_data.py

@@ -11,12 +11,14 @@ def parse_index_file(filename):
         index.append(int(line.strip()))
     return index

+
 def sample_mask(idx, l):
     """Create mask."""
     mask = np.zeros(l)
     mask[idx] = 1
     return np.array(mask, dtype=np.bool)

+
 def load_data(dataset):
     # load the data: x, tx, allx, graph
     # x => feature vectors of the training instances, e.g. an instance of scipy.sparse.csr.csr_matrix

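One thing worth flagging in the input_data.py hunk: np.array(mask, dtype=np.bool) relies on the np.bool alias, which was deprecated in NumPy 1.20 and removed in 1.24, so sample_mask breaks on current NumPy. A drop-in fix is the builtin bool:

    import numpy as np

    def sample_mask(idx, l):
        """Boolean mask of length l that is True at the given indices."""
        mask = np.zeros(l)
        mask[idx] = 1
        return np.array(mask, dtype=bool)  # np.bool was removed in NumPy 1.24
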
layers.py

@@ -46,6 +46,7 @@ class Layer(object):
         (i.e. takes input, returns output)
     __call__(inputs): Wrapper for _call()
     """
+
     def __init__(self, **kwargs):
         allowed_kwargs = {'name', 'logging'}
         for kwarg in kwargs.keys():
@@ -71,6 +72,7 @@ class Layer(object):

 class GraphConvolution(Layer):
     """Basic graph convolution layer for undirected graph without edge labels."""
+
     def __init__(self, input_dim, output_dim, adj, dropout=0., act=tf.nn.relu, **kwargs):
         super(GraphConvolution, self).__init__(**kwargs)
         with tf.variable_scope(self.name + '_vars'):
@@ -93,6 +95,7 @@ class GraphConvolutionSparse(Layer):
     Graph convolution layer for sparse inputs.
     Takes an extra features_nonzero argument.
     """
+
     def __init__(self, input_dim, output_dim, adj, features_nonzero, dropout=0., act=tf.nn.relu, **kwargs):
         super(GraphConvolutionSparse, self).__init__(**kwargs)
         with tf.variable_scope(self.name + '_vars'):
@@ -114,6 +117,7 @@ class GraphConvolutionSparse(Layer):

 class InnerProductDecoder(Layer):
     """Decoder model layer for link prediction."""
+
     def __init__(self, input_dim, dropout=0., act=tf.nn.sigmoid, **kwargs):
         super(InnerProductDecoder, self).__init__(**kwargs)
         self.dropout = dropout
@@ -130,8 +134,10 @@ class InnerProductDecoder(Layer):
         outputs = self.act(x)
         return outputs

+
 class GraphConvolution_z2g(Layer):
     """Basic graph convolution layer for undirected graph without edge labels."""
+
     def __init__(self, input_dim, output_dim, adj, dropout=0., act=tf.nn.relu, **kwargs):
         super(GraphConvolution, self).__init__(**kwargs)
         with tf.variable_scope(self.name + '_vars'):
@@ -148,7 +154,6 @@ class GraphConvolution_z2g(Layer):
         outputs = self.act(x)
         return outputs

-
     def _call(self, inputs):
         x = inputs
         x = dropout_sparse(x, 1 - self.dropout, self.features_nonzero)

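A likely bug in the GraphConvolution_z2g hunk: its __init__ calls super(GraphConvolution, self).__init__(**kwargs), but GraphConvolution_z2g subclasses Layer, not GraphConvolution, so constructing it raises "TypeError: super(type, obj): obj must be an instance or subtype of type". The super() call should name the class itself:

    class GraphConvolution_z2g(Layer):  # Layer is the base class defined earlier in layers.py
        """Basic graph convolution layer for undirected graph without edge labels."""

        def __init__(self, input_dim, output_dim, adj, dropout=0., act=tf.nn.relu, **kwargs):
            # name this class, not GraphConvolution, in the super() call
            super(GraphConvolution_z2g, self).__init__(**kwargs)
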
link_prediction.py

@@ -66,7 +66,8 @@ class Link_pred_Runner():
         kde = KernelDensity(bandwidth=0.7).fit(featuresCompress)

         # construct model
-        d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(model_str, placeholders, feas['num_features'], feas['num_nodes'], feas['features_nonzero'])
+        d_real, discriminator, ae_model, model_z2g, D_Graph, GD_real = get_model(model_str, placeholders, feas['num_features'], feas['num_nodes'],
+                                                                                 feas['features_nonzero'])

         # Optimizer
         opt = get_optimizer(model_str, ae_model, model_z2g, D_Graph, discriminator, placeholders, feas['pos_weight'], feas['norm'], d_real, feas['num_nodes'], GD_real)
@@ -91,7 +92,8 @@ class Link_pred_Runner():
                 roc_curr, ap_curr, _ = lm_train.get_roc_score(emb, feas)
                 val_roc_score.append(roc_curr)
                 print("Epoch:", '%04d' % (epoch + 1),
-                      "train_loss= {:.5f}, d_loss= {:.5f}, g_loss= {:.5f}, GD_loss= {:.5f}, GG_loss= {:.5f}".format(avg_cost[0], avg_cost[1], avg_cost[2], avg_cost[3], avg_cost[4]), "val_roc=",
+                      "train_loss= {:.5f}, d_loss= {:.5f}, g_loss= {:.5f}, GD_loss= {:.5f}, GG_loss= {:.5f}".format(avg_cost[0], avg_cost[1], avg_cost[2], avg_cost[3],
+                                                                                                                    avg_cost[4]), "val_roc=",
                       "{:.5f}".format(val_roc_score[-1]), "val_ap=", "{:.5f}".format(ap_curr))

             if (epoch + 1) % 10 == 0:

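lm_train.get_roc_score itself is not part of this diff. In graph auto-encoder codebases it conventionally scores a candidate edge (i, j) by the sigmoid of the inner product of the two node embeddings, then computes ROC-AUC and average precision over held-out positive and negative edges; a sketch under that assumption (the signature here is simplified relative to the repo's, which takes feas):

    import numpy as np
    from sklearn.metrics import average_precision_score, roc_auc_score

    def get_roc_score(emb, edges_pos, edges_neg):
        def sigmoid(x):
            return 1.0 / (1.0 + np.exp(-x))

        # score each edge by sigmoid(<z_i, z_j>)
        preds_pos = [sigmoid(np.dot(emb[i], emb[j])) for i, j in edges_pos]
        preds_neg = [sigmoid(np.dot(emb[i], emb[j])) for i, j in edges_neg]

        preds_all = np.hstack([preds_pos, preds_neg])
        labels_all = np.hstack([np.ones(len(preds_pos)), np.zeros(len(preds_neg))])
        return roc_auc_score(labels_all, preds_all), average_precision_score(labels_all, preds_all)
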
metrics.py

@@ -5,6 +5,7 @@ from sklearn import metrics
 from munkres import Munkres, print_matrix
 import numpy as np

 class linkpred_metrics():
+
     def __init__(self, edges_pos, edges_neg):
         self.edges_pos = edges_pos
@@ -45,7 +46,6 @@ class clustering_metrics():
         self.true_label = true_label
         self.pred_label = predict_label

-
     def clusteringAcc(self):
         # best mapping between true_label and predict label
         l1 = list(set(self.true_label))
@@ -95,14 +95,15 @@ class clustering_metrics():
         adjscore = metrics.adjusted_rand_score(self.true_label, self.pred_label)
         acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro = self.clusteringAcc()

-        print('ACC=%f, f1_macro=%f, precision_macro=%f, recall_macro=%f, f1_micro=%f, precision_micro=%f, recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore))
+        print('ACC=%f, f1_macro=%f, precision_macro=%f, recall_macro=%f, f1_micro=%f, precision_micro=%f, recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (
+            acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore))

         fh = open('recoder.txt', 'a')
-        fh.write('ACC=%f, f1_macro=%f, precision_macro=%f, recall_macro=%f, f1_micro=%f, precision_micro=%f, recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore))
+        fh.write('ACC=%f, f1_macro=%f, precision_macro=%f, recall_macro=%f, f1_micro=%f, precision_micro=%f, recall_micro=%f, NMI=%f, ADJ_RAND_SCORE=%f' % (
+            acc, f1_macro, precision_macro, recall_macro, f1_micro, precision_micro, recall_micro, nmi, adjscore))
         fh.write('\r\n')
         fh.flush()
         fh.close()
         return acc, nmi, adjscore

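clusteringAcc above computes accuracy after finding the best one-to-one mapping between predicted clusters and true classes, which this repo does with the munkres package. The same idea with scipy's Hungarian-algorithm solver, as an alternative sketch (not the repo's implementation; assumes integer labels):

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    def clustering_accuracy(true_label, pred_label):
        true_label = np.asarray(true_label)
        pred_label = np.asarray(pred_label)
        n = max(true_label.max(), pred_label.max()) + 1
        # count[i, j] = number of samples in predicted cluster i with true class j
        count = np.zeros((n, n), dtype=np.int64)
        for t, p in zip(true_label, pred_label):
            count[p, t] += 1
        # maximizing matched samples == minimizing the negated count matrix
        row_ind, col_ind = linear_sum_assignment(-count)
        return count[row_ind, col_ind].sum() / true_label.size
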
model.py

@@ -4,6 +4,7 @@ import tensorflow as tf
 flags = tf.app.flags
 FLAGS = flags.FLAGS

 class Model(object):
+
     def __init__(self, **kwargs):
         allowed_kwargs = {'name', 'logging'}
@@ -70,7 +71,6 @@ class GCN(Model):
                                        logging=self.logging,
                                        name='e_dense_1')(inputs)
         self.noise = gaussian_noise_layer(self.hidden1, 0.1)

-
         if hidden == None:
             hidden = self.hidden1
@@ -82,7 +82,6 @@ class GCN(Model):
                                         logging=self.logging,
                                         name='e_dense_2')(hidden)
         self.z_mean = self.embeddings

-
         self.reconstructions = InnerProductDecoder(input_dim=FLAGS.hidden2,
@@ -91,8 +90,6 @@ class GCN(Model):
         return self.z_mean, self.reconstructions


-
 class Generator_z2g(Model):
-
     def __init__(self, placeholders, num_features, features_nonzero, **kwargs):
         super(Generator_z2g, self).__init__(**kwargs)
@@ -110,12 +107,10 @@ class Generator_z2g(Model):
         self.adj = placeholders['adj']
         self.dropout = placeholders['dropout']

-
     def construct(self, inputs=None, reuse=False):
         if inputs == None:
             inputs = self.inputs
-
         with tf.variable_scope('Decoder', reuse=reuse):
             self.hidden1 = GraphConvolution(input_dim=FLAGS.hidden2,
                                             output_dim=FLAGS.hidden1,
                                             adj=self.adj,
@@ -124,8 +119,6 @@ class Generator_z2g(Model):
                                             logging=self.logging,
                                             name='GG_dense_1')(inputs)

-
-
         self.embeddings = GraphConvolution(input_dim=FLAGS.hidden1,
                                            output_dim=self.input_dim,
                                            adj=self.adj,
@@ -134,7 +127,6 @@ class Generator_z2g(Model):
                                            logging=self.logging,
                                            name='GG_dense_2')(self.hidden1)
         self.z_mean = self.embeddings


-
         return self.z_mean, self.hidden1
@@ -177,10 +169,12 @@ class D_graph(Model):
         output = dense(dc_den2, 128, 1, name='GD_output')  # (bs,num_nodes,1)
         return output

+
 def gaussian_noise_layer(input_layer, std):
     noise = tf.random_normal(shape=tf.shape(input_layer), mean=0.0, stddev=std, dtype=tf.float32)
     return input_layer + noise

+
 class Discriminator(Model):
     def __init__(self, **kwargs):
         super(Discriminator, self).__init__(**kwargs)

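Two small observations on the model.py hunks: the `if hidden == None:` and `if inputs == None:` checks should be `is None` (== can be overloaded on tensor types), and gaussian_noise_layer uses tf.random_normal, which is the TF1 spelling. Under TF2 (an assumption; the repo itself targets TF1) the same layer reads:

    import tensorflow as tf

    def gaussian_noise_layer(input_layer, std):
        # additive zero-mean Gaussian noise with standard deviation std
        noise = tf.random.normal(shape=tf.shape(input_layer),
                                 mean=0.0, stddev=std, dtype=tf.float32)
        return input_layer + noise

    x = tf.zeros([2, 3])
    noisy = gaussian_noise_layer(x, 0.1)
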
optimizer.py

@@ -28,25 +28,22 @@ class OptimizerAE(object):
         #     tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(d_fake), logits=d_fake, name='gl'))
         generator_loss = -self.dc_loss_fake

-
         # pos_weight lets one trade off recall and precision by up- or down-weighting the cost of positive errors relative to negative errors
         self.cost = norm * tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(logits=preds_sub, targets=labels_sub, pos_weight=pos_weight))
         self.generator_loss = generator_loss + self.cost

-
         all_variables = tf.trainable_variables()
         dc_var = [var for var in all_variables if 'dc_' in var.name]
         en_var = [var for var in all_variables if 'e_' in var.name]

-
         with tf.variable_scope(tf.get_variable_scope()):
             self.discriminator_optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
-                                                                  beta1=0.9, name='adam1').minimize(self.dc_loss, var_list=dc_var)  # minimize(dc_loss_real, var_list=dc_var)
+                                                                  beta1=0.9, name='adam1').minimize(self.dc_loss,
+                                                                                                    var_list=dc_var)  # minimize(dc_loss_real, var_list=dc_var)
             self.generator_optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
                                                               beta1=0.9, name='adam2').minimize(self.generator_loss, var_list=en_var)

-
         # Note that here, in addition to the adversarial optimization,
         # the cost loss is optimized once more on its own;
         # when reading the training code, check in which part this extra optimization step happens
@@ -57,7 +54,6 @@ class OptimizerAE(object):


-
 class OptimizerCycle(object):
     def __init__(self, preds, labels, pos_weight, norm, d_real, d_fake, GD_real, GD_fake, preds_z2g, labels_z2g, preds_cycle, labels_cycle, gradient, gradient_z):
         preds_sub = preds
         labels_sub = labels
@@ -94,7 +90,6 @@ class OptimizerCycle(object):
         # pos_weight lets one trade off recall and precision by up- or down-weighting the cost of positive errors relative to negative errors
         self.cost = norm * tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(logits=preds_sub, targets=labels_sub, pos_weight=pos_weight))


-
         cost_cycle = norm * tf.reduce_mean(tf.square(preds_cycle - labels_cycle))
         cost_z2g = norm * tf.reduce_mean(tf.square(preds_z2g - labels_z2g))
@@ -104,17 +99,16 @@ class OptimizerCycle(object):
         self.generator_loss = generator_loss + self.cost
         self.generator_loss_z2g = generator_loss_z2g

-
         all_variables = tf.trainable_variables()
         dc_var = [var for var in all_variables if 'dc_' in var.name]
         en_var = [var for var in all_variables if 'e_' in var.name]
         GG_var = [var for var in all_variables if 'GG' in var.name]
         GD_var = [var for var in all_variables if 'GD' in var.name]

-
         with tf.variable_scope(tf.get_variable_scope()):
             self.discriminator_optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
-                                                                  beta1=0.9, name='adam1').minimize(self.dc_loss, var_list=dc_var)  # minimize(dc_loss_real, var_list=dc_var)
+                                                                  beta1=0.9, name='adam1').minimize(self.dc_loss,
+                                                                                                    var_list=dc_var)  # minimize(dc_loss_real, var_list=dc_var)
             self.generator_optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
                                                               beta1=0.9, name='adam2').minimize(self.generator_loss, var_list=en_var)
@@ -125,7 +119,6 @@ class OptimizerCycle(object):
             self.generator_optimizer_z2g = tf.train.AdamOptimizer(learning_rate=FLAGS.discriminator_learning_rate,
                                                                   beta1=0.9, name='adam2').minimize(self.generator_loss_z2g, var_list=GG_var)

-
         # Note that here, in addition to the adversarial optimization,
         # the cost loss is optimized once more on its own;
         # when reading the training code, check in which part this extra optimization step happens

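On the reconstruction cost used by both optimizers: tf.nn.weighted_cross_entropy_with_logits computes -[pos_weight * y * log(sigmoid(x)) + (1 - y) * log(1 - sigmoid(x))], i.e. it up-weights the positive (edge) term by pos_weight, which is how the model offsets the sparsity of the adjacency matrix. A standalone example of the call in the TF1 style the repo uses:

    import tensorflow as tf

    logits = tf.constant([[2.0, -1.0]])
    labels = tf.constant([[1.0, 0.0]])

    # positive entries are up-weighted 10x relative to negative ones
    loss = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(
        logits=logits, targets=labels, pos_weight=10.0))
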
preprocessing.py

@@ -134,5 +134,3 @@ def mask_test_edges(adj):

     # NOTE: these edge lists only contain single direction of edge!
     return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false
-
-

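As the NOTE in the preprocessing.py hunk says, the returned edge lists contain each undirected edge in one direction only, so anything that rebuilds an adjacency matrix from them must symmetrize it. A hedged sketch of that step (the helper name is illustrative):

    import numpy as np
    import scipy.sparse as sp

    def edges_to_adj(edges, num_nodes):
        # each (i, j) appears once; adding the transpose restores symmetry
        data = np.ones(len(edges))
        rows = [i for i, j in edges]
        cols = [j for i, j in edges]
        adj = sp.csr_matrix((data, (rows, cols)), shape=(num_nodes, num_nodes))
        return adj + adj.T
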
run.py

@@ -12,4 +12,3 @@ if task == 'link_prediction':

    runner = Link_pred_Runner(settings)
    runner.erun()
-

settings.py

@@ -29,6 +29,7 @@ seed = 7
 np.random.seed(seed)
 tf.set_random_seed(seed)

+
 def get_settings(dataname, model, task):
     if dataname != 'citeseer' and dataname != 'cora' and dataname != 'pubmed':
         print('error: wrong data set name')

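The settings.py hunk validates the dataset name with a print; a membership test that fails fast would be stricter (a sketch, not how the repo handles it):

    VALID_DATASETS = {'citeseer', 'cora', 'pubmed'}

    def check_dataset(dataname):
        if dataname not in VALID_DATASETS:
            raise ValueError('wrong data set name: %r (expected one of %s)'
                             % (dataname, sorted(VALID_DATASETS)))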