TensorFlow实例-MNIST

TensorFlow实例-MNIST

Data:

MNIST link: http://yann.lecun.com/exdb/mnist

事实上TensorFlow提供了一个类来处理MNIST

from tensorflow.examples.tutorials.mnist import input_data

Code

直接上代码, self-explain

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Width of each flattened input vector (second dimension of the x placeholder).
input_node = 784
# Number of output units (second dimension of the y_ placeholder).
output_node = 10
# Number of units in the single hidden layer.
layer1_node = 500
# Number of training examples requested per step via next_batch().
batch_size = 100
# Initial learning rate passed to tf.train.exponential_decay.
lr_base = 0.8
# Decay rate passed to tf.train.exponential_decay.
lr_decay = 0.99
# Scale passed to the L2 regularizer.
regular_rate = 1e-4
# Number of iterations of the training loop.
training_steps = 30000
# Decay passed to tf.train.ExponentialMovingAverage.
moving_average_decay = 0.99
def inference(input_tensor, avg_class, weight1, bias1, weight2, bias2):
    """Compute the forward pass of the two-layer fully connected network.

    Args:
        input_tensor: Tensor fed into the first layer.
        avg_class: Object exposing ``average(variable)`` (for example a
            ``tf.train.ExponentialMovingAverage``), or ``None`` to use the
            raw variables directly.
        weight1: Weights of the hidden layer.
        bias1: Bias of the hidden layer.
        weight2: Weights of the output layer.
        bias2: Bias of the output layer.

    Returns:
        The output of the second layer; no softmax is applied here.
    """
    # Without a moving-average object, use the variables as they are.
    if avg_class is None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weight1) + bias1)
        return tf.matmul(layer1, weight2) + bias2

    # Otherwise substitute every variable with its moving average.
    layer1 = tf.nn.relu(
        tf.matmul(input_tensor, avg_class.average(weight1))
        + avg_class.average(bias1))
    # Parentheses keep the two-line expression a single return value.
    return (tf.matmul(layer1, avg_class.average(weight2))
            + avg_class.average(bias2))
def train(mnist):
    """Build the two-layer network, train it and report accuracy.

    Validation accuracy (using the moving-average model) is printed every
    1000 steps, and test accuracy is printed once training has finished.

    Args:
        mnist: Dataset object exposing ``train``, ``validation`` and ``test``
            splits, as returned by ``input_data.read_data_sets``.
    """
    x = tf.placeholder(tf.float32, [None, input_node], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, output_node], name='y-input')

    weight1 = tf.Variable(
        tf.truncated_normal([input_node, layer1_node], stddev=0.1))
    bias1 = tf.Variable(tf.constant(0.1, shape=[layer1_node]))
    weight2 = tf.Variable(
        tf.truncated_normal([layer1_node, output_node], stddev=0.1))
    bias2 = tf.Variable(tf.constant(0.1, shape=[output_node]))

    # Forward pass on the raw (non-averaged) variables.
    y = inference(x, None, weight1, bias1, weight2, bias2)

    # Step counter. It needs no moving average, so it is marked as
    # non-trainable and is therefore skipped by trainable_variables() below.
    global_step = tf.Variable(0, trainable=False)
    variable_average = tf.train.ExponentialMovingAverage(
        moving_average_decay, global_step)
    # Apply the moving average to every variable that holds a network
    # parameter. tf.trainable_variables() returns the elements of the
    # GraphKeys.TRAINABLE_VARIABLES collection.
    variable_average_op = variable_average.apply(tf.trainable_variables())
    average_y = inference(x, variable_average, weight1, bias1,
                          weight2, bias2)

    # The loss uses y rather than average_y: the optimizer updates the raw
    # variables, while the averages are only read for evaluation.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # L2 loss on the weights only.
    regularizer = tf.contrib.layers.l2_regularizer(regular_rate)
    regularization = regularizer(weight1) + regularizer(weight2)
    loss = cross_entropy_mean + regularization

    # Learning-rate decay; the decay period is one pass over the training
    # data (num_examples / batch_size steps).
    lr = tf.train.exponential_decay(
        lr_base, global_step, mnist.train.num_examples / batch_size,
        lr_decay)
    train_step = tf.train.GradientDescentOptimizer(lr).minimize(
        loss, global_step=global_step)

    # Every step must both update the parameters by back-propagation and
    # refresh each parameter's moving average. TensorFlow offers
    # tf.control_dependencies and tf.group for running several ops at once;
    # the two lines below are equivalent to
    # train_op = tf.group(train_step, variable_average_op)
    with tf.control_dependencies([train_step, variable_average_op]):
        # no_op does nothing by itself; it only ties the two ops together.
        train_op = tf.no_op(name='train')

    # The second argument of argmax is the axis, not a count.
    correct_prediction = tf.equal(tf.argmax(average_y, 1),
                                  tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images,
                         y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images,
                     y_: mnist.test.labels}
        for i in range(training_steps):
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("after {} training steps, validation accuracy "
                      "using average model is {}".format(i, validate_acc))
            xs, ys = mnist.train.next_batch(batch_size)
            # Labels go to the y_ placeholder; y is the network output.
            sess.run(train_op, feed_dict={x: xs, y_: ys})
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("after {} training steps, test accuracy "
              "using average model is {}".format(training_steps, test_acc))
def main(argv=None):
    """Load the MNIST data from /tmp/data and run training.

    Args:
        argv: Command-line arguments passed in by ``tf.app.run``; unused.
            Defaults to ``None`` so that ``main()`` can still be called
            directly.
    """
    mnist = input_data.read_data_sets('/tmp/data', one_hot=True)
    train(mnist)


if __name__ == '__main__':
    # tf.app.run() invokes main(argv), hence the argv parameter above.
    tf.app.run()

Analysis

对结果的影响: 网络结构最重要,其他Moving average, regularization也会有或多或少的影响。

Improve

之前的定义inference的时候将所有的变量作为参数传入了函数。如果network层数增加就不方便这么写了。tf提供通过变量名称来创建或者获取一个变量的机制。函数: tf.get_variable和tf.variable_scope

1
2
3
# The two definitions below create the same kind of variable named 'v'.
# In the second form the name must be given to tf.Variable; passing it to
# tf.constant would only name the initial-value constant.
v = tf.get_variable('v', shape=[1], initializer=tf.constant_initializer(1.0))
v = tf.Variable(tf.constant(1.0, shape=[1]), name='v')

tf中的初始化函数与生成函数对应,只是多一个Initializer后缀

tf.get_variable会根据名字去创建或者获取变量。通过tf.variable_scope来生成上下文,有点类似C++ namespace的意思。

Plus: 变量name会有:0,表示这个变量是生成变量这个运算的第一个结果

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
def inference(input_sensor, reuse=False):
    """Compute the forward pass, managing variables through variable scopes.

    Args:
        input_sensor: Tensor fed into the first layer. The parameter name
            is kept unchanged so existing keyword callers keep working.
        reuse: Forwarded to ``tf.variable_scope`` for both layers; pass
            ``True`` to fetch variables that were already created.

    Returns:
        The output of the second layer; no softmax is applied here.
    """
    with tf.variable_scope('layer1', reuse=reuse):
        weight = tf.get_variable(
            'weight', [input_node, layer1_node],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        bias = tf.get_variable(
            'bias', [layer1_node],
            initializer=tf.constant_initializer(0.0))
        layer1 = tf.nn.relu(tf.matmul(input_sensor, weight) + bias)
    with tf.variable_scope('layer2', reuse=reuse):
        weight = tf.get_variable(
            'weight', [layer1_node, output_node],
            initializer=tf.truncated_normal_initializer(stddev=0.1))
        # The initializer must be passed by keyword: the third positional
        # parameter of tf.get_variable is dtype.
        bias = tf.get_variable(
            'bias', [output_node],
            initializer=tf.constant_initializer(0.0))
        layer2 = tf.matmul(layer1, weight) + bias
    return layer2

Storage

模型持久化。

saver = tf.train.Saver()

saver.save(sess, '/path/model.ckpt')

这个目录下会有三个文件,因为tf将计算图的结构和图上参数值分开保存。

model.ckpt.meta :计算图结构

model.ckpt: 保存了tf中每一个变量的取值。

checkpoint: 保存了一个目录下所有的模型文件列表。

Load

加载

saver = tf.train.Saver()

saver.restore(sess, 'path/model.ckpt')

注意直接加载参数值的话,还是需要事先定义好与之前保存时同样的结构,但是不需要运行tf.global_variables_initializer()。

加载之后可以通过张量的名字来获取张量

print (sess.run(tf.get_default_graph().get_tensor_by_name('add:0')))

直接加载图

1
2
3
4
# Load the saved graph structure directly instead of rebuilding it in code.
# For a checkpoint saved under the prefix 'path/model.ckpt', the graph file
# is that prefix plus '.meta'.
saver = tf.train.import_meta_graph('path/model.ckpt.meta')
with tf.Session() as sess:
    saver.restore(sess, 'path/model.ckpt')
    # Fetch a tensor from the loaded graph by its name.
    print(sess.run(tf.get_default_graph().get_tensor_by_name('add:0')))

可以通过给Saver([v1])参数来指定保存的变量,也可以通过给字典参数来重命名被加载的变量与当前变量一致。saver = tf.train.Saver({'v1':v1, 'v2':v2})。

这样的目的主要是为了方便使用滑动平均值。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Variable whose moving average is tracked in this example.
v = tf.Variable(0, dtype=tf.float32, name='v')
ema = tf.train.ExponentialMovingAverage(0.99)
avg_op = ema.apply(tf.global_variables())
for var in tf.global_variables():
    print(var.name)  # v:0 v/ExponentialMovingAverage:0

# Save both the variable and its shadow (moving-average) variable.
saver = tf.train.Saver()
with tf.Session() as sess:
    init_op = tf.global_variables_initializer()
    sess.run(init_op)
    sess.run(tf.assign(v, 10))
    sess.run(avg_op)
    saver.save(sess, 'path/model.ckpt')

# Restore the saved moving average into v by renaming it on load.
saver = tf.train.Saver({'v/ExponentialMovingAverage': v})
with tf.Session() as sess:
    saver.restore(sess, 'path/model.ckpt')
    print(sess.run(v))

# The renaming dictionary can also be generated automatically.
saver = tf.train.Saver(ema.variables_to_restore())

tf提供了convert_variables_to_constants来将计算图中的变量以及取值通过常量的方式保存,这样整个tf计算图可以放在一个文件中。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from tensorflow.python.framework import graph_util
from tensorflow.python.platform import gfile

# Part 1: freeze the graph and write it to a single file.
v1 = tf.Variable(tf.constant(1.0, shape=[1]), name='v1')
v2 = tf.Variable(tf.constant(2.0, shape=[1]), name='v2')
result = v1 + v2

init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    # The GraphDef alone is enough to compute from the input layer to the
    # output layer.
    graph_def = tf.get_default_graph().as_graph_def()
    # Replace the variables with constants holding their current values,
    # keeping only what the 'add' node needs.
    output_graph_def = graph_util.convert_variables_to_constants(
        sess, graph_def, ['add'])
    with tf.gfile.GFile('path/model.pb', 'wb') as f:
        f.write(output_graph_def.SerializeToString())

# Part 2: load the frozen graph back and evaluate the saved node.
with tf.Session() as sess:
    model_filename = 'path/model.pb'
    # GFile replaces the deprecated FastGFile and matches the writer above.
    with gfile.GFile(model_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    # 'add:0' names the first output tensor of the 'add' node.
    result = tf.import_graph_def(graph_def, return_elements=['add:0'])
    print(sess.run(result))

A more elegant implementation

符合解耦的软工要求的实现,将之前的代码分成了三部分,值得借鉴参考

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# Split the previous code into three parts
# First one: mnist_inference.py
import tensorflow as tf
# Number of rows of the first layer's weight matrix (input width).
input_node = 784
# Number of columns of the second layer's weight matrix (output width).
output_node = 10
# Number of units in the hidden layer between the two weight matrices.
layer1_node = 500
# Variables are obtained through tf.get_variable. They are created when the
# network is trained; at test time their values are loaded from the saved
# model. Because moving-average variables can be renamed while loading, the
# same names give the raw variables during training and their moving
# averages during testing.
def get_weight_variable(shape, regularizer):
    """Create or fetch the 'weight' variable of the current variable scope.

    Args:
        shape: Shape of the weight variable.
        regularizer: Callable applied to the weight; its result is added to
            the 'losses' collection. A falsy value skips this step.

    Returns:
        The weight variable.
    """
    initializer = tf.truncated_normal_initializer(stddev=0.1)
    weight = tf.get_variable('weight', shape, initializer=initializer)
    if regularizer:
        penalty = regularizer(weight)
        tf.add_to_collection('losses', penalty)
    return weight
def inference(input_tensor, regularizer):
    """Compute the forward pass of the two-layer network.

    Args:
        input_tensor: Tensor fed into the first layer.
        regularizer: Forwarded to ``get_weight_variable`` for both layers.

    Returns:
        The output of the second layer; no softmax is applied here.
    """
    # Using tf.get_variable or tf.Variable makes no essential difference
    # here, because this function is not called more than once per program
    # in training or testing. If it were, reuse would have to be set to True
    # after the first call.
    with tf.variable_scope('layer1'):
        hidden_weight = get_weight_variable(
            [input_node, layer1_node], regularizer)
        hidden_bias = tf.get_variable(
            'bias', [layer1_node],
            initializer=tf.constant_initializer(0.0))
        hidden = tf.nn.relu(
            tf.matmul(input_tensor, hidden_weight) + hidden_bias)

    with tf.variable_scope('layer2'):
        output_weight = get_weight_variable(
            [layer1_node, output_node], regularizer)
        output_bias = tf.get_variable(
            'bias', [output_node],
            initializer=tf.constant_initializer(0.0))
        logits = tf.matmul(hidden, output_weight) + output_bias

    return logits
# mnist_train.py
import os
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference
# Number of training examples requested per step via next_batch().
batch_size = 100
# Initial learning rate passed to tf.train.exponential_decay.
lr_base = 0.8
# Decay rate passed to tf.train.exponential_decay.
lr_decay = 0.99
# Scale passed to the L2 regularizer.
regular_rate = 0.0001
# Number of iterations of the training loop.
training_steps = 30000
# Decay passed to tf.train.ExponentialMovingAverage.
moving_average_decay = 0.99
# Directory and file name joined to form the checkpoint path for saver.save.
model_save_path = '/path/model/'
model_name = 'model.ckpt'
def train(mnist):
    """Train the network from mnist_inference and save checkpoints.

    Every 1000 steps the current batch loss is printed and the model is
    saved under ``model_save_path``.

    Args:
        mnist: Dataset object exposing a ``train`` split, as returned by
            ``input_data.read_data_sets``.
    """
    # The layer sizes live in mnist_inference, so they are read from there.
    x = tf.placeholder(
        tf.float32, [None, mnist_inference.input_node], name='x-input')
    y_ = tf.placeholder(
        tf.float32, [None, mnist_inference.output_node], name='y-input')

    regularizer = tf.contrib.layers.l2_regularizer(regular_rate)
    y = mnist_inference.inference(x, regularizer)

    global_step = tf.Variable(0, trainable=False)
    ema = tf.train.ExponentialMovingAverage(
        moving_average_decay, global_step)
    avg_op = ema.apply(tf.trainable_variables())

    # This op only accepts named arguments.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y, labels=tf.argmax(y_, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    # The regularization terms were put into the 'losses' collection by
    # mnist_inference.inference.
    loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

    lr = tf.train.exponential_decay(
        lr_base, global_step, mnist.train.num_examples / batch_size,
        lr_decay)
    train_step = tf.train.GradientDescentOptimizer(lr).minimize(
        loss, global_step=global_step)
    # Tie the parameter update and the moving-average update into one op.
    with tf.control_dependencies([train_step, avg_op]):
        train_op = tf.no_op(name='train')

    # Create the Saver.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(training_steps):
            xs, ys = mnist.train.next_batch(batch_size)
            _, loss_value, step = sess.run(
                [train_op, loss, global_step],
                feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print('after {} training steps, loss on training batch '
                      'is {}'.format(step, loss_value))
                # Passing global_step appends the step number to the file
                # name, e.g. model.ckpt-1000.
                saver.save(sess, os.path.join(
                    model_save_path, model_name),
                    global_step=global_step)
def main(argv=None):
    """Load the MNIST data from /tmp/data and run training.

    Args:
        argv: Command-line arguments passed in by ``tf.app.run``; unused.
            Defaults to ``None`` so that ``main()`` can still be called
            directly.
    """
    mnist = input_data.read_data_sets('/tmp/data', one_hot=True)
    train(mnist)


if __name__ == '__main__':
    # tf.app.run() invokes main(argv), hence the argv parameter above.
    tf.app.run()
# mnist_eval.py
import time
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference
import mnist_train
# Reload the latest model every 10 seconds.
eval_interval_secs = 10
def evaluate(mnist):
    """Repeatedly evaluate the latest checkpoint on the validation split.

    Every ``eval_interval_secs`` seconds the newest checkpoint found in
    ``mnist_train.model_save_path`` is restored and its validation accuracy
    is printed. The function returns as soon as no checkpoint is found.

    Args:
        mnist: Dataset object exposing a ``validation`` split, as returned
            by ``input_data.read_data_sets``.
    """
    with tf.Graph().as_default():
        # The layer sizes live in mnist_inference, so they are read from
        # there.
        x = tf.placeholder(
            tf.float32, [None, mnist_inference.input_node], name='x-input')
        y_ = tf.placeholder(
            tf.float32, [None, mnist_inference.output_node], name='y-input')
        validate_feed = {x: mnist.validation.images,
                         y_: mnist.validation.labels}

        # The regularization loss is irrelevant when testing, so pass None.
        y = mnist_inference.inference(x, None)
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Map each variable to its saved moving average, so the forward
        # pass above runs on the averaged parameters.
        ema = tf.train.ExponentialMovingAverage(
            mnist_train.moving_average_decay)
        var_restore = ema.variables_to_restore()
        saver = tf.train.Saver(var_restore)

        while True:
            with tf.Session() as sess:
                ckpt = tf.train.get_checkpoint_state(
                    mnist_train.model_save_path)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # The step number is the suffix after the last '-' in
                    # the checkpoint file name.
                    global_step = ckpt.model_checkpoint_path.split(
                        '/')[-1].split('-')[-1]
                    acc = sess.run(accuracy, feed_dict=validate_feed)
                    print('after {} training steps, validation accuracy '
                          'is {}'.format(global_step, acc))
                else:
                    print(' no ')
                    return
            time.sleep(eval_interval_secs)
def main(argv=None):
    """Load the MNIST data from /tmp/data and run the evaluation loop.

    Args:
        argv: Command-line arguments passed in by ``tf.app.run``; unused.
            Defaults to ``None`` so that ``main()`` can still be called
            directly.
    """
    mnist = input_data.read_data_sets('/tmp/data', one_hot=True)
    # This is the evaluation script, so it runs evaluate(), not train().
    evaluate(mnist)


if __name__ == '__main__':
    # tf.app.run() invokes main(argv), hence the argv parameter above.
    tf.app.run()