From charlesreid1

No edit summary
 
(12 intermediate revisions by the same user not shown)
Line 1: Line 1:
=MNIST Convolutional Neural Network=
=Simple MNIST Convolutional Network=


Concept: Simple, end-to-end, LeNet-5-like convolutional MNIST model example. Meant as a tutorial for simple convolutional models.
==Input Function==


Link to code: https://github.com/tensorflow/models/blob/master/tutorials/image/mnist/convolutional.py
Define an input function. It contains an inner function that parses the serialized examples one at a time, normalizes each image, and one-hot encodes its digit label.


Link to tutorial(s): https://www.tensorflow.org/tutorials/
<pre>
def input_fn(mode, batch_size=1):
  """A simple input_fn using the contrib.data input pipeline."""


Link to original data set: http://yann.lecun.com/exdb/mnist/
  def example_parser(serialized_example):
    """Parses a single tf.Example into image and label tensors."""
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
        })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image.set_shape([28 * 28])


==License==
    # Normalize the values of the image from the range [0, 255] to [-0.5, 0.5]
    image = tf.cast(image, tf.float32) / 255 - 0.5
    label = tf.cast(features['label'], tf.int32)
    return image, tf.one_hot(label, 10)


<pre>
  if mode == tf.estimator.ModeKeys.TRAIN:
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
    tfrecords_file = os.path.join(FLAGS.data_dir, 'train.tfrecords')
#
  else:
# Licensed under the Apache License, Version 2.0 (the "License");
     assert mode == tf.estimator.ModeKeys.EVAL, 'invalid mode'
# you may not use this file except in compliance with the License.
    tfrecords_file = os.path.join(FLAGS.data_dir, 'test.tfrecords')
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
</pre>


==Import Statements and Variables==
  assert tf.gfile.Exists(tfrecords_file), (
      'Run convert_to_records.py first to convert the MNIST data to TFRecord '
      'file format.')


Import statements:
  dataset = tf.contrib.data.TFRecordDataset([tfrecords_file])


<pre>
  # For training, repeat the dataset forever
from __future__ import absolute_import
  if mode == tf.estimator.ModeKeys.TRAIN:
from __future__ import division
    dataset = dataset.repeat()
from __future__ import print_function


import argparse
  # Map example_parser over dataset, and batch results by up to batch_size
import gzip
  dataset = dataset.map(
import os
      example_parser, num_threads=1, output_buffer_size=batch_size)
import sys
  dataset = dataset.batch(batch_size)
import time
  images, labels = dataset.make_one_shot_iterator().get_next()


import numpy
  return images, labels
from six.moves import urllib
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow as tf
</pre>
</pre>


Variable definitions for use in the rest of the model:
==Prepare Model==


<pre>
<pre>
SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
def mnist_model(inputs, mode):
WORK_DIRECTORY = 'data'
  """Takes the MNIST inputs and mode and outputs a tensor of logits."""
IMAGE_SIZE = 28
  # Input Layer
NUM_CHANNELS = 1
  # Reshape X to 4-D tensor: [batch_size, width, height, channels]
PIXEL_DEPTH = 255
  # MNIST images are 28x28 pixels, and have one color channel
NUM_LABELS = 10
  inputs = tf.reshape(inputs, [-1, 28, 28, 1])
VALIDATION_SIZE = 5000  # Size of the validation set.
  data_format = FLAGS.data_format
SEED = 66478  # Set to None for random seed.
 
BATCH_SIZE = 64
  if data_format is None:
NUM_EPOCHS = 10
    # When running on GPU, transpose the data from channels_last (NHWC) to
EVAL_BATCH_SIZE = 64
    # channels_first (NCHW) to improve performance.
EVAL_FREQUENCY = 100  # Number of steps between evaluations.
    # See https://www.tensorflow.org/performance/performance_guide#data_formats
FLAGS = None
    data_format = ('channels_first' if tf.test.is_built_with_cuda() else
                  'channels_last')
 
  if data_format == 'channels_first':
    inputs = tf.transpose(inputs, [0, 3, 1, 2])
</pre>
</pre>


==Obtaining the Data==
==Construct Model==


Several functions are defined to help obtain the data. First, define the variable types we will use in the model:
<pre>
  # Convolutional Layer #1
  # Computes 32 features using a 5x5 filter with ReLU activation.
  # Padding is added to preserve width and height.
  # Input Tensor Shape: [batch_size, 28, 28, 1]
  # Output Tensor Shape: [batch_size, 28, 28, 32]
  conv1 = tf.layers.conv2d(
      inputs=inputs,
      filters=32,
      kernel_size=[5, 5],
      padding='same',
      activation=tf.nn.relu,
      data_format=data_format)


<pre>
  # Pooling Layer #1
def data_type():
  # First max pooling layer with a 2x2 filter and stride of 2
   """Return the type of the activations, weights, and placeholder variables."""
  # Input Tensor Shape: [batch_size, 28, 28, 32]
   if FLAGS.use_fp16:
  # Output Tensor Shape: [batch_size, 14, 14, 32]
    return tf.float16
   pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2,
  else:
                                  data_format=data_format)
    return tf.float32
 
</pre>
  # Convolutional Layer #2
  # Computes 64 features using a 5x5 filter.
  # Padding is added to preserve width and height.
   # Input Tensor Shape: [batch_size, 14, 14, 32]
  # Output Tensor Shape: [batch_size, 14, 14, 64]
  conv2 = tf.layers.conv2d(
      inputs=pool1,
      filters=64,
      kernel_size=[5, 5],
      padding='same',
      activation=tf.nn.relu,
      data_format=data_format)


Now define a function that will attempt to download the data if it does not already exist on disk. This uses urllib to obtain the MNIST files, and TensorFlow's gfile module to interact with the file and filesystem.
  # Pooling Layer #2
  # Second max pooling layer with a 2x2 filter and stride of 2
  # Input Tensor Shape: [batch_size, 14, 14, 64]
  # Output Tensor Shape: [batch_size, 7, 7, 64]
  pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2,
                                  data_format=data_format)


<pre>
  # Flatten tensor into a batch of vectors
def maybe_download(filename):
  # Input Tensor Shape: [batch_size, 7, 7, 64]
  """Download the data from Yann's website, unless it's already here."""
   # Output Tensor Shape: [batch_size, 7 * 7 * 64]
   if not tf.gfile.Exists(WORK_DIRECTORY):
   pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    tf.gfile.MakeDirs(WORK_DIRECTORY)
  filepath = os.path.join(WORK_DIRECTORY, filename)
   if not tf.gfile.Exists(filepath):
    filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename, filepath)
    with tf.gfile.GFile(filepath) as f:
      size = f.size()
    print('Successfully downloaded', filename, size, 'bytes.')
  return filepath
</pre>


Once the data is downloaded, it must be converted to a format convenient for TensorFlow - in particular, a 4D tensor in which the first index is the image number, the second and third are the height and width (y, x), and the fourth is the channel of the image.
  # Dense Layer
  # Densely connected layer with 1024 neurons
  # Input Tensor Shape: [batch_size, 7 * 7 * 64]
  # Output Tensor Shape: [batch_size, 1024]
  dense = tf.layers.dense(inputs=pool2_flat, units=1024,
                          activation=tf.nn.relu)


These values are then normalized and re-scaled.
  # Add dropout operation; 0.6 probability that element will be kept
  dropout = tf.layers.dropout(
      inputs=dense, rate=0.4, training=(mode == tf.estimator.ModeKeys.TRAIN))


<pre>
  # Logits layer
def extract_data(filename, num_images):
  # Input Tensor Shape: [batch_size, 1024]
  """Extract the images into a 4D tensor [image index, y, x, channels].
   # Output Tensor Shape: [batch_size, 10]
   Values are rescaled from [0, 255] down to [-0.5, 0.5].
   logits = tf.layers.dense(inputs=dropout, units=10)
   """
  return logits
  print('Extracting', filename)
  with gzip.open(filename) as bytestream:
    bytestream.read(16)
    buf = bytestream.read(IMAGE_SIZE * IMAGE_SIZE * num_images * NUM_CHANNELS)
    data = numpy.frombuffer(buf, dtype=numpy.uint8).astype(numpy.float32)
    data = (data - (PIXEL_DEPTH / 2.0)) / PIXEL_DEPTH
    data = data.reshape(num_images, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)
    return data
</pre>
</pre>


The labels - the target digit for each image - must also be put into a format suitable for TensorFlow - a 1D vector of int64 label IDs:
==Get Estimator==


<pre>
<pre>
def extract_labels(filename, num_images):
def mnist_model_fn(features, labels, mode):
   """Extract the labels into a vector of int64 label IDs."""
   """Model function for MNIST."""
   print('Extracting', filename)
   logits = mnist_model(features, mode)
   with gzip.open(filename) as bytestream:
 
    bytestream.read(8)
   predictions = {
    buf = bytestream.read(1 * num_images)
      'classes': tf.argmax(input=logits, axis=1),
     labels = numpy.frombuffer(buf, dtype=numpy.uint8).astype(numpy.int64)
      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
  return labels
  }
</pre>
 
  if mode == tf.estimator.ModeKeys.PREDICT:
     return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
 
  loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)


There's also a utility for creating a fake data set.
  # Configure the training op
  if mode == tf.estimator.ModeKeys.TRAIN:
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    train_op = optimizer.minimize(loss, tf.train.get_or_create_global_step())
  else:
    train_op = None


==Error Rate==
  accuracy = tf.metrics.accuracy(
      tf.argmax(labels, axis=1), predictions['classes'])
  metrics = {'accuracy': accuracy}


There is a function defined to compute the error rate. It computes the accuracy first: sums up the number of correctly-labeled digits, divides by the total number of digits, multiplies by 100 to convert to percent. Last, it subtracts the accuracy from 100 to get a percent error.
  # Create a tensor named train_accuracy for logging purposes
  tf.identity(accuracy[1], name='train_accuracy')
  tf.summary.scalar('train_accuracy', accuracy[1])


<pre>
   return tf.estimator.EstimatorSpec(
def error_rate(predictions, labels):
       mode=mode,
   """Return the error rate based on dense predictions and sparse labels."""
       predictions=predictions,
  return 100.0 - (
      loss=loss,
       100.0 *
      train_op=train_op,
       numpy.sum(numpy.argmax(predictions, 1) == labels) /
       eval_metric_ops=metrics)
       predictions.shape[0])
</pre>
</pre>


Note that this metric is NOT used for training the convolutional network, it is only used for printing purposes.
==Main Function==
 
<pre>
def main(unused_argv):
  # Create the Estimator
  mnist_classifier = tf.estimator.Estimator(
      model_fn=mnist_model_fn, model_dir=FLAGS.model_dir)


==Main Method==
  # Train the model
  tensors_to_log = {
      'train_accuracy': 'train_accuracy'
  }


===Get Data===
  logging_hook = tf.train.LoggingTensorHook(
      tensors=tensors_to_log, every_n_iter=100)


<pre>
   batches_per_epoch = _NUM_IMAGES['train'] / FLAGS.batch_size
   if FLAGS.self_test:
    print('Running self-test.')
    train_data, train_labels = fake_data(256)
    validation_data, validation_labels = fake_data(EVAL_BATCH_SIZE)
    test_data, test_labels = fake_data(EVAL_BATCH_SIZE)
    num_epochs = 1
  else:
    # Get the data.
    train_data_filename = maybe_download('train-images-idx3-ubyte.gz')
    train_labels_filename = maybe_download('train-labels-idx1-ubyte.gz')
    test_data_filename = maybe_download('t10k-images-idx3-ubyte.gz')
    test_labels_filename = maybe_download('t10k-labels-idx1-ubyte.gz')


    # Extract it into numpy arrays.
  mnist_classifier.train(
    train_data = extract_data(train_data_filename, 60000)
      input_fn=lambda: input_fn(tf.estimator.ModeKeys.TRAIN, FLAGS.batch_size),
    train_labels = extract_labels(train_labels_filename, 60000)
      steps=FLAGS.train_epochs * batches_per_epoch,
    test_data = extract_data(test_data_filename, 10000)
      hooks=[logging_hook])
    test_labels = extract_labels(test_labels_filename, 10000)


    # Generate a validation set.
  # Evaluate the model and print results
    validation_data = train_data[:VALIDATION_SIZE, ...]
  eval_results = mnist_classifier.evaluate(
    validation_labels = train_labels[:VALIDATION_SIZE]
      input_fn=lambda: input_fn(tf.estimator.ModeKeys.EVAL))
    train_data = train_data[VALIDATION_SIZE:, ...]
  print()
    train_labels = train_labels[VALIDATION_SIZE:]
   print('Evaluation results:\n    %s' % eval_results)
    num_epochs = NUM_EPOCHS
   train_size = train_labels.shape[0]
</pre>
</pre>



Latest revision as of 01:44, 28 October 2017

Simple MNIST Convolutional Network

Input Function

Define an input function. It contains an inner function that parses the serialized examples one at a time, normalizes each image, and one-hot encodes its digit label.

def input_fn(mode, batch_size=1):
  """Builds the MNIST input pipeline using the contrib.data Dataset API.

  Reads serialized tf.Example records from `train.tfrecords` (TRAIN mode) or
  `test.tfrecords` (EVAL mode) under `FLAGS.data_dir`.

  Args:
    mode: `tf.estimator.ModeKeys.TRAIN` or `tf.estimator.ModeKeys.EVAL`.
    batch_size: Maximum number of examples per batch.

  Returns:
    An `(images, labels)` tuple of tensors holding the next batch. Each image
    is a float32 vector of 28 * 28 values in [-0.5, 0.5]; each label is a
    one-hot vector of depth 10.

  Raises:
    ValueError: If `mode` is neither TRAIN nor EVAL.
    IOError: If the TFRecord file for `mode` does not exist.
  """

  def example_parser(serialized_example):
    """Parses a single tf.Example into image and label tensors."""
    features = tf.parse_single_example(
        serialized_example,
        features={
            'image_raw': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([], tf.int64),
        })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image.set_shape([28 * 28])

    # Normalize the values of the image from the range [0, 255] to [-0.5, 0.5]
    image = tf.cast(image, tf.float32) / 255 - 0.5
    label = tf.cast(features['label'], tf.int32)
    return image, tf.one_hot(label, 10)

  # Validate with explicit exceptions rather than `assert`: assertions are
  # removed when Python runs with -O, which would let an invalid mode fall
  # through and silently read the test set.
  if mode == tf.estimator.ModeKeys.TRAIN:
    tfrecords_file = os.path.join(FLAGS.data_dir, 'train.tfrecords')
  elif mode == tf.estimator.ModeKeys.EVAL:
    tfrecords_file = os.path.join(FLAGS.data_dir, 'test.tfrecords')
  else:
    raise ValueError('invalid mode')

  if not tf.gfile.Exists(tfrecords_file):
    raise IOError(
        'Run convert_to_records.py first to convert the MNIST data to TFRecord '
        'file format.')

  dataset = tf.contrib.data.TFRecordDataset([tfrecords_file])

  # For training, repeat the dataset forever
  if mode == tf.estimator.ModeKeys.TRAIN:
    dataset = dataset.repeat()

  # Map example_parser over dataset, and batch results by up to batch_size
  dataset = dataset.map(
      example_parser, num_threads=1, output_buffer_size=batch_size)
  dataset = dataset.batch(batch_size)
  images, labels = dataset.make_one_shot_iterator().get_next()

  return images, labels

Prepare Model

def mnist_model(inputs, mode):
  """Takes the MNIST inputs and mode and outputs a tensor of logits.

  Args:
    inputs: Batch of MNIST images. It is reshaped below to [-1, 28, 28, 1],
      so each example must hold 28 * 28 values.
    mode: A `tf.estimator.ModeKeys` value; dropout is only active when it is
      `TRAIN`.

  Returns:
    The output of the final 10-unit dense layer (the logits).
  """
  # Input Layer
  # Reshape inputs to a 4-D channels_last (NHWC) tensor:
  # [batch_size, height, width, channels]
  # MNIST images are 28x28 pixels, and have one color channel
  inputs = tf.reshape(inputs, [-1, 28, 28, 1])
  # An explicitly set FLAGS.data_format takes precedence over auto-detection.
  data_format = FLAGS.data_format

  if data_format is None:
    # When running on GPU, transpose the data from channels_last (NHWC) to
    # channels_first (NCHW) to improve performance.
    # See https://www.tensorflow.org/performance/performance_guide#data_formats
    # NOTE(review): is_built_with_cuda() presumably reports how TensorFlow was
    # built, not whether a GPU is actually present at runtime -- confirm.
    data_format = ('channels_first' if tf.test.is_built_with_cuda() else
                   'channels_last')

  if data_format == 'channels_first':
    # NHWC -> NCHW: move the channel axis from position 3 to position 1.
    inputs = tf.transpose(inputs, [0, 3, 1, 2])

Construct Model

  # NOTE: the tensor shapes in the comments below are written for
  # data_format == 'channels_last'. With 'channels_first' the channel axis
  # sits at position 1 instead (e.g. [batch_size, 32, 28, 28]).

  # Convolutional Layer #1
  # Computes 32 features using a 5x5 filter with ReLU activation.
  # Padding is added to preserve width and height.
  # Input Tensor Shape: [batch_size, 28, 28, 1]
  # Output Tensor Shape: [batch_size, 28, 28, 32]
  conv1 = tf.layers.conv2d(
      inputs=inputs,
      filters=32,
      kernel_size=[5, 5],
      padding='same',
      activation=tf.nn.relu,
      data_format=data_format)

  # Pooling Layer #1
  # First max pooling layer with a 2x2 filter and stride of 2
  # Input Tensor Shape: [batch_size, 28, 28, 32]
  # Output Tensor Shape: [batch_size, 14, 14, 32]
  pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2,
                                  data_format=data_format)

  # Convolutional Layer #2
  # Computes 64 features using a 5x5 filter with ReLU activation.
  # Padding is added to preserve width and height.
  # Input Tensor Shape: [batch_size, 14, 14, 32]
  # Output Tensor Shape: [batch_size, 14, 14, 64]
  conv2 = tf.layers.conv2d(
      inputs=pool1,
      filters=64,
      kernel_size=[5, 5],
      padding='same',
      activation=tf.nn.relu,
      data_format=data_format)

  # Pooling Layer #2
  # Second max pooling layer with a 2x2 filter and stride of 2
  # Input Tensor Shape: [batch_size, 14, 14, 64]
  # Output Tensor Shape: [batch_size, 7, 7, 64]
  pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2,
                                  data_format=data_format)

  # Flatten tensor into a batch of vectors
  # Input Tensor Shape: [batch_size, 7, 7, 64]
  # Output Tensor Shape: [batch_size, 7 * 7 * 64]
  pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])

  # Dense Layer
  # Densely connected layer with 1024 neurons and ReLU activation
  # Input Tensor Shape: [batch_size, 7 * 7 * 64]
  # Output Tensor Shape: [batch_size, 1024]
  dense = tf.layers.dense(inputs=pool2_flat, units=1024,
                          activation=tf.nn.relu)

  # Add dropout operation; 0.6 probability that element will be kept
  # (rate=0.4 is the drop fraction). Only enabled when mode is TRAIN.
  dropout = tf.layers.dropout(
      inputs=dense, rate=0.4, training=(mode == tf.estimator.ModeKeys.TRAIN))

  # Logits layer
  # No activation is passed here; softmax is applied by mnist_model_fn.
  # Input Tensor Shape: [batch_size, 1024]
  # Output Tensor Shape: [batch_size, 10]
  logits = tf.layers.dense(inputs=dropout, units=10)
  return logits

Get Estimator

def mnist_model_fn(features, labels, mode):
  """Estimator model_fn for MNIST: predictions, loss, train op and metrics.

  Args:
    features: Batch of input images, forwarded unchanged to `mnist_model`.
    labels: One-hot labels for the batch; not used in PREDICT mode.
    mode: A `tf.estimator.ModeKeys` value.

  Returns:
    A `tf.estimator.EstimatorSpec` for the given mode.
  """
  mode_keys = tf.estimator.ModeKeys
  logits = mnist_model(features, mode)

  predicted_classes = tf.argmax(input=logits, axis=1)
  class_probabilities = tf.nn.softmax(logits, name='softmax_tensor')
  predictions = {
      'classes': predicted_classes,
      'probabilities': class_probabilities,
  }

  # PREDICT returns early, before any op that depends on `labels` is built.
  if mode == mode_keys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)

  # Only TRAIN mode gets an optimizer step; otherwise there is no train op.
  train_op = None
  if mode == mode_keys.TRAIN:
    adam = tf.train.AdamOptimizer(learning_rate=1e-4)
    step_counter = tf.train.get_or_create_global_step()
    train_op = adam.minimize(loss, global_step=step_counter)

  # Labels are one-hot, so argmax recovers the class index to compare with
  # the predicted class.
  true_classes = tf.argmax(labels, axis=1)
  accuracy_metric = tf.metrics.accuracy(true_classes, predicted_classes)

  # Expose the second element of the metric pair under the fixed name
  # 'train_accuracy' so it can be looked up by name for logging, and record
  # it as a summary as well.
  accuracy_update = accuracy_metric[1]
  tf.identity(accuracy_update, name='train_accuracy')
  tf.summary.scalar('train_accuracy', accuracy_update)

  return tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=predictions,
      loss=loss,
      train_op=train_op,
      eval_metric_ops={'accuracy': accuracy_metric})

Main Function

def main(unused_argv):
  """Trains the MNIST classifier, then evaluates it and prints the results.

  Args:
    unused_argv: Ignored.
  """
  # Create the Estimator
  mnist_classifier = tf.estimator.Estimator(
      model_fn=mnist_model_fn, model_dir=FLAGS.model_dir)

  # Train the model
  # Log the tensor named 'train_accuracy' (created in mnist_model_fn) every
  # 100 iterations.
  tensors_to_log = {
      'train_accuracy': 'train_accuracy'
  }

  logging_hook = tf.train.LoggingTensorHook(
      tensors=tensors_to_log, every_n_iter=100)

  # Floor division keeps the step count an integer. With `/` this becomes a
  # float under Python 3 (or `from __future__ import division`), and `steps`
  # is meant to be a whole number of training steps.
  batches_per_epoch = _NUM_IMAGES['train'] // FLAGS.batch_size

  mnist_classifier.train(
      input_fn=lambda: input_fn(tf.estimator.ModeKeys.TRAIN, FLAGS.batch_size),
      steps=FLAGS.train_epochs * batches_per_epoch,
      hooks=[logging_hook])

  # Evaluate the model and print results
  eval_results = mnist_classifier.evaluate(
      input_fn=lambda: input_fn(tf.estimator.ModeKeys.EVAL))
  print()
  print('Evaluation results:\n    %s' % eval_results)

Flags