Week 05 (Introduction to Keras and TensorFlow)

# !pip install tensorflow # uncomment if you don't have tensorflow installed
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import matplotlib.pyplot as plt

All about Tensors and Tensorflow

# All-ones or all-zeros tensors

x = tf.ones(shape = (2,1)) # 2x1 matrix of ones, similar to np.ones((2,1))

x = tf.zeros(shape = (2,1)) # 2x1 matrix of zeros, similar to np.zeros((2,1))
 [1.]], shape=(2, 1), dtype=float32)
 [0.]], shape=(2, 1), dtype=float32)
# Random tensors

# create a tensor with random values from a normal distribution
x = tf.random.normal(shape = (2,3), mean = 0, stddev = 1)

# create a tensor with random values from a uniform distribution
x = tf.random.uniform(shape = (2,3), minval = 0, maxval = 1)
[[ 0.63700163  1.8413717   0.12851602]
 [-1.0153099  -1.3446143   1.6644784 ]], shape=(2, 3), dtype=float32)
[[0.838336   0.8172778  0.42057896]
 [0.21810079 0.07237494 0.9222772 ]], shape=(2, 3), dtype=float32)
# NumPy arrays are assignable while tensors are not
x = np.random.normal(loc = 0, scale = 1, size = (2,3))
x[0,0] = 100
[[ 1.00000000e+02 -1.25304057e+00 -1.18967720e+00]
 [ 4.74877369e-01 -8.13430401e-02 -4.57822064e-01]]
# Tensors, unlike NumPy arrays, are not assignable: the item assignment below raises a TypeError
x = tf.ones(shape = (2,3))
x[0,0] = 100
TypeError                                 Traceback (most recent call last)
Cell In[10], line 3
      1 # numpy array are assignable while tensors are not
      2 x = tf.ones(shape = (2,3))
----> 3 x[0,0] = 100
      4 print(x)

TypeError: 'tensorflow.python.framework.ops.EagerTensor' object does not support item assignment
# Creating a TensorFlow variable
v = tf.Variable(initial_value = tf.random.normal(shape = (2,3)))

v.assign(tf.zeros(shape = (2,3)))
<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[-0.10799041,  2.325188  , -0.20042379],
       [ 0.48759696,  0.53195345,  0.29525948]], dtype=float32)>

<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[0., 0., 0.],
       [0., 0., 0.]], dtype=float32)>
# Assigning a value to a subset of a TensorFlow variable
<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[100.,   0.,   0.],
       [  0.,   0.,   0.]], dtype=float32)>
# adding to the current value
v.assign_add(tf.ones(shape = (2,3)))
<tf.Variable 'Variable:0' shape=(2, 3) dtype=float32, numpy=
array([[101.,   1.,   1.],
       [  1.,   1.,   1.]], dtype=float32)>
# just like numpy, TensorFlow offers a large collection of tensor operations to express
# mathematical formulas.
a = tf.ones((2, 2))
b = tf.square(a)
c = tf.sqrt(a)
d = b + c
e = tf.matmul(a, b)
e *= d
[[4. 4.]
 [4. 4.]], shape=(2, 2), dtype=float32)

So far, TensorFlow seems to look a lot like NumPy. But here’s something NumPy can’t do: retrieve the gradient of any differentiable expression with respect to any of its inputs. Just open a GradientTape scope, apply some computation to one or several input tensors, and retrieve the gradient of the result with respect to the inputs.

# Using the GradientTape
input_var = tf.Variable(initial_value = 3.0)
with tf.GradientTape() as tape:
    result = tf.square(input_var)
grad = tape.gradient(result, input_var)
tf.Tensor(6.0, shape=(), dtype=float32)
# Using GradientTape with constant tensor inputs
input_var = tf.constant(3.0)
with tf.GradientTape() as tape:
    # Only trainable variables are tracked automatically. A constant tensor
    # must be watched explicitly, otherwise tape.gradient() returns None.
    tape.watch(input_var)
    result = tf.square(input_var)
grad = tape.gradient(result, input_var)
tf.Tensor(6.0, shape=(), dtype=float32)
# Using nested gradient tapes to compute second-order gradients
# `time` is the variable both derivatives are taken with respect to.
time = tf.Variable(0.0)
with tf.GradientTape() as outer_tape:
    with tf.GradientTape() as inner_tape:
        position = 4.9 * time ** 2
    # First derivative. It is computed inside outer_tape's scope so that the
    # outer tape records it and can differentiate it a second time.
    speed = inner_tape.gradient(position, time)
# Second derivative: 9.8 for position = 4.9 * time ** 2.
acceleration = outer_tape.gradient(speed, time)

tf.Tensor(0.0, shape=(), dtype=float32)
tf.Tensor(9.8, shape=(), dtype=float32)

An end-to-end example: A linear classifier in pure TensorFlow

# Generating two classes of random points in a 2D plane
num_samples_per_class, num_classes = 1000, 2
# Both classes share the same covariance matrix and differ only in their mean.
negative_samples = np.random.multivariate_normal(mean = [0,3], cov = [[1,0.5],[0.5,1]], size = num_samples_per_class)
positive_samples = np.random.multivariate_normal(mean = [3,0], cov = [[1,0.5],[0.5,1]], size = num_samples_per_class)

# Stack the negative samples first, then the positive ones, cast to float32.
inputs = np.vstack((negative_samples, positive_samples)).astype(np.float32)
# Matching labels in the same row order: 0 for negative rows, 1 for positive rows.
targets = np.vstack((np.zeros((num_samples_per_class, 1), dtype = 'float32'), np.ones((num_samples_per_class, 1), dtype = 'float32')))
import matplotlib.pyplot as plt
plt.scatter(inputs[:, 0], inputs[:, 1], c=targets[:, 0])

# Creating the linear classifier variables
input_dim = 2
output_dim = 1
W = tf.Variable(tf.random.normal(shape = (input_dim, output_dim)))
b = tf.Variable(tf.random.normal(shape = (output_dim,)))
# the forward pass
def model(inputs):
    """Forward pass: affine transform of ``inputs`` followed by a sigmoid.

    Reads the module-level variables ``W`` (weights) and ``b`` (bias).
    """
    logits = tf.matmul(inputs, W) + b
    return tf.sigmoid(logits)
# The binary cross-entropy loss function

def entropy_loss(targets, predictions, epsilon=1e-7):
    """Return the mean binary cross-entropy between targets and predictions.

    Args:
        targets: Ground-truth labels (0 or 1), same shape as ``predictions``.
        predictions: Predicted probabilities, e.g. the output of a sigmoid.
        epsilon: Predictions are clipped to ``[epsilon, 1 - epsilon]`` before
            the logarithm is taken.

    Returns:
        A scalar tensor: the per-sample losses averaged over all elements.
    """
    # A saturated sigmoid can output exactly 0 or 1. log(0) is -inf, and
    # 0 * -inf is NaN, which would poison the loss and every gradient.
    predictions = tf.clip_by_value(predictions, epsilon, 1.0 - epsilon)
    per_sample_losses = - targets * tf.math.log(predictions) - (1 - targets) * tf.math.log(1 - predictions)
    return tf.reduce_mean(per_sample_losses)

# training step 
learning_rate = 0.1
def training_step(inputs, targets):
    """Run one gradient-descent update of ``W`` and ``b``.

    Args:
        inputs: Batch of input points passed to ``model``.
        targets: Labels for ``inputs``, passed to ``entropy_loss``.

    Returns:
        The scalar loss computed before the weights were updated.
    """
    # Only the forward pass needs to be recorded by the tape.
    with tf.GradientTape() as tape:
        predictions = model(inputs)
        # The loss defined in this file is entropy_loss; the previously
        # referenced square_loss does not exist and raised a NameError.
        loss = entropy_loss(targets, predictions)
    grad_loss_wrt_W, grad_loss_wrt_b = tape.gradient(loss, [W, b])
    # Step against the gradient, scaled by the module-level learning_rate.
    W.assign_sub(learning_rate * grad_loss_wrt_W)
    b.assign_sub(learning_rate * grad_loss_wrt_b)
    return loss

# training loop/process/epoch
for step in range(100):
    loss = training_step(inputs, targets)
    print(f"Loss at step {step}: {loss:.4f}")
predictions = model(inputs)
plt.scatter(inputs[:, 0], inputs[:, 1], c=predictions[:, 0] > 0.5)
 [0.9823857 ]
 [0.9144001 ]
 [0.98359877]], shape=(2000, 1), dtype=float32)

Deep learning with Keras

So, the APIs that we will often use when building a neural network in Keras are keras.layers and keras.models.

Simply put, each layer in keras.layers is responsible for data processing (taking input and producing output), while keras.models is the API for connecting layers to one another.

# Using the Keras Sequential API to build a small binary classifier
# (one ReLU hidden layer, so the decision boundary is no longer strictly linear)
model = keras.Sequential([
    keras.layers.InputLayer(input_shape  = (2,)), # input layers (stateless layer)
    keras.layers.Dense(units = 10, activation = 'relu'), # FC  layer (stateful layer)
    keras.layers.Dense(units = 1, activation = 'sigmoid'), # FC layer (stateful layer)
])  # close the layer list and the Sequential(...) call
# plotting the model
keras.utils.plot_model(model, show_shapes = True, show_layer_names = True, rankdir = 'TB', expand_nested = False, dpi = 96)

Once the model architecture is defined, you still have to choose three more things:

  • Loss function (objective function)—The quantity that will be minimized during training. It represents a measure of success for the task at hand
  • Optimizer—Determines how the network will be updated based on the loss function. It implements a specific variant of stochastic gradient descent (SGD).
  • Metrics—The measures of success you want to monitor during training and validation, such as classification accuracy. Unlike the loss, training will not optimize directly for these metrics. As such, metrics don’t need to be differentiable.

Once you’ve picked your loss, optimizer, and metrics, you can use the built-in compile() and fit() methods to start training your model.

The compile() method configures the training process

# we can pass strings to the loss and metrics arguments

# or we can pass loss and metrics objects (both produce the same result)
# benefit of using objects is that we can configure them
# don't run this code

class my_custom_loss(keras.losses.Loss):

class my_custom_metric_1(keras.metrics.Metric):

class my_custom_metric_2(keras.metrics.Metric):

              metrics=[my_custom_metric_1, my_custom_metric_2]

The built-in loss functions and metrics can be found in keras.losses and keras.metrics documentation.

After compile(), the next method is fit(), which implements the training loop itself. The key arguments of fit() include the data to train on, which is typically passed as NumPy arrays or a TensorFlow Dataset object. The number of epochs to train for is also specified, indicating how many times the training loop should iterate over the passed data. Additionally, the batch size to use within each epoch of mini-batch gradient descent is specified, indicating the number of training examples considered to compute the gradients for one weight update step.

The fit() method returns a History object, which contains a record of the loss and metric values observed during training. This record is stored as a dictionary, with keys being the name of the metrics and values being a list of values recorded at each epoch.

x_train = inputs
y_train = targets
history = model.fit(x_train, y_train, batch_size=64, epochs=3, validation_split=0.2)
Epoch 1/3
25/25 [==============================] - 1s 17ms/step - loss: 0.1882 - accuracy: 0.9937 - val_loss: 0.2297 - val_accuracy: 0.9825
Epoch 2/3
25/25 [==============================] - 0s 5ms/step - loss: 0.1431 - accuracy: 0.9956 - val_loss: 0.1806 - val_accuracy: 0.9875
Epoch 3/3
25/25 [==============================] - 0s 5ms/step - loss: 0.1119 - accuracy: 0.9969 - val_loss: 0.1423 - val_accuracy: 0.9875

# plotting the loss and accuracy curves
plt.plot(history.history['loss'], label = 'training loss')
plt.plot(history.history['val_loss'], label = 'validation loss')

Inference: Using a model after training

Instead of calling model(new_data) directly, we use model.predict(new_data) to make predictions on new data.

new_inputs = np.random.uniform(low = -1, high = 3, size = (256, 2))
predictions = model.predict(new_inputs, batch_size=128)
2/2 [==============================] - 0s 5ms/step
# check the shape of the predictions
(256, 1)
# get class predictions
predictions_class = np.round(predictions)

Additional Notes

For those interested in learning more about TensorFlow and Keras, I personally believe that the documentation available on the web is good enough. However, if you prefer reading a book, I recommend “Deep Learning with Python” by Francois Chollet, the creator of Keras. This book essentially summarizes the content of the documentation in a more cohesive and structured manner.


  • Chollet, F. (2021). Deep Learning with Python. Manning Publications.
  • TensorFlow. (n.d.). Retrieved from https://www.tensorflow.org/
  • Keras. (n.d.). Retrieved from https://keras.io/