## Neural Style Transfer with VGG19

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

In [None]:
from tensorflow.keras.applications.vgg19 import VGG19

# this will be a global variable
vgg19 = VGG19(weights='imagenet', include_top=False)

In [None]:
vgg19.summary()

### The base image

In [None]:
base_image_path = tf.keras.utils.get_file("sanfran.jpg", origin="https://img-datasets.s3.amazonaws.com/sf.jpg")

In [None]:
base_image_path

In [None]:
base_image = tf.keras.utils.load_img(base_image_path)

In [None]:
base_image

In [None]:
type(base_image)

In [None]:
base_image.size

In [None]:
original_width, original_height = base_image.size

The new generated image will have a height of 400 pixels:

In [None]:
# these will be global variables: the size of the generated image
# (fixed height, width chosen to keep the base image's aspect ratio)
new_img_height = 400
aspect_ratio = original_width / original_height
new_img_width = round(new_img_height * aspect_ratio)

In [None]:
new_img_height, new_img_width

### Loading and processing VGG19 images

In [None]:
image = tf.keras.utils.load_img(base_image_path, target_size=(new_img_height, new_img_width)) # note order

In [None]:
image

In [None]:
np.array(image).reshape((1, new_img_height, new_img_width, 3)).shape

In [None]:
np.expand_dims(image, axis=0).shape

In [None]:
# loads an image from a file and preprocesses it for the VGG19 network

def preprocess_image(image_path):
    """Load the image file at `image_path` and prepare it as VGG19 input.

    The image is resized to the global (new_img_height, new_img_width),
    wrapped in a batch holding that single image, and run through
    vgg19.preprocess_input.  Returns the preprocessed batch.
    """
    target_size = (new_img_height, new_img_width)
    pil_image = tf.keras.utils.load_img(image_path, target_size=target_size)
    # add a leading batch axis: result has shape (1, height, width, 3)
    pixels = np.array(pil_image).reshape((1, *target_size, 3))
    return tf.keras.applications.vgg19.preprocess_input(pixels)

In [None]:
# true if the input array is a batch containing a single image

def single_image_batch(img):
    """Return True when `img` is a 4-D array whose first axis has length 1."""
    if img.ndim != 4:
        return False
    return img.shape[0] == 1

In [None]:
def values(img):
    """Print the value range, dtype and shape of a numpy array.

    Anything whose type is not exactly np.ndarray is reported as
    "Not a numpy array" instead.
    """
    if type(img) is np.ndarray:
        lowest, highest = img.min(), img.max()
        summary = f"{lowest:g} to {highest:g}, dtype={img.dtype}, shape={img.shape}"
    else:
        summary = "Not a numpy array"
    print(summary)

In [None]:
base_image_path

In [None]:
img = preprocess_image(base_image_path)

In [None]:
single_image_batch(img)

In [None]:
img.shape

In [None]:
values(img)

In [None]:
plt.imshow(img[0]);

In [None]:
# converts a numpy array into a valid image

# WARNING: this function _side-effects_ the input array!

def deprocess_image(img):
    assert single_image_batch(img), 'must be a single-image batch'
    height, width = img.shape[1], img.shape[2]
    img = img.reshape((height, width, 3))
    # undo zero-centering transformation done by vgg19.preprocess_input
    img[:,:,0] += 103.939  # mean pixel values of
    img[:,:,1] += 116.779  # each color channel
    img[:,:,2] += 123.68   # over ImageNet dataset
    # convert image from 'BGR' to 'RGB' format (another effect of vgg19.preprocess_input)
    img = img[:,:,::-1]
    return img.clip(0, 255).astype('uint8')

In [None]:
values(img)

In [None]:
img2 = deprocess_image(img)

In [None]:
values(img2)

In [None]:
values(img)  # NOTE: img values were modified!

In [None]:
plt.imshow(img2);

### The style reference image

In [None]:
style_image_path = tf.keras.utils.get_file(origin="https://img-datasets.s3.amazonaws.com/starry_night.jpg")

In [None]:
style_image = tf.keras.utils.load_img(style_image_path)

In [None]:
style_image

In [None]:
style_image.size

### Feature extractor for the pre-trained VGG19 network

In [None]:
vgg19.summary()

In [None]:
layer_outputs = dict([(layer.name, layer.output) for layer in vgg19.layers])

In [None]:
layer_outputs

In [None]:
from tensorflow.keras.models import Model

# this will be a global variable
feature_extractor = Model(inputs=vgg19.input, outputs=layer_outputs)

In [None]:
feature_extractor

In [None]:
feature_extractor(vgg19.input)

### Loss components

The overall **loss** function comprises three components: the **content loss**, the **style loss**, and the **total variation loss**.

In [None]:
# layer whose activations are compared for the content loss
content_loss_layer_name = "block5_conv2"

# layers whose activations are compared for the style loss.
# A list (not a set) so the layers are always visited in this order;
# set iteration order can change from one kernel session to the next.
style_loss_layer_names = [
    "block1_conv1",
    "block2_conv1",
    "block3_conv1",
    "block4_conv1",
    "block5_conv1",
]

# weight coefficients for each loss component
content_loss_weight = 2.5e-8
style_loss_weight = 1e-6
total_variation_loss_weight = 1e-6

### Content loss

To compute the **content loss**, we apply the VGG19 network to the base image and to the generated image, and then compare the internal activation patterns created at a deep convolution layer (`block5_conv2`) for both images. The closer the activation patterns are to each other, the lower the content loss value.

In [None]:
def content_loss(base_img, generated_img):
    """Return the sum of squared element-wise differences between the two inputs."""
    difference = generated_img - base_img
    return tf.reduce_sum(tf.square(difference))

### Style loss

The **style loss** measures how much the feature maps computed from the style-reference image and the generated image differ at varying levels of abstraction within the VGG19 convolutional network.  The correlations between the features at each layer specified by `style_loss_layer_names` are computed using the *Gram matrix* from linear algebra.

In [None]:
a = np.array([[1,2,3],[4,5,6],[7,8,9]])

In [None]:
print(a)

In [None]:
print(tf.transpose(a).numpy())

In [None]:
print(a)

In [None]:
print(tf.transpose(a, (1, 0)).numpy())

In [None]:
b = np.array([[[1,2,3],[4,5,6],[7,8,9]],
              [[11,22,33],[44,55,66],[77,88,99]],
              [[111,222,333],[444,555,666],[777,888,999]]])

In [None]:
print(b)

In [None]:
print(tf.transpose(b, (2, 0, 1)).numpy())

In [None]:
tf.shape(b)[0]

In [None]:
def show_gram_matrix(x):
    """Print each intermediate step of the Gram-matrix computation for `x`.

    `x` must be 3-D.  Nothing is returned; this is a walkthrough of what
    gram_matrix computes.
    """
    print("Input matrix:")
    print(x)

    # move the last axis to the front
    channels_first = tf.transpose(x, (2, 0, 1))
    print("\nTranspose:")
    print(channels_first.numpy())

    # flatten the two trailing axes: one row per entry of the original last axis
    num_rows = tf.shape(channels_first)[0]
    features = tf.reshape(channels_first, (num_rows, -1))
    print("\nFeatures:")
    print(features.numpy())

    # dot product of every row with every other row
    gram = tf.matmul(features, tf.transpose(features))
    print("\nGram matrix:")
    print(gram.numpy())

In [None]:
show_gram_matrix(b)

In [None]:
def gram_matrix(x):
    """Return the Gram matrix of a 3-D tensor.

    The last axis of `x` is moved to the front and the other two axes are
    flattened, giving one row per entry of the last axis.  The result holds
    the dot product of every pair of those rows.
    """
    channels_first = tf.transpose(x, (2, 0, 1))
    num_rows = tf.shape(channels_first)[0]
    rows = tf.reshape(channels_first, (num_rows, -1))
    return tf.matmul(rows, tf.transpose(rows))

In [None]:
gram_matrix(b)

In [None]:
def style_loss(style_features, generated_features):
    """Return the scaled squared difference between two Gram matrices.

    The Gram matrices of both feature tensors are compared, and the summed
    squared difference is divided by 4 * channels^2 * size^2, where channels
    is fixed at 3 and size is the global new_img_height * new_img_width.
    This version also prints the shape of `style_features` when it runs.
    """
    print("processing style_features of shape", style_features.shape)
    style_gram = gram_matrix(style_features)
    generated_gram = gram_matrix(generated_features)
    channels = 3
    size = new_img_height * new_img_width
    normalizer = 4.0 * (channels ** 2) * (size ** 2)
    return tf.reduce_sum(tf.square(style_gram - generated_gram)) / normalizer

### Total variation loss

The **total variation loss** encourages spatial continuity within the generated image, thus avoiding overly pixelated results.

In [None]:
def total_variation_loss(generated_img):
    """Return a penalty on differences between neighbouring pixels.

    `generated_img` is indexed as a 4-D array with height on axis 1 and width
    on axis 2.  Each pixel (excluding the last row and column) is compared
    with the pixel one row down and the pixel one column to the right.
    """
    height, width = generated_img.shape[1], generated_img.shape[2]
    # every pixel that has both a lower and a right-hand neighbour
    anchor = generated_img[:, :height - 1, :width - 1, :]
    below = generated_img[:, 1:, :width - 1, :]
    right = generated_img[:, :height - 1, 1:, :]
    vertical = tf.square(anchor - below)
    horizontal = tf.square(anchor - right)
    return tf.reduce_sum(tf.pow(vertical + horizontal, 1.25))

We can now define the overall loss function:

In [None]:
def compute_loss(generated_img, base_img, style_img):
    """Return the weighted sum of the content, style and total variation losses.

    The three images are stacked into one batch and passed through the global
    feature_extractor once.  Each loss component is then read from the
    activations of the layers named in content_loss_layer_name and
    style_loss_layer_names.
    """
    # positions of the three images inside the concatenated batch
    BASE, STYLE, GENERATED = 0, 1, 2

    batch = tf.concat([base_img, style_img, generated_img], axis=0)
    activations = feature_extractor(batch)

    # start from zero and accumulate each component
    total = tf.zeros(shape=())

    # content loss: base image vs. generated image at the content layer
    content_maps = activations[content_loss_layer_name]
    total += content_loss_weight * content_loss(content_maps[BASE], content_maps[GENERATED])

    # style loss: style image vs. generated image, averaged over the style layers
    layer_count = len(style_loss_layer_names)
    for name in style_loss_layer_names:
        style_maps = activations[name]
        layer_loss = style_loss(style_maps[STYLE], style_maps[GENERATED])
        total += style_loss_weight * layer_loss / layer_count

    # total variation loss on the generated image itself
    total += total_variation_loss_weight * total_variation_loss(generated_img)

    return total

### Gradient descent process

In [None]:
# compiled for speed

@tf.function
def compute_loss_and_gradient(generated_img, base_img, style_img):
    """Return (loss, d_loss / d_generated_img) for the current generated image.

    In this notebook `generated_img` is a tf.Variable; the loss is recorded
    on a gradient tape and differentiated with respect to it.
    """
    with tf.GradientTape() as tape:
        loss = compute_loss(generated_img, base_img, style_img)
    return loss, tape.gradient(loss, generated_img)

We will use a learning-rate schedule, which will start with a high learning rate and gradually decrease it as we minimize the loss.

In [None]:
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.optimizers.schedules import ExponentialDecay

# learning rate starts at 100 and is multiplied by 0.96 per 100 steps
schedule = ExponentialDecay(
    initial_learning_rate=100.0,
    decay_steps=100,
    decay_rate=0.96,
)

# plain gradient descent driven by the decaying schedule
SGD_optimizer = SGD(schedule)

Initialize the images:

In [None]:
base_img = preprocess_image(base_image_path)

In [None]:
values(base_img)

In [None]:
style_img = preprocess_image(style_image_path)

In [None]:
values(style_img)

In [None]:
np.sum(np.abs(base_img - style_img))  # check that images are different

In [None]:
generated_img = tf.Variable(preprocess_image(base_image_path))

In [None]:
values(generated_img.numpy())

In [None]:
loss, gradient = compute_loss_and_gradient(generated_img, base_img, style_img)

In [None]:
for name in style_loss_layer_names:
    num_filters = vgg19.get_layer(name).filters
    print(f"{name}: {num_filters} filters")

In [None]:
loss.numpy()

In [None]:
gradient.numpy()  # = d_loss / d_generated_img

In [None]:
np.mean(generated_img.numpy())

In [None]:
# updates generated_img in a direction that reduces the overall loss

SGD_optimizer.apply_gradients([(gradient, generated_img)])

In [None]:
np.mean(generated_img.numpy()) # generated_img changed a little

In [None]:
img = deprocess_image(generated_img.numpy())

In [None]:
plt.imshow(img);

In [None]:
# removed print statements

def style_loss(style_features, generated_features):
    """Return the scaled squared difference between two Gram matrices.

    The summed squared difference of the Gram matrices is divided by
    4 * channels^2 * size^2, where channels is fixed at 3 and size is the
    global new_img_height * new_img_width.
    """
    gram_difference = gram_matrix(style_features) - gram_matrix(generated_features)
    channels = 3
    size = new_img_height * new_img_width
    normalizer = 4.0 * (channels ** 2) * (size ** 2)
    return tf.reduce_sum(tf.square(gram_difference)) / normalizer

In [None]:
# we must recompile this, since we redefined style_loss

@tf.function
def compute_loss_and_gradient(generated_img, base_img, style_img):
    """Return the overall loss and its gradient with respect to `generated_img`."""
    with tf.GradientTape() as tape:
        total_loss = compute_loss(generated_img, base_img, style_img)
    d_loss = tape.gradient(total_loss, generated_img)
    return total_loss, d_loss

In [None]:
# run five gradient-descent steps, saving the generated image after each one
for i in range(1, 5+1):
    loss, gradient = compute_loss_and_gradient(generated_img, base_img, style_img)
    # take the descent step; without it generated_img never changes and
    # every saved image is identical
    SGD_optimizer.apply_gradients([(gradient, generated_img)])
    # report the loss just computed for this iteration
    print(f"Iteration {i}: loss={loss:.2f}")
    img = deprocess_image(generated_img.numpy())
    filename = f"img_{i:04d}.png"
    tf.keras.utils.save_img(filename, img)
    print("Saved", filename)
print("Done")

In [None]:
!ls img*

In [None]:
result = tf.keras.utils.load_img('img_0005.png')

In [None]:
result

### Putting it all together

In [None]:
# base image files
# get_file downloads each image and returns the path of the local copy
fetch = tf.keras.utils.get_file

# base image files
sanfran = fetch("sanfran.jpg", origin="https://img-datasets.s3.amazonaws.com/sf.jpg")
elephants = fetch(origin="http://science.slc.edu/jmarshall/bioai/images/elephants.jpg")
jellyfish = fetch(origin="http://science.slc.edu/jmarshall/bioai/images/jellyfish.jpg")
flamingos = fetch(origin="http://science.slc.edu/jmarshall/bioai/images/flamingos.jpg")
tiger = fetch(origin="http://science.slc.edu/jmarshall/bioai/images/tiger.jpg")

# style image files
starry_night = fetch(origin="https://img-datasets.s3.amazonaws.com/starry_night.jpg")
the_scream = fetch(origin="http://science.slc.edu/jmarshall/bioai/images/the_scream.jpg")
eiffel_tower = fetch(origin="http://science.slc.edu/jmarshall/bioai/images/eiffel_tower.jpg")


In [None]:
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.optimizers import SGD

def style_transfer(base_path, style_path, iterations=1000, save_img_tag=''):
    """Run the full style-transfer optimization and save snapshots as PNG files.

    Args:
        base_path: path of the base (content) image file.
        style_path: path of the style-reference image file.
        iterations: number of gradient-descent steps to run.
        save_img_tag: optional tag inserted into the saved filenames
            ('img_<tag>_NNNN.png' instead of 'img_NNNN.png').

    Side effects:
        Sets the globals new_img_height and new_img_width (read by
        preprocess_image and style_loss) from the base image's aspect ratio.
        Prints the loss on iteration 1 and every 10th iteration.
        Saves the generated image on iteration 1, every 100th iteration, and
        the final iteration, so the end result is kept for any `iterations`.
    """
    global new_img_height, new_img_width

    # load base image and resize
    base_image = tf.keras.utils.load_img(base_path)
    original_width, original_height = base_image.size
    aspect_ratio = original_width / original_height
    new_img_height = 400
    new_img_width = round(aspect_ratio * new_img_height)

    # initialize images
    base_img = preprocess_image(base_path)
    style_img = preprocess_image(style_path)
    generated_img = tf.Variable(preprocess_image(base_path))

    # learning rate starts at 100 and is multiplied by 0.96 per 100 steps
    schedule = ExponentialDecay(initial_learning_rate=100.0, decay_steps=100, decay_rate=0.96)
    SGD_optimizer = SGD(schedule)

    # the filename prefix does not change between iterations
    prefix = 'img' if save_img_tag == '' else f'img_{save_img_tag}'

    for i in range(1, iterations+1):
        loss, gradient = compute_loss_and_gradient(generated_img, base_img, style_img)
        SGD_optimizer.apply_gradients([(gradient, generated_img)])
        if i == 1 or i % 10 == 0:
            print(f"Iteration {i}: loss = {loss:.2f}")
        # also save on the last step so the final result is never dropped
        if i == 1 or i % 100 == 0 or i == iterations:
            img = deprocess_image(generated_img.numpy())
            filename = prefix + f'_{i:04d}.png'
            tf.keras.utils.save_img(filename, img)
            print("Saved", filename)
    print("Done")

In [None]:
tf.keras.utils.load_img(starry_night, target_size=(new_img_height, new_img_width))

In [None]:
tf.keras.utils.load_img(the_scream, target_size=(new_img_height, new_img_width))

In [None]:
tf.keras.utils.load_img(eiffel_tower, target_size=(new_img_height, new_img_width))

In [None]:
style_transfer(tiger, eiffel_tower, 1000)

In [None]:
result = tf.keras.utils.load_img('img_1000.png')

In [None]:
result

In [None]:
!ls img*

In [None]:
!mkdir results

In [None]:
!mv img* results

In [None]:
!zip -rq results results

In [None]:
!ls

In [None]:
# download the zipped results when running in Google Colab;
# elsewhere the google.colab package is absent, so skip the download
# instead of failing the cell

try:
    from google.colab import files
except ImportError:
    print("Not running in Google Colab; results.zip was left in the working directory")
else:
    files.download('results.zip')