## TensorFlow cookbook
This section includes implementations of a set of common operations in TensorFlow.
1. Get shape
2. Batch gather
3. Beam search
4. Merge
5. Entropy
6. KL-Divergence
7. Make parallel
8. Leaky Relu
9. Batch normalization
### Get shape
```python
def get_shape(tensor):
    """Return the shape of `tensor` as a list with one entry per dimension.

    Each entry is the statically known size of that dimension when it is
    available, and the matching scalar taken from `tf.shape(tensor)` when it
    is not.
    """
    known_sizes = tensor.shape.as_list()
    runtime_sizes = tf.unstack(tf.shape(tensor))
    shape = []
    for known, runtime in zip(known_sizes, runtime_sizes):
        # A statically unknown dimension shows up as None in `known_sizes`.
        shape.append(runtime if known is None else known)
    return shape
```
### Batch Gather
```python
def batch_gather(tensor, indices):
    """Gather in batch from a tensor of arbitrary size.

    In pseudocode this module will produce the following:
        output[i] = tf.gather(tensor[i], indices[i])

    Args:
        tensor: Tensor with at least two dimensions. The first dimension is
            the batch and the second is the one being indexed.
        indices: Integer indices (tensor or array-like) whose first dimension
            is the batch. Any integer dtype accepted by `tf.gather` works.

    Returns:
        output: A tensor of gathered values.
    """
    shape = get_shape(tensor)
    # Fold the batch dimension into the indexed dimension so that one flat
    # gather can serve every batch element.
    flat_first = tf.reshape(tensor, [shape[0] * shape[1]] + shape[2:])
    indices = tf.convert_to_tensor(indices)
    # Row i of the batch starts at position i * shape[1] in `flat_first`.
    offset_shape = [shape[0]] + [1] * (indices.shape.ndims - 1)
    offset = tf.reshape(tf.range(shape[0]) * shape[1], offset_shape)
    # The offset is int32 here; cast it so that indices of another integer
    # dtype (e.g. int64 from a NumPy array) can be added without a dtype
    # mismatch error.
    offset = tf.cast(offset, indices.dtype)
    output = tf.gather(flat_first, indices + offset)
    return output
```
### Beam Search
```python
import tensorflow as tf
def rnn_beam_search(update_fn, initial_state, sequence_length, beam_width,
                    begin_token_id, end_token_id, name="rnn"):
    """Beam-search decoder for recurrent models.

    Args:
        update_fn: Function called as `update_fn(state, ids)` that returns
            the next state and the logits for the next token.
        initial_state: Recurrent model state. It is tiled to
            [batch_size, beam_width, state_size], so it is expected to be
            [batch_size, state_size] with a static batch size.
        sequence_length: Length of the generated sequence (Python integer;
            the decoding loop is unrolled).
        beam_width: Beam width.
        begin_token_id: Begin token id.
        end_token_id: End token id.
        name: Scope of the variables.

    Returns:
        ids: Output indices, shaped [batch_size, beam_width, sequence_length].
        logprobs: Output log probabilities, shaped [batch_size, beam_width].
    """
    batch_size = initial_state.shape.as_list()[0]
    beam_state = tf.tile(
        tf.expand_dims(initial_state, axis=1), [1, beam_width, 1])
    # log(1) = 0 for the first beam and log(0) = -inf for the others, so only
    # the first beam can be selected at the first step.
    beam_scores = tf.log([[1.] + [0.] * (beam_width - 1)])
    token_ids = tf.tile([[begin_token_id]], [batch_size, beam_width])
    decoded = tf.zeros([batch_size, beam_width, 0], dtype=token_ids.dtype)
    # 1.0 while a beam is still running, 0.0 once it has emitted the end token.
    alive = tf.ones([batch_size, beam_width], dtype=tf.float32)

    for step in range(sequence_length):
        # Variables are created on the first step and reused afterwards.
        reuse = True if step > 0 else None
        with tf.variable_scope(name, reuse=reuse):
            beam_state, logits = update_fn(beam_state, token_ids)
            logits = tf.nn.log_softmax(logits)

            # Finished beams contribute zero additional log probability.
            previous_scores = tf.expand_dims(beam_scores, axis=2)
            step_scores = logits * tf.expand_dims(alive, axis=2)
            candidate_scores = previous_scores + step_scores

            num_classes = logits.shape.as_list()[-1]
            flat_scores = tf.reshape(
                candidate_scores, [batch_size, num_classes * beam_width])
            beam_scores, flat_indices = tf.nn.top_k(flat_scores, k=beam_width)

            # A flat index encodes (source beam, token) as
            # beam * num_classes + token.
            token_ids = flat_indices % num_classes
            source_beams = flat_indices // num_classes

            beam_state = batch_gather(beam_state, source_beams)
            history = batch_gather(decoded, source_beams)
            decoded = tf.concat(
                [history, tf.expand_dims(token_ids, axis=2)], axis=2)
            carried_alive = batch_gather(alive, source_beams)
            not_finished = tf.to_float(tf.not_equal(token_ids, end_token_id))
            alive = carried_alive * not_finished

    return decoded, beam_scores
```
### Merge
```python
import tensorflow as tf
def merge(tensors, units, activation=tf.nn.relu, name=None, **kwargs):
    """Merge features with broadcasting support.

    Every input is passed through its own dense projection to `units`
    features, the projections are summed (broadcasting where shapes differ),
    and the optional activation is applied to the sum.

    Example:
        a = tf.zeros([m, 1, d1])
        b = tf.zeros([1, n, d2])
        c = merge([a, b], d3)  # shape of c would be [m, n, d3].

    Args:
        tensors: A list of tensors with the same rank.
        units: Number of units in the projection function.
        activation: Callable applied to the summed projections; a falsy
            value skips it.
        name: Optional variable scope name (defaults to "merge").
        **kwargs: Extra keyword arguments forwarded to `tf.layers.dense`.

    Returns:
        The merged tensor.
    """
    with tf.variable_scope(name, default_name="merge"):
        # One linear projection per input, named by its position in the list.
        projections = [
            tf.layers.dense(tensor, units, name="proj_%d" % index, **kwargs)
            for index, tensor in enumerate(tensors)
        ]
        # Start from the last projection and add the others in list order.
        merged = projections.pop()
        for projection in projections:
            merged = merged + projection
        if activation:
            merged = activation(merged)
        return merged
```
### Entropy
```python
import tensorflow as tf
def softmax_entropy(logits, dim=-1):
    """Return the entropy of softmax(logits), summed over dimension `dim`."""
    probs = tf.nn.softmax(logits, dim)
    log_probs = tf.nn.log_softmax(logits, dim)
    return -tf.reduce_sum(probs * log_probs, dim)
```
### KL-Divergence
```python
def gaussian_kl(q, p=(0., 0.)):
    """Compute the KL divergence between two Gaussian distributions.

    Each distribution is given by a mean and a log-variance per dimension.
    To ensure numerical stability, this op uses (mu, log(sigma^2)) to
    represent a distribution. If p is not provided, it is assumed to be a
    unit Gaussian (zero mean, zero log-variance).

    Args:
        q: A tuple (mu, log(sigma^2)) representing a multivariate Gaussian.
        p: A tuple (mu, log(sigma^2)) representing a multivariate Gaussian.

    Returns:
        A tensor representing KL(q, p), summed over the last axis.
    """
    q_mean, q_log_var = q
    p_mean, p_log_var = p
    log_var_gap = p_log_var - q_log_var
    var_ratio = tf.exp(q_log_var - p_log_var)
    scaled_mean_gap = tf.square(q_mean - p_mean) / tf.exp(p_log_var)
    per_dimension = 0.5 * (log_var_gap + var_ratio + scaled_mean_gap - 1)
    return tf.reduce_sum(per_dimension, axis=-1)
```
### Make parallel
```python
def make_parallel(fn, num_gpus, **kwargs):
    """Parallelize given model on multiple gpu devices.

    Every keyword input is split into `num_gpus` pieces along its first
    dimension, `fn` is built once per GPU on its own piece with variables
    shared between the copies, and the per-GPU outputs are concatenated
    along the first dimension.

    Args:
        fn: Arbitrary function that takes a set of input tensors and outputs
            a single tensor. First dimension of inputs and output tensor are
            assumed to be batch dimension.
        num_gpus: Number of GPU devices.
        **kwargs: Keyword arguments to be passed to the model.

    Returns:
        A tensor corresponding to the model output.
    """
    shards = {key: tf.split(value, num_gpus) for key, value in kwargs.items()}
    outputs = []
    for gpu_index in range(num_gpus):
        device = tf.DeviceSpec(device_type="GPU", device_index=gpu_index)
        # AUTO_REUSE lets the first copy create the variables and the
        # following copies pick them up.
        with tf.device(device), tf.variable_scope(
                tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            shard_inputs = {
                key: pieces[gpu_index] for key, pieces in shards.items()
            }
            outputs.append(fn(**shard_inputs))
    return tf.concat(outputs, axis=0)
```
### Leaky relu
```python
def leaky_relu(tensor, alpha=0.1):
    """Compute the leaky rectified linear activation.

    Returns the element-wise maximum of `tensor` and `alpha * tensor`, which
    is the leaky ReLU when `alpha` is at most 1.
    """
    scaled = alpha * tensor
    return tf.maximum(tensor, scaled)
```
### Batch normalization
```python
def batch_normalization(tensor, training=False, epsilon=0.001, momentum=0.9,
                        fused_batch_norm=False, name=None):
    """Performs batch normalization on given 4-D tensor.

    The features are assumed to be in NHWC format: statistics are computed
    over every axis except the last one. Note that you need to run
    UPDATE_OPS in order for this function to perform correctly, e.g.:

        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op = optimizer.minimize(loss)

    Based on: https://arxiv.org/abs/1502.03167

    Args:
        tensor: Input tensor whose last dimension is the channel dimension.
        training: Python boolean. When true, batch statistics are used and
            the moving averages are updated; otherwise the moving averages
            are used for normalization.
        epsilon: Small constant added to the variance.
        momentum: Decay rate of the moving averages.
        fused_batch_norm: Whether to use `tf.nn.fused_batch_norm` instead of
            `tf.nn.batch_normalization`.
        name: Optional variable scope name (defaults to
            "batch_normalization").

    Returns:
        The normalized tensor.
    """
    with tf.variable_scope(name, default_name="batch_normalization"):
        channels = tensor.shape.as_list()[-1]
        axes = list(range(tensor.shape.ndims - 1))

        # Learned shift and scale.
        beta = tf.get_variable(
            "beta", channels, initializer=tf.zeros_initializer())
        gamma = tf.get_variable(
            "gamma", channels, initializer=tf.ones_initializer())
        # Moving averages used at inference time; not trained by gradients.
        avg_mean = tf.get_variable(
            "avg_mean", channels, initializer=tf.zeros_initializer(),
            trainable=False)
        avg_variance = tf.get_variable(
            "avg_variance", channels, initializer=tf.ones_initializer(),
            trainable=False)

        # Pick the statistics that are handed to the normalization op.
        if not training:
            mean, variance = avg_mean, avg_variance
        elif fused_batch_norm:
            # The fused op computes the batch statistics itself.
            mean = variance = None
        else:
            mean, variance = tf.nn.moments(tensor, axes=axes)

        if fused_batch_norm:
            tensor, mean, variance = tf.nn.fused_batch_norm(
                tensor, scale=gamma, offset=beta, mean=mean,
                variance=variance, epsilon=epsilon, is_training=training)
        else:
            tensor = tf.nn.batch_normalization(
                tensor, mean, variance, beta, gamma, epsilon)

        if training:
            # Register the moving-average updates so callers can run them
            # together with the training op.
            for average, batch_stat in ((avg_mean, mean),
                                        (avg_variance, variance)):
                update = tf.assign(
                    average,
                    average * momentum + batch_stat * (1.0 - momentum))
                tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update)

    return tensor
```