from google.colab import drive
drive.mount('/content/drive')
import os
import re
import sys
import math
import random
import logging
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow_probability as tfp
from tqdm.notebook import *
from sklearn import metrics
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.layers import *
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import *
from tensorflow.keras import mixed_precision
try:
import albumentations as A
import tensorflow_addons as tfa
from vit_keras import vit, layers
import efficientnet.tfkeras as efn
from classification_models.tfkeras import Classifiers
except ImportError:
!pip install -qq gcsfs
!pip install -qq vit-keras
!pip install -qq efficientnet
!pip install -qq albumentations
!pip install -qq image-classifiers
!pip install -qq tensorflow-addons
!gdown --id 16MJuZ3wovKcc1B7V6eaUZKwkH1Dipykg
import albumentations as A
import tensorflow_addons as tfa
from vit_keras import vit, layers
import efficientnet.tfkeras as efn
from classification_models.tfkeras import Classifiers
logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
print('Running on TPU ', tpu.master())
#tf.config.set_soft_device_placement(True)
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)
strategy = tf.distribute.experimental.TPUStrategy(tpu)
print("REPLICAS: ", strategy.num_replicas_in_sync)
AUTO = tf.data.experimental.AUTOTUNE
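# Training configuration. The image_paths are Kaggle-hosted GCS buckets that
# hold the competition TFRecord shards (a TPU can only stream input from GCS);
# n_classes matches the 81,313 landmark labels of the cleaned GLDv2 train set.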
config = {
"seed": 1312,
"lr": 1e-4,
"valid_size": 0.05,
"epochs": 50,
"batch_size": 8 * strategy.num_replicas_in_sync,
"image_size": [800, 800, 3],
"embedding_dimensions": 1024,
"n_classes": 81313,
"image_paths": [
'gs://kds-02a5b8675d55c9a79251760390f626ffd3a0807438e67d2c7edea3cb',
'gs://kds-7aa79b5bc6f9af00ef7fd0c00f645a0abe32ebc8426ce4dd299077e6',
'gs://kds-971e58e5965ae894e73daa42bc53c93538d7afd16f8cc31a7e0ec68b',
'gs://kds-9c55456bf87ba673337d07f52d56d36e16e1ead2da1bb15e16610dd7',
'gs://kds-5949695dad43c3d30cd209773b1365e354a3613d9b63a25051eb4305',
'gs://kds-c4a1c215158b3e3002adac53b7de364a742bae7c6557212557d0378e',
'gs://kds-969690135ac88129bd11436ab06669e2aa1a23d66083087e9a692255',
'gs://kds-1793eae3b59c9d40461a1b04d82452040a42b8b0f063554dcd024ccf',
'gs://kds-73589684dcf1e8ebd2d37605cdb19ebfa650cccb17f36221e0ee48ac',
'gs://kds-d0e89541a75c0bf3642ef1f91a8f5ed5ff25630b417d504fba0d77ab',
'gs://kds-ebc1e3faa8dbe5cc846799207a330dd245c63befa4c93241ce526d1a',
'gs://kds-3b830115dd341cae8ac39c8d611fe4bc6fcad798e1e84ec336de6b36',
'gs://kds-8cb07756a69b20d2e4f294f26a928e7d285cf6d6db6640d9873c7ee1',
'gs://kds-e9f50c957467dcf68c27f8b865d87720e2cd4f1ff058733e474920d0',
'gs://kds-20d0c45f1756ef8664edcf2aa2db83ca4a0efbfb8ea533cd0bf85633',
'gs://kds-6ea4f0da52f0c996a7fbe5835459d1301a824236e793dadea8776b7d',
'gs://kds-05fdfc56d42ee43c29806c0dc9a06edb5d6f9a1cd82871b67538986a',
'gs://kds-6fb2e328eab4580255aa16b7f9bc7074babb1f68488144750b2e1c9f',
'gs://kds-02f60c478a0a861b6b80dba0ebb07e4d8547ad036ea01065a560d520',
'gs://kds-75583a573fde7550697fd8f591a2db35399acd2e772ca31eaecce602'
],
"encoded_csv_path": 'train_encoded.csv',
"save_path": "/content/drive/My Drive/2021_GLR/",
"margin": "AdaCos",
"backbone": "efficientnetv2-b3",
"last_epoch": 34,
"total_save": 5
}
config["save_path"] += config["backbone"]
if (config["last_epoch"] != 0):
config["weight_path"] = os.path.join(config["save_path"], "model_{}_{}_{}.h5".format(config["backbone"], config["margin"], (config["last_epoch"])%config["total_save"]))
else:
config["weight_path"] = None
augmentation = A.Compose([
A.HorizontalFlip(p = 0.5),
A.RandomBrightnessContrast(brightness_limit=0.1, p=0.2),
A.JpegCompression(quality_lower=95, quality_upper=100, p=0.25),
A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.25),
A.Cutout(num_holes=2, max_h_size=4, max_w_size=4, p=0.1),
])
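# Seed every RNG source (Python, NumPy, TF, hash seed) for reproducibility.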
def seed_everything(seed):
random.seed(seed)
np.random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
tf.random.set_seed(seed)
seed_everything(config["seed"])
def transform(image, label):
image = augmentation(image=image)["image"]
return image, label
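# Decode a JPEG byte string, scale to [0, 1], and letterbox-pad to the square
# input size from the config (preserves aspect ratio, unlike a plain resize).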
def decode_image(image_data, image_size = config['image_size']):
image = tf.image.decode_jpeg(image_data, channels = 3)
image = tf.cast(image, tf.float32) / 255.0
#image = tf.image.resize(image, (config["image_size"][0], config["image_size"][1]))
image = tf.image.resize_with_pad(image, target_height = config["image_size"][0], target_width = config["image_size"][1])
image = tf.reshape(image, image_size)
return image
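# Shards are named "..._<group>.tfrec"; counting the rows of the encoded train
# CSV whose 'group' matches a shard gives the exact number of examples.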
def count_data_items(filenames):
records = [int(re.compile(r"_([0-9]*)\.").search(filename).group(1)) for filename in filenames]
df = pd.read_csv(config["encoded_csv_path"])
n = df[df['group'].isin(records)].shape[0]
return n
def read_tfrecord(example):
TFREC_FORMAT = {
"image": tf.io.FixedLenFeature([], tf.string),
"target": tf.io.FixedLenFeature([], tf.int64)
}
example = tf.io.parse_single_example(example, TFREC_FORMAT)
image = decode_image(example['image'], config['image_size'])
target = tf.cast(example["target"], tf.int32)
if (config['margin'] != "ArcMargin"):
        target = tf.one_hot(target, config["n_classes"], on_value = 1.0, off_value = 0.0, axis = -1)
return image, target
def load_dataset(filenames, ordered = False):
ignore_order = tf.data.Options()
if not ordered:
ignore_order.experimental_deterministic = False
dataset = tf.data.TFRecordDataset(filenames, num_parallel_reads = AUTO)
dataset = dataset.with_options(ignore_order)
dataset = dataset.map(read_tfrecord, num_parallel_calls = AUTO)
return dataset
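# Pack (image, label) into the dict the two-input model expects: the label is
# fed to the margin head as an input and also kept as the training target.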
def arcface_format(image, target):
return {'input': image, 'label': target}, target
def get_training_dataset(filenames, ordered = False, do_aug = False):
dataset = load_dataset(filenames, ordered = ordered)
if (do_aug):
dataset = dataset.map(transform, num_parallel_calls = AUTO)
dataset = dataset.map(arcface_format, num_parallel_calls = AUTO)
dataset = dataset.repeat()
    # shuffle() takes a buffer size as its first argument; the original code
    # passed the seed value here by mistake. 2048 is an arbitrary buffer choice.
    dataset = dataset.shuffle(buffer_size = 2048, seed = config["seed"])
dataset = dataset.batch(config["batch_size"])
dataset = dataset.prefetch(AUTO)
return dataset
def get_validation_dataset(filenames, ordered = True, prediction = False):
    dataset = load_dataset(filenames, ordered = ordered)
    dataset = dataset.map(arcface_format, num_parallel_calls = AUTO)
    dataset = dataset.batch(config["batch_size"])
    if not prediction:
        dataset = dataset.prefetch(AUTO)
    return dataset
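# Generalized-mean (GeM) pooling: (mean(x^p))^(1/p) over the spatial grid.
# p = 1 reduces to average pooling; large p approaches max pooling.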
class GeMPooling(tf.keras.layers.Layer):
    def __init__(self, p=1.0, eps=1e-7, **kwargs):
        super().__init__(**kwargs)
        self.p = p
        self.eps = eps  # was hard-coded to 1e-7, silently ignoring the eps argument
def get_config(self):
config = super().get_config().copy()
config.update({
'p': self.p,
'eps': self.eps
})
return config
def call(self, inputs: tf.Tensor, **kwargs):
inputs = tf.clip_by_value(inputs, clip_value_min=self.eps, clip_value_max=tf.reduce_max(inputs))
inputs = tf.pow(inputs, self.p)
inputs = tf.reduce_mean(inputs, axis=[1, 2])
inputs = tf.pow(inputs, 1./self.p)
return inputs
class CosineSimilarity(tf.keras.layers.Layer):
"""
Cosine similarity with classwise weights
"""
def __init__(self, num_classes, **kwargs):
super().__init__(**kwargs)
self.num_classes = num_classes
def build(self, input_shape):
input_dim = input_shape[-1]
self.W = self.add_weight(shape=(input_dim, self.num_classes),
initializer='random_normal',
trainable=True)
def call(self, inputs):
x = tf.nn.l2_normalize(inputs, axis=-1) # (batch_size, ndim)
w = tf.nn.l2_normalize(self.W, axis=0) # (ndim, nclass)
cos = tf.matmul(x, w) # (batch_size, nclass)
return cos
def get_config(self):
config = super().get_config().copy()
config.update({
'num_classes': self.num_classes
})
return config
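# ArcMargin: an additive-angular-margin (ArcFace-style) head that takes sparse
# integer labels, with optional label smoothing (ls_eps) and an "easy margin"
# variant that only applies the margin where cos(theta) > 0.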
class ArcMargin(tf.keras.layers.Layer):
def __init__(self, n_classes, s=30, m=0.50, easy_margin=False, ls_eps=0.0, **kwargs):
super(ArcMargin, self).__init__(**kwargs)
self.n_classes = n_classes
self.s = s
self.m = m
self.ls_eps = ls_eps
self.easy_margin = easy_margin
self.cos_m = tf.math.cos(m)
self.sin_m = tf.math.sin(m)
self.th = tf.math.cos(math.pi - m)
self.mm = tf.math.sin(math.pi - m) * m
def get_config(self):
config = super().get_config().copy()
config.update({
'n_classes': self.n_classes,
's': self.s,
'm': self.m,
'ls_eps': self.ls_eps,
'easy_margin': self.easy_margin,
})
return config
def build(self, input_shape):
super(ArcMargin, self).build(input_shape[0])
self.W = self.add_weight(
name='W',
shape=(int(input_shape[0][-1]), self.n_classes),
initializer='glorot_uniform',
dtype='float32',
trainable=True,
regularizer=None
)
def call(self, inputs):
X, y = inputs
y = tf.cast(y, dtype=tf.int32)
cosine = tf.matmul(
tf.math.l2_normalize(X, axis=1),
tf.math.l2_normalize(self.W, axis=0)
)
sine = tf.math.sqrt(1.0 - tf.math.pow(cosine, 2))
phi = cosine * self.cos_m - sine * self.sin_m
if self.easy_margin:
phi = tf.where(cosine > 0, phi, cosine)
else:
phi = tf.where(cosine > self.th, phi, cosine - self.mm)
one_hot = tf.cast(
tf.one_hot(y, depth=self.n_classes),
dtype=cosine.dtype
)
if self.ls_eps > 0:
one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes
output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
output *= self.s
return output
class ArcFace(tf.keras.layers.Layer):
"""
Implementation of https://arxiv.org/pdf/1801.07698.pdf
"""
def __init__(self, num_classes, margin=0.5, scale=64, **kwargs):
super().__init__(**kwargs)
self.num_classes = num_classes
self.margin = margin
self.scale = scale
self.cos_similarity = CosineSimilarity(num_classes)
def call(self, inputs, training):
# If not training (prediction), labels are ignored
feature, labels = inputs
cos = self.cos_similarity(feature)
if training:
theta = tf.acos(tf.clip_by_value(cos, -1, 1))
cos_add = tf.cos(theta + self.margin)
mask = tf.cast(labels, dtype=cos_add.dtype)
logits = mask*cos_add + (1-mask)*cos
logits *= self.scale
return logits
else:
return cos
def get_config(self):
config = super().get_config().copy()
config.update({
'num_classes': self.num_classes,
'margin': self.margin,
'scale': self.scale
})
return config
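# AdaCos (arXiv:1905.00292): margin-free head that re-estimates the logit
# scale each training step from the batch average of the non-target
# exp-logits (B_avg) and the median angle to the true class.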
class AdaCos(tf.keras.layers.Layer):
def __init__(self, num_classes, **kwargs):
super().__init__(**kwargs)
self.num_classes = num_classes
self.cos_similarity = CosineSimilarity(num_classes)
self.scale = tf.Variable(tf.sqrt(2.0)*tf.math.log(num_classes - 1.0),
trainable=False)
def call(self, inputs, training):
# In inference, labels are ignored
feature, labels = inputs
cos = self.cos_similarity(feature)
if training:
mask = tf.cast(labels, dtype=cos.dtype)
# Collect cosine values at only false labels
B = (1 - mask)*tf.exp(self.scale*cos)
B_avg = tf.reduce_mean(tf.reduce_sum(B, axis=-1), axis=0)
theta = tf.acos(tf.clip_by_value(cos, -1, 1))
# Collect cosine at true labels
theta_true = tf.reduce_sum(mask*theta, axis=-1)
            # median (= 50th percentile)
theta_med = tfp.stats.percentile(theta_true, q=50)
scale = tf.math.log(B_avg) / tf.cos(tf.minimum(np.pi/4, theta_med))
scale = tf.stop_gradient(scale)
logits = scale*cos
self.scale.assign(scale)
return logits
else:
return cos
def get_config(self):
config = super().get_config().copy()
config.update({
'num_classes': self.num_classes
})
return config
class CircleLoss(tf.keras.layers.Layer):
"""
Implementation of https://arxiv.org/abs/2002.10857 (pair-level label version)
"""
def __init__(self, margin=0.25, scale=256, **kwargs):
"""
Args
margin: a float value, margin for the true label (default 0.25)
scale: a float value, final scale value,
stated as gamma in the original paper (default 256)
Returns:
a tf.keras.layers.Layer object, outputs logit values of each class
        In the original paper, margin and scale (=gamma) are set depending on the task:
- Face recognition: m=0.25, scale=256 (default)
- Person re-identification: m=0.25, scale=256
- Fine-grained image retrieval: m=0.4, scale=64
"""
super().__init__(**kwargs)
self.margin = margin
self.scale = scale
self._Op = 1 + margin # O_positive
self._On = -margin # O_negative
self._Dp = 1 - margin # Delta_positive
self._Dn = margin # Delta_negative
def call(self, inputs, training):
feature, labels = inputs
x = tf.nn.l2_normalize(feature, axis=-1)
cos = tf.matmul(x, x, transpose_b=True) # (batch_size, batch_size)
if training:
# pairwise version
mask = tf.cast(labels, dtype=cos.dtype)
mask_p = tf.matmul(mask, mask, transpose_b=True)
mask_n = 1 - mask_p
mask_p = mask_p - tf.eye(mask_p.shape[0])
logits_p = - self.scale * tf.nn.relu(self._Op - cos) * (cos - self._Dp)
logits_n = self.scale * tf.nn.relu(cos - self._On) * (cos - self._Dn)
logits_p = tf.where(mask_p == 1, logits_p, -np.inf)
logits_n = tf.where(mask_n == 1, logits_n, -np.inf)
logsumexp_p = tf.reduce_logsumexp(logits_p, axis=-1)
logsumexp_n = tf.reduce_logsumexp(logits_n, axis=-1)
mask_p_row = tf.reduce_max(mask_p, axis=-1)
mask_n_row = tf.reduce_max(mask_n, axis=-1)
            logsumexp_p = tf.where(mask_p_row == 1, logsumexp_p, 0.0)
            logsumexp_n = tf.where(mask_n_row == 1, logsumexp_n, 0.0)
losses = tf.nn.softplus(logsumexp_p + logsumexp_n)
mask_paired = mask_p_row*mask_n_row
losses = mask_paired * losses
return losses
else:
return cos
def get_config(self):
config = super().get_config().copy()
config.update({
'margin': self.margin,
'scale': self.scale
})
return config
class CircleLossCL(tf.keras.layers.Layer):
"""
Implementation of https://arxiv.org/abs/2002.10857 (class-level label version)
"""
def __init__(self, num_classes, margin=0.25, scale=256, **kwargs):
"""
Args
num_classes: an int value, number of target classes
margin: a float value, margin for the true label (default 0.25)
scale: a float value, final scale value,
stated as gamma in the original paper (default 256)
Returns:
a tf.keras.layers.Layer object, outputs logit values of each class
        In the original paper, margin and scale (=gamma) are set depending on the task:
- Face recognition: m=0.25, scale=256 (default)
- Person re-identification: m=0.25, scale=256
- Fine-grained image retrieval: m=0.4, scale=64
"""
super().__init__(**kwargs)
self.num_classes = num_classes
self.margin = margin
self.scale = scale
self._Op = 1 + margin # O_positive
self._On = -margin # O_negative
self._Dp = 1 - margin # Delta_positive
self._Dn = margin # Delta_negative
self.cos_similarity = CosineSimilarity(num_classes)
def call(self, inputs, training):
feature, labels = inputs
cos = self.cos_similarity(feature)
if training:
            # class-level version
mask = tf.cast(labels, dtype=cos.dtype)
alpha_p = tf.nn.relu(self._Op - cos)
alpha_n = tf.nn.relu(cos - self._On)
logits_p = self.scale*alpha_p*(cos - self._Dp)
logits_n = self.scale*alpha_n*(cos - self._Dn)
logits = mask*logits_p + (1-mask)*logits_n
return logits
else:
return cos
def get_config(self):
config = super().get_config().copy()
config.update({
'num_classes': self.num_classes,
'margin': self.margin,
'scale': self.scale
})
return config
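# Map the config string to a margin head. The m/s values are this notebook's
# choices, and "CosFace" here just reuses the ArcFace head with a negative
# angular margin rather than implementing a true additive cosine margin.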
def get_margin(margin):
if (margin == "ArcMargin"):
return ArcMargin(n_classes = config["n_classes"], m = 0.1, s = 32)
elif (margin == "ArcFace"):
return ArcFace(num_classes = config["n_classes"], margin = 0.1, scale = 32)
elif (margin == "AdaCos"):
return AdaCos(num_classes = config["n_classes"])
elif (margin == "CircleLossCL"):
return CircleLossCL(num_classes = config["n_classes"], margin = 0.6, scale = 32)
elif (margin == "CosFace"):
return ArcFace(num_classes = config["n_classes"], margin = -0.1, scale = 32)
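# Resolve a backbone by name, trying in order: the efficientnet package
# (noisy-student weights), tf.keras.applications (imagenet weights), vit-keras,
# and finally EfficientNetV2 feature-vector checkpoints from TF-Hub on GCS.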
def get_backbone(backbone, x):
if (hasattr(efn, backbone)):
return GeMPooling(p = 3.0)(getattr(efn, backbone)(weights = "noisy-student", include_top = False)(x))
elif hasattr(tf.keras.applications, backbone):
return GeMPooling(p = 3.0)(getattr(tf.keras.applications, backbone)(weights = "imagenet", include_top = False)(x))
elif hasattr(vit, backbone):
return getattr(vit, backbone)(image_size = (config["image_size"][0], config["image_size"][1]),
pretrained=True,
include_top=False,
pretrained_top=False)(x)
elif "eff" in backbone:
return hub.KerasLayer("gs://cloud-tpu-checkpoints/efficientnet/v2/hub/"+backbone+"-21k-ft1k/feature-vector", trainable=True)(x)
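# Two-input model: the margin head needs the label at train time, so the graph
# takes (image, label) and outputs class probabilities. Softmax is pinned to
# float32, the usual precaution when mixed precision is enabled.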
def model_factory(backbone, image_size, embedding_dimensions, margin):
x = Input(shape = (*image_size,), name = 'input')
label = Input(shape = (), name = 'label')
headModel = get_backbone(backbone, x)
headModel = Dense(embedding_dimensions, activation = "linear")(headModel)
headModel = BatchNormalization()(headModel)
headModel = PReLU()(headModel)
headModel = get_margin(margin = margin)([headModel, label])
output = Softmax(dtype='float32')(headModel)
model = tf.keras.models.Model(inputs = [x, label], outputs = [output])
return model
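# LR schedule: linear warmup for 5 epochs to 5x the base rate, then exponential
# decay (x0.8 per epoch) toward LR_MIN; the base rate is scaled linearly with
# the global batch size (batch_size / 256).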
def get_lr_callback(plot=False):
LR_START = config["lr"] * (config["batch_size"] / 256)
LR_MAX = 5 * LR_START
LR_MIN = LR_START/10
LR_RAMPUP_EPOCHS = 5
LR_SUSTAIN_EPOCHS = 0
LR_EXP_DECAY = 0.8
def lrfn(epoch):
if epoch < LR_RAMPUP_EPOCHS:
lr = (LR_MAX - LR_START) / LR_RAMPUP_EPOCHS * epoch + LR_START
elif epoch < LR_RAMPUP_EPOCHS + LR_SUSTAIN_EPOCHS:
lr = LR_MAX
else:
lr = (LR_MAX - LR_MIN) * LR_EXP_DECAY**(epoch - LR_RAMPUP_EPOCHS - LR_SUSTAIN_EPOCHS) + LR_MIN
return lr
if plot:
epochs = list(range(config["epochs"]))
learning_rates = [lrfn(x) for x in epochs]
plt.plot(epochs, learning_rates)
print("Learning rate schedule: {:.3g} to {:.3g} to {:.3g}".format(learning_rates[0], max(learning_rates), learning_rates[-1]))
plt.show()
lr_callback = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)
return lr_callback
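# Rotating checkpoints: the filename index is (epoch + 1) % total_save, so only
# the most recent `total_save` weight files are kept on Drive.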
class SaveModelCheckpoint(tf.keras.callbacks.Callback):
    def __init__(self, path):
        super().__init__()
        self.path = path
    def on_epoch_end(self, epoch, logs=None):
self.model.save(os.path.join(self.path, "model_{}_{}_{}.h5".format(config["backbone"], config["margin"], (epoch + 1)%config["total_save"])))
df = pd.read_csv(config["encoded_csv_path"])
FILENAMES = []
for GCS_PATH in config["image_paths"]:
FILENAMES += tf.io.gfile.glob(GCS_PATH + '/train*.tfrec')
TRAINING_FILENAMES, VALIDATION_FILENAMES = train_test_split(FILENAMES,
test_size=config["valid_size"],
random_state=42)
training_groups = [int(re.compile(r"_([0-9]*)\.").search(filename).group(1)) for filename in TRAINING_FILENAMES]
validation_groups = [int(re.compile(r"_([0-9]*)\.").search(filename).group(1)) for filename in VALIDATION_FILENAMES]
n_trn_classes = df[df['group'].isin(training_groups)]['landmark_id_encode'].nunique()
n_val_classes = df[df['group'].isin(validation_groups)]['landmark_id_encode'].nunique()
print(f'The number of unique training classes is {n_trn_classes} of {config["n_classes"]} total classes')
print(f'The number of unique validation classes is {n_val_classes} of {config["n_classes"]} total classes')
STEPS_PER_EPOCH = count_data_items(TRAINING_FILENAMES) // config["batch_size"]
train_dataset = get_training_dataset(TRAINING_FILENAMES, ordered = False, do_aug = True)
valid_dataset = get_validation_dataset(VALIDATION_FILENAMES, ordered = True, prediction = False)
with strategy.scope():
optimizer = Adam(learning_rate = config["lr"])
model = model_factory(margin = config["margin"],
backbone = config["backbone"],
image_size = config["image_size"],
embedding_dimensions = config["embedding_dimensions"])
if (config["weight_path"]):
model.load_weights(config["weight_path"])
model.compile(optimizer = optimizer,
loss = [tf.keras.losses.CategoricalCrossentropy() if (config['margin'] != "ArcMargin") else tf.keras.losses.SparseCategoricalCrossentropy()],
metrics = [tf.keras.metrics.CategoricalAccuracy() if (config['margin'] != "ArcMargin") else tf.keras.metrics.SparseCategoricalAccuracy()])
checkpoint = SaveModelCheckpoint(path = config["save_path"])
lr_callback = get_lr_callback(plot = True)
H = model.fit(train_dataset,
steps_per_epoch = STEPS_PER_EPOCH,
epochs = config["epochs"],
callbacks = [checkpoint, lr_callback],
validation_data = valid_dataset,
initial_epoch = config["last_epoch"],
verbose = 1)
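# ---------------------------------------------------------------------------
# Inference / submission notebook (runs on Kaggle rather than Colab): rebuilds
# the ensemble members, extracts global descriptors for the index and test
# images, and writes submission.csv.
# ---------------------------------------------------------------------------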
import gc
import os
import csv
import cv2
import sys
import math
import random
import shutil
import logging
import numpy as np
import albumentations as A
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.layers import *
from tqdm.notebook import *
from sklearn.metrics import *
from sklearn.preprocessing import QuantileTransformer
try:
from vit_keras import vit
import efficientnet.tfkeras as efn
from ffyytt_tools.metric_learning.metric_learning_layers import *
except ImportError:
sys.path.append("../input/ffyytt-tools")
!pip install -qq vit-keras --no-index --find-links=file:///kaggle/input/2021-glr-lib
!pip install -qq efficientnet --no-index --find-links=file:///kaggle/input/2021-glr-lib
from vit_keras import vit
import efficientnet.tfkeras as efn
from ffyytt_tools.metric_learning.metric_learning_layers import *
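# Ensemble definition: six parallel lists describe each member (weights path,
# backbone, embedding size, padded-resize flag, margin head, input size).
# Commented-out entries are kept in every list so the indices stay aligned.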
model_paths = [
"../input/2021-glr-all-best-model/model_efficientnetv2-b3_1024_ArcMargin_0.h5",
"../input/2021-glr-all-best-model/model_efficientnetv2-m_1024_ArcMargin_0.h5",
"../input/2021-glr-all-best-model/model_EfficientNetB4_512_ArcMargin_0.h5",
#"../input/2021-glr-efficientnetv2b3-adacos-padding-1024/model_efficientnetv2-b3_AdaCos_1.h5",
"../input/2021-glr-efficientnetv2b3-adacos-padding-1024/model_efficientnetv2-b3_AdaCos_2.h5",
#"../input/2021-glr-efficientnetv2s-adacos-padding-1024/model_efficientnetv2-s_AdaCos_1.h5",
"../input/2021-glr-efficientnetv2s-adacos-padding-1024/model_efficientnetv2-s_AdaCos_2.h5",
"../input/2021-glr-efficientnetb6-adacos-padding-1024/model_EfficientNetB6_AdaCos_2.h5",
"../input/2021-glr-efficientnetb5-adacos-padding-1024/model_EfficientNetB5_AdaCos_0.h5",
"../input/2021-glr-craw-weights-13092021/model_InceptionResNetV2_AdaCos_3.h5",
"../input/2021-glr-craw-weights-13092021/model_EfficientNetB5_ArcMargin_3.h5",
"../input/2021-glr-craw-weights-13092021/model_EfficientNetB5_ArcFace_3.h5",
"../input/2021-glr-vit-b32-adacos-padding-1024/model_vit_b32_AdaCos_1.h5",
"../input/2021-glr-model-temp/model_vit_b32_AdaCos_1.h5",
]
backbones = [
"efficientnetv2-b3",
"efficientnetv2-m",
"EfficientNetB4",
#"efficientnetv2-b3",
"efficientnetv2-b3",
#"efficientnetv2-s",
"efficientnetv2-s",
"EfficientNetB6",
"EfficientNetB5",
"InceptionResNetV2",
"EfficientNetB5",
"EfficientNetB5",
"vit_b32",
#"vit_b32",
"vit_b32"
]
embedding_sizes = [
1024,
1024,
512,
#1024,
1024,
#1024,
1024,
1024,
1024,
1024,
1024,
1024,
1024,
#1024,
1024
]
paddings = [
False,
False,
False,
#True,
True,
#True,
True,
True,
True,
True,
True,
True,
True,
#False,
False,
]
margins = [
"ArcMargin",
"ArcMargin",
"ArcMargin",
#"AdaCos",
"AdaCos",
#"AdaCos",
"AdaCos",
"AdaCos",
"AdaCos",
"AdaCos",
"ArcMargin",
"ArcFace",
"AdaCos",
#"AdaCos",
"AdaCos",
]
image_sizes = [
(512, 512, 3),
(512, 512, 3),
(512, 512, 3),
#(800, 800, 3),
(800, 800, 3),
#(800, 800, 3),
(800, 800, 3),
(800, 800, 3),
(800, 800, 3),
(800, 800, 3),
(800, 800, 3),
(800, 800, 3),
(800, 800, 3),
#(512, 512, 3),
(512, 512, 3),
]
ensemble_weight = np.array([1]*len(backbones))
augmentation = A.Compose([])
config = {
"n_workers": 4,
"batch_size": 16,
"n_classes": 81313,
"distance_batch": 512,
"NUM_PUBLIC_TEST_IMAGES": 1129,
}
TOP_K = 100
DEBUG = True
NUM_EMBEDDING_DIMENSIONS = max(embedding_sizes)
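# The retrieval track is scored with mAP@100, hence TOP_K = 100 retrieved
# index images per test query.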
def get_margin(margin):
if (margin == "ArcMargin"):
return ArcMargin(n_classes = config["n_classes"], m = 0.1, s = 32)
elif (margin == "ArcFace"):
return ArcFace(num_classes = config["n_classes"], margin = 0.1, scale = 32)
elif (margin == "AdaCos"):
return AdaCos(num_classes = config["n_classes"])
elif (margin == "CircleLossCL"):
return CircleLossCL(num_classes = config["n_classes"], margin = 0.25, scale = 32)
elif (margin == "CosFace"):
return ArcFace(num_classes = config["n_classes"], margin = -0.1, scale = 32)
def get_backbone(backbone, x, image_size):
if (hasattr(efn, backbone)):
return GeMPooling(p = 3.0)(getattr(efn, backbone)(weights = None, include_top = False)(x))
elif hasattr(tf.keras.applications, backbone):
return GeMPooling(p = 3.0)(getattr(tf.keras.applications, backbone)(weights = None, include_top = False)(x))
elif hasattr(vit, backbone):
return getattr(vit, backbone)(image_size = (image_size[0], image_size[1]),
pretrained=False,
include_top=False,
pretrained_top=False)(x)
else:
return hub.KerasLayer("../input/efficientnetv2-tfhub-weight-files/tfhub_models/"+backbone+"/feature_vector", trainable=True)(x)
def model_factory(backbone, image_size, embedding_dimensions, margin):
x = Input(shape = (*image_size,), name = 'input')
label = Input(shape = (), name = 'label')
headModel = get_backbone(backbone, x, image_size)
headModel = Dense(embedding_dimensions, activation = "linear")(headModel)
headModel = BatchNormalization()(headModel)
headModel = PReLU()(headModel)
headModel = get_margin(margin = margin)([headModel, label])
output = Softmax(dtype='float32')(headModel)
model = tf.keras.models.Model(inputs = [x, label], outputs = [output])
return model
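# Rebuild each architecture with random init, load its trained weights, then
# truncate the graph at layers[-4] (the embedding stage of the head, before
# the margin layer and softmax) to obtain a global-descriptor model.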
global_models = [None]*len(model_paths)
for model_index in trange(len(model_paths)):
model = model_factory(image_size = image_sizes[model_index],
margin = margins[model_index],
backbone = backbones[model_index],
embedding_dimensions = embedding_sizes[model_index])
model.load_weights(model_paths[model_index])
global_models[model_index] = tf.keras.models.Model(inputs = model.input[0],
outputs = model.layers[-4].output)
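# Each image is prepared both ways so every ensemble member sees its own
# training-time preprocessing: a plain 512x512 resize and an 800x800 letterbox
# pad. Re-encoding to JPEG routes the pixels through the same TF decoder that
# was used during training.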
def do_image_normal(image):
image = augmentation(image=image)["image"]
image = cv2.resize(image, (512, 512))
image = cv2.imencode('.jpg', image, (cv2.IMWRITE_JPEG_QUALITY, 100))[1].tobytes()
image = tf.image.decode_jpeg(image, channels = 3)
return tf.cast(image, tf.float32) / 255.0
def do_image_padding(image):
image = augmentation(image=image)["image"]
image = cv2.imencode('.jpg', image, (cv2.IMWRITE_JPEG_QUALITY, 100))[1].tobytes()
image = tf.image.decode_jpeg(image, channels = 3)
image = tf.image.resize_with_pad(image, target_height = 800, target_width = 800)
return tf.cast(image, tf.float32) / 255.0
def process_image(image, padding):
if (padding):
return do_image_padding(image)
else:
return do_image_normal(image)
def read_image(image_path):
image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
return image
def read_image_batch(image_paths):
X_normal = [None]*len(image_paths)
X_padding = [None]*len(image_paths)
for image_index, image_path in enumerate(image_paths):
image = read_image(image_path)
X_normal[image_index] = process_image(image, False)
X_padding[image_index] = process_image(image, True)
return tf.convert_to_tensor(X_normal), tf.convert_to_tensor(X_padding)
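# Sum-ensemble in embedding space: each member's L2-normalized descriptor,
# scaled by its ensemble weight, is accumulated into the leading columns of a
# shared (num_images, NUM_EMBEDDING_DIMENSIONS) matrix.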
def make_global_model_predict(X, index, embeddings, model_index):
features = global_models[model_index].predict(X)
embeddings[index, :features.shape[1]] += ensemble_weight[model_index]*tf.nn.l2_normalize(features, axis=1).numpy()
return None
def extract_global_features(image_root_dir):
ids, image_paths = my_glob(image_root_dir, ".jpg")
num_embeddings = len(image_paths)
steps = math.ceil(num_embeddings/config["batch_size"])
embeddings = np.zeros((num_embeddings, NUM_EMBEDDING_DIMENSIONS))
for step in trange(steps):
index = range(step*config["batch_size"], min(num_embeddings, (step+1)*config["batch_size"]))
X_normal, X_padding = read_image_batch([image_paths[i] for i in index])
for model_index in range(len(global_models)):
if (paddings[model_index]):
make_global_model_predict(X_padding, index, embeddings, model_index)
else:
make_global_model_predict(X_normal, index, embeddings, model_index)
gc.collect()
tf.keras.backend.clear_session()
return ids, embeddings
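# Brute-force cosine k-NN, batched to bound memory: np.argpartition shortlists
# the TOP_K nearest index embeddings per query, an exact sort then ranks them,
# and similarity is reported as 1 - cosine distance.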
def compute_train_ids_and_scores(train_ids, test_ids, train_embeddings, test_embeddings):
train_ids_and_scores = [None] * len(test_ids)
steps = math.ceil(len(test_ids)/config["distance_batch"])
for step in trange(steps):
index = range(step*config["distance_batch"], min(len(test_ids), (step+1)*config["distance_batch"]))
distances = pairwise_distances(test_embeddings[index, :], train_embeddings,
metric = 'cosine', n_jobs = -1)
for test_index in index:
partition = np.argpartition(distances[test_index%config["distance_batch"]], min(TOP_K, len(train_ids)-1))[:TOP_K]
nearest = sorted([(train_ids[p], distances[test_index%config["distance_batch"]][p]) for p in partition], key=lambda x: x[1])
train_ids_and_scores[test_index] = [(train_id, 1 - distance) for train_id, distance in nearest][:TOP_K]
gc.collect()
return train_ids_and_scores
def compute_remove_top_global(train_ids, test_ids, train_embeddings, test_embeddings):
    # NOTE: depends on a module-level REMOVE_TOP_GLOBAL percentile that is never
    # defined in this notebook, so this helper is effectively unused here.
    train_remove = set()
    train_scores = [None]*len(train_ids)
    steps = math.ceil(len(train_ids)/config["distance_batch"])
    for step in trange(steps):
        index = range(step*config["distance_batch"], min(len(train_ids), (step+1)*config["distance_batch"]))
        distances = pairwise_distances(train_embeddings[index, :], test_embeddings,
                                       metric = 'cosine', n_jobs = -1)
        for train_index in index:
            partition = np.argpartition(distances[train_index%config["distance_batch"]], 3)[:3]
            train_scores[train_index] = sum([1-distances[train_index%config["distance_batch"]][p] for p in partition])
        gc.collect()
    remove_thresh = np.percentile(train_scores, REMOVE_TOP_GLOBAL, interpolation='nearest')
    for train_id, train_score in zip(train_ids, train_scores):
        if (train_score < remove_thresh):
            train_remove.add(train_id)
    return train_remove
def compute_remove_times_app(train_ids, test_ids, train_embeddings, test_embeddings):
    # NOTE: depends on a module-level MAX_TIME_REMOVE that is never defined in
    # this notebook, so this helper is effectively unused here.
    train_time_dict = {train_id:0 for train_id in train_ids}
steps = math.ceil(len(test_ids)/config["distance_batch"])
for step in trange(steps):
index = range(step*config["distance_batch"], min(len(test_ids), (step+1)*config["distance_batch"]))
distances = pairwise_distances(test_embeddings[index, :], train_embeddings,
metric = 'cosine', n_jobs = -1)
for test_index in index:
partition = np.argpartition(distances[test_index%config["distance_batch"]], 100)[:100]
for p in partition:
train_time_dict[train_ids[p]] += 1
gc.collect()
train_remove = set(sorted(train_ids, key = train_time_dict.get, reverse = True)[:MAX_TIME_REMOVE])
return train_remove
def remove_train_by_remove_set(train_ids, train_embeddings, train_remove):
index = [i for i in range(len(train_ids)) if train_ids[i] not in train_remove]
return [train_ids[i] for i in index], train_embeddings[index]
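# DEBUG trick below: string * bool appends "/0/0" (or "/0") only when DEBUG is
# True, restricting the directory walk to one hex-prefixed subfolder so the
# dry run stays fast.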
def global_predictions():
train_ids, train_embeddings = extract_global_features(TRAIN_IMAGE_DIR + "/0/0"*DEBUG)
gc.collect()
test_ids, test_embeddings = extract_global_features(TEST_IMAGE_DIR + "/0"*DEBUG)
gc.collect()
train_ids_and_scores = compute_train_ids_and_scores(train_ids, test_ids, train_embeddings, test_embeddings)
return test_ids, train_ids_and_scores
def get_prediction_map(test_ids, train_ids_and_scores):
prediction_map = dict()
for test_index, test_id in enumerate(test_ids):
prediction_map[test_id] = " ".join(train_id for train_id,_ in train_ids_and_scores[test_index])
return prediction_map
def get_predictions():
test_ids, train_ids_and_scores = global_predictions()
gc.collect()
verification_predictions = get_prediction_map(test_ids, train_ids_and_scores)
return verification_predictions
def seed_everything(seed):
random.seed(seed)
np.random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
tf.random.set_seed(seed)
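# Recursively collect (image_id, filepath) pairs; the id is the filename with
# its 4-character extension (".jpg") stripped.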
def my_glob(path, filetype):
ids = []
filepaths = []
for root, dirs, files in os.walk(path):
for file in files:
if file.endswith(filetype):
filepaths.append(os.path.join(root, file))
ids.append(file[:-4])
return ids, filepaths
def save_submission_csv(predictions=None):
if predictions is None:
shutil.copyfile(os.path.join(DATASET_DIR, 'sample_submission.csv'), 'submission.csv')
return True
with open('submission.csv', 'w') as submission_csv:
csv_writer = csv.DictWriter(submission_csv, fieldnames=['id', 'images'])
csv_writer.writeheader()
for image_id, prediction in predictions.items():
csv_writer.writerow({'id': image_id, 'images': f'{prediction}'})
seed_everything(1312)
INPUT_DIR = os.path.join('..', 'input')
DATASET_DIR = os.path.join(INPUT_DIR, 'landmark-retrieval-2021')
TEST_IMAGE_DIR = os.path.join(DATASET_DIR, 'test')
TRAIN_IMAGE_DIR = os.path.join(DATASET_DIR, 'index')
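# In the retrieval task the "index" split is the gallery being searched, so it
# stands in for the train set here.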
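# If exactly the 1,129 public test images are visible and DEBUG is off, copy
# the sample submission to save quota; in the hidden rerun the test count
# differs, DEBUG is forced off, and the full pipeline runs.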
_, test_image_list = my_glob(TEST_IMAGE_DIR, ".jpg")
if len(test_image_list) == config["NUM_PUBLIC_TEST_IMAGES"] and not DEBUG:
print("Copying sample submission")
save_submission_csv()
else:
if (len(test_image_list) != config["NUM_PUBLIC_TEST_IMAGES"]):
DEBUG = False
verification_predictions = get_predictions()
save_submission_csv(verification_predictions)