Loading the dataset

Loading the dataset involves several steps. Let's walk through them one by one.

  1. The first step is to load the class IDs, which are stored in a pickle file. The following code loads the class IDs and returns a list of all of them; the imports at the top are shared by all the snippets in this section:

import os
import pickle
import random
import numpy as np
import pandas as pd
from PIL import Image

def load_class_ids(class_info_file_path):
    """
    Load class ids from class_info.pickle file
    """
    with open(class_info_file_path, 'rb') as f:
        class_ids = pickle.load(f, encoding='latin1')
    return class_ids

  2. Next, load the filenames, which are also stored in a pickle file. This can be done as follows:

def load_filenames(filenames_file_path):
    """
    Load filenames.pickle file and return a list of all file names
    """
    with open(filenames_file_path, 'rb') as f:
        filenames = pickle.load(f, encoding='latin1')
    return filenames
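
As a quick sanity check, the two loaders can be used together. The paths below are placeholders for wherever the train split's pickle files live on disk; if these variables are already defined earlier in your script, skip the assignments:

# Placeholder paths -- adjust to your local copy of the dataset
filenames_file_path_train = 'birds/train/filenames.pickle'
class_info_file_path_train = 'birds/train/class_info.pickle'

filenames = load_filenames(filenames_file_path_train)
class_ids = load_class_ids(class_info_file_path_train)

# Every filename should have a corresponding class ID
assert len(filenames) == len(class_ids)
print(filenames[0], class_ids[0])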
  3. After that, we need to load the text embeddings, which are in a pickle file as well. Load the file and retrieve the text embeddings as follows:

def load_embeddings(embeddings_file_path):
    """
    Load embeddings
    """
    with open(embeddings_file_path, 'rb') as f:
        embeddings = pickle.load(f, encoding='latin1')
    embeddings = np.array(embeddings)
    print('embeddings: ', embeddings.shape)
    return embeddings
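
The printed shape is worth a look: the array is three-dimensional, with one row per image, several caption embeddings per image, and one vector per caption. Here is a minimal sketch of picking a single caption embedding, assuming that (num_images, captions_per_image, embedding_dim) layout and with embeddings_file_path_train as a placeholder for the train-split embedding pickle:

embeddings = load_embeddings(embeddings_file_path_train)

# Assumed layout: (num_images, captions_per_image, embedding_dim)
num_images, captions_per_image, embedding_dim = embeddings.shape

# Pick one caption embedding at random for the first image,
# just as load_dataset() does further below
ix = random.randint(0, captions_per_image - 1)
first_embedding = embeddings[0, ix, :]
print(first_embedding.shape)  # (embedding_dim,)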
  4. Next, get the bounding boxes, which are used to extract the object of interest from the raw images. The following code shows how to retrieve them:

def load_bounding_boxes(dataset_dir):
    """
    Load bounding boxes and return a dictionary of file names and
    corresponding bounding boxes
    """
    # Paths
    bounding_boxes_path = os.path.join(dataset_dir, 'bounding_boxes.txt')
    file_paths_path = os.path.join(dataset_dir, 'images.txt')

    # Read the bounding_boxes.txt and images.txt files
    df_bounding_boxes = pd.read_csv(bounding_boxes_path,
                                    delim_whitespace=True, header=None).astype(int)
    df_file_names = pd.read_csv(file_paths_path, delim_whitespace=True, header=None)

    # Create a list of file names (the second column of images.txt)
    file_names = df_file_names[1].tolist()

    # Create a dictionary keyed by file name (without the .jpg extension)
    filename_boundingbox_dict = {img_file[:-4]: [] for img_file in file_names}

    # Assign a bounding box to the corresponding image
    for i in range(0, len(file_names)):
        # Get the bounding box (drop the leading image-ID column)
        bounding_box = df_bounding_boxes.iloc[i][1:].tolist()
        key = file_names[i][:-4]
        filename_boundingbox_dict[key] = bounding_box

    return filename_boundingbox_dict
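
In CUB_200_2011, images.txt maps an integer image ID to a relative file path, and bounding_boxes.txt maps the same ID to a box in (x, y, width, height) order, which is why the code above drops column 0 and keeps the remaining four columns. A quick check, assuming cub_dataset_dir points at the extracted dataset folder:

# cub_dataset_dir is assumed to point at the extracted CUB_200_2011 directory
bounding_boxes = load_bounding_boxes(cub_dataset_dir)

# Each value is [x, y, width, height] in pixel coordinates
first_key = next(iter(bounding_boxes))
print(first_key, bounding_boxes[first_key])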
  5. Next, write a method to load and crop an image. The following code loads the image, crops it around the provided bounding box, and resizes it to the specified size:

def get_img(img_path, bbox, image_size):
    """
    Load, crop, and resize an image
    """
    img = Image.open(img_path).convert('RGB')
    width, height = img.size
    if bbox is not None:
        # Crop a square of side 2*R centered on the bounding box,
        # clipped to the image borders
        R = int(np.maximum(bbox[2], bbox[3]) * 0.75)
        center_x = int((2 * bbox[0] + bbox[2]) / 2)
        center_y = int((2 * bbox[1] + bbox[3]) / 2)
        y1 = np.maximum(0, center_y - R)
        y2 = np.minimum(height, center_y + R)
        x1 = np.maximum(0, center_x - R)
        x2 = np.minimum(width, center_x + R)
        img = img.crop([x1, y1, x2, y2])
    img = img.resize(image_size, Image.BILINEAR)
    return img
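
To make the crop logic concrete, here is a worked example with made-up numbers: a 500 x 375 image and a bounding box of [x, y, width, height] = [60, 27, 325, 304]:

# Illustrative numbers only
bbox = [60, 27, 325, 304]   # [x, y, width, height]
width, height = 500, 375

R = int(max(bbox[2], bbox[3]) * 0.75)        # int(243.75) -> 243
center_x = int((2 * bbox[0] + bbox[2]) / 2)  # int(222.5)  -> 222
center_y = int((2 * bbox[1] + bbox[3]) / 2)  # int(179.0)  -> 179

# The 2R x 2R window is clipped to the image borders:
print(max(0, center_x - R), max(0, center_y - R),
      min(width, center_x + R), min(height, center_y + R))
# -> 0 0 465 375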
  6. Now, combine all of the preceding methods to get the dataset that we need for our training. This code returns all the images, their labels, and the corresponding embeddings:

def load_dataset(filenames_file_path, class_info_file_path, cub_dataset_dir,
                 embeddings_file_path, image_size):
    filenames = load_filenames(filenames_file_path)
    class_ids = load_class_ids(class_info_file_path)
    bounding_boxes = load_bounding_boxes(cub_dataset_dir)
    all_embeddings = load_embeddings(embeddings_file_path)

    X, y, embeddings = [], [], []

    # TODO: Change filenames indexing
    for index, filename in enumerate(filenames[:500]):
        bounding_box = bounding_boxes[filename]

        try:
            # Load and crop the image
            img_name = '{}/images/{}.jpg'.format(cub_dataset_dir, filename)
            img = get_img(img_name, bounding_box, image_size)

            # All caption embeddings for this image
            all_embeddings1 = all_embeddings[index, :, :]

            # Pick one caption embedding at random
            embedding_ix = random.randint(0, all_embeddings1.shape[0] - 1)
            embedding = all_embeddings1[embedding_ix, :]

            X.append(np.array(img))
            y.append(class_ids[index])
            embeddings.append(embedding)
        except Exception as e:
            print(e)

    X = np.array(X)
    y = np.array(y)
    embeddings = np.array(embeddings)

    return X, y, embeddings
  7. Finally, load the dataset and make it available for the training:

X_train, y_train, embeddings_train = load_dataset(filenames_file_path=filenames_file_path_train,
                                                  class_info_file_path=class_info_file_path_train,
                                                  cub_dataset_dir=cub_dataset_dir,
                                                  embeddings_file_path=embeddings_file_path_train,
                                                  image_size=(64, 64))

X_test, y_test, embeddings_test = load_dataset(filenames_file_path=filenames_file_path_test,
                                               class_info_file_path=class_info_file_path_test,
                                               cub_dataset_dir=cub_dataset_dir,
                                               embeddings_file_path=embeddings_file_path_test,
                                               image_size=(64, 64))
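
Before moving on, a quick shape check can confirm that everything lines up. With the first 500 training files and 64 x 64 images, the output should look something like the comment below (the last dimension depends on the caption-embedding file used, and a few images may be skipped by the try/except block):

print(X_train.shape, y_train.shape, embeddings_train.shape)
# Something like: (500, 64, 64, 3) (500,) (500, 1024)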

Now that we have successfully loaded the datasets for training and testing, let's create some models.