Import required libraries

In [1]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

Path to data

In [3]:
# Root folder of the flowers dataset. read_images(PATH) treats every
# sub-folder found here as one label and collects the .jpg files inside it.
PATH = '../../../data/flowers/'

Input image size

In [4]:
# Value handed to tfms_from_model(arch, size) when the model data is built.
# NOTE(review): presumably the image side length in pixels - confirm.
size = 224

Read data

In [5]:
def read_images_from_dir(base_dir, folder):
    """Collect the ``.jpg`` files located directly inside ``base_dir/folder``.

    The label of every image is the name of the folder that contains it.

    Args:
        base_dir: Directory that contains ``folder``.
        folder: Name of the sub-directory to scan; also used as the label.

    Returns:
        A ``(labels, images)`` tuple of two equally long lists. ``labels``
        repeats ``folder`` once per image and ``images`` holds the matching
        file paths (``base_dir/folder/<file>``), sorted by file name.
    """
    path_folder = os.path.join(base_dir, folder)
    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # (and any seeded split computed from it) identical across runs/machines.
    images = [
        os.path.join(path_folder, file)
        for file in sorted(os.listdir(path_folder))
        if file.endswith('.jpg')
    ]
    labels = [folder] * len(images)
    return labels, images

def read_images(base_dir):
    """Collect labelled image paths from every sub-directory of ``base_dir``.

    Each sub-directory is handed to ``read_images_from_dir`` and its name is
    used as the label of the images found in it.

    Args:
        base_dir: Directory whose sub-directories hold the images.

    Returns:
        A ``(labels, images)`` tuple of two equally long lists, where
        ``labels[i]`` is the folder name of the file at ``images[i]``.
        Folders are visited in sorted name order.
    """
    labels = []
    images = []
    # Sorted because os.listdir gives no ordering guarantee.
    for folder in sorted(os.listdir(base_dir)):
        # Plain files next to the class folders cannot be listed as
        # directories; skip them instead of letting os.listdir raise.
        if not os.path.isdir(os.path.join(base_dir, folder)):
            continue
        labels_folder, images_folder = read_images_from_dir(base_dir, folder)
        labels.extend(labels_folder)
        images.extend(images_folder)
    return labels, images

# Parallel lists: labels[i] is the folder name of the image file at images[i].
labels, images = read_images(PATH)

Show some images from dataset

In [6]:
from random import randrange
from IPython.display import Image, display

# Display three images picked at random (the same image may be drawn twice).
for _ in range(3):
    display(Image(images[randrange(len(images))]))

Split dataset into train and valid

In [7]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the images, stratified on the labels, with a fixed
# random_state so the split is repeatable for the same input order.
(images_train, images_test,
 labels_train, labels_test) = train_test_split(
    images,
    labels,
    test_size=0.3,
    random_state=8,
    stratify=labels,
)

Create the train and valid folders and copy the images into them

In [8]:
# Output folders for the split, derived from PATH so that the dataset root is
# spelled out in one place only (the resulting strings are unchanged).
# NOTE(review): ImageClassifierData.from_paths(PATH, ...) is presumably what
# reads these 'train' / 'valid' sub-folders - confirm against the fastai docs.
TRAIN_PATH = os.path.join(PATH, 'train')
VALID_PATH = os.path.join(PATH, 'valid')

def create_output_folders(train_folder, valid_folder, class_labels=None):
    """Create the train and valid folders with one sub-folder per label.

    Safe to call repeatedly: folders that already exist are left untouched and
    any missing label sub-folder is (re)created.

    Args:
        train_folder: Root folder for the training images.
        valid_folder: Root folder for the validation images.
        class_labels: Iterable of label names; duplicates are ignored.
            Defaults to the notebook-level ``labels`` list.
    """
    if class_labels is None:
        class_labels = labels  # notebook-level list returned by read_images(PATH)
    unique_labels = set(class_labels)
    for root in (train_folder, valid_folder):
        # Create the root even when there are no labels at all.
        os.makedirs(root, exist_ok=True)
        for label in unique_labels:
            # exist_ok also repairs a root that exists without its label folders.
            os.makedirs(os.path.join(root, label), exist_ok=True)
            
def _copy_into_label_folders(image_paths, image_labels, root_folder):
    """Copy each image into ``root_folder/<its label>/``, keeping its file name."""
    for image_path, label in zip(image_paths, image_labels):
        dest_dir = os.path.join(root_folder, label)
        # Without an existing directory shutil.copy would write a *file* named
        # after the label, so make sure the label folder is there first.
        os.makedirs(dest_dir, exist_ok=True)
        shutil.copy(image_path, os.path.join(dest_dir, os.path.basename(image_path)))


def copy_files_to_train_and_validation_folders(train_folder, valid_folder, images_train, images_valid,
                                               train_labels=None, valid_labels=None):
    """Copy the split images into per-label sub-folders of the output folders.

    Args:
        train_folder: Root folder that receives the training images.
        valid_folder: Root folder that receives the validation images.
        images_train: Paths of the training images.
        images_valid: Paths of the validation images.
        train_labels: Label of each entry of ``images_train``. Defaults to the
            notebook-level ``labels_train`` list.
        valid_labels: Label of each entry of ``images_valid``. Defaults to the
            notebook-level ``labels_test`` list.

    Raises:
        ValueError: If an image list and its label list differ in length.
    """
    # Fall back to the notebook-level split results so existing calls keep working.
    if train_labels is None:
        train_labels = labels_train
    if valid_labels is None:
        valid_labels = labels_test
    if len(images_train) != len(train_labels) or len(images_valid) != len(valid_labels):
        raise ValueError('Each image list must have exactly one label per image')

    print('Copy training files to directory')
    _copy_into_label_folders(images_train, train_labels, train_folder)

    print('Copy validation files to directory')
    _copy_into_label_folders(images_valid, valid_labels, valid_folder)
    
# Create the train/valid folder layout, then copy every image of the split
# into the sub-folder named after its label.
# Note: the copy step also relies on the notebook-level labels_train /
# labels_test lists produced by the train_test_split cell.
create_output_folders(TRAIN_PATH, VALID_PATH)
copy_files_to_train_and_validation_folders(TRAIN_PATH, VALID_PATH, images_train, images_test)
Copy training files to directory
Copy validation files to directory

Create first model

In [9]:
# Architecture used for both the transforms and the learner
# (the name resnet34 is provided by the star-imports at the top).
arch = resnet34
# Dataset object built from PATH, with transforms derived from `arch` and `size`.
# NOTE(review): from_paths presumably reads the 'train' / 'valid' sub-folders
# created above - confirm against the fastai documentation.
data = ImageClassifierData.from_paths(PATH, tfms = tfms_from_model(arch, size))
# Learner created from the pretrained architecture and the data above.
# NOTE(review): precompute=True presumably caches activations of the frozen
# layers - confirm before combining it with data augmentation.
model = ConvLearner.pretrained(arch, data, precompute=True)

Fit model

In [10]:
# Train the learner. The log below shows 3 epochs for the second argument;
# NOTE(review): the first argument (0.01) is presumably the learning rate - confirm.
model.fit(0.01, 3)
epoch      trn_loss   val_loss   accuracy                                                                              
    0      0.762809   0.380726   0.868928  
    1      0.526545   0.343351   0.878951                                                                              
    2      0.430926   0.310308   0.888975                                                                              

Out[10]:
[array([0.31031]), 0.8889745567151924]