% Dictionary learning script.
% Reads the experiment configuration, derives the PCA and vocabulary file
% names from it, and loads any previously computed PCA data that the
% configuration asks for.
clear

conf = get_conf();
% conf = get_conf_usgs();

% Base name of the patch-PCA file: <workDir>/<pcaFilename>_size-<patchSize>_pca
pca_filename = sprintf('%s/%s_size-%d_pca', ...
    conf.workDir, conf.pcaFilename, conf.patchSize);

% Base name of the output vocabulary file; encodes patch size, number of
% principal components and number of code words.
filename = sprintf('%s/%s_size-%d_pc-%d_cw-%d_vocabulary', conf.workDir, ...
    conf.codebookFilename, conf.patchSize, conf.nPC, conf.nWords);

if conf.pixelPCA
    % NOTE(review): this path is hard-coded and ignores conf.workDir; it
    % presumably provides the `model` struct (pixelU / pixelMu) used by the
    % sampling loop further down -- confirm.
    load('work/usgs_color-1_pixpca.mat');
    % Both file names get a suffix when pixel PCA is enabled.
    pca_filename = sprintf('%s_pixpca', pca_filename);
    filename = sprintf('%s_pixpca', filename);
end
if conf.patchPCA
    % Loaded after the suffix has been applied, so the pixel-PCA variant of
    % the file is picked up when conf.pixelPCA is also set.
    load(pca_filename);
end

% Preallocate the quaternion training matrix: conf.nPC rows and one column
% for every patch that will be sampled
% (training images x patches per image x number of scales).
data = zerosq(conf.nPC, conf.nTrainingCodebook*conf.nPatchesCodebook*length(conf.size));

% Every sub-directory of conf.imageDir is treated as one image category.
% Entries whose name starts with '.' ('.', '..', hidden files) are dropped.
% Plain files are dropped as well: they cannot contain images, and picking
% one would produce an empty image listing and make randi(0) fail later.
% strncmp replaces the not-recommended strmatch with the same prefix test.
categories = dir([conf.imageDir '/*']);
isDotEntry = strncmp('.', {categories.name}, 1);
categories = categories(~isDotEntry & [categories.isdir]);

% ind is indexed per scale as ind{s}; its columns are the candidate patch
% index sets that the sampling loop draws from.
ind = makeIndexMatrix(conf);

% Copy the settings into the model struct. If `model` was already created by
% one of the load calls above, these fields are added to (or overwrite fields
% of) the loaded struct.
model.size = conf.size;
model.patchSize = conf.patchSize;
model.nPC = conf.nPC;
model.patchPCA = conf.patchPCA;

% Fill `data` with randomly sampled patches: for each of
% conf.nTrainingCodebook random images and each scale in conf.size,
% conf.nPatchesCodebook patch descriptors are stored as consecutive columns.
kk = 0;  % number of (image, scale) pairs already written into data
for k = 1:conf.nTrainingCodebook
    % Random category first, then a random image with extension conf.ext
    % inside that category.
    catIdx = randi(length(categories), 1);
    catDir = [conf.imageDir '/' categories(catIdx).name];
    imageList = dir([catDir '/*.' conf.ext]);
    imgIdx = randi(length(imageList), 1);
    im0 = im2double(imread([catDir '/' imageList(imgIdx).name]));

    if conf.pixelPCA
        % Flatten to one pixel per row, apply the pixel-level PCA, restore
        % the image layout, then rescale each of the three channels with
        % mat2gray.
        % NOTE(review): assumes a 3-channel image -- confirm inputs are RGB.
        nRows = size(im0, 1);
        nCols = size(im0, 2);
        pix = reshape(im0, nRows*nCols, 3);
        pix = pcaApply(pix', model.pixelU, model.pixelMu, 3);
        im0 = reshape(pix', nRows, nCols, 3);
        for ch = 1:3
            im0(:, :, ch) = mat2gray(im0(:, :, ch));
        end
    end

    for s = 1:length(conf.size)
        % Resize to a square image for this scale and pack the three
        % channels into the three imaginary parts of a quaternion array.
        side = conf.size(s);
        im = imresize(im0, [side, side], 'bilinear');
        im = quaternion(im(:, :, 1), im(:, :, 2), im(:, :, 3));

        % Draw conf.nPatchesCodebook columns of ind{s} (with replacement).
        pick = randi(size(ind{s}, 2), 1, conf.nPatchesCodebook);
        [frames, patches] = get_patches_grid_ind(im, model, ind{s}(:, pick));

        kk = kk + 1;
        cols = (kk-1)*conf.nPatchesCodebook+1 : kk*conf.nPatchesCodebook;
        % NOTE(review): the `0 +` looks deliberate -- presumably it promotes
        % the patches to full quaternions before the assignment; confirm
        % before removing it.
        data(:, cols) = 0 + patches;
    end
end

% model.mu = mean(data, 2);
% model.sigma = std(data, [], 2);
% data = (data - repmat(model.mu, 1, size(data, 2))) ./ repmat(model.sigma, 1, size(data, 2));

% Build the vocabulary with the method named by conf.dictLearning and save
% the resulting model.
%
% The starting point is always a subset of conf.nWords columns of the
% sampled data. It is drawn unconditionally, even for methods that replace
% it, so the random-number stream seen by the branches below is unchanged.
model.vocab = vl_colsubset(data, conf.nWords);

% Scale every column of V to unit Euclidean norm (square root of the sum of
% squared moduli down each column). Shared by all branches.
unitCols = @(V) V ./ repmat(sqrt(sum(abs(V).^2, 1)), conf.nPC, 1);

switch conf.dictLearning
    case 'noise'
        % Vocabulary of normalized random quaternion columns.
        model.vocab = unitCols(randq(conf.nPC, conf.nWords));
    case 'rand'
        % Vocabulary of normalized, randomly chosen training patches.
        model.vocab = unitCols(model.vocab);
    case 'qkmeans'
        % Stack the four quaternion components into a real matrix with
        % 4*conf.nPC rows, cluster it, then rebuild quaternion centres from
        % the four row blocks. The block boundaries follow conf.nPC; they
        % were previously hard-coded for nPC == 25.
        data = [scalar(data); x(data); y(data); z(data)];
        centers = vl_kmeans(data, conf.nWords, 'verbose', 'algorithm', 'elkan');
        nPC = conf.nPC;
        model.vocab = unitCols(quaternion( ...
            centers(1:nPC, :), ...
            centers(nPC+1:2*nPC, :), ...
            centers(2*nPC+1:3*nPC, :), ...
            centers(3*nPC+1:4*nPC, :)));
    case 'qksvd'
        % Normalized random subset used as the initial dictionary.
        % NOTE(review): meaning of the trailing arguments (1, 100) is
        % defined by qksvd, which is not visible here -- confirm.
        model.vocab = unitCols(model.vocab);
        model.vocab = qksvd(data, model.vocab, 1, 100);
    otherwise
        % Previously an unknown method fell through silently; the
        % un-normalized column subset is still saved, but now with a notice.
        warning('dictLearning:unknownMethod', ...
            'Unrecognized conf.dictLearning value ''%s''; saving the un-normalized column subset.', ...
            conf.dictLearning);
end

save(filename, 'model');

