# Source code for submodlib.functions.featureBased

# featureBased.py
# Author: Vishal Kaushal <vishal.kaushal@gmail.com>
import numpy as np
from .setFunction import SetFunction
import submodlib_cpp as subcp
from submodlib_cpp import FeatureBased
from sklearn.preprocessing import MinMaxScaler

class FeatureBasedFunction(SetFunction):
    """Implementation of the Feature-Based (FB) function.

    Feature based functions are essentially sums of concave over modular
    functions defined as:

    .. math::
            f(X) = \\sum_{f \\in F} w_f g(m_f(X))

    where :math:`g` is a concave function, :math:`{m_f}` are a set of feature
    scores, and :math:`f \\in F` are features. In case of images, features
    could be, for example, the features extracted from the second last layer
    of a ConvNet. Feature Based functions model the notion of coverage over
    features.

    Parameters
    ----------
    n : int
        Number of elements in the ground set. Must be > 0.

    features : list
        Feature vectors for the elements in the ground set. List of size n.

    numFeatures : int
        Dimensionality of the feature vectors of each ground set element.

    sparse : bool
        Indicates whether *features* contain sparse feature vectors. If True,
        *features* is expected to be a list of list of tuples where each
        sparse feature vector is represented by a list of tuples (i, j), i
        being the index of the non-zero feature value and j being the feature
        value; it is handed to the C++ object as supplied. If False, the
        supplied *features* are min-max normalized per feature and converted
        into sparse representation internally.

    featureWeights : list
        Weights of features. List of size numFeatures. When omitted, every
        feature gets weight 1.

    mode : FeatureBased.Type, optional
        The concave function to be used. Can be FeatureBased.logarithmic,
        FeatureBased.squareRoot, FeatureBased.inverse. Default is
        FeatureBased.logarithmic.

    Raises
    ------
    Exception
        If *n* is not positive, *mode* is not one of the supported concave
        functions, ``len(features) != n``, or *featureWeights* is given and
        ``len(featureWeights) != numFeatures``.
    """

    def __init__(self, n, features, numFeatures, sparse, featureWeights=None, mode=FeatureBased.logarithmic):
        self.n = n
        self.mode = mode
        self.features = features
        self.numFeatures = numFeatures
        self.featureWeights = featureWeights
        self.cpp_obj = None
        self.cpp_features = None

        if self.n <= 0:
            raise Exception("ERROR: Number of elements in ground set must be positive")

        if self.mode not in [FeatureBased.squareRoot, FeatureBased.inverse, FeatureBased.logarithmic]:
            raise Exception("ERROR: Incorrect mode. Must be one of 'squareRoot', 'inverse' or 'logarithmic'")

        if n != len(features):
            raise Exception("ERROR: Mismtach between n and len(features)")

        if featureWeights is not None:
            if numFeatures != len(featureWeights):
                raise Exception("ERROR: Mismtach between numFeatures and len(featureWeights)")
        else:
            # No weights supplied: weigh every feature equally.
            self.featureWeights = [1] * numFeatures

        if sparse:
            # Sparse vectors are lists of (index, value) tuples, which cannot
            # be fed to MinMaxScaler (it needs a rectangular 2-D array), so
            # they are forwarded exactly as the caller supplied them.
            # NOTE(review): sparse input is therefore not min-max scaled here;
            # confirm whether callers are expected to pre-normalize it.
            self.cpp_features = features
        else:
            # Min-max normalize each feature (column) so values lie in [0, 1].
            featuresArray = np.array(features)
            normalizedFeatures = MinMaxScaler().fit(featuresArray).transform(featuresArray)

            # Keep only the non-zero entries of every row as (index, value)
            # tuples, the sparse layout passed to the C++ object.
            self.cpp_features = [
                [(j, value) for j, value in enumerate(row) if value != 0]
                for row in normalizedFeatures.tolist()
            ]

        self.cpp_obj = FeatureBased(self.n, self.mode, self.cpp_features, self.numFeatures, self.featureWeights)
        # Indices 0..n-1 of the ground-set elements.
        self.effective_ground = set(range(n))