HALF.Utils#

HALF.Utils.ALDataModule#

class HALF.Utils.ALDataModule.ALDataModule[source]#

Bases: object

__init__(dataset_labeled, dataset_unlabeled, dataset_test=None, train_transform=None, test_transform=None)[source]#

Class containing the different datasets needed for an active learning loop, i.e. unlabelled, labelled and test set

Parameters:
  • dataset_labeled (Dataset) – labeled dataset of images from which to load the data into the model

  • dataset_unlabeled (Dataset) – unlabeled dataset of images from which to load the data into the model

  • dataset_test (Dataset) – test dataset of images on which to evaluate the model

  • train_transform (list) – list of transformation objects to apply on the dataset for training

  • test_transform (list) – list of transformation objects to apply on the dataset for the evaluation

Returns:

None

set_test_mode()[source]#

Put the datasets in testing mode with the corresponding transformations

set_train_mode()[source]#

Put the datasets in training mode with the corresponding transformations

HALF.Utils.ALDatasetManager#

class HALF.Utils.ALDatasetManager.ALDatasetManager[source]#

Bases: object

__init__(al_data_module)[source]#

Class handling operations and manipulations on the datasets

Parameters:

al_data_module (ALDataModule) – contains different datasets for active learning

Returns:

None

property dataset_labeled#

Get the labeled dataset

Parameters:

None

Returns:

labeled dataset

Return type:

Dataset

property dataset_test#

Gets the test dataset

Parameters:

None

Returns:

Test set

Return type:

Dataset

property dataset_unlabeled#
property dataset_unlabeled_lb#
set_test_mode()[source]#

Put the datasets in testing mode with the corresponding transformations

set_train_mode()[source]#

Put the datasets in training mode with the corresponding transformations

subset_unlabeled(ulb_idx_selected, label=False)[source]#

Get a subset of the unlabelled dataset

Parameters:
  • ulb_idx_selected (np.array) – array of indices you want in the Subset

  • label (bool) – True if you want to get the labels in the Data

Returns:

subset of the unlabeled dataset with the given indices

Return type:

Subset

update(ulb_idx_selected, subds_labeled)[source]#

Updates the labelled and unlabelled datasets and keeps track of their indices

Parameters:
  • ulb_idx_selected (np.array) – array of indices selected with the active learning strategy with respect to the unlabeled dataset

  • subds_labeled (Dataset) – subset of the unlabeled dataset corresponding to the indices selected by the strategy

Returns:

None

HALF.Utils.ActiveLearner#

class HALF.Utils.ActiveLearner.ActiveLearner[source]#

Bases: object

Class following a Mediator pattern, handling the communication between all the components of an active learning loop : datasets, model and oracle

oracle#

The oracle, in charge of labelling points

Type:

IOracle

dataset_manager#

Manages train, test, (un)labeled datasets

Type:

ALDatasetManager

model#

The predictive model, by default None

Type:

AbstractModel

dict_hooks#

Dictionary of hooks to be applied for each hook identifier

Type:

DefaultDict[str, List[IHook]]

ds_hook#

Contains data manipulated by hooks

Type:

Dict

i_round#

Active learning round index

Type:

int

best_test_accuracy#

Best test accuracy observed during current active learning process

Type:

float

nb_oracle_labeled#

Number of datapoints labeled by the oracle

Type:

int

configAL#

Configuration for the active learning process

Type:

ConfigActiveLearner

trainer#

The model trainer, initialized with configAL

Type:

pytorch_lightning.Trainer

nb_label_increased#

Number of new labels obtained in the latest iteration

Type:

int

current_test_results#

Contains the trainer’s latest test results

__init__(oracle, dataset_manager, model=None)[source]#
Parameters:
  • oracle (IOracle) – The oracle, in charge of labelling points

  • dataset_manager (ALDatasetManager) – Manages train, test, (un)labeled datasets

  • model (AbstractModel, optional) – The predictive model. Defaults to None.

apply_model_strategy(strategy=ModelStrategy.UPDATE)[source]#

Applies strategy to the model

Parameters:

strategy (ModelStrategy) – The strategy to apply, by default ModelStrategy.UPDATE

property dataset_test: Dataset#
increase_label(configAL)[source]#

Samples new points to be labeled and updates training data accordingly

Parameters:

configAL (ConfigActiveLearner) – configuration

on_AL_iteration_begin()[source]#

Set up new AL iteration

on_AL_iteration_end()[source]#

Terminate current AL iteration

on_AL_loop_begin()[source]#

Set up AL process

on_AL_loop_end()[source]#

Terminate AL process

on_increase_dataset_begin()[source]#
on_increase_dataset_end()[source]#
on_test_begin()[source]#
on_test_end()[source]#
on_train_begin()[source]#
on_train_end()[source]#
register_hook(stage, hook)[source]#
Parameters:
  • stage (str) –

  • hook (IHook) –

run(configAL)[source]#

Runs active learning process until stop condition is attained

Parameters:

configAL (ConfigActiveLearner) – configuration

set_list_hook(stage, list_hooks)[source]#
Parameters:
  • stage (str) –

  • list_hooks (List[IHook]) –

stop_condition()[source]#

Checks whether stopping conditions (budget or targeted accuracy) have been reached.

Returns:

True if stopping conditions were attained or no configuration is set

Return type:

bool

test()[source]#

Collects test results for the current model

train()[source]#

Fits model to current data

HALF.Utils.ActiveLearnerBuilder#

class HALF.Utils.ActiveLearnerBuilder.ActiveLearnerBuilder[source]#

Bases: object

__init__()[source]#

Builder class for ActiveLearner

add_defaults_hooks()[source]#

Add the default hooks for the ActiveLearner

add_hook(uid, hook_attach_data)[source]#

Attach a hook to the ActiveLearner

Parameters:
  • uid (str) – unique identifier for the hook

  • hook_attach_data (HookAttachData) – information about the hook

Returns:

None

build(same_instance=False, use_configured_hooks=True)[source]#

Finalise the building of the ActiveLearner instance

Parameters:
  • same_instance (bool) – check if we want to obtain the current build of the ActiveLearner

  • use_configured_hooks (bool) – use the hooks given in the YAML configuration

Returns:

Active learner instance

Return type:

ActiveLearner

hydrate_hook_dict()[source]#

Attach the hooks from the config to the ActiveLearner

Returns:

None

remove_hook(uid)[source]#

Remove the hook attached to the given uid; if it does not exist, nothing happens

Parameters:

uid (str) – unique identifier of the hook you wish to remove

Returns:

None

reset()[source]#

Clean the Builder and initialize the necessary attributes

set_data_module(al_data_module)[source]#

Set the data module containing the datasets

Parameters:

al_data_module (ALDataModule) – data module to be used

set_model(model)[source]#

Set the model to be used in the active learning loop

Parameters:

model (pl.LightningModule) – model to be used

set_oracle(oracle)[source]#

Set the oracle

Parameters:

oracle (IOracle) – oracle to be used

class HALF.Utils.ActiveLearnerBuilder.HookAttachData[source]#

Bases: object

HookAttachData(attach_point: str, class_name: str, args: dict)

__init__(attach_point, class_name, args)#
Parameters:
  • attach_point (str) –

  • class_name (str) –

  • args (dict) –

Return type:

None

args: dict#
attach_point: str#
class_name: str#

HALF.Utils.Config#

class HALF.Utils.Config.Config[source]#

Bases: object

Config class with all parameters to configure the framework, built upon OmegaConf

CONFIG = None#
property data#
static setup(file)[source]#

Create the config class

Parameters:

file (Union[OmegaConf, str]) – path where to find the config file or OmegaConf object

HALF.Utils.ConfigActiveLearner#

class HALF.Utils.ConfigActiveLearner.ConfigActiveLearner[source]#

Bases: object

__init__(strategy, budget=5000, AL_batch_size=500, targeted_accuracy=0.8, strategy_args=None)[source]#

Configuration for active learning

Parameters:
  • strategy (str) – Identifier of the strategy to employ

  • budget (int, optional) – Total number of labels to be queried. Defaults to 5000.

  • AL_batch_size (int, optional) – Size of each query batch. Defaults to 500.

  • targeted_accuracy (float, optional) – Accuracy at which the active learning should be stopped. Defaults to 0.8.

  • strategy_args (Dict, optional) – Arguments to be passed to the strategy. Defaults to None.

HALF.Utils.GlobalIndexDataset#

class HALF.Utils.GlobalIndexDataset.GlobalIndexDataset[source]#

Bases: Dataset

__init__(dataset)[source]#

HALF.Utils.Registry#

class HALF.Utils.Registry.Registry[source]

Bases: object

glob_module_registry = {'AdversarialBIM': <class 'distil.active_learning_strategies.adversarial_bim.AdversarialBIM'>, 'AdversarialDeepFool': <class 'distil.active_learning_strategies.adversarial_deepfool.AdversarialDeepFool'>, 'BADGE': <class 'distil.active_learning_strategies.badge.BADGE'>, 'BALDDropout': <class 'distil.active_learning_strategies.bayesian_active_learning_disagreement_dropout.BALDDropout'>, 'BatchBALDDropout': <class 'distil.active_learning_strategies.batch_bald.BatchBALDDropout'>, 'CoreGCN': <class 'HALF.Strategies.sequence_gcn.CoreGCN'>, 'CoreSet': <class 'distil.active_learning_strategies.core_set.CoreSet'>, 'EntropySampling': <class 'distil.active_learning_strategies.entropy_sampling.EntropySampling'>, 'EntropySamplingDropout': <class 'distil.active_learning_strategies.entropy_sampling_dropout.EntropySamplingDropout'>, 'FASS': <class 'distil.active_learning_strategies.fass.FASS'>, 'GLISTER': <class 'distil.active_learning_strategies.glister.GLISTER'>, 'GradMatchActive': <class 'distil.active_learning_strategies.gradmatch_active.GradMatchActive'>, 'KMeansSampling': <class 'distil.active_learning_strategies.kmeans_sampling.KMeansSampling'>, 'LeastConfidenceSampling': <class 'distil.active_learning_strategies.least_confidence_sampling.LeastConfidenceSampling'>, 'LeastConfidenceSamplingDropout': <class 'distil.active_learning_strategies.least_confidence_sampling_dropout.LeastConfidenceSamplingDropout'>, 'LoadModelHook': <class 'HALF.Hooks.LoadModelHook.LoadModelHook'>, 'MarginSampling': <class 'distil.active_learning_strategies.margin_sampling.MarginSampling'>, 'MarginSamplingDropout': <class 'distil.active_learning_strategies.margin_sampling_dropout.MarginSamplingDropout'>, 'PartitionStrategy': <class 'distil.active_learning_strategies.partition_strategy.PartitionStrategy'>, 'RandomSampling': <class 'distil.active_learning_strategies.random_sampling.RandomSampling'>, 'SCG': <class 'distil.active_learning_strategies.scg.SCG'>, 'SCMI': <class 
'distil.active_learning_strategies.scmi.SCMI'>, 'SMI': <class 'distil.active_learning_strategies.smi.SMI'>, 'SaveModelHook': <class 'HALF.Hooks.SaveModelHook.SaveModelHook'>, 'SetupEarlyStopTraining': <class 'HALF.Hooks.SetupEarlyStopTraining.SetupEarlyStopTraining'>, 'SetupTensorboardHook': <class 'HALF.Hooks.SetupTensorboardHook.SetupTensorboardHook'>, 'SubmodularSampling': <class 'distil.active_learning_strategies.submod_sampling.SubmodularSampling'>, 'TestAccuracyLoggerHook': <class 'HALF.Hooks.TestAccuracyLoggerHook.TestAccuracyLoggerHook'>, 'UncertainGCN': <class 'HALF.Strategies.sequence_gcn.UncertainGCN'>, 'WriteAccuracyLogsHook': <class 'HALF.Hooks.WriteAccuracyLogsHook.WriteAccuracyLogsHook'>}
classmethod register(name=None, force=False)[source]

Class method to register Executor class to the internal registry.

Parameters:
  • name (str, optional) – The name of the executor. Defaults to None.

  • force (bool, optional) – If True, write registry key even if it already exists. Defaults to False.

Returns:

The Executor class itself.

Return type:

Registry

HALF.Utils.sample#

Sample and SampleList are data structures for arbitrary data returned from a dataset. To work with MMF, minimum requirement for datasets is to return an object of Sample class and for models to accept an object of type SampleList as an argument.

Sample is used to represent an arbitrary sample from dataset, while SampleList is list of Sample combined in an efficient way to be used by the model. In simple term, SampleList is a batch of Sample but allow easy access of attributes from Sample while taking care of properly batching things.

class HALF.Utils.sample.Sample[source]#

Bases: OrderedDict

Sample represent some arbitrary data. All datasets in MMF must return an object of type Sample.

Parameters:

init_dict (Dict) – Dictionary to init Sample class with.

Usage:

>>> sample = Sample({"text": torch.tensor(2)})
>>> sample.text.zero_()
# Custom attributes can be added to ``Sample`` after initialization
>>> sample.context = torch.tensor(4)
__init__(init_dict=None)[source]#
fields()[source]#

Get current attributes/fields registered under the sample.

Returns:

Attributes registered under the Sample.

Return type:

List[str]

class HALF.Utils.sample.SampleList[source]#

Bases: OrderedDict

SampleList is used to collate a list of Sample into a batch during batch preparation. It can be thought of as a merger of list of Dicts into a single Dict.

If Sample contains an attribute ‘text’ of size (2) and there are 10 samples in list, the returned SampleList will have an attribute ‘text’ which is a tensor of size (10, 2).

Parameters:

samples (type) – List of Sample from which the SampleList will be created.

Usage:

>>> sample_list = [
        Sample({"text": torch.tensor(2)}),
        Sample({"text": torch.tensor(2)})
    ]
>>> sample_list.text
torch.tensor([2, 2])
__init__(samples=None)[source]#
add_field(field, data)[source]#

Add an attribute field with value data to the SampleList

Parameters:
  • field (str) – Key under which the data will be added.

  • data (object) – Data to be added, can be a torch.Tensor, list or Sample

copy()[source]#

Get a copy of the current SampleList

Returns:

Copy of current SampleList.

Return type:

SampleList

detach()[source]#
fields()[source]#

Get current attributes/fields registered under the SampleList.

Returns:

list of attributes of the SampleList.

Return type:

List[str]

get_batch_size()[source]#

Get batch size of the current SampleList. There must be a tensor present inside the sample list to use this function.

Returns:

Size of the batch in SampleList.

Return type:

int

get_device()[source]#
get_field(field)[source]#

Get value of a particular attribute

Parameters:

field (str) – Attribute whose value is to be returned.

get_fields(fields)[source]#

Get a new SampleList generated from the current SampleList but containing only the attributes passed in the fields argument

Parameters:

fields (List[str]) – Attributes whose SampleList will be made.

Returns:

SampleList containing only the attribute values of the fields which were passed.

Return type:

SampleList

get_item_list(key)[source]#

Get SampleList of only one particular attribute that is present in the SampleList.

Parameters:

key (str) – Attribute whose SampleList will be made.

Returns:

SampleList containing only the attribute value of the key which was passed.

Return type:

SampleList

pin_memory()[source]#

In custom batch object, we need to define pin_memory function so that PyTorch can actually apply pinning. This function just individually pins all of the tensor fields

to(device, non_blocking=True)[source]#

Similar to .to function on a torch.Tensor. Moves all of the tensors present inside the SampleList to a particular device. If an attribute’s value is not a tensor, it is ignored and kept as it is.

Parameters:
  • device (str|torch.device) – Device to which the SampleList should be moved.

  • non_blocking (bool) – Whether the move should be non_blocking. Default: True

Returns:

a SampleList moved to the device.

Return type:

SampleList

to_dict()[source]#

Converts a sample list to dict, this is useful for TorchScript and for other internal API unification efforts.

Returns:

A dict representation of current sample list

Return type:

Dict[str, Any]

HALF.Utils.sample.convert_batch_to_sample_list(batch)[source]#
Parameters:

batch (Union[SampleList, Dict[str, Any]]) –

Return type:

SampleList

HALF.Utils.sample.detach_tensor(tensor)[source]#

Detaches any element passed which has a .detach function defined. Currently, in MMF can be SampleList, Report or a tensor.

Parameters:

tensor (Any) – Item to be detached

Returns:

Detached element

Return type:

Any

HALF.Utils.sample.to_device(sample_list, device='cuda')[source]#
Parameters:
  • sample_list (Union[SampleList, Dict[str, Any]]) –

  • device (Union[str, device]) –

Return type:

SampleList

HALF.Utils.utils#

HALF.Utils.utils.camel2snake(name_camel)[source]#

Converts CamelCase to snake_case

Parameters:

name_camel (str) – The name formatted in CamelCase

Returns:

The name converted to snake_case

Return type:

str

HALF.Utils.utils.now_str()[source]#

Get current time

Returns:

The current time, formatted

Return type:

str