# Configurations

Configuration files should be created for training, testing, and inference. Our configuration files use the YAML format. The following items and sub-items can be provided in a configuration file.

"model":
  "type": # str (default: "DFNet"), model type, now only support "DFNet".
  "params": # parameters for the model.
    "in_channels": # int (default: 4), the input channels of the model (default: 4).
    "hidden_channels": # int (default: 64), the hiden channels of the model.
    "L": # int (default: 5), the number of layers in a dense block.
    "k": # int (default: 12), the output channel of a single layer in a dense block.
    "use_DUC": # bool (default: True), whether to use the DUC instead of the deconvolution in up-sampling process.

"optimizer":
  "type": # str (default: "AdamW"), optimizer type, refer to objects in torch.optim.
  "params": # parameters for the optimizer, see specific optimizer parameters in torch for details.

"lr_scheduler":
  "type": # str (default: ""), learning rate scheduler type, refer to objects in torch.optim.lr_scheduler. Specially, "" for no learning rate scheduler.
  "params": # parameters for the learning rate scheduler, see specific learning rate scheduler parameters in torch for details.

"dataset":
  "train": # list or dict, one item for one dataset.
    "type": # str (default: "transcg"), dataset type, support type: ["transcg", "cleargrasp-real", "cleargrasp-syn", "omniverse", "transparent-object"]
    "data_dir": # str, directory to the data
    "image_size": # tuple of (int, int) (default: (1280, 720)), the resolution of the data (samples will be scaled to this size and fed into the network).
    "use_augmentation": # bool (default: True), whether to use data augmentation.
    "rgb_augmentation_probability": # float (default: 0.8), RGB augmentation probability.
    "depth_min": # float (default: 0.3), the minimum depth.
    "depth_max": # float (default: 1.5), the maximum depth
    "depth_norm": # float (default: 1.0), the depth normalization coefficient.
  "test": # list or dict, one item for one dataset.
    # The dataset description is the same as the training dataset description.
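
A sketch of a single-dataset setup built from the documented defaults. The data directories are hypothetical placeholders, the image size is written as a plain YAML list (adjust it to whatever tuple notation the project's YAML loader expects), and disabling augmentation on the test split is an illustrative choice:

```yaml
"dataset":
  "train":
    "type": "transcg"
    "data_dir": "data/transcg"   # hypothetical placeholder path
    "image_size": [1280, 720]    # written as a YAML list here
    "use_augmentation": True
    "rgb_augmentation_probability": 0.8
    "depth_min": 0.3
    "depth_max": 1.5
    "depth_norm": 1.0
  "test":
    "type": "transcg"
    "data_dir": "data/transcg"   # hypothetical placeholder path
    "use_augmentation": False    # illustrative choice for testing
```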

"dataloader": 
  # the following parameters are the same as parameters in torch.utils.data.DataLoader.
  "num_workers": # the number of workers in data loader.
  "shuffle": # whether shuffle the data.
  "drop_last": # whether drop the last data (that is not in a batch).

"trainer": 
  "batch_size": # int (default: 32), batch size
  "test_batch_size": # int (default: 1), batch size during testing.
  "multigpu": # bool (default: False), whether to use the multigpu for training/testing.
  "max_epoch": # int (default: 40), the number of epochs.
  "resume_lr": # float (default: 0.001), the resume learning rate (only used when resume training from checkpoints).
  "criterion":
    "type": # main loss type, support type: ["mse_loss", "masked_mse_loss", "custom_masked_mse_loss", "l1_loss", "masked_l1_loss", "custom_masked_l1_loss", "l2_loss", "masked_l2_loss", "custom_masked_l2_loss","huber_loss", "masked_huber_loss", "custom_masked_huber_loss"]
    "epsilon": # float (default: 1e-8), the epsilon to avoid NaN during calculation.
    "huber_k": # float (default: 0.1) the k value in huber loss. The parameter is only used when using loss type concerning huber loss.
    "combined_smooth": # bool (default: False), whether to combine the smooth loss calculated by the cosine similarity between surface normals of ground-truth depth maps and predicted depth maps.
    "combined_beta": # float (default: 0.005), the balance coefficient between depth loss and the smooth loss.
    "combined_beta_decay": # float (default: 0.1), the balance coefficient decay value.
    "combined_beta_decay_milestones": # list (default: []), the milestones of balance coefficient decay.

"metrics":
  "types": # list of str (default: []), metric types, supporting type: ["MSE", "MaskedMSE", "RMSE", "MaskedRMSE", "REL", "MaskedREL", "MAE", "MaskedMAE", "Threshold@k", "MaskedThreshold@k"] (where k is a float value denoting "delta" in the paper).
  "epsilon": # float (default: 1e-8), the epsilon to avoid NaN during calculation.
  "depth_scale": # float (default: 1.0), the depth scale, should be the same as "depth_norm" in the testing set.

"stats":
  "stats_dir": # str (default: "stats"), the directory to the statistics.
  "stats_exper": # str (default: "default"), the experiment directory to the statistics. Notice that if the path is not empty and contains a "checkpoint.tar" file, then the training script will resume training from the checkpoint, and the testing script will test the model using the checkpoint.

####### Only Used For Inference #######
"inference":
  "checkpoint_path": # str (default: "checkpoint/checkpoint.tar"), the path to the checkpoint.
  "image_size": # pair of (int, int) (default: (320, 240)), the image size to feed into the network.
  "cuda_id": # int (default: 0), the cuda ID.
  "depth_min": # float (default: 0.3), the minimum depth.
  "depth_max": # float (default: 1.5), the maximum depth
  "depth_norm": # float (default: 1.0), the depth normalization coefficient.