Fix precommit #236

Closed · wants to merge 51 commits

Commits
74ce170  [WIP] Classification. (b0nce, Sep 1, 2022)
e5515db  [WIP] ViT with projection and two versions of ResNet for ArcFace. (b0nce, Sep 2, 2022)
85fb08d  [WIP][fix] Type annotations. (b0nce, Sep 2, 2022)
0051dd7  [WIP][fix] Remove good validation config back. (b0nce, Sep 2, 2022)
4f7979b  [WIP][fix] Remove unnecessary files. (b0nce, Sep 2, 2022)
c9c4ec7  [WIP][new] MLP with ArcFace. (b0nce, Sep 2, 2022)
56255d5  [WIP] Arcface experiments. (b0nce, Sep 6, 2022)
0e9b3c7  Merge main. (b0nce, Sep 6, 2022)
298e6e7  [new] Tests and few fixes. (b0nce, Sep 6, 2022)
400c064  [fix] Delete unwanted file (b0nce, Sep 6, 2022)
53000c7  [WIP] Merge main. (b0nce, Oct 7, 2022)
29387c7  Merge branch 'main' into arcface (b0nce, Oct 13, 2022)
c03d803  Merge branch 'main' into arcface (b0nce, Oct 17, 2022)
8f1a4e3  [WIP] Hydra with nevergrad (b0nce, Oct 18, 2022)
d681da4  [WIP] Multirun. (b0nce, Oct 18, 2022)
7119b5c  [WIP] Multirun. (b0nce, Oct 18, 2022)
467e55a  [WIP] Arcface. (b0nce, Oct 18, 2022)
94967d8  [WIP] Arcface. (b0nce, Oct 20, 2022)
db141a5  [fix] Rm local files. (b0nce, Oct 20, 2022)
fcf68dc  [fix] Remove WIP stuff. (b0nce, Oct 20, 2022)
dfb1fdd  [fix] Remove more WIP stuff. (b0nce, Oct 20, 2022)
80b5f90  [fix] Remove more WIP stuff. (b0nce, Oct 20, 2022)
44be9e8  [fix] Remove more WIP stuff. (b0nce, Oct 20, 2022)
ee439bd  Merge branch 'main' into arcface_clean (b0nce, Oct 27, 2022)
a813cb3  [fix] Callback can have different type of metric. (b0nce, Oct 27, 2022)
9e4828e  [WIP] Arcface. (b0nce, Nov 1, 2022)
07a1904  [WIP] Arcface. (b0nce, Nov 2, 2022)
aa7c6af  [WIP] Arcface. (b0nce, Nov 2, 2022)
90f2327  [WIP] Arcface. (b0nce, Nov 2, 2022)
df330e0  [new] Tests for functional label smoothing. (b0nce, Nov 2, 2022)
6fb4498  [new] Tests for extractor with mlp. (b0nce, Nov 2, 2022)
6cb43b0  [new] Docs. (b0nce, Nov 3, 2022)
e3b3b44  Merge main. (b0nce, Nov 21, 2022)
8c5af93  [fix] Remove default neptune_project. (b0nce, Nov 22, 2022)
06ecb5d  [fix] Remove configs. (b0nce, Nov 22, 2022)
4fff80c  [fix] Remove configs. (b0nce, Nov 22, 2022)
c35ea05  [fix] Few more fixes. (b0nce, Nov 22, 2022)
d688b8e  [new] Better logs. (b0nce, Nov 22, 2022)
b91bcfb  [fix] Working config. (b0nce, Nov 22, 2022)
a8ed61d  Merge branch 'main' into arcface_clean (b0nce, Nov 22, 2022)
6453966  [fix] Fix documentation. (b0nce, Nov 22, 2022)
51cfa7e  [fix] Fix tests. (b0nce, Nov 22, 2022)
831ef3c  [fix] Fixes for @DaloroAT. (b0nce, Nov 22, 2022)
a89a711  [fix] Fixes for @AlexeySh. (b0nce, Nov 23, 2022)
c5e36a6  [fix] rm accuracy_key. (b0nce, Nov 23, 2022)
3942b2c  [fix] rm accuracy_key. (b0nce, Nov 23, 2022)
0dd3064  [fix] Always add label2category. (b0nce, Nov 23, 2022)
19a2367  [fix] Always add label2category. (b0nce, Nov 23, 2022)
a7cc37f  [new] Better runtime kwargs. (b0nce, Nov 23, 2022)
0980cfa  [fix] Better mapper. (b0nce, Nov 23, 2022)
63dcbfb  [fix] Pre-commit setup-python fix. (b0nce, Nov 24, 2022)
2 changes: 1 addition & 1 deletion .github/workflows/pre-commit-workflow.yaml
@@ -16,7 +16,7 @@ on:
 
 jobs:
   pre_commit:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-20.04
     steps:
       - name: Checkout
         uses: actions/checkout@v3
16 changes: 16 additions & 0 deletions docs/source/contents/losses.rst
@@ -33,3 +33,19 @@ TripletLossWithMiner

    .. automethod:: __init__
    .. automethod:: forward

ArcFaceLoss
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: oml.losses.arcface.ArcFaceLoss
    :undoc-members:
    :show-inheritance:

    .. automethod:: __init__

ArcFaceLossWithMLP
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: oml.losses.arcface.ArcFaceLossWithMLP
    :undoc-members:
    :show-inheritance:

    .. automethod:: __init__
4 changes: 4 additions & 0 deletions docs/source/contents/utils.rst
@@ -14,3 +14,7 @@ check_retrieval_dataframe_format
download_mock_dataset
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: oml.utils.download_mock_dataset.download_mock_dataset

label_smoothing
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: oml.utils.misc_torch.label_smoothing
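
For context, the label_smoothing helper documented above is invoked in oml/losses/arcface.py (later in this diff) as label_smoothing(y, num_classes, epsilon, label2category). A minimal sketch of the global (non category-wise) case, assuming standard label smoothing over one-hot targets; the actual OML implementation may differ:

import torch
import torch.nn.functional as F

def label_smoothing_sketch(y: torch.Tensor, num_classes: int, epsilon: float = 0.2) -> torch.Tensor:
    # Spread `epsilon` of the probability mass uniformly across all classes,
    # keeping 1 - epsilon on the true class.
    ohe = F.one_hot(y, num_classes).float()
    return ohe * (1 - epsilon) + epsilon / num_classes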
90 changes: 90 additions & 0 deletions examples/inshop/configs/train_inshop_arcface.yaml
@@ -0,0 +1,90 @@
postfix: metric_learning

seed: 42
precision: 32
accelerator: gpu
devices: 1

dataframe_name: df_no_bboxes.csv
dataset_root: data/DeepFashion_InShop/
logs_root: logs/DeepFashion_InShop/
logs_folder: ${now:%Y-%m-%d_%H-%M-%S}_${postfix}

num_workers: 20
cache_size: 100000

transforms_train:
  name: augs_hypvit_torch
  args:
    im_size: 224

transforms_val:
  name: norm_resize_hypvit_torch
  args:
    im_size: 224
    crop_size: 224

sampler: null
bs_train: 256
bs_val: 256
max_epochs: 10000
valid_period: 1

metric_args:
  metrics_to_exclude_from_visualization: [cmc,]
  cmc_top_k: [1]
  map_top_k: [5]
  return_only_main_category: True
  visualize_only_main_category: True

log_images: True

metric_for_checkpointing: OVERALL/cmc/1

model:
  name: extractor_with_mlp
  args:
    mlp_features: [384, 256]
    extractor:
      name: vit
      args:
        normalise_features: False
        use_multi_scale: False
        weights: vits16_dino
        arch: vits16

criterion:
  name: arcface
  args:
    label_smoothing: null
    m: 0.4
    s: 64
    in_features: 256
    num_classes: 3985

optimizer:
  name: adam
  args:
    lr: 1e-5

scheduling: null

# To use neptune you should also specify NEPTUNE_API_TOKEN in
# .env file or via `export NEPTUNE_API_TOKEN=...`
neptune_project: null

hydra_dir: ${logs_root}/${logs_folder}/

tags:
  - ${postfix}
  - deepfashion

hydra:
  run:
    dir: ${hydra_dir}
  searchpath:
    - pkg://oml.configs
7 changes: 7 additions & 0 deletions oml/configs/criterion/arcface.yaml
@@ -0,0 +1,7 @@
name: arcface
args:
  label_smoothing: null
  m: 0.4
  s: 64
  in_features: 384
  num_classes: 3985
8 changes: 8 additions & 0 deletions oml/configs/criterion/mlp_arcface.yaml
@@ -0,0 +1,8 @@
name: mlp_arcface
args:
  label_smoothing: null
  m: 0.4
  s: 64
  in_features: 384
  num_classes: 3985
  mlp_features: [512, 256]
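
In both configs above, name selects the criterion class and args are forwarded to its constructor by get_criterion_by_cfg (see the entrypoint change below). A rough sketch of that mechanism, assuming a simple name-to-class registry (hypothetical; OML's real registry code may differ):

from typing import Any, Dict

from oml.losses.arcface import ArcFaceLoss, ArcFaceLossWithMLP

CRITERIONS = {"arcface": ArcFaceLoss, "mlp_arcface": ArcFaceLossWithMLP}  # assumed mapping

def get_criterion_sketch(cfg: Dict[str, Any], **runtime_kwargs: Any) -> Any:
    # cfg mirrors the YAML: {"name": "arcface", "args": {...}};
    # runtime kwargs such as label2category are merged in by the caller
    return CRITERIONS[cfg["name"]](**cfg["args"], **runtime_kwargs)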
10 changes: 9 additions & 1 deletion oml/datasets/base.py
@@ -260,12 +260,20 @@ def get_retrieval_datasets(
     f_imread_val: Optional[TImReader] = None,
     dataframe_name: str = "df.csv",
     cache_size: int = 100_000,
+    verbose: bool = True,
 ) -> Tuple[DatasetWithLabels, DatasetQueryGallery]:
     df = pd.read_csv(dataset_root / dataframe_name, index_col=False)
-    check_retrieval_dataframe_format(df, dataset_root=dataset_root)
 
+    check_retrieval_dataframe_format(df, dataset_root=dataset_root, verbose=verbose)
+
+    # first half will consist of "train" split, second one of "val"
+    # so labels in train will be from 0 to N-1 and labels in test will be from N to K
+    mapper = {l: i for i, l in enumerate(df.sort_values(by=[SPLIT_COLUMN])[LABELS_COLUMN].unique())}
+
+    # train
     df_train = df[df[SPLIT_COLUMN] == "train"].reset_index(drop=True)
+    df_train[LABELS_COLUMN] = df_train[LABELS_COLUMN].map(mapper)
 
     train_dataset = DatasetWithLabels(
         df=df_train,
         dataset_root=dataset_root,
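
The new mapper makes train labels contiguous from 0, which ArcFaceLoss requires since its projection matrix is indexed by label. A tiny self-contained illustration with made-up data:

import pandas as pd

df = pd.DataFrame({"split": ["train", "train", "validation", "validation"],
                   "label": [7, 42, 42, 99]})
# sorting by split puts "train" rows first, so train labels are enumerated first
mapper = {l: i for i, l in enumerate(df.sort_values(by=["split"])["label"].unique())}
print(mapper)  # {7: 0, 42: 1, 99: 2} -> train labels occupy 0..N-1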
17 changes: 14 additions & 3 deletions oml/lightning/entrypoints/train.py
@@ -64,6 +64,7 @@ def pl_train(cfg: TCfg) -> None:
         transforms_val=transforms_val,
         dataframe_name=cfg["dataframe_name"],
         cache_size=cfg["cache_size"],
+        verbose=cfg.get("show_dataset_warnings", True),
     )
 
     if isinstance(transforms_train, albu.Compose):
@@ -79,15 +80,25 @@
     )
 
     sampler_runtime_args = {"labels": train_dataset.get_labels()}
+    label2category = None
     df = train_dataset.df
     if train_dataset.categories_key:
-        sampler_runtime_args["label2category"] = dict(zip(df[LABELS_COLUMN], df[CATEGORIES_COLUMN]))
+        label2category = dict(zip(df[LABELS_COLUMN], df[CATEGORIES_COLUMN]))
+        sampler_runtime_args["label2category"] = label2category
     # note, we pass some runtime arguments to sampler here, but not all of the samplers use all of these arguments
     sampler = get_sampler_by_cfg(cfg["sampler"], **sampler_runtime_args) if cfg["sampler"] is not None else None
 
     extractor = get_extractor_by_cfg(cfg["model"])
-    criterion = get_criterion_by_cfg(cfg["criterion"])
-    optimizer = get_optimizer_by_cfg(cfg["optimizer"], params=extractor.parameters())
+
+    criterion = get_criterion_by_cfg(
+        cfg["criterion"],
+        label2category=label2category,
+    )
+    optimizable_parameters = [
+        {"lr": cfg["optimizer"]["args"]["lr"], "params": extractor.parameters()},
+        {"lr": cfg["optimizer"]["args"]["lr"], "params": criterion.parameters()},
+    ]
+    optimizer = get_optimizer_by_cfg(cfg["optimizer"], params=optimizable_parameters)  # type: ignore
 
     # unpack scheduler to the Lightning format
     if cfg.get("scheduling"):
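
Including criterion.parameters() in the optimizer is the key change here: unlike the triplet losses, ArcFaceLoss carries a learnable projection matrix that must be updated during training. A condensed standalone sketch of the same idea (toy extractor, not repo code):

import torch
from oml.losses.arcface import ArcFaceLoss

extractor = torch.nn.Linear(128, 384)  # stand-in for the real extractor
criterion = ArcFaceLoss(in_features=384, num_classes=10)

# without the second param group, criterion.weight would never receive updates
optimizer = torch.optim.Adam([
    {"lr": 1e-5, "params": extractor.parameters()},
    {"lr": 1e-5, "params": criterion.parameters()},
])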
3 changes: 2 additions & 1 deletion oml/lightning/modules/retrieval.py
@@ -78,7 +78,8 @@ def training_step(self, batch: Dict[str, Any], batch_idx: int) -> torch.Tensor:
         bs = len(embeddings)
 
         loss = self.criterion(embeddings, batch[self.labels_key])
-        self.log("loss", loss.item(), prog_bar=True, batch_size=bs, on_step=True, on_epoch=True)
+        loss_name = (getattr(self.criterion, "criterion_name", "") + "_loss").strip("_")
+        self.log(loss_name, loss.item(), prog_bar=True, batch_size=bs, on_step=True, on_epoch=True)
 
         if hasattr(self.criterion, "last_logs"):
             self.log_dict(self.criterion.last_logs, prog_bar=False, batch_size=bs, on_step=True, on_epoch=False)
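
With this change, a criterion exposing criterion_name = "arcface" is logged as arcface_loss, the triplet losses as triplet_loss, and a criterion without the attribute falls back to plain loss, since strip("_") drops the leading underscore left by the empty default:

for name in ("arcface", "triplet", ""):
    print((name + "_loss").strip("_"))
# arcface_loss
# triplet_loss
# loss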
133 changes: 133 additions & 0 deletions oml/losses/arcface.py
@@ -0,0 +1,133 @@
from typing import Any, Dict, List, Optional

import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torchvision.ops import MLP

from oml.utils.misc_torch import label_smoothing


class ArcFaceLoss(nn.Module):
    """
    ArcFace loss from the `paper <https://arxiv.org/abs/1801.07698>`_ with the option to use label smoothing.
    It contains a projection (num_features x num_classes) inside itself, so you don't have to produce an output of
    size ``num_classes`` yourself. Please make sure that class labels start at 0 and end at ``num_classes - 1``.
    """

    criterion_name = "arcface"  # for better logging

    def __init__(
        self,
        in_features: int,
        num_classes: int,
        label2category: Optional[Dict[Any, Any]] = None,
        label_smoothing: Optional[float] = None,
        m: float = 0.5,
        s: float = 64,
    ):
        """
        Args:
            in_features: Input feature size
            num_classes: Number of classes in train set
            label2category: Optional, used for label smoothing. If not provided, label smoothing will be
                global rather than category-wise
            label_smoothing: Label smoothing effect strength
            m: Margin parameter for ArcFace loss. Values of 0.3-0.5 usually work well
            s: Scaling parameter for ArcFace loss. Values of 30-64 usually work well
        """
        super(ArcFaceLoss, self).__init__()

        assert (
            label_smoothing is None or 0 < label_smoothing < 1
        ), f"Choose another label_smoothing parametrization, got {label_smoothing}"

        self.criterion = nn.CrossEntropyLoss()
        self.num_classes = num_classes
        if label2category is not None:
            mapper = {l: i for i, l in enumerate(sorted(list(set(label2category.values()))))}
            label2category = {k: mapper[v] for k, v in label2category.items()}
            self.label2category = torch.arange(num_classes).apply_(label2category.get)
        else:
            self.label2category = None
        self.label_smoothing = label_smoothing
        self.weight = nn.Parameter(torch.FloatTensor(num_classes, in_features))
        nn.init.xavier_uniform_(self.weight)
        self.rescale = s
        self.m = m
        self.cos_m = np.cos(m)
        self.sin_m = np.sin(m)
        self.th = -self.cos_m
        self.mm = self.sin_m * m
        self.last_logs: Dict[str, float] = {}

    def fc(self, x: torch.Tensor) -> torch.Tensor:
        return F.linear(F.normalize(x, p=2), F.normalize(self.weight, p=2))

    def smooth_labels(self, y: torch.Tensor) -> torch.Tensor:
        if self.label2category is not None:
            self.label2category = self.label2category.to(self.weight.device)
        return label_smoothing(y, self.num_classes, self.label_smoothing, self.label2category)

    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        assert torch.all(y < self.num_classes), "You should provide labels between 0 and num_classes - 1."

        cos = self.fc(x)

        self._log_accuracy_on_batch(cos, y)

        sin = torch.sqrt(1.0 - torch.pow(cos, 2))

        cos_w_margin = cos * self.cos_m - sin * self.sin_m
        cos_w_margin = torch.where(cos > self.th, cos_w_margin, cos - self.mm)

        ohe = F.one_hot(y, self.num_classes)
        logit = torch.where(ohe.bool(), cos_w_margin, cos) * self.rescale

        if self.label_smoothing:
            y = self.smooth_labels(y)

        return self.criterion(logit, y)

    @torch.no_grad()
    def _log_accuracy_on_batch(self, logits: torch.Tensor, y: torch.Tensor) -> None:
        self.last_logs["accuracy"] = torch.mean((y == torch.argmax(logits, 1)).to(torch.float32)).item()


class ArcFaceLossWithMLP(nn.Module):
    """
    Almost the same as ``ArcFaceLoss``, but it also has an MLP projector before the loss.
    You may want to use ``ArcFaceLossWithMLP`` to boost the expressive power of the ArcFace loss during training
    (for example, in a multi-head setup it may be a good idea to have task-specific projectors in each of the losses).
    Note that the criterion does not exist at validation time.
    Thus, if you want to keep your MLP layers, you should create them as part of the model you train.
    """

    def __init__(
        self,
        in_features: int,
        num_classes: int,
        mlp_features: List[int],
        label2category: Optional[Dict[str, Any]] = None,
        label_smoothing: Optional[float] = None,
        m: float = 0.5,
        s: float = 64,
    ):
        """
        Args:
            in_features: Input feature size
            num_classes: Number of classes in train set
            mlp_features: Layer sizes for the MLP before ArcFace
            label2category: Optional, used for label smoothing. If not provided, label smoothing will be
                global rather than category-wise
            label_smoothing: Label smoothing effect strength
            m: Margin parameter for ArcFace loss. Values of 0.3-0.5 usually work well
            s: Scaling parameter for ArcFace loss. Values of 30-64 usually work well
        """
        super().__init__()
        self.mlp = MLP(in_features, mlp_features)
        self.arcface = ArcFaceLoss(mlp_features[-1], num_classes, label2category, label_smoothing, m, s)

    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        return self.arcface(self.mlp(x), y)
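
For orientation, a minimal usage sketch of the loss defined above (shapes and class count chosen arbitrarily):

import torch

from oml.losses.arcface import ArcFaceLoss

criterion = ArcFaceLoss(in_features=384, num_classes=10, m=0.4, s=64)
embeddings = torch.randn(8, 384)     # batch of extractor outputs
labels = torch.randint(0, 10, (8,))  # labels must lie in [0, num_classes - 1]
loss = criterion(embeddings, labels)
loss.backward()                      # gradients also flow into criterion.weight
print(criterion.last_logs["accuracy"])  # batch accuracy logged as a side effect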
6 changes: 6 additions & 0 deletions oml/losses/triplet.py
@@ -26,6 +26,8 @@ class TripletLoss(Module):
 
     """
 
     criterion_name = "triplet"  # for better logging
 
     def __init__(self, margin: Optional[float], reduction: str = "mean", need_logs: bool = False):
         """
 
@@ -116,6 +118,8 @@ class TripletLossPlain(Module):
 
     """
 
     criterion_name = "triplet"  # for better logging
 
     def __init__(self, margin: Optional[float], reduction: str = "mean", need_logs: bool = False):
         """
 
@@ -160,6 +164,8 @@ class TripletLossWithMiner(ITripletLossWithMiner):
 
     """
 
     criterion_name = "triplet"  # for better logging
 
     def __init__(
         self,
         margin: Optional[float],