Add tests #13

Closed
wants to merge 11 commits
2 changes: 1 addition & 1 deletion edugrad/_tensor/tensor_broadcasted_binary_mlops.py
@@ -2,7 +2,7 @@

 import math

-from edugrad.helpers import dtypes
+from edugrad.dtypes import dtypes
 import edugrad.function as function

2 changes: 1 addition & 1 deletion edugrad/_tensor/tensor_combine_segment.py
@@ -7,7 +7,7 @@


 def cat(tensor, *args, dim) -> Tensor:
-    from edugrad._tensor import Tensor
+    from edugrad.tensor import Tensor

     dim = (dim + len(tensor.shape)) if dim < 0 else dim
     assert all(
3 changes: 2 additions & 1 deletion edugrad/_tensor/tensor_create.py
@@ -2,7 +2,8 @@
 import time
 import math

-from edugrad.helpers import argfix, DType, prod, shape_int, dtypes
+from edugrad.helpers import argfix, prod, shape_int
+from edugrad.dtypes import DType, dtypes
 from edugrad.data import TensorData
 from edugrad.ops import LoadOps

11 changes: 7 additions & 4 deletions edugrad/_tensor/tensor_index_slice.py
@@ -1,7 +1,8 @@
 from typing import Sequence, Optional, Tuple
 from collections import defaultdict

-from edugrad.helpers import shape_int, dtypes
+from edugrad.helpers import shape_int
+from edugrad.dtypes import dtypes
 from edugrad._tensor.tensor_reshape import pad, _flatten


@@ -35,7 +36,7 @@
 def __getitem__(
     tensor: "Tensor", val
 ) -> "Tensor":  # val: Union[int, slice, Tensor, None, Ellipsis, Tuple[Union[int, slice, Tensor, None, Ellipsis], ...]]
-    from edugrad._tensor import Tensor
+    from edugrad.tensor import Tensor

     def normalize_int(e, i, dim_sz):
         if -dim_sz <= e < dim_sz:
@@ -141,10 +142,12 @@ def __setitem__(tensor: "Tensor", s, v):


 # NOTE: using slice is discouraged and things should migrate to pad and shrink
-def slice(tensor: "Tensor", arg: Sequence[Optional[Tuple[int, shape_int]]], value: float) -> "Tensor":
+def tslice(tensor: "Tensor", arg: Sequence[Optional[Tuple[int, shape_int]]], value: float = 0) -> "Tensor":
+    from edugrad.tensor import Tensor

     arg_ = tuple([a if a is not None else (0, s) for s, a in zip(tensor.shape, arg)])
     padding = tuple([(max(0, -p[0]), max(0, p[1] - tensor.shape[i])) for i, p in enumerate(arg_)])
-    return pad(tensor, padding, value=value).shrink(
+    return tensor.pad(padding, value=value).shrink(
         tuple([(p[0] + padding[i][0], p[1] + padding[i][0]) for i, p in enumerate(arg_)])
     )
+    # FIXME: tensor.pad(padding, value=value)... returns None...
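As a side note on the hunk above: the window arithmetic in tslice is compact, and the following standalone sketch traces what arg_, padding, and the shrink bounds evaluate to. It is plain Python with a hypothetical 4x5 shape; no Tensor object is involved.

    # Standalone trace of the window arithmetic in tslice, for a hypothetical
    # 4x5 tensor and a window that starts one element before dim 0.
    shape = (4, 5)
    arg = [(-1, 3), None]  # None means "keep the whole dimension"

    arg_ = tuple(a if a is not None else (0, s) for s, a in zip(shape, arg))
    assert arg_ == ((-1, 3), (0, 5))

    # Pad just enough that the requested window lies inside the tensor.
    padding = tuple((max(0, -p[0]), max(0, p[1] - shape[i])) for i, p in enumerate(arg_))
    assert padding == ((1, 0), (0, 0))  # one padded row in front of dim 0

    # Shrink bounds, re-expressed in the padded tensor's coordinates.
    shrink_arg = tuple((p[0] + padding[i][0], p[1] + padding[i][0]) for i, p in enumerate(arg_))
    assert shrink_arg == ((0, 4), (0, 5))  # a 4x5 window; row 0 holds the fill value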
3 changes: 2 additions & 1 deletion edugrad/_tensor/tensor_nn.py
@@ -3,7 +3,8 @@
 from __future__ import annotations
 import math

-from edugrad.helpers import make_pair, flatten, dtypes, all_int, shape_int
+from edugrad.helpers import make_pair, flatten, all_int, shape_int
+from edugrad.dtypes import dtypes


 # processing ops
7 changes: 5 additions & 2 deletions edugrad/_tensor/tensor_reduce.py
@@ -2,7 +2,8 @@

 from __future__ import annotations

-from edugrad.helpers import dtypes, prod, all_int
+from edugrad.helpers import prod, all_int
+from edugrad.dtypes import dtypes
 from edugrad.function import Function
 import edugrad.function as function

@@ -44,6 +45,7 @@ def _reduce(self, fxn: type[Function], axis: int | tuple[int, ...] | None, keepd
     return ret if keepdim else ret.reshape(shape=shape)


+# ----------------------------------------------------------------------------------------------------------------------
 # Functions that use the generic _reduce method for specific reduction operations.


@@ -59,7 +61,7 @@ def tmax(tensor: Tensor, axis, keepdim):

 def tmin(tensor: Tensor, axis, keepdim):
     """Computes the minimum value of elements over the specified axis."""
-    return -((-tensor).tmax((-tensor), axis=axis, keepdim=keepdim))
+    return -tmax((-tensor), axis=axis, keepdim=keepdim)


 def mean(tensor: Tensor, axis, keepdim):
@@ -76,6 +78,7 @@ def std(tensor: Tensor, axis, keepdim, correction):
     return square_sum.div(prod(tensor.shape) / prod(square_sum.shape) - correction).sqrt()


+# ----------------------------------------------------------------------------------------------------------------------
 # Functions for softmax and its logarithmic variant, as well as argmax and argmin operations.

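For intuition on the tmin fix above: the old line negated the tensor and then called tmax as a bound method with a redundant extra argument; the corrected line applies the identity min(x) = -max(-x) directly, so the reduction only ever needs a max kernel. A quick NumPy check of that identity (NumPy is used here purely for illustration):

    import numpy as np

    x = np.array([[3.0, -1.0], [2.0, 5.0]])
    # min via the negate-max-negate trick used by tmin
    assert np.array_equal(-np.max(-x, axis=1), np.min(x, axis=1))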
4 changes: 3 additions & 1 deletion edugrad/_tensor/tensor_reshape.py
@@ -44,10 +44,12 @@ def shrink(tensor: Tensor, arg: tuple[tuple[shape_int, shape_int] | None, ...])


 def pad(tensor: Tensor, arg: tuple[tuple[int, int] | None, ...], value: float) -> Tensor:
+    from edugrad.tensor import Tensor
+
     if all(x is None or x == (0, 0) for x in arg):
         return tensor
     ret = function.Pad.apply(tensor, arg=(narg := tuple(x if x is not None else (0, 0) for x in arg)))
-    return ret if 0 == value else ret + function.Pad.apply("Tensor".ones_like(tensor), arg=narg).where(0, value)
+    return ret if 0 == value else ret + function.Pad.apply(Tensor.ones_like(tensor), arg=narg).where(0, value)


 # (padding_left, padding_right, padding_top, padding_bottom)
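The corrected return line also shows the trick pad uses for non-zero fill values: pad with zeros first, then add value exactly where padding was introduced, locating that region by padding a ones mask. A rough NumPy rendition of the same idea, assuming tinygrad-style where semantics (mask.where(0, value) selects 0 where the mask is nonzero and value elsewhere):

    import numpy as np

    x = np.array([[1.0, 2.0]])
    pad_widths = ((1, 1), (0, 0))  # one row above, one row below
    value = 7.0

    ret = np.pad(x, pad_widths)                  # zero-padded result
    mask = np.pad(np.ones_like(x), pad_widths)   # 1 in the original region, 0 in the padding
    out = ret + np.where(mask != 0, 0.0, value)  # add `value` only in the padded cells
    assert out.tolist() == [[7.0, 7.0], [1.0, 2.0], [7.0, 7.0]]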
3 changes: 2 additions & 1 deletion edugrad/data.py
@@ -13,7 +13,8 @@
 from typing import Tuple
 import numpy as np
 from edugrad.ops import UnaryOps, BinaryOps, TernaryOps, ReduceOps, LoadOps  # consider reading the docs there
-from edugrad.helpers import DType, dtypes, DEBUG
+from edugrad.helpers import DEBUG
+from edugrad.dtypes import DType, dtypes


 class TensorData:
93 changes: 93 additions & 0 deletions edugrad/dtypes.py
@@ -0,0 +1,93 @@
+from typing import ClassVar, Dict, Optional, Final
+import numpy as np
+from dataclasses import dataclass
+
+
+@dataclass(frozen=True, order=True)
+class DType:
+    """Data type class for managing different data types."""
+
+    priority: int  # Priority for upcasting
+    itemsize: int  # Size of the data type in bytes
+    name: str  # Name of the data type
+    np: Optional[type]  # Corresponding numpy data type
+    sz: int = 1  # Size factor
+
+    def __repr__(self):
+        return f"dtypes.{self.name}"
+
+
+class dtypes:
+    """Container for different data types and utility methods.
+
+    We need this because some layer operation might use different trade-offs between precision and efficiency. In such
+    cases, we have to translate b/w dtypes.
+
+    """
+
+    @staticmethod
+    def is_int(x: DType) -> bool:
+        """Check if a data type is an integer type."""
+        return x in (
+            dtypes.int8,
+            dtypes.int16,
+            dtypes.int32,
+            dtypes.int64,
+            dtypes.uint8,
+            dtypes.uint16,
+            dtypes.uint32,
+            dtypes.uint64,
+        )
+
+    @staticmethod
+    def is_float(x: DType) -> bool:
+        """Check if a data type is a float type."""
+        return x in (dtypes.float16, dtypes.float32, dtypes.float64)
+
+    @staticmethod
+    def is_unsigned(x: DType) -> bool:
+        """Check if a data type is an unsigned type."""
+        return x in (dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64)
+
+    @staticmethod
+    def from_np(x) -> DType:
+        """Convert a numpy data type to a DType."""
+        return DTYPES_DICT[np.dtype(x).name]
+
+    @staticmethod
+    def fields() -> Dict[str, DType]:
+        return DTYPES_DICT
+
+    @staticmethod  # NOTE: isinstance(True, int) is True in python
+    def from_py(x) -> DType:
+        return (
+            dtypes.default_float if isinstance(x, float) else dtypes.bool if isinstance(x, bool) else dtypes.default_int
+        )
+
+    # Definition of various data types
+    bool: Final[DType] = DType(0, 1, "bool", np.bool_)
+    float16: Final[DType] = DType(9, 2, "half", np.float16)
+    half = float16
+    float32: Final[DType] = DType(10, 4, "float", np.float32)
+    float = float32
+    float64: Final[DType] = DType(11, 8, "double", np.float64)
+    double = float64
+    int8: Final[DType] = DType(1, 1, "char", np.int8)
+    int16: Final[DType] = DType(3, 2, "short", np.int16)
+    int32: Final[DType] = DType(5, 4, "int", np.int32)
+    int64: Final[DType] = DType(7, 8, "long", np.int64)
+    uint8: Final[DType] = DType(2, 1, "unsigned char", np.uint8)
+    uint16: Final[DType] = DType(4, 2, "unsigned short", np.uint16)
+    uint32: Final[DType] = DType(6, 4, "unsigned int", np.uint32)
+    uint64: Final[DType] = DType(8, 8, "unsigned long", np.uint64)
+
+    default_float: ClassVar[DType] = float32
+    default_int: ClassVar[DType] = int32
+
+
+# Dictionary mapping data type names to DType objects
+DTYPES_DICT = {
+    k: v
+    for k, v in dtypes.__dict__.items()
+    if not k.startswith("__") and not callable(v) and not v.__class__ == staticmethod
+}
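To make the relocated module concrete, here is a small usage sketch, assuming the package layout after this PR; the asserts follow from the definitions in the new file:

    import numpy as np
    from edugrad.dtypes import dtypes

    assert dtypes.from_np(np.float32) is dtypes.float32  # numpy dtype -> DType
    assert dtypes.from_py(True) is dtypes.bool           # bool is checked before int
    assert dtypes.is_int(dtypes.uint8) and dtypes.is_unsigned(dtypes.uint8)

    # order=True on the dataclass makes `priority` (the first field) drive
    # comparisons, so the higher-priority dtype wins an upcast:
    assert max(dtypes.int32, dtypes.float32) is dtypes.float32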
12 changes: 11 additions & 1 deletion edugrad/function.py
@@ -11,7 +11,8 @@
 """
 import math
 from typing import Tuple, Optional, cast
-from edugrad.helpers import argsort, DType, shape_int
+from edugrad.helpers import argsort, shape_int
+from edugrad.dtypes import DType
 from edugrad.ops import UnaryOps, BinaryOps, TernaryOps, ReduceOps
 from edugrad.data import TensorData

@@ -355,3 +356,12 @@ def backward(self, grad_output: TensorData) -> TensorData:
         ), "symbolic shrink does not support backward"
         # need this cast because mypy cannot narrow the type even with assert
         return grad_output.pad(cast(Tuple[Tuple[int, int], ...], self.narg))
+
+
+class Flip(Function):
+    def forward(self, x: TensorData, axis: Tuple[int, ...]) -> TensorData:
+        self.arg = tuple([-1 if i in set(axis) else 1 for i in range(len(x.shape))])
+        return x.stride(self.arg)
+
+    def backward(self, grad_output: TensorData) -> TensorData:
+        return grad_output.stride(self.arg)
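The new Flip encodes the flipped axes as a stride argument of -1 per axis and reuses the same argument in backward, since flipping is its own inverse. A NumPy analogue of the construction, where negative-step slicing stands in for TensorData.stride (an assumption about its semantics):

    import numpy as np

    def flip_arg(shape, axis):
        # same construction as Flip.forward: -1 for flipped axes, 1 elsewhere
        return tuple(-1 if i in set(axis) else 1 for i in range(len(shape)))

    x = np.arange(6).reshape(2, 3)
    arg = flip_arg(x.shape, axis=(1,))  # -> (1, -1)

    flipped = x[::arg[0], ::arg[1]]  # a negative step reverses that axis
    assert np.array_equal(flipped, np.flip(x, axis=1))

    # applying the same arg twice restores x, which is why backward reuses self.arg
    assert np.array_equal(flipped[::arg[0], ::arg[1]], x)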
90 changes: 7 additions & 83 deletions edugrad/helpers.py
@@ -1,9 +1,7 @@
-from typing import Union, Tuple, Iterator, Optional, Final, Any
+from typing import Union, Tuple, Iterator, Any
 import os
 import functools
-import numpy as np
 from math import prod  # noqa: F401 # pylint:disable=unused-import
-from dataclasses import dataclass

 shape_int = int

@@ -28,6 +26,12 @@ def flatten(list_: Iterator):
     return [item for sublist in list_ for item in sublist]


+def fully_flatten(l):
+    return [
+        item for sublist in l for item in (fully_flatten(sublist) if isinstance(sublist, (tuple, list)) else [sublist])
+    ]
+
+
 def argsort(x):
     """Return the indices that would sort an array.

@@ -55,83 +59,3 @@ def getenv(key, default=0):

 # Global flags for debugging and continuous integration
 DEBUG = getenv("DEBUG")
-
-
-@dataclass(frozen=True, order=True)
-class DType:
-    """Data type class for managing different data types."""
-
-    priority: int  # Priority for upcasting
-    itemsize: int  # Size of the data type in bytes
-    name: str  # Name of the data type
-    np: Optional[type]  # Corresponding numpy data type
-    sz: int = 1  # Size factor
-
-    def __repr__(self):
-        return f"dtypes.{self.name}"
-
-
-class dtypes:
-    """Container for different data types and utility methods.
-
-    We need this because some layer operation might use different trade-offs between precision and efficiency. In such
-    cases, we have to translate b/w dtypes.
-
-    """
-
-    @staticmethod
-    def is_int(x: DType) -> bool:
-        """Check if a data type is an integer type."""
-        return x in (
-            dtypes.int8,
-            dtypes.int16,
-            dtypes.int32,
-            dtypes.int64,
-            dtypes.uint8,
-            dtypes.uint16,
-            dtypes.uint32,
-            dtypes.uint64,
-        )
-
-    @staticmethod
-    def is_float(x: DType) -> bool:
-        """Check if a data type is a float type."""
-        return x in (dtypes.float16, dtypes.float32, dtypes.float64)
-
-    @staticmethod
-    def is_unsigned(x: DType) -> bool:
-        """Check if a data type is an unsigned type."""
-        return x in (dtypes.uint8, dtypes.uint16, dtypes.uint32, dtypes.uint64)
-
-    @staticmethod
-    def from_np(x) -> DType:
-        """Convert a numpy data type to a DType."""
-        return DTYPES_DICT[np.dtype(x).name]
-
-    # Definition of various data types
-    bool: Final[DType] = DType(0, 1, "bool", np.bool_)
-    float16: Final[DType] = DType(9, 2, "half", np.float16)
-    half = float16
-    float32: Final[DType] = DType(10, 4, "float", np.float32)
-    float = float32
-    float64: Final[DType] = DType(11, 8, "double", np.float64)
-    double = float64
-    int8: Final[DType] = DType(1, 1, "char", np.int8)
-    int16: Final[DType] = DType(3, 2, "short", np.int16)
-    int32: Final[DType] = DType(5, 4, "int", np.int32)
-    int64: Final[DType] = DType(7, 8, "long", np.int64)
-    uint8: Final[DType] = DType(2, 1, "unsigned char", np.uint8)
-    uint16: Final[DType] = DType(4, 2, "unsigned short", np.uint16)
-    uint32: Final[DType] = DType(6, 4, "unsigned int", np.uint32)
-    uint64: Final[DType] = DType(8, 8, "unsigned long", np.uint64)
-
-    # Note: bfloat16 isn't supported in numpy
-    bfloat16: Final[DType] = DType(9, 2, "__bf16", None)
-
-
-# Dictionary mapping data type names to DType objects
-DTYPES_DICT = {
-    k: v
-    for k, v in dtypes.__dict__.items()
-    if not k.startswith("__") and not callable(v) and not v.__class__ == staticmethod
-}
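Unlike the single-level flatten above it, the new fully_flatten recurses into nested tuples and lists. A quick standalone check:

    def fully_flatten(l):
        return [
            item for sublist in l for item in (fully_flatten(sublist) if isinstance(sublist, (tuple, list)) else [sublist])
        ]

    assert fully_flatten([1, [2, (3, [4])], 5]) == [1, 2, 3, 4, 5]
    assert fully_flatten([[1, 2], [3]]) == [1, 2, 3]  # agrees with flatten on one level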