Map Operations¶

Value mapping, discretization, encoding, and transformation operations.

Overview¶

Map operations transform column values using dictionaries, bins, or encoding schemes. They're useful for categorization, value replacement, data normalization, and ML feature preparation.

from transformplan import TransformPlan

plan = (
    TransformPlan()
    .map_values("status", {"A": "Active", "I": "Inactive"})
    .map_discretize("age", bins=[18, 35, 55], labels=["Minor", "Young", "Adult", "Senior"])
    .map_onehot("color", categories=["red", "green", "blue"], drop="first")
)

Class Reference¶

MapOps ¶

Mixin providing value mapping and transformation operations.

map_values ¶

map_values(
    column: str,
    mapping: dict[Any, Any],
    default: Any = None,
    *,
    keep_unmapped: bool = True,
) -> Self

Map values in a column using a dictionary.

Parameters:

Name	Type	Description	Default
`column`	`str`	Column to transform.	required
`mapping`	`dict[Any, Any]`	Dictionary mapping old values to new values.	required
`default`	`Any`	Default value for unmapped values (if keep_unmapped=False).	`None`
`keep_unmapped`	`bool`	If True, keep original value when not in mapping.	`True`

Returns:

Type	Description
`Self`	Self for method chaining.

Source code in transformplan/ops/map.py

def map_values(
    self,
    column: str,
    mapping: dict[Any, Any],
    default: Any = None,  # noqa: ANN401
    *,
    keep_unmapped: bool = True,
) -> Self:
    """Replace the values of a column according to a lookup dictionary.

    Args:
        column: Name of the column whose values are mapped.
        mapping: Lookup table of ``{old_value: new_value}`` pairs.
        default: Replacement for values absent from ``mapping``; only
            relevant when ``keep_unmapped`` is False.
        keep_unmapped: When True, values absent from ``mapping`` keep
            their original value.

    Returns:
        Self for method chaining.
    """
    # All arguments are forwarded unchanged; the mapping itself is carried
    # out by ``_map_values``.
    params = {
        "column": column,
        "mapping": mapping,
        "default": default,
        "keep_unmapped": keep_unmapped,
    }
    return self._register(self._map_values, params)

map_discretize ¶

map_discretize(
    column: str,
    bins: Sequence[float],
    labels: Sequence[str] | None = None,
    new_column: str | None = None,
    *,
    right: bool = True,
) -> Self

Discretize a numeric column into bins/categories.

Parameters:

Name	Type	Description	Default
`column`	`str`	Column to discretize.	required
`bins`	`Sequence[float]`	Bin edges (e.g., [0, 18, 65, 100]; given the labels rule below, n edges define n + 1 bins).	required
`labels`	`Sequence[str] \| None`	Labels for each bin (must be len(bins)+1 if provided).	`None`
`new_column`	`str \| None`	Name for result column (None = modify in place).	`None`
`right`	`bool`	If True, bins are (left, right]. If False, [left, right).	`True`

Returns:

Type	Description
`Self`	Self for method chaining.

Source code in transformplan/ops/map.py

def map_discretize(
    self,
    column: str,
    bins: Sequence[float],
    labels: Sequence[str] | None = None,
    new_column: str | None = None,
    *,
    right: bool = True,
) -> Self:
    """Discretize a numeric column into bins/categories.

    Args:
        column: Column to discretize.
        bins: Bin edges. Going by the ``labels`` rule below, n edges yield
            n + 1 bins (e.g., [0, 18, 65, 100] yields 5 bins). Stored as a
            list.
        labels: Labels for each bin (must be len(bins)+1 if provided).
            None or an empty sequence means no explicit labels are passed
            on. The length is not checked here.
        new_column: Name for result column (None = modify in place).
        right: If True, bins are (left, right]. If False, [left, right).

    Returns:
        Self for method chaining.
    """
    # Test for None and emptiness explicitly instead of relying on
    # truthiness: array-like label containers (e.g. numpy arrays) raise on
    # ``bool()``. Empty labels still collapse to None, as before.
    has_labels = labels is not None and len(labels) > 0
    return self._register(
        self._map_discretize,
        {
            "column": column,
            "bins": list(bins),
            "labels": list(labels) if has_labels else None,
            "new_column": new_column or column,
            "right": right,
        },
    )

map_case ¶

map_case(
    column: str,
    cases: list[tuple[Any, Any]],
    default: Any = None,
    new_column: str | None = None,
) -> Self

Apply case-when logic to a column.

Parameters:

Name	Type	Description	Default
`column`	`str`	Column to evaluate.	required
`cases`	`list[tuple[Any, Any]]`	List of (condition_value, result_value) tuples.	required
`default`	`Any`	Default value if no case matches.	`None`
`new_column`	`str \| None`	Name for result column (None = modify in place).	`None`

Returns:

Type	Description
`Self`	Self for method chaining.

Example

.map_case('grade', [(90, 'A'), (80, 'B'), (70, 'C')], default='F')

Maps: >= 90 -> A, >= 80 -> B, >= 70 -> C, else F

Source code in transformplan/ops/map.py

def map_case(
    self,
    column: str,
    cases: list[tuple[Any, Any]],
    default: Any = None,  # noqa: ANN401
    new_column: str | None = None,
) -> Self:
    """Apply case-when logic to a column.

    Args:
        column: Column whose values are evaluated.
        cases: ``(condition_value, result_value)`` tuples.
        default: Result used when none of the cases matches.
        new_column: Column that receives the result; when omitted (or
            empty) the result is written back to ``column``.

    Returns:
        Self for method chaining.

    Example:
        ```python
        plan.map_case("grade", [(90, "A"), (80, "B"), (70, "C")], default="F")
        # Maps: >= 90 -> A, >= 80 -> B, >= 70 -> C, else F
        ```
    """
    # Fall back to in-place replacement when no target column is given.
    target = new_column if new_column else column
    params = {
        "column": column,
        "cases": cases,
        "default": default,
        "new_column": target,
    }
    return self._register(self._map_case, params)

map_bool_to_int ¶

map_bool_to_int(column: str) -> Self

Convert a boolean column to integer (True=1, False=0).

Returns:

Type	Description
`Self`	Self for method chaining.

Source code in transformplan/ops/map.py

def map_bool_to_int(self, column: str) -> Self:
    """Convert a boolean column to integer (True=1, False=0).

    Args:
        column: Name of the boolean column to convert.

    Returns:
        Self for method chaining.
    """
    params = {"column": column}
    return self._register(self._map_bool_to_int, params)

map_null_to_value ¶

map_null_to_value(column: str, value: Any) -> Self

Replace null values with a specific value.

Returns:

Type	Description
`Self`	Self for method chaining.

Source code in transformplan/ops/map.py

def map_null_to_value(self, column: str, value: Any) -> Self:  # noqa: ANN401
    """Replace null values with a specific value.

    Args:
        column: Name of the column to process.
        value: Replacement written wherever the column is null.

    Returns:
        Self for method chaining.
    """
    params = {"column": column, "value": value}
    return self._register(self._map_null_to_value, params)

map_value_to_null ¶

map_value_to_null(column: str, value: Any) -> Self

Replace a specific value with null.

Returns:

Type	Description
`Self`	Self for method chaining.

Source code in transformplan/ops/map.py

def map_value_to_null(self, column: str, value: Any) -> Self:  # noqa: ANN401
    """Replace a specific value with null.

    Args:
        column: Name of the column to process.
        value: The value that is turned into null.

    Returns:
        Self for method chaining.
    """
    params = {"column": column, "value": value}
    return self._register(self._map_value_to_null, params)

map_from_column ¶

map_from_column(
    column: str,
    lookup_column: str,
    value_column: str,
    new_column: str | None = None,
    default: Any = None,
) -> Self

Map values using another column as lookup (like vlookup).

This maps values from column using lookup_column -> value_column mapping from the same DataFrame. Useful for denormalization.

Parameters:

Name	Type	Description	Default
`column`	`str`	Column containing keys to look up.	required
`lookup_column`	`str`	Column containing lookup keys.	required
`value_column`	`str`	Column containing values to map to.	required
`new_column`	`str \| None`	Name for result column (None = modify in place).	`None`
`default`	`Any`	Default value if lookup fails.	`None`

Returns:

Type	Description
`Self`	Self for method chaining.

Source code in transformplan/ops/map.py

def map_from_column(
    self,
    column: str,
    lookup_column: str,
    value_column: str,
    new_column: str | None = None,
    default: Any = None,  # noqa: ANN401
) -> Self:
    """Map values using another column as lookup (like vlookup).

    Values of `column` are translated through the `lookup_column` ->
    `value_column` pairing found in the same DataFrame. Useful for
    denormalization.

    Args:
        column: Column holding the keys to look up.
        lookup_column: Column holding the lookup keys.
        value_column: Column holding the values to map to.
        new_column: Column that receives the result; when omitted (or
            empty) the result is written back to ``column``.
        default: Value used when the lookup fails.

    Returns:
        Self for method chaining.
    """
    # Fall back to in-place replacement when no target column is given.
    target = new_column if new_column else column
    params = {
        "column": column,
        "lookup_column": lookup_column,
        "value_column": value_column,
        "new_column": target,
        "default": default,
    }
    return self._register(self._map_from_column, params)

map_onehot ¶

map_onehot(
    column: str,
    categories: list[Any] | None = None,
    prefix: str | None = None,
    *,
    drop: Literal["first", "last"] | Any | None = None,
    drop_original: bool = True,
    unknown_value: Literal["all_zero", "ignore"] = "all_zero",
) -> Self

One-hot encode a categorical column.

Creates binary indicator columns (0/1) for each category.

Parameters:

Name	Type	Description	Default
`column`	`str`	Source column to encode.	required
`categories`	`list[Any] \| None`	List of category values. If None, derived from data.	`None`
`prefix`	`str \| None`	Prefix for new columns (default: column name).	`None`
`drop`	`Literal['first', 'last'] \| Any \| None`	Drop one category column to avoid multicollinearity: - None: Keep all columns (default). - "first": Drop the first category. - "last": Drop the last category. - Any value: Drop that specific category.	`None`
`drop_original`	`bool`	Drop source column after encoding (default: True).	`True`
`unknown_value`	`Literal['all_zero', 'ignore']`	How to handle unknown values: - "all_zero": Set all indicator columns to 0. - "ignore": Keep original value behavior.	`'all_zero'`

Returns:

Type	Description
`Self`	Self for method chaining.

Example

plan.map_onehot("color", categories=["red", "green", "blue"])
# Creates: color_red, color_green, color_blue

plan.map_onehot("color", categories=["red", "green"], drop="first")
# Creates: color_green (drops color_red)

Source code in transformplan/ops/map.py

def map_onehot(
    self,
    column: str,
    categories: list[Any] | None = None,
    prefix: str | None = None,
    *,
    drop: Literal["first", "last"] | Any | None = None,  # noqa: ANN401
    drop_original: bool = True,
    unknown_value: Literal["all_zero", "ignore"] = "all_zero",
) -> Self:
    """One-hot encode a categorical column.

    One binary indicator column (0/1) is created per category.

    Args:
        column: Source column to encode.
        categories: Category values. If None, derived from data.
        prefix: Prefix for the new columns; when omitted (or empty) the
            name of ``column`` is used.
        drop: Drop one category column to avoid multicollinearity:
            - None: Keep all columns (default).
            - "first": Drop the first category.
            - "last": Drop the last category.
            - Any value: Drop that specific category.
        drop_original: Drop source column after encoding (default: True).
        unknown_value: How to handle unknown values:
            - "all_zero": Set all indicator columns to 0.
            - "ignore": Keep original value behavior.

    Returns:
        Self for method chaining.

    Example:
        ```python
        plan.map_onehot("color", categories=["red", "green", "blue"])
        # Creates: color_red, color_green, color_blue

        plan.map_onehot("color", categories=["red", "green"], drop="first")
        # Creates: color_green (drops color_red)
        ```
    """
    # Indicator columns are prefixed with the source column name unless a
    # non-empty prefix is supplied.
    effective_prefix = prefix if prefix else column
    params = {
        "column": column,
        "categories": categories,
        "prefix": effective_prefix,
        "drop": drop,
        "drop_original": drop_original,
        "unknown_value": unknown_value,
    }
    return self._register(self._map_onehot, params)

map_ordinal ¶

map_ordinal(
    column: str,
    categories: list[Any] | None = None,
    new_column: str | None = None,
    *,
    drop_original: bool = True,
    unknown_value: int = -1,
) -> Self

Ordinal encode a categorical column.

Maps categories to integers based on explicit ordering.

Parameters:

Name	Type	Description	Default
`column`	`str`	Source column to encode.	required
`categories`	`list[Any] \| None`	List of categories in desired order (first=0, second=1, etc.). If None, uses sorted unique values from data.	`None`
`new_column`	`str \| None`	Output column name. If None, replaces original.	`None`
`drop_original`	`bool`	Drop source column if new_column differs (default: True).	`True`
`unknown_value`	`int`	Integer for unknown values (default: -1).	`-1`

Returns:

Type	Description
`Self`	Self for method chaining.

Example

plan.map_ordinal("size", categories=["small", "medium", "large"])
# Maps: small->0, medium->1, large->2

Source code in transformplan/ops/map.py

def map_ordinal(
    self,
    column: str,
    categories: list[Any] | None = None,
    new_column: str | None = None,
    *,
    drop_original: bool = True,
    unknown_value: int = -1,
) -> Self:
    """Ordinal encode a categorical column.

    Categories are mapped to integers following an explicit ordering.

    Args:
        column: Source column to encode.
        categories: Categories in the desired order (first=0, second=1,
            etc.). If None, uses sorted unique values from data.
        new_column: Output column name; when omitted (or empty) the
            original column is replaced.
        drop_original: Drop source column if new_column differs
            (default: True).
        unknown_value: Integer for unknown values (default: -1).

    Returns:
        Self for method chaining.

    Example:
        ```python
        plan.map_ordinal("size", categories=["small", "medium", "large"])
        # Maps: small->0, medium->1, large->2
        ```
    """
    # Fall back to in-place replacement when no output column is given.
    target = new_column if new_column else column
    params = {
        "column": column,
        "categories": categories,
        "new_column": target,
        "drop_original": drop_original,
        "unknown_value": unknown_value,
    }
    return self._register(self._map_ordinal, params)

map_label ¶

map_label(
    column: str,
    categories: list[Any] | None = None,
    new_column: str | None = None,
    *,
    drop_original: bool = True,
    unknown_value: int = -1,
) -> Self

Label encode a categorical column.

Simple integer encoding (alphabetically sorted by default).

Parameters:

Name	Type	Description	Default
`column`	`str`	Source column to encode.	required
`categories`	`list[Any] \| None`	List of categories. If None, uses sorted unique values.	`None`
`new_column`	`str \| None`	Output column name. If None, replaces original.	`None`
`drop_original`	`bool`	Drop source column if new_column differs (default: True).	`True`
`unknown_value`	`int`	Integer for unknown values (default: -1).	`-1`

Returns:

Type	Description
`Self`	Self for method chaining.

Example

plan.map_label("department")
# Maps alphabetically: Engineering->0, HR->1, Sales->2

Source code in transformplan/ops/map.py

def map_label(
    self,
    column: str,
    categories: list[Any] | None = None,
    new_column: str | None = None,
    *,
    drop_original: bool = True,
    unknown_value: int = -1,
) -> Self:
    """Label encode a categorical column.

    Simple integer encoding (alphabetically sorted by default).

    Args:
        column: Source column to encode.
        categories: Categories to encode. If None, uses sorted unique
            values.
        new_column: Output column name; when omitted (or empty) the
            original column is replaced.
        drop_original: Drop source column if new_column differs
            (default: True).
        unknown_value: Integer for unknown values (default: -1).

    Returns:
        Self for method chaining.

    Example:
        ```python
        plan.map_label("department")
        # Maps alphabetically: Engineering->0, HR->1, Sales->2
        ```
    """
    # Fall back to in-place replacement when no output column is given.
    target = new_column if new_column else column
    params = {
        "column": column,
        "categories": categories,
        "new_column": target,
        "drop_original": drop_original,
        "unknown_value": unknown_value,
    }
    return self._register(self._map_label, params)

Examples¶

Dictionary Mapping¶

# Map values using a dictionary
plan = TransformPlan().map_values(
    column="country_code",
    mapping={"US": "United States", "CA": "Canada", "MX": "Mexico"}
)

# With default for unmapped values
plan = TransformPlan().map_values(
    column="status",
    mapping={"A": "Active", "I": "Inactive"},
    default="Unknown",
    keep_unmapped=False
)

Discretization (Binning)¶

# Discretize numeric values into categories
# (3 edges -> 4 bins, so 4 labels: one more label than edges)
plan = TransformPlan().map_discretize(
    column="age",
    bins=[18, 35, 55],
    labels=["Child", "Young Adult", "Adult", "Senior"],
    new_column="age_group"
)

# Auto-generated labels
plan = TransformPlan().map_discretize(
    column="score",
    bins=[0, 50, 75, 100],
    new_column="score_band"
)

Case-When Logic¶

# Apply case-when transformations
plan = TransformPlan().map_case(
    column="score",
    cases=[
        (90, "A"),
        (80, "B"),
        (70, "C"),
        (60, "D"),
    ],
    default="F",
    new_column="grade"
)

Null Handling¶

# Replace null with a value
plan = TransformPlan().map_null_to_value("status", "Unknown")

# Replace a value with null
plan = TransformPlan().map_value_to_null("status", "N/A")

Type Conversion¶

# Convert boolean to integer
plan = TransformPlan().map_bool_to_int("is_active")
# True -> 1, False -> 0

Column-based Lookup¶

# Map using values from other columns (vlookup-style)
plan = TransformPlan().map_from_column(
    column="category_id",
    lookup_column="category_id",
    value_column="category_name",
    new_column="category_label",
    default="Unknown"
)

Use Cases¶

Categorizing Continuous Data¶

# Income brackets (4 edges -> 5 bins, so 5 labels)
plan = TransformPlan().map_discretize(
    column="income",
    bins=[30000, 60000, 100000, 200000],
    labels=["Low", "Lower-Middle", "Middle", "Upper-Middle", "High"],
    new_column="income_bracket"
)

Standardizing Codes¶

# Standardize department codes
plan = TransformPlan().map_values(
    column="dept",
    mapping={
        "ENG": "Engineering",
        "MKT": "Marketing",
        "SAL": "Sales",
        "HR": "Human Resources"
    }
)

Data Cleaning¶

# Replace sentinel values with null
plan = TransformPlan().map_value_to_null("score", -999)

# Replace null with default
plan = TransformPlan().map_null_to_value("category", "Uncategorized")

One-Hot Encoding¶

# Basic one-hot encoding
plan = TransformPlan().map_onehot(
    column="color",
    categories=["red", "green", "blue"]
)
# Creates columns: color_red, color_green, color_blue

# Drop first category to avoid multicollinearity (for regression models)
plan = TransformPlan().map_onehot(
    column="color",
    categories=["red", "green", "blue"],
    drop="first"
)
# Creates columns: color_green, color_blue (drops color_red)

Ordinal Encoding¶

# Ordinal encoding with meaningful order
plan = TransformPlan().map_ordinal(
    column="size",
    categories=["small", "medium", "large"]
)
# Maps: small -> 0, medium -> 1, large -> 2

Label Encoding¶

# Label encoding (alphabetically sorted by default)
plan = TransformPlan().map_label(column="department")
# Maps alphabetically: Engineering -> 0, HR -> 1, Sales -> 2

ML Feature Preparation¶

# One-hot encode categorical features, dropping first to avoid multicollinearity
plan = (
    TransformPlan()
    .map_onehot("color", categories=["red", "green", "blue"], drop="first")
    .map_ordinal("quality", categories=["low", "medium", "high"])
)