Skip to content

Map Operations

Value mapping, discretization, encoding, and transformation operations.

Overview

Map operations transform column values using dictionaries, bins, or encoding schemes. They're useful for categorization, value replacement, data normalization, and ML feature preparation.

from transformplan import TransformPlan

plan = (
    TransformPlan()
    .map_values("status", {"A": "Active", "I": "Inactive"})
    # Two edges give three bins; labels must have len(bins) + 1 entries.
    .map_discretize("age", bins=[35, 55], labels=["Young", "Adult", "Senior"])
    .map_onehot("color", categories=["red", "green", "blue"], drop="first")
)

Class Reference

MapOps

Mixin providing value mapping and transformation operations.

map_values

map_values(
    column: str,
    mapping: dict[Any, Any],
    default: Any = None,
    *,
    keep_unmapped: bool = True,
) -> Self

Map values in a column using a dictionary.

Parameters:

Name Type Description Default
column str

Column to transform.

required
mapping dict[Any, Any]

Dictionary mapping old values to new values.

required
default Any

Default value for unmapped values (if keep_unmapped=False).

None
keep_unmapped bool

If True, keep original value when not in mapping.

True

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/map.py
def map_values(
    self,
    column: str,
    mapping: dict[Any, Any],
    default: Any = None,  # noqa: ANN401
    *,
    keep_unmapped: bool = True,
) -> Self:
    """Map values in a column using a dictionary.

    Args:
        column: Column to transform.
        mapping: Dictionary mapping old values to new values.
        default: Default value for unmapped values (if keep_unmapped=False).
        keep_unmapped: If True, keep original value when not in mapping.

    Returns:
        Self for method chaining.
    """
    return self._register(
        self._map_values,
        {
            "column": column,
            "mapping": mapping,
            "default": default,
            "keep_unmapped": keep_unmapped,
        },
    )

map_discretize

map_discretize(
    column: str,
    bins: Sequence[float],
    labels: Sequence[str] | None = None,
    new_column: str | None = None,
    *,
    right: bool = True,
) -> Self

Discretize a numeric column into bins/categories.

Parameters:

Name Type Description Default
column str

Column to discretize.

required
bins Sequence[float]

Bin edges (e.g., [0, 18, 65, 100] creates 5 bins: one below 0, three between consecutive edges, and one above 100).

required
labels Sequence[str] | None

Labels for each bin (must be len(bins)+1 if provided).

None
new_column str | None

Name for result column (None = modify in place).

None
right bool

If True, bins are (left, right]. If False, [left, right).

True

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/map.py
def map_discretize(
    self,
    column: str,
    bins: Sequence[float],
    labels: Sequence[str] | None = None,
    new_column: str | None = None,
    *,
    right: bool = True,
) -> Self:
    """Discretize a numeric column into bins/categories.

    Args:
        column: Column to discretize.
        bins: Bin edges (e.g., [0, 18, 65, 100] creates 4 bins).
        labels: Labels for each bin (must be len(bins)+1 if provided).
        new_column: Name for result column (None = modify in place).
        right: If True, bins are (left, right]. If False, [left, right).

    Returns:
        Self for method chaining.
    """
    return self._register(
        self._map_discretize,
        {
            "column": column,
            "bins": list(bins),
            "labels": list(labels) if labels else None,
            "new_column": new_column or column,
            "right": right,
        },
    )

map_case

map_case(
    column: str,
    cases: list[tuple[Any, Any]],
    default: Any = None,
    new_column: str | None = None,
) -> Self

Apply case-when logic to a column.

Parameters:

Name Type Description Default
column str

Column to evaluate.

required
cases list[tuple[Any, Any]]

List of (condition_value, result_value) tuples.

required
default Any

Default value if no case matches.

None
new_column str | None

Name for result column (None = modify in place).

None

Returns:

Type Description
Self

Self for method chaining.

Example

.map_case('grade', [(90, 'A'), (80, 'B'), (70, 'C')], default='F')

Maps: >= 90 -> A, >= 80 -> B, >= 70 -> C, else F.

Source code in transformplan/ops/map.py
def map_case(
    self,
    column: str,
    cases: list[tuple[Any, Any]],
    default: Any = None,  # noqa: ANN401
    new_column: str | None = None,
) -> Self:
    """Apply case-when logic to a column.

    Args:
        column: Column to evaluate.
        cases: List of (condition_value, result_value) tuples.
        default: Default value if no case matches.
        new_column: Name for result column (None = modify in place).

    Returns:
        Self for method chaining.

    Example:
        .map_case('grade', [(90, 'A'), (80, 'B'), (70, 'C')], default='F')
        Maps: >= 90 -> A, >= 80 -> B, >= 70 -> C, else F
    """
    return self._register(
        self._map_case,
        {
            "column": column,
            "cases": cases,
            "default": default,
            "new_column": new_column or column,
        },
    )

map_bool_to_int

map_bool_to_int(column: str) -> Self

Convert a boolean column to integer (True=1, False=0).

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/map.py
def map_bool_to_int(self, column: str) -> Self:
    """Convert a boolean column to integer (True=1, False=0).

    Returns:
        Self for method chaining.
    """
    return self._register(self._map_bool_to_int, {"column": column})

map_null_to_value

map_null_to_value(column: str, value: Any) -> Self

Replace null values with a specific value.

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/map.py
def map_null_to_value(self, column: str, value: Any) -> Self:  # noqa: ANN401
    """Replace null values with a specific value.

    Returns:
        Self for method chaining.
    """
    return self._register(
        self._map_null_to_value, {"column": column, "value": value}
    )

map_value_to_null

map_value_to_null(column: str, value: Any) -> Self

Replace a specific value with null.

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/map.py
def map_value_to_null(self, column: str, value: Any) -> Self:  # noqa: ANN401
    """Replace a specific value with null.

    Returns:
        Self for method chaining.
    """
    return self._register(
        self._map_value_to_null, {"column": column, "value": value}
    )

map_from_column

map_from_column(
    column: str,
    lookup_column: str,
    value_column: str,
    new_column: str | None = None,
    default: Any = None,
) -> Self

Map values using another column as lookup (like vlookup).

This maps values from column using lookup_column -> value_column mapping from the same DataFrame. Useful for denormalization.

Parameters:

Name Type Description Default
column str

Column containing keys to look up.

required
lookup_column str

Column containing lookup keys.

required
value_column str

Column containing values to map to.

required
new_column str | None

Name for result column (None = modify in place).

None
default Any

Default value if lookup fails.

None

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/map.py
def map_from_column(
    self,
    column: str,
    lookup_column: str,
    value_column: str,
    new_column: str | None = None,
    default: Any = None,  # noqa: ANN401
) -> Self:
    """Map values using another column as lookup (like vlookup).

    This maps values from `column` using `lookup_column` -> `value_column` mapping
    from the same DataFrame. Useful for denormalization.

    Args:
        column: Column containing keys to look up.
        lookup_column: Column containing lookup keys.
        value_column: Column containing values to map to.
        new_column: Name for result column (None = modify in place).
        default: Default value if lookup fails.

    Returns:
        Self for method chaining.
    """
    return self._register(
        self._map_from_column,
        {
            "column": column,
            "lookup_column": lookup_column,
            "value_column": value_column,
            "new_column": new_column or column,
            "default": default,
        },
    )

map_onehot

map_onehot(
    column: str,
    categories: list[Any] | None = None,
    prefix: str | None = None,
    *,
    drop: Literal["first", "last"] | Any | None = None,
    drop_original: bool = True,
    unknown_value: Literal["all_zero", "ignore"] = "all_zero",
) -> Self

One-hot encode a categorical column.

Creates binary indicator columns (0/1) for each category.

Parameters:

Name Type Description Default
column str

Source column to encode.

required
categories list[Any] | None

List of category values. If None, derived from data.

None
prefix str | None

Prefix for new columns (default: column name).

None
drop Literal['first', 'last'] | Any | None

Drop one category column to avoid multicollinearity:

- None: Keep all columns (default).
- "first": Drop the first category.
- "last": Drop the last category.
- Any value: Drop that specific category.

None
drop_original bool

Drop source column after encoding (default: True).

True
unknown_value Literal['all_zero', 'ignore']

How to handle unknown values:

- "all_zero": Set all indicator columns to 0.
- "ignore": Keep original value behavior.

'all_zero'

Returns:

Type Description
Self

Self for method chaining.

Example

plan.map_onehot("color", categories=["red", "green", "blue"])

Creates: color_red, color_green, color_blue

plan.map_onehot("color", categories=["red", "green"], drop="first")

Creates: color_green (drops color_red)

Source code in transformplan/ops/map.py
def map_onehot(
    self,
    column: str,
    categories: list[Any] | None = None,
    prefix: str | None = None,
    *,
    drop: Literal["first", "last"] | Any | None = None,  # noqa: ANN401
    drop_original: bool = True,
    unknown_value: Literal["all_zero", "ignore"] = "all_zero",
) -> Self:
    """One-hot encode a categorical column.

    Creates binary indicator columns (0/1) for each category.

    Args:
        column: Source column to encode.
        categories: List of category values. If None, derived from data.
        prefix: Prefix for new columns (default: column name).
        drop: Drop one category column to avoid multicollinearity:
            - None: Keep all columns (default).
            - "first": Drop the first category.
            - "last": Drop the last category.
            - Any value: Drop that specific category.
        drop_original: Drop source column after encoding (default: True).
        unknown_value: How to handle unknown values:
            - "all_zero": Set all indicator columns to 0.
            - "ignore": Keep original value behavior.

    Returns:
        Self for method chaining.

    Example:
        >>> plan.map_onehot("color", categories=["red", "green", "blue"])
        # Creates: color_red, color_green, color_blue

        >>> plan.map_onehot("color", categories=["red", "green"], drop="first")
        # Creates: color_green (drops color_red)
    """
    return self._register(
        self._map_onehot,
        {
            "column": column,
            "categories": categories,
            "prefix": prefix or column,
            "drop": drop,
            "drop_original": drop_original,
            "unknown_value": unknown_value,
        },
    )

map_ordinal

map_ordinal(
    column: str,
    categories: list[Any] | None = None,
    new_column: str | None = None,
    *,
    drop_original: bool = True,
    unknown_value: int = -1,
) -> Self

Ordinal encode a categorical column.

Maps categories to integers based on explicit ordering.

Parameters:

Name Type Description Default
column str

Source column to encode.

required
categories list[Any] | None

List of categories in desired order (first=0, second=1, etc.). If None, uses sorted unique values from data.

None
new_column str | None

Output column name. If None, replaces original.

None
drop_original bool

Drop source column if new_column differs (default: True).

True
unknown_value int

Integer for unknown values (default: -1).

-1

Returns:

Type Description
Self

Self for method chaining.

Example

plan.map_ordinal("size", categories=["small", "medium", "large"])

Maps: small->0, medium->1, large->2

Source code in transformplan/ops/map.py
def map_ordinal(
    self,
    column: str,
    categories: list[Any] | None = None,
    new_column: str | None = None,
    *,
    drop_original: bool = True,
    unknown_value: int = -1,
) -> Self:
    """Ordinal encode a categorical column.

    Maps categories to integers based on explicit ordering.

    Args:
        column: Source column to encode.
        categories: List of categories in desired order (first=0, second=1, etc.).
            If None, uses sorted unique values from data.
        new_column: Output column name. If None, replaces original.
        drop_original: Drop source column if new_column differs (default: True).
        unknown_value: Integer for unknown values (default: -1).

    Returns:
        Self for method chaining.

    Example:
        >>> plan.map_ordinal("size", categories=["small", "medium", "large"])
        # Maps: small->0, medium->1, large->2
    """
    return self._register(
        self._map_ordinal,
        {
            "column": column,
            "categories": categories,
            "new_column": new_column or column,
            "drop_original": drop_original,
            "unknown_value": unknown_value,
        },
    )

map_label

map_label(
    column: str,
    categories: list[Any] | None = None,
    new_column: str | None = None,
    *,
    drop_original: bool = True,
    unknown_value: int = -1,
) -> Self

Label encode a categorical column.

Simple integer encoding (alphabetically sorted by default).

Parameters:

Name Type Description Default
column str

Source column to encode.

required
categories list[Any] | None

List of categories. If None, uses sorted unique values.

None
new_column str | None

Output column name. If None, replaces original.

None
drop_original bool

Drop source column if new_column differs (default: True).

True
unknown_value int

Integer for unknown values (default: -1).

-1

Returns:

Type Description
Self

Self for method chaining.

Example

plan.map_label("department")

Maps alphabetically: Engineering->0, HR->1, Sales->2

Source code in transformplan/ops/map.py
def map_label(
    self,
    column: str,
    categories: list[Any] | None = None,
    new_column: str | None = None,
    *,
    drop_original: bool = True,
    unknown_value: int = -1,
) -> Self:
    """Label encode a categorical column.

    Simple integer encoding (alphabetically sorted by default).

    Args:
        column: Source column to encode.
        categories: List of categories. If None, uses sorted unique values.
        new_column: Output column name. If None, replaces original.
        drop_original: Drop source column if new_column differs (default: True).
        unknown_value: Integer for unknown values (default: -1).

    Returns:
        Self for method chaining.

    Example:
        >>> plan.map_label("department")
        # Maps alphabetically: Engineering->0, HR->1, Sales->2
    """
    return self._register(
        self._map_label,
        {
            "column": column,
            "categories": categories,
            "new_column": new_column or column,
            "drop_original": drop_original,
            "unknown_value": unknown_value,
        },
    )

Examples

Dictionary Mapping

# Map values using a dictionary
plan = TransformPlan().map_values(
    column="country_code",
    mapping={"US": "United States", "CA": "Canada", "MX": "Mexico"}
)

# With default for unmapped values
plan = TransformPlan().map_values(
    column="status",
    mapping={"A": "Active", "I": "Inactive"},
    default="Unknown",
    keep_unmapped=False
)

Discretization (Binning)

# Discretize numeric values into categories.
# Three edges give four bins; labels must have len(bins) + 1 entries.
plan = TransformPlan().map_discretize(
    column="age",
    bins=[18, 35, 55],
    labels=["Child", "Young Adult", "Adult", "Senior"],
    new_column="age_group"
)

# Auto-generated labels
plan = TransformPlan().map_discretize(
    column="score",
    bins=[0, 50, 75, 100],
    new_column="score_band"
)

Case-When Logic

# Apply case-when transformations
plan = TransformPlan().map_case(
    column="score",
    cases=[
        (90, "A"),
        (80, "B"),
        (70, "C"),
        (60, "D"),
    ],
    default="F",
    new_column="grade"
)

Null Handling

# Replace null with a value
plan = TransformPlan().map_null_to_value("status", "Unknown")

# Replace a value with null
plan = TransformPlan().map_value_to_null("status", "N/A")

Type Conversion

# Convert boolean to integer
plan = TransformPlan().map_bool_to_int("is_active")
# True -> 1, False -> 0

Column-based Lookup

# Map using values from other columns (vlookup-style)
plan = TransformPlan().map_from_column(
    column="category_id",
    lookup_column="category_id",
    value_column="category_name",
    new_column="category_label",
    default="Unknown"
)

Use Cases

Categorizing Continuous Data

# Income brackets.
# Four edges give five bins; labels must have len(bins) + 1 entries.
plan = TransformPlan().map_discretize(
    column="income",
    bins=[30000, 60000, 100000, 200000],
    labels=["Low", "Lower-Middle", "Middle", "Upper-Middle", "High"],
    new_column="income_bracket"
)

Standardizing Codes

# Standardize department codes
plan = TransformPlan().map_values(
    column="dept",
    mapping={
        "ENG": "Engineering",
        "MKT": "Marketing",
        "SAL": "Sales",
        "HR": "Human Resources"
    }
)

Data Cleaning

# Replace sentinel values with null
plan = TransformPlan().map_value_to_null("score", -999)

# Replace null with default
plan = TransformPlan().map_null_to_value("category", "Uncategorized")

One-Hot Encoding

# Basic one-hot encoding
plan = TransformPlan().map_onehot(
    column="color",
    categories=["red", "green", "blue"]
)
# Creates columns: color_red, color_green, color_blue

# Drop first category to avoid multicollinearity (for regression models)
plan = TransformPlan().map_onehot(
    column="color",
    categories=["red", "green", "blue"],
    drop="first"
)
# Creates columns: color_green, color_blue (drops color_red)

Ordinal Encoding

# Ordinal encoding with meaningful order
plan = TransformPlan().map_ordinal(
    column="size",
    categories=["small", "medium", "large"]
)
# Maps: small -> 0, medium -> 1, large -> 2

Label Encoding

# Label encoding (alphabetically sorted by default)
plan = TransformPlan().map_label(column="department")
# Maps alphabetically: Engineering -> 0, HR -> 1, Sales -> 2

ML Feature Preparation

# One-hot encode categorical features, dropping first to avoid multicollinearity
plan = (
    TransformPlan()
    .map_onehot("color", categories=["red", "green", "blue"], drop="first")
    .map_ordinal("quality", categories=["low", "medium", "high"])
)