Skip to content

Column Operations

Operations for adding, dropping, renaming, and transforming columns.

Overview

Column operations modify the structure of a DataFrame by adding, removing, or transforming columns. All operations return the TransformPlan instance for method chaining.

from transformplan import TransformPlan

# NOTE: `pl` below is presumably Polars (`import polars as pl`); that import is
# not shown in this snippet - confirm before copying it verbatim.
# Every col_* call returns the TransformPlan instance, so calls can be chained.
plan = (
    TransformPlan()
    .col_rename("old_name", "new_name")
    .col_drop("temp_column")
    .col_cast("price", pl.Float64)
    .col_add("status", value="active")
)

Class Reference

ColumnOps

Mixin providing column-level operations.

col_drop

col_drop(column: str) -> Self

Drop a column from the DataFrame.

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_drop(self, column: str) -> Self:
    """Register an operation that drops a column from the DataFrame.

    Args:
        column: Name of the column to drop.

    Returns:
        Self for method chaining.
    """
    params = {"column": column}
    return self._register(self._col_drop, params)

col_rename

col_rename(column: str, new_name: str) -> Self

Rename a column.

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_rename(self, column: str, new_name: str) -> Self:
    """Register a rename of ``column`` to ``new_name``.

    Args:
        column: Current name of the column.
        new_name: Name the column should carry afterwards.

    Returns:
        Self for method chaining.
    """
    params = {"column": column, "new_name": new_name}
    return self._register(self._col_rename, params)

col_cast

col_cast(column: str, dtype: type) -> Self

Cast a column to a different dtype.

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_cast(self, column: str, dtype: type) -> Self:
    """Register a cast of ``column`` to a different dtype.

    Args:
        column: Name of the column to cast.
        dtype: Target dtype, passed through unchanged to the registered
            operation.

    Returns:
        Self for method chaining.
    """
    params = {"column": column, "dtype": dtype}
    return self._register(self._col_cast, params)

col_reorder

col_reorder(columns: Sequence[str]) -> Self

Reorder columns. Unlisted columns are dropped.

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_reorder(self, columns: Sequence[str]) -> Self:
    """Reorder columns. Unlisted columns are dropped.

    Args:
        columns: Column names in the desired output order. A bare string is
            treated as a single column name instead of being split into
            characters.

    Returns:
        Self for method chaining.
    """
    # ``str`` is itself a Sequence[str]; without this guard ``list("abc")``
    # would register the columns "a", "b" and "c".
    ordered = [columns] if isinstance(columns, str) else list(columns)
    return self._register(self._col_reorder, {"columns": ordered})

col_select

col_select(columns: Sequence[str]) -> Self

Keep only the specified columns (order preserved).

Parameters:

Name Type Description Default
columns Sequence[str]

Columns to keep.

required

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_select(self, columns: Sequence[str]) -> Self:
    """Keep only the specified columns (order preserved).

    Args:
        columns: Columns to keep. A bare string is treated as a single
            column name instead of being split into characters.

    Returns:
        Self for method chaining.
    """
    # ``str`` is itself a Sequence[str]; without this guard ``list("abc")``
    # would register the columns "a", "b" and "c".
    kept = [columns] if isinstance(columns, str) else list(columns)
    return self._register(self._col_select, {"columns": kept})

col_duplicate

col_duplicate(column: str, new_name: str) -> Self

Duplicate a column under a new name.

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_duplicate(self, column: str, new_name: str) -> Self:
    """Register a copy of ``column`` under the name ``new_name``.

    Args:
        column: Name of the column to duplicate.
        new_name: Name for the duplicated column.

    Returns:
        Self for method chaining.
    """
    params = {"column": column, "new_name": new_name}
    return self._register(self._col_duplicate, params)

col_fill_null

col_fill_null(
    column: str, value: Any = None, strategy: FillNullStrategy | None = None
) -> Self

Fill null values in a column.

Parameters:

Name Type Description Default
column str

Column to fill.

required
value Any

Value to fill nulls with (if strategy is None).

None
strategy FillNullStrategy | None

Fill strategy - 'forward', 'backward', 'mean', 'min', 'max', 'zero', 'one'.

None

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_fill_null(
    self,
    column: str,
    value: Any = None,  # noqa: ANN401
    strategy: FillNullStrategy | None = None,
) -> Self:
    """Register an operation that fills null values in a column.

    Args:
        column: Column whose nulls are filled.
        value: Replacement for nulls (used when ``strategy`` is None).
        strategy: Fill strategy - 'forward', 'backward', 'mean', 'min', 'max',
            'zero', 'one'.

    Returns:
        Self for method chaining.
    """
    params = {"column": column, "value": value, "strategy": strategy}
    return self._register(self._col_fill_null, params)

col_drop_null

col_drop_null(columns: str | Sequence[str] | None = None) -> Self

Drop rows with null values in specified columns.

Parameters:

Name Type Description Default
columns str | Sequence[str] | None

Column(s) to check for nulls. If None, checks all columns.

None

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_drop_null(self, columns: str | Sequence[str] | None = None) -> Self:
    """Drop rows with null values in specified columns.

    Args:
        columns: Column(s) to check for nulls. A single name may be given as
            a plain string. If None, checks all columns.

    Returns:
        Self for method chaining.
    """
    if columns is None:
        targets = None
    elif isinstance(columns, str):
        targets = [columns]
    else:
        # Store a fresh list, as the other column-list operations do, so the
        # registered parameters neither alias the caller's sequence nor keep
        # a tuple or other non-list container.
        targets = list(columns)
    return self._register(self._col_drop_null, {"columns": targets})

col_drop_zero

col_drop_zero(column: str) -> Self

Drop rows where the specified column is zero.

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_drop_zero(self, column: str) -> Self:
    """Register an operation that drops rows where ``column`` is zero.

    Args:
        column: Name of the column to test for zero.

    Returns:
        Self for method chaining.
    """
    params = {"column": column}
    return self._register(self._col_drop_zero, params)

col_add

col_add(
    new_column: str, expr: str | float | None = None, value: Any = None
) -> Self

Add a new column with a constant value or expression.

Parameters:

Name Type Description Default
new_column str

Name of the new column.

required
expr str | float | None

Column name to copy from, or None for constant value.

None
value Any

Constant value to fill the column with.

None

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_add(
    self,
    new_column: str,
    expr: str | float | None = None,
    value: Any = None,  # noqa: ANN401
) -> Self:
    """Register a new column built from a constant value or an expression.

    Args:
        new_column: Name of the column to create.
        expr: Column name to copy from, or None for constant value.
        value: Constant value to fill the column with.

    Returns:
        Self for method chaining.
    """
    params = {"new_column": new_column, "expr": expr, "value": value}
    return self._register(self._col_add, params)

col_add_uuid

col_add_uuid(column: str, length: int = 16) -> Self

Add a column with unique random identifiers.

Parameters:

Name Type Description Default
column str

Name of the new column.

required
length int

Length of the identifier string.

16

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_add_uuid(self, column: str, length: int = 16) -> Self:
    """Register a new column of unique random identifiers.

    Args:
        column: Name of the column to create.
        length: Length of the identifier string.

    Returns:
        Self for method chaining.
    """
    params = {"column": column, "length": length}
    return self._register(self._col_add_uuid, params)

col_hash

col_hash(columns: str | Sequence[str], new_column: str, salt: str = '') -> Self

Hash one or more columns into a new column.

Parameters:

Name Type Description Default
columns str | Sequence[str]

Column(s) to hash.

required
new_column str

Name for the hash column.

required
salt str

Optional salt to add to the hash.

''

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_hash(
    self,
    columns: str | Sequence[str],
    new_column: str,
    salt: str = "",
) -> Self:
    """Register an operation that hashes one or more columns into a new column.

    Args:
        columns: Column(s) to hash; a single name may be given as a string.
        new_column: Name for the hash column.
        salt: Optional salt to add to the hash.

    Returns:
        Self for method chaining.
    """
    # A lone string names one column; anything else is copied into a list.
    sources = [columns] if isinstance(columns, str) else list(columns)
    params = {"columns": sources, "new_column": new_column, "salt": salt}
    return self._register(self._col_hash, params)

col_coalesce

col_coalesce(columns: Sequence[str], new_column: str) -> Self

Take the first non-null value across multiple columns.

Parameters:

Name Type Description Default
columns Sequence[str]

Columns to coalesce (in priority order).

required
new_column str

Name for the result column.

required

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/column.py
def col_coalesce(
    self,
    columns: Sequence[str],
    new_column: str,
) -> Self:
    """Register a column holding the first non-null value across columns.

    Args:
        columns: Columns to coalesce (in priority order).
        new_column: Name for the result column.

    Returns:
        Self for method chaining.
    """
    params = {"columns": [*columns], "new_column": new_column}
    return self._register(self._col_coalesce, params)

Examples

Basic Column Operations

# Drop a column
plan = TransformPlan().col_drop("temp")

# Rename a column (current name first, new name second)
plan = TransformPlan().col_rename("old", "new")

# Cast to a different type
# NOTE: `pl` is presumably Polars (`import polars as pl`); the import is not
# shown in this snippet.
plan = TransformPlan().col_cast("price", pl.Float64)

Column Selection

# Keep only specific columns (in order); takes a sequence of column names
plan = TransformPlan().col_select(["id", "name", "value"])

# Reorder columns (drops unlisted columns); takes a sequence of column names
plan = TransformPlan().col_reorder(["value", "name", "id"])

Adding Columns

# Add column with constant value
plan = TransformPlan().col_add("status", value="pending")

# Copy from existing column: `expr` is the name of the column to copy from
plan = TransformPlan().col_add("price_backup", expr="price")

# Add unique random identifiers; `length` is the length of each identifier
# string (16 is also the default)
plan = TransformPlan().col_add_uuid("row_id", length=16)

Handling Null Values

# Fill nulls with a value
plan = TransformPlan().col_fill_null("score", value=0)

# Fill with strategy instead of a fixed value; documented strategies are
# 'forward', 'backward', 'mean', 'min', 'max', 'zero', 'one'
plan = TransformPlan().col_fill_null("value", strategy="forward")

# Drop rows with nulls in the listed columns; `columns` may also be a single
# name, or omitted to check all columns
plan = TransformPlan().col_drop_null(columns=["required_field"])

Advanced Operations

# Create hash from multiple columns; `salt` is optional and defaults to ''
plan = TransformPlan().col_hash(
    columns=["first_name", "last_name", "email"],
    new_column="user_hash",
    salt="my_salt"
)

# Take first non-null from multiple columns, listed in priority order
plan = TransformPlan().col_coalesce(
    columns=["primary_email", "secondary_email", "backup_email"],
    new_column="contact_email"
)

# Duplicate a column under a new name (source column first, new name second)
plan = TransformPlan().col_duplicate("original", "copy")