Skip to content

String Operations

Text manipulation operations on string columns.

Overview

String operations let you transform text data in DataFrame columns. Available operations cover case conversion, trimming and padding, literal or regex replacement, slicing and truncation, splitting, concatenation, and regex capture-group extraction.

from transformplan import TransformPlan

# Chain three string operations into one plan: lowercase "email", strip
# surrounding whitespace from "name" (no `chars` given), and replace each
# literal "-" in "phone" with the empty string (`literal` defaults to True).
plan = (
    TransformPlan()
    .str_lower("email")
    .str_strip("name")
    .str_replace("phone", "-", "")
)

Class Reference

StrOps

Mixin providing string operations on columns.

str_replace

str_replace(
    column: str, pattern: str, replacement: str, *, literal: bool = True
) -> Self

Replace occurrences of a pattern in a string column.

Parameters:

Name Type Description Default
column str

Column to modify.

required
pattern str

Pattern to search for.

required
replacement str

String to replace with.

required
literal bool

If True, treat pattern as literal string. If False, treat as regex.

True

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/string.py
def str_replace(
    self,
    column: str,
    pattern: str,
    replacement: str,
    *,
    literal: bool = True,
) -> Self:
    """Replace occurrences of a pattern in a string column.

    Args:
        column: Column to modify.
        pattern: Pattern to search for.
        replacement: String to replace with.
        literal: If True, treat pattern as literal string. If False, treat as regex.

    Returns:
        Self for method chaining.
    """
    return self._register(
        self._str_replace,
        {
            "column": column,
            "pattern": pattern,
            "replacement": replacement,
            "literal": literal,
        },
    )

str_slice

str_slice(column: str, offset: int, length: int | None = None) -> Self

Extract a substring from a string column.

Parameters:

Name Type Description Default
column str

Column to modify.

required
offset int

Start position (0-indexed, negative counts from end).

required
length int | None

Number of characters to extract (None = to end).

None

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/string.py
def str_slice(
    self,
    column: str,
    offset: int,
    length: int | None = None,
) -> Self:
    """Extract a substring from a string column.

    Args:
        column: Column to modify.
        offset: Start position (0-indexed, negative counts from end).
        length: Number of characters to extract (None = to end).

    Returns:
        Self for method chaining.
    """
    return self._register(
        self._str_slice, {"column": column, "offset": offset, "length": length}
    )

str_truncate

str_truncate(column: str, max_length: int, suffix: str = '...') -> Self

Truncate strings to a maximum length with optional suffix.

Parameters:

Name Type Description Default
column str

Column to modify.

required
max_length int

Maximum length of the string (including suffix).

required
suffix str

Suffix to append to truncated strings.

'...'

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/string.py
def str_truncate(self, column: str, max_length: int, suffix: str = "...") -> Self:
    """Truncate strings to a maximum length with optional suffix.

    Args:
        column: Column to modify.
        max_length: Maximum length of the string (including suffix).
        suffix: Suffix to append to truncated strings.

    Returns:
        Self for method chaining.
    """
    return self._register(
        self._str_truncate,
        {"column": column, "max_length": max_length, "suffix": suffix},
    )

str_lower

str_lower(column: str) -> Self

Convert string column to lowercase.

Parameters:

Name Type Description Default
column str

Column to convert.

required

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/string.py
def str_lower(self, column: str) -> Self:
    """Convert string column to lowercase.

    Returns:
        Self for method chaining.
    """
    return self._register(self._str_lower, {"column": column})

str_upper

str_upper(column: str) -> Self

Convert string column to uppercase.

Parameters:

Name Type Description Default
column str

Column to convert.

required

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/string.py
def str_upper(self, column: str) -> Self:
    """Convert string column to uppercase.

    Returns:
        Self for method chaining.
    """
    return self._register(self._str_upper, {"column": column})

str_strip

str_strip(column: str, chars: str | None = None) -> Self

Strip leading and trailing characters from a string column.

Parameters:

Name Type Description Default
column str

Column to modify.

required
chars str | None

Characters to strip (None = whitespace).

None

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/string.py
def str_strip(self, column: str, chars: str | None = None) -> Self:
    """Strip leading and trailing characters from a string column.

    Args:
        column: Column to modify.
        chars: Characters to strip (None = whitespace).

    Returns:
        Self for method chaining.
    """
    return self._register(self._str_strip, {"column": column, "chars": chars})

str_pad

str_pad(
    column: str, length: int, fill_char: str = " ", side: str = "left"
) -> Self

Pad a string column to a specified length.

Parameters:

Name Type Description Default
column str

Column to modify.

required
length int

Target length.

required
fill_char str

Character to pad with.

' '
side str

'left' or 'right'.

'left'

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/string.py
def str_pad(
    self,
    column: str,
    length: int,
    fill_char: str = " ",
    side: str = "left",
) -> Self:
    """Pad a string column to a specified length.

    Args:
        column: Column to modify.
        length: Target length.
        fill_char: Character to pad with.
        side: 'left' or 'right'.

    Returns:
        Self for method chaining.
    """
    return self._register(
        self._str_pad,
        {"column": column, "length": length, "fill_char": fill_char, "side": side},
    )

str_split

str_split(
    column: str,
    separator: str,
    new_columns: list[str] | None = None,
    *,
    keep_original: bool = False,
) -> Self

Split a string column by separator.

Parameters:

Name Type Description Default
column str

Column to split.

required
separator str

String to split on.

required
new_columns list[str] | None

Names for the resulting columns. If None, explodes into rows.

None
keep_original bool

Whether to keep the original column.

False

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/string.py
def str_split(
    self,
    column: str,
    separator: str,
    new_columns: list[str] | None = None,
    *,
    keep_original: bool = False,
) -> Self:
    """Split a string column by separator.

    Args:
        column: Column to split.
        separator: String to split on.
        new_columns: Names for the resulting columns. If None, explodes into rows.
        keep_original: Whether to keep the original column.

    Returns:
        Self for method chaining.
    """
    return self._register(
        self._str_split,
        {
            "column": column,
            "separator": separator,
            "new_columns": new_columns,
            "keep_original": keep_original,
        },
    )

str_concat

str_concat(columns: list[str], new_column: str, separator: str = '') -> Self

Concatenate multiple string columns into one.

Parameters:

Name Type Description Default
columns list[str]

Columns to concatenate.

required
new_column str

Name for the new column.

required
separator str

Separator between values.

''

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/string.py
def str_concat(
    self,
    columns: list[str],
    new_column: str,
    separator: str = "",
) -> Self:
    """Concatenate multiple string columns into one.

    Args:
        columns: Columns to concatenate.
        new_column: Name for the new column.
        separator: Separator between values.

    Returns:
        Self for method chaining.
    """
    return self._register(
        self._str_concat,
        {"columns": columns, "new_column": new_column, "separator": separator},
    )

str_extract

str_extract(
    column: str,
    pattern: str,
    group_index: int = 1,
    new_column: str | None = None,
) -> Self

Extract substring using regex capture group.

Parameters:

Name Type Description Default
column str

Column to extract from.

required
pattern str

Regex pattern with capture group(s).

required
group_index int

Which capture group to extract (1-indexed).

1
new_column str | None

Name for result column (None = modify in place).

None

Returns:

Type Description
Self

Self for method chaining.

Source code in transformplan/ops/string.py
def str_extract(
    self,
    column: str,
    pattern: str,
    group_index: int = 1,
    new_column: str | None = None,
) -> Self:
    """Extract substring using regex capture group.

    Args:
        column: Column to extract from.
        pattern: Regex pattern with capture group(s).
        group_index: Which capture group to extract (1-indexed).
        new_column: Name for result column (None = modify in place).

    Returns:
        Self for method chaining.
    """
    return self._register(
        self._str_extract,
        {
            "column": column,
            "pattern": pattern,
            "group_index": group_index,
            "new_column": new_column or column,
        },
    )

Examples

Case Conversion

# Convert the "email" column to lowercase
plan = TransformPlan().str_lower("email")

# Convert the "code" column to uppercase
plan = TransformPlan().str_upper("code")

Trimming and Padding

# Strip leading and trailing whitespace (the default when `chars` is omitted)
plan = TransformPlan().str_strip("name")

# Strip specific leading/trailing characters instead of whitespace
plan = TransformPlan().str_strip("code", chars="-_")

# Pad "id" on the left with "0" to a target length of 10
plan = TransformPlan().str_pad("id", length=10, fill_char="0", side="left")

Replacement

# Replace a literal string (`literal` defaults to True): "-" -> ""
plan = TransformPlan().str_replace("phone", "-", "")

# Replace with a regex (literal=False): each whitespace run -> one space
plan = TransformPlan().str_replace(
    column="text",
    pattern=r"\s+",
    replacement=" ",
    literal=False
)

Substring Operations

# Extract substring by position: 3 characters starting at offset 0
plan = TransformPlan().str_slice("code", offset=0, length=3)

# Truncate to a maximum length of 100 (suffix included in that length)
plan = TransformPlan().str_truncate("description", max_length=100, suffix="...")

Splitting

# Split into rows (explode): the behavior when `new_columns` is not given
plan = TransformPlan().str_split("tags", separator=",")

# Split into named columns; keep_original=False does not keep "full_name"
plan = TransformPlan().str_split(
    column="full_name",
    separator=" ",
    new_columns=["first_name", "last_name"],
    keep_original=False
)

Concatenation

# Concatenate two columns into a new "full_name" column, separated by a space
plan = TransformPlan().str_concat(
    columns=["first_name", "last_name"],
    new_column="full_name",
    separator=" "
)

Pattern Extraction

# Extract with regex capture group: group 1 of r"@(.+)$" is the text after
# "@" up to the end of the string; the result column is named "domain"
plan = TransformPlan().str_extract(
    column="email",
    pattern=r"@(.+)$",
    group_index=1,
    new_column="domain"
)