str/core
Core grapheme-aware string utilities for Unicode-correct operations.
This module provides fundamental string operations that respect Unicode grapheme cluster boundaries, ensuring correct handling of:
- Complex emoji (ZWJ sequences, skin tones, flags)
- Combining character sequences (diacritics, accents)
- Regional indicators and variation selectors
All functions operate at the grapheme level rather than codepoint or byte level, preventing visual corruption of composed characters.
Types
Position for the fill function.
pub type FillPosition {
Left
Right
Both
}
Constructors
- Left
- Right
- Both
Values
pub fn at(text: String, index: Int) -> Result(String, Nil)
Returns the grapheme cluster at the given index (0-based).
at(“hello”, 1) -> Ok(“e”) at(“👨👩👧👦abc”, 0) -> Ok(“👨👩👧👦”) at(“hi”, 10) -> Error(Nil)
pub fn capitalize(text: String) -> String
Capitalizes text: first letter uppercase, rest lowercase.
capitalize(“hello”) -> “Hello” capitalize(“hELLO wORLD”) -> “Hello world” capitalize(“”) -> “” capitalize(“👋 hello”) -> “👋 hello”
pub fn center(text: String, width: Int, pad: String) -> String
Centers text within the specified width using the given padding. When padding is uneven, the left side receives more (left-biased).
center(“hi”, 6, “ ”) -> “  hi  ” center(“hi”, 5, “ ”) -> “  hi ”
pub fn chomp(text: String) -> String
Removes trailing newline if present.
chomp(“hello\n”) -> “hello” chomp(“hello\r\n”) -> “hello” chomp(“hello”) -> “hello”
pub fn chunk(text: String, size: Int) -> List(String)
Splits text into chunks of size grapheme clusters each. The last chunk may be smaller if text doesn’t divide evenly.
chunk(“abcdefg”, 2) -> [“ab”, “cd”, “ef”, “g”] chunk(“hello”, 3) -> [“hel”, “lo”] chunk(“👨👩👧👦abc”, 2) -> [“👨👩👧👦a”, “bc”] chunk(“hi”, 10) -> [“hi”]
pub fn common_prefix(strings: List(String)) -> String
Finds the longest common prefix among a list of strings.
common_prefix([“abc”, “abd”, “abe”]) -> “ab” common_prefix([“hello”, “world”]) -> “” common_prefix([]) -> “”
pub fn common_suffix(strings: List(String)) -> String
Finds the longest common suffix among a list of strings.
common_suffix([“abc”, “xbc”, “zbc”]) -> “bc” common_suffix([“hello”, “world”]) -> “”
pub fn contains(text: String, needle: String) -> Bool
Returns True if needle is found in text (grapheme-aware).
contains(“hello world”, “world”) -> True contains(“hello”, “x”) -> False contains(“”, “”) -> False
pub fn contains_all(text: String, needles: List(String)) -> Bool
Checks if text contains all of the given needles (grapheme-aware). Uses grapheme-boundary matching for correct Unicode handling.
contains_all(“hello world”, [“hello”, “world”]) -> True contains_all(“hello”, [“hello”, “x”]) -> False contains_all(“test”, []) -> True contains_all(“👨👩👧👦 family”, [“👨👩👧👦”, “family”]) -> True
pub fn contains_any(text: String, needles: List(String)) -> Bool
Checks if text contains any of the given needles (grapheme-aware). Uses grapheme-boundary matching for correct Unicode handling.
contains_any(“hello world”, [“foo”, “world”]) -> True contains_any(“hello”, [“x”, “y”, “z”]) -> False contains_any(“test”, []) -> False contains_any(“👨👩👧👦 family”, [“👨👩👧👦”, “test”]) -> True
pub fn count(
haystack: String,
needle: String,
overlapping: Bool,
) -> Int
Counts occurrences of needle in haystack (grapheme-aware). If overlapping is True, counts overlapping matches.
count(“aaaa”, “aa”, True) -> 3 count(“aaaa”, “aa”, False) -> 2 count(“hello”, “”, False) -> 0
pub fn dedent(text: String) -> String
Removes common leading whitespace from all lines. Useful for multiline string literals.
dedent(“  a\n  b\n  c”) -> “a\nb\nc” dedent(“  hello\n  world”) -> “hello\nworld”
pub fn distance(a: String, b: String) -> Int
Calculates Levenshtein distance between two strings. Returns the minimum number of single-character edits (insertions, deletions, substitutions) needed to transform one string into another.
distance(“kitten”, “sitting”) -> 3 distance(“hello”, “hello”) -> 0 distance(“”, “abc”) -> 3
pub fn drop(text: String, n: Int) -> String
Drops the first N grapheme clusters from text.
drop(“hello”, 2) -> “llo” drop(“👨👩👧👦abc”, 1) -> “abc” drop(“hi”, 10) -> “”
pub fn drop_right(text: String, n: Int) -> String
Drops the last N grapheme clusters from text.
drop_right(“hello”, 2) -> “hel” drop_right(“👨👩👧👦abc”, 2) -> “👨👩👧👦a” drop_right(“hi”, 10) -> “”
pub fn ellipsis(text: String, max_len: Int) -> String
Truncates text with ellipsis (…). Convenience wrapper for truncate with single-character suffix.
ellipsis(“Hello World”, 8) -> “Hello W…”
pub fn ends_with(text: String, suffix: String) -> Bool
Returns True if text ends with suffix on grapheme boundaries.
ends_with(“hello.txt”, “.txt”) -> True ends_with(“hello”, “”) -> True ends_with(“hi”, “hello”) -> False
pub fn ends_with_any(
text: String,
suffixes: List(String),
) -> Bool
Checks if text ends with any of the given suffixes.
ends_with_any(“hello.txt”, [“.txt”, “.md”, “.gleam”]) -> True ends_with_any(“hello”, [“bye”, “world”]) -> False ends_with_any(“test”, []) -> False
pub fn ensure_prefix(text: String, prefix: String) -> String
Adds prefix if not already present.
ensure_prefix(“world”, “hello “) -> “hello world” ensure_prefix(“hello world”, “hello “) -> “hello world”
pub fn ensure_suffix(text: String, suffix: String) -> String
Adds suffix if not already present.
ensure_suffix(“hello”, “ world“) -> “hello world” ensure_suffix(“hello world”, “ world“) -> “hello world”
pub fn escape_html(text: String) -> String
Escapes HTML special characters to their entity equivalents. Escapes: & < > " '
escape_html(“<div>”) -> “&lt;div&gt;”
pub fn escape_regex(text: String) -> String
Escapes regex metacharacters so the string can be used as a literal pattern. Escapes: \ ^ $ . | ? * + ( ) [ ] { }
escape_regex(“hello.world”) -> “hello\.world” escape_regex(“[test]”) -> “\[test\]” escape_regex(“a+b*c?”) -> “a\+b\*c\?”
pub fn fill(
text: String,
width: Int,
pad: String,
position: FillPosition,
) -> String
Fills text to reach width by adding pad characters. Position specifies where to add padding: Left, Right, or Both (center).
fill(“42”, 5, “0”, Left) -> “00042” fill(“hi”, 6, “*”, Right) -> “hi****” fill(“x”, 5, “-”, Both) -> “--x--”
pub fn hamming_distance(a: String, b: String) -> Result(Int, Nil)
Calculates Hamming distance between two strings of equal length. Returns the number of positions where the corresponding graphemes differ. Returns Error(Nil) if strings have different lengths.
hamming_distance(“karolin”, “kathrin”) -> Ok(3) hamming_distance(“hello”, “hallo”) -> Ok(1) hamming_distance(“abc”, “ab”) -> Error(Nil)
pub fn indent(text: String, spaces: Int) -> String
Adds indentation to each line.
indent(“hello\nworld”, 2) -> “  hello\n  world” indent(“hi”, 4) -> “    hi”
pub fn index_of(text: String, needle: String) -> Result(Int, Nil)
Finds the index of the first occurrence of needle in text (grapheme-aware). Returns Ok(index) if found, Error(Nil) if not found.
index_of(“hello world”, “world”) -> Ok(6) index_of(“hello”, “x”) -> Error(Nil) index_of(“👨👩👧👦 family”, “family”) -> Ok(2)
pub fn initials(text: String) -> String
Extracts initials from text (first letter of each word, uppercase).
initials(“John Doe”) -> “JD” initials(“visual studio code”) -> “VSC” initials(“hello”) -> “H” initials(“”) -> “”
pub fn is_alpha(text: String) -> Bool
Checks if text contains only ASCII letters (a-z, A-Z).
is_alpha(“hello”) -> True is_alpha(“Hello”) -> True is_alpha(“hello123”) -> False is_alpha(“”) -> False
pub fn is_alphanumeric(text: String) -> Bool
Checks if text contains only ASCII letters and digits.
is_alphanumeric(“hello123”) -> True is_alphanumeric(“hello-world”) -> False is_alphanumeric(“”) -> False
pub fn is_ascii(text: String) -> Bool
Checks if text contains only ASCII characters (0x00-0x7F).
is_ascii(“hello”) -> True is_ascii(“hello!@#”) -> True is_ascii(“café”) -> False is_ascii(“👋”) -> False is_ascii(“”) -> True
pub fn is_blank(text: String) -> Bool
Checks if a string contains only whitespace characters. Returns True for empty strings or strings with only spaces, tabs, newlines.
is_blank(“”) -> True is_blank(“ “) -> True is_blank(”\t\n“) -> True is_blank(“ hello “) -> False
pub fn is_empty(text: String) -> Bool
Returns True if text is an empty string.
is_empty(“”) -> True is_empty(“ “) -> False
pub fn is_hex(text: String) -> Bool
Checks if text contains only hexadecimal characters (0-9, a-f, A-F).
is_hex(“abc123”) -> True is_hex(“DEADBEEF”) -> True is_hex(“xyz”) -> False is_hex(“”) -> False
pub fn is_lowercase(text: String) -> Bool
Checks if all cased characters in text are lowercase. Non-cased characters (numbers, symbols) are ignored. Returns False for empty strings or strings with no cased characters.
is_lowercase(“hello”) -> True is_lowercase(“Hello”) -> False is_lowercase(“hello123”) -> True is_lowercase(“123”) -> False is_lowercase(“”) -> False
pub fn is_numeric(text: String) -> Bool
Checks if text contains only ASCII digits (0-9).
is_numeric(“12345”) -> True is_numeric(“123.45”) -> False is_numeric(“”) -> False
pub fn is_printable(text: String) -> Bool
Checks if text contains only printable ASCII characters (0x20-0x7E).
is_printable(“hello”) -> True is_printable(“hello\n”) -> False is_printable(“hello\t”) -> False is_printable(“”) -> True
pub fn is_title_case(text: String) -> Bool
Checks if text is in Title Case (first letter of each word is uppercase). Non-alphabetic characters are ignored. Empty strings return False.
is_title_case(“Hello World”) -> True is_title_case(“Hello world”) -> False is_title_case(“HELLO WORLD”) -> False is_title_case(“Hello”) -> True is_title_case(“”) -> False
pub fn is_uppercase(text: String) -> Bool
Checks if all cased characters in text are uppercase. Non-cased characters (numbers, symbols) are ignored. Returns False for empty strings or strings with no cased characters.
is_uppercase(“HELLO”) -> True is_uppercase(“Hello”) -> False is_uppercase(“HELLO123”) -> True is_uppercase(“123”) -> False is_uppercase(“”) -> False
pub fn last_index_of(
text: String,
needle: String,
) -> Result(Int, Nil)
Finds the index of the last occurrence of needle in text (grapheme-aware). Returns Ok(index) if found, Error(Nil) if not found.
last_index_of(“hello hello”, “hello”) -> Ok(6) last_index_of(“hello”, “x”) -> Error(Nil) last_index_of(“a-b-c”, “-”) -> Ok(3)
pub fn lines(text: String) -> List(String)
Splits text into lines. Handles \n, \r\n, and \r line endings.
lines(“a\nb\nc”) -> [“a”, “b”, “c”] lines(“hello”) -> [“hello”] lines(“a\r\nb”) -> [“a”, “b”]
pub fn normalize_whitespace(text: String) -> String
Normalizes whitespace: collapses multiple whitespace characters into single spaces. Also trims leading and trailing whitespace.
normalize_whitespace(“hello   world”) -> “hello world” normalize_whitespace(“  foo  bar  baz  ”) -> “foo bar baz” normalize_whitespace(“a\t\nb”) -> “a b”
pub fn pad_left(text: String, width: Int, pad: String) -> String
Pads text on the left to reach the specified width (in grapheme clusters). If the text is already equal to or longer than the width, returns unchanged.
pad_left(“hi”, 5, “ ”) -> “   hi” pad_left(“hello”, 3, “*”) -> “hello”
pub fn pad_right(text: String, width: Int, pad: String) -> String
Pads text on the right to reach the specified width (in grapheme clusters). If the text is already equal to or longer than the width, returns unchanged.
pad_right(“hi”, 5, “ ”) -> “hi   ” pad_right(“hello”, 3, “*”) -> “hello”
pub fn partition(
text: String,
sep: String,
) -> #(String, String, String)
Splits text into three parts: before, separator, and after.
partition(“a-b-c”, “-”) -> #(“a”, “-”, “b-c”) partition(“hello”, “-”) -> #(“hello”, “”, “”)
pub fn remove_prefix(text: String, prefix: String) -> String
Removes prefix from text if present.
remove_prefix(“hello world”, “hello “) -> “world” remove_prefix(“hello”, “bye”) -> “hello”
pub fn remove_suffix(text: String, suffix: String) -> String
Removes suffix from text if present.
remove_suffix(“hello world”, “ world“) -> “hello” remove_suffix(“hello”, “bye”) -> “hello”
pub fn replace_first(
text: String,
old: String,
new: String,
) -> String
Replaces only the first occurrence of old with new.
replace_first(“hello hello”, “hello”, “hi”) -> “hi hello” replace_first(“aaa”, “a”, “b”) -> “baa” replace_first(“test”, “x”, “y”) -> “test”
pub fn replace_last(
text: String,
old: String,
new: String,
) -> String
Replaces only the last occurrence of old with new.
replace_last(“hello hello”, “hello”, “hi”) -> “hello hi” replace_last(“aaa”, “a”, “b”) -> “aab” replace_last(“test”, “x”, “y”) -> “test”
pub fn reverse(text: String) -> String
Reverses text at grapheme cluster boundaries. Preserves combining marks and keeps emoji sequences intact. Involutive: reverse(reverse(x)) == x
reverse(“café”) -> “éfac” reverse(“👨👩👧👦”) -> “👨👩👧👦”
pub fn reverse_words(text: String) -> String
Reverses the order of words in text.
reverse_words(“hello world”) -> “world hello” reverse_words(“one two three”) -> “three two one” reverse_words(“single”) -> “single”
pub fn rpartition(
text: String,
sep: String,
) -> #(String, String, String)
Splits text into three parts from the last occurrence of separator. Returns #(before, separator, after). If separator not found, returns #(“”, “”, text). This mirrors Python’s str.rpartition() behavior.
rpartition(“a-b-c”, “-”) -> #(“a-b”, “-”, “c”) rpartition(“hello”, “-”) -> #(“”, “”, “hello”) rpartition(“one::two::three”, “::”) -> #(“one::two”, “::”, “three”)
pub fn similarity(a: String, b: String) -> Float
Calculates similarity as a ratio between 0.0 and 1.0 based on Levenshtein distance. Returns 1.0 for identical strings, 0.0 for completely different strings.
similarity(“hello”, “hello”) -> 1.0 similarity(“hello”, “hallo”) -> 0.8 similarity(“abc”, “xyz”) -> 0.0 similarity(“”, “”) -> 1.0
pub fn splitn(text: String, sep: String, n: Int) -> List(String)
Splits text into at most n parts. The last part contains the remainder of the string.
splitn(“a-b-c-d”, “-”, 2) -> [“a”, “b-c-d”] splitn(“a-b-c-d”, “-”, 3) -> [“a”, “b”, “c-d”] splitn(“hello”, “-”, 5) -> [“hello”] splitn(“a-b-c”, “-”, 0) -> []
pub fn squeeze(text: String, char: String) -> String
Collapses consecutive occurrences of a character to a single instance.
squeeze(“heeello”, “e”) -> “hello” squeeze(“mississippi”, “s”) -> “misisippi” squeeze(“  hello   world  ”, “ ”) -> “ hello world ”
pub fn starts_with(text: String, prefix: String) -> Bool
Returns True if text starts with prefix on grapheme boundaries.
starts_with(“hello”, “he”) -> True starts_with(“hello”, “”) -> True starts_with(“hi”, “hello”) -> False
pub fn starts_with_any(
text: String,
prefixes: List(String),
) -> Bool
Checks if text starts with any of the given prefixes.
starts_with_any(“hello”, [“hi”, “he”, “ho”]) -> True starts_with_any(“hello”, [“bye”, “world”]) -> False starts_with_any(“test”, []) -> False
pub fn strip(text: String, chars: String) -> String
Removes specified characters from both ends of text.
strip(“..hello..”, “.”) -> “hello” strip(“xxhelloxx”, “x”) -> “hello”
pub fn surround(
text: String,
prefix: String,
suffix: String,
) -> String
Wraps text with a prefix and suffix.
surround(“world”, “Hello “, “!”) -> “Hello world!”
pub fn swapcase(text: String) -> String
Swaps case of all ASCII letters.
swapcase(“Hello World”) -> “hELLO wORLD” swapcase(“ABC”) -> “abc”
pub fn take(text: String, n: Int) -> String
Returns the first N grapheme clusters from text.
take(“hello”, 3) -> “hel” take(“👨👩👧👦abc”, 2) -> “👨👩👧👦a” take(“hi”, 10) -> “hi”
pub fn take_right(text: String, n: Int) -> String
Returns the last N grapheme clusters from text.
take_right(“hello”, 3) -> “llo” take_right(“👨👩👧👦abc”, 2) -> “bc” take_right(“hi”, 10) -> “hi”
pub fn truncate(
text: String,
max_len: Int,
suffix: String,
) -> String
Truncates text to max_len grapheme clusters, preserving emoji sequences.
truncate(“Hello 👨👩👧👦 World”, 10, “...”) -> “Hello 👨👩👧👦...”
pub fn truncate_default(text: String, max_len: Int) -> String
Truncates text using “...” (three dots) as the default suffix.
truncate_default(“Hello World”, 8) -> “Hello...”
pub fn truncate_preserve(
text: String,
max_len: Int,
suffix: String,
) -> String
Truncates text while prioritizing complete emoji sequences. Explicit alias for the default truncate behavior.
pub fn truncate_strict(
text: String,
max_len: Int,
suffix: String,
) -> String
Strictly truncates text to exact length without emoji preservation.
truncate_strict(“Hi 👩👩👧👦”, 3, “…”) -> “Hi…”
pub fn truncate_with_flag(
text: String,
max_len: Int,
suffix: String,
keep_whole_emoji: Bool,
) -> String
Truncates text to max_len grapheme clusters with configurable options. If keep_whole_emoji is True, extends to include complete emoji sequences.
truncate_with_flag(“Hello World”, 8, “...”, True) -> “Hello...”
pub fn unescape_html(text: String) -> String
Unescapes HTML entities to their character equivalents. Handles: &amp; &lt; &gt; &quot; &#39;
unescape_html(“&lt;div&gt;”) -> “<div>”
pub fn unwrap(
text: String,
prefix: String,
suffix: String,
) -> String
Removes prefix and suffix from text if both are present. Operates on grapheme cluster boundaries.
unwrap(“Hello world!”, “Hello “, “!”) -> “world” unwrap(“test”, “<<”, “>>”) -> “test”
pub fn words(text: String) -> List(String)
Splits text into words by whitespace. Normalizes tabs, newlines and multiple spaces, then filters empty strings.
words(“Hello world\n\ttest”) -> [“Hello”, “world”, “test”] words(“ “) -> []