String

A String is an ordered collection of characters.

Strings don't specify an encoding.

Example usage of some common String methods:

// Example program: walks through several common String operations and
// prints the substring "world" extracted from "Hello world".
actor Main
  new create(env: Env) =>
    // find is a partial function (it raises an error when the search string
    // does not occur), so the whole example is wrapped in a try block. There
    // is no else clause, so any error raised inside is silently ignored.
    try
      // construct a new string
      let str = "Hello"

      // make an uppercased version (a new string; str is left unchanged)
      let str_upper = str.upper()
      // make a reversed version (also a new string)
      let str_reversed = str.reverse()

      // add " world" to the end of our original string; add returns the
      // concatenation as a new string, giving "Hello world"
      let str_new = str.add(" world")

      // count non-overlapping occurrences of letter "l"
      let count = str_new.count("l")

      // find the index of the first occurrence of letter "w"
      let first_w = str_new.find("w")
      // find the index of the first occurrence of letter "d"
      let first_d = str_new.find("d")

      // get substring capturing "world"; the range [from .. to) is half-open,
      // so the end index is first_d+1 in order to include the final "d"
      let substr = str_new.substring(first_w, first_d+1)
      // clone substring (an independent copy; substr itself is still usable)
      let substr_clone = substr.clone()

      // print our substr; consume gives up the local alias so the string
      // returned by substring can be handed over to print
      env.out.print(consume substr)
  end
class val String is
  Seq[U8 val] ref,
  Comparable[String box] ref,
  Stringable box

Implements


Constructors

create

An empty string. Enough space for len bytes is reserved.

new ref create(
  len: USize val = seq)
: String ref^

Parameters

Returns


from_array

Create a string from an array, reusing the underlying data pointer.

new val from_array(
  data: Array[U8 val] val)
: String val^

Parameters

Returns


from_iso_array

Create a string from an array, reusing the underlying data pointer.

new iso from_iso_array(
  data: Array[U8 val] iso)
: String iso^

Parameters

Returns


from_cpointer

Return a string from binary pointer data without making a copy. This must be done only with C-FFI functions that return pony_alloc'd character arrays. If a null pointer is given then an empty string is returned.

new ref from_cpointer(
  str: Pointer[U8 val] ref,
  len: USize val,
  alloc: USize val = seq)
: String ref^

Parameters

Returns


from_cstring

Return a string from a pointer to a null-terminated cstring without making a copy. The data is not copied. This must be done only with C-FFI functions that return pony_alloc'd character arrays. The pointer is scanned for the first null byte, which will be interpreted as the null terminator. Note that the scan is unbounded; the pointed to data must be null-terminated within the allocated array to preserve memory safety. If a null pointer is given then an empty string is returned.

new ref from_cstring(
  str: Pointer[U8 val] ref)
: String ref^

Parameters

Returns


copy_cpointer

Create a string by copying a fixed number of bytes from a pointer.

new ref copy_cpointer(
  str: Pointer[U8 val] box,
  len: USize val)
: String ref^

Parameters

Returns


copy_cstring

Create a string by copying a null-terminated C string. Note that the scan is unbounded; the pointed to data must be null-terminated within the allocated array to preserve memory safety. If a null pointer is given then an empty string is returned.

new ref copy_cstring(
  str: Pointer[U8 val] box)
: String ref^

Parameters

Returns


from_utf32

Create a UTF-8 string from a single UTF-32 code point.

new ref from_utf32(
  value: U32 val)
: String ref^

Parameters

  • value: U32 val

Returns


Public Functions

push_utf32

Push a UTF-32 code point.

fun ref push_utf32(
  value: U32 val)
: None val

Parameters

  • value: U32 val

Returns


cpointer

Returns a C compatible pointer to the underlying string allocation.

fun box cpointer(
  offset: USize val = seq)
: Pointer[U8 val] tag

Parameters

  • offset: USize val = seq

Returns


cstring

Returns a C compatible pointer to a null-terminated version of the string, safe to pass to an FFI function that doesn't accept a size argument, expecting a null-terminator. If the underlying string is already null terminated, this is returned; otherwise the string is copied into a new, null-terminated allocation.

fun box cstring()
: Pointer[U8 val] tag

Returns


array

Returns an Array[U8] that reuses the underlying data pointer.

fun val array()
: Array[U8 val] val

Returns


size

Returns the length of the string data in bytes.

fun box size()
: USize val

Returns


codepoints

Returns the number of unicode code points in the string between the two offsets. Index range [from .. to) is half-open.

fun box codepoints(
  from: ISize val = seq,
  to: ISize val = seq)
: USize val

Parameters

Returns


space

Returns the space available for data, not including the null terminator.

fun box space()
: USize val

Returns


reserve

Reserve space for len bytes. An additional byte will be reserved for the null terminator.

fun ref reserve(
  len: USize val)
: None val

Parameters

Returns


compact

Try to remove unused space, making it available for garbage collection. The request may be ignored. Note that this method returns None, so it cannot be used for call chaining.

fun ref compact()
: None val

Returns


recalc

Recalculates the string length. This is only needed if the string is changed via an FFI call. If a null terminator byte is not found within the allocated length, the size will not be changed.

fun ref recalc()
: None val

Returns


truncate

Truncates the string at the minimum of len and space. Ensures there is a null terminator. Does not check for null terminators inside the string.

Note that memory is not freed by this operation.

fun ref truncate(
  len: USize val)
: None val

Parameters

Returns


trim_in_place

Trim the string to a portion of itself, covering from until to. Unlike slice, the operation does not allocate a new string nor copy elements.

fun ref trim_in_place(
  from: USize val = seq,
  to: USize val = seq)
: None val

Parameters

Returns


trim

Return a shared portion of this string, covering from until to. Both the original and the new string are immutable, as they share memory. The operation does not allocate a new string pointer nor copy elements.

fun val trim(
  from: USize val = seq,
  to: USize val = seq)
: String val

Parameters

Returns


is_null_terminated

Return true if the string is null-terminated and safe to pass to an FFI function that doesn't accept a size argument, expecting a null-terminator. This method checks that there is a null byte just after the final position of populated bytes in the string, but does not check for other null bytes which may be present earlier in the content of the string. If you need a null-terminated copy of this string, use the clone method.

fun box is_null_terminated()
: Bool val

Returns


utf32

Return a UTF-32 representation of the character at the given offset and the number of bytes needed to encode that character. If the offset does not point to the beginning of a valid UTF-8 encoding, return 0xFFFD (the unicode replacement character) and a length of one. Raise an error if the offset is out of bounds.

fun box utf32(
  offset: ISize val)
: (U32 val , U8 val) ?

Parameters

Returns


apply

Returns the i-th byte. Raise an error if the index is out of bounds.

fun box apply(
  i: USize val)
: U8 val ?

Parameters

Returns

  • U8 val ?

update

Change the i-th byte. Raise an error if the index is out of bounds.

fun ref update(
  i: USize val,
  value: U8 val)
: U8 val ?

Parameters

Returns

  • U8 val ?

at_offset

Returns the byte at the given offset. Raise an error if the offset is out of bounds.

fun box at_offset(
  offset: ISize val)
: U8 val ?

Parameters

Returns

  • U8 val ?

update_offset

Changes a byte in the string, returning the previous byte at that offset. Raise an error if the offset is out of bounds.

fun ref update_offset(
  offset: ISize val,
  value: U8 val)
: U8 val ?

Parameters

Returns

  • U8 val ?

clone

Returns a copy of the string. The resulting string is null-terminated even if the original is not.

fun box clone()
: String iso^

Returns


find

Return the index of the n-th instance of s in the string starting from the beginning. Raise an error if there is no n-th occurrence of s or s is empty.

fun box find(
  s: String box,
  offset: ISize val = seq,
  nth: USize val = seq)
: ISize val ?

Parameters

Returns


rfind

Return the index of the n-th instance of s in the string starting from the end. The offset represents the highest index to be included in the search. Raise an error if there is no n-th occurrence of s or s is empty.

fun box rfind(
  s: String box,
  offset: ISize val = seq,
  nth: USize val = seq)
: ISize val ?

Parameters

Returns


contains

Returns true if the string contains s as a substring, false otherwise.

fun box contains(
  s: String box,
  offset: ISize val = seq,
  nth: USize val = seq)
: Bool val

Parameters

Returns


count

Counts the non-overlapping occurrences of s in the string.

fun box count(
  s: String box,
  offset: ISize val = seq)
: USize val

Parameters

Returns


at

Returns true if the substring s is present at the given offset.

fun box at(
  s: String box,
  offset: ISize val = seq)
: Bool val

Parameters

Returns


delete

Delete len bytes at the supplied offset, compacting the string in place.

fun ref delete(
  offset: ISize val,
  len: USize val = seq)
: None val

Parameters

Returns


substring

Returns a substring. Index range [from .. to) is half-open. Returns an empty string if nothing is in the range.

Note that this operation allocates a new string to be returned. For similar operations that don't allocate a new string, see trim and trim_in_place.

fun box substring(
  from: ISize val,
  to: ISize val = seq)
: String iso^

Parameters

Returns


lower

Returns a lower case version of the string.

fun box lower()
: String iso^

Returns


lower_in_place

Transforms the string to lower case. Currently only knows ASCII case.

fun ref lower_in_place()
: None val

Returns


upper

Returns an upper case version of the string. Currently only knows ASCII case.

fun box upper()
: String iso^

Returns


upper_in_place

Transforms the string to upper case.

fun ref upper_in_place()
: None val

Returns


reverse

Returns a reversed version of the string.

fun box reverse()
: String iso^

Returns


reverse_in_place

Reverses the byte order in the string. This needs to be changed to handle UTF-8 correctly.

fun ref reverse_in_place()
: None val

Returns


push

Add a byte to the end of the string.

fun ref push(
  value: U8 val)
: None val

Parameters

  • value: U8 val

Returns


pop

Remove a byte from the end of the string.

fun ref pop()
: U8 val ?

Returns

  • U8 val ?

unshift

Adds a byte to the beginning of the string.

fun ref unshift(
  value: U8 val)
: None val

Parameters

  • value: U8 val

Returns


shift

Removes a byte from the beginning of the string.

fun ref shift()
: U8 val ?

Returns

  • U8 val ?

append

Append the elements from a sequence, starting from the given offset.

fun ref append(
  seq: ReadSeq[U8 val] box,
  offset: USize val = seq,
  len: USize val = seq)
: None val

Parameters

Returns


concat

Add len iterated bytes to the end of the string, starting from the given offset.

fun ref concat(
  iter: Iterator[U8 val] ref,
  offset: USize val = seq,
  len: USize val = seq)
: None val

Parameters

Returns


clear

Truncate the string to zero length.

fun ref clear()
: None val

Returns


insert

Returns a version of the string with the given string inserted at the given offset.

fun box insert(
  offset: ISize val,
  that: String val)
: String iso^

Parameters

Returns


insert_in_place

Inserts the given string at the given offset. Appends the string if the offset is out of bounds.

fun ref insert_in_place(
  offset: ISize val,
  that: String box)
: None val

Parameters

Returns


insert_byte

Inserts a byte at the given offset. Appends if the offset is out of bounds.

fun ref insert_byte(
  offset: ISize val,
  value: U8 val)
: None val

Parameters

Returns


cut

Returns a version of the string with the given range deleted. Index range [from .. to) is half-open.

fun box cut(
  from: ISize val,
  to: ISize val = seq)
: String iso^

Parameters

Returns


cut_in_place

Cuts the given range out of the string. Index range [from .. to) is half-open.

fun ref cut_in_place(
  from: ISize val,
  to: ISize val = seq)
: None val

Parameters

Returns


remove

Remove all instances of s from the string. Returns the count of removed instances.

fun ref remove(
  s: String box)
: USize val

Parameters

Returns


replace

Replace up to n occurrences of from in this with to. If n is 0, all occurrences will be replaced. Returns the count of replaced occurrences.

fun ref replace(
  from: String box,
  to: String box,
  n: USize val = seq)
: USize val

Parameters

Returns


split_by

Split the string into an array of strings that are delimited by delim in the original string. If n > 0, then the split count is limited to n.

Adjacent delimiters result in a zero length entry in the array. For example, "1,,2".split_by(",") => ["1", "", "2"].

An empty delimiter results in an array that contains a single element equal to the whole string.

fun box split_by(
  delim: String val,
  n: USize val = seq)
: Array[String val] iso^

Parameters

Returns


split

Split the string into an array of strings. Any character in the delimiter string is accepted as a delimiter. If n > 0, then the split count is limited to n.

Adjacent delimiters result in a zero length entry in the array. For example, "1,,2".split(",") => ["1", "", "2"].

fun box split(
  delim: String val = seq,
  n: USize val = seq)
: Array[String val] iso^

Parameters

Returns


strip

Remove all leading and trailing characters from the string that are in s.

fun ref strip(
  s: String box = seq)
: None val

Parameters

Returns


rstrip

Remove all trailing characters within the string that are in s. By default, trailing whitespace is removed.

fun ref rstrip(
  s: String box = seq)
: None val

Parameters

Returns


lstrip

Remove all leading characters within the string that are in s. By default, leading whitespace is removed.

fun ref lstrip(
  s: String box = seq)
: None val

Parameters

Returns


add

Return a string that is a concatenation of this and that.

fun box add(
  that: String box)
: String val

Parameters

Returns


join

Return a string that is a concatenation of the strings in data, using this as a separator.

fun box join(
  data: Iterator[Stringable box] ref)
: String iso^

Parameters

Returns


compare

Lexically compare two strings.

fun box compare(
  that: String box)
: (Less val | Equal val | Greater val)

Parameters

Returns


compare_sub

Lexically compare at most n bytes of the substring of this starting at offset with the substring of that starting at that_offset. The comparison is case sensitive unless ignore_case is true.

If the substring of this is a proper prefix of the substring of that, then this is Less than that. Likewise, if that is a proper prefix of this, then this is Greater than that.

Both offset and that_offset can be negative, in which case the offsets are computed from the end of the string.

If n + offset is greater than the length of this, or n + that_offset is greater than the length of that, then the number of positions compared will be reduced to the length of the longest substring.

Needs to be made UTF-8 safe.

fun box compare_sub(
  that: String box,
  n: USize val,
  offset: ISize val = seq,
  that_offset: ISize val = seq,
  ignore_case: Bool val = seq)
: (Less val | Equal val | Greater val)

Parameters

Returns


eq

Returns true if the two strings have the same contents.

fun box eq(
  that: String box)
: Bool val

Parameters

Returns


lt

Returns true if this is lexically less than that. Needs to be made UTF-8 safe.

fun box lt(
  that: String box)
: Bool val

Parameters

Returns


le

Returns true if this is lexically less than or equal to that. Needs to be made UTF-8 safe.

fun box le(
  that: String box)
: Bool val

Parameters

Returns


offset_to_index

fun box offset_to_index(
  i: ISize val)
: USize val

Parameters

Returns


bool

fun box bool()
: Bool val ?

Returns


i8

fun box i8(
  base: U8 val = seq)
: I8 val ?

Parameters

  • base: U8 val = seq

Returns

  • I8 val ?

i16

fun box i16(
  base: U8 val = seq)
: I16 val ?

Parameters

  • base: U8 val = seq

Returns


i32

fun box i32(
  base: U8 val = seq)
: I32 val ?

Parameters

  • base: U8 val = seq

Returns


i64

fun box i64(
  base: U8 val = seq)
: I64 val ?

Parameters

  • base: U8 val = seq

Returns


i128

fun box i128(
  base: U8 val = seq)
: I128 val ?

Parameters

  • base: U8 val = seq

Returns


ilong

fun box ilong(
  base: U8 val = seq)
: ILong val ?

Parameters

  • base: U8 val = seq

Returns


isize

fun box isize(
  base: U8 val = seq)
: ISize val ?

Parameters

  • base: U8 val = seq

Returns


u8

fun box u8(
  base: U8 val = seq)
: U8 val ?

Parameters

  • base: U8 val = seq

Returns

  • U8 val ?

u16

fun box u16(
  base: U8 val = seq)
: U16 val ?

Parameters

  • base: U8 val = seq

Returns


u32

fun box u32(
  base: U8 val = seq)
: U32 val ?

Parameters

  • base: U8 val = seq

Returns


u64

fun box u64(
  base: U8 val = seq)
: U64 val ?

Parameters

  • base: U8 val = seq

Returns


u128

fun box u128(
  base: U8 val = seq)
: U128 val ?

Parameters

  • base: U8 val = seq

Returns


ulong

fun box ulong(
  base: U8 val = seq)
: ULong val ?

Parameters

  • base: U8 val = seq

Returns


usize

fun box usize(
  base: U8 val = seq)
: USize val ?

Parameters

  • base: U8 val = seq

Returns


read_int[A: ((I8 val | I16 val | I32 val | I64 val | I128 val | ILong val | ISize val | U8 val | U16 val | U32 val | U64 val | U128 val | ULong val | USize val) & Integer[A] val)]

Read an integer from the specified location in this string. The integer value read and the number of characters consumed are reported. The base parameter specifies the base to use, 0 indicates using the prefix, if any, to detect base 2, 10 or 16. If no integer is found at the specified location, then (0, 0) is returned, since no characters have been used. An integer out of range for the target type throws an error. A leading minus is allowed for signed integer types. Underscore characters are allowed throughout the integer and are ignored.

fun box read_int[A: ((I8 val | I16 val | I32 val | 
    I64 val | I128 val | ILong val | 
    ISize val | U8 val | U16 val | 
    U32 val | U64 val | U128 val | 
    ULong val | USize val) & Integer[A] val)](
  offset: ISize val = seq,
  base: U8 val = seq)
: (A , USize val) ?

Parameters

  • offset: ISize val = seq
  • base: U8 val = seq

Returns


f32

fun box f32(
  offset: ISize val = seq)
: F32 val

Parameters

  • offset: ISize val = seq

Returns


f64

fun box f64(
  offset: ISize val = seq)
: F64 val

Parameters

  • offset: ISize val = seq

Returns


hash

fun box hash()
: U64 val

Returns


string

fun box string()
: String iso^

Returns


values

Return an iterator over the bytes in the string.

fun box values()
: StringBytes ref^

Returns


runes

Return an iterator over the codepoints in the string.

fun box runes()
: StringRunes ref^

Returns


ge

fun box ge(
  that: String box)
: Bool val

Parameters

Returns


gt

fun box gt(
  that: String box)
: Bool val

Parameters

Returns


ne

fun box ne(
  that: String box)
: Bool val

Parameters

Returns


Private Functions

_copy_to

Copy copy_len characters from this to ptr at the specified offsets.

fun box _copy_to(
  ptr: Pointer[U8 val] ref,
  copy_len: USize val,
  from_offset: USize val = seq,
  to_offset: USize val = seq)
: None val

Parameters

Returns


_append

fun iso _append(
  s: String box)
: String iso^

Parameters

Returns


_to_int[A: ((I8 val | I16 val | I32 val | I64 val | I128 val | ILong val | ISize val | U8 val | U16 val | U32 val | U64 val | U128 val | ULong val | USize val) & Integer[A] val)]

Convert the whole string to the specified type. If there are any other characters in the string, or the integer found is out of range for the target type then an error is thrown.

fun box _to_int[A: ((I8 val | I16 val | I32 val | 
    I64 val | I128 val | ILong val | 
    ISize val | U8 val | U16 val | 
    U32 val | U64 val | U128 val | 
    ULong val | USize val) & Integer[A] val)](
  base: U8 val)
: A ?

Parameters

  • base: U8 val

Returns

  • A ?

_read_int_base[A: ((I8 val | I16 val | I32 val | I64 val | I128 val | ILong val | ISize val | U8 val | U16 val | U32 val | U64 val | U128 val | ULong val | USize val) & Integer[A] val)]

Determine the base of an integer starting at the specified index. If a non-0 base is given use that. If given base is 0 read the base specifying prefix, if any, to detect base 2 or 16. If no base is specified and no prefix is found default to decimal. Note that a leading 0 does NOT imply octal. Report the base found and the number of characters in the prefix.

fun box _read_int_base[A: ((I8 val | I16 val | I32 val | 
    I64 val | I128 val | ILong val | 
    ISize val | U8 val | U16 val | 
    U32 val | U64 val | U128 val | 
    ULong val | USize val) & Integer[A] val)](
  base: U8 val,
  index: USize val)
: (A , USize val)

Parameters

Returns


_set

Unsafe update, used internally.

fun ref _set(
  i: USize val,
  value: U8 val)
: U8 val

Parameters

Returns