1  package frost.core
   2  
   3  uses frost.unsafe.Pointer
   4  
   5  ---------------
   6  -- IMPORTANT --
   7  ------------------------------------------------------------------------------
   8  -- String and MutableString are assumed to have a compatible memory layout! --
   9  -- See MutableString.finish().                                              --
  10  ------------------------------------------------------------------------------
  11  
  12  ====================================================================================================
An immutable sequence of Unicode codepoints. Each Unicode codepoint is a number between 0 and
1,114,111 (0x10FFFF); a `String` may consist of any sequence of zero or more codepoints, regardless of whether
  15  this sequence forms a sensible Unicode string. Note that a single logical character may be composed
  16  of multiple Unicode codepoints, such as a string consisting of REGIONAL INDICATOR SYMBOL LETTER B
  17  followed by REGIONAL INDICATOR SYMBOL LETTER R, which many platforms will render as a Brazilian flag
  18  Emoji. Even though this string will generally be displayed as a single logical character, the String
  19  class deals with Unicode codepoints and thus considers the string to have a length of 2.
  20  
  21  Internally, `String`s are stored using the UTF-8 encoding. The fact that UTF-8 is a variable-length
  22  encoding impacts the performance of some operations, as determining the offset of a given codepoint
  23  requires traversing the string from the beginning. Because of this, it can be much faster to use
  24  [String.Index] as opposed to numeric offsets to index into `String`s. For instance, the code:
  25  
  26      -- testcase StringIntro1(CharProcess)
  27      for i in 0 .. string.length {
  28          process(string[i])
  29      }
  30  
is relatively slow, as the repeated calls to `string[i]` constantly re-scan the `String` from the
  32  beginning to find each successive character. We can rewrite this code using `String.Index`:
  33  
  34      -- testcase StringIntro2(CharProcess)
  35      var index := string.start
  36      while index != string.end {
  37          process(string[index])
  38          index := string.next(index)
  39      }
  40  
  41  This avoids the expensive re-scan of the string. Of course, iteration over the string is even
  42  simpler:
  43  
  44      -- testcase StringIntro3(CharProcess)
  45      for c in string {
  46          process(c)
  47      }
  48  
  49  As `String` is immutable, assembling a string via repeated concatenation is a very slow operation
  50  that creates many temporary objects. Instead create a [MutableString], repeatedly call `append` on
  51  it, and then finally turn it into an immutable `String` using [finish](MutableString.finish).
  52  ====================================================================================================
  53  class String : Immutable, HashKey<String>, Comparable<String>, Iterable<Char32>, Formattable {
  54      ================================================================================================
  55      Represents the position of a Unicode codepoint within a `String`.
  56      ================================================================================================
  57      class Index : Value, HashKey<Index>, Comparable<Index> {
  58          def byteOffset:Int
  59  
  60          init(byteOffset:Int) {
  61              self.byteOffset := byteOffset
  62          }
  63  
  64          @override
  65          function =(other:Index):Bit {
  66              return byteOffset = other.byteOffset
  67          }
  68  
  69          @override
  70          function >(other:Index):Bit {
  71              return byteOffset > other.byteOffset
  72          }
  73  
  74          @override
  75          function get_hash():Int {
  76              return byteOffset
  77          }
  78      }
  79  
  80      @private
  81      class UTF8List : ListView<Char8> {
  82          def str:String
  83  
  84          init(str:String) {
  85              self.str := str
  86          }
  87  
  88          @override
  89          function [](index:Int):Char8 {
  90              return str.data[index]
  91          }
  92  
  93          @override
  94          function get_count():Int {
  95              return str._length
  96          }
  97  
  98          @override
  99          function get_iterator():Iterator<Char8> {
 100              return UTF8Iterator(str)
 101          }
 102      }
 103  
 104      @private
 105      class UTF8Iterator : Iterator<Char8> {
 106          var index := 0
 107  
 108          def str:String
 109  
 110          init(str:String) {
 111              self.str := str
 112          }
 113  
 114          @override
 115          function get_done():Bit {
 116              return index >= str._length
 117          }
 118  
 119          @override
 120          method next():Char8 {
 121              index += 1
 122              return str.data[index - 1]
 123          }
 124      }
 125  
    -- Placeholder for iteration over a string's UTF-16 code units. NOT IMPLEMENTED: constructing
    -- one executes the `unreachable` statement in `init`.
    @private
    class UTF16Iterator : Iterator<Char16> {
        var index := 0

        def str:String

        init(str:String) {
            self.str := str
            -- UTF-16 encoding has not been written; every construction fails here
            unreachable, "unimplemented"
        }

        @override
        function get_done():Bit {
            return index >= str._length
        }

        @override
        method next():Char16 {
            index += 1
            -- dummy value, not an actual UTF-16 code unit of the string
            return Char16(1)
        }
    }
 148  
 149      @private
 150      class UTF32Iterator : Iterator<Char32> {
 151          var index := 0
 152  
 153          def str:String
 154  
 155          init(str:String) {
 156              self.str := str
 157          }
 158  
 159          @override
 160          function get_done():Bit {
 161              return index >= str._length
 162          }
 163  
 164          @override
 165          method next():Char32 {
 166              assert index < str._length
 167              def< str._length
 168              def c := str.data[index]
 169              var result := c.asInt32
 170              if c.asInt && 0xFF < 0b10000000 {
 171                  index += 1
 172                  return Char32(result)
 173              }
 174              if c.asInt && 0xFF < 0b11100000 {
 175                  if index + 1 < str._length {
 176                      result := (result && 0b11111) << 6 + str.data[index + 1].asInt32 &&
 177                              0b111111
 178                  }
 179                  else {
 180                      result := 0
 181                  }
 182                  index += 2
 183                  return Char32(result)
 184              }
 185              if c.asInt && 0xFF < 0b11110000 {
 186                  if index + 2 < str._length {
 187                      result := (result && 0b1111) << 12 + (str.data[index + 1].asInt32 &&
 188                              0b111111) << 6 + str.data[index + 2].asInt32 && 0b111111
 189                  }
 190                  else {
 191                      result := 0
 192                  }
 193                  index += 3
 194                  return Char32(result)
 195              }
 196              if index + 3 < str._length {
 197                  result := (result && 0b111) << 18 + (str.data[index + 1].asInt32 &&
 198                          0b111111) << 12 + (str.data[index + 2].asInt32 && 0b111111) << 6 +
 199                          str.data[index + 3].asInt32 && 0b111111
 200              }
 201              else {
 202                  result := 0
 203              }
 204              index += 4
 205              return Char32(result)
 206          }
 207      }
 208  
 209      @private
 210      class MatchIterator : Iterator<String.Index> {
 211          def haystack:String
 212  
 213          def needle:String
 214  
 215          var nextMatch:String.Index?
 216  
 217          var allowOverlaps:Bit
 218  
 219          init(haystack:String, needle:String, allowOveraps:Bit) {
 220              self.haystack := haystack
 221              self.needle := needle
 222              self.allowOverlaps := allowOverlaps
 223              nextMatch := haystack.indexOf(needle)
 224          }
 225  
 226          @override
 227          function get_done():Bit {
 228              return nextMatch == null
 229          }
 230  
 231          @override
 232          method next():String.Index {
 233              def result := nextMatch
 234              def start:String.Index
 235              if allowOverlaps {
 236                  start := haystack.next(start)
 237              }
 238              else {
 239                  start := String.Index(nextMatch.byteOffset + needle.byteLength)
 240              }
 241              nextMatch := haystack.indexOf(needle, start)
 242              return result
 243          }
 244      }
 245  
 246      ================================================================================================
 247      Represents a regular expression match within a string.
 248      ================================================================================================
 249      class Match : Immutable {
 250          ============================================================================================
 251          The index of the first character of the match.
 252          ============================================================================================
 253          def start:String.Index
 254  
 255          ============================================================================================
 256          The index just past the end of the match.
 257          ============================================================================================
 258          def end:String.Index
 259  
 260          ============================================================================================
 261          The match's group captures. Group 0 is always the entire match.
 262          ============================================================================================
 263          def groups:ImmutableArray<String>
 264  
 265          @private
 266          init(m:Matcher) {
 267              start := m.get_start()
 268              end := m.get_end()
 269              def g := Array<String>()
 270              for i in 0 .. m.get_groupCount() {
 271                  g.add(m.group(i))
 272              }
 273              groups := ImmutableArray<String>.from(g)
 274          }
 275  
 276          @override
 277          function get_toString():String {
 278              return "Match(\{groups[0]})"
 279          }
 280      }
 281  
 282      @private
 283      class RegexMatchIterator : Iterator<Match> {
 284          def matcher:Matcher
 285  
 286          def haystack:String
 287  
 288          def allowOverlaps:Bit
 289  
 290          var found:Bit
 291  
 292          init(haystack:String, needle:RegularExpression, allowOverlaps:Bit) {
 293              self.haystack := haystack
 294              self.allowOverlaps := allowOverlaps
 295              matcher := needle.matcher(haystack)
 296              found := matcher.find()
 297          }
 298  
 299          @override
 300          function get_done():Bit {
 301              return !found
 302          }
 303  
 304          @override
 305          method next():Match {
 306              def result := Match(matcher)
 307              def start:String.Index
 308              if allowOverlaps {
 309                  start := haystack.next(matcher.start)
 310              }
 311              else {
 312                  start := matcher.end
 313              }
 314              found := matcher.find(start)
 315              return result
 316          }
 317      }
 318  
    -- Pointer to the UTF-8 encoded bytes of this string.
    @private
    def data:Pointer<Char8>

    -- Number of bytes addressable through `data` (the value `byteLength` returns); this is not
    -- the number of codepoints.
    @private
    def _length:Int

    -- NOTE(review): never read in the visible code; presumably padding that keeps the field layout
    -- compatible with MutableString (see the IMPORTANT note at the top of this file) -- confirm
    -- before removing.
    @private
    def dummy:Int := 0

    ================================================================================================
    For dependent substrings, points to the parent String. Null for strings that own their `data`
    buffer; `cleanup()` only destroys `data` when this is null.
    ================================================================================================
    @private
    def owner:String?
 333  
    ================================================================================================
    A view of the UTF8 bytes this string contains.
    ================================================================================================
    property utf8:ListView<Char8>

    ================================================================================================
    An iterator over the UTF16 words this string contains.
    ================================================================================================
    property utf16:Iterator<Char16>

    ================================================================================================
    The number of Unicode codepoints this string contains. As the string is internally stored in the
    variable-width UTF8 format, determining the length of the string takes an amount of time
    proportional to the number of characters it contains. Use `byteLength` when the size of the
    UTF8 data is what is needed.
    ================================================================================================
    property length:Int

    ================================================================================================
    The number of UTF8 bytes this string contains.
    ================================================================================================
    property byteLength:Int

    ================================================================================================
    An `Index` representing the beginning of the string.
    ================================================================================================
    property start:Index

    ================================================================================================
    An `Index` representing the end of the string.
    ================================================================================================
    property end:Index

    ================================================================================================
    A copy of this string with leading and trailing whitespace characters removed. An empty string
    is returned as-is.
    ================================================================================================
    property trimmed:String
 370  
 371      ===@hidden===
 372      init(data:Pointer<Char8>, length:Int) {
 373          self.data := data
 374          _length := length
 375          self.owner := null
 376      }
 377  
 378      @private
 379      init(data:Pointer<Char8>, length:Int, owner:String) {
 380          self.data := data
 381          _length := length
 382          self.owner := owner
 383      }
 384  
 385      ================================================================================================
 386      Creates a new string containing the given characters.
 387      ================================================================================================
 388      init(chars:ListView<Char8>) {
 389          _length := chars.count
 390          data := Pointer<Char8>.alloc(_length)
 391          for i in 0 .. _length {
 392              data[i] := chars[i]
 393          }
 394          owner := null
 395      }
 396  
 397      ================================================================================================
 398      Creates a new string containing the given characters.
 399      ================================================================================================
 400      @unsafeAccess
 401      init(chars:ListView<Char32>) {
 402          def result := MutableString()
 403          for c in chars {
 404              result.append(c)
 405          }
 406          self.data := Pointer<Char8>.alloc(result._length)
 407          for i in 0 .. result._length {
 408              self.data[i] := result.data[i]
 409          }
 410          self._length := result._length
 411          owner := null
 412      }
 413  
    ================================================================================================
    Returns this string. The string is its own string representation, so the very same object is
    returned.
    ================================================================================================
    @override
    function get_toString():String {
        return self
    }
 421  
 422      ================================================================================================
 423      Returns a formatted representation of this string. With an empty format string, the raw string
 424      is returned. With the format string `"frost"`, a representation of the string as it would appear
 425      in Frost source code is returned.
 426  
 427      @param fmt the format string
 428      @returns a formatted string
 429      ================================================================================================
 430      @override
 431      function format(fmt:String):String {
 432          match fmt {
 433              when "" {
 434                  return self
 435              }
 436              when "frost" {
 437                  def result := MutableString('"')
 438                  for c in utf8 {
 439                      match c {
 440                          when "\"" {
 441                              result.append("\\\"")
 442                          }
 443                          when "\\" {
 444                              result.append("\\\\")
 445                          }
 446                          when "\n" {
 447                              result.append("\\n")
 448                          }
 449                          when "\r" {
 450                              result.append("\\r")
 451                          }
 452                          when "\t" {
 453                              result.append("\\t")
 454                          }
 455                          otherwise {
 456                              if c.asUInt8 >= 32 & c.asUInt8 <= 126 {
 457                                  result.append(c)
 458                              }
 459                              else {
 460                                  -- FIXME add unicode escape
 461                                  result.append("?")
 462                              }
 463                          }
 464                      }
 465                  }
 466                  result.append("\"")
 467                  return result.finish()
 468              }
 469              otherwise {
 470                  unreachable
 471              }
 472          }
 473      }
 474  
    -- Releases the string's byte buffer. A dependent substring (non-null `owner`) leaves `data`
    -- alone, because its pointer was derived from the parent's buffer (see `substring`).
    @override
    @private
    method cleanup() {
        if owner == null {
            data.destroy()
        }
    }
 482  
    ================================================================================================
    Returns a list of the bytes in this string's UTF-8 representation. The returned list is a view
    which reads directly from this string rather than a copy.
    ================================================================================================
    function get_utf8():ListView<Char8> {
        return UTF8List(self)
    }
 489  
    ================================================================================================
    Returns an iterator which returns the code units of this string's UTF-16 representation. As not
    all Unicode codepoints fit into a single UTF-16 code unit, the returned code units may contain
    surrogate pairs.
    ================================================================================================
    function get_utf16():Iterator<Char16> {
        -- NOTE: UTF16Iterator is currently a stub whose init ends in `unreachable, "unimplemented"`
        return UTF16Iterator(self)
    }
 498  
    ================================================================================================
    Returns an `Iterator` over the characters in this string. Each value produced is one Unicode
    codepoint decoded from the string's UTF-8 data.
    ================================================================================================
    @override
    function get_iterator():Iterator<Char32> {
        return UTF32Iterator(self)
    }
 506  
    ================================================================================================
    Returns the number of Unicode codepoints in the string. Note that because the string is
    internally stored in the variable-length UTF-8 encoding, determining the number of Unicode
    codepoints in the string can be a relatively expensive operation for long strings (linear with
    respect to the size of the string).
    ================================================================================================
    function get_length():Int {
        -- counts codepoints by running the codepoint iterator over the string
        return iterator.count()
    }
 516  
    ================================================================================================
    Returns the number of bytes of storage taken up by this string's internal UTF-8 encoding. This
    is a stored value, so no scan of the string is required.
    ================================================================================================
    function get_byteLength():Int {
        return _length
    }
 523  
 524      ================================================================================================
 525      Returns `true` if this string begins with `other`.
 526      ================================================================================================
 527      function startsWith(other:String):Bit {
 528          if _length < other._length {< other._length {
 529              return false
 530          }
 531          for i in 0 .. other._length {
 532              if data[i] != other.data[i] {
 533                  return false
 534              }
 535          }
 536          return true
 537      }
 538  
 539      ================================================================================================
 540      Returns `true` if this string ends with `other`.
 541      ================================================================================================
 542      function endsWith(other:String):Bit {
 543          if _length < other._length {< other._length {
 544              return false
 545          }
 546          for i in 0 .. other._length {
 547              if data[_length - other._length + i] != other.data[i] {
 548                  return false
 549              }
 550          }
 551          return true
 552      }
 553  
 554      function get_trimmed():String {
 555          if _length = 0 {
 556              return self
 557          }
 558          var start := 0
 559          while start < _length &< _length & data[start].isWhitespace {
 560              start += 1
 561          }
 562  
 563          var end := _length - 1
 564          while end >= start & data[end].isWhitespace {
 565              end -= 1
 566          }
 567  
 568          return self[Index(start) ... Index(end)]
 569      }
 570  
 571      ================================================================================================
 572      Returns the concatenation of this string and another string.
 573      ================================================================================================
 574      function +(other:String):String {
 575          def result := Pointer<Char8>.alloc(_length + other._length)
 576          for i in 0 .. _length {
 577              result[i] := data[i]
 578          }
 579          for i in 0 .. other._length {
 580              result[_length + i] := other.data[i]
 581          }
 582          return String(result, _length + other._length)
 583      }
 584  
 585      ================================================================================================
 586      Returns the concatenation of this string and another object's string representation. The
 587      object's string representation is computed using its [toString property](Object.toString).
 588      ================================================================================================
 589      function +(other:Object):String {
 590          return self + other.toString
 591      }
 592  
 593      ================================================================================================
 594      Returns a string consisting of `count` copies of this string.
 595      ================================================================================================
 596      @pre(count >= 0)
 597      function *(count:Int):String {
 598          def result := MutableString()
 599          for i in 0 .. count {
 600              result.append(self)
 601          }
 602          return result.finish()
 603      }
 604  
 605      ================================================================================================
 606      Returns a string consisting of `count` copies of `s`.
 607      ================================================================================================
 608      @class
 609      @pre(count >= 0)
 610      function *(count:Int, s:String):String {
 611          def result := MutableString()
 612          for i in 0 .. count {
 613              result.append(s)
 614          }
 615          return result.finish()
 616      }
 617  
 618      ================================================================================================
 619      Returns the concatenation of another object's string representation and this string. The
 620      object's string representation is computed using its [toString property](Object.toString).
 621      ================================================================================================
 622      @class
 623      function +(o:Object, s:String):String {
 624          return o.toString + s
 625      }
 626  
 627      ================================================================================================
 628      Returns true if these two strings are equal (contain the same sequence of codepoints). Strings
 629      which logically mean the same thing but contain different codepoints are not equal. For
 630      instance, the string with Unicode codepoint LATIN CAPITAL LETTER A WITH ACUTE and the string
 631      with Unicode codepoints LATIN CAPITAL LETTER A followed by COMBINING ACUTE ACCENT will (in most
 632      programs) display and behave exactly the same, but they do not contain the same sequence of
 633      codepoints and therefore are not equal.
 634      ================================================================================================
 635      @override
 636      function =(other:String):Bit {
 637          if _length != other._length {
 638              return false
 639          }
 640          for i in 0 .. _length {
 641              if data[i]->Char8 != other.data[i]->Char8 {
 642                  return false
 643              }
 644          }
 645          return true
 646      }
 647  
 648      ================================================================================================
 649      Returns `true` if this string is greater than the other string when considered in a
 650      codepoint-by-codepoint fashion. This is sufficient to provide *some* ordering of strings, and
 651      will generally work acceptably for pure ASCII strings, but will not yield the expected sort
 652      order in most locales.
 653      ================================================================================================
 654      @override
 655      function >(other:String):Bit {
 656          var selfIndex := start
 657          var otherIndex := other.start
 658          while selfIndex < end &< end & otherIndex < other.end {< other.end {
 659              def c1 := self[selfIndex]
 660              def c2 := other[otherIndex]
 661              if c1 = c2 {
 662                  selfIndex := next(selfIndex)
 663                  otherIndex := other.next(otherIndex)
 664                  continue
 665              }
 666              return c1 > c2
 667          }
 668          return selfIndex != end
 669      }
 670  
 671      ================================================================================================
 672      Returns the Unicode codepoint at the given offset within the string.
 673      ================================================================================================
 674      function [](index:Index):Char32 {
 675          def idx := index.byteOffset
 676          def c := data[idx]
 677          var result := c.asInt32
 678          if c.asUInt8 < 0b11000000< 0b11000000 {
 679              return Char32(result)
 680          }
 681          if c.asUInt8 < 0b11100000< 0b11100000 {
 682              if idx + 1 >= _length {
 683                  return Char32(0)
 684              }
 685              result := (result && 0b11111) << 6 + data[idx + 1].asInt32 && 0b111111
 686              return Char32(result)
 687          }
 688          if c.asUInt8 < 0b11110000< 0b11110000 {
 689              if idx + 2 >= _length {
 690                  return Char32(0)
 691              }
 692              result := (result && 0b1111) << 12 + (data[idx + 1].asInt32 && 0b111111) << 6 +
 693                      data[idx + 2].asInt32 && 0b111111
 694              return Char32(result)
 695          }
 696          if idx + 3 >= _length {
 697              return Char32(0)
 698          }
 699          result := (result && 0b111) << 18 + (data[idx + 1].asInt32 && 0b111111) << 12 +
 700                  (data[idx + 2].asInt32 && 0b111111) << 6 +
 701                  data[idx + 3].asInt32 && 0b111111
 702          return Char32(result)
 703      }
 704  
 705      ================================================================================================
 706      Returns the Unicode codepoint at the given offset within the string. This overload of the `[]`
 707      operator is slower than the overload that accepts an `Index` parameter, as it must scan the
 708      (internally UTF-8) string from the beginning to find the correct index.
 709      ================================================================================================
 710      function [](index:Int):Char32 {
 711          return self[offset(start, index)]
 712      }
 713  
 714      ================================================================================================
 715      Returns a 'dependent' substring of a string. `string.substring(range)` behaves exactly the same
 716      as the more-common `string[range]`, except that `substring` does not copy the characters into a
 717      new memory buffer, instead referring directly to the memory held by the "parent" string. This
 718      means that the parent string will remain in memory as long as any of its substrings do.
 719  
 720      `string.substring(range)` is therefore much more efficient than `string[range]`, provided that
 721      forcing the parent string to remain in memory is acceptable.
 722      ================================================================================================
 723      @pre(r.min.byteOffset >= 0 & r.min.byteOffset <= byteLength &
 724              r.max.byteOffset >= 0 & r.max.byteOffset < byteLength +< byteLength + r.inclusive.choose(0, 1))
 725      function substring(r:Range<Index>):String {
 726          var length := r.max.byteOffset - r.min.byteOffset
 727          if r.inclusive {
 728              length += 1
 729          }
 730          return String(data + r.min.byteOffset, length, self)
 731      }
 732  
 733      ================================================================================================
 734      Returns a 'dependent' substring of a string. `string.substring(range)` behaves exactly the same
 735      as the more-common `string[range]`, except that `substring` does not copy the characters into a
 736      new memory buffer, instead referring directly to the memory held by the "parent" string. This
 737      means that the parent string will remain in memory as long as any of its substrings do.
 738  
 739      `string.substring(range)` is therefore much more efficient than `string[range]`, provided that
 740      forcing the parent string to remain in memory is acceptable.
 741  
 742      As with other `Range` methods, a null `min` starts at the beginning of the string, and a null
 743      `max` ends at the end of the string.
 744      ================================================================================================
 745      @pre((r.min == null | (r.min.byteOffset >= 0 & r.min.byteOffset <= byteLength)) &
 746              (r.max == null | (r.max.byteOffset >= 0 & r.max.byteOffset < byteLength +< byteLength +
 747              r.inclusive.choose(0, 1))))
 748      function substring(r:Range<Index?>):String {
 749          def min:Index
 750          if r.min !== null {
 751              min := r.min
 752          }
 753          else {
 754              min := start
 755          }
 756  
 757          var inclusive := r.inclusive
 758          def max:Index
 759          if r.max !== null {
 760              max := r.max
 761          }
 762          else {
 763              max := end
 764              inclusive := false
 765          }
 766          return substring(Range<Index>(min, max, inclusive))
 767      }
 768  
 769      ================================================================================================
 770      Returns a substring of a string. If `Range.min` is greater than `Range.max`, the resulting
 771      substring will be empty.
 772      ================================================================================================
 773      @pre(r.min.byteOffset >= 0 & r.min.byteOffset <= byteLength &
 774              r.max.byteOffset >= 0 & r.max.byteOffset < byteLength +< byteLength + r.inclusive.choose(0, 1))
 775      function [](r:Range<Index>):String {
 776          var current := r.min.byteOffset
 777          def result := MutableString()
 778          var max := r.max.byteOffset
 779          if r.inclusive {
 780              max += 1
 781          }
 782          while current < max {< max {
 783              def c := data[current]
 784              result.append(c)
 785              current += 1
 786              if c.asUInt8 >= 0b11000000 {
 787                  result.append(data[current])
 788                  current += 1
 789              }
 790              if c.asUInt8 >= 0b11100000 {
 791                  result.append(data[current])
 792                  current += 1
 793              }
 794              if c.asUInt8 >= 0b11110000 {
 795                  result.append(data[current])
 796                  current += 1
 797              }
 798          }
 799          return result.finish()
 800      }
 801  
 802      ================================================================================================
 803      Returns a substring of a string. If `Range.min` is not specified, the substring will start at
 804      the beginning of the string. If `Range.max` is not specified, the substring will end at the end
 805      of the string. If `Range.min` is greater than `Range.max`, the resulting substring will be
 806      empty.
 807      ================================================================================================
 808      @priority(1)
 809      @pre((r.min == null | (r.min.byteOffset >= 0 & r.min.byteOffset <= byteLength)) &
 810              (r.max == null | (r.max.byteOffset >= 0 & r.max.byteOffset < byteLength +< byteLength +
 811              r.inclusive.choose(0, 1))))
 812      function [](r:Range<Index?>):String {
 813          if _length = 0 {
 814              return ""
 815          }
 816          def min:Index
 817          if r.min !== null {
 818              min := r.min
 819          }
 820          else {
 821              min := start
 822          }
 823  
 824          var inclusive := r.inclusive
 825          def max:Index
 826          if r.max !== null {
 827              max := r.max
 828          }
 829          else {
 830              max := end
 831              inclusive := false
 832          }
 833          return self[Range<Index>(min, max, inclusive)]
 834      }
 835  
 836      ================================================================================================
 837      Returns a substring of a string. The `Range.step` value is interpreted in terms of Unicode
 838      codepoints: that is, `s[... by 2]` will return a `String` consisting of every other Unicode
 839      codepoint in `s`. As some Unicode characters consist of more than one codepoint (e.g. when using
 840      combining diacriticals, Emoji skin tone modifiers, or Emoji flags), this will mangle such
 841      characters when the step is not `1`.
 842  
    A negative step value will scan the string backwards from the starting point, thus `[.. by -1]`
    will reverse the Unicode codepoints in the input string. Note again that this will mangle
    Unicode characters which consist of more than one Unicode codepoint.
 846      ================================================================================================
 847      @priority(1)
 848      @pre((r.start == null | (r.start.byteOffset >= 0 & r.start.byteOffset <= byteLength)) &
 849              (r.end == null | (r.end.byteOffset >= 0 & r.end.byteOffset < byteLength +< byteLength +
 850              r.inclusive.choose(0, 1))))
 851      function [](r:SteppedRange<Index?, Int>):String {
 852          if _length = 0 {
 853              return ""
 854          }
 855          def step := r.step
 856  
 857          var current:Int
 858          if r.start !== null {
 859              current := r.start.byteOffset
 860          }
 861          else if step > 0 {
 862              current := start.byteOffset
 863          }
 864          else {
 865              current := previous(end).byteOffset
 866          }
 867  
 868          def end:Int
 869          if r.end !== null {
 870              end := r.end.byteOffset
 871          }
 872          else if step > 0 {
 873              end := self.end.byteOffset
 874          }
 875          else {
 876              end := self.start.byteOffset
 877          }
 878  
 879          def result := MutableString()
 880          if r.step > 0 {
 881              while current < end {< end {
 882                  def c := data[current]
 883                  result.append(c)
 884                  current += 1
 885                  if c.asUInt8 >= 0b11000000 {
 886                      result.append(data[current])
 887                      current += 1
 888                  }
 889                  if c.asUInt8 >= 0b11100000 {
 890                      result.append(data[current])
 891                      current += 1
 892                  }
 893                  if c.asUInt8 >= 0b11110000 {
 894                      result.append(data[current])
 895                      current += 1
 896                  }
 897                  for i in 1 .. step {
 898                      if current >= end {
 899                          return result.toString
 900                      }
 901                      current := next(Index(current)).byteOffset
 902                  }
 903              }
 904          }
 905          else {
 906              assert r.step < 0< 0
 907              while current > end {
 908                  def c := data[current]
 909                  result.append(c)
 910                  def old := current
 911                  current += 1
 912                  if c.asUInt8 >= 0b11000000 {
 913                      result.append(data[current])
 914                      current += 1
 915                  }
 916                  if c.asUInt8 >= 0b11100000 {
 917                      result.append(data[current])
 918                      current += 1
 919                  }
 920                  if c.asUInt8 >= 0b11110000 {
 921                      result.append(data[current])
 922                  }
 923                  current := old - 1
 924                  while data[current].asUInt8 >= 0b10000000 & data[current].asUInt8 < 0b11000000< 0b11000000 {
 925                      current -= 1
 926                  }
 927                  for i in -1 .. step by -1 {
 928                      if current <= end {
 929                          return result.toString
 930                      }
 931                      current := previous(Index(current)).byteOffset
 932                  }
 933              }
 934          }
 935          if (r.inclusive | r.end == null) & current = end & end < _length {< _length {
 936              def c := data[current]
 937              result.append(c)
 938              current += 1
 939              if c.asUInt8 >= 0b11000000 {
 940                  result.append(data[current])
 941                  current += 1
 942              }
 943              if c.asUInt8 >= 0b11100000 {
 944                  result.append(data[current])
 945                  current += 1
 946              }
 947              if c.asUInt8 >= 0b11110000 {
 948                  result.append(data[current])
 949              }
 950          }
 951          return result.finish()
 952      }
 953  
 954      ================================================================================================
 955      Returns a substring of a string. This version of the `[]` operator is slower than the one that
 956      accepts a `Range<Index>` parameter, as it must scan the (internally UTF-8) string from the
 957      beginning to find the right offsets.
 958      ================================================================================================
 959      function [](r:Range<Int>):String {
 960          return self[Range<Index>(offset(self.start, r.min), offset(self.start, r.max),
 961                  r.inclusive)]
 962      }
 963  
 964      ================================================================================================
 965      Returns a substring of a string. This version of the `[]` operator is slower than the one that
 966      accepts a `Range<Index?>` parameter, as it must scan the (internally UTF-8) string from the
 967      beginning to find the right offsets.
 968      ================================================================================================
 969      function [](r:Range<Int?>):String {
 970          def start:Index?
 971          if r.min !== null {
 972              start := offset(self.start, r.min)
 973          }
 974          else {
 975              start := null
 976          }
 977          def end:Index?
 978          if r.max !== null {
 979              end := offset(self.start, r.max)
 980          }
 981          else {
 982              end := null
 983          }
 984          return self[Range<Index?>(start, end, r.inclusive)]
 985      }
 986  
 987      ================================================================================================
    Returns a substring of a string. This version of the `[]` operator is slower than the one that
    accepts a `SteppedRange<Index?, Int>` parameter, as it must scan the (internally UTF-8) string
    from the beginning to find the right offsets.
 991      ================================================================================================
 992      function [](r:SteppedRange<Int?, Int>):String {
 993          def start:Index?
 994          if r.start !== null {
 995              start := offset(self.start, r.start)
 996          }
 997          else {
 998              start := null
 999          }
1000          def end:Index?
1001          if r.end !== null {
1002              end := offset(self.start, r.end)
1003          }
1004          else {
1005              end := null
1006          }
1007          return self[SteppedRange<Index?, Int>(start, end, r.step, r.inclusive)]
1008      }
1009  
1010      ================================================================================================
1011      Returns `true` if this string contains at least one occurrence of the given character.
1012      ================================================================================================
1013      function contains(c:Char8):Bit {
1014          for i in 0 .. _length {
1015              if data[i] = c {
1016                  return true
1017              }
1018          }
1019          return false
1020      }
1021  
1022      ================================================================================================
1023      Returns `true` if this string contains at least one occurrence of the given substring.
1024      ================================================================================================
1025      function contains(s:String):Bit {
1026          return indexOf(s) !== null
1027      }
1028  
1029      ================================================================================================
1030      Returns the index of the first occurrence of the string `s` within this string, or `null` if not
1031      found.
1032  
1033      @param s the string to search for
1034      @returns the index of the match, or `null` if not found
1035      ================================================================================================
1036      function indexOf(s:String):Index? {
1037          return indexOf(s, start)
1038      }
1039  
1040      ================================================================================================
    Returns the index of the first occurrence of the string `s` within this string, starting from
    the specified `start` index, or `null` if not found.
1043  
1044      @param s the string to search for
1045      @param start the index to begin searching from
1046      @returns the index of the match, or `null` if not found
1047      ================================================================================================
1048      function indexOf(s:String, start:Index):Index? {
1049          if _length < s._length {< s._length {
1050              return null
1051          }
1052          outer: for i in start.byteOffset ... _length - s._length {
1053              for j in 0 .. s._length {
1054                  if data[i + j] != s.data[j] {
1055                      continue outer
1056                  }
1057              }
1058              return Index(i)
1059          }
1060          return null
1061      }
1062  
1063      ================================================================================================
1064      Returns the index of the last occurrence of the string `s` within this string, or `null` if not
1065      found.
1066  
1067      @param s the string to search for
1068      @returns the index of the match, or `null` if not found
1069      ================================================================================================
1070      function lastIndexOf(s:String):Index? {
1071          return lastIndexOf(s, end)
1072      }
1073  
1074      ================================================================================================
    Returns the index of the last occurrence of the string `s` within this string, starting the
    search backwards from the specified `start` index, or `null` if not found.
1077  
1078      @param s the string to search for
1079      @param start the index to begin searching from
1080      @returns the index of the match, or `null` if not found
1081      ================================================================================================
1082      function lastIndexOf(s:String, start:Index):Index? {
1083          if _length < s._length {< s._length {
1084              return null
1085          }
1086          def startPos := start.byteOffset.min(_length - s._length)
1087          outer: for i in startPos ... 0 by -1 {
1088              for j in 0 .. s._length {
1089                  if data[i + j] != s.data[j] {
1090                      continue outer
1091                  }
1092              }
1093              return Index(i)
1094          }
1095          return null
1096      }
1097  
1098      ================================================================================================
1099      Returns `true` if this string matches the given regular expression. The regular expression must
1100      match the entire string.
1101  
1102      @param regex the regular expression to compare against
1103      @returns `true` if the string matches
1104      ================================================================================================
1105      function matches(regex:RegularExpression):Bit {
1106          return regex.matcher(self).matches()
1107      }
1108  
1109      ================================================================================================
1110      Returns `true` if this string contains a match for the given regular expression. The regular
1111      expression may match zero or more characters of the string, starting at any point.
1112  
1113      @param needle the regular expression to search for
1114      @returns `true` if the string contains a match
1115      ================================================================================================
1116      function contains(needle:RegularExpression):Bit {
1117          return needle.matcher(self).find()
1118      }
1119  
1120      ================================================================================================
1121      Matches the string against the given regular expression, returning an array of its capture
1122      groups. Group 0, the group containing the entire string, is not returned. If the string does not
1123      match the regular expression, returns `null`. For example, `"1,2,34".parse(/(\d+),(\d+),(\d+)/)`
1124      will return a list consisting of `"1"`, `"2"`, and `"34"`.
1125      
1126      @param regex the regular expression to parse against
1127      @returns a list of the capture groups, or `null` if the string did not match
1128      ================================================================================================
1129      -- @safeReturn FIXME
1130      function parse(regex:RegularExpression):Array<String?>? {
1131          def matcher := regex.matcher(self)
1132          if matcher.matches() {
1133              def result := Array<String?>()
1134              for i in 1 .. matcher.get_groupCount() {
1135                  result.add(matcher.group(i))
1136              }
1137              return result
1138          }
1139          return null
1140      }
1141  
1142      ================================================================================================
1143      Returns a new string with every occurrence of `search` replaced with `replacement`.
1144  
1145      @param search the string to search for
1146      @param replacement the replacement text
1147      @returns a string with all matches replaced
1148      ================================================================================================
1149      function replace(search:String, replacement:String):String {
1150          if search = "" {
1151              return self
1152          }
1153          def result := MutableString()
1154          var index := start
1155          loop {
1156              def nextIndex := indexOf(search, index)
1157              if nextIndex == null {
1158                  result.append(self[index..])
1159                  break
1160              }
1161              result.append(self[index .. nextIndex->Index])
1162              result.append(replacement)
1163              index := Index(nextIndex.byteOffset + search._length)
1164          }
1165          return result.finish()
1166      }
1167  
1168      ================================================================================================
1169      Returns a new string with every match of `search` replaced with `replacement`. The replacement
1170      string may contain `$1`-style regular expression group references; for instance
1171      `s.replace(regex, "$1")` will replace every occurrence of the regex with the contents of its
1172      first group.
1173      
1174      @param search the regular expression to search for
1175      @param replacement the replacement text
1176      @returns a string with all matches replaced
1177      ================================================================================================
1178      function replace(search:RegularExpression, replacement:String):String {
1179          return replace(search, replacement, true)
1180      }
1181  
1182      function replace(search:RegularExpression, replacement:String,
1183              allowGroupReferences:Bit):String {
1184          def result := MutableString()
1185          def matcher := search.matcher(self)
1186          while matcher.find() {
1187              matcher.appendReplacement(result, replacement, allowGroupReferences)
1188          }
1189          matcher.appendTail(result)
1190          return result.finish()
1191      }
1192  
1193      ================================================================================================
1194      Searches the string for a regular expression, replacing occurrences of the regular expression
1195      with new text determined by a function. For instance, given:
1196  
1197          -- testcase StringReplace(PrintLine)
1198          "This is a test!".replace(/\w+/, word => word.length)
1199  
1200      The regular expression `/\w+/` matches sequences of one or more word characters; in other words,
1201      it matches all words occurring in the string. The replacement function `word => word.length`
1202      replaces each matched sequence with the number of characters in the sequence, resulting in the
1203      text:
1204  
1205          4 2 1 4!
1206  
1207      @param search the regular expression to match the string with
1208      @param replacement a function generating the replacement text
1209      @returns a new string with all occurrences of the regular expression replaced
1210      ================================================================================================
1211      function replace(search:RegularExpression, replacement:(String)=>(Object)):String {
1212          def result := MutableString()
1213          def matcher := search.matcher(self)
1214          while matcher.find() {
1215              matcher.appendReplacement(result, replacement(matcher.group(0)).toString, false)
1216          }
1217          matcher.appendTail(result)
1218          return result.toString
1219      }
1220  
1221      method replace(search:RegularExpression, replacement:(String)=&>(Object)):String {
1222          def result := MutableString()
1223          def matcher := search.matcher(self)
1224          while matcher.find() {
1225              matcher.appendReplacement(result, replacement(matcher.group(0)).toString, false)
1226          }
1227          matcher.appendTail(result)
1228          return result.toString
1229      }
1230  
1231      ================================================================================================
1232      As [replace(RegularExpression, (String)=>(Object))], but the replacement function receives the
1233      capture groups from the regular expression rather than the raw matched text. The groups list
1234      includes the special whole-match group at index `0`, with the first set of parentheses in the
1235      regular expression corresponding to index `1`.
1236  
1237      @param search the regular expression to match the string with
1238      @param replacement a function generating the replacement text
1239      @returns a new string with all occurrences of the regular expression replaced
1240      ================================================================================================
1241      function replace(search:RegularExpression, replacement:(ListView<String?>)=>(Object)):String {
1242          def result := MutableString()
1243          def matcher := search.matcher(self)
1244          while matcher.find() {
1245              def groups := Array<String?>()
1246              for i in 0 .. matcher.get_groupCount() {
1247                  groups.add(matcher.group(i))
1248              }
1249              matcher.appendReplacement(result, replacement(groups).toString, false)
1250          }
1251          matcher.appendTail(result)
1252          return result.toString
1253      }
1254  
1255      method replace(search:RegularExpression, replacement:(ListView<String?>)=&>(Object)):String {
1256          def result := MutableString()
1257          def matcher := search.matcher(self)
1258          while matcher.find() {
1259              def groups := Array<String?>()
1260              for i in 0 .. matcher.get_groupCount() {
1261                  groups.add(matcher.group(i))
1262              }
1263              matcher.appendReplacement(result, replacement(groups).toString, false)
1264          }
1265          matcher.appendTail(result)
1266          return result.toString
1267      }
1268      
1269      function find(needle:String):Iterator<String.Index> {
1270          return find(needle, false)
1271      }
1272  
1273      function find(needle:String, overlapping:Bit):Iterator<String.Index> {
1274          return MatchIterator(self, needle, overlapping)
1275      }
1276  
1277      function find(needle:RegularExpression):Iterator<Match> {
1278          return find(needle, false)
1279      }
1280  
1281      function find(needle:RegularExpression, overlapping:Bit):Iterator<Match> {
1282          return RegexMatchIterator(self, needle, overlapping)
1283      }
1284  
1285      ================================================================================================
1286      Returns the index of the first character in the string.
1287      ================================================================================================
1288      function get_start():Index {
1289          return Index(0)
1290      }
1291  
1292      ================================================================================================
1293      Returns the index just past the end of the string.
1294      ================================================================================================
1295      function get_end():Index {
1296          return Index(_length)
1297      }
1298  
1299      ================================================================================================
1300      Returns the index of the Unicode codepoint after the given index. It is an error to call
1301      `next()` when already at the end of the string. Note that because a logical character can
1302      consist of multiple Unicode codepoints (such as LATIN SMALL LETTER A followed by COMBINING ACUTE
1303      ACCENT), this may return an index in the middle of such a compound character.
1304      ================================================================================================
1305      function next(i:Index):Index {
1306          assert i.byteOffset < _length
1307          def< _length
1308          def c := data[i.byteOffset].asUInt8
1309          if c >= 0b11110000 {
1310              return Index(i.byteOffset + 4)
1311          }
1312          if c >= 0b11100000 {
1313              return Index(i.byteOffset + 3)
1314          }
1315          if c >= 0b11000000 {
1316              return Index(i.byteOffset + 2)
1317          }
1318          return Index(i.byteOffset + 1)
1319      }
1320  
1321      ================================================================================================
1322      Returns the index of the Unicode codepoint before the given index. It is an error to call
1323      `previous()` when already at the beginning of the string. Note that because a logical character
1324      can consist of multiple Unicode codepoints (such as LATIN SMALL LETTER A followed by COMBINING
1325      ACUTE ACCENT), this may return an index in the middle of such a compound character.
1326      ================================================================================================
1327      function previous(i:Index):Index {
1328          assert i.byteOffset > 0
1329          var newValue := i.byteOffset - 1
1330          while data[newValue].asInt && 0xFF >= 0b10000000 &
1331                  data[newValue].asInt && 0xFF < 0b11000000 {
1332              newValue -= 1
1333          }
1334          return Index(newValue)
1335      }
1336  
1337      ================================================================================================
1338      Returns the index offset by `offset` Unicode codepoints. It is an error to index before the
1339      beginning or after the end of the string.  Note that because a logical character can consist of
1340      multiple Unicode codepoints (such as LATIN SMALL LETTER A followed by COMBINING ACUTE ACCENT),
1341      this may return an index in the middle of such a compound character.
1342      ================================================================================================
1343      function offset(index:Index, offset:Int):Index {
1344          var result := index
1345          if offset > 0 {
1346              for i in 0 .. offset {
1347                  result := next(result)
1348              }
1349          }
1350          else {
1351              for i in 0 .. offset by -1 {
1352                  result := previous(result)
1353              }
1354          }
1355          return result
1356      }
1357  
1358      ================================================================================================
1359      Returns a new string consisting of this string left-justified in a field of at least `width`
1360      characters. If this string has a length greater than or equal to `width`, this string is
1361      returned. If this string is shorter than `width`, space characters are appended until the
1362      resulting string is `width` characters long.
1363  
1364      @param width the minimum width of the string
1365      @returns a string at least `width` characters long
1366      ================================================================================================
1367      function leftAlign(width:Int):String {
1368          return leftAlign(width, " ")
1369      }
1370  
1371      ================================================================================================
1372      Returns a new string consisting of this string left-justified in a field of at least `width`
1373      characters, filled with the specified character. If this string has a length greater than or
1374      equal to `width`, this string is returned. If this string is shorter than `width`, `fill`
1375      characters are appended until the resulting string is `width` characters long.
1376  
1377      @param width the minimum width of the string
1378      @param fill the fill character
1379      @returns a string at least `width` characters long
1380      ============================================================================
1381      function leftAlign(width:Int, fill:Char32):String {
1382          if length >= width {
1383              return self
1384          }
1385          return self + fill * (width - length)
1386      }
1387  
1388      ================================================================================================
1389      Returns a new string consisting of this string right-justified in a field of at least `width`
1390      characters. If this string has a length greater than or equal to `width`, this string is
1391      returned. If this string is shorter than `width`, space characters are prepended until the
1392      resulting string is `width` characters long.
1393  
1394      @param width the minimum width of the string
1395      @returns a string at least `width` characters long
1396      ================================================================================================
1397      function rightAlign(width:Int):String {
1398          return rightAlign(width, " ")
1399      }
1400  
1401      ================================================================================================
1402      Returns a new string consisting of this string right-justified in a field of at least `width`
1403      characters, filled with the specified character. If this string has a length greater than or
1404      equal to `width`, this string is returned. If this string is shorter than `width`, `fill`
1405      characters are prepended until the resulting string is `width` characters long.
1406  
1407      @param width the minimum width of the string
1408      @param fill the fill character
1409      @returns a string at least `width` characters long
1410      ================================================================================================
1411      function rightAlign(width:Int, fill:Char32):String {
1412          if length >= width {
1413              return self
1414          }
1415          return fill * (width - length) + self
1416      }
1417  
1418      ================================================================================================
1419      Returns a new string consisting of this string centered in a field of at least `width`
1420      characters. If this string has a length greater than or equal to `width`, this string is
1421      returned. If this string is shorter than `width`, space characters are added as equally as
1422      possible to the left and right until the resulting string is `width` characters long. If the
1423      number of characters to be added is odd, the right side of the string will receive one more
1424      space than the left side.
1425  
1426      @param width the minimum width of the string
1427      @returns a string at least `width` characters long
1428      ================================================================================================
1429      function centerAlign(width:Int):String {
1430          return centerAlign(width, " ")
1431      }
1432  
1433      ================================================================================================
1434      Returns a new string consisting of this string centered in a field of at least `width`
1435      characters, filled with the specified character. If this string has a length greater than or
1436      equal to `width`, this string is returned. If this string is shorter than `width`, `fill`
1437      characters are added as equally as possible to the left and right until the resulting string is
1438      `width` characters long. If the number of characters to be added is odd, the right side of the
1439      string will receive one more `fill` character than the left side.
1440  
1441      @param width the minimum width of the string
1442      @param fill the fill character
1443      @returns a string at least `width` characters long
1444      ================================================================================================
1445      function centerAlign(width:Int, fill:Char32):String {
1446          if length >= width {
1447              return self
1448          }
1449          def pad := width - length
1450          def left := pad // 2
1451          def right := pad - left
1452          return fill * left + self + fill * right
1453      }
1454  
1455      ================================================================================================
1456      Splits this string into tokens separated by a delimiter. For instance,
1457      `"This is a long string".split(" ")` yields `"This"`, `"is"`, `"a"`, `"long"`, and `"string"`.
1458  
1459      @param delimiter the token delimiter
1460      @returns the split tokens
1461      ================================================================================================
1462      function split(delimiter:String):Array<String> {
1463          return split(delimiter, Int.MAX)
1464      }
1465  
1466      ================================================================================================
1467      Splits this string into tokens separated by a delimiter. At most `maxResults` results will be
1468      returned; any additional delimiters beyond that point will be ignored. For instance,
1469      `"This is a long string".split(" ", 3)` yields `"This"`, `"is"`, and `"a long string"`.
1470  
1471      @param delimiter the token delimiter
1472      @param maxResults the maximum number of results to return
1473      @returns the split tokens
1474      ================================================================================================
1475      function split(delimiter:String, maxResults:Int):Array<String> {
1476          def result := Array<String>()
1477          var index := start
1478          loop {
1479              def nextIndex:Index?
1480              if delimiter._length = 0 {
1481                  nextIndex := next(index)
1482              }
1483              else {
1484                  nextIndex := indexOf(delimiter, index)
1485              }
1486              if nextIndex == null | result.count = maxResults - 1 {
1487                  result.add(self[index..])
1488                  break
1489              }
1490              result.add(self[index .. nextIndex->Index])
1491              index := Index(nextIndex.byteOffset + delimiter._length)
1492          }
1493          return result
1494      }
1495  
1496      ================================================================================================
1497      Splits this string into tokens separated by a delimiter. At most `maxResults` different strings
1498      will be returned; any additional delimiters beyond that point will be ignored. For instance,
1499      `"This  is a long    string".split(/\s+/, 3)` yields `"This"`, `"is"`, and `"a long    string"`.
1500  
1501      @param delimiter the token delimiter
1502      @param maxResults the maximum number of results to return
1503      @returns the split tokens
1504      ================================================================================================
1505      -- FIXME @safeReturn
1506      @pre(maxResults > 0)
1507      function split(delimiter:RegularExpression, maxResults:Int):Array<String> {
1508          def result := Array<String>()
1509          def matcher := delimiter.matcher(self)
1510          var index := start
1511          loop {
1512              def found := matcher.find()
1513              if !found | result.count = maxResults - 1 {
1514                  result.add(self[index..])
1515                  break
1516              }
1517              def start := matcher.start
1518              result.add(self[index .. start])
1519              index := Index(start.byteOffset.max(matcher.end.byteOffset))
1520          }
1521          return result
1522      }
1523  
1524      ================================================================================================
1525      Splits this string into tokens separated by a delimiter. For instance,
1526      `"This  is a long    string".split(/\s+/)` yields `"This"`, `"is"`, `"a"`, `"long"`, and
1527      `"string"`.
1528  
1529      @param delimiter the token delimiter
1530      @returns the split tokens
1531      ================================================================================================
1532      -- FIXME @safeReturn
1533      function split(delimiter:RegularExpression):Array<String> {
1534          return split(delimiter, Int.MAX)
1535      }
1536  
1537      ================================================================================================
1538      Converts this string to a signed number. The string must be a sequence of decimal digits,
1539      optionally preceded by a minus sign (`-`), whose numeric representation can fit into an `Int64`.
1540      Returns `null` if the conversion fails.
1541  
1542      @returns this string converted to a number, or `null`
1543      ================================================================================================
1544      property asInt:Int?
1545      function get_asInt():Int? {
1546          def result := asInt64
1547          if result !== null {
1548              return result.asInt
1549          }
1550          return null
1551      }
1552  
1553      ================================================================================================
1554      Converts this string to a signed number. The string must be a sequence of decimal digits,
1555      optionally preceded by a minus sign (`-`), whose numeric representation can fit into an `Int64`.
1556      Returns `null` if the conversion fails.
1557  
1558      @returns this string converted to a number, or `null`
1559      ================================================================================================
1560      property asInt64:Int64?
1561      function get_asInt64():Int64? {
1562          if _length = 0 {
1563              return null
1564          }
1565          var result:Int64 := 0
1566          var start:Int
1567          if data[0] = "-" {
1568              start := 1
1569          }
1570          else {
1571              start := 0
1572          }
1573          for i in start .. _length {
1574              def digit := data[i].asInt64 - 48
1575              if digit < 0< 0 | digit > 9 {
1576                  return null
1577              }
1578              result := result * 10 + digit
1579          }
1580          if data[0] = "-" {
1581              result := -result
1582          }
1583          return result
1584      }
1585  
1586      ================================================================================================
1587      Converts this string to an unsigned number. The string must be a sequence of decimal digits
1588      whose numeric representation can fit into a `UInt64`. Returns `null` if the conversion fails.
1589  
1590      @returns this string converted to a number, or `null`
1591      ================================================================================================
1592      property asUInt64:UInt64?
1593      function get_asUInt64():UInt64? {
1594          if _length = 0 {
1595              return null
1596          }
1597          var result:UInt64 := 0
1598          for i in 0 .. _length {
1599              def digit := data[i].asUInt64 - 48
1600              if digit < 0< 0 | digit > 9 {
1601                  return null
1602              }
1603              result := result * 10 + digit
1604          }
1605          return result
1606      }
1607  
1608      ================================================================================================
1609      Converts this string to a real number. The string must be a valid Frost real literal. Returns
1610      `null` if the conversion fails.
1611  
1612      @returns this string converted to a number, or `null`
1613      ================================================================================================
1614      property asReal64:Real64?
1615      function get_asReal64():Real64? {
1616          return Frost.toReal64(self)
1617      }
1618  
1619      @override
1620      function get_hash():Int {
1621          -- FNV-1a hash, thanks to Fowler, Landon, and Vo. This algorithm is in the public
1622          -- domain: http://www.isthe.com/chongo/tech/comp/fnv/index.html#public_domain
1623          var h := 0xcbf29ce484222325
1624          for i in 0 .. _length {
1625              h ~~= data[i].asUInt8
1626              h *&= 0x100000001b3
1627          }
1628          return h
1629      }
1630  }