2 "string is an abstract class basically"
3 prototypes define: #String &parents: {Sequence. Collection. Mapping}.
5 String traits define: #Character.
7 a@(String Character traits) cr [$\r].
9 a@(String Character traits) lf [$\n].
11 a@(String Character traits) tab [$\t].
13 a@(String Character traits) space [$\s].
15 a@(String Character traits) stringEscape [$\\].
17 i@(Integer traits) as: a@(String Character traits)
18 "FIXME: convert Integers to ASCIIString Character by default.
19 This should later be replaced with some smarter framework or an optional encoding."
22 ifTrue: [i as: ASCIIString Character]
26 "ASCIIString Character traitsWindow atSlotNamed: #traits1 put: String Character traits."
28 "fix, timmy: i'm not sure what was intended here..."
29 ASCIIString Character traitsWindow _delegates:
30 {Root traits. Oddball traits. String Character traits. ASCIIString Character traits}.
32 ASCIIString Character traits CharacterSet ::= bootstrapCharacters as: Array.
33 "The full ASCII character set."
34 ASCIIString Character traits Whitespace ::= '\s\t\n\v\f\r\0'.
35 ASCIIString Character traits Vowels ::= 'AEIOUaeiou'.
36 ASCIIString Character traits Delimiters ::= '()[]{}\',` "'.
38 a@(ASCIIString Character traits) as: _@(Integer traits)
41 i@(Integer traits) as: a@(ASCIIString Character traits)
42 [a CharacterSet at: i].
44 a@(ASCIIString Character traits) = b@(ASCIIString Character traits)
49 ch@(ASCIIString Character traits) hash
54 a@(ASCIIString Character traits) codeRange
57 a@(ASCIIString Character traits) allCharacters
59 a codeRange collect: #(as: a) `er
62 ch@(ASCIIString Character traits) isAlphanumeric
64 ch isLetter \/ [ch isDigit]
67 ch@(ASCIIString Character traits) isWhitespace
69 ch Whitespace includes: ch
72 ch@(ASCIIString Character traits) isDelimiter
74 ch Delimiters includes: ch
77 ch@(ASCIIString Character traits) isDigit: radix
81 [value between: #[$0 code] and: #[$9 code]]
82 -> [value - #[$0 code] < radix].
83 [value between: #[$A code] and: #[$Z code]]
84 -> [value - #[$A code] < (radix - 10)].
85 [value between: #[$a code] and: #[$z code]]
86 -> [value - #[$a code] < (radix - 10)]
90 ch@(ASCIIString Character traits) isDigit
95 ch@(ASCIIString Character traits) toDigit: radix
97 (ch isDigit: radix) ifFalse: [^ Nil].
100 [value >= #[$a code]]
101 -> [value - #[$a code] + 10].
102 [value >= #[$A code]]
103 -> [value - #[$A code] + 10].
104 [value >= #[$0 code]]
105 -> [value - #[$0 code]]
109 ch@(ASCIIString Character traits) toDigit
114 ch@(ASCIIString Character traits) isLetter
116 ch isUppercase \/ [ch isLowercase]
119 ch@(ASCIIString Character traits) isLowercase
120 [ch code between: #[$a code] and: #[$z code]].
122 ch@(ASCIIString Character traits) toUppercase
125 ifTrue: [ch code - #[$\s code] as: ch]
129 ch@(ASCIIString Character traits) isUppercase
130 [ch code between: #[$A code] and: #[$Z code]].
132 ch@(ASCIIString Character traits) toLowercase
135 ifTrue: [ch code + #[$\s code] as: ch]
139 ch@(ASCIIString Character traits) isVowel
141 ch Vowels includes: ch
144 ch@(ASCIIString Character traits) isQuote
146 ch = $\' \/ [ch = $\"]
149 c1@(ASCIIString Character traits) < c2@(ASCIIString Character traits)
154 c1@(ASCIIString Character traits) > c2@(ASCIIString Character traits)
159 s1@(ASCIIString traits) < s2@(ASCIIString traits)
161 (s1 lexicographicallyCompare: s2) = -1
164 s1@(ASCIIString traits) <= s2@(ASCIIString traits)
169 ch@(ASCIIString Character traits) isPrintable
170 [ch code between: #[$\s code] and: #[$~ code]].
172 s@(String traits) new &capacity: n
173 "By default return ASCIIString's.
174 FIXME: this should redirect to some better encoding-preserving string implementation..."
177 ifTrue: [ASCIIString new &capacity: n]
181 s@(ASCIIString traits) size [18 primitiveDo: {s}]. "faster than: #size sendTo: {s} through: {ByteArray}"
182 s@(String traits) size [overrideThis].
184 a@(ByteArray traits) as: s@(String traits)
185 "FIXME: calls to this method should be either converted to direct ASCIIString calls,
186 or an optional hint should be used to decide on the encoding in the ByteArray."
189 ifTrue: [a as: ASCIIString]
193 s@(String traits) writer
194 "FIXME: calls to this method should be either converted to direct ASCIIString calls,
195 or an optional hint should be used to decide on the encoding in the ByteArray."
198 ifTrue: [ASCIIString writer]
202 s@(String traits) elementType
205 _@(String traits) defaultElement
208 _@(String traits) lexicographicallyCompare: _@(String traits)
211 _@(String traits) sorted
212 "Answers a new String with the same Characters lexicographically sorted."
215 _@(String traits) accepts: _@(Root traits)
218 s@(String traits) copy
221 s@(String traits) hash
227 result := result + (c hash bitShift: bit) bitAnd: 16rFFFFFF.
228 bit := bit + 1 bitAnd: 16rF].
232 s@(String traits) capitalize
233 "Modifies the first Character to be uppercase."
236 ifFalse: [s at: s indexFirst put: s first toUppercase].
240 s@(String traits) uncapitalize
241 "Modifies the first Character to be lowercase."
244 ifFalse: [s at: s indexFirst put: s first toLowercase].
248 s@(String traits) capitalized
252 ifFalse: [s copy capitalize]
255 s@(String traits) uncapitalized
259 ifFalse: [s copy uncapitalize]
262 s@(String traits) toUppercase
263 "Modifies the Characters to be uppercase."
264 [s infect: #toUppercase `er ].
266 s@(String traits) toLowercase
267 "Modifies the Characters to be lowercase."
268 [s infect: #toLowercase `er ].
270 s@(String traits) toSwapCase
271 "Modifies the Characters to have swapped case."
273 s infect: [| :each | each isLowercase
274 ifTrue: [each toUppercase]
275 ifFalse: [each toLowercase]]
278 s@(String traits) toCamelCase &separators: delims
279 "Split the String, capitalize the split parts, and concatenate them together."
280 [((s split &separators: delims) collect: #capitalize `er ) join].
282 s@(String traits) fromCamelCase &separator: delim
283 "Separates the String into words based on lowercase-uppercase letter
284 boundaries, and joins them using the specified delimiter String."
286 delim `defaultsTo: ' '.
287 indices ::= ExtensibleArray new*, -1.
288 s indexFirst below: s indexLast - 1 do:
289 [| :index | (s at: index) isLowercase /\ [(s at: index + 1) isUppercase]
290 ifTrue: [indices addLast: index]].
291 indices addLast: s indexLast.
292 words ::= ExtensibleArray new.
293 indices chainPairsDo:
294 [| :last :next | words addLast:
295 (s copyFrom: last + 1 to: next) toLowercase].
296 words join &separator: delim
299 String traits define: #SmallWords -> {'a'. 'an'. 'and'. 'as'. 'at'. 'but'. 'by'. 'en'. 'for'. 'if'. 'in'. 'of'. 'on'. 'or'. 'the'. 'to'. 'v'. 'via'. 'vs'. 'vs.'}.
301 s@(String traits) titleCase
303 (words ::= s split) doWithIndex:
306 ifTrue: [word toUppercase]
308 [(s SmallWords includes: word) /\ [index > 0]
309 ifTrue: [word toLowercase]
310 ifFalse: [word capitalize]]].
311 words join &separator: ' '
314 s@(String traits) abbreviation
315 "Answers the usual convention for abbreviation via initials."
316 [s toCamelCase select: #isUppercase`er].
318 s@(String traits) escaped
319 "Answers a new String with printed slash-escapes so it can be read in as the
320 same value. This gives programmatic access to the right input for any String."
327 $\' -> [result ; '\\\''].
328 $\t -> [result ; '\\t'].
329 $\n -> [result ; '\\n'].
330 $\v -> [result ; '\\v'].
331 $\f -> [result ; '\\f'].
332 $\r -> [result ; '\\r'].
333 $\b -> [result ; '\\b'].
334 $\a -> [result ; '\\a'].
335 $\e -> [result ; '\\e'].
336 $\0 -> [result ; '\\0'].
337 $\s -> [result ; '\\ '].
341 ifTrue: [result nextPut: c]
344 c code // 16 printOn: result &radix: 16.
345 c code \\ 16 printOn: result &radix: 16]]
349 s@(String traits) readFrom: src
350 "Reads the first thing in the String (or Stream) as a String literal.
351 An error is raised if the return type does not match the requested one."
353 (srcStream ::= src reader) peek = $\'
356 (Syntax Lexer newOn: srcStream) readString value]
357 ifFalse: [error: 'The source does not start with a quotation mark.']
360 s@(String traits) unescaped
361 [s readFrom: '\'' ; s ; '\''].
363 s@(String traits) asAn
364 "Answer a copy of the String prepended with 'a ' or 'an ' in order to treat
365 it as specifying the instance of a noun named by the String."
367 s first isVowel ifTrue: ['an ' ; s] ifFalse: ['a ' ; s]
370 s@(String traits) plural
371 "Answer a copy of the String appended with -s, -es, or -ies in order to treat
372 it as a pluralized noun. This does not try to be too clever, because that
373 requires a full lexicon (for English) - only use this for technical purposes."
376 [s last toLowercase = $o] -> [s ; 'es'].
377 [s last isVowel] -> [s ; 's'].
378 [s last toLowercase = $y]
379 -> [(s at: s indexLast - 1) isVowel not
380 ifTrue: [s allButLast ; 'ies']
382 [s endsWith: 'us'] -> [(s allButLast: 2) ; 'i'].
383 [s endsWith: 'sis'] -> [(s allButLast: 2) ; 'es'].
384 [s endsWith: 'on'] -> [(s allButLast: 2) ; 'a'].
385 [#{'ex'. 'ix'} anySatisfy: #(s endsWith: _)] -> [(s allButLast: 2) ; 'ices'].
386 [#{'ss'. 'sh'. 'ch'. 'dge'} anySatisfy: #(s endsWith: _)] -> [s ; 'es']
387 ) otherwise: [s ; 's']
390 s@(String traits) split &separators: delims
391 [s splitWithAny: (delims ifNil: [s elementType Whitespace])].
393 s@(String traits) stripAll: c@(Collection traits) startingAt: start
395 result ::= (s copyFrom: 0 to: start - 1) writer.
396 start below: s size do:
400 ifFalse: [result nextPut: each]].
404 s@(String traits) stripAll: c@(Collection traits)
405 [s stripAll: c startingAt: 0 ].
407 s@(String traits) strip: c@(String Character traits) startingAt: start
408 [s stripAll: {c} startingAt: start ].
410 s@(String traits) strip: c@(String Character traits)
411 [s stripAll: {c} startingAt: 0].
413 s@(String traits) stripStartingAt: start
414 [s stripAll: s elementType Whitespace startingAt: start].
416 s@(String traits) strip
417 [s stripAll: s elementType Whitespace startingAt: 0].
419 s@(String traits) trimAll: c@(Collection traits)
421 (s indexOfFirstSatisfying: [| :each | (c includes: each) not])
426 to: (s indexOfLastSatisfying: [| :each | (c includes: each) not])]
429 s@(String traits) trim
431 s trimAll: s elementType Whitespace
434 s@(String traits) evaluate
439 s@(String traits) evaluateIn: namespace
441 parser ::= Syntax Parser newOn: s.
442 [[(expr := parser next) evaluateIn: lobby.
443 parser isAtEnd] on: Stream Exhaustion do: [False]] whileFalse.
446 c@(String Character traits) as: s@(String traits)
447 [(s new &capacity: 1) `>> [at: 0 put: c. ]].
449 s@(String traits) as: i@(Integer traits) &radix
450 "Reads a radix-(10) Integer from the string."
452 (s isNotEmpty /\ [s first = $-])
453 ifTrue: [((s copyFrom: 1) as: i &radix: radix) negated]
455 [radix `defaultsTo: 10.
457 [| :n :c | (c isDigit: radix) ifFalse: [^ n].
458 n * radix + (c toDigit: radix)]]
461 "ASCIIString traitsWindow `>> [
462 atSlotNamed: #traits1 put: String traits.
463 atSlotNamed: #traits3 put: Sequence traits.
464 atSlotNamed: #traits4 put: Collection traits.
465 atSlotNamed: #traits5 put: Mapping traits. ].
467 ASCIIString traitsWindow _delegates:
468 {Root traits. Derivable traits. Cloneable traits. Mapping traits.
469 Collection traits. Sequence traits. ByteArray traits. String traits. ASCIIString traits}.
471 s@(ASCIIString traits) at: i
473 (s byteAt: i) as: s elementType
476 s@(ASCIIString traits) at: i put: c
478 s byteAt: i put: c code
481 s@(ASCIIString traits) at: i put: code@(Integer traits)
483 s byteAt: i put: code intoByte
486 _@(ASCIIString traits) defaultElement [$\0].
488 s@(ASCIIString traits) includes: c@(String Character traits)
489 "A pure optimization override to avoid coercing all of the elements to Characters.
490 This is about twice as fast as the very generic Sequence method."
493 s keysDo: [| :i | (s byteAt: i) == c code ifTrue: [^ True]].
497 _@(ASCIIString traits) accepts: _@(ASCIIString Character traits)
500 s1@(ASCIIString traits) lexicographicallyCompare: s2@(ASCIIString traits)
501 "Answer a sign of comparing the two Strings' Characters in order."
504 [| :c1 :c2 cmp | (cmp := c1 code <=> c2 code) isZero ifFalse: [^ cmp]].
508 s@(ASCIIString traits) sorted
509 "Answers a new String with the same Characters lexicographically sorted."
510 [(s sortBy: #<= `er ** #code `er ) as: s].
512 ch@(ASCIIString Character traits) rot13
514 (code := ch toLowercase code) >= #[$a code] /\ [code < #[$n code]]
516 ifFalse: [code > #[$m code] /\ [code <= #[$z code]]
517 ifTrue: [code -= 13]].
519 ifTrue: [(code as: ch) toUppercase]
520 ifFalse: [code as: ch]
523 s@(ASCIIString traits) rot13
524 [s collect: #rot13 `er].
526 s@(ASCIIString traits) as: a@(ByteArray traits)
527 "Since ByteArrays cannot hold objects, they must store character codes."
529 result ::= a newSizeOf: s.
531 [| :each :index | result at: index put: (each as: a elementType)].
535 a@(ByteArray traits) as: s@(ASCIIString traits)
536 "Since ByteArrays cannot hold objects, they must store character codes."
538 result ::= s newSizeOf: a.
540 [| :each :index | result at: index put: (each as: s elementType)].
544 name@(ASCIIString traits) as: _@(Symbol traits)