module Text:sig..end
For compatibility and simplicity reasons, text is represented by UTF-8 encoded strings, and there is no special type for unicode characters, which are just represented by 1-length text.
All functions of this module expect to be applied on valid UTF-8
encoded strings, and may raise Invalid if this is not the case.
type t = string
exception Invalid of string * string
Invalid(error, text) Exception raised when an invalid UTF-8
encoded string is encountered. text is the faulty text and
error is a description of the first error in text.val check : string -> string optioncheck str checks that str is a valid UTF-8 encoded
string. Returns None if it is the case, or Some error
otherwise.
val validate : string -> unit
val encode : ?encoding:Encoding.t -> t -> string
encode ?encoding txt encodes the given text with encoding,
which defaults to Encoding.system plus transliteration.val decode : ?encoding:Encoding.t -> string -> t
val to_ascii : t -> tto_ascii txt returns an approximative ascii version of
txt. This is the same as encode ~encoding:"ASCII//TRANSLIT" txtval length : t -> intval code : t -> intcode text returns the unicode code-point of first character of
text.
For example:
code "A" = 65code "é" = 0xe9val char : int -> tchar code returns the character corresponding to the given
unicode code-point.
For example:
char 65 = "A"
char 0xe9 = "é"
Raises Invalid_argument if code is not a valid unicode
code-point. Valid code-points are all integers in the range
0..0x10ffff.val get : t -> int -> tget text n returns the n-th character of text. n is a
number of unicode character, not bytes. A negative value is
interpreted as a position from the end of the text.
For example:
get "abc" 0 = "a"
get "abc" 2 = "c"
get "aéb" 1 = "é"
get "aéb" 2 = "b"
get "abc" (-1) = "c"
val sub : t -> int -> int -> t
sub text pos len returns the sub-text of text starting at
position pos and of length len. pos and/or len may be
negative.
For example:
sub "ocaml" 1 2 = "ca"
sub "ocaml" 3 (-2) = "ca"
sub "ocaml" (-2) 1 = "m"
val slice : t -> int -> int -> t
slice text a b returns the text contained in text between a
and b (exclusive). a and/or b may be negative.
For example:
slice "abc" 0 1 = "a"
slice "abcdef" 1 (-1) = "bcde"
val splice : t -> int -> int -> t -> t
splice text a b repl replaces the text between a and b
(exclusive) by repl.
For example:
splice "abcd" 1 2 "plop" = "aplopcd"
splice "abcd" 1 2 "" = "acd"
val repeat : int -> t -> t
repeat n text returns text concatenated n times with
itself.
val init : int -> (int -> t) -> t
init n f returns f 0 ^ f 1 ^ ... ^ f (n - 1)
val rev_init : int -> (int -> t) -> t
rev_init n f returns f (n - 1) ^ ... ^ f 1 ^ f 0
val upper : t -> t
upper t returns the upper-cased version of t.
val lower : t -> t
lower t returns the lower-cased version of t.
val capitalize : t -> t
capitalize t returns t with its first letter upper-cased.
val uncapitalize : t -> t
uncapitalize t returns t with its first letter lower-cased.
val compare : t -> t -> int
val icompare : t -> t -> int
val transform : t -> t
transform str transforms str in a way such that comparing
two strings str1 and str2 transformed with
Pervasives.compare gives the same result as comparing them with
Text.compare.val rev : t -> trev t returns the sequence of characters of t in reverse
order.
For example:
rev "ocaml" = "lmaco"rev "héhé" = "éhéh"val concat : t -> t list -> tconcat sep l returns the concatenation of all texts contained
in l, separated by sep.
For example:
concat "/" ["a"; "b"; "c"] = "a/b/c"
val rev_concat : t -> t list -> t
rev_concat sep l returns the concatenation of all texts
contained in l in reverse order, separated by sep.
For example:
rev_concat "/" ["a"; "b"; "c"] = "c/b/a"
val explode : t -> t list
explode txt returns the list of all characters of txt.
For example:
explode "" = []explode "abé" = ["a"; "b"; "é"]val rev_explode : t -> t listrev_explode txt returns the list of all characters of txt,
in reverse order.
For example:
rev_explode "ocaml" = ["l"; "m"; "a"; "c"; "o"]val implode : t list -> timplode l returns the concatenation of all texts contained in
l. This is the same as concat "" l, but a bit more
efficient.
For example:
implode ["o"; "c"; "a"; "m"; "l"] = "ocaml"implode ["abc"; "def"] = "abcdef"val rev_implode : t list -> trev_implode l returns the concatenation of all texts contained
in l, in reverse order.
For example:
rev_implode ["o"; "c"; "a"; "m"; "l"] = "lmaco"
rev_implode ["abc"; "def"] = "defabc"
val is_ascii : t -> bool
val is_alnum : t -> bool
val is_alpha : t -> bool
val is_blank : t -> bool
val is_cntrl : t -> bool
val is_digit : t -> bool
val is_graph : t -> bool
val is_lower : t -> bool
val is_print : t -> bool
val is_punct : t -> bool
val is_space : t -> bool
val is_upper : t -> bool
val is_xdigit : t -> boolval map : (t -> t) -> t -> tmap f text ~ implode (List.map f (explode text))
map (function "a" -> "x" | t -> t) "abc" = "xbc"
val rev_map : (t -> t) -> t -> trev_map f text ~ implode (List.rev_map f (explode text))
rev_map (function "a" -> "x" | t -> t) "abc" = "cbx"
val fold : (t -> 'a -> 'a) -> t -> 'a -> 'a
fold f text x ~ List.fold_left (fun acc t -> f t acc) x (explode text)
fold (fun t acc -> acc + code t) "ABC" 0 = 198
val rev_fold : (t -> 'a -> 'a) -> t -> 'a -> 'a
rev_fold f text x ~ List.fold_left (fun acc t -> f t acc) x (rev_explode text)
rev_fold (fun t acc -> acc + code t) "ABC" 0 = 198
val filter : (t -> bool) -> t -> t
filter f text ~ implode (List.filter f (explode text))
filter is_alpha "1a2E" = "aE"
val rev_filter : (t -> bool) -> t -> t
rev_filter f text ~ implode (List.filter f (rev_explode text))
rev_filter is_alpha "1a2E" = "Ea"
val iter : (t -> unit) -> t -> unit
iter f text ~ List.iter f (explode text)
val rev_iter : (t -> unit) -> t -> unit
rev_iter f text ~ List.iter f (rev_explode text)
val for_all : (t -> bool) -> t -> bool
for_all f text returns whether all characters of text verify
the predicate f
val exists : (t -> bool) -> t -> bool
exists f text returns whether at least one character of text
verifies f
val count : (t -> bool) -> t -> int
count f text returns the number of characters of text
verifying fval words : t -> t listval lines : t -> t list"\r\n" and "\n" are recognized as end of
line delimiters.
val split : ?max:int -> ?sep:t -> t -> t list
split ?max ?sep text splits text according to sep. If max
is specified, returns at most max splits. sep defaults to " ".
For example:
split ~sep:"/" "a/b/c" = ["a"; "b"; "c"]
split ~sep:".." "a..b..c" = ["a"; "b"; "c"]
split ~max:1 "a b c" = ["a b c"]
split ~max:2 "a b c" = ["a"; "b c"]
val rev_split : ?max:int -> ?sep:t -> t -> t list
rev_split ?max ?sep text splits text according to sep in reverse
order.
For example:
rev_split ~sep:"/" "a/b/c" = ["c"; "b"; "a"]
rev_split ~max:1 "a b c" = ["a b c"]
rev_split ~max:2 "a b c" = ["a b"; "c"]
rev_split ~max:2 ~sep:"." "toto.mli" = ["toto"; "mli"]
val replace : t -> patt:t -> repl:t -> t
replace text ~patt ~repl replaces all occurrences of patt in
text by repl.
For example:
replace "abcd" ~patt:"b" ~repl:"x" = "axcd"
replace "Hello world!" ~patt:"world" ~repl:"you" = "Hello you!"
val contains : t -> t -> bool
contains text sub returns whether sub appears in text
val starts_with : t -> t -> bool
starts_with text prefix returns true iff text starts with
prefix.
For example:
starts_with "abcd" "ab" = truestarts_with "abcd" "af" = falsestarts_with "ab" "abcd" = falseval ends_with : t -> t -> boolends_with s suffix returns true iff s ends with
suffix.
For example:
ends_with "abcd" "cd" = trueends_with "abcd" "hd" = falseends_with "ab" "abc" = falseval strip : ?chars:t list -> t -> tstrip ?chars text removes all characters of text which are
part of chars at the right and left. chars defaults to
whitespaces.val rstrip : ?chars:t list -> t -> trstrip ?chars text removes all characters of text which are
part of chars at the right.val lstrip : ?chars:t list -> t -> tlstrip ?chars text removes all characters of text which are
part of chars at the left.
val rchop : t -> t
rchop t returns t without its last character. Returns "" if
t = "".
val lchop : t -> t
lchop t returns t without its first character. Returns ""
if t = ""
type pointer
val pointer_l : t -> pointerval pointer_r : t -> pointerval pointer_at : t -> int -> pointerpointer_at txt n returns a pointer to the character at
position n in txt.val next : pointer -> (t * pointer) optionnext ptr if ptr is at the end of text, returns None,
otherwise, returns Some(ch, ptr') where ch is the character
at current position and ptr' is the pointer to the next
character of the text.val prev : pointer -> (t * pointer) optionprev ptr if ptr is at the beginning of text, returns None,
otherwise, returns Some(ch, ptr') where ptr' points to the
previous character and ch is the character at ptr'.val move : int -> pointer -> pointermove n ptr moves ptr by n unicode characters. If n < 0
then ptr is moved to the left. Raises Invalid_argument if
the result is outside the text.val chunk : pointer -> pointer -> tchunk a b returns the chunk of text between a and
b. Raises Invalid_argument if a or b.
val offset : pointer -> int
offset ptr returns the position in bytes of ptr
val position : pointer -> int
position ptr returns the position in unicode character of ptr
val equal_at : pointer -> t -> bool
equal_at ptr str returns whether ptr points to a substring
equal to strval find : ?from:pointer -> t -> t -> pointer optionfind ?from text patt returns a pointer to the first occurrence
of patt in text.
val rev_find : ?from:pointer -> t -> t -> pointer option
rev_find ?from text patt returns a pointer to the last occurrence
of patt in text.