Node:StringUtils, Next:System, Previous:Strings, Up:GPC Units
The following listing contains the interface of the StringUtils unit.
This unit provides some routines for string handling on a higher
level than those provided by the RTS.
{ Some routines for string handling on a higher level than those
provided by the RTS.
Copyright (C) 1999-2003 Free Software Foundation, Inc.
Author: Frank Heckenbach <frank@pascal.gnu.de>
This file is part of GNU Pascal.
GNU Pascal is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 2, or (at your
option) any later version.
GNU Pascal is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Pascal; see the file COPYING. If not, write to the
Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.
As a special exception, if you link this file with files compiled
with a GNU compiler to produce an executable, this does not cause
the resulting executable to be covered by the GNU General Public
License. This exception does not however invalidate any other
reasons why the executable file might be covered by the GNU
General Public License. }
{$gnu-pascal,I-}
{$if __GPC_RELEASE__ < 20030303}
{$error This unit requires GPC release 20030303 or newer.}
{$endif}
unit StringUtils;
interface
uses GPC;
{ Various routines }
{ Appends Source to s, truncating the result if necessary. }
procedure AppendStr (var s: String; const Source: String);
{ Cuts s to MaxLength characters. If s is already MaxLength
characters or shorter, it doesn't change anything. }
procedure StrCut (var s: String; MaxLength: Integer);
{ Returns the number of disjoint occurrences of SubStr in s. Returns
0 if SubStr is empty. }
function StrCount (const SubStr: String; s: String): Integer;
{ Returns s, with all disjoint occurrences of Source replaced by
Dest. }
function StrReplace (const s, Source, Dest: String): TString;
{ Sets of characters accepted for True and False by
Char2Boolean and StrReadBoolean. }
var
CharactersTrue : CharSet = ['Y', 'y'];
CharactersFalse: CharSet = ['N', 'n'];
{ If ch is an element of CharactersTrue, Dest is set to True,
otherwise if it is an element of CharactersFalse, Dest is set to
False. In both cases True is returned. If ch is not an element of
either set, Dest is set to False and False is returned. }
function Char2Boolean (ch: Char; var Dest: Boolean): Boolean;
{ Converts a digit character to its numeric value. Handles every
base up to 36 (0 .. 9, a .. z, upper and lower case recognized).
Returns -1 if the character is not a digit at all. If you want to
use it for a base < 36, you have to check if the result is smaller
than the base and not equal to -1. }
function Char2Digit (ch: Char): Integer;
{ Encode a string in a printable format (quoted printable and
surrounded with "). All occurrences of " within the string are
encoded, so the result string contains exactly two " characters
(at the beginning and ending). This is useful to store arbitrary
strings in text files while keeping them as readable as possible
(which is the goal of the quoted printable encoding in general,
see RFC 1521, section 5.1) and being able to read them back
losslessly (with UnQuoteString). }
function QuoteString (const s: String): TString;
{ Encode a string in a printable format suitable for StrReadEnum.
All occurrences of , within the string are encoded. }
function QuoteEnum (const s: String): TString;
{ Decode a string encoded by QuoteString (removing the " and
expanding quoted printable encoded characters). Returns True if
successful and False if the string has an invalid form. A string
returned by QuoteString is always valid. }
function UnQuoteString (var s: String): Boolean;
{ Decode a quoted-printable string (not enclosed in ", unlike for
UnQuoteString). Returns True if successful and False if the string
has an invalid form. In the latter case, it still decodes as much
as is valid, even after the error position. }
function UnQPString (var s: String): Boolean;
{ Quotes a string as done in shells, i.e. all special characters are
enclosed in either " or ', where ", $ and ` are always
enclosed in ' and ' is always enclosed in ". }
function ShellQuoteString (const s: String): TString;
{ Replaces all tab characters in s with the appropriate amount of
spaces, assuming tab stops at every TabSize columns. Returns True
if successful and False if the expanded string would exceed the
capacity of s. In the latter case, some, but not all of the tabs
in s may have been expanded. }
function ExpandTabs (var s: String; TabSize: Integer): Boolean;
{ Returns s, with all occurrences of C style escape sequences (e.g.
\n) replaced by the characters they mean. If AllowOctal is True,
also octal character specifications (e.g. \007) are replaced. If
RemoveQuoteChars is True, any other backslashes are removed (e.g.
\* -> * and \\ -> \), otherwise they are kept, and also
\\ is left as two backslashes then. }
function ExpandCEscapeSequences (const s: String; RemoveQuoteChars,
AllowOctal: Boolean): TString;
{ Routines for TPStrings }
{ Initialise a TPStrings variable, allocate Size characters for each
element. This procedure does not dispose of any previously
allocated storage, so if you use it on a previously used variable
without freeing the storage yourself, this might cause memory
leaks. }
procedure AllocateTPStrings (var Strings: TPStrings; Size: Integer);
{ Clears all elements (sets them to empty strings), does not free any
storage. }
procedure ClearTPStrings (var Strings: TPStrings);
{ Divide a string into substrings, using Separators as separator. A
single trailing separator is ignored. Further trailing separators
as well as any leading separators and multiple separators in a row
produce empty substrings. }
function TokenizeString (const Source: String; Separators: CharSet):
PPStrings;
{ Divide a string into substrings, using SpaceCharacters as
separators. The splitting is done according to the usual rules of
shells, using (and removing) single and double quotes and
QuotingCharacter. Multiple, leading and trailing separators are
ignored. If there is an error, a message will be stored in ErrMsg,
and nil will be returned. Nil will also be returned (without an
error message) if s is empty. }
function ShellTokenizeString (const s: String; var ErrMsg: String):
PPStrings;
{ String parsing routines }
{ All the following StrReadFoo functions behave similarly. They read
items from a string s, starting at index i, to a variable Dest.
They skip any space characters (spaces and tabs) by incrementing i
first. They return True if successful, False otherwise. i is
incremented accordingly if successful, otherwise i is left
unchanged, apart from the skipping of space characters, and Dest
is undefined. This behaviour makes it easy to use the functions in
a row like this:
i := 1;
if StrReadInt (s, i, Size) and StrReadComma (s, i) and
StrReadQuoted (s, i, Name) and StrReadComma (s, i) and
...
StrReadReal (s, i, Angle) and (i > Length (s)) then ...
(The check i > Length (s) is in case you don't want to accept
trailing "garbage".) }
{ Just skip any space characters as described above. }
procedure StrSkipSpaces (const s: String; var i: Integer);
{ Read a quoted string (as produced by QuoteString) from a string
and unquote the result using UnQuoteString. It is considered
failure if the result (unquoted) would be longer than the capacity
of Dest. }
function StrReadQuoted (const s: String; var i: Integer; var Dest:
String): Boolean;
{ Read a string delimited with Delimiter from a string and return
the result with the delimiters removed. It is considered failure
if the result (without delimiters) would be longer than the
capacity of Dest. }
function StrReadDelimited (const s: String; var i: Integer; var
Dest: String; Delimiter: Char): Boolean;
{ Read a word (consisting of anything but space characters and
commas) from a string. It is considered failure if the result
would be longer than the capacity of Dest. }
function StrReadWord (const s: String; var i: Integer; var Dest:
String): Boolean;
{ Check that a certain string is contained in s (after possible
space characters). }
function StrReadConst (const s: String; var i: Integer; const
Expected: String): Boolean;
{ A simpler to use version of StrReadConst that expects a ,. }
function StrReadComma (const s: String; var i: Integer): Boolean;
{ Read an integer number from a string. }
function StrReadInt (const s: String; var i: Integer; var Dest:
Integer): Boolean;
{ Read a real number from a string. }
function StrReadReal (const s: String; var i: Integer; var Dest:
Real): Boolean;
{ Read a Boolean value, represented by a single character
from CharactersTrue or CharactersFalse (cf. Char2Boolean), from a
string. }
function StrReadBoolean (const s: String; var i: Integer; var Dest:
Boolean): Boolean;
{ Read an enumerated value, i.e., one of the entries of IDs, from a
string, and store the ordinal value, i.e., the index in IDs
(always zero-based) in Dest. }
function StrReadEnum (const s: String; var i: Integer; var Dest:
Integer; const IDs: array of PString): Boolean;
{ String hash table }
const
DefaultHashSize = 1403;
type
{ Type of the hash values returned by HashString; an alias for
Cardinal. }
THash = Cardinal;
{ Pointer to a TStrHashList node. }
PStrHashList = ^TStrHashList;
{ One node of a singly linked list (linked via Next) of hash table
entries. The fields s, i and p have the same names as the
parameters of AddStrHashTable; presumably they hold the key
string and the two values stored with it -- NOTE(review): confirm
against the implementation. }
TStrHashList = record
Next: PStrHashList;
s: PString;
i: Integer;
p: Pointer
end;
{ Pointer to a TStrHashTable. }
PStrHashTable = ^TStrHashTable;
{ Schema type with the discriminant Size. Table contains Size
entries, indexed from 0 to Size - 1, each of which points to a
list of TStrHashList nodes. CaseSensitive has the same name as
the corresponding parameter of NewStrHashTable; presumably it
records whether keys are compared case-sensitively --
NOTE(review): confirm against the implementation. }
TStrHashTable (Size: Cardinal) = record
CaseSensitive: Boolean;
Table: array [0 .. Size - 1] of PStrHashList
end;
function HashString (const s: String): THash;
function NewStrHashTable (Size: Cardinal; CaseSensitive:
Boolean): PStrHashTable;
procedure AddStrHashTable (HashTable: PStrHashTable; s: String;
i: Integer; p: Pointer);
procedure DeleteStrHashTable (HashTable: PStrHashTable; s: String);
function SearchStrHashTable (HashTable: PStrHashTable; const s:
String; var p: Pointer): Integer; { p may be Null }
procedure StrHashTableUsage (HashTable: PStrHashTable; var
Entries, Slots: Integer);
procedure DisposeStrHashTable (HashTable: PStrHashTable);