This package provides functions for handling Unicode characters and UTF-8 strings. See also Glib.Convert.
Types |
---|
type G_Unicode_Type is (Unicode_Control, Unicode_Format, Unicode_Unassigned, Unicode_Private_Use, Unicode_Surrogate, Unicode_Lowercase_Letter, Unicode_Modifier_Letter, Unicode_Other_Letter, Unicode_Titlecase_Letter, Unicode_Uppercase_Letter, Unicode_Combining_Mark, Unicode_Enclosing_Mark, Unicode_Non_Spacing_Mark, Unicode_Decimal_Number, Unicode_Letter_Number, Unicode_Other_Number, Unicode_Connect_Punctuation, Unicode_Dash_Punctuation, Unicode_Close_Punctuation, Unicode_Final_Punctuation, Unicode_Initial_Punctuation, Unicode_Other_Punctuation, Unicode_Open_Punctuation, Unicode_Currency_Symbol, Unicode_Modifier_Symbol, Unicode_Math_Symbol, Unicode_Other_Symbol, Unicode_Line_Separator, Unicode_Paragraph_Separator, Unicode_Space_Separator); | |
The possible character classifications.
See http://www.unicode.org/Public/UNIDATA/UnicodeData.html
|
Subprograms |
---|
procedure UTF8_Validate (Str : UTF8_String; Valid : out Boolean; Invalid_Pos : out Natural); | ||
Validate a UTF8 string. | ||
Character classes | ||
function Is_Space (Char : Gunichar) return Boolean; | ||
True if Char is a space character
| ||
function Is_Alnum (Char : Gunichar) return Boolean; | ||
True if Char is an alphabetical or numerical character
| ||
function Is_Alpha (Char : Gunichar) return Boolean; | ||
True if Char is an alphabetical character
| ||
function Is_Digit (Char : Gunichar) return Boolean; | ||
True if Char is a digit
| ||
function Is_Lower (Char : Gunichar) return Boolean; | ||
True if Char is a lower-case character
| ||
function Is_Upper (Char : Gunichar) return Boolean; | ||
True if Char is an upper-case character
| ||
function Is_Punct (Char : Gunichar) return Boolean; | ||
True if Char is a punctuation character
| ||
function Unichar_Type (Char : Gunichar) return G_Unicode_Type; | ||
Return the unicode character type of a given character
| ||
Case handling | ||
function To_Lower (Char : Gunichar) return Gunichar; | ||
Convert Char to lower case
| ||
function To_Upper (Char : Gunichar) return Gunichar; | ||
Convert Char to upper case
| ||
function UTF8_Strdown (Str : ICS.chars_ptr; Len : Integer) return ICS.chars_ptr; | ||
| ||
function UTF8_Strdown (Str : UTF8_String) return UTF8_String; | ||
Convert Str to lower case
| ||
function UTF8_Strup (Str : ICS.chars_ptr; Len : Integer) return ICS.chars_ptr; | ||
| ||
function UTF8_Strup (Str : UTF8_String) return UTF8_String; | ||
Convert Str to upper case
| ||
Manipulating strings | ||
function UTF8_Strlen (Str : ICS.chars_ptr; Max : Integer := -1) return Glong; | ||
| ||
function UTF8_Strlen (Str : UTF8_String) return Glong; | ||
Return the number of characters in Str
| ||
function UTF8_Find_Next_Char (Str : ICS.chars_ptr; Str_End : ICS.chars_ptr := ICS.Null_Ptr) return ICS.chars_ptr; | ||
| ||
function UTF8_Find_Next_Char (Str : UTF8_String; Index : Natural) return Natural; | ||
| ||
function UTF8_Next_Char (Str : UTF8_String; Index : Natural) return Natural; | ||
| ||
function UTF8_Find_Prev_Char (Str_Start : ICS.chars_ptr; Str : ICS.chars_ptr) return ICS.chars_ptr; | ||
| ||
function UTF8_Find_Prev_Char (Str : UTF8_String; Index : Natural) return Natural; | ||
Find the start of the previous UTF8 character before the Index-th byte. | ||
Conversions | ||
function Unichar_To_UTF8 (C : Gunichar; Buffer : ICS.chars_ptr := ICS.Null_Ptr) return Natural; | ||
| ||
procedure Unichar_To_UTF8 (C : Gunichar; Buffer : out UTF8_String; Last : out Natural); | ||
Encode C into Buffer. Buffer must have at least 6 bytes free. | ||
function UTF8_Get_Char (Str : UTF8_String) return Gunichar; | ||
Converts a sequence of bytes encoded as UTF8 to a unicode character. | ||
function UTF8_Get_Char_Validated (Str : UTF8_String) return Gunichar; | ||
Same as above. However, if the sequence is an incomplete start of a |