/****************************************************************************** * * Copyright (c) 1998-2003 PalmSource, Inc. All rights reserved. * * File: TextMgr.h * * Release: Palm OS 5 SDK (68K) R3. * * Description: * Header file for Text Manager. * *****************************************************************************/ #ifndef __TEXTMGR_H__ #define __TEXTMGR_H__ #include #include /*********************************************************************** * Public types & constants ***********************************************************************/ // See PalmLocale.h for encoding constants of type CharEncodingType, and // character encoding names. typedef UInt8 CharEncodingType; // Transliteration operations for the TxtTransliterate call. We don't use // an enum, since each character encoding contains its own set of special // transliteration operations (which begin at translitOpCustomBase). typedef UInt16 TranslitOpType; // Standard transliteration operations. #define translitOpStandardBase 0 // Beginning of standard operations. #define translitOpUpperCase 0 #define translitOpLowerCase 1 #define translitOpReserved2 2 #define translitOpReserved3 3 // Custom transliteration operations (defined in CharXXXX.h encoding-specific // header files. #define translitOpCustomBase 1000 // Beginning of char-encoding specific ops. #define translitOpPreprocess 0x8000 // Mask for pre-process option, where // no transliteration actually is done. // Structure used to maintain state across calls to TxtConvertEncoding, for // proper handling of source or destination encodings with have modes. 
#define kTxtConvertStateSize 32 typedef struct { UInt8 ioSrcState[kTxtConvertStateSize]; UInt8 ioDstState[kTxtConvertStateSize]; } TxtConvertStateType; // Character encoding assumed for substitution text by TxtConvertEncoding #define textSubstitutionEncoding charEncodingUTF8 // Flag to OR with the charEncodingType that is passed to TxtConvertEncoding #define charEncodingDstBestFitFlag 0x80 // Flags available in the sysFtrNumCharEncodingFlags feature attribute. #define charEncodingOnlySingleByte 0x00000001 #define charEncodingHasDoubleByte 0x00000002 #define charEncodingHasLigatures 0x00000004 #define charEncodingRightToLeft 0x00000008 // Various byte attribute flags. Note that multiple flags can be // set, thus a byte could be both a single-byte character, or the first // byte of a multi-byte character. #define byteAttrFirst 0x80 // First byte of multi-byte char. #define byteAttrLast 0x40 // Last byte of multi-byte char. #define byteAttrMiddle 0x20 // Middle byte of muli-byte char. #define byteAttrSingle 0x01 // Single byte. // Some double-byte encoding combinations. Every byte in a stream of // double-byte data must be either a single byte, a single/low byte, // or a high/low byte. #define byteAttrSingleLow (byteAttrSingle | byteAttrLast) #define byteAttrHighLow (byteAttrFirst | byteAttrLast) // Character attribute flags. These replace the old flags defined in // CharAttr.h, but are bit-compatible. #define charAttr_XA 0x0200 // extra alphabetic #define charAttr_XS 0x0100 // extra space #define charAttr_BB 0x0080 // BEL, BS, etc. #define charAttr_CN 0x0040 // CR, FF, HT, NL, VT #define charAttr_DI 0x0020 // '0'-'9' #define charAttr_LO 0x0010 // 'a'-'z' and lowercase extended chars. #define charAttr_PU 0x0008 // punctuation #define charAttr_SP 0x0004 // space #define charAttr_UP 0x0002 // 'A'-'Z' and uppercase extended chars. #define charAttr_XD 0x0001 // '0'-'9', 'A'-'F', 'a'-'f' // Various sets of character attribute flags. 
#define charAttrPrint   (charAttr_DI|charAttr_LO|charAttr_PU|charAttr_SP|charAttr_UP|charAttr_XA)
#define charAttrSpace   (charAttr_CN|charAttr_SP|charAttr_XS)
#define charAttrAlNum   (charAttr_DI|charAttr_LO|charAttr_UP|charAttr_XA)
#define charAttrAlpha   (charAttr_LO|charAttr_UP|charAttr_XA)
#define charAttrCntrl   (charAttr_BB|charAttr_CN)
#define charAttrGraph   (charAttr_DI|charAttr_LO|charAttr_PU|charAttr_UP|charAttr_XA)
#define charAttrDelim   (charAttr_SP|charAttr_PU)

// Remember that sizeof(0x0D) == 2 because 0x0D is treated like an int. The
// same is true of sizeof('a'), sizeof('\0'), and sizeof(chrNull). For this
// reason it's safest to use the sizeOf7BitChar macro to document buffer size
// and string length calcs. Note that this can only be used with low-ascii
// characters, as anything else might be the high byte of a double-byte char.
// The argument is not evaluated; the macro always expands to 1.
#define sizeOf7BitChar(c)   1

// Maximum size a single WChar character will occupy in a text string.
#define maxCharBytes        3

// Text manager error codes.
#define txtErrUknownTranslitOp (txtErrorClass | 1) #define txtErrTranslitOverrun (txtErrorClass | 2) #define txtErrTranslitOverflow (txtErrorClass | 3) #define txtErrConvertOverflow (txtErrorClass | 4) #define txtErrConvertUnderflow (txtErrorClass | 5) #define txtErrUnknownEncoding (txtErrorClass | 6) #define txtErrNoCharMapping (txtErrorClass | 7) #define txtErrTranslitUnderflow (txtErrorClass | 8) #define txtErrMalformedText (txtErrorClass | 9) #define txtErrUnknownEncodingFallbackCopy (txtErrorClass | 10) /*********************************************************************** * Public macros ***********************************************************************/ #define TxtCharIsSpace(ch) ((TxtCharAttr(ch) & charAttrSpace) != 0) #define TxtCharIsPrint(ch) ((TxtCharAttr(ch) & charAttrPrint) != 0) #define TxtCharIsDigit(ch) ((TxtCharAttr(ch) & charAttr_DI) != 0) #define TxtCharIsAlNum(ch) ((TxtCharAttr(ch) & charAttrAlNum) != 0) #define TxtCharIsAlpha(ch) ((TxtCharAttr(ch) & charAttrAlpha) != 0) #define TxtCharIsCntrl(ch) ((TxtCharAttr(ch) & charAttrCntrl) != 0) #define TxtCharIsGraph(ch) ((TxtCharAttr(ch) & charAttrGraph) != 0) #define TxtCharIsLower(ch) ((TxtCharAttr(ch) & charAttr_LO) != 0) #define TxtCharIsPunct(ch) ((TxtCharAttr(ch) & charAttr_PU) != 0) #define TxtCharIsUpper(ch) ((TxtCharAttr(ch) & charAttr_UP) != 0) #define TxtCharIsHex(ch) ((TxtCharAttr(ch) & charAttr_XD) != 0) #define TxtCharIsDelim(ch) ((TxtCharAttr(ch) & charAttrDelim) != 0) // is a hard key if the event modifier has the command bit set // and is either in the proper range or is the calculator character. #define TxtCharIsHardKey(m, c) ((((m) & commandKeyMask) != 0) && \ ((((c) >= hardKeyMin) && ((c) <= hardKeyMax)) || ((c) == calcChr))) // is a virtual character if the event modifier has the command // bit set. WARNING!!! This macro is only safe to use on Palm OS 3.5 or // later. 
With earlier versions of the OS, use TxtGlueCharIsVirtual() // in PalmOSGlue.lib #define TxtCharIsVirtual(m, c) (((m) & commandKeyMask) != 0) #define TxtPreviousCharSize(inText, inOffset) TxtGetPreviousChar((inText), (inOffset), NULL) #define TxtNextCharSize(inText, inOffset) TxtGetNextChar((inText), (inOffset), NULL) /*********************************************************************** * Public routines ***********************************************************************/ #ifndef STRIP_FUNCTION_HEADERS #ifdef __cplusplus extern "C" { #endif // Return back byte attribute (first, last, single, middle) for . UInt8 TxtByteAttr(UInt8 inByte) INTL_TRAP(intlTxtByteAttr); // Return back the standard attribute bits for . UInt16 TxtCharAttr(WChar inChar) INTL_TRAP(intlTxtCharAttr); // Return back the extended attribute bits for . UInt16 TxtCharXAttr(WChar inChar) INTL_TRAP(intlTxtCharXAttr); // Return the size (in bytes) of the character . This represents // how many bytes would be required to store the character in a string. UInt16 TxtCharSize(WChar inChar) INTL_TRAP(intlTxtCharSize); // Return the width (in pixels) of the character . You should // use FntWCharWidth or FntGlueWCharWidth instead of this routine. Int16 TxtCharWidth(WChar inChar) INTL_TRAP(intlTxtCharWidth); // Load the character before offset in the text. Return // back the size of the character. UInt16 TxtGetPreviousChar(const Char *inText, UInt32 inOffset, WChar *outChar) INTL_TRAP(intlTxtGetPreviousChar); // Load the character at offset in the text. Return // back the size of the character. UInt16 TxtGetNextChar(const Char *inText, UInt32 inOffset, WChar *outChar) INTL_TRAP(intlTxtGetNextChar); // Return the character at offset in the text. WChar TxtGetChar(const Char *inText, UInt32 inOffset) INTL_TRAP(intlTxtGetChar); // Set the character at offset in the text, and // return back the size of the character. 
UInt16 TxtSetNextChar(Char *ioText, UInt32 inOffset, WChar inChar) INTL_TRAP(intlTxtSetNextChar); // Replace the substring "^X" (where X is 0..9, as specified by ) // with the string . If is NULL then don't modify . // Make sure the resulting string doesn't contain more than bytes, // excluding the terminating null. Return back the number of occurances of // the substring found in . UInt16 TxtReplaceStr(Char *ioStr, UInt16 inMaxLen, const Char *inParamStr, UInt16 inParamNum) INTL_TRAP(intlTxtReplaceStr); // Allocate a handle containing the result of substituting param0...param3 // for ^0...^3 in , and return the locked result. If a parameter // is NULL, replace the corresponding substring in the template with "". Char *TxtParamString(const Char *inTemplate, const Char *param0, const Char *param1, const Char *param2, const Char *param3) INTL_TRAP(intlTxtParamString); // Return the bounds of the character at in the // text, via the & offsets, and also return the // actual value of character at or following . WChar TxtCharBounds(const Char *inText, UInt32 inOffset, UInt32 *outStart, UInt32 *outEnd) INTL_TRAP(intlTxtCharBounds); // Return the appropriate byte position for truncating such that it is // at most bytes long. UInt32 TxtGetTruncationOffset(const Char *inText, UInt32 inOffset) INTL_TRAP(intlTxtGetTruncationOffset); // Search for in . If found return true and pass back // the found position (byte offset) in , and the length of the matched // text in . Boolean TxtFindString(const Char *inSourceStr, const Char *inTargetStr, UInt32 *outPos, UInt16 *outLength) INTL_TRAP(intlTxtFindString); // Find the bounds of the word that contains the character at . // Return the offsets in <*outStart> and <*outEnd>. Return true if the // word we found was not empty & not a delimiter (attribute of first char // in word not equal to space or punct). 
Boolean TxtWordBounds(const Char *inText, UInt32 inLength, UInt32 inOffset, UInt32 *outStart, UInt32 *outEnd) INTL_TRAP(intlTxtWordBounds); // Return the offset of the first break position (for text wrapping) that // occurs at or before in . Note that this routine will // also add trailing spaces and a trailing linefeed to the break position, // thus the result could be greater than . UInt32 TxtGetWordWrapOffset(const Char *iTextP, UInt32 iOffset) INTL_TRAP(intlTxtGetWordWrapOffset); // Return the minimum (lowest) encoding required for . If we // don't know about the character, return encoding_Unknown. CharEncodingType TxtCharEncoding(WChar inChar) INTL_TRAP(intlTxtCharEncoding); // Return the minimum (lowest) encoding required to represent . // This is the maximum encoding of any character in the string, where // highest is unknown, and lowest is ascii. CharEncodingType TxtStrEncoding(const Char *inStr) INTL_TRAP(intlTxtStrEncoding); // Return the higher (max) encoding of and . CharEncodingType TxtMaxEncoding(CharEncodingType a, CharEncodingType b) INTL_TRAP(intlTxtMaxEncoding); // Return a pointer to the 'standard' name for . If the // encoding is unknown, return a pointer to an empty string. const Char *TxtEncodingName(CharEncodingType inEncoding) INTL_TRAP(intlTxtEncodingName); // Map from a character set name to a CharEncodingType. // If the character set name is unknown, return charEncodingUnknown. CharEncodingType TxtNameToEncoding(const Char* iEncodingName) INTL_TRAP(intlTxtNameToEncoding); // Transliterate bytes of text found in , based // on the requested operation. Place the results in , // and set the resulting length in . On entry // must contain the maximum size of the buffer. If the // buffer isn't large enough, return an error (note that outDestText // might have been modified during the operation). 
Note that if // has the preprocess bit set, then is not modified, and // will contain the total space required in the destination // buffer in order to perform the operation. Err TxtTransliterate(const Char *inSrcText, UInt16 inSrcLength, Char *outDstText, UInt16 *ioDstLength, TranslitOpType inOp) INTL_TRAP(intlTxtTransliterate); // Convert <*ioSrcBytes> of text from between the // and character encodings. If is not NULL, write // the resulting bytes to the buffer, and always return the number of // resulting bytes in <*ioDstBytes>. Update <*srcBytes> with the number of // bytes from the beginning of <*srcTextP> that were successfully converted. // When the routine is called with pointing to the beginning of // a string or text buffer, should be true; if the text is // processed in multiple chunks, either because errors occurred or due to // source/destination buffer size constraints, then subsequent calls to // this routine should pass false for . The TxtConvertStateType // record maintains state information so that if the source or destination // character encodings have state or modes (e.g. JIS), processing a single // sequence of text with multiple calls will work correctly. // When an error occurs due to an unconvertable character, the behavior of // the routine will depend on the parameter. If it is NULL, // then <*ioSrcBytes> will be set to the offset of the unconvertable character, // will be set to the number of successfully converted resulting // bytes, and , in not NULL, will contain conversion results up to // the point of the error. The routine will return an appropriate error code, // and it is up to the caller to either terminate conversion or skip over the // unconvertable character and continue the conversion process (passing false // for the parameter in subsequent calls to TxtConvertEncoding). 
// If is not NULL, then this string is written to the // destination buffer when an unconvertable character is encountered in the // source text, and the source character is skipped. Processing continues, though // the error code will still be returned when the routine terminates. Note that // if a more serious error occurs during processing (e.g. buffer overflow) then // that error will be returned even if there was an earlier unconvertable character. // Note that the substitution string must use the destination character encoding. Err TxtConvertEncoding(Boolean newConversion, TxtConvertStateType* ioStateP, const Char* srcTextP, UInt16* ioSrcBytes, CharEncodingType srcEncoding, Char* dstTextP, UInt16* ioDstBytes, CharEncodingType dstEncoding, const Char* substitutionStr, UInt16 substitutionLen) INTL_TRAP(intlTxtConvertEncoding); // Return true if is a valid (drawable) character. Note that we'll // return false if it is a virtual character code. Boolean TxtCharIsValid(WChar inChar) INTL_TRAP(intlTxtCharIsValid); // Compare the first bytes of with the first bytes // of . Return the results of the comparison: < 0 if sorts before // , > 0 if sorts after , and 0 if they are equal. Also return // the number of bytes that matched in and // (either one of which can be NULL if the match length is not needed). // This comparison is "caseless", in the same manner as a find operation, // thus case, character size, etc. don't matter. Int16 TxtCaselessCompare(const Char *s1, UInt16 s1Len, UInt16 *s1MatchLen, const Char *s2, UInt16 s2Len, UInt16 *s2MatchLen) INTL_TRAP(intlTxtCaselessCompare); // Compare the first bytes of with the first bytes // of . Return the results of the comparison: < 0 if sorts before // , > 0 if sorts after , and 0 if they are equal. Also return // the number of bytes that matched in and // (either one of which can be NULL if the match length is not needed). 
Int16 TxtCompare(const Char *s1, UInt16 s1Len, UInt16 *s1MatchLen, const Char *s2, UInt16 s2Len, UInt16 *s2MatchLen) INTL_TRAP(intlTxtCompare); #ifdef __cplusplus } #endif #endif // !STRIP_FUNCTION_HEADERS #endif // __TEXTMGR_H__