/*
 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file picobase.h
 *
 * base functionality
 *
 * Copyright (C) 2008-2009 SVOX AG, Baslerstr. 30, 8048 Zuerich, Switzerland
 * All rights reserved.
 *
 * History:
 * - 2009-04-20 -- initial version
 *
 */

#ifndef PICOBASE_H_
#define PICOBASE_H_

#include "picoos.h"

/* Give the declarations in this header C linkage when included from C++. */
#ifdef __cplusplus
extern "C" {
#endif
/* Never compiled; presumably kept so that editors and indenters see a
 * closing brace matching the 'extern "C" {' above. */
#if 0
}
#endif

/* maximum number of bytes of a single UTF8 character */
#define PICOBASE_UTF8_MAXLEN 4

typedef picoos_uint8 picobase_utf8char[PICOBASE_UTF8_MAXLEN+1]; /* always zero terminated */
|
|
typedef picoos_uint8 picobase_utf8;
|
|
typedef picoos_uint16 picobase_utf16;
|
|
typedef picoos_uint32 picobase_utf32;
|
|
|
|
/* ***************************************************************/
/* Unicode UTF8 functions */
/* ***************************************************************/

/**
|
|
* Determines the number of UTF8 characters contained in
|
|
* the UTF8 string 'utf8str' of maximum length maxlen (in bytes)
|
|
* @param utf8str : a string encoded in UTF8
|
|
* @param maxlen : max length (in bytes) accessible in utf8str
|
|
* @return >=0 : length of the UTF8 string in number of UTF8 characters
|
|
* up to the first '\0' or maxlen
|
|
* @return <0 : not starting with a valid UTF8 character
|
|
* @remarks strict implementation, not allowing invalid utf8
|
|
*/
|
|
picoos_int32 picobase_utf8_length(const picoos_uint8 *utf8str,
|
|
const picoos_uint16 maxlen);
|
|
|
|
|
|
/**
 * Determines the number of bytes a UTF8 character uses, based
 * on the first byte of the UTF8 character
 * @param x : the first (and maybe only) byte of a UTF8 character
 * @return positive value in 1..4 : number of bytes of the UTF8 character
 * @return 0 : if not a valid UTF8 character start (a continuation byte
 *             0x80..0xBF, or a byte >= 0xF8)
 * @remarks Only the lead byte is classified. The bytes 0xC0, 0xC1 and
 *          0xF5..0xF7 are reported as 2- resp. 4-byte starts although they
 *          never occur in well-formed UTF8.
 * @remarks Implemented as a macro standing in for
 *              picoos_uint8 picobase_det_utf8_length(const picoos_uint8 firstchar);
 *          The argument is evaluated several times, so it must not have
 *          side effects.
 * @remarks The argument is converted to picoos_uint8 before it is compared,
 *          so a byte >= 0x80 held in a (possibly signed) char is not
 *          mistaken for a negative value and reported as a 1-byte character.
 */
#define picobase_det_utf8_length(x) \
    ( ((picoos_uint8)(x) <  0x80) ? 1 : \
      ((picoos_uint8)(x) >= 0xF8) ? 0 : \
      ((picoos_uint8)(x) >= 0xF0) ? 4 : \
      ((picoos_uint8)(x) >= 0xE0) ? 3 : \
      ((picoos_uint8)(x) >= 0xC0) ? 2 : 0 )

/**
|
|
* Converts the content of 'utf8str' to lowercase and stores it on 'lowercase'
|
|
* on the first byte of the UTF8 character
|
|
* @param utf8str : utf8 string
|
|
* @param lowercaseMaxLen : maximal number of bytes available in 'lowercase'
|
|
* @param lowercase : string converted to lowercase (output)
|
|
* @param done : flag to report success/failure of the operation (output)
|
|
* @return TRUE if successful, FALSE otherwise
|
|
*/
|
|
picoos_int32 picobase_lowercase_utf8_str (picoos_uchar utf8str[], picoos_char lowercase[], picoos_int32 lowercaseMaxLen, picoos_uint8 * done);
|
|
|
|
/**
|
|
* Converts the content of 'utf8str' to upperrcase and stores it on 'uppercase'
|
|
* @param utf8str : utf8 string
|
|
* @param uppercase : string converted to uppercase (output)
|
|
* @param uppercaseMaxLen : maximal number of bytes available in 'uppercase'
|
|
* @param done : flag to report success/failure of the operation (output)
|
|
* @return TRUE if successful, FALSE otherwise
|
|
*/
|
|
picoos_int32 picobase_uppercase_utf8_str (picoos_uchar utf8str[], picoos_char uppercase[], int uppercaseMaxLen, picoos_uint8 * done);
|
|
|
|
/**
|
|
* Gets next UTF8 character 'utf8char' from the UTF8 string
|
|
* 'utf8s' starting at position 'pos'
|
|
* @param utf8s : UTF8 string
|
|
* @param utf8slenmax : max length accessible in utf8s
|
|
* @param pos : position from where the UTF8 character is checked and copied
|
|
* (set also as output to the position directly following the UTF8 char)
|
|
* @param utf8char : zero terminated UTF8 character containing 1 to 4 bytes (output)
|
|
* @return TRUE if okay
|
|
* @return FALSE if there is no valid UTF8 char or no more UTF8 char available within utf8len
|
|
*/
|
|
picoos_uint8 picobase_get_next_utf8char(const picoos_uint8 *utf8s,
|
|
const picoos_uint32 utf8slenmax,
|
|
picoos_uint32 *pos,
|
|
picobase_utf8char utf8char);
|
|
|
|
/**
|
|
* Same as picobase_get_next_utf8char
|
|
* without copying the char to utf8char
|
|
*/
|
|
picoos_uint8 picobase_get_next_utf8charpos(const picoos_uint8 *utf8s,
|
|
const picoos_uint32 utf8slenmax,
|
|
picoos_uint32 *pos);
|
|
|
|
/**
|
|
* Gets previous UTF8 character 'utf8char' from the UTF8 string
|
|
* 'utf8s' starting the backward search at position 'pos-1'
|
|
* @param utf8s : UTF8 string
|
|
* @param utf8slenmin : min length accessible in utf8s
|
|
* @param pos : the search for the prev UTF8 char starts at [pos-1]
|
|
* (set also as output to the start position of the prev UTF8 character)
|
|
* @param utf8char : zero terminated UTF8 character containing 1 to 4 bytes (output)
|
|
* @return TRUE if okay
|
|
* @return FALSE if there is no valid UTF8 char preceeding pos or no more UTF8 char available within utf8len
|
|
*/
|
|
picoos_uint8 picobase_get_prev_utf8char(const picoos_uint8 *utf8s,
|
|
const picoos_uint32 utf8slenmin,
|
|
picoos_uint32 *pos,
|
|
picobase_utf8char utf8char);
|
|
|
|
/**
|
|
* Same as picobase_get_prev_utf8char
|
|
* without copying the char to utf8char
|
|
*/
|
|
picoos_uint8 picobase_get_prev_utf8charpos(const picoos_uint8 *utf8s,
|
|
const picoos_uint32 utf8slenmin,
|
|
picoos_uint32 *pos);
|
|
|
|
|
|
/**
|
|
* returns TRUE if the input string is UTF8 and uppercase
|
|
* @param str : UTF8 string
|
|
* @param strmaxlen : max length for the input string
|
|
* @return TRUE if string is UTF8 and uppercase
|
|
* @return FALSE otherwise
|
|
*/
|
|
extern picoos_bool picobase_is_utf8_uppercase (picoos_uchar str[], picoos_int32 strmaxlen);
|
|
|
|
/**
|
|
* returns TRUE if the input string is UTF8 and lowercase
|
|
* @param str : UTF8 string
|
|
* @param strmaxlen : max length for the input string
|
|
* @return TRUE if string is UTF8 and lowercase
|
|
* @return FALSE otherwise
|
|
*/
|
|
extern picoos_bool picobase_is_utf8_lowercase (picoos_uchar str[], picoos_int32 strmaxlen);
|
|
|
|
/* Closes the 'extern "C" {' opened near the top of this header. */
#ifdef __cplusplus
}
#endif

#endif /*PICOBASE_H_*/