123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162 |
- /***************************************************************************
- *
- * Project _____ __ ____ _ _
- * ( _ ) /__\ (_ _)_| |_ _| |_
- * )(_)( /(__)\ )( (_ _)(_ _)
- * (_____)(__)(__)(__) |_| |_|
- *
- *
- * Copyright 2018-present, Leonid Stryzhevskyi <lganzzzo@gmail.com>
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- ***************************************************************************/
- #include "Unicode.hpp"
- #if defined(WIN32) || defined(_WIN32)
- #include <winsock2.h>
- #else
- #include <arpa/inet.h>
- #endif
- namespace oatpp { namespace encoding {
-
- v_buff_size Unicode::getUtf8CharSequenceLength(v_char8 firstByte) {
-
- if(firstByte < 128){
- return 1;
- }
-
- if((firstByte | 192) != firstByte){
- return 0;
- }
-
- if((firstByte | 32) != firstByte){
- return 2;
- } else if((firstByte | 16) != firstByte){
- return 3;
- } else if((firstByte | 8) != firstByte){
- return 4;
- } else if((firstByte | 4) != firstByte){
- return 5;
- } else if((firstByte | 2) != firstByte){
- return 6;
- } else {
- return 0;
- }
-
- }
-
- v_buff_size Unicode::getUtf8CharSequenceLengthForCode(v_uint32 code){
- if(code < 128) {
- return 1;
- } else if(code < 0x00000800){
- return 2;
- } else if(code < 0x00010000){
- return 3;
- } else if(code < 0x00200000){
- return 4;
- } else if(code < 0x04000000){
- return 5;
- } else {
- return 6;
- }
- }
-
- v_int32 Unicode::encodeUtf8Char(const char* sequence, v_buff_size& length){
- v_char8 byte = sequence[0];
- if(byte > 127){
- v_int32 code;
- if((byte | 32) != byte){
- length = 2;
- code = ((31 & byte) << 6) | (sequence[1] & 63);
- return code;
- } else if((byte | 16) != byte){
- code = (15 & byte) << 12;
- length = 3;
- } else if((byte | 8) != byte){
- length = 4;
- v_int32 value = *((p_int32)sequence);
- code = ((7 & byte) << 18) |
- (((value >> 24) & 0xFF) & 63) |
- (((value >> 16) & 0xFF) & 63) << 6 |
- (((value >> 8) & 0xFF) & 63) << 12;
- return code;
- } else if((byte | 4) != byte){
- code = (3 & byte) << 24;
- length = 5;
- } else if((byte | 2) != byte){
- code = (1 & byte) << 30;
- length = 6;
- } else {
- return -1;
- }
-
- v_char8 bitIndex = 0;
- for(v_buff_size i = length; i > 1; i--){
- code |= (sequence[i - 1] & 63) << bitIndex;
- bitIndex += 6;
- }
- return code;
- } else {
- length = 1;
- return byte;
- }
- }
-
- v_buff_size Unicode::decodeUtf8Char(v_int32 code, p_char8 buffer) {
- if(code >= 0x00000080 && code < 0x00000800){
- *((p_int16) buffer) = htons(((((code >> 6) & 31) | 192) << 8) | ((code & 63) | 128));
- return 2;
- } else if(code >= 0x00000800 && code < 0x00010000){
- *((p_int16) buffer) = htons((((( code >> 12 ) & 15) | 224) << 8) |
- (((code >> 6 ) & 63) | 128));
- buffer[2] = (code & 63) | 128;
- return 3;
- } else if(code >= 0x00010000 && code < 0x00200000){
- *((p_int32) buffer) = htonl(((((code >> 18 ) & 7) | 240) << 24) |
- ((((code >> 12 ) & 63) | 128) << 16) |
- ((((code >> 6 ) & 63) | 128) << 8) |
- (( code & 63) | 128) );
- return 4;
- } else if(code >= 0x00200000 && code < 0x04000000){
- *((p_int32) buffer) = htonl(((((code >> 24 ) & 3) | 248) << 24) |
- ((((code >> 18 ) & 63) | 128) << 16) |
- ((((code >> 12 ) & 63) | 128) << 8) |
- (((code >> 6 ) & 63) | 128));
- buffer[4] = (code & 63) | 128;
- return 5;
- } else if(code >= 0x04000000){
- *((p_int32) buffer) = htonl(((((code >> 30 ) & 1) | 252) << 24) |
- ((((code >> 24 ) & 63) | 128) << 16) |
- ((((code >> 18 ) & 63) | 128) << 8) |
- (((code >> 12 ) & 63) | 128));
- *((p_int16) &buffer[4]) = htons(((((code >> 6 ) & 63) | 128) << 8) | (code & 63));
- return 6;
- }
- buffer[0] = v_char8(code);
- return 1;
- }
-
- void Unicode::codeToUtf16SurrogatePair(v_int32 code, v_int16& high, v_int16& low){
- code -= 0x010000;
- high = 0xD800 + ((code >> 10) & 1023);
- low = 0xDC00 + (code & 1023);
- }
-
- v_int32 Unicode::utf16SurrogatePairToCode(v_int16 high, v_int16 low){
- return (((low - 0xDC00) & 1023) | (((high - 0xD800) & 1023) << 10)) + 0x010000;
- }
-
- }}
|