# Purpose: tighten UTF-8 validation in omniORB 4.1.7
# (src/lib/omniORB/orbcore/cs-UTF-8.cc).
#
# Hunk 1 (@@ -162): declares a new member `int width(int cp);` directly after
#   the validateString() declaration and before the TCS_C_UTF_8 constructor.
# Hunk 2 (@@ -201): in the table of "bytes that follow a first byte"
#   (presumably utf8Count, the table validateString() indexes -- the table's
#   name is not visible in this hunk), the four 111111xx entries `5, 5, 5, 5`
#   become `5, 5` for 1111110x and `6, 6` for 11111110/11111111, which the
#   new comment marks as illegal in UTF-8.
# Hunk 3 (@@ -248): in the first-byte mask table, the shared run of eight 0x03
#   entries for 111110xx/111111xx is split into 0x03 x4 (111110xx),
#   0x01 x2 (1111110x) and 0x00 x2 (11111110/11111111).
# Hunk 4 (@@ -851): rewrites TCS_C_UTF_8::validateString() and adds
#   TCS_C_UTF_8::width().
#   - For every lead byte with a non-zero trailing-byte count, the code point
#     `cp` is now assembled: the low (6 - bytes) bits of the lead byte, then
#     6 bits (tmp & 0x3F) from each trailing byte, each trailing byte still
#     being passed to validateExt() first.
#     NOTE(review): validateExt() is not shown here; it is assumed to throw on
#     a byte that is not a valid continuation byte (including the terminating
#     NUL), which is what keeps `*s++` from running past the end -- confirm.
#   - Counts 4, 5 and 6 now all throw DATA_CONVERSION / DATA_CONVERSION_BadInput.
#     Behaviour change: the removed code threw only for 5 and 6 and accepted a
#     count of 4 after checking its four trailing bytes.
#   - After decoding, the same exception is thrown when cp > 0x10FFFF, when
#     0xD800 <= cp <= 0xDFFF, or when width(cp) != bytes + 1, i.e. when the
#     sequence is not exactly as long as width() says the code point needs
#     (for example 0xC0 0x80 decodes to cp 0, width 1, but occupies 2 bytes).
#   - width(cp) returns 1 for cp <= 0x7F, 2 for cp <= 0x7FF, 3 for cp <= 0xFFFF
#     and 4 otherwise.
#
# NOTE(review): in this copy the line breaks inside the hunks appear to have
# been collapsed, so each physical line below holds many diff lines. If that is
# how the file is really stored, patch(1) cannot apply it as-is; regenerate it
# with `diff -Nur` from the pristine and patched trees instead of hand-editing,
# because the original whitespace of the context lines is not recoverable here.
diff -Nur omniORB-4.1.7/src/lib/omniORB/orbcore/cs-UTF-8.cc omniORB-4.1.7.patched/src/lib/omniORB/orbcore/cs-UTF-8.cc --- omniORB-4.1.7/src/lib/omniORB/orbcore/cs-UTF-8.cc 2012-08-14 14:46:23.000000000 +0200 +++ omniORB-4.1.7.patched/src/lib/omniORB/orbcore/cs-UTF-8.cc 2013-12-03 08:38:33.343353892 +0100 @@ -162,6 +162,8 @@ void validateString(const char* s, CORBA::CompletionStatus completion); + int width(int cp); + TCS_C_UTF_8(GIOP::Version v) : omniCodeSet::TCS_C(omniCodeSet::ID_UTF_8, "UTF-8", omniCodeSet::CS_Other, v) @@ -201,8 +203,11 @@ // 111110xx four more bytes. Too big for UTF-16 4, 4, 4, 4, - // 111111xx five more bytes. *** How does this work? - 5, 5, 5, 5 + // 1111110x five more bytes. + 5, 5, + + // 11111110 and 11111111 are illegal in UTF-8 + 6, 6 }; // Mask to remove the prefix bits from the first byte of a UTF-8 sequence @@ -248,8 +253,14 @@ // 11110xxx 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, - // 111110xx and 111111xx - 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03 + // 111110xx + 0x03, 0x03, 0x03, 0x03, + + // 1111110x + 0x01, 0x01, + + // 11111110 and 11111111 are illegal in UTF-8 + 0x00, 0x00 }; @@ -851,26 +862,49 @@ TCS_C_UTF_8::validateString(const char* cs, CORBA::CompletionStatus completion) { // Check that string is valid UTF-8 data. 
- int bytes; + int bytes, cp; const unsigned char* s = (const unsigned char*)cs; + unsigned char tmp; while (*s) { - bytes = utf8Count[*s++]; + tmp = *s++; // leading byte + bytes = utf8Count[tmp]; // number of trailing bytes - switch (bytes) { - case 6: - case 5: OMNIORB_THROW(DATA_CONVERSION, - DATA_CONVERSION_BadInput, - completion); - case 4: validateExt(*s++, completion); - case 3: validateExt(*s++, completion); - case 2: validateExt(*s++, completion); - case 1: validateExt(*s++, completion); - } + if (bytes!=0) { + cp = tmp & ((1<<(6-bytes))-1); + switch (bytes) { // trailing bytes + case 6: + case 5: + case 4: OMNIORB_THROW(DATA_CONVERSION, + DATA_CONVERSION_BadInput, + completion); + case 3: tmp=*s++; validateExt(tmp, completion); cp = (cp << 6) | (tmp & 0x3F); + case 2: tmp=*s++; validateExt(tmp, completion); cp = (cp << 6) | (tmp & 0x3F); + case 1: tmp=*s++; validateExt(tmp, completion); cp = (cp << 6) | (tmp & 0x3F); + } + if (cp>0x10FFFF || (0xD800<=cp && cp<=0xDFFF) || width(cp)!=bytes+1) + OMNIORB_THROW(DATA_CONVERSION, + DATA_CONVERSION_BadInput, + completion); + } } } +int +TCS_C_UTF_8::width(int cp) +{ + if (cp<=0x7F) + return 1; + else if (cp<=0x7FF) + return 2; + else if (cp<=0xFFFF) + return 3; + else + return 4; +} + + // // Initialiser //