Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

wstrings.c

Go to the documentation of this file.
00001 /****************************** Module Header ******************************\ 00002 * Module Name: wstrings.c 00003 * 00004 * Copyright (c) 1985 - 1999, Microsoft Corporation 00005 * 00006 * History: 00007 * 03-20-91 IanJa Created 00008 \***************************************************************************/ 00009 00010 #include "precomp.h" 00011 #pragma hdrstop 00012 00013 /* LATER these should be in a public header file!!! 00014 * Assorted defines used to support the standard Windows ANSI code page 00015 * (now known as code page 1252 and officially registered by IBM). 00016 * This is intended only for the PDK release. Subsequent releases will 00017 * use the NLSAPI and Unicode. 00018 */ 00019 #define LATIN_CAPITAL_LETTER_A_GRAVE (WCHAR)0xc0 00020 #define LATIN_CAPITAL_LETTER_THORN (WCHAR)0xde 00021 #define LATIN_SMALL_LETTER_SHARP_S (WCHAR)0xdf 00022 #define LATIN_SMALL_LETTER_Y_DIAERESIS (WCHAR)0xff 00023 #define DIVISION_SIGN (WCHAR)0xf7 00024 #define MULTIPLICATION_SIGN (WCHAR)0xd7 00025 00026 00027 /* 00028 * Temporary defines to support Unicode block 1 (0x0000 - 0x00ff). 00029 */ 00030 #define WCTOA(wch) ((wch) & 0xff) 00031 #define IS_UNICODE_BLK1(wch) ((int)(wch) <= 0x00ff) 00032 00033 00034 /***************************************************************************\ 00035 * CharLowerW (API) 00036 * 00037 * Convert either a single character or an entire string to lower case. The 00038 * two cases are differentiated by checking the high-word of pwsz. If it is 00039 * 0 then we just convert the low-word of pwsz. 00040 * 00041 * History: 00042 * 06-24-91 GregoryW Created. Supports Unicode equivalent of code 00043 * page 1252 (simple zero extension). This is for 00044 * the PDK release only. After the PDK this routine 00045 * will be modified to use the NLSAPI. 00046 * 02-11-93 IanJa Modified to use NLS API. 00047 \***************************************************************************/ 00048 00049 LPWSTR WINAPI CharLowerW( 00050 LPWSTR pwsz) 00051 { 00052 /* 00053 * Early out for NULL string or '\0' 00054 */ 00055 if (pwsz == NULL) { 00056 return pwsz; 00057 } 00058 00059 if (!IS_PTR(pwsz)) { 00060 if (!LCMapStringW( 00061 LOCALE_USER_DEFAULT, 00062 LCMAP_LOWERCASE, 00063 (LPWSTR)&pwsz, 00064 1, 00065 (LPWSTR)&pwsz, 00066 1 00067 )) { 00068 /* 00069 * We don't expect LCMapString to fail! The caller is not expecting 00070 * failure, CharLowerW does not have a failure indicator, so we do 00071 * nothing. 00072 */ 00073 RIPMSG1(RIP_WARNING, "CharLowerW(%#p): LCMapString failed\n", pwsz); 00074 } 00075 00076 return pwsz; 00077 } 00078 00079 /* 00080 * pwsz is a null-terminated string 00081 */ 00082 CharLowerBuffW(pwsz, wcslen(pwsz)+1); 00083 return pwsz; 00084 } 00085 00086 00087 /***************************************************************************\ 00088 * CharUpperW (API) 00089 * 00090 * Convert either a single character or an entire string to upper case. The 00091 * two cases are differentiated by checking the high-word of pwsz. If it is 00092 * 0 then we just convert the low-word of pwsz. 00093 * 00094 * History: 00095 * 06-24-91 GregoryW Created. Supports Unicode equivalent of code 00096 * page 1252 (simple zero extension). This is for 00097 * the PDK release only. After the PDK this routine 00098 * will be modified to use the NLSAPI. 00099 * 02-11-93 IanJa Modified to use NLS API. 00100 \***************************************************************************/ 00101 00102 LPWSTR WINAPI CharUpperW( 00103 LPWSTR pwsz) 00104 { 00105 /* 00106 * Early out for NULL string or '\0' 00107 */ 00108 if (pwsz == NULL) { 00109 return pwsz; 00110 } 00111 00112 if (!IS_PTR(pwsz)) { 00113 if (!LCMapStringW( 00114 LOCALE_USER_DEFAULT, 00115 LCMAP_UPPERCASE, 00116 (LPWSTR)&pwsz, 00117 1, 00118 (LPWSTR)&pwsz, 00119 1 00120 )) { 00121 /* 00122 * We don't expect LCMapString to fail! The caller is not expecting 00123 * failure, CharLowerW does not have a failure indicator, so we do 00124 * nothing. 00125 */ 00126 RIPMSG1(RIP_WARNING, "CharUpperW(%#p): LCMapString failed", pwsz); 00127 } 00128 00129 return pwsz; 00130 } 00131 00132 /* 00133 * pwsz is a null-terminated string 00134 */ 00135 CharUpperBuffW(pwsz, wcslen(pwsz)+1); 00136 return pwsz; 00137 } 00138 00139 00140 /***************************************************************************\ 00141 * CharNextW (API) 00142 * 00143 * Move to next character in string unless already at '\0' terminator 00144 * 00145 * History: 00146 * 06-24-91 GregoryW Created. This routine will not work for non-spacing 00147 * characters!! This version is only intended for 00148 * limited use in the PDK release. 00149 * 02-20-92 GregoryW Modified to work with combining marks (formerly known 00150 * as non-spacing). 00151 * 09-21-93 JulieB Added ALPHA to combining mark code. 00152 \***************************************************************************/ 00153 00154 LPWSTR WINAPI CharNextW( 00155 LPCWSTR lpwCurrentChar) 00156 { 00157 WORD ctype3info; 00158 00159 if (*lpwCurrentChar) { 00160 // 00161 // Examine each code element. Skip all combining elements. 00162 // 00163 while (*(++lpwCurrentChar)) { 00164 if (!GetStringTypeW( 00165 CT_CTYPE3, 00166 lpwCurrentChar, 00167 1, 00168 &ctype3info)) { 00169 /* 00170 * GetStringTypeW failed! The caller is not expecting failure, 00171 * CharNextW does not have a failure indicator, so just return 00172 * a pointer to the character we couldn't analyze. 00173 */ 00174 RIPMSG2(RIP_WARNING, "CharNextW failed, L'\\x%.4x' at %#p", 00175 *lpwCurrentChar, lpwCurrentChar); 00176 break; 00177 } 00178 if (!((ctype3info & C3_NONSPACING) && (!(ctype3info & C3_ALPHA)))) { 00179 break; 00180 } 00181 } 00182 } 00183 00184 return (LPWSTR)lpwCurrentChar; 00185 } 00186 00187 00188 /***************************************************************************\ 00189 * CharPrevW (API) 00190 * 00191 * Move to previous character in string, unless already at start 00192 * 00193 * History: 00194 * 06-24-91 GregoryW Created. This routine will not work for non-spacing 00195 * characters!! This version is only intended for 00196 * limited use in the PDK release. 00197 * 02-20-92 GregoryW Modified to work with combining marks (formerly 00198 * known as non-spacing). 00199 * 09-21-93 JulieB Added ALPHA to combining mark code. 00200 * 12-06-93 JulieB Fixed combining mark code. 00201 \***************************************************************************/ 00202 00203 LPWSTR WINAPI CharPrevW( 00204 LPCWSTR lpwStart, 00205 LPCWSTR lpwCurrentChar) 00206 { 00207 WORD ctype3info; 00208 LPWSTR lpwValidChar = (LPWSTR)lpwCurrentChar; 00209 00210 00211 if (lpwCurrentChar > lpwStart) { 00212 // 00213 // Examine each code element. Skip all combining elements. 00214 // 00215 while (lpwCurrentChar-- > lpwStart) { 00216 if (!GetStringTypeW( 00217 CT_CTYPE3, 00218 lpwCurrentChar, 00219 1, 00220 &ctype3info)) { 00221 /* 00222 * GetStringTypeW failed! The caller is not expecting failure, 00223 * CharPrevW does not have a failure indicator, so just return 00224 * a pointer to the character we couldn't analyze. 00225 */ 00226 RIPMSG2(RIP_WARNING, "CharPrevW failed, L'\\x%.4x' at %#p", 00227 *lpwCurrentChar, lpwCurrentChar); 00228 break; 00229 } 00230 if (!((ctype3info & C3_NONSPACING) && (!(ctype3info & C3_ALPHA)))) { 00231 lpwValidChar = (LPWSTR)lpwCurrentChar; 00232 break; // found non-combining code element 00233 } 00234 } 00235 } 00236 00237 return (LPWSTR)lpwValidChar; 00238 } 00239 00240 00241 /***************************************************************************\ 00242 * CharLowerBuffW (API) 00243 * 00244 * History: 00245 * 06-24-91 GregoryW Created. This version only supports Unicode 00246 * block 1 (0x0000 - 0x00ff). All other code points 00247 * are copied verbatim. This version is intended 00248 * only for the PDK release. 00249 * 02-11-93 IanJa Modified to use NLS API. 00250 \***************************************************************************/ 00251 00252 DWORD WINAPI CharLowerBuffW( 00253 LPWSTR pwsz, 00254 DWORD cwch) 00255 { 00256 int cwchT; 00257 DWORD i; 00258 00259 if (cwch == 0) { 00260 return 0; 00261 } 00262 00263 cwchT = LCMapStringW(LOCALE_USER_DEFAULT, LCMAP_LOWERCASE, 00264 pwsz, cwch, pwsz, cwch); 00265 00266 if (cwchT != 0) { 00267 return cwchT; 00268 } 00269 00270 /* 00271 * LCMapString failed! The caller is not expecting failure, 00272 * CharLowerBuffW does not have a failure indicator, so we 00273 * convert the buffer to lower case as best we can. 00274 */ 00275 RIPMSG1(RIP_WARNING, "CharLowerBuffW(%ls) failed", pwsz); 00276 00277 for (i=0; i < cwch; i++) { 00278 if (IS_UNICODE_BLK1(pwsz[i]) && IsCharUpperA((char)pwsz[i])) { 00279 pwsz[i] += 'a'-'A'; 00280 } 00281 } 00282 00283 return cwch; 00284 } 00285 00286 00287 /***************************************************************************\ 00288 * CharUpperBuffW (API) 00289 * 00290 * History: 00291 * 06-24-91 GregoryW Created. This version only supports Unicode 00292 * block 1 (0x0000 - 0x00ff). All other code points 00293 * are copied verbatim. This version is intended 00294 * only for the PDK release. 00295 * 02-11-93 IanJa Modified to use NLS API. 00296 \***************************************************************************/ 00297 00298 DWORD WINAPI CharUpperBuffW( 00299 LPWSTR pwsz, 00300 DWORD cwch) 00301 { 00302 int cwchT; 00303 DWORD i; 00304 00305 if (cwch == 0) { 00306 return 0; 00307 } 00308 00309 cwchT = LCMapStringW(LOCALE_USER_DEFAULT, LCMAP_UPPERCASE, 00310 pwsz, cwch, pwsz, cwch); 00311 00312 if (cwchT != 0) { 00313 return cwchT; 00314 } 00315 00316 /* 00317 * LCMapString failed! The caller is not expecting failure, 00318 * CharUpperBuffW does not have a failure indicator, so we 00319 * convert the buffer to upper case as best we can. 00320 */ 00321 RIPMSG1(RIP_WARNING, "CharUpperBuffW(%ls) failed", pwsz); 00322 00323 for (i=0; i < cwch; i++) { 00324 if (IS_UNICODE_BLK1(pwsz[i]) && 00325 IsCharLowerA((char)pwsz[i]) && 00326 (pwsz[i] != LATIN_SMALL_LETTER_SHARP_S) && 00327 (pwsz[i] != LATIN_SMALL_LETTER_Y_DIAERESIS)) { 00328 pwsz[i] += (WCHAR)('A'-'a'); 00329 } 00330 } 00331 00332 return cwch; 00333 } 00334 00335 00336 00337 /***************************************************************************\ 00338 * IsCharLowerW (API) 00339 * 00340 * History: 00341 * 06-24-91 GregoryW Created. This version only supports Unicode 00342 * block 1 (0x0000 - 0x00ff). FALSE is returned 00343 * for all other code points. This version is intended 00344 * only for the PDK release. 00345 * 02-20-92 GregoryW Modified to use NLS API. 00346 \***************************************************************************/ 00347 00348 BOOL WINAPI IsCharLowerW( 00349 WCHAR wChar) 00350 { 00351 WORD ctype1info; 00352 00353 if (GetStringTypeW(CT_CTYPE1, &wChar, 1, &ctype1info)) { 00354 if (ctype1info & C1_LOWER) { 00355 return TRUE; 00356 } else { 00357 return FALSE; 00358 } 00359 } 00360 00361 /* 00362 * GetStringTypeW failed! The caller is not expecting 00363 * failure, IsCharLowerW does not have a failure indicator, so we 00364 * determine the case as best we can. 00365 */ 00366 RIPMSG1(RIP_WARNING, "IsCharLowerW(L'\\x%.4lx') failed", wChar); 00367 00368 if (IS_UNICODE_BLK1(wChar)) { 00369 return IsCharLowerA((CHAR)wChar); 00370 } else { 00371 return FALSE; 00372 } 00373 } 00374 00375 00376 /***************************************************************************\ 00377 * IsCharUpperW (API) 00378 * 00379 * History: 00380 * 06-24-91 GregoryW Created. This version only supports Unicode 00381 * block 1 (0x0000 - 0x00ff). FALSE is returned 00382 * for all other code points. This version is intended 00383 * only for the PDK release. 00384 * 02-20-92 GregoryW Modified to use NLS API. 00385 \***************************************************************************/ 00386 00387 BOOL WINAPI IsCharUpperW( 00388 WCHAR wChar) 00389 { 00390 WORD ctype1info; 00391 00392 if (GetStringTypeW(CT_CTYPE1, &wChar, 1, &ctype1info)) { 00393 if (ctype1info & C1_UPPER) { 00394 return TRUE; 00395 } else { 00396 return FALSE; 00397 } 00398 } 00399 00400 /* 00401 * GetStringTypeW failed! The caller is not expecting 00402 * failure, IsCharLowerW does not have a failure indicator, so we 00403 * determine the case as best we can. 00404 */ 00405 RIPMSG1(RIP_WARNING, "IsCharUpper(L'\\x%.4lx') failed", wChar); 00406 00407 if (IS_UNICODE_BLK1(wChar)) { 00408 return IsCharUpperA((CHAR)wChar); 00409 } else { 00410 return FALSE; 00411 } 00412 } 00413 00414 00415 /***************************************************************************\ 00416 * IsCharAlphaNumericW (API) 00417 * 00418 * Returns TRUE if character is alphabetical or numerical, otherwise FALSE 00419 * 00420 * History: 00421 * 06-24-91 GregoryW Created. This version only supports Unicode 00422 * block 1 (0x0000 - 0x00ff). 00423 * This version is intended only for the PDK release. 00424 * 02-20-92 GregoryW Modified to use NLS API. 00425 \***************************************************************************/ 00426 00427 BOOL WINAPI IsCharAlphaNumericW( 00428 WCHAR wChar) 00429 { 00430 WORD ctype1info; 00431 00432 if (!GetStringTypeW(CT_CTYPE1, &wChar, 1, &ctype1info)) { 00433 // 00434 // GetStringTypeW returned an error! IsCharAlphaNumericW has no 00435 // provision for returning an error... The best we can do is to 00436 // return FALSE 00437 // 00438 UserAssert(FALSE); 00439 return FALSE; 00440 } 00441 // 00442 // LATER 20 Feb 92 GregoryW 00443 // We may need to check ctype 3 info if we want to check for 00444 // digits other than ASCII '0'-'9' (such as Lao digits or 00445 // Tibetan digits, etc.). 00446 // 00447 #ifdef FE_SB // IsCharAlphaNumericW() 00448 if (ctype1info & C1_ALPHA) { 00449 WORD ctype3info = 0; 00450 /* 00451 * We don't want to return TRUE for halfwidth katakana. 00452 * Katakana is linguistic character (C1_ALPHA), but it is not 00453 * alphabet character. 00454 */ 00455 if (!GetStringTypeW(CT_CTYPE3, &wChar, 1, &ctype3info)) { 00456 UserAssert(FALSE); 00457 /* 00458 * Assume, it is alphabet character, because it has 00459 * C1_ALPHA attribute. 00460 */ 00461 return TRUE; 00462 } 00463 00464 if (ctype3info & (C3_KATAKANA|C3_HIRAGANA)) { 00465 /* 00466 * This is 'Katakana'. 00467 */ 00468 return FALSE; 00469 } else { 00470 return TRUE; 00471 } 00472 } else if (ctype1info & C1_DIGIT) { 00473 return TRUE; 00474 } else { 00475 return FALSE; 00476 } 00477 #else 00478 if ((ctype1info & C1_ALPHA) || (ctype1info & C1_DIGIT)) { 00479 return TRUE; 00480 } else { 00481 return FALSE; 00482 } 00483 #endif // FE_SB 00484 } 00485 00486 00487 /***************************************************************************\ 00488 * IsCharAlphaW (API) 00489 * 00490 * Returns TRUE if character is alphabetical, otherwise FALSE 00491 * 00492 * History: 00493 * 06-24-91 GregoryW Created. This version only supports Unicode 00494 * block 1 (0x0000 - 0x00ff). 00495 * This version is intended only for the PDK release. 00496 * 02-20-92 GregoryW Modified to use NLS API. 00497 \***************************************************************************/ 00498 00499 BOOL WINAPI IsCharAlphaW( 00500 WCHAR wChar) 00501 { 00502 WORD ctype1info; 00503 00504 if (!GetStringTypeW(CT_CTYPE1, &wChar, 1, &ctype1info)) { 00505 // 00506 // GetStringTypeW returned an error! IsCharAlphaW has no 00507 // provision for returning an error... The best we can do 00508 // is to return FALSE 00509 // 00510 UserAssert(FALSE); 00511 return FALSE; 00512 } 00513 if (ctype1info & C1_ALPHA) { 00514 #ifdef FE_SB // IsCharAlphaA() 00515 WORD ctype3info = 0; 00516 /* 00517 * We don't want to return TRUE for halfwidth katakana. 00518 * Katakana is linguistic character (C1_ALPHA), but it is not 00519 * alphabet character. 00520 */ 00521 if (!GetStringTypeW(CT_CTYPE3, &wChar, 1, &ctype3info)) { 00522 UserAssert(FALSE); 00523 /* 00524 * Assume, it is alphabet character, because it has 00525 * C1_ALPHA attribute. 00526 */ 00527 return TRUE; 00528 } 00529 00530 if (ctype3info & (C3_KATAKANA|C3_HIRAGANA)) { 00531 /* 00532 * This is 'Katakana'. 00533 */ 00534 return FALSE; 00535 } else { 00536 return TRUE; 00537 } 00538 #else 00539 return TRUE; 00540 #endif // FE_SB 00541 } else { 00542 return FALSE; 00543 } 00544 }

Generated on Sat May 15 19:42:29 2004 for test by doxygen 1.3.7