/* correct.cpp */
/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/* enchant
 * Copyright (C) 2003 Dom Lachowicz
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 * In addition, as a special exception, Dom Lachowicz
 * gives permission to link the code of this program with
 * non-LGPL Spelling Provider libraries (eg: a MSFT Office
 * spell checker backend) and distribute linked combinations including
 * the two. You must obey the GNU Lesser General Public License in all
 * respects for all of the code used other than said providers. If you modify
 * this file, you may extend this exception to your version of the
 * file, but you are not obligated to do so. If you do not wish to
 * do so, delete this exception statement from your version.
 */

/*
 * correct.c - Routines to manage the higher-level aspects of spell-checking
 *
 * This code originally resided in ispell.c, but was moved here to keep
 * file sizes smaller.
 *
 * Copyright (c), 1983, by Pace Willisson
 *
 * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All modifications to the source code must be clearly marked as
 *    such. Binary redistributions based on modified source code
 *    must be clearly marked as modified versions in the documentation
 *    and/or other materials provided with the distribution.
 * 4. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgment:
 *      This product includes software developed by Geoff Kuenning and
 *      other unpaid contributors.
 * 5. The name of Geoff Kuenning may not be used to endorse or promote
 *    products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $Log$
 * Revision 1.1 2004/01/31 16:44:12 zrusin
 * ISpell plugin.
 *
 * Revision 1.4 2003/08/14 17:51:26 dom
 * update license - exception clause should be Lesser GPL
 *
 * Revision 1.3 2003/07/28 20:40:25 dom
 * fix up the license clause, further win32-registry proof some directory getting functions
 *
 * Revision 1.2 2003/07/16 22:52:35 dom
 * LGPL + exception license
 *
 * Revision 1.1 2003/07/15 01:15:04 dom
 * ispell enchant backend
 *
 * Revision 1.2 2003/01/29 05:50:11 hippietrail
 *
 * Fixed my mess in EncodingManager.
 * Changed many C casts to C++ casts.
 *
 * Revision 1.1 2003/01/24 05:52:31 hippietrail
 *
 * Refactored ispell code. Old ispell global variables had been put into
 * an allocated structure, a pointer to which was passed to many functions.
 * I have now made all such functions and variables private members of the
 * ISpellChecker class. It was C OO, now it's C++ OO.
 *
 * I've fixed the makefiles and tested compilation but am unable to test
 * operation. Please back out my changes if they cause problems which
 * are not obvious or easy to fix.
 *
 * Revision 1.7 2002/09/19 05:31:15 hippietrail
 *
 * More Ispell cleanup. Conditional globals and DEREF macros are removed.
 * K&R function declarations removed, converted to Doxygen style comments
 * where possible. No code has been changed (I hope). Compiles for me but
 * unable to test.
 *
 * Revision 1.6 2002/09/17 03:03:28 hippietrail
 *
 * After seeking permission on the developer list I've reformatted all the
 * spelling source which seemed to have parts which used 2, 3, 4, and 8
 * spaces for tabs. It should all look good with our standard 4-space
 * tabs now.
 * I've concentrated just on indentation in the actual code. More prettying
 * could be done.
 * * NO code changes were made *
 *
 * Revision 1.5 2002/09/13 17:20:12 mpritchett
 * Fix more warnings for Linux build
 *
 * Revision 1.4 2002/03/06 08:27:16 fjfranklin
 * o Only activate compound handling when the hash file says so (Per Larsson)
 *
 * Revision 1.3 2001/05/14 09:52:50 hub
 * Removed newMain.c from GNUmakefile.am
 *
 * C++ comments are not C comment. Changed to C comments
 *
 * Revision 1.2 2001/05/12 16:05:42 thomasf
 * Big pseudo changes to ispell to make it pass around a structure rather
 * than rely on all sorts of gloabals willy nilly here and there. Also
 * fixed our spelling class to work with accepting suggestions once more.
 * This code is dirty, gross and ugly (not to mention still not supporting
 * multiple hash sized just yet) but it works on my machine and will no
 * doubt break other machines.
 *
 * Revision 1.1 2001/04/15 16:01:24 tomas_f
 * moving to spell/xp
 *
 * Revision 1.2 1999/10/05 16:17:28 paul
 * Fixed build, and other tidyness.
 * Spell dialog enabled by default, with keyboard binding of F7.
 *
 * Revision 1.1 1999/09/29 23:33:32 justin
 * Updates to the underlying ispell-based code to support suggested corrections.
 *
 * Revision 1.59 1995/08/05 23:19:43 geoff
 * Fix a bug that caused offsets for long lines to be confused if the
 * line started with a quoting uparrow.
 *
 * Revision 1.58 1994/11/02 06:56:00 geoff
 * Remove the anyword feature, which I've decided is a bad idea.
 *
 * Revision 1.57 1994/10/26 05:12:39 geoff
 * Try boundary characters when inserting or substituting letters, except
 * (naturally) at word boundaries.
 *
 * Revision 1.56 1994/10/25 05:46:30 geoff
 * Fix an assignment inside a conditional that could generate spurious
 * warnings (as well as being bad style). Add support for the FF_ANYWORD
 * option.
 *
 * Revision 1.55 1994/09/16 04:48:24 geoff
 * Don't pass newlines from the input to various other routines, and
 * don't assume that those routines leave the input unchanged.
 *
 * Revision 1.54 1994/09/01 06:06:41 geoff
 * Change erasechar/killchar to uerasechar/ukillchar to avoid
 * shared-library problems on HP systems.
 *
 * Revision 1.53 1994/08/31 05:58:38 geoff
 * Add code to handle extremely long lines in -a mode without splitting
 * words or reporting incorrect offsets.
 *
 * Revision 1.52 1994/05/25 04:29:24 geoff
 * Fix a bug that caused line widths to be calculated incorrectly when
 * displaying lines containing tabs. Fix a couple of places where
 * characters were sign-extended incorrectly, which could cause 8-bit
 * characters to be displayed wrong.
 *
 * Revision 1.51 1994/05/17 06:44:05 geoff
 * Add support for controlled compound formation and the COMPOUNDONLY
 * option to affix flags.
 *
 * Revision 1.50 1994/04/27 05:20:14 geoff
 * Allow compound words to be formed from more than two components
 *
 * Revision 1.49 1994/04/27 01:50:31 geoff
 * Add support to correctly capitalize words generated as a result of a
 * missing-space suggestion.
 *
 * Revision 1.48 1994/04/03 23:23:02 geoff
 * Clean up the code in missingspace() to be a bit simpler and more
 * efficient.
 *
 * Revision 1.47 1994/03/15 06:24:23 geoff
 * Fix the +/-/~ commands to be independent. Allow the + command to
 * receive a suffix which is a deformatter type (currently hardwired to
 * be either tex or nroff/troff).
 *
 * Revision 1.46 1994/02/21 00:20:03 geoff
 * Fix some bugs that could cause bad displays in the interaction between
 * TeX parsing and string characters. Show_char now will not overrun
 * the inverse-video display area by accident.
 *
 * Revision 1.45 1994/02/14 00:34:51 geoff
 * Fix correct to accept length parameters for ctok and itok, so that it
 * can pass them to the to/from ichar routines.
 *
 * Revision 1.44 1994/01/25 07:11:22 geoff
 * Get rid of all old RCS log lines in preparation for the 3.1 release.
00220 * 00221 */ 00222 00223 #include <stdlib.h> 00224 #include <string.h> 00225 #include <ctype.h> 00226 #include "ispell_checker.h" 00227 #include "msgs.h" 00228 00229 /* 00230 extern void upcase P ((ichar_t * string)); 00231 extern void lowcase P ((ichar_t * string)); 00232 extern ichar_t * strtosichar P ((char * in, int canonical)); 00233 00234 int compoundflag = COMPOUND_CONTROLLED; 00235 */ 00236 00237 /* 00238 * \param a 00239 * \param b 00240 * \param canonical NZ for canonical string chars 00241 * 00242 * \return 00243 */ 00244 int 00245 ISpellChecker::casecmp (char *a, char *b, int canonical) 00246 { 00247 register ichar_t * ap; 00248 register ichar_t * bp; 00249 ichar_t inta[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; 00250 ichar_t intb[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; 00251 00252 strtoichar (inta, a, sizeof inta, canonical); 00253 strtoichar (intb, b, sizeof intb, canonical); 00254 for (ap = inta, bp = intb; *ap != 0; ap++, bp++) 00255 { 00256 if (*ap != *bp) 00257 { 00258 if (*bp == '\0') 00259 return m_hashheader.sortorder[*ap]; 00260 else if (mylower (*ap)) 00261 { 00262 if (mylower (*bp) || mytoupper (*ap) != *bp) 00263 return static_cast<int>(m_hashheader.sortorder[*ap]) 00264 - static_cast<int>(m_hashheader.sortorder[*bp]); 00265 } 00266 else 00267 { 00268 if (myupper (*bp) || mytolower (*ap) != *bp) 00269 return static_cast<int>(m_hashheader.sortorder[*ap]) 00270 - static_cast<int>(m_hashheader.sortorder[*bp]); 00271 } 00272 } 00273 } 00274 if (*bp != '\0') 00275 return -static_cast<int>(m_hashheader.sortorder[*bp]); 00276 for (ap = inta, bp = intb; *ap; ap++, bp++) 00277 { 00278 if (*ap != *bp) 00279 { 00280 return static_cast<int>(m_hashheader.sortorder[*ap]) 00281 - static_cast<int>(m_hashheader.sortorder[*bp]); 00282 } 00283 } 00284 return 0; 00285 } 00286 00287 /* 00288 * \param word 00289 */ 00290 void 00291 ISpellChecker::makepossibilities (ichar_t *word) 00292 { 00293 register int i; 00294 00295 for (i = 0; i < MAXPOSSIBLE; i++) 00296 
m_possibilities[i][0] = 0; 00297 m_pcount = 0; 00298 m_maxposslen = 0; 00299 m_easypossibilities = 0; 00300 00301 #ifndef NO_CAPITALIZATION_SUPPORT 00302 wrongcapital (word); 00303 #endif 00304 00305 /* 00306 * according to Pollock and Zamora, CACM April 1984 (V. 27, No. 4), 00307 * page 363, the correct order for this is: 00308 * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION 00309 * thus, it was exactly backwards in the old version. -- PWP 00310 */ 00311 00312 if (m_pcount < MAXPOSSIBLE) 00313 missingletter (word); /* omission */ 00314 if (m_pcount < MAXPOSSIBLE) 00315 transposedletter (word); /* transposition */ 00316 if (m_pcount < MAXPOSSIBLE) 00317 extraletter (word); /* insertion */ 00318 if (m_pcount < MAXPOSSIBLE) 00319 wrongletter (word); /* substitution */ 00320 00321 if ((m_hashheader.compoundflag != COMPOUND_ANYTIME) && 00322 m_pcount < MAXPOSSIBLE) 00323 missingspace (word); /* two words */ 00324 00325 } 00326 00327 /* 00328 * \param word 00329 * 00330 * \return 00331 */ 00332 int 00333 ISpellChecker::insert (ichar_t *word) 00334 { 00335 register int i; 00336 register char * realword; 00337 00338 realword = ichartosstr (word, 0); 00339 for (i = 0; i < m_pcount; i++) 00340 { 00341 if (strcmp (m_possibilities[i], realword) == 0) 00342 return (0); 00343 } 00344 00345 strcpy (m_possibilities[m_pcount++], realword); 00346 i = strlen (realword); 00347 if (i > m_maxposslen) 00348 m_maxposslen = i; 00349 if (m_pcount >= MAXPOSSIBLE) 00350 return (-1); 00351 else 00352 return (0); 00353 } 00354 00355 #ifndef NO_CAPITALIZATION_SUPPORT 00356 /* 00357 * \param word 00358 */ 00359 void 00360 ISpellChecker::wrongcapital (ichar_t *word) 00361 { 00362 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN]; 00363 00364 /* 00365 ** When the third parameter to "good" is nonzero, it ignores 00366 ** case. If the word matches this way, "ins_cap" will recapitalize 00367 ** it correctly. 
00368 */ 00369 if (good (word, 0, 1, 0, 0)) 00370 { 00371 icharcpy (newword, word); 00372 upcase (newword); 00373 ins_cap (newword, word); 00374 } 00375 } 00376 #endif 00377 00378 /* 00379 * \param word 00380 */ 00381 void 00382 ISpellChecker::wrongletter (ichar_t *word) 00383 { 00384 register int i; 00385 register int j; 00386 register int n; 00387 ichar_t savechar; 00388 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN]; 00389 00390 n = icharlen (word); 00391 icharcpy (newword, word); 00392 #ifndef NO_CAPITALIZATION_SUPPORT 00393 upcase (newword); 00394 #endif 00395 00396 for (i = 0; i < n; i++) 00397 { 00398 savechar = newword[i]; 00399 for (j=0; j < m_Trynum; ++j) 00400 { 00401 if (m_Try[j] == savechar) 00402 continue; 00403 else if (isboundarych (m_Try[j]) && (i == 0 || i == n - 1)) 00404 continue; 00405 newword[i] = m_Try[j]; 00406 if (good (newword, 0, 1, 0, 0)) 00407 { 00408 if (ins_cap (newword, word) < 0) 00409 return; 00410 } 00411 } 00412 newword[i] = savechar; 00413 } 00414 } 00415 00416 /* 00417 * \param word 00418 */ 00419 void 00420 ISpellChecker::extraletter (ichar_t *word) 00421 { 00422 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN]; 00423 register ichar_t * p; 00424 register ichar_t * r; 00425 00426 if (icharlen (word) < 2) 00427 return; 00428 00429 icharcpy (newword, word + 1); 00430 for (p = word, r = newword; *p != 0; ) 00431 { 00432 if (good (newword, 0, 1, 0, 0)) 00433 { 00434 if (ins_cap (newword, word) < 0) 00435 return; 00436 } 00437 *r++ = *p++; 00438 } 00439 } 00440 00441 /* 00442 * \param word 00443 */ 00444 void 00445 ISpellChecker::missingletter (ichar_t *word) 00446 { 00447 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN + 1]; 00448 register ichar_t * p; 00449 register ichar_t * r; 00450 register int i; 00451 00452 icharcpy (newword + 1, word); 00453 for (p = word, r = newword; *p != 0; ) 00454 { 00455 for (i = 0; i < m_Trynum; i++) 00456 { 00457 if (isboundarych (m_Try[i]) && r == newword) 00458 continue; 00459 *r = m_Try[i]; 00460 if (good 
(newword, 0, 1, 0, 0)) 00461 { 00462 if (ins_cap (newword, word) < 0) 00463 return; 00464 } 00465 } 00466 *r++ = *p++; 00467 } 00468 for (i = 0; i < m_Trynum; i++) 00469 { 00470 if (isboundarych (m_Try[i])) 00471 continue; 00472 *r = m_Try[i]; 00473 if (good (newword, 0, 1, 0, 0)) 00474 { 00475 if (ins_cap (newword, word) < 0) 00476 return; 00477 } 00478 } 00479 } 00480 00481 /* 00482 * \param word 00483 */ 00484 void ISpellChecker::missingspace (ichar_t *word) 00485 { 00486 ichar_t firsthalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]; 00487 int firstno; /* Index into first */ 00488 ichar_t * firstp; /* Ptr into current firsthalf word */ 00489 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN + 1]; 00490 int nfirsthalf; /* No. words saved in 1st half */ 00491 int nsecondhalf; /* No. words saved in 2nd half */ 00492 register ichar_t * p; 00493 ichar_t secondhalf[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]; 00494 int secondno; /* Index into second */ 00495 00496 /* 00497 ** We don't do words of length less than 3; this keeps us from 00498 ** splitting all two-letter words into two single letters. We 00499 ** also don't do maximum-length words, since adding the space 00500 ** would exceed the size of the "possibilities" array. 00501 */ 00502 nfirsthalf = icharlen (word); 00503 if (nfirsthalf < 3 || nfirsthalf >= INPUTWORDLEN + MAXAFFIXLEN - 1) 00504 return; 00505 icharcpy (newword + 1, word); 00506 for (p = newword + 1; p[1] != '\0'; p++) 00507 { 00508 p[-1] = *p; 00509 *p = '\0'; 00510 if (good (newword, 0, 1, 0, 0)) 00511 { 00512 /* 00513 * Save_cap must be called before good() is called on the 00514 * second half, because it uses state left around by 00515 * good(). This is unfortunate because it wastes a bit of 00516 * time, but I don't think it's a significant performance 00517 * problem. 
00518 */ 00519 nfirsthalf = save_cap (newword, word, firsthalf); 00520 if (good (p + 1, 0, 1, 0, 0)) 00521 { 00522 nsecondhalf = save_cap (p + 1, p + 1, secondhalf); 00523 for (firstno = 0; firstno < nfirsthalf; firstno++) 00524 { 00525 firstp = &firsthalf[firstno][p - newword]; 00526 for (secondno = 0; secondno < nsecondhalf; secondno++) 00527 { 00528 *firstp = ' '; 00529 icharcpy (firstp + 1, secondhalf[secondno]); 00530 if (insert (firsthalf[firstno]) < 0) 00531 return; 00532 *firstp = '-'; 00533 if (insert (firsthalf[firstno]) < 0) 00534 return; 00535 } 00536 } 00537 } 00538 } 00539 } 00540 } 00541 00542 /* 00543 * \param word 00544 * \param pfxopts Options to apply to prefixes 00545 */ 00546 int 00547 ISpellChecker::compoundgood (ichar_t *word, int pfxopts) 00548 { 00549 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN]; 00550 register ichar_t * p; 00551 register ichar_t savech; 00552 long secondcap; /* Capitalization of 2nd half */ 00553 00554 /* 00555 ** If compoundflag is COMPOUND_NEVER, compound words are never ok. 00556 */ 00557 if (m_hashheader.compoundflag == COMPOUND_NEVER) 00558 return 0; 00559 /* 00560 ** Test for a possible compound word (for languages like German that 00561 ** form lots of compounds). 00562 ** 00563 ** This is similar to missingspace, except we quit on the first hit, 00564 ** and we won't allow either member of the compound to be a single 00565 ** letter. 00566 ** 00567 ** We don't do words of length less than 2 * compoundmin, since 00568 ** both halves must at least compoundmin letters. 
00569 */ 00570 if (icharlen (word) < 2 * m_hashheader.compoundmin) 00571 return 0; 00572 icharcpy (newword, word); 00573 p = newword + m_hashheader.compoundmin; 00574 for ( ; p[m_hashheader.compoundmin - 1] != 0; p++) 00575 { 00576 savech = *p; 00577 *p = 0; 00578 if (good (newword, 0, 0, pfxopts, FF_COMPOUNDONLY)) 00579 { 00580 *p = savech; 00581 if (good (p, 0, 1, FF_COMPOUNDONLY, 0) 00582 || compoundgood (p, FF_COMPOUNDONLY)) 00583 { 00584 secondcap = whatcap (p); 00585 switch (whatcap (newword)) 00586 { 00587 case ANYCASE: 00588 case CAPITALIZED: 00589 case FOLLOWCASE: /* Followcase can have l.c. suffix */ 00590 return secondcap == ANYCASE; 00591 case ALLCAPS: 00592 return secondcap == ALLCAPS; 00593 } 00594 } 00595 } 00596 else 00597 *p = savech; 00598 } 00599 return 0; 00600 } 00601 00602 /* 00603 * \param word 00604 */ 00605 void 00606 ISpellChecker::transposedletter (ichar_t *word) 00607 { 00608 ichar_t newword[INPUTWORDLEN + MAXAFFIXLEN]; 00609 register ichar_t * p; 00610 register ichar_t temp; 00611 00612 icharcpy (newword, word); 00613 for (p = newword; p[1] != 0; p++) 00614 { 00615 temp = *p; 00616 *p = p[1]; 00617 p[1] = temp; 00618 if (good (newword, 0, 1, 0, 0)) 00619 { 00620 if (ins_cap (newword, word) < 0) 00621 return; 00622 } 00623 temp = *p; 00624 *p = p[1]; 00625 p[1] = temp; 00626 } 00627 } 00628 00637 int 00638 ISpellChecker::ins_cap (ichar_t *word, ichar_t *pattern) 00639 { 00640 int i; /* Index into savearea */ 00641 int nsaved; /* No. 
of words saved */ 00642 ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]; 00643 00644 nsaved = save_cap (word, pattern, savearea); 00645 for (i = 0; i < nsaved; i++) 00646 { 00647 if (insert (savearea[i]) < 0) 00648 return -1; 00649 } 00650 return 0; 00651 } 00652 00662 int 00663 ISpellChecker::save_cap (ichar_t *word, ichar_t *pattern, 00664 ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN]) 00665 { 00666 int hitno; /* Index into hits array */ 00667 int nsaved; /* Number of words saved */ 00668 int preadd; /* No. chars added to front of root */ 00669 int prestrip; /* No. chars stripped from front */ 00670 int sufadd; /* No. chars added to back of root */ 00671 int sufstrip; /* No. chars stripped from back */ 00672 00673 if (*word == 0) 00674 return 0; 00675 00676 for (hitno = m_numhits, nsaved = 0; --hitno >= 0 && nsaved < MAX_CAPS; ) 00677 { 00678 if (m_hits[hitno].prefix) 00679 { 00680 prestrip = m_hits[hitno].prefix->stripl; 00681 preadd = m_hits[hitno].prefix->affl; 00682 } 00683 else 00684 prestrip = preadd = 0; 00685 if (m_hits[hitno].suffix) 00686 { 00687 sufstrip = m_hits[hitno].suffix->stripl; 00688 sufadd = m_hits[hitno].suffix->affl; 00689 } 00690 else 00691 sufadd = sufstrip = 0; 00692 save_root_cap (word, pattern, prestrip, preadd, 00693 sufstrip, sufadd, 00694 m_hits[hitno].dictent, m_hits[hitno].prefix, m_hits[hitno].suffix, 00695 savearea, &nsaved); 00696 } 00697 return nsaved; 00698 } 00699 00700 /* 00701 * \param word 00702 * \param pattern 00703 * \param prestrip 00704 * \param preadd 00705 * \param sufstrip 00706 * \param sufadd 00707 * \param firstdent 00708 * \param pfxent 00709 * \param sufent 00710 * 00711 * \return 00712 */ 00713 int 00714 ISpellChecker::ins_root_cap (ichar_t *word, ichar_t *pattern, 00715 int prestrip, int preadd, int sufstrip, int sufadd, 00716 struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent) 00717 { 00718 int i; /* Index into savearea */ 00719 ichar_t savearea[MAX_CAPS][INPUTWORDLEN + 
MAXAFFIXLEN]; 00720 int nsaved; /* Number of words saved */ 00721 00722 nsaved = 0; 00723 save_root_cap (word, pattern, prestrip, preadd, sufstrip, sufadd, 00724 firstdent, pfxent, sufent, savearea, &nsaved); 00725 for (i = 0; i < nsaved; i++) 00726 { 00727 if (insert (savearea[i]) < 0) 00728 return -1; 00729 } 00730 return 0; 00731 } 00732 00733 /* ARGSUSED */ 00747 void 00748 ISpellChecker::save_root_cap (ichar_t *word, ichar_t *pattern, 00749 int prestrip, int preadd, int sufstrip, int sufadd, 00750 struct dent *firstdent, struct flagent *pfxent, struct flagent *sufent, 00751 ichar_t savearea[MAX_CAPS][INPUTWORDLEN + MAXAFFIXLEN], 00752 int * nsaved) 00753 { 00754 #ifndef NO_CAPITALIZATION_SUPPORT 00755 register struct dent * dent; 00756 #endif /* NO_CAPITALIZATION_SUPPORT */ 00757 int firstisupper; 00758 ichar_t newword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; 00759 #ifndef NO_CAPITALIZATION_SUPPORT 00760 register ichar_t * p; 00761 int len; 00762 int i; 00763 int limit; 00764 #endif /* NO_CAPITALIZATION_SUPPORT */ 00765 00766 if (*nsaved >= MAX_CAPS) 00767 return; 00768 icharcpy (newword, word); 00769 firstisupper = myupper (pattern[0]); 00770 #ifdef NO_CAPITALIZATION_SUPPORT 00771 /* 00772 ** Apply the old, simple-minded capitalization rules. 
00773 */ 00774 if (firstisupper) 00775 { 00776 if (myupper (pattern[1])) 00777 upcase (newword); 00778 else 00779 { 00780 lowcase (newword); 00781 newword[0] = mytoupper (newword[0]); 00782 } 00783 } 00784 else 00785 lowcase (newword); 00786 icharcpy (savearea[*nsaved], newword); 00787 (*nsaved)++; 00788 return; 00789 #else /* NO_CAPITALIZATION_SUPPORT */ 00790 #define flagsareok(dent) \ 00791 ((pfxent == NULL \ 00792 || TSTMASKBIT (dent->mask, pfxent->flagbit)) \ 00793 && (sufent == NULL \ 00794 || TSTMASKBIT (dent->mask, sufent->flagbit))) 00795 00796 dent = firstdent; 00797 if ((dent->flagfield & (CAPTYPEMASK | MOREVARIANTS)) == ALLCAPS) 00798 { 00799 upcase (newword); /* Uppercase required */ 00800 icharcpy (savearea[*nsaved], newword); 00801 (*nsaved)++; 00802 return; 00803 } 00804 for (p = pattern; *p; p++) 00805 { 00806 if (mylower (*p)) 00807 break; 00808 } 00809 if (*p == 0) 00810 { 00811 upcase (newword); /* Pattern was all caps */ 00812 icharcpy (savearea[*nsaved], newword); 00813 (*nsaved)++; 00814 return; 00815 } 00816 for (p = pattern + 1; *p; p++) 00817 { 00818 if (myupper (*p)) 00819 break; 00820 } 00821 if (*p == 0) 00822 { 00823 /* 00824 ** The pattern was all-lower or capitalized. If that's 00825 ** legal, insert only that version. 
00826 */ 00827 if (firstisupper) 00828 { 00829 if (captype (dent->flagfield) == CAPITALIZED 00830 || captype (dent->flagfield) == ANYCASE) 00831 { 00832 lowcase (newword); 00833 newword[0] = mytoupper (newword[0]); 00834 icharcpy (savearea[*nsaved], newword); 00835 (*nsaved)++; 00836 return; 00837 } 00838 } 00839 else 00840 { 00841 if (captype (dent->flagfield) == ANYCASE) 00842 { 00843 lowcase (newword); 00844 icharcpy (savearea[*nsaved], newword); 00845 (*nsaved)++; 00846 return; 00847 } 00848 } 00849 while (dent->flagfield & MOREVARIANTS) 00850 { 00851 dent = dent->next; 00852 if (captype (dent->flagfield) == FOLLOWCASE 00853 || !flagsareok (dent)) 00854 continue; 00855 if (firstisupper) 00856 { 00857 if (captype (dent->flagfield) == CAPITALIZED) 00858 { 00859 lowcase (newword); 00860 newword[0] = mytoupper (newword[0]); 00861 icharcpy (savearea[*nsaved], newword); 00862 (*nsaved)++; 00863 return; 00864 } 00865 } 00866 else 00867 { 00868 if (captype (dent->flagfield) == ANYCASE) 00869 { 00870 lowcase (newword); 00871 icharcpy (savearea[*nsaved], newword); 00872 (*nsaved)++; 00873 return; 00874 } 00875 } 00876 } 00877 } 00878 /* 00879 ** Either the sample had complex capitalization, or the simple 00880 ** capitalizations (all-lower or capitalized) are illegal. 00881 ** Insert all legal capitalizations, including those that are 00882 ** all-lower or capitalized. If the prototype is capitalized, 00883 ** capitalized all-lower samples. Watch out for affixes. 
00884 */ 00885 dent = firstdent; 00886 p = strtosichar (dent->word, 1); 00887 len = icharlen (p); 00888 if (dent->flagfield & MOREVARIANTS) 00889 dent = dent->next; /* Skip place-holder entry */ 00890 for ( ; ; ) 00891 { 00892 if (flagsareok (dent)) 00893 { 00894 if (captype (dent->flagfield) != FOLLOWCASE) 00895 { 00896 lowcase (newword); 00897 if (firstisupper || captype (dent->flagfield) == CAPITALIZED) 00898 newword[0] = mytoupper (newword[0]); 00899 icharcpy (savearea[*nsaved], newword); 00900 (*nsaved)++; 00901 if (*nsaved >= MAX_CAPS) 00902 return; 00903 } 00904 else 00905 { 00906 /* Followcase is the tough one. */ 00907 p = strtosichar (dent->word, 1); 00908 memmove ( 00909 reinterpret_cast<char *>(newword + preadd), 00910 reinterpret_cast<char *>(p + prestrip), 00911 (len - prestrip - sufstrip) * sizeof (ichar_t)); 00912 if (myupper (p[prestrip])) 00913 { 00914 for (i = 0; i < preadd; i++) 00915 newword[i] = mytoupper (newword[i]); 00916 } 00917 else 00918 { 00919 for (i = 0; i < preadd; i++) 00920 newword[i] = mytolower (newword[i]); 00921 } 00922 limit = len + preadd + sufadd - prestrip - sufstrip; 00923 i = len + preadd - prestrip - sufstrip; 00924 p += len - sufstrip - 1; 00925 if (myupper (*p)) 00926 { 00927 for (p = newword + i; i < limit; i++, p++) 00928 *p = mytoupper (*p); 00929 } 00930 else 00931 { 00932 for (p = newword + i; i < limit; i++, p++) 00933 *p = mytolower (*p); 00934 } 00935 icharcpy (savearea[*nsaved], newword); 00936 (*nsaved)++; 00937 if (*nsaved >= MAX_CAPS) 00938 return; 00939 } 00940 } 00941 if ((dent->flagfield & MOREVARIANTS) == 0) 00942 break; /* End of the line */ 00943 dent = dent->next; 00944 } 00945 return; 00946 #endif /* NO_CAPITALIZATION_SUPPORT */ 00947 } 00948 00949