summary | refs | log | tree | commit | diff
blob: 34a9eb0a1c8f7e1c0954dd23e2a52d79e951639a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
Index: tokenize.c
===================================================================
RCS file: /cvsroot/link-grammar/link-grammar/tokenize.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -b -B -r1.3 -r1.4
--- link-grammar/link-grammar/tokenize.c	16 Aug 2006 17:07:02 -0000	1.3
+++ link-grammar/link-grammar/tokenize.c	27 Oct 2007 19:03:40 -0000	1.4
@@ -172,7 +172,8 @@
     used in a sentence.
 */
 
-
+#undef	MIN
+#define MIN(a, b)  (((a) < (b)) ? (a) : (b))
 
 static int separate_word(Sentence sent, char *w, char *wend, int is_first_word, int quote_found) {
     /* w points to a string, wend points to the char one after the end.  The
@@ -256,8 +257,8 @@
 
     for (n_r_stripped = 0; n_r_stripped < MAX_STRIP; n_r_stripped++) {
 
-	strncpy(word, w, wend-w);
-	word[wend-w] = '\0';
+	strncpy(word, w, MIN(wend-w, MAX_WORD));
+	word[MIN(wend-w, MAX_WORD)] = '\0';
 	if (wend == w) break;  /* it will work without this */
 	
 	if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) break;
@@ -285,8 +286,8 @@
     /* Now we strip off suffixes...w points to the remaining word, "wend" to the end of the word. */
 
     s_stripped = -1;
-    strncpy(word, w, wend-w);
-    word[wend-w] = '\0';
+    strncpy(word, w, MIN(wend-w, MAX_WORD));
+    word[MIN(wend-w, MAX_WORD)] = '\0';
     word_is_in_dict=0;
 
     if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) word_is_in_dict=1;
@@ -309,16 +310,16 @@
 
 	if(s_ok==1 || i==s_strippable) {
 	  
-	  strncpy(newword, w, (wend-len)-w);
-	  newword[(wend-len)-w] = '\0';
+	  strncpy(newword, w, MIN((wend-len)-w, MAX_WORD));
+	  newword[MIN((wend-len)-w, MAX_WORD)] = '\0';
 
 	  /* Check if the remainder is in the dictionary; for the no-suffix case, it won't be */	  
 	  if (boolean_dictionary_lookup(sent->dict, newword)) {
 	    if(verbosity>1) if(i< s_strippable) printf("Splitting word into two: %s-%s\n", newword, suffix[i]); 
 	    s_stripped = i;
 	    wend -= len;
-	    strncpy(word, w, wend-w);
-	    word[wend-w] = '\0';
+	    strncpy(word, w, MIN(wend-w, MAX_WORD));
+	    word[MIN(wend-w, MAX_WORD)] = '\0';
 	    break;
 	  }
 
@@ -326,16 +327,16 @@
 	  else {
 	    for (j=0; j<p_strippable; j++) {
 	      if (strncmp(w, prefix[j], strlen(prefix[j])) == 0) {
-		strncpy(newword, w+strlen(prefix[j]), (wend-len)-(w+strlen(prefix[j])));
-		newword[(wend-len)-(w+strlen(prefix[j]))]='\0';
+		strncpy(newword, w+strlen(prefix[j]), MIN((wend-len)-(w+strlen(prefix[j])), MAX_WORD));
+		newword[MIN((wend-len)-(w+strlen(prefix[j])), MAX_WORD)]='\0';
 		if(boolean_dictionary_lookup(sent->dict, newword)) {
 		  if(verbosity>1) if(i < s_strippable) printf("Splitting word into three: %s-%s-%s\n", prefix[j], newword, suffix[i]); 
 		  if (!issue_sentence_word(sent, prefix[j])) return FALSE;
 		  if(i < s_strippable) s_stripped = i;
 		  wend -= len;
 		  w += strlen(prefix[j]);
-		  strncpy(word, w, wend-w);
-		word[wend-w] = '\0';
+		  strncpy(word, w, MIN(wend-w, MAX_WORD));
+		  word[MIN(wend-w, MAX_WORD)] = '\0';
 		break;
 		}
 	      }