1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
|
Index: tokenize.c
===================================================================
RCS file: /cvsroot/link-grammar/link-grammar/tokenize.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -b -B -r1.3 -r1.4
--- link-grammar/link-grammar/tokenize.c 16 Aug 2006 17:07:02 -0000 1.3
+++ link-grammar/link-grammar/tokenize.c 27 Oct 2007 19:03:40 -0000 1.4
@@ -172,7 +172,8 @@
used in a sentence.
*/
-
+#undef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
static int separate_word(Sentence sent, char *w, char *wend, int is_first_word, int quote_found) {
/* w points to a string, wend points to the char one after the end. The
@@ -256,8 +257,8 @@
for (n_r_stripped = 0; n_r_stripped < MAX_STRIP; n_r_stripped++) {
- strncpy(word, w, wend-w);
- word[wend-w] = '\0';
+ strncpy(word, w, MIN(wend-w, MAX_WORD));
+ word[MIN(wend-w, MAX_WORD)] = '\0';
if (wend == w) break; /* it will work without this */
if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) break;
@@ -285,8 +286,8 @@
/* Now we strip off suffixes...w points to the remaining word, "wend" to the end of the word. */
s_stripped = -1;
- strncpy(word, w, wend-w);
- word[wend-w] = '\0';
+ strncpy(word, w, MIN(wend-w, MAX_WORD));
+ word[MIN(wend-w, MAX_WORD)] = '\0';
word_is_in_dict=0;
if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) word_is_in_dict=1;
@@ -309,16 +310,16 @@
if(s_ok==1 || i==s_strippable) {
- strncpy(newword, w, (wend-len)-w);
- newword[(wend-len)-w] = '\0';
+ strncpy(newword, w, MIN((wend-len)-w, MAX_WORD));
+ newword[MIN((wend-len)-w, MAX_WORD)] = '\0';
/* Check if the remainder is in the dictionary; for the no-suffix case, it won't be */
if (boolean_dictionary_lookup(sent->dict, newword)) {
if(verbosity>1) if(i< s_strippable) printf("Splitting word into two: %s-%s\n", newword, suffix[i]);
s_stripped = i;
wend -= len;
- strncpy(word, w, wend-w);
- word[wend-w] = '\0';
+ strncpy(word, w, MIN(wend-w, MAX_WORD));
+ word[MIN(wend-w, MAX_WORD)] = '\0';
break;
}
@@ -326,16 +327,16 @@
else {
for (j=0; j<p_strippable; j++) {
if (strncmp(w, prefix[j], strlen(prefix[j])) == 0) {
- strncpy(newword, w+strlen(prefix[j]), (wend-len)-(w+strlen(prefix[j])));
- newword[(wend-len)-(w+strlen(prefix[j]))]='\0';
+ strncpy(newword, w+strlen(prefix[j]), MIN((wend-len)-(w+strlen(prefix[j])), MAX_WORD));
+ newword[MIN((wend-len)-(w+strlen(prefix[j])), MAX_WORD)]='\0';
if(boolean_dictionary_lookup(sent->dict, newword)) {
if(verbosity>1) if(i < s_strippable) printf("Splitting word into three: %s-%s-%s\n", prefix[j], newword, suffix[i]);
if (!issue_sentence_word(sent, prefix[j])) return FALSE;
if(i < s_strippable) s_stripped = i;
wend -= len;
w += strlen(prefix[j]);
- strncpy(word, w, wend-w);
- word[wend-w] = '\0';
+ strncpy(word, w, MIN(wend-w, MAX_WORD));
+ word[MIN(wend-w, MAX_WORD)] = '\0';
break;
}
}
|