Add a refer book
[fse.git] / fetch.c
blob03303eed1218181c05bd7ab5bbc16c8396b7ebd0
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <dirent.h>

#include "hash_index.h"
/*
 * Return 1 if _c_ is an ASCII letter ('A'-'Z', 'a'-'z') or an ASCII
 * digit ('0'-'9'), and 0 for every other character.
 *
 * The ranges are spelled out instead of using <ctype.h> so that the
 * result does not depend on the locale and so that a negative char
 * value is handled safely.
 */
static inline int is_ss_char(char c)
{
	return (c >= 'A' && c <= 'Z') ||
	       (c >= 'a' && c <= 'z') ||
	       (c >= '0' && c <= '9');
}
/*
 * Return 1 if _c_ is the space character ' ', 0 otherwise.
 * Tabs, newlines and other whitespace are NOT treated as space here.
 */
static inline int is_space(char c)
{
	return c == ' ';
}
/* Scratch buffer holding the word most recently extracted by get_word(). */
static char word[128];

/*
 * Get a word from _str_.
 *
 * Leading characters that are not ss-chars are skipped first.  The word
 * itself is the following run of ss-chars and '.' characters; it is
 * copied, NUL-terminated, into the global _word_ buffer.
 *
 * A word longer than the buffer is truncated to sizeof(word) - 1
 * characters, but the whole run is still consumed so that the caller
 * resumes scanning after it.
 *
 * Returns a pointer to the first character after the word, or NULL when
 * _str_ contains no further word (in which case _word_ is left
 * untouched).  _str_ must be NUL-terminated.
 */
static char *get_word(char *str)
{
	char *out = word;
	char *const limit = word + sizeof word - 1;	/* room for the NUL */

	while (*str && !is_ss_char(*str))
		str++;
	if (*str == '\0')
		return NULL;

	while (is_ss_char(*str) || *str == '.') {
		/* Keep consuming an overlong word, but stop storing it. */
		if (out < limit)
			*out++ = *str;
		str++;
	}
	*out = '\0';

	return str;
}
/*
 * Convert the ASCII upper-case letters of the NUL-terminated string
 * _str_ to lower case, in place.  All other characters are left as
 * they are.
 */
static void tolower_str(char *str)
{
	for (; *str; str++) {
		if (*str >= 'A' && *str <= 'Z')
			*str += 'a' - 'A';
	}
}
59 * Do the boring but Cool thing
61 static void parse_word(void)
63 int len = strlen(word);
64 char *p = word + len - 1;
66 /* Drop the last period mark */
67 while (*p == '.')
68 *p-- = '\0';
70 tolower_str(word);
73 /* 'ies' and 'ied' to 'y' */
74 if (!strncmp(p - 2, "ies", 3) ||
75 !strncmp(p - 2, "ied", 3)) {
76 *(p - 2) = 'y';
77 *(p - 1) = '\0';
78 return;
82 * Drop the trailing 'e', making keywords like 'waiting' and 'waite'
83 * are treated same.
85 if (*p == 'e') {
86 *p = '\0';
87 return;
90 /*
91 * treat the word with 's' ending as plural,
92 * even it may be not true
94 if (*p == 's')
95 *p-- = '\0';
97 /* Drop 'ing' */
98 if (!strncmp(p - 2, "ing", 3)) {
99 *(p - 2) = '\0';
100 return;
103 /* Drop 'er' */
104 if (!strncmp(p - 1, "er", 2)) {
105 *(p - 1) = '\0';
106 return;
109 /* Drop 'ed' */
110 if (!strncmp(p - 1, "ed", 2)) {
111 *(p - 1) = '\0';
112 return;
119 static void install_word(char *dir, char *word,
120 const char *file, uint32_t file_pos)
122 int fd;
123 uint32_t flag = 0;
124 char hash_file[512];
125 struct key_words key;
127 strcpy(hash_file, dir);
128 strcat(hash_file, "/");
129 strcat(hash_file, word);
131 if (access(hash_file, F_OK) < 0)
132 flag = O_CREAT;
133 fd = open(hash_file, O_WRONLY | O_APPEND | flag , 0644);
134 if (fd < 0)
135 printf("ERROR: open hash file %s error!\n", hash_file);
137 memset(&key, 0, sizeof key);
138 key.file_pos = file_pos;
139 strcpy(key.file, file);
140 if ((write(fd, &key, sizeof key)) < 0)
141 printf("EROOR: write hash file %s error!\n", hash_file);
142 close(fd);
145 static void hash_word(char *word, const char *file, uint32_t file_pos)
147 int hash;
148 char hash_dir[HASH_DIR_LEN + 3]= HASH_DIR;
150 hash = do_hash(word);
152 sprintf(hash_dir + HASH_DIR_LEN, "%02x", hash);
153 if (access(hash_dir, F_OK) < 0) {
154 if (mkdir(hash_dir, 0700) < 0) {
155 printf("mkdir: %s error!\n", hash_dir);
156 return;
159 install_word(hash_dir, word, file, file_pos);
162 static void do_parse(char *buf, const char *file, uint32_t file_pos)
164 char *p = buf;
166 while (1) {
167 p = get_word(p);
168 if (!p)
169 break;
170 parse_word();
171 if (*word <= ' ')
172 continue;
173 hash_word(word, file, file_pos + p - buf - strlen(word));
/*
 * Read _file_ piece by piece and index the words of every piece with
 * do_parse().  Open and read failures are reported on stdout.
 *
 * NOTE: a word that straddles two pieces is indexed as two separate
 * words, and a NUL byte inside the file ends the parsing of the piece
 * that contains it.
 */
static void parse_file(const char *file)
{
	/* 1 MiB is too much for the stack; the buffer is static instead. */
	static char buf[0x100000];
	uint32_t file_pos = 0;
	ssize_t n;
	int fd;

	fd = open(file, O_RDONLY);
	if (fd < 0) {
		printf("Open file _%s_ error!\n", file);
		return;
	}

	/* Leave one byte free so the piece can be NUL-terminated. */
	while ((n = read(fd, buf, sizeof buf - 1)) > 0) {
		buf[n] = '\0';
		do_parse(buf, file, file_pos);
		/* Advance by what was really read, which may be a short read. */
		file_pos += (uint32_t)n;
	}
	if (n < 0)
		printf("Read file _%s_ error!\n", file);

	close(fd);
}
199 void fetch()
201 DIR *dir;
202 struct dirent *de;
204 dir = opendir("./");
205 if (dir < 0) {
206 printf("open current directory error!\n");
207 return;
210 while((de = readdir(dir))) {
212 * we do nothing to these files started '.',
213 * "." and ".." included.
215 if (!strcmp(de->d_name, "a.out") ||
216 *de->d_name == '.')
217 continue;
218 #if 1
219 printf("Parsing file %s\n", de->d_name);
220 #endif
221 parse_file(de->d_name);