C语言构建定长字典
1.C语言简介-struct
C语言标准库没有提供字典(键值映射)类型。但在某些场合需要统计各个字符串出现的累计次数,这时可以使用定长字典。定长字典,顾名思义,就是容量固定的字典。C语言的结构体可以把不同类型的数据组合在一起,因此可以用结构体数组来构造一个定长字典。
2.词频字典
2.1词频字典结构体
/* Size in bytes of the word buffer held by each dictionary entry. */
#define WORD_MAX_SIZE 100

/*
 * One entry of the word-frequency dictionary: a word together with the
 * number of times it has been counted.
 */
typedef struct word_fre_dict {
    /* The word itself; the buffer is WORD_MAX_SIZE bytes, so a C string
     * stored here can be at most WORD_MAX_SIZE - 1 characters long. */
    char word[WORD_MAX_SIZE];
    /* Occurrence count; assumed never to exceed the range of int. */
    int times;
} WORD_FRE_DICT;
2.2 词频字典的排序
/*
 * Comparator for qsort(): orders WORD_FRE_DICT entries by their word field.
 *
 * a, b: pointers to the two WORD_FRE_DICT elements being compared.
 * Returns the result of strcmp() on the two words (negative, zero or
 * positive), which yields ascending dictionary order.
 */
int cmp_dict(const void *a, const void *b)
{
    const WORD_FRE_DICT *lhs = a;
    const WORD_FRE_DICT *rhs = b;

    return strcmp(lhs->word, rhs->word);
}
/*
 * Comparator for qsort(): orders WORD_FRE_DICT entries by ascending
 * occurrence count (times).
 *
 * a, b: pointers to the two WORD_FRE_DICT elements being compared.
 * Returns -1, 0 or 1 when a's count is less than, equal to or greater
 * than b's count.
 */
int cmp_fre_up(const void *a, const void *b)
{
    const WORD_FRE_DICT *lhs = a;
    const WORD_FRE_DICT *rhs = b;

    /* Compare rather than subtract: the difference of two ints can
     * overflow, which is undefined behaviour for signed types. */
    return (lhs->times > rhs->times) - (lhs->times < rhs->times);
}
/*
 * Comparator for qsort(): orders WORD_FRE_DICT entries by descending
 * occurrence count (times).
 *
 * a, b: pointers to the two WORD_FRE_DICT elements being compared.
 * Returns -1, 0 or 1 when a's count is greater than, equal to or less
 * than b's count.
 */
int cmp_fre_down(const void *a, const void *b)
{
    const WORD_FRE_DICT *lhs = a;
    const WORD_FRE_DICT *rhs = b;

    /* Compare rather than subtract: the difference of two ints can
     * overflow, which is undefined behaviour for signed types. */
    return (rhs->times > lhs->times) - (rhs->times < lhs->times);
}
2.3 从字符串文本中提取词频字典
void create_word_fre_dict(char* paragraph)
{WORD_FRE_DICT word_list[1000] = { 0 };memset(word_list, 0, 1000 * sizeof(WORD_FRE_DICT));/* 预处理: 单词全改为小写, 非单词全转为空格符 */int i = 0;while (paragraph[i] != 0) {if (paragraph[i] >= 'A' && paragraph[i] <= 'Z') {tolower(paragraph[i]);}else if ((paragraph[i] < 'A' || paragraph[i] > 'Z') && \((paragraph[i] < 'a' || paragraph[i] > 'z'))) {paragraph[i] = ' ';}i++;}/* 使用strtok公式进行字符串拆分 */char* temp = strtok(paragraph, " ");strcpy(word_list[0].word, temp);word_list[0].times++;while (temp != NULL) {temp = strtok(NULL, " "); /* 最后一个字节拿出来后还会执行一次,这次结果为NULL */if (temp == NULL) {break;}printf("%s\n", temp);int dict_index = 0;int find = 0;while (word_list[dict_index].times != 0 && dict_index < 1000) {if (strcmp(word_list[dict_index].word, temp) == 0) {word_list[dict_index].times++;find = 1;break;}dict_index++;}if (!find && dict_index < 1000) {strcpy(word_list[dict_index].word, temp);word_list[dict_index].times = 1;}}qsort(word_list, 50, sizeof(WORD_FRE_DICT), cmp_dict);for (int de = 0; de < 20; de++) {printf("%s, count = %d\n", word_list[de].word, word_list[de].times);}
}int c_dict_test(void) {char para[] = { "Youth means a temperamental predominance of courage over timidity, \of the appetite for adventure over the love of ease. This often \exits in a man of 60, more than a boy of 20.nobody grows merely \by the number of years; we grow old by deserting our ideas. Years \may wrinkle the skin, but to give up enthusiasm wrinkles the soul. \Worry, fear, self distrust bows the heart and turns the spirit \back to dust." };printf("%s\n", para);create_word_fre_dict(para);return 0;
}