字典树(Trie),也叫单词查找树或键树,是一种树形结构,属于哈希树的变种。它是一种针对字符串进行操作的数据结构,典型应用是用于统计和排序大量的字符串,如在搜索引擎系统中用于文本词频统计。其主要思想是利用字符串的公共前缀来节约存储空间。字典树在插入和查询字符串的操作上具有较高的效率。
/*------------- Trie word lookup ---------------*/
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Number of child slots in every trie node. Children are indexed by
 * (letter - 97), i.e. one slot per lowercase ASCII letter 'a'..'z'.
 * Each preprocessor directive must sit on its own line, otherwise this
 * macro is never defined.
 */
#define ALPHABET_SIZE 26

/* Node definition */
/*
 * One node of the trie. A word is spelled by the letters on the path
 * from the root's children down to a node flagged as a word end.
 */
typedef struct TrieNode
{
    /* Child links, one slot per letter; NULL when there is no child. */
    struct TrieNode *children[ALPHABET_SIZE];

    /* Letter carried by this node. */
    char character;

    /* True when the path ending at this node is a complete word. */
    bool isEndOfWord;
} TrieNode;

/* Create a new node */
TrieNode *createTrieNode()
{TrieNode *node;node = malloc(sizeof(TrieNode));node->isEndOfWord = false;int i = 0;while (i < ALPHABET_SIZE){node->children[i] = NULL;i++;}return node;
}/* 插入新词到字典树中 */
/*
 * Inserts a word into the trie rooted at `root`.
 *
 * The word ends at the first '\0', '\n' or '\r', so a line produced by
 * fgets() can be passed unchanged and a word without a trailing newline
 * is stored in full. Only the letters 'a'..'z' can be stored: when the
 * word is empty or contains any other character, the trie is left
 * untouched. When a node allocation fails part-way, insertion stops and
 * the word is not flagged as present.
 *
 * root: root node of the trie; NULL is ignored.
 * word: NUL-terminated word, optionally followed by a line ending;
 *       NULL is ignored. The string is not modified.
 */
void insert(TrieNode *root, char *word)
{
    if (root == NULL || word == NULL)
    {
        return;
    }

    /* Length of the word proper, excluding any line terminator. */
    size_t length = strcspn(word, "\r\n");
    if (length == 0)
    {
        return;
    }

    /* Validate up front so that a rejected word never leaves a partial
     * path behind and never produces an out-of-range child index. */
    for (size_t i = 0; i < length; i++)
    {
        if (word[i] < 'a' || word[i] > 'z')
        {
            return;
        }
    }

    TrieNode *node = root;
    for (size_t i = 0; i < length; i++)
    {
        int slot = word[i] - 'a';
        if (node->children[slot] == NULL)
        {
            TrieNode *child = createTrieNode();
            if (child == NULL)
            {
                return; /* out of memory: leave the word unmarked */
            }
            child->character = word[i];
            node->children[slot] = child;
        }
        node = node->children[slot];
    }
    node->isEndOfWord = true;
}

/* Search the trie for a word */
/*
 * Follows the letters of `word` down the trie starting at `root`.
 *
 * root: root node of the trie.
 * word: NUL-terminated prefix to look up.
 *
 * Returns the node reached by the last letter of `word` (`root` itself
 * for an empty word). When a letter has no matching child, or a character
 * is outside 'a'..'z', prints "No possible words!!" and returns NULL.
 * Returns NULL without printing when either argument is NULL.
 */
TrieNode *search(TrieNode *root, char *word)
{
    if (root == NULL || word == NULL)
    {
        return NULL;
    }

    TrieNode *node = root;
    for (const char *p = word; *p != '\0'; p++)
    {
        TrieNode *next = NULL;

        /* Only a-z maps to a valid child slot; anything else (upper case,
         * digits, punctuation) would index outside the children array. */
        if (*p >= 'a' && *p <= 'z')
        {
            next = node->children[*p - 'a'];
        }
        if (next == NULL)
        {
            printf("No possible words!!\n");
            return NULL;
        }
        node = next;
    }
    return node;
}

/* Print a word */
/*
 * Writes the first `len` characters of `chars` to stdout, followed by a
 * newline. Nothing but the newline is written when `len` is not positive.
 */
void printArray(char chars[], int len)
{
    int pos = 0;
    while (pos < len)
    {
        putchar(chars[pos]);
        pos++;
    }
    putchar('\n');
}

/* Print all related words */
/*
 * Depth-first walk that prints every complete word found at or below
 * `node`.
 *
 * node:      subtree to print; NULL is a no-op.
 * prefix:    buffer holding the letters of the path so far; this call
 *            stores node's letter at prefix[filledLen]. No bounds check
 *            is done here, so the caller must supply a buffer large
 *            enough for the deepest path.
 * filledLen: number of letters of `prefix` that precede this node.
 */
void printPathsRecur(TrieNode *node, char prefix[], int filledLen)
{
    if (!node)
    {
        return;
    }

    int depth = filledLen + 1;
    prefix[filledLen] = node->character;

    if (node->isEndOfWord)
    {
        printArray(prefix, depth);
    }

    /* Visit the children in slot order so output stays alphabetical. */
    TrieNode **child = node->children;
    TrieNode **stop = child + ALPHABET_SIZE;
    while (child != stop)
    {
        printPathsRecur(*child, prefix, depth);
        child++;
    }
}

/* Traverse the trie to find the words matching the query */
/*
 * Prints every stored word that starts with `prefix`.
 *
 * prefix: NUL-terminated prefix. The buffer is also used as scratch space
 *         while printing, so it must be writable and large enough to hold
 *         the longest matching word; its contents after the call are
 *         unspecified.
 * root:   root node of the trie.
 *
 * An empty prefix matches every stored word. Nothing is printed by this
 * function when either argument is NULL or when the prefix is not in the
 * trie (search() reports the latter).
 */
void traverse(char prefix[], TrieNode *root)
{
    if (prefix == NULL || root == NULL)
    {
        return;
    }

    size_t length = strlen(prefix);
    if (length == 0)
    {
        /* The root carries no letter, so start one level down at
         * position 0 instead of writing to prefix[-1]. */
        for (int i = 0; i < ALPHABET_SIZE; i++)
        {
            printPathsRecur(root->children[i], prefix, 0);
        }
        return;
    }

    TrieNode *match = search(root, prefix);
    if (match == NULL)
    {
        return;
    }

    /* printPathsRecur stores the matched node's own letter first, which is
     * the last letter of the prefix, hence the start position length - 1. */
    printPathsRecur(match, prefix, (int)length - 1);
}

#define NUMBER_OF_WORDS (354935)
/* Size in bytes of the buffers used for dictionary lines and keywords. */
#define INPUT_WORD_SIZE (100)

/*
 * Reads one whitespace-delimited keyword from stdin into `s`.
 *
 * s: destination buffer of at least INPUT_WORD_SIZE bytes; the "%99s"
 *    field width below is tied to that size.
 *
 * Returns `s` on success, or NULL when no keyword could be read
 * (end of input, read error, or `s` is NULL).
 */
char *receiveInput(char *s)
{
    if (s == NULL || scanf("%99s", s) != 1)
    {
        return NULL;
    }
    return s;
}

/*
 * Rewrites a raw dictionary line in place as "<word>\n", the shape that
 * fgets() produces and that insert() accepts. Returns false when the line
 * should be skipped: it is empty, contains a character outside 'a'..'z',
 * or does not leave room for the terminator.
 */
static bool normalizeLine(char *line, size_t capacity)
{
    size_t length = strcspn(line, "\r\n");
    if (length == 0 || length + 2 > capacity)
    {
        return false;
    }
    for (size_t i = 0; i < length; i++)
    {
        if (line[i] < 'a' || line[i] > 'z')
        {
            return false;
        }
    }
    line[length] = '\n';
    line[length + 1] = '\0';
    return true;
}

/*
 * Streams the dictionary from `fp` into the trie one line at a time, so
 * no per-word storage is needed and the file may hold any number of words.
 */
static void loadDictionary(FILE *fp, TrieNode *root)
{
    char line[INPUT_WORD_SIZE];

    while (fgets(line, sizeof line, fp) != NULL)
    {
        if (strchr(line, '\n') == NULL && !feof(fp))
        {
            /* Line longer than the buffer: drop the rest of it so the
             * tail is not mistaken for a separate word. */
            int c;
            while ((c = fgetc(fp)) != EOF && c != '\n')
            {
            }
            continue;
        }
        if (normalizeLine(line, sizeof line))
        {
            insert(root, line);
        }
    }
}

/* Releases `node` and every node below it; NULL is a no-op. */
static void freeTrie(TrieNode *node)
{
    if (node == NULL)
    {
        return;
    }
    for (int i = 0; i < ALPHABET_SIZE; i++)
    {
        freeTrie(node->children[i]);
    }
    free(node);
}

/*
 * Loads "dictionary.txt" into a trie, then repeatedly prompts for a
 * keyword and prints every dictionary word that starts with it. The loop
 * ends when stdin reaches end of input.
 */
int main(void)
{
    /* Read the dictionary file */
    FILE *fp = fopen("dictionary.txt", "r");
    if (fp == NULL)
    {
        fprintf(stderr, "Error while opening dictionary file");
        exit(1);
    }

    /* Put the words into the trie */
    TrieNode *root = createTrieNode();
    if (root == NULL)
    {
        fclose(fp);
        fprintf(stderr, "Out of memory\n");
        return EXIT_FAILURE;
    }
    loadDictionary(fp, root);
    fclose(fp);

    char str[INPUT_WORD_SIZE];
    while (1)
    {
        printf("Enter keyword: ");
        fflush(stdout);
        if (receiveInput(str) == NULL)
        {
            break; /* end of input: stop instead of looping forever */
        }
        printf("\n==========================================================\n");
        printf("\n********************* Possible Words ********************\n");
        /* Look the keyword up in the trie */
        traverse(str, root);
        printf("\n==========================================================\n");
    }

    freeTrie(root);
    return 0;
}
查询结果示例:
Enter keyword: cc
==========================================================
********************* Possible Words ********************
cc
ccesser
cchaddoorck
ccid
ccitt
cckw
ccm
ccw
ccws
==========================================================
Enter keyword: