题目分析
根据关键词自动判断文章的类别。共2篇文章(用char s1[]和char s2[]保存),去掉无关词后(无关词用char irrelevant[][10]保存),统计文章里面关键词出现的频率,即:频率 = 关键词个数 ÷ 去掉无关词后的文章单词个数。
判断条件:文章中关键词"computer"的出现频率超过10%,被认为是计算机类;关键词"Earth"的出现频率超过10%,被认为是地球科学类。
char s1[]="An amazing machine! We are living in the computer age today and most of our day to day activities cannot be accomplished without using computers. Sometimes knowingly and sometimes unknowingly we use computers. Computer has become an indispensable and multipurpose tool. We are breathing in the computer age and gradually computer has become such a desire necessity of life that it is difficult to imagine life without it. ";
char s2[]="Earth not only orbits the sun but spins on its own axis as it does so. At the equator the Earth is spinning on its axis at a speed of about 1,038 miles (1,670 kilometres) an hour. Earth's spin or rotation is a relic of its origin as a hot. Earth science or geology is the science of the Earth. During the time when Earth was forming, the largest mass of planet Earth is called. ";
char irrelevant [][10]= {"a","an","the","that","what","which","then","to","for","of","in","as","and","on","is","are","this","by","every","so","was","were","at","most","we","it","without","such","cannot","be","has","have","it","today"};//允许重复
最后,编写函数。完整的代码如下:
#include <iostream>
#include <string.h>
using namespace std;
char s1[]="An amazing machine! We are living in the computer age today and most of our day to day activities cannot be accomplished without using computers. Sometimes knowingly and sometimes unknowingly we use computers. Computer has become an indispensable and multipurpose tool. We are breathing in the computer age and gradually computer has become such a desire necessity of life that it is difficult to imagine life without it. ";
char s2[]="Earth not only orbits the sun but spins on its own axis as it does so. At the equator the Earth is spinning on its axis at a speed of about 1,038 miles (1,670 kilometres) an hour. Earth's spin or rotation is a relic of its origin as a hot. Earth science or geology is the earth science of the Earth. During the time when Earth was forming, the largest mass of planet Earth is called. ";
char irrelevant[][10]={"a","an","the","that","what","which","then","to","for","of","in","as","and","on","is","are","this","by","every","so","was","were","at","most","we","it","without","such","cannot","be","has","have","it","today"};
int DivWords(char str[], char div[][20]);
int Cntirrelevant (char div[][20], int n, char irrelevant[][10], int m);
int CntKeyWords (char div[][20], int n, char keyword[]);
int JudgeClass(int a, int b, int c, char keyword[]);
int CalWordFreq(char str[], char keyword[]);int main()
{char keyword1[20]="computer";char keyword2[20]="earth";CalWordFreq(s1, keyword1);CalWordFreq(s2, keyword2); return 0;
}
int CalWordFreq(char str[], char keyword[])
{char d[100][20];int numofWords,numofirrlwords,numofkeywords;numofWords =DivWords(str,d);numofirrlwords = Cntirrelevant(d, numofWords, irrelevant, 34);numofkeywords = CntKeyWords(d, numofWords, keyword);JudgeClass(numofkeywords,numofWords,numofirrlwords,keyword);
}
int JudgeClass(int a, int b, int c, char keyword[])
{if (float(a) / (b - c)>=0.1) cout<< "This article looks like "<< keyword<< endl;elsecout<< "This article does not look like "<< keyword<< endl;return 1;}
int CntKeyWords (char div[][20], int n, char keyword[])
{int i,j,cnt=0;for(i=0;i<n;i++){if(strlen(div[i])<strlen(keyword)) continue;for(j=0;j<strlen(keyword);j++)if (div[i][j]!=keyword[j]) break; if (j==strlen(keyword)) cnt++; }return cnt;
}int Cntirrelevant(char div[][20], int n, char irrelevant[][10], int m)
{int i,j,cnt=0;for(i=0;i<n;i++)for(j=0;j<m;j++)if(strcmp(div[i],irrelevant[j])==0) {cnt++; }return cnt;
}int DivWords(char str[], char div[][20])
{int i=0,j=0,k=0; for(i=0;i<strlen(str);i++) {if(str[i]>='A'&&str[i]<='Z') str[i]=str[i]+32;if(str[i]>='a'&&str[i]<='z'){div[j][k]=str[i];k++;}else{ div[j][k]=0;j++;k=0;}} j++;k=0;return j;
}