/*
 * 使用 Scanner 类和正则表达式统计一篇英文文章中的单词，要求如下：
 * 1、统计文章中一共出现了多少个单词。
 * 2、统计其中有多少个互不相同的单词。
 * 3、按出现频率从高到低依次输出各个单词。
 */
package 第七次;
import java. util. * ;
import java. io. * ;
import java.util.regex.*;

/**
 * Word-frequency report for an English text file.
 *
 * <p>The program reads the file line by line with a {@link Scanner}, extracts
 * words with a regular expression, and prints:
 * <ol>
 *   <li>the number of distinct words,</li>
 *   <li>the total number of word occurrences,</li>
 *   <li>every word with its share of the total, most frequent first.</li>
 * </ol>
 * Words are compared case-insensitively (they are lower-cased before counting).
 */
public class word {

    /** Input file used when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT = "src/java线上作业/第三章/crossion.txt";

    /**
     * A word is a run of ASCII letters, optionally followed by further runs
     * joined by single hyphens ("well-known"). Trailing hyphens and dash runs
     * ("cat-", "this--that") are not part of a word, so they cannot create
     * spurious distinct entries.
     */
    private static final Pattern WORD = Pattern.compile("[A-Za-z]+(?:-[A-Za-z]+)*");

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} is the path of the text file to
     *             analyse. When absent, {@link #DEFAULT_INPUT} is used.
     */
    public static void main(String[] args) {
        String inputFile = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;

        Map<String, Integer> wordIndex;
        try {
            wordIndex = countWords(inputFile);
        } catch (IOException e) {
            // Without input there is nothing meaningful to report.
            System.err.println("Cannot read " + inputFile + ": " + e.getMessage());
            return;
        }
        printReport(wordIndex);
    }

    /**
     * Reads the file at {@code path} as UTF-8 and counts every word in it.
     *
     * @param path location of the text file
     * @return map from lower-cased word to its number of occurrences
     * @throws IOException if the file cannot be opened or a read error occurs
     */
    private static Map<String, Integer> countWords(String path) throws IOException {
        Map<String, Integer> counts = new HashMap<>();
        try (InputStream in = new FileInputStream(path);
             Scanner scanner = new Scanner(in, "UTF-8")) {
            while (scanner.hasNextLine()) {
                Matcher matcher = WORD.matcher(scanner.nextLine());
                while (matcher.find()) {
                    // Locale.ROOT keeps the folding independent of the default locale.
                    counts.merge(matcher.group().toLowerCase(Locale.ROOT), 1, Integer::sum);
                }
            }
            // Scanner records read errors instead of throwing them; surface them here.
            IOException failure = scanner.ioException();
            if (failure != null) {
                throw failure;
            }
        }
        return counts;
    }

    /**
     * Prints the distinct-word count, the total word count, and one line per
     * word giving its percentage of the total. Lines are ordered by descending
     * count; words with equal counts are ordered alphabetically so the output
     * is deterministic.
     *
     * @param counts map from word to number of occurrences
     */
    private static void printReport(Map<String, Integer> counts) {
        int total = 0;
        for (int occurrences : counts.values()) {
            total += occurrences;
        }

        System.out.println("不相同的单词次数:" + counts.size());
        System.out.println("单词出现的数量:" + total);

        List<Map.Entry<String, Integer>> sortedWords = new ArrayList<>(counts.entrySet());
        sortedWords.sort(
                Map.Entry.<String, Integer>comparingByValue().reversed()
                        .thenComparing(Map.Entry.<String, Integer>comparingByKey()));

        // total is non-zero whenever the loop body runs, because every entry has count >= 1.
        for (Map.Entry<String, Integer> entry : sortedWords) {
            String percent = String.format(Locale.ROOT, "%.2f", 100.0 * entry.getValue() / total);
            System.out.println("单词出现的频率:" + entry.getKey() + ": " + percent + "%");
        }
    }
}