pinyin4j
添加Maven依赖
<dependency>
    <groupId>com.belerweb</groupId>
    <artifactId>pinyin4j</artifactId>
    <version>2.5.0</version>
</dependency>
获取文本拼音
/**
 * Converts text to upper-case, tone-less pinyin with pinyin4j.
 *
 * <p>Characters are looked up one by one. When pinyin4j returns several readings for a
 * character, only the first one is used. The whole result (including any characters copied
 * through unchanged) is upper-cased with a locale-independent mapping.
 *
 * @param context        text to convert; leading and trailing whitespace is trimmed first
 * @param existNotPinyin {@code true} to copy characters for which pinyin4j returns no reading
 *                       into the result, {@code false} to drop them
 * @return the concatenated pinyin in upper case, or {@code null} when {@code context} is
 *         {@code null} or blank
 * @throws IllegalStateException if pinyin4j rejects the output format configured here
 */
private String pinyinTest(String context, boolean existNotPinyin) {
    if (context == null) {
        return null;
    }
    String trimmed = context.trim();
    if (trimmed.isEmpty()) {
        return null;
    }

    HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();
    outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    StringBuilder builder = new StringBuilder();
    try {
        for (char aChar : trimmed.toCharArray()) {
            String[] pinyin = PinyinHelper.toHanyuPinyinStringArray(aChar, outputFormat);
            if (pinyin == null || pinyin.length == 0) {
                // No reading available for this character: keep or drop it as requested.
                if (existNotPinyin) {
                    builder.append(aChar);
                }
                continue;
            }
            builder.append(pinyin[0]);
        }
    } catch (BadHanyuPinyinOutputFormatCombination e) {
        // The format is built locally above, so a rejection is a programming error; fail
        // loudly (keeping the cause) instead of returning a silently truncated result.
        throw new IllegalStateException("pinyin4j rejected the output format", e);
    }
    // Locale.ROOT keeps "i" -> "I" regardless of the JVM default locale (e.g. Turkish).
    return builder.toString().toUpperCase(java.util.Locale.ROOT);
}
测试用例
/**
 * Smoke test for {@code pinyinTest}: converts a sample containing Chinese characters and
 * digits, and prints the result so the run produces visible output.
 */
@Test
public void test() {
    String temp = "我爱罗52";
    // existNotPinyin = false: characters without a pinyin reading are dropped.
    String result = pinyinTest(temp, false);
    System.out.println(result);
}
hanlp
添加Maven依赖
<dependency>
    <groupId>com.hankcs</groupId>
    <artifactId>hanlp</artifactId>
    <version>portable-1.8.4</version>
</dependency>
获取文本拼音
/**
 * Converts text to upper-case, tone-less pinyin with HanLP.
 *
 * @param content        text to convert
 * @param existNotPinyin {@code true} to delegate to
 *                       {@code HanLP.convertToPinyinString(content, "", false)}; per the HanLP
 *                       documentation that call keeps characters without pinyin as-is (TODO
 *                       confirm against the HanLP version in use). {@code false} or
 *                       {@code null} to skip {@code null} and {@code Pinyin.none5} entries
 * @return the concatenated pinyin in upper case, or {@code null} when {@code content} is
 *         {@code null} or blank
 */
private String hanLpTest(String content, Boolean existNotPinyin) {
    if (content == null || content.trim().isEmpty()) {
        return null;
    }
    // Boolean.TRUE.equals avoids a NullPointerException from unboxing a null flag.
    if (Boolean.TRUE.equals(existNotPinyin)) {
        return HanLP.convertToPinyinString(content, "", false)
                .toUpperCase(java.util.Locale.ROOT);
    }

    StringBuilder builder = new StringBuilder();
    for (Pinyin pinyin : HanLP.convertToPinyinList(content)) {
        if (pinyin == null || Pinyin.none5.equals(pinyin)) {
            continue;
        }
        builder.append(pinyin.getPinyinWithoutTone());
    }
    // Locale.ROOT keeps "i" -> "I" regardless of the JVM default locale (e.g. Turkish).
    return builder.toString().toUpperCase(java.util.Locale.ROOT);
}
测试用例
/**
 * Smoke test for {@code hanLpTest}: converts a sample containing Chinese characters and
 * digits with the flag set to {@code true}, and prints the result.
 */
@Test
public void test() {
    final String sample = "我爱罗52";
    final String converted = hanLpTest(sample, true);
    System.out.println(converted);
}
关键字分词
正则表达式
/**
 * Regular expression used to cut a keyword into consecutive pieces.
 *
 * <p>Every part of the pattern is optional, in this order: one character other than
 * a/o/e/i/u/v, an "h", one of i/u/v, and one of the listed endings
 * (ai, ei, ao, ou, er, an, ang, en, eng, ong, a, o, e, i, u, ng, n).
 * NOTE(review): this looks like initial + medial + final of a pinyin syllable; confirm.
 */
private final String SPLIT_WORD_REG_EX = "[^aoeiuv]?h?[iuv]?(ai|ei|ao|ou|er|ang?|eng?|ong|a|o|e|i|u|ng|n)?" ;
获取分词结果
/**
 * Splits a keyword into consecutive pieces, each being one match of
 * {@code SPLIT_WORD_REG_EX}, scanning from left to right.
 *
 * @param keyword text to split; it is scanned as given (not trimmed)
 * @return the pieces in order of appearance, or an empty list when {@code keyword} is
 *         {@code null} or blank
 */
private List<String> splitTest(String keyword) {
    if (keyword == null || keyword.trim().isEmpty()) {
        return Collections.emptyList();
    }

    List<String> keywordList = new ArrayList<>();
    // One matcher walks the whole input; each find() resumes where the previous match ended.
    Matcher matcher = Pattern.compile(SPLIT_WORD_REG_EX).matcher(keyword);
    while (matcher.find()) {
        String piece = matcher.group();
        if (piece.isEmpty()) {
            // Every part of the pattern is optional, so an empty match means nothing more
            // can be consumed; stop instead of looping without progress.
            break;
        }
        keywordList.add(piece);
    }
    return keywordList;
}
测试用例
/**
 * Smoke test for {@code splitTest}: splits a sample string and prints the resulting list.
 */
@Test
public void test() {
    final String sample = "我爱罗52";
    final List<String> pieces = splitTest(sample);
    System.out.println(pieces);
}