Shell 脚本：对多个文件中的单词去重

例如要求如下：

有多个txt文件，每个文件内有多行单词
每行中，单词与释义之间以英文逗号“,”或中文逗号“，”作为分隔符。

world,世界
set，设置
good,好，商品
....

将这些文件汇总，去除重复的单词，并将结果输出到一个新的文件内。
要求去重时不区分大小写。

实现

#!/bin/bash
#------------------------------------------------------------------------------
# Filename:    filterWords.sh
# Usage:       ./filterWords.sh ~/test/
# Version:     1.0
# Date:        2018-04-04
# Author:      vincent
# Email:       N/A
# Description: Collects the words from multiple files, drops duplicates so that
#              each word appears once, and writes the result to a new file.
#              Matching ignores case. When a word occurs more than once only
#              one of its lines (and therefore one definition) is kept; which
#              one depends on the order in which the files are read.
#              Supported line formats:
#                  set,设置        # ASCII comma
#                  set，设置       # full-width (Chinese) comma
#                  set,设置，集合   # several separators; the text before the
#                                  # first separator is taken as the word
#                  SeT,设置        # case is ignored
# Notes:       N/A
#------------------------------------------------------------------------------

# Directory to scan, taken from the first argument; empty when none is given.
declare folderPath=${1:-}

# Timestamp such as 2018-04-04-13-05-59, used to name the output file.
# Declared separately so a failing `date` is not masked by `declare`.
declare currentTime
currentTime=$(date +%F-%H-%M-%S)

# Result file; a relative name, so it is created in the current directory.
declare outputPath="${currentTime}_words.txt"

# Number of lines written to the result file.
declare wordsCounts=0

# outputMsg STATUS MESSAGE
#   Abort the script when a previous command failed.
#   $1 - exit status to inspect (normally "$?")
#   $2 - message to print when the status is non-zero
#   Prints MESSAGE to stderr and exits with status 1 if STATUS is non-zero;
#   otherwise returns without printing anything.
outputMsg() {
  if [ "$1" -ne 0 ]; then
    # Quoted so the message is printed verbatim (no word splitting/globbing).
    printf '%s\n' "$2" >&2
    exit 1
  fi
}

# Check whether a path was given
if [[ -z "$folderPath" ]]; then
  # No directory argument: scan the current directory.
  folderPath="."
elif [[ ! -d "$folderPath" ]]; then
  # A path was given but it is not an existing directory.
  echo "${folderPath} is not existed !" >&2
  exit 1
fi

# Collect every regular *.txt file below folderPath. The list is captured as
# text first so that find's exit status can still be checked.
fileListRaw=$(find "$folderPath" -type f -name "*.txt")
outputMsg $? "Find txt file failed!"
if [[ -z "$fileListRaw" ]]; then
  echo "No txt files are found." >&2
  exit 1
fi

# One array element per line, so paths containing spaces or glob characters
# reach awk intact. (Paths containing a newline are still not supported.)
declare -a fileList=()
while IFS= read -r txtFile; do
  fileList+=("$txtFile")
done <<< "$fileListRaw"

# De-duplicate case-insensitively on the text before the first separator.
# The separator is an ASCII "," or a full-width "，". It is written as an
# alternation rather than a bracket expression: inside [...] a "|" is a literal
# character, and a byte-oriented awk would match single bytes of "，".
# The awk program is one single-quoted shell word; keep the quotes intact if
# you reformat it.
awk -F',|，' '
  {
    sub(/\r$/, "")                      # tolerate CRLF line endings
    key = tolower($1)
    gsub(/^[ \t]+|[ \t]+$/, "", key)    # "set ,x" and "set,x" are the same word
    if (key == "") next                 # skip blank lines / lines without a word
    words[key] = $0                     # a later line replaces an earlier one
  }
  END {
    for (key in words) print words[key] # output order is unspecified
  }
' "${fileList[@]}" > "$outputPath"
outputMsg $? "Filter words failed!"

# Reading from stdin makes wc print only the number; the arithmetic expansion
# strips the padding some wc implementations add.
wordsCounts=$(( $(wc -l < "$outputPath") ))
echo "Words counts: "
# Same "<count> <file>" shape the script has always printed here.
echo "${wordsCounts} ${outputPath}"