基于springboot+Vue+Element ui的电影推荐之协同过滤算法简单实现
- 1.基于用户的协同过滤算法的简单设计与实现
- 1.1获取某个用户的评分矩阵
- 1.2获取该用户与其他用户的相似度矩阵
- 1.3获取两个用户之间的相似度并存储
- 1.4返回推荐列表
- 2.基于物品的协同过滤算法的简单设计与实现
- 2.1计算物品相似度
- 2.2生成推荐列表
- 3.完整的RecommendAlgorithmService文件
- 4.关于改进的思考
文件说明:
MovieSimilarityDao 功能是与电影相似度相关的操作
RatingMatrixDao 功能是与用户对电影的评分矩阵相关的数据操作
SimilarityDao 功能是与用户相似度矩阵相关的数据操作
MovieSimilarityService 功能是获取电影的相似度矩阵以及获取与当前电影的相似度最高的前若干部电影id
SimilarityService 功能是获取用户的相似度矩阵以及获取与当前用户的相似度最高的前若干个用户id
RatingMatrixService 功能是查询某个用户的评分矩阵、某个电影的评分矩阵,以及获取整个用户评分矩阵
RecommendAlgorithmService 功能是两种协同过滤算法的简单实现
项目采用springboot+Vue+Element ui,mysql 8.0,以及Maven项目管理工具,持久层框架是MyBatis,建议采用注解开发
1.基于用户的协同过滤算法的简单设计与实现
思路:先获取用户评分矩阵,再根据余弦相似度计算公式计算用户之间的相似度,得到相似度矩阵,最后给出推荐列表。
1.1获取某个用户的评分矩阵
getRating方法是根据用户id获取该用户的评分矩阵
public List<Map<Integer,Double>> getRating(Integer userid){//获取某个用户的评分矩阵List<Map<Integer,Double>> ratings=ratingMatrixService.getRatings(userid);System.out.println("该用户的评分矩阵:"+ratings);return ratings;}
评分实体类如下:
import lombok.*;
import org.springframework.stereotype.Component;

/**
 * Rating entity: one user's rating of one movie.
 *
 * <p>Field order is significant because the Lombok-generated all-args
 * constructor takes its parameters in declaration order.
 */
@Component
@NoArgsConstructor
@AllArgsConstructor
@Data
public class RatingMatrix {

    /** Id of the rating user. */
    private Integer userid;

    /** Id of the rated movie. */
    private Integer movieId;

    /** The rating value. */
    private Double rating;
}
获取该用户的评分矩阵
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;import javax.annotation.Resource;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;@Service
public class RatingMatrixService {@Resourceprivate JdbcTemplate jdbcTemplate;public RatingMatrixService(JdbcTemplate jdbcTemplate) {this.jdbcTemplate = jdbcTemplate;}public List<Map<Integer, Double>> getRatings(Integer userid) {//获取用户评分矩阵String query = "SELECT userid, movieId, rating FROM ratings where userid="+userid;List<Map<Integer, Double>> userRatings = new ArrayList<>();jdbcTemplate.query(query, rs -> {while (rs.next()) {int movieId = rs.getInt("movieId");double rating = rs.getDouble("rating");// 将评分数据存入MapMap<Integer, Double> userRatingMap = new HashMap<>();userRatingMap.put(movieId, rating);// 将Map添加到List中userRatings.add(userRatingMap);}});return userRatings;}public List<Map<Integer, Double>> getRatingsByMovieId(Integer movieId) {//获取电影评分矩阵String query = "SELECT userid, rating FROM ratings where movieId="+movieId;List<Map<Integer, Double>> ratings = new ArrayList<>();List<Map<String, Object>> movieRatings = jdbcTemplate.queryForList(query);for(Map<String, Object> row:movieRatings){int userid = (int) row.get("userid");double rating = (double) row.get("rating");// 将评分数据存入MapMap<Integer, Double> movieRatingMap = new HashMap<>();movieRatingMap.put(userid,rating);// 将Map添加到List中ratings.add(movieRatingMap);}return ratings;}public List<Map<Integer, Map<Integer, Double>>> getRatingMatrix() {//获取整个用户评分矩阵List<Map<Integer, Map<Integer, Double>>> ratingMatrix = new ArrayList<>();String sql = "SELECT * FROM ratings";List<Map<String, Object>> rows = jdbcTemplate.queryForList(sql);for (Map<String, Object> row : rows) {int userid = (int) row.get("userid");int movieId = (int) row.get("movieId");double rating = (double) row.get("rating");Map<Integer, Double> movieRatings = new HashMap<>();movieRatings.put(movieId, rating); // 将电影ID放在前面boolean found = false;for (Map<Integer, Map<Integer, Double>> userRatings : ratingMatrix) {if (userRatings.containsKey(userid)) {userRatings.get(userid).put(movieId, rating); // 将用户ID放在前面found = true;break;}}if (!found) {Map<Integer, 
Map<Integer, Double>> userRatingsMap = new HashMap<>();userRatingsMap.put(userid, movieRatings); // 将用户ID放在前面ratingMatrix.add(userRatingsMap);}}return ratingMatrix;}}
1.2获取该用户与其他用户的相似度矩阵
用户相似度的实体类如下
import lombok.*;
import org.springframework.stereotype.Component;

/**
 * User-similarity entity: the similarity value stored for a pair of users.
 *
 * <p>Field order is significant because the Lombok-generated all-args
 * constructor takes its parameters in declaration order
 * ({@code userid1, userid2, similarity}).
 */
@Component
@NoArgsConstructor
@AllArgsConstructor
@Data
public class Similarity {

    /** Id of the first user of the pair. */
    private Integer userid1;

    /** Id of the second user of the pair. */
    private Integer userid2;

    /** Similarity value for the pair. */
    private double similarity;
}
获取相似度矩阵
public List<Map<Integer,Double>> getSimilarity(Integer userid){//获取用户的相似度矩阵List<Map<Integer,Double>> similarities=similarityService.getSimilarities(userid);System.out.println("用户相似度矩阵:"+similarities);return similarities;}
其中的SimilarityService.java如下
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;import javax.annotation.Resource;
import java.util.*;@Service
public class SimilarityService {@Resourceprivate JdbcTemplate jdbcTemplate;public List<Map<Integer, Double>> getSimilarities(Integer userid1){//获取用户相似度矩阵String query = "SELECT userid2, similarity FROM similarities where userid1='" + userid1 + "'";List<Map<Integer, Double>> userSimilarity = new ArrayList<>();List<Map<String, Object>> resultList = jdbcTemplate.queryForList(query);for (Map<String, Object> row : resultList) {int userid2 = (int) row.get("userid2");double similarity = (double) row.get("similarity");// 将评分数据存入MapMap<Integer, Double> userRatingMap = new HashMap<>();userRatingMap.put(userid2, similarity);// 将Map添加到List中userSimilarity.add(userRatingMap);}return userSimilarity;}//遍历相似度矩阵找出相似度最高的前五个用户public List<Integer> getTopFiveSimilarUsers(List<Map<Integer,Double>> similarityMatrix){List<Integer> topFiveUsers = new ArrayList<>();// 将相似度矩阵转换为一个包含所有相似度的列表List<Double> allSimilarities = new ArrayList<>();for (Map<Integer, Double> userSimilarities : similarityMatrix) {for (Double similarity : userSimilarities.values()) {allSimilarities.add(similarity);}}// 对相似度进行快速排序allSimilarities.sort(Collections.reverseOrder());// 取出前五个相似度最高的用户for (int i = 0; i < Math.min(5, allSimilarities.size()); i++) {Double similarity = allSimilarities.get(i);for (Map<Integer, Double> userSimilarities : similarityMatrix) {for (Map.Entry<Integer, Double> entry : userSimilarities.entrySet()) {if (entry.getValue().equals(similarity) && !topFiveUsers.contains(entry.getKey())) {topFiveUsers.add(entry.getKey());break;}}}}return topFiveUsers;}
}
1.3获取两个用户之间的相似度并存储
public Double getSimilarity(Integer user1,Integer user2){//获取两个用户之间的相似度CollaborativeFiltering collaborativeFiltering=new CollaborativeFiltering();List<Map<Integer, Double>> list1 = ratingMatrixService.getRatings(user1);//2List<Map<Integer, Double>> list2 = ratingMatrixService.getRatings(user2);//5double similarity = collaborativeFiltering.calculateSimilarity(list1,list2);Similarity similarity1=new Similarity(user1,user2,similarity);//存储用户相似度similarityDao.addSimilarity(similarity1);System.out.println("用户"+user1+"和用户"+user2+"之间的相似度为:"+similarity);return similarity;}
其中的CollaborativeFiltering.java如下
import java.util.*;
public class CollaborativeFiltering {//采用余弦相似度计算公式计算两个用户的相似度public Double calculateSimilarity(List<Map<Integer, Double>> list1, List<Map<Integer, Double>> list2){//计算用户相似度// 计算余弦相似度double dotProduct = 0.0;double normA = 0.0;double normB = 0.0;for (Map<Integer, Double> map1 : list1) {for (Map<Integer, Double> map2 : list2) {for (Map.Entry<Integer, Double> entry1 : map1.entrySet()) {for (Map.Entry<Integer, Double> entry2 : map2.entrySet()) {if (entry1.getKey().equals(entry2.getKey())) {dotProduct += entry1.getValue() * entry2.getValue();normA += Math.pow(entry1.getValue(), 2);normB += Math.pow(entry2.getValue(), 2);}}}}}if (normA == 0 || normB == 0) {return 0.0; // 避免除以零}double similarity = dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));return similarity;}
}
这里采用的是余弦相似度计算公式计算用户的相似度,即 $\cos(A,B)=\dfrac{A\cdot B}{\lVert A\rVert\,\lVert B\rVert}$,其中 $A$、$B$ 分别为两个用户的评分向量,具体内容点击这里
1.4返回推荐列表
public Set<Integer> getRecommendList(Integer userid){System.out.println("获取推荐列表");//获取用户相似度矩阵List<Map<Integer,Double>> similarities=similarityService.getSimilarities(userid);//遍历该相似度矩阵找出最相似的前五个用户idList<Integer> userids=similarityService.getTopFiveSimilarUsers(similarities);//把这些用户喜欢的电影,评分高的电影推荐给当前用户//获取这些用户喜欢的电影Set<Integer> movieids=movieService.getUsersLikedMovies(userids);//获取这些用户里评分最高的电影idSet<Integer> movieids1=movieService.getMaxRatingMovie(userids);//把两个集合合并并去重movieids.addAll(movieids1);System.out.println("电影的推荐列表为:"+movieids);return movieids;}
2.基于物品的协同过滤算法的简单设计与实现
原理是物品A与物品B具有很大的相似度是因为喜欢物品A的用户大都也喜欢物品B,因此先计算两个item的相似度,再基于物品相似度以及用户喜好的item类型生成推荐列表。
2.1计算物品相似度
public double calculateItemSimilarity(Integer movie1,Integer movie2){//计算两个电影的相似度CollaborativeFiltering collaborativeFiltering=new CollaborativeFiltering();//获取电影1和电影2的评分集合List<Map<Integer,Double>> list1=ratingMatrixService.getRatingsByMovieId(movie1);List<Map<Integer,Double>> list2=ratingMatrixService.getRatingsByMovieId(movie2);System.out.println(list1);System.out.println(list2);//计算电影1和电影2的余弦相似度double similarity=collaborativeFiltering.calculateSimilarity(list1,list2);//存储电影相似度movieSimilarityDao.addMovieSimilarity(movie1,movie2,similarity);return similarity;}
这里同样采用余弦相似度计算公式计算两个电影的相似度,值得注意的是从数据库里查询到的结果需要先存到Map集合里再存到list里
2.2生成推荐列表
public Set<Integer> getRecommendMovieList(Integer movieId){//获取与该电影的推荐列表System.out.println("获取电影的推荐列表");//获取电影相似度矩阵List<Map<Integer,Double>> similarities=movieSimilarityService.getSimilarities(movieId);//遍历该相似度矩阵找出最相似的前十个电影idList<Integer> movieids=movieSimilarityService.getTopTenSimilarMovies(similarities);//把和当前电影相似度高的电影推荐给用户Set<Integer> recommendMovieList=new HashSet<>();if(movieids.isEmpty()){//按电影类型推荐String movieType=movieService.getMovieByType(movieId);List<String> movieTypes=splitStringByComma(movieType);for(String type:movieTypes){Set<Integer> typeMovies=movieService.getMoviesByType(type,movieId);System.out.println("该类型电影为:"+typeMovies);recommendMovieList.addAll(typeMovies);}}else{String movieType=movieService.getMovieByType(movieId);List<String> movieTypes=splitStringByComma(movieType);for(String type:movieTypes){Set<Integer> typeMovies=movieService.getMoviesByType(type,movieId);recommendMovieList.addAll(typeMovies);}recommendMovieList.addAll(movieids);}return recommendMovieList;}
其中的MovieSimilarityService.java如下:
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;import javax.annotation.Resource;
import java.util.*;@Service
public class MovieSimilarityService {@Resourceprivate JdbcTemplate jdbcTemplate;public List<Map<Integer, Double>> getSimilarities(Integer movieId1){//获取电影相似度矩阵String query = "SELECT movieId2, similarity FROM moviesimilarity where movieId1='" + movieId1+ "'";List<Map<Integer, Double>> movieSimilarity = new ArrayList<>();List<Map<String, Object>> resultList = jdbcTemplate.queryForList(query);for (Map<String, Object> row : resultList) {int movieId2 = (int) row.get("movieId2");double similarity = (double) row.get("similarity");// 将评分数据存入MapMap<Integer, Double> movieRatingMap = new HashMap<>();movieRatingMap.put(movieId2, similarity);// 将Map添加到List中movieSimilarity.add(movieRatingMap);}return movieSimilarity;}public List<Integer> getTopTenSimilarMovies(List<Map<Integer, Double>> similarityMatrix) {List<Integer> topTenMovies = new ArrayList<>();// 将相似度矩阵转换为一个包含所有相似度的列表List<Double> allSimilarities = new ArrayList<>();for (Map<Integer, Double> movieSimilarities : similarityMatrix) {for (Double similarity : movieSimilarities.values()) {allSimilarities.add(similarity);}}// 对相似度进行快速排序allSimilarities.sort(Collections.reverseOrder());// 取出前五个相似度最高的用户for (int i = 0; i < Math.min(10, allSimilarities.size()); i++) {Double similarity = allSimilarities.get(i);for (Map<Integer, Double> movieSimilarities : similarityMatrix) {for (Map.Entry<Integer, Double> entry : movieSimilarities.entrySet()) {if (entry.getValue().equals(similarity) && !topTenMovies.contains(entry.getKey())) {topTenMovies.add(entry.getKey());break;}}}}return topTenMovies;}
}
3.完整的RecommendAlgorithmService文件
import com.example.Dao.MovieSimilarityDao;
import com.example.Dao.SimilarityDao;
import com.example.entity.Similarity;
import com.example.utils.CollaborativeFiltering;
import org.springframework.stereotype.Service;import javax.annotation.Resource;
import java.util.*;@Service
public class RecommendAlgorithmService{//推荐算法的完整过程@Resourceprivate RatingMatrixService ratingMatrixService;@Resourceprivate SimilarityService similarityService;@Resourceprivate SimilarityDao similarityDao;@Resourceprivate MovieService movieService;@Resourceprivate MovieSimilarityDao movieSimilarityDao;@Resourceprivate MovieSimilarityService movieSimilarityService;//(1)基于用户的协同过滤算法的简单设计与实现//1.获取用户评分矩阵//2.计算用户之间的相似度,获取相似度矩阵//3.给出推荐列表public List<Map<Integer,Double>> getRating(Integer userid){//获取某个用户的评分矩阵List<Map<Integer,Double>> ratings=ratingMatrixService.getRatings(userid);System.out.println("该用户的评分矩阵:"+ratings);return ratings;}public List<Map<Integer,Double>> getSimilarity(Integer userid){//获取用户的相似度矩阵List<Map<Integer,Double>> similarities=similarityService.getSimilarities(userid);System.out.println("用户相似度矩阵:"+similarities);return similarities;}public List<Map<Integer,Map<Integer,Double>>> getRatingMatrix(){//获取整个评分矩阵List<Map<Integer,Map<Integer,Double>>> mapList=ratingMatrixService.getRatingMatrix();System.out.println("整个用户的评分矩阵:"+mapList);return mapList;}public Double getSimilarity(Integer user1,Integer user2){//获取两个用户之间的相似度CollaborativeFiltering collaborativeFiltering=new CollaborativeFiltering();List<Map<Integer, Double>> list1 = ratingMatrixService.getRatings(user1);//2List<Map<Integer, Double>> list2 = ratingMatrixService.getRatings(user2);//5double similarity = collaborativeFiltering.calculateSimilarity(list1,list2);Similarity similarity1=new Similarity(user1,user2,similarity);//存储用户相似度similarityDao.addSimilarity(similarity1);System.out.println("用户"+user1+"和用户"+user2+"之间的相似度为:"+similarity);return similarity;}public Set<Integer> getRecommendList(Integer userid){System.out.println("获取推荐列表");//获取用户相似度矩阵List<Map<Integer,Double>> similarities=similarityService.getSimilarities(userid);//遍历该相似度矩阵找出最相似的前五个用户idList<Integer> userids=similarityService.getTopFiveSimilarUsers(similarities);//把这些用户喜欢的电影,评分高的电影推荐给当前用户//获取这些用户喜欢的电影Set<Integer> 
movieids=movieService.getUsersLikedMovies(userids);//获取这些用户里评分最高的电影idSet<Integer> movieids1=movieService.getMaxRatingMovie(userids);//把两个集合合并并去重movieids.addAll(movieids1);System.out.println("电影的推荐列表为:"+movieids);return movieids;}//(2)基于物品的协同过滤算法的简单设计与实现//物品A和物品B具有很大的相似度是因为喜欢物品A的用户大都也喜欢物品B//1.计算物品之间的相似度//2.基于物品的相似度和用户的喜好生成推荐列表public double calculateItemSimilarity(Integer movie1,Integer movie2){//计算两个电影的相似度CollaborativeFiltering collaborativeFiltering=new CollaborativeFiltering();//获取电影1和电影2的评分集合List<Map<Integer,Double>> list1=ratingMatrixService.getRatingsByMovieId(movie1);List<Map<Integer,Double>> list2=ratingMatrixService.getRatingsByMovieId(movie2);System.out.println(list1);System.out.println(list2);//计算电影1和电影2的余弦相似度double similarity=collaborativeFiltering.calculateSimilarity(list1,list2);//存储电影相似度movieSimilarityDao.addMovieSimilarity(movie1,movie2,similarity);return similarity;}public Set<Integer> getRecommendMovieList(Integer movieId){//获取与该电影的推荐列表System.out.println("获取电影的推荐列表");//获取电影相似度矩阵List<Map<Integer,Double>> similarities=movieSimilarityService.getSimilarities(movieId);//遍历该相似度矩阵找出最相似的前十个电影idList<Integer> movieids=movieSimilarityService.getTopTenSimilarMovies(similarities);//把和当前电影相似度高的电影推荐给用户Set<Integer> recommendMovieList=new HashSet<>();if(movieids.isEmpty()){//按电影类型推荐String movieType=movieService.getMovieByType(movieId);List<String> movieTypes=splitStringByComma(movieType);for(String type:movieTypes){Set<Integer> typeMovies=movieService.getMoviesByType(type,movieId);System.out.println("该类型电影为:"+typeMovies);recommendMovieList.addAll(typeMovies);}}else{String movieType=movieService.getMovieByType(movieId);List<String> movieTypes=splitStringByComma(movieType);for(String type:movieTypes){Set<Integer> typeMovies=movieService.getMoviesByType(type,movieId);recommendMovieList.addAll(typeMovies);}recommendMovieList.addAll(movieids);}return recommendMovieList;}public List<String> splitStringByComma(String input) {//将电影类型按照逗号分割// 检查输入字符串是否为空if (input == null || 
input.isEmpty()) {return Collections.emptyList();}// 使用逗号分隔字符串,并返回结果return Arrays.asList(input.split(","));}
}
4.关于改进的思考
可以看到,当前我们的功能里面采用余弦相似度计算公式计算相似度的时候会出问题。为什么呢?以图书为例,如果两个用户都买过《新华字典》,这丝毫不能说明他们爱好相似,因为我们小时候都买过《新华字典》。但如果两个用户都买过《数据挖掘导论》,那么可以认为他们爱好比较相似,因为只有研究数据挖掘的人才会买这本书。换句话说,两个用户对冷门物品的相似购买行为更能说明用户之间的相似度,因此可以继续在此基础上基于用户行为计算用户的爱好相似度,这里不再赘述,请自行搜索各位大佬的相似度改进方法。
关于代码只是自己关于协同过滤算法的简单实现,若有错误或者需要改进的地方欢迎各位大佬给出宝贵意见。