做了一个B站Up主推荐系统

1. 技术路线

(1)爬取B站用户关注Up主列表

使用B站的API获取数据(`vmid` 为待查询用户的 UID):

https://api.bilibili.com/x/relation/followings?vmid=<UID>

(2)采用简单的ItemCF模型

参考之前的文章《[零基础入门推荐系统(1)] 基于用户和基于物品的协同过滤方法(Python 代码实现)》

class ItemCF(object):
    """
    Item-based collaborative filtering.

    Builds an item-item similarity matrix from ``{user: [item, ...]}`` data
    and recommends items similar to the ones a user already has.
    """

    def __init__(self):
        # Empty defaults so that ``recommend`` called before ``train`` returns
        # an empty result instead of raising AttributeError.
        self.user_items = dict()
        self.item_sim_matrix = dict()

    def train(self, user_items, alpha=0.5, normalization=False):
        """
        Fit the model: store the interactions and build the similarity matrix.

        :param user_items: {user1: [item1, item2], user2: [item1]}
        :param alpha: popularity exponent, see ``improved_item_similarity2``
        :param normalization: if True, scale each item's similarities so that
            its largest one equals 1
        :return: the similarity matrix, {item1: {item2: sim12, item3: sim13}}
        """
        self.user_items = user_items
        self.item_sim_matrix = self.improved_item_similarity2(
            user_items, alpha=alpha, normalization=normalization)
        return self.item_sim_matrix

    @staticmethod
    def _co_occurrence(user_items, penalize_active_users):
        """
        Count item occurrences and pairwise co-occurrences.

        :param user_items: {user1: [item1, item2], user2: [item1]}
        :param penalize_active_users: if True, each co-occurrence contributed
            by a user is weighted by 1 / log(1 + number of items of that
            user); otherwise every co-occurrence counts as 1
        :return: (C, N) where C is {item_i: {item_j: weighted co-count}} and
            N is {item: number of occurrences}
        """
        import math  # local import: the module is not imported at file level

        C = dict()
        N = dict()
        for items in user_items.values():
            # The weight is constant for one user, so compute it once.
            # Guard the empty list: log(1) == 0 would divide by zero.
            if penalize_active_users and len(items) > 0:
                weight = 1 / math.log(1 + len(items))
            else:
                weight = 1
            for i in items:
                N[i] = N.get(i, 0) + 1
                row = C.setdefault(i, dict())
                for j in items:
                    if i == j:
                        continue
                    row[j] = row.get(j, 0) + weight
        return C, N

    @staticmethod
    def _normalize(W):
        """
        Divide every row of W, in place, by the row's largest similarity.

        Rows without neighbours (items that never co-occur with another item)
        are left empty.
        """
        for neighbours in W.values():
            if not neighbours:
                continue
            max_val = max(neighbours.values())
            for j in neighbours:
                neighbours[j] = neighbours[j] / max_val
        return W

    def improved_item_similarity(self, user_items, normalization=False):
        """
        Cosine-style similarity with a penalty for very active users.

        W[i][j] = C[i][j] / sqrt(N[i] * N[j]), where every co-occurrence in C
        is weighted by 1 / log(1 + number of items of the contributing user).

        :param user_items: {user1:[movie1,movie2], user2:[movie1]}
        :param normalization: if True, scale each row so its maximum is 1
        :return: W: {items1: {item2: sim12, item3:sim13}}
        """
        import math  # local import: the module is not imported at file level

        C, N = self._co_occurrence(user_items, penalize_active_users=True)
        W = dict()
        for i, related_items in C.items():
            W[i] = {j: cij / math.sqrt(N[i] * N[j])
                    for j, cij in related_items.items()}
        if normalization:
            self._normalize(W)
        return W

    def improved_item_similarity2(self, user_items, alpha=0.5, normalization=False):
        """
        Solution for Harry Potter problem.

        W[i][j] = C[i][j] / (N[i] ** (1 - alpha) * N[j] ** alpha), with the
        same active-user weighting of C as ``improved_item_similarity``.
        A larger ``alpha`` puts a larger exponent on N[j], i.e. penalizes
        frequently occurring neighbour items more strongly.

        :param user_items: {user1:[movie1,movie2], user2:[movie1]}
        :param alpha: exponent applied to the neighbour item's count
        :param normalization: if True, scale each row so its maximum is 1
        :return: W: {items1: {item2: sim12, item3:sim13}}
        """
        C, N = self._co_occurrence(user_items, penalize_active_users=True)
        W = dict()
        for i, related_items in C.items():
            W[i] = {j: cij / (N[i] ** (1 - alpha) * N[j] ** alpha)
                    for j, cij in related_items.items()}
        if normalization:
            self._normalize(W)
        return W

    def item_similarity(self, user_items, normalization=False):
        """
        Plain cosine-style similarity on unweighted co-occurrence counts.

        W[i][j] = C[i][j] / sqrt(N[i] * N[j])

        :param user_items: {user1:[movie1,movie2], user2:[movie1]}
        :param normalization: if True, scale each row so its maximum is 1
        :return: W: {items1: {item2: sim12, item3:sim13}}
        """
        import math  # local import: the module is not imported at file level

        C, N = self._co_occurrence(user_items, penalize_active_users=False)
        W = dict()
        for i, related_items in C.items():
            W[i] = {j: cij / math.sqrt(N[i] * N[j])
                    for j, cij in related_items.items()}
        if normalization:
            self._normalize(W)
        return W

    def recommend(self, user, N, K):
        """
        Recommend items according to the history items of a user.

        :param user: user key as used in ``user_items``
        :param N: the number of recommended items to return
        :param K: the number of most similar items considered for each item
            the user already has
        :return: list of (item, accumulated similarity) tuples, best first
        """
        already_items = set(self.user_items.get(user, set()))
        recommend_items = dict()

        for i in already_items:
            neighbours = self.item_sim_matrix.get(i, dict()).items()
            for j, sim in sorted(neighbours, key=lambda x: -x[1])[:K]:
                if j in already_items:
                    continue
                recommend_items[j] = recommend_items.get(j, 0) + sim
        return sorted(recommend_items.items(), key=lambda x: -x[1])[:N]

    def recommend_users(self, users, N, K):
        """
        Run ``recommend`` for several users.

        :param users: iterable of user keys
        :param N: the number of recommended items per user
        :param K: the number of most similar items considered per history item
        :return: dict, {user: [(item, score), ...]}
        """
        return {user: self.recommend(user, N, K) for user in users}
————————————————
版权声明:本文为CSDN博主「rosefunR」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/rosefun96/article/details/109107812

2. 效果展现

(1)根据喜欢的Up主查询可能喜欢的其他Up主

比如喜欢象棋,输入
在这里插入图片描述

推荐结果:

在这里插入图片描述

(2)输入个人的B站 UID(B站身份ID)

比如,输入 碧诗的UID 2,推荐结果:

在这里插入图片描述

3. 体验网站

www.pazhufeng.com Up主个性推荐网站

已标记关键词 清除标记
相关推荐
©️2020 CSDN 皮肤主题: 酷酷鲨 设计师:CSDN官方博客 返回首页