{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "users = [\"User1\", \"User2\", \"User3\", \"User4\", \"User5\"]\n",
    "items = [\"Item A\", \"Item B\", \"Item C\", \"Item D\", \"Item E\"]\n",
    "# 用户购买记录数据集\n",
    "datasets = [\n",
    "    [1,0,1,1,0],\n",
    "    [1,0,0,1,1],\n",
    "    [1,0,1,0,0],\n",
    "    [0,1,0,1,1],\n",
    "    [1,1,1,0,1],\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(data=datasets,index=users,columns=items)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Item A</th>\n",
       "      <th>Item B</th>\n",
       "      <th>Item C</th>\n",
       "      <th>Item D</th>\n",
       "      <th>Item E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>User1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User4</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User5</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Item A  Item B  Item C  Item D  Item E\n",
       "User1       1       0       1       1       0\n",
       "User2       1       0       0       1       1\n",
       "User3       1       0       1       0       0\n",
       "User4       0       1       0       1       1\n",
       "User5       1       1       1       0       1"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import jaccard_similarity_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.2"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "jaccard_similarity_score(df[\"Item A\"],df[\"Item B\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics.pairwise import pairwise_distances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:475: DataConversionWarning: Data with input dtype int64 was converted to bool by check_pairwise_arrays.\n",
      "  warnings.warn(msg, DataConversionWarning)\n"
     ]
    }
   ],
   "source": [
    "user_similar = 1-pairwise_distances(df,metric=\"jaccard\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.        , 0.5       , 0.66666667, 0.2       , 0.4       ],\n",
       "       [0.5       , 1.        , 0.25      , 0.5       , 0.4       ],\n",
       "       [0.66666667, 0.25      , 1.        , 0.        , 0.5       ],\n",
       "       [0.2       , 0.5       , 0.        , 1.        , 0.4       ],\n",
       "       [0.4       , 0.4       , 0.5       , 0.4       , 1.        ]])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_similar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "user_similar = pd.DataFrame(user_similar, columns=users, index=users)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>User1</th>\n",
       "      <th>User2</th>\n",
       "      <th>User3</th>\n",
       "      <th>User4</th>\n",
       "      <th>User5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>User1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User2</th>\n",
       "      <td>0.500000</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User3</th>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.25</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User4</th>\n",
       "      <td>0.200000</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User5</th>\n",
       "      <td>0.400000</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.4</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          User1  User2     User3  User4  User5\n",
       "User1  1.000000   0.50  0.666667    0.2    0.4\n",
       "User2  0.500000   1.00  0.250000    0.5    0.4\n",
       "User3  0.666667   0.25  1.000000    0.0    0.5\n",
       "User4  0.200000   0.50  0.000000    1.0    0.4\n",
       "User5  0.400000   0.40  0.500000    0.4    1.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_similar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "物品之间的两两相似度：\n",
      "        Item A    Item B  Item C  Item D    Item E\n",
      "Item A    1.00  0.200000    0.75    0.40  0.400000\n",
      "Item B    0.20  1.000000    0.25    0.25  0.666667\n",
      "Item C    0.75  0.250000    1.00    0.20  0.200000\n",
      "Item D    0.40  0.250000    0.20    1.00  0.500000\n",
      "Item E    0.40  0.666667    0.20    0.50  1.000000\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:475: DataConversionWarning: Data with input dtype int64 was converted to bool by check_pairwise_arrays.\n",
      "  warnings.warn(msg, DataConversionWarning)\n"
     ]
    }
   ],
   "source": [
    "# 计算物品间相似度\n",
    "item_similar = 1 - pairwise_distances(df.T, metric=\"jaccard\")\n",
    "item_similar = pd.DataFrame(item_similar, columns=items, index=items)\n",
    "print(\"物品之间的两两相似度：\")\n",
    "print(item_similar)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "#定义一个字典，用于盛放用户的topN个相似用户(这里是2)\n",
    "topN_users={}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['User3', 'User2']\n",
      "['User4', 'User1']\n",
      "['User1', 'User5']\n",
      "['User2', 'User5']\n",
      "['User3', 'User4']\n"
     ]
    }
   ],
   "source": [
    "for i in user_similar.index:\n",
    "    #删除对角线上的自己与自己的相似度1\n",
    "    _df = user_similar.loc[i].drop([i])\n",
    "    _df_sorted = _df.sort_values(ascending=False)\n",
    "    #print(_df_sorted.values)\n",
    "    #top2 = list(_df_sorted)[:2]\n",
    "    top2=list(_df_sorted.index[:2])\n",
    "    print(top2)\n",
    "    topN_users[i]=top2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>User1</th>\n",
       "      <th>User2</th>\n",
       "      <th>User3</th>\n",
       "      <th>User4</th>\n",
       "      <th>User5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>User1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User2</th>\n",
       "      <td>0.500000</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User3</th>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.25</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User4</th>\n",
       "      <td>0.200000</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User5</th>\n",
       "      <td>0.400000</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.4</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          User1  User2     User3  User4  User5\n",
       "User1  1.000000   0.50  0.666667    0.2    0.4\n",
       "User2  0.500000   1.00  0.250000    0.5    0.4\n",
       "User3  0.666667   0.25  1.000000    0.0    0.5\n",
       "User4  0.200000   0.50  0.000000    1.0    0.4\n",
       "User5  0.400000   0.40  0.500000    0.4    1.0"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "user_similar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'User1': ['User3', 'User2'],\n",
       " 'User2': ['User4', 'User1'],\n",
       " 'User3': ['User1', 'User5'],\n",
       " 'User4': ['User2', 'User5'],\n",
       " 'User5': ['User3', 'User4']}"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "topN_users"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Item A', 'Item C']"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list(df.ix['User3'].replace(0,np.nan).dropna().index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Item A</th>\n",
       "      <th>Item B</th>\n",
       "      <th>Item C</th>\n",
       "      <th>Item D</th>\n",
       "      <th>Item E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>User1</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User2</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User4</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User5</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Item A  Item B  Item C  Item D  Item E\n",
       "User1       1       0       1       1       0\n",
       "User2       1       0       0       1       1\n",
       "User3       1       0       1       0       0\n",
       "User4       0       1       0       1       1\n",
       "User5       1       1       1       0       1"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "rs_results={}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "for user,sim_users in topN_users.items():\n",
    "    rs_result=set()\n",
    "    for sim_user in sim_users:\n",
    "        rs_result = rs_result.union(set(df.loc[sim_user].replace(0,np.nan).dropna().index))\n",
    "        \n",
    "    rs_result -= set(df.loc[user].replace(0,np.nan).dropna().index)\n",
    "    rs_results[user]=rs_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'User1': {'Item E'},\n",
       " 'User2': {'Item B', 'Item C'},\n",
       " 'User3': {'Item B', 'Item D', 'Item E'},\n",
       " 'User4': {'Item A', 'Item C'},\n",
       " 'User5': {'Item D'}}"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rs_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from pprint import pprint\n",
    "\n",
    "users = [\"User1\", \"User2\", \"User3\", \"User4\", \"User5\"]\n",
    "items = [\"Item A\", \"Item B\", \"Item C\", \"Item D\", \"Item E\"]\n",
    "# 用户购买记录数据集\n",
    "datasets = [\n",
    "    [1,0,1,1,0],\n",
    "    [1,0,0,1,1],\n",
    "    [1,0,1,0,0],\n",
    "    [0,1,0,1,1],\n",
    "    [1,1,1,0,1],\n",
    "]\n",
    "\n",
    "df = pd.DataFrame(datasets,\n",
    "                  columns=items,\n",
    "                  index=users)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "#计算物品的两两相似度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[1.         0.2        0.75       0.4        0.4       ]\n",
      " [0.2        1.         0.25       0.25       0.66666667]\n",
      " [0.75       0.25       1.         0.2        0.2       ]\n",
      " [0.4        0.25       0.2        1.         0.5       ]\n",
      " [0.4        0.66666667 0.2        0.5        1.        ]]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:475: DataConversionWarning: Data with input dtype int64 was converted to bool by check_pairwise_arrays.\n",
      "  warnings.warn(msg, DataConversionWarning)\n"
     ]
    }
   ],
   "source": [
    "#直接计算出两两的jaccard相似度\n",
    "item_similar = 1-pairwise_distances(df.T,metric='jaccard')\n",
    "print(item_similar)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "item_similar = pd.DataFrame(item_similar,index=items,columns=items)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Item A</th>\n",
       "      <th>Item B</th>\n",
       "      <th>Item C</th>\n",
       "      <th>Item D</th>\n",
       "      <th>Item E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Item A</th>\n",
       "      <td>1.00</td>\n",
       "      <td>0.200000</td>\n",
       "      <td>0.75</td>\n",
       "      <td>0.40</td>\n",
       "      <td>0.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item B</th>\n",
       "      <td>0.20</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.666667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item C</th>\n",
       "      <td>0.75</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item D</th>\n",
       "      <td>0.40</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>0.20</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item E</th>\n",
       "      <td>0.40</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.20</td>\n",
       "      <td>0.50</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        Item A    Item B  Item C  Item D    Item E\n",
       "Item A    1.00  0.200000    0.75    0.40  0.400000\n",
       "Item B    0.20  1.000000    0.25    0.25  0.666667\n",
       "Item C    0.75  0.250000    1.00    0.20  0.200000\n",
       "Item D    0.40  0.250000    0.20    1.00  0.500000\n",
       "Item E    0.40  0.666667    0.20    0.50  1.000000"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "item_similar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['Item A', 'Item B', 'Item C', 'Item D', 'Item E']"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "items"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "#目标：找到物品的topN个相似物品"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "topN_items={}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in item_similar.index:\n",
    "    _df = item_similar.loc[i].drop([i])\n",
    "    _df_sorted = _df.sort_values(ascending=False)\n",
    "    top2 = list(_df_sorted.index[:2])\n",
    "    topN_items[i]=top2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'Item A': ['Item C', 'Item E'],\n",
       " 'Item B': ['Item E', 'Item D'],\n",
       " 'Item C': ['Item A', 'Item B'],\n",
       " 'Item D': ['Item E', 'Item A'],\n",
       " 'Item E': ['Item B', 'Item D']}"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "topN_items"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "#获取需要推荐给用户的物品"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "#定义一个字典，用于盛放所有的推荐结果\n",
    "rs_results={}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "for user in df.index:\n",
    "    #定义一个针对于单个用户的推荐结果的集合\n",
    "    rs_result=set()\n",
    "    #将遍历的每个用户的已经买过的物品取出\n",
    "    for item in df.loc[user].replace(0,np.nan).dropna().index:\n",
    "        rs_result = rs_result.union(topN_items[item])\n",
    "        \n",
    "    #过滤用户已经购买过的物品\n",
    "    rs_result -= set(df.loc[user].replace(0,np.nan).dropna().index)\n",
    "    \n",
    "    #将每次遍历得到的推荐结果放入总的推荐结果中\n",
    "    rs_results[user]=rs_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'User1': {'Item B', 'Item E'},\n",
       " 'User2': {'Item B', 'Item C'},\n",
       " 'User3': {'Item B', 'Item E'},\n",
       " 'User4': {'Item A'},\n",
       " 'User5': {'Item D'}}"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rs_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "users = [\"User1\", \"User2\", \"User3\", \"User4\", \"User5\"]\n",
    "items = [\"Item A\", \"Item B\", \"Item C\", \"Item D\", \"Item E\"]\n",
    "# 用户购买记录数据集\n",
    "datasets = [\n",
    "    [5,3,4,4,None],\n",
    "    [3,1,2,3,3],\n",
    "    [4,3,4,3,5],\n",
    "    [3,3,1,5,4],\n",
    "    [1,5,5,2,1],\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(datasets,\n",
    "                  columns=items,\n",
    "                  index=users)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Item A</th>\n",
       "      <th>Item B</th>\n",
       "      <th>Item C</th>\n",
       "      <th>Item D</th>\n",
       "      <th>Item E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>User1</th>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User2</th>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User3</th>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User4</th>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User5</th>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Item A  Item B  Item C  Item D  Item E\n",
       "User1       5       3       4       4     NaN\n",
       "User2       3       1       2       3     3.0\n",
       "User3       4       3       4       3     5.0\n",
       "User4       3       3       1       5     4.0\n",
       "User5       1       5       5       2     1.0"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "用户之间的两两相似度：\n"
     ]
    }
   ],
   "source": [
    "print(\"用户之间的两两相似度：\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>User1</th>\n",
       "      <th>User2</th>\n",
       "      <th>User3</th>\n",
       "      <th>User4</th>\n",
       "      <th>User5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>User1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.852803</td>\n",
       "      <td>0.707107</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.792118</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User2</th>\n",
       "      <td>0.852803</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.467707</td>\n",
       "      <td>0.489956</td>\n",
       "      <td>-0.900149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User3</th>\n",
       "      <td>0.707107</td>\n",
       "      <td>0.467707</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.161165</td>\n",
       "      <td>-0.466569</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User4</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.489956</td>\n",
       "      <td>-0.161165</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.641503</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>User5</th>\n",
       "      <td>-0.792118</td>\n",
       "      <td>-0.900149</td>\n",
       "      <td>-0.466569</td>\n",
       "      <td>-0.641503</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          User1     User2     User3     User4     User5\n",
       "User1  1.000000  0.852803  0.707107  0.000000 -0.792118\n",
       "User2  0.852803  1.000000  0.467707  0.489956 -0.900149\n",
       "User3  0.707107  0.467707  1.000000 -0.161165 -0.466569\n",
       "User4  0.000000  0.489956 -0.161165  1.000000 -0.641503\n",
       "User5 -0.792118 -0.900149 -0.466569 -0.641503  1.000000"
      ]
     },
     "execution_count": 68,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.T.corr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Item A</th>\n",
       "      <th>Item B</th>\n",
       "      <th>Item C</th>\n",
       "      <th>Item D</th>\n",
       "      <th>Item E</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Item A</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.476731</td>\n",
       "      <td>-0.123091</td>\n",
       "      <td>0.532181</td>\n",
       "      <td>0.969458</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item B</th>\n",
       "      <td>-0.476731</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.645497</td>\n",
       "      <td>-0.310087</td>\n",
       "      <td>-0.478091</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item C</th>\n",
       "      <td>-0.123091</td>\n",
       "      <td>0.645497</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.720577</td>\n",
       "      <td>-0.427618</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item D</th>\n",
       "      <td>0.532181</td>\n",
       "      <td>-0.310087</td>\n",
       "      <td>-0.720577</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.581675</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Item E</th>\n",
       "      <td>0.969458</td>\n",
       "      <td>-0.478091</td>\n",
       "      <td>-0.427618</td>\n",
       "      <td>0.581675</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Item A    Item B    Item C    Item D    Item E\n",
       "Item A  1.000000 -0.476731 -0.123091  0.532181  0.969458\n",
       "Item B -0.476731  1.000000  0.645497 -0.310087 -0.478091\n",
       "Item C -0.123091  0.645497  1.000000 -0.720577 -0.427618\n",
       "Item D  0.532181 -0.310087 -0.720577  1.000000  0.581675\n",
       "Item E  0.969458 -0.478091 -0.427618  0.581675  1.000000"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.corr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
