{"id":735,"date":"2019-05-28T14:02:42","date_gmt":"2019-05-28T05:02:42","guid":{"rendered":"https:\/\/mieruca-ai.com\/ai\/?p=735"},"modified":"2019-09-18T10:56:41","modified_gmt":"2019-09-18T01:56:41","slug":"plsa","status":"publish","type":"post","link":"https:\/\/mieruca-ai.com\/ai\/plsa\/","title":{"rendered":"\u3010\u6280\u8853\u89e3\u8aac\u3011\u78ba\u7387\u7684\u6f5c\u5728\u610f\u5473\u89e3\u6790\uff08PLSA\uff09\u306e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3068\u5fdc\u7528"},"content":{"rendered":"<p><!-- \u5c0e\u5165\u90e8\u5206 --><\/p>\n<p>\n\u4eca\u56de\u306f<strong>\u6f5c\u5728\u610f\u5473\u89e3\u6790(Latent Semantic Analysis: LSA)<\/strong>\u3092\u78ba\u7387\u7684\u306b\u767a\u5c55\u3055\u305b\u305f<strong>\u30c8\u30d4\u30c3\u30af\u30e2\u30c7\u30eb<\/strong>\u306e<strong>\u78ba\u7387\u7684\u6f5c\u5728\u610f\u5473\u89e3\u6790\uff08PLSA\uff09<\/strong>\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u307e\u3059\uff0e<br \/>\n\u3053\u306e\u30e2\u30c7\u30eb\u3092\u4f7f\u3046\u3068\u6f5c\u5728\u7684\u306a\u610f\u5473\u3092\u30c8\u30d4\u30c3\u30af\u3068\u3057\u3066\u62bd\u51fa\u3067\u304d\uff0c\u305d\u306e\u30c8\u30d4\u30c3\u30af\u5185\u3067\u5358\u8a9e\u3068\u6587\u66f8\u304c\u51fa\u73fe\u3059\u308b\u78ba\u7387\u304c\u308f\u304b\u308a\u307e\u3059\uff0e\u4e3b\u306b\u65e2\u5b58\u306e\u30c7\u30fc\u30bf\u306e\u5206\u6790\u306b\u7528\u3044\u3089\u308c\u3066\u3044\u307e\u3059\uff0e\n<\/p>\n<p><!--more--> <!-- \u76ee\u6b21\u90e8\u5206 --><\/p>\n<h3>\u76ee\u6b21<\/h3>\n<p>\n<a href=\"#toc_1\">\u78ba\u7387\u7684\u6f5c\u5728\u610f\u5473\u89e3\u6790(PLSA)\u3068\u306f<\/a><br \/>\n<a href=\"#toc_2\">PLSA\u306e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0<\/a><br \/>\n<a href=\"#toc_3\">PLSA\u306e\u5b66\u7fd2<\/a><br \/>\n<a href=\"#toc_4\">EM\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0 (E-step)<\/a><br \/>\n<a href=\"#toc_5\">EM\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0 (M-step)<\/a><br \/>\n<a href=\"#toc_6\">\u904e\u5b66\u7fd2\u306e\u5bfe\u7b56 (TEM)<\/a><br \/>\n<a 
href=\"#toc_7\">LSA\u3068PLSA\u306e\u6bd4\u8f03<\/a><br \/>\n<a href=\"#toc_8\">PLSA\u3067\u306e\u5206\u6790\u4f8b<\/a><br \/>\n<a href=\"#toc_9\">PLSA\u306e\u5fdc\u7528<\/a><br \/>\n<a href=\"#toc_10\">PLSA\u306e\u554f\u984c\u70b9<\/a><br \/>\n<a href=\"#toc_11\">\u53c2\u8003\u6587\u732e<\/a>\n<\/p>\n<h2 id=\"toc_1\">\u78ba\u7387\u7684\u6f5c\u5728\u610f\u5473\u89e3\u6790(PLSA)\u3068\u306f<\/h2>\n<p>\n\u3000<strong>\u78ba\u7387\u7684\u6f5c\u5728\u610f\u5473\u89e3\u6790(Probabilistic Latent Semantic Analysis: PLSA)<\/strong>\u3068\u306f\uff0c1999\u5e74\u306bHofmann\u3089\u304c\u767a\u8868\u3057\u305f<strong>\u30c8\u30d4\u30c3\u30af\u30e2\u30c7\u30eb<\/strong>\u306e\u4ee3\u8868\u4f8b\u3067\u3042\u308b\uff0e\u30c8\u30d4\u30c3\u30af\u30e2\u30c7\u30eb\u306f\uff0c\u6587\u66f8\u306f<strong>\u8907\u6570\u306e\u72ec\u7acb\u3057\u305f\u6f5c\u5728\u7684\u306a\u30c8\u30d4\u30c3\u30af\u304b\u3089\u6210\u308b<\/strong>\u3082\u306e\u3068\u3057\u3066\uff0c\u305d\u306e\u904e\u7a0b\u3092\u78ba\u7387\u5206\u5e03\u3092\u7528\u3044\u3066\u3042\u3089\u308f\u3057\u305f<strong>\u78ba\u7387\u30e2\u30c7\u30eb<\/strong>\u3067\u3042\u308b\uff0e<br \/>\n\u3000\u4f8b\u3048\u3070\uff0c\u300c\u8eca\u4e2d\u6cca\u300d\u306b\u3064\u3044\u3066\u306e\u6587\u7ae0\u306f\u300c\u81ea\u52d5\u8eca\u300d\u3084\u300c\u30ad\u30e3\u30f3\u30d7\u300d\u306a\u3069\u306e\u30c8\u30d4\u30c3\u30af\u304b\u3089\u306a\u308b\u3068\u8003\u3048\u3089\u308c\u308b\uff0e\u300c\u81ea\u52d5\u8eca\u300d\u304b\u3089\u5358\u8a9e\u300c\u8eca\u300d\uff0c\u300c\u8eca\u5185\u300d\uff0c\u300c\u5ea7\u5e2d\u300d\u304c\u751f\u6210\u3055\u308c\uff0c\u300c\u30ad\u30e3\u30f3\u30d7\u300d\u304b\u3089\u5358\u8a9e\u300c\u6cca\u307e\u308b\u300d\uff0c\u300c\u6c34\u300d\uff0c\u300c\u81ea\u708a\u300d\uff0c\u300c\u5bdd\u308b\u300d\u304c\u751f\u6210\u3055\u308c\u305f\u3068\u3059\u308b\uff0e\u305d\u306e\u5834\u5408\u300c\u8eca\u4e2d\u6cca\u300d\u306b\u3064\u3044\u3066\u306e\u8a18\u4e8b\u306e\u5358\u8a9e\u7fa4(BOW)\u306f{\u8eca, \u8eca\u5185, 
\u5ea7\u5e2d, \u6cca\u307e\u308b, \u6c34, \u81ea\u708a, \u5bdd\u308b}\u3068\u306a\u308b\uff0e\u30c8\u30d4\u30c3\u30af\u30e2\u30c7\u30eb\u3067\u306f\u4e00\u822c\u7684\u306b<strong>\u8a9e\u9806\u306f\u8003\u616e\u3055\u308c\u306a\u3044<\/strong>\uff0e\u3053\u306e\u5834\u5408\u306b\u751f\u6210\u3055\u308c\u308b\u6587\u66f8\u306e\u4f8b\u3068\u3057\u3066\u300c\u8eca\u306b\u6cca\u307e\u308b\u3068\u304d\uff0c\u8eca\u5185\u3067\u81ea\u708a\u304c\u3067\u304d\u308b\u3088\u3046\u306b\u6c34\u3092\u6301\u3063\u3066\u3044\u304f\u3068\u3088\u3044\u3067\u3057\u3087\u3046\uff0e\u307e\u305f\u8eca\u5185\u3067\u5bdd\u3089\u308c\u308b\u3088\u3046\u5ea7\u5e2d\u304c\u30d5\u30eb\u30d5\u30e9\u30c3\u30c8\u306b\u3067\u304d\u308b\u8eca\u3092\u9078\u3073\u307e\u3057\u3087\u3046\uff0e\u300d\u304c\u3042\u3052\u3089\u308c\u308b\uff0e\u5b9f\u969b\u306b\u306f\u300c\u52d5\u8a5e\u300d\u3084\u300c\u52a9\u8a5e\u300d\u3092\u8868\u3059\u30c8\u30d4\u30c3\u30af\u3082\u3053\u3053\u306b\u306f\u5165\u3063\u3066\u3044\u308b\uff0e<br \/>\n\u3000\u30c8\u30d4\u30c3\u30af\u30e2\u30c7\u30eb\u3092\u7528\u3044\u308b\u5834\u5408\uff0c\u6587\u7ae0\u3092\u751f\u6210\u3059\u308b\u3053\u3068\u3088\u308a\u3082\u305d\u306e\u5358\u8a9e\u3084\u6587\u66f8\u304c<strong>\u3069\u306e\u30c8\u30d4\u30c3\u30af\u304b\u3089\u751f\u6210\u3055\u308c\u305f\u306e\u304b<\/strong>\u306b\u7126\u70b9\u3092\u5f53\u3066\u308b\u3053\u3068\u306e\u65b9\u304c\u591a\u3044\uff0e\u305d\u306e\u305f\u3081\uff0c\u5148\u307b\u3069\u4f8b\u306b\u6319\u3052\u305f\u6587\u66f8\u3092\u89e3\u6790\u3057\uff0c\u30c8\u30d4\u30c3\u30af\u300c\u81ea\u52d5\u8eca\u300d\u3084\u300c\u30ad\u30e3\u30f3\u30d7\u300d\u306a\u3069\u3092\u5f97\u305f\u308a\uff0c\u30c8\u30d4\u30c3\u30af\u300c\u81ea\u52d5\u8eca\u300d\u306b\u304a\u3044\u3066\u300c\u8eca\u300d\u3084\u300c\u5ea7\u5e2d\u300d\u306f<strong>\u3069\u308c\u307b\u3069\u5f71\u97ff\u3092\u4e0e\u3048\u308b\u306e\u304b<\/strong>\u306a\u3069\u306b\u3064\u3044\u3066\u5206\u6790\u3092\u884c\u3046\uff0e\n<\/p>\n<h2 
id=\"toc_2\">PLSA\u306e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0<\/h2>\n<p>\n\u3000PLSA\u306e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u89e3\u8aac\u3057\u3066\u3044\u304f\uff0e\n<\/p>\n<h3>\u7528\u3044\u308b\u8a18\u53f7<\/h3>\n<p>\n\u5358\u8a9e\uff1a$W = \\{w_1,w_2,&#8230;,w_M\\}$<br \/>\n\u6587\u66f8\uff1a$D = \\{d_1,d_2,&#8230;,d_N\\}$<br \/>\n\u30c8\u30d4\u30c3\u30af\uff1a$Z = \\{z_1,z_2,&#8230;,z_K\\}$\n<\/p>\n<h3>\u5358\u8a9e\u3068\u6587\u66f8\u306e\u540c\u6642\u78ba\u7387<\/h3>\n<p><div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\nP(D, W) &#038;=&#038; P(D)P(W|D) \\\\<br \/>\n\t&#038;=&#038; P(D)\\sum_{k=1}^{K}P(W|z_k)P(z_k|D) \\\\<br \/>\n\t&#038;=&#038; P(D)\\sum_{k=1}^{K}P(W|z_k)\\frac{P(D|z_k)P(z_k)}{P(D)} \\\\<br \/>\n\t&#038;=&#038; \\sum_{k=1}^{K}P(W|z_k)P(D|z_k)P(z_k)<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<p>\u9014\u4e2d\uff0c$P(z_k|D)$\u306b\u5bfe\u3057\u3066<strong>\u30d9\u30a4\u30ba\u306e\u5b9a\u7406<\/strong>\u3092\u7528\u3044\u3066$P(D)$\u3092\u6d88\u53bb\u3057\u3066\u3044\u308b\uff0e<br \/>\n\u6700\u7d42\u7684\u306b\u306f\u30c8\u30d4\u30c3\u30af$Z$\u304b\u3089\u5358\u8a9e$W$\u3068\u6587\u7ae0$D$\u304c\u751f\u6210\u3055\u308c\u308b\u3053\u3068\u3068\u7b49\u4fa1\u306b\u306a\u308b\uff0e<br \/>\n<img decoding=\"async\" loading=\"lazy\" src=\"https:\/\/mieruca-ai.com\/ai\/wp-content\/uploads\/2019\/02\/\u30b9\u30e9\u30a4\u30c91.png\" alt=\" PLSA\u306e\u30a2\u30b9\u30da\u30af\u30c8\u30e2\u30c7\u30eb\" width=\"960\" height=\"720\" class=\"size-full wp-image-881\" \/><br \/>\n\u3053\u306e\u30e2\u30c7\u30eb\u306f<strong>\u30a2\u30b9\u30da\u30af\u30c8\u30e2\u30c7\u30eb<\/strong>\u3068\u547c\u3070\u308c\u308b\uff0e\n<\/p>\n<h2 id=\"toc_3\">PLSA\u306e\u5b66\u7fd2<\/h2>\n<h3>\u5bfe\u6570\u5c24\u5ea6<\/h3>\n<p><div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\nL &#038;=&#038; \\sum_{i=1}^{M}\\sum_{j=1}^{N}n(d_j,w_i)\\log P(d_j,w_i) \\\\<br \/>\n\t&#038;=&#038; 
\\sum_{i=1}^{M}\\sum_{j=1}^{N}n(d_j,w_i)\\log \\sum_{k=1}^{K}P(w_i|z_k)P(d_j|z_k)P(z_k) \\\\<br \/>\n\t&#038;=&#038; \\sum_{i=1}^{M}\\sum_{j=1}^{N}n(d_j,w_i)\\log \\sum_{k=1}^{K}P(z_k|d_j,w_i)\\frac{P(w_i|z_k)P(d_j|z_k)P(z_k)}{P(z_k|d_j,w_i)}<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<p>$n(d_j,w_i)$\u306f\u6587\u66f8$d_j$\u3067\u5358\u8a9e$w_i$\u304c\u51fa\u73fe\u3059\u308b\u56de\u6570\u3092\u8868\u3059\uff0e<br \/>\n\u307e\u305f\uff0cEM\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3067\u306f\u6f5c\u5728\u5909\u6570\uff08\u30c8\u30d4\u30c3\u30af\uff09$z_k$\u306e\u4e8b\u5f8c\u5206\u5e03\u3092\uff0c\u4e0a\u8a18\u306e\u3088\u3046\u306b\u304f\u304f\u308a\u51fa\u3059\u6e96\u5099\u3092\u3059\u308b\uff0e\n<\/p>\n<h3>\u5bfe\u6570\u5c24\u5ea6\u306e\u4e0b\u9650<\/h3>\n<p>\n\u4e0a\u8a18\u306e\u5bfe\u6570\u5c24\u5ea6\u306b\u5bfe\u3057\u3066\uff0c<strong>\u30a4\u30a7\u30f3\u30bb\u30f3\u306e\u4e0d\u7b49\u5f0f<\/strong>\u3092\u7528\u3044\u3066\u4e0b\u9650\u3092\u6c42\u3081\u308b\uff0e<br \/>\nlog\u306e\u4e2d\u306b\u03a3\u304c\u3042\u308b\u5f62\u306f\u89e3\u6790\u7684\u306b\u6c42\u3081\u308b\u3053\u3068\u304c\u56f0\u96e3\u306a\u306e\u3067\uff0c\u30a4\u30a7\u30f3\u30bb\u30f3\u306e\u4e0d\u7b49\u5f0f\u3092\u7528\u3044\u3066\u03a3\u3092log\u306e\u5916\u306b\u51fa\u3057\u3066\u3044\u308b\uff0e<br \/>\n\u307e\u305f\uff0c\u5bfe\u6570\u5c24\u5ea6\u306e\u4e0b\u9650\u3092\u6700\u5927\u5316\u3059\u308b\u30d1\u30e9\u30e1\u30fc\u30bf\u3092\u6c42\u3081\u308b\u3053\u3068\u306b\u3088\u308a\uff0c\u9593\u63a5\u7684\u306b\u5bfe\u6570\u5c24\u5ea6\u3092\u6700\u5927\u5316\u3059\u308b\uff0e<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\nL &#038;\\geq &#038; \\sum_{i=1}^{M}\\sum_{j=1}^{N}\\sum_{k=1}^{K}n(d_j,w_i)P(z_k|d_j,w_i)\\log (\\frac{P(w_i|z_k)P(d_j|z_k)P(z_k)}{P(z_k|d_j,w_i)}) \\\\<br \/>\n\t&#038;= &#038; \\sum_{i=1}^{M}\\sum_{j=1}^{N}\\sum_{k=1}^{K}n(d_j,w_i)P(z_k|d_j,w_i)\\log (P(w_i|z_k)P(d_j|z_k)P(z_k)) \\\\<br \/>\n\t&#038;  &#038; &#8211; 
\\sum_{i=1}^{M}\\sum_{j=1}^{N}\\sum_{k=1}^{K}n(d_j,w_i)P(z_k|d_j,w_i)\\log (P(z_k|d_j,w_i))<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<p>\u30c8\u30d4\u30c3\u30af$z$\u306e\u4e8b\u5f8c\u5206\u5e03\u304b\u3089\u306a\u308b2\u9805\u76ee\u3092\u56fa\u5b9a\u3057\uff08E-step\uff09\uff0c\u305d\u306e\u4ed6\u306e\u4e8b\u5f8c\u5206\u5e03\u3092\u6c42\u3081\u308b\uff0e(M-step)<br \/>\n\u3053\u308c\u3092\u7e70\u308a\u8fd4\u3057\uff0c\u5bfe\u6570\u5c24\u5ea6\u304c\u53ce\u675f\u3059\u308b\u307e\u3067\u884c\u3046\uff0e\n<\/p>\n<h2 id=\"toc_4\">EM\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0 (E-step)<\/h2>\n<p>\n\u30c8\u30d4\u30c3\u30af$z$\u306e\u4e8b\u5f8c\u5206\u5e03\u3092\u6c42\u3081\u308b\uff0e<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\nP(Z|D,W) &#038;=&#038; \\frac{P(D,W,Z)}{P(D,W)} \\\\<br \/>\n\t&#038;=&#038; \\frac{P(Z)P(W|Z)P(D|Z)}{\\sum_{k=1}^{K}P(z_k)P(W|z_k)P(D|z_k)} \\\\<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<\/p>\n<h2 id=\"toc_5\">EM\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0 (M-step)<\/h2>\n<p>\n\u30c8\u30d4\u30c3\u30af$z$\u306e\u4e8b\u5f8c\u5206\u5e03\u3092\u56fa\u5b9a\u3057\u305f\u72b6\u614b\u3067\uff0c\u305d\u306e\u4ed6\u306e\u4e8b\u5f8c\u5206\u5e03\u3092\u6c42\u3081\u308b\uff0e<br \/>\n\u5bfe\u6570\u5c24\u5ea6$L$\u306b\u5bfe\u3057\u3066<strong>\u30e9\u30b0\u30e9\u30f3\u30b8\u30e5\u306e\u672a\u5b9a\u4e57\u6570\u6cd5<\/strong>\u3092\u7528\u3044\u308b\uff0e<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\nF &#038;= &#038; \\sum_{i=1}^{M}\\sum_{j=1}^{N}\\sum_{k=1}^{K}n(d_j,w_i)P(z_k|d_j,w_i)\\log (P(w_i|z_k)P(d_j|z_k)P(z_k)) \\\\<br \/>\n\t&#038;  &#038; + \\lambda_1(1-\\sum_{i=1}^{M}P(w_i|z_k))+ \\lambda_2(1-\\sum_{j=1}^{N}P(d_j|z_k))+ \\lambda_3(1-\\sum_{k=1}^{K}P(z_k))<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<\/p>\n<h3>$P(w_i|z_k)$\u3092\u6c42\u3081\u308b (M-step)<\/h3>\n<p><div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br 
\/>\n\\frac{\\delta F}{\\delta P(w_i|z_k)} &#038;= &#038; \\frac{\\sum_{j=1}^{N}n(d_j,w_i)P(z_k|d_j,w_i)}{P(w_i|z_k)} &#8211; \\lambda_1 = 0 \\\\<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<p>\u3086\u3048\u306b\uff0c<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\nP(w_i|z_k) = \\frac{\\sum_{j=1}^{N}n(d_j,w_i)P(z_k|d_j,w_i)}{\\lambda_1}<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<p>\u3053\u3053\u3067\u4e21\u8fba\u306e$\\sum_{i=1}^{M}$\u3092\u3068\u308b\uff0e<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\n\\lambda_1 = \\sum_{i=1}^{M}\\sum_{j=1}^{N}n(d_j,w_i)P(z_k|d_j,w_i)<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<p>\u3053\u308c\u3092\u4ee3\u5165\u3057\u3066\uff0c<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\nP(w_i|z_k) = \\frac{\\sum_{j=1}^{N}n(d_j,w_i)P(z_k|d_j,w_i)}{\\sum_{i^{\\prime}=1}^{M}\\sum_{j=1}^{N}n(d_j,w_{i^{\\prime}})P(z_k|d_j,w_{i^{\\prime}})}<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<\/p>\n<h3>$P(d_j|z_k)$\u3092\u6c42\u3081\u308b (M-step)<\/h3>\n<p><div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\n\\frac{\\delta F}{\\delta P(d_j|z_k)} &#038;= &#038; \\frac{\\sum_{i=1}^{M}n(d_j,w_i)P(z_k|d_j,w_i)}{P(d_j|z_k)} &#8211; \\lambda_2 = 0 \\\\<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<p>\u3086\u3048\u306b\uff0c<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\nP(d_j|z_k) = \\frac{\\sum_{i=1}^{M}n(d_j,w_i)P(z_k|d_j,w_i)}{\\lambda_2}<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<p>\u3053\u3053\u3067\u4e21\u8fba\u306e$\\sum_{j=1}^{N}$\u3092\u3068\u308b\uff0e<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\n\\lambda_2 = \\sum_{i=1}^{M}\\sum_{j=1}^{N}n(d_j,w_i)P(z_k|d_j,w_i)<br \/>\n\\end{eqnarray}<br 
\/>\n$$\n<\/div>\n<p>\u3053\u308c\u3092\u4ee3\u5165\u3057\u3066\uff0c<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\nP(d_j|z_k) = \\frac{\\sum_{i=1}^{M}n(d_j,w_i)P(z_k|d_j,w_i)}{\\sum_{i=1}^{M}\\sum_{j^{\\prime}=1}^{N}n(d_{j^{\\prime}},w_i)P(z_k|d_{j^{\\prime}},w_i)}<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<\/p>\n<h3>$P(z_k)$\u3092\u6c42\u3081\u308b (M-step)<\/h3>\n<p><div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\n\\frac{\\delta F}{\\delta P(z_k)} &#038;= &#038; \\frac{\\sum_{i=1}^{M}\\sum_{j=1}^{N}n(d_j,w_i)P(z_k|d_j,w_i)}{P(z_k)} &#8211; \\lambda_3 = 0 \\\\<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<p>\u3086\u3048\u306b\uff0c<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\nP(z_k) = \\frac{\\sum_{i=1}^{M}\\sum_{j=1}^{N}n(d_j,w_i)P(z_k|d_j,w_i)}{\\lambda_3}<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<p>\u3053\u3053\u3067\u4e21\u8fba\u306e$\\sum_{k=1}^{K}$\u3092\u3068\u308b\uff0e<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\n\\lambda_3 = \\sum_{i=1}^{M}\\sum_{j=1}^{N}\\sum_{k=1}^{K}n(d_j,w_i)P(z_k|d_j,w_i)<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<p>\u3053\u308c\u3092\u4ee3\u5165\u3057\u3066\uff0c<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\nP(z_k) &#038;= &#038; \\frac{\\sum_{i=1}^{M}\\sum_{j=1}^{N}n(d_j,w_i)P(z_k|d_j,w_i)}{\\sum_{i=1}^{M}\\sum_{j=1}^{N}\\sum_{k^{\\prime}=1}^{K}n(d_j,w_i)P(z_{k^{\\prime}}|d_j,w_i)} \\\\<br \/>\n\t&#038;= &#038; \\frac{\\sum_{i=1}^{M}\\sum_{j=1}^{N}n(d_j,w_i)P(z_k|d_j,w_i)}{\\sum_{i=1}^{M}\\sum_{j=1}^{N}n(d_j,w_i)}<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<\/p>\n<h2 id=\"toc_6\">\u904e\u5b66\u7fd2\u306e\u5bfe\u7b56 
(TEM)<\/h2>\n<p>\nEM\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3092\u7528\u3044\u305f\u6df7\u5408\u5206\u5e03\u306e\u5b66\u7fd2\u306b\u304a\u3044\u3066\uff0c\u904e\u5270\u9069\u5408\uff08\u904e\u5b66\u7fd2\uff09\u3092\u9632\u3050\u305f\u3081\u306b\u30a2\u30cb\u30fc\u30ea\u30f3\u30b0\u6cd5\u306e<strong>TEM(Tempered EM)<\/strong>\u304c\u7528\u3044\u3089\u308c\u308b\uff0e<\/p>\n<h3>TEM\u306e\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0<\/h3>\n<p>EM\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306eE\u30b9\u30c6\u30c3\u30d7\u306b\u304a\u3051\u308b\u30c8\u30d4\u30c3\u30af$z$\u306e\u4e8b\u5f8c\u5206\u5e03\u3092\uff0c\u30d1\u30e9\u30e1\u30fc\u30bf(inverse computational temperature)$\u03b2$\u3092\u7528\u3044\u3066\u4ee5\u4e0b\u306e\u3088\u3046\u306b\u5b9a\u7fa9\u3059\u308b\uff0e<\/p>\n<div class=\"show-srollbar\">\n$$<br \/>\n\\displaystyle<br \/>\n\\begin{eqnarray}<br \/>\nP(Z|D,W) &#038;=&#038; \\frac{P(Z)\\bigl[ P(W|Z)P(D|Z)\\bigr] ^\u03b2}{\\sum_{k=1}^{K}P(z_k)\\bigl[ P(W|z_k)P(D|z_k)\\bigr] ^\u03b2} \\\\<br \/>\n\\end{eqnarray}<br \/>\n$$\n<\/div>\n<p>$\u03b2&lt;1$\u306e\u7bc4\u56f2\u3067\u306e\u5c24\u5ea6\u3092\u5272\u308a\u5f15\u304f\uff0e<br \/>\n$\u03b2=1$\u304b\u3089\u5f90\u3005\u306b$\u03b2$\u3092\u3055\u3052\u3066\u3044\u304d\uff0c\u5c24\u5ea6\u304c\u53ce\u675f\u3059\u308b\u307e\u3067E\u30b9\u30c6\u30c3\u30d7\u3068M\u30b9\u30c6\u30c3\u30d7\u3092\u7e70\u308a\u8fd4\u3059\uff0e\n<\/p>\n<h2 id=\"toc_7\">LSA\u3068PLSA\u306e\u6bd4\u8f03<\/h2>\n<p><img decoding=\"async\" loading=\"lazy\" src=\"https:\/\/mieruca-ai.com\/ai\/wp-content\/uploads\/2019\/02\/\u30b9\u30e9\u30a4\u30c92-1.png\" alt=\"PLSA\u3068LSA\u306e\u6bd4\u8f03\" width=\"960\" height=\"720\" class=\"alignnone size-full wp-image-886\" \/><\/p>\n<p>\nPLSA\u306fLSA\u3092\u78ba\u7387\u30e2\u30c7\u30eb\u3068\u3057\u3066\u767a\u5c55\u3055\u305b\u305f\u30e2\u30c7\u30eb\u3067\u3042\u308b\uff0e<br 
\/>\nPLSA\u3067\u306f\u5358\u8a9e\u3082\u6587\u7ae0\u3082\u8907\u6570\u306e\u30c8\u30d4\u30c3\u30af\u304b\u3089\u78ba\u7387\u7684\u306b\u751f\u6210\u3055\u308c\u308b\uff0e<br \/>\nPLSA\u306b\u304a\u3051\u308b\u5404\u30c8\u30d4\u30c3\u30af\u9593\u306b\u306f\u72ec\u7acb\u6027\u3092\u4eee\u5b9a\u3059\u308b\u305f\u3081\uff0c\u30c8\u30d4\u30c3\u30af\u9593\u306e\u985e\u4f3c\u5ea6\u306f\u6c42\u3081\u3089\u308c\u306a\u3044\uff0e\n<\/p>\n<h2 id=\"toc_8\">PLSA\u3067\u306e\u5206\u6790\u4f8b<\/h2>\n<p>\n\u300c\u8eca\u4e2d\u6cca\u300d\u306b\u3064\u3044\u3066\u306e\u8a18\u4e8b\u3092\u96c6\u3081\u3066\u751f\u6210\u3057\u305f\u6587\u66f8\u306bPLSA\u3092\u9069\u7528\u3057\u3066\u307f\u305f\uff0e<br \/>\n\u4ee5\u4e0b\uff0c\u5404\u30c8\u30d4\u30c3\u30af\u306b\u5bfe\u3059\u308b\u4e8b\u5f8c\u5206\u5e03\u306e\u5927\u304d\u3044\u9806\u306b\u793a\u3059\uff0e<\/p>\n<h3>\u5358\u8a9e-\u30c8\u30d4\u30c3\u30af<\/h3>\n<p>\u30c8\u30d4\u30c3\u30af1: \u5834\uff0c\u99c5\uff0c\u9053\uff0c\u5168\u56fd\uff0c\u5bdd\u6cca\u308a<br \/>\n\u30c8\u30d4\u30c3\u30af2: \u8eca\u4e2d\uff0c\u6cca\uff0c\u8eca\uff0c\u5b50\u4f9b\uff0c\u30c8\u30a4\u30ec<br \/>\n\u30c8\u30d4\u30c3\u30af3: \u5834\u5408\uff0c\u30c9\u30e9\u30a4\u30d0\u30fc\uff0c\u5468\u56f2\uff0c\u4e00\u822c\uff0c\u76ee<\/p>\n<h3>\u6587\u7ae0-\u30c8\u30d4\u30c3\u30af<\/h3>\n<p>\uff08\u203b \u672c\u6765\u6587\u7ae0\u306e\u96c6\u5408\u304c\u51fa\u3066\u304f\u308b\u304c\uff0c\u4eca\u56de\u306f\u30b9\u30da\u30fc\u30b9\u306e\u90fd\u5408\u4e0a\u305d\u306e\u6027\u8cea\u306e\u307f\u3092\u8a18\u3059\uff0e\uff09<br \/>\n\u30c8\u30d4\u30c3\u30af1: \u5834\u6240\u306b\u95a2\u3059\u308b\u6587\u7ae0\u304c\u96c6\u307e\u3063\u305f<br \/>\n\u30c8\u30d4\u30c3\u30af2: \u30e1\u30ea\u30c3\u30c8\u30fb\u30c7\u30e1\u30ea\u30c3\u30c8\uff0c\u5feb\u9069\u306b\u904e\u3054\u3059\u305f\u3081\u306e\u65b9\u6cd5\u306b\u95a2\u3059\u308b\u6587\u7ae0\u304c\u96c6\u307e\u3063\u305f<br \/>\n\u30c8\u30d4\u30c3\u30af3: 
\u6cd5\u5f8b\u3084\u6ce8\u610f\u4e8b\u9805\uff0c\u30eb\u30fc\u30eb\u306b\u95a2\u3059\u308b\u8a18\u4e8b\u304c\u96c6\u307e\u3063\u305f<\/p>\n<h3>\u89e3\u91c8\u3057\u3066\u307f\u308b<\/h3>\n<p>PLSA\u3067\u306f<strong>\u30c8\u30d4\u30c3\u30af\u306f\u4eba\u9593\u304c\u89e3\u91c8\u3057\u306a\u304f\u3066\u306f\u306a\u3089\u306a\u3044<\/strong>\u304c\uff0c\u5358\u8a9e\u3084\u6587\u7ae0\u3068\u30c8\u30d4\u30c3\u30af\u306e\u95a2\u4fc2\u306b\u7740\u76ee\u3057\u3066\u307f\u308b\u3068\u306a\u3093\u3068\u306a\u304f\u305d\u308c\u304c\u4f55\u3092\u8868\u3057\u3066\u3044\u308b\u304b\u304c\u308f\u304b\u308b\uff0e<br \/>\n\u30c8\u30d4\u30c3\u30af1\u3067\u306f\u300c\u8eca\u4e2d\u6cca\u3092\u3059\u308b\u5834\u6240\u300d\u3092\uff0c\u30c8\u30d4\u30c3\u30af2\u3067\u306f\u300c\u8eca\u5185\u306b\u6cca\u307e\u308b\u6642\u306e\u3053\u3068\u300d\u3092\uff0c\u30c8\u30d4\u30c3\u30af3\u3067\u306f\u300c\u5468\u56f2\u306e\u72b6\u6cc1\u300d\u3092\u8868\u3057\u3066\u3044\u308b\u3068\u89e3\u91c8\u3067\u304d\u308b\uff0e<br \/>\n\u4eca\u56de\u306fWeb\u4e0a\u306e\u8a18\u4e8b\u304b\u3089\u53d6\u3063\u3066\u304d\u305f\u6587\u7ae0\u3092\u3082\u3068\u306b\u89e3\u6790\u3092\u884c\u3063\u305f\uff0e\u305d\u306e\u305f\u3081\uff0c\u8eca\u4e2d\u6cca\u306b\u3064\u3044\u3066\u306e\u30b5\u30a4\u30c8\u3067\u306f\u6cca\u307e\u308b\u5834\u6240\u3084\u5feb\u9069\u306b\u904e\u3054\u3059\u305f\u3081\u306e\u65b9\u6cd5\uff0c\u6cd5\u5f8b\u3084\u30eb\u30fc\u30eb\u306b\u3064\u3044\u3066\u66f8\u304b\u308c\u3066\u3044\u308b\u3053\u3068\u304c\u5206\u304b\u3063\u305f\uff0e<br \/>\n\u307e\u305f\uff0c\u305d\u308c\u305e\u308c\u306e\u30c8\u30d4\u30c3\u30af\u306b\u304a\u3044\u3066\u6ce8\u76ee\u3059\u3079\u304d\u30ad\u30fc\u30ef\u30fc\u30c9\u3082\u308f\u304b\u3063\u305f\uff0e\n<\/p>\n<h2 
id=\"toc_9\">PLSA\u306e\u5fdc\u7528<\/h2>\n<p>\u5171\u8d77\u6027\u306b\u7740\u76ee\u3059\u308c\u3070\uff0c<strong>\u6587\u66f8\u3068\u5358\u8a9e\u4ee5\u5916\u306e\u3082\u306e\u306b\u3064\u3044\u3066\u3082\u540c\u69d8\u306e\u5206\u6790\u304c\u3067\u304d\u308b<\/strong>\uff0e<br \/>\nPLSA\u3067\u306f\uff0c\u4f55\u3089\u304b\u306e2\u3064\u306e\u30a8\u30f3\u30c6\u30a3\u30c6\u30a3\u306e<strong>\u5171\u8d77\u884c\u5217<\/strong>\u304b\u3089\uff0c\u305d\u306e\u6f5c\u5728\u30af\u30e9\u30b9\u5206\u6790\u3084\uff0c\u30af\u30e9\u30b9\u3068\u30a8\u30f3\u30c6\u30a3\u30c6\u30a3\u3068\u306e\u95a2\u4fc2\u3092\u6c42\u3081\u308b\u3053\u3068\u304c\u3067\u304d\u308b\uff0e<br \/>\n\u4f8b\u3048\u3070\uff0c\u6587\u66f8\u306e\u5206\u985e\u3084\uff0c\u5358\u8a9e\u306e\u5206\u985e\u306a\u3069\u306f\u3082\u3061\u308d\u3093\uff0c\u30e6\u30fc\u30b6\u30fc\u3068\u305d\u306e\u5c5e\u6027\u304b\u3089\u4f3c\u305f\u3088\u3046\u306a\u30e6\u30fc\u30b6\u30fc\u3092\u6c42\u3081\u308b\u3068\u3044\u3063\u305f<strong>\u30ea\u30b3\u30e1\u30f3\u30c9\u30a8\u30f3\u30b8\u30f3<\/strong>\u306b\u3082\u4f7f\u3046\u3053\u3068\u304c\u3067\u304d\u308b\uff0e<\/p>\n<h3>\u5fdc\u7528\u4f8b<\/h3>\n<p>\n\u30fbID-POS\u5206\u6790<br \/>\n\u30fb\u5354\u8abf\u30d5\u30a3\u30eb\u30bf\u30ea\u30f3\u30b0\u306e\u5b9f\u88c5<br \/>\n\u30fb\u30ea\u30b3\u30e1\u30f3\u30c9\u30a8\u30f3\u30b8\u30f3<br \/>\n\u30fb\u8981\u56e0\u5206\u6790\n<\/p>\n<h2 id=\"toc_10\">PLSA\u306e\u554f\u984c\u70b9<\/h2>\n<p>\u30fb\u30c8\u30d4\u30c3\u30af\u9593\u306b\u72ec\u7acb\u6027\u3092\u4eee\u5b9a\u3057\u3066\u3044\u308b\u305f\u3081\uff0c\u305d\u308c\u3089\u306e\u95a2\u4fc2\u3092\u7121\u8996\u3057\u3066\u3044\u308b<br \/>\n\u30fb\u65b0\u305f\u306a\u30c7\u30fc\u30bf\u306e\u8ffd\u52a0\u306b\u5f31\u3044<\/p>\n<h2 id=\"toc_11\">\u53c2\u8003\u6587\u732e<\/h2>\n<p>\u3010\u8ad6\u6587\u3011<br \/>\nT. 
Hofmann, 1999, Probabilistic latent semantic indexing, <em>Proceedings of Proceeding UAI&#8217;99 Proceedings of the Fifteenth conference on Uncertainty in artificial intelligence<\/em>, p289-296.<\/p>\n<p>\u3010\u672c\u3011<br \/>\n\u5ca9\u7530\u5177\u6cbb, 2015, <em>\u30c8\u30d4\u30c3\u30af\u30e2\u30c7\u30eb<\/em>, \u8b1b\u8ac7\u793e.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4eca\u56de\u306f\u6f5c\u5728\u610f\u5473\u89e3\u6790(Latent Semantic Analysis: LSA)\u3092\u78ba\u7387\u7684\u306b\u767a\u5c55\u3055\u305b\u305f\u30c8\u30d4\u30c3\u30af\u30e2\u30c7\u30eb\u306e\u78ba\u7387\u7684\u6f5c\u5728\u610f\u5473\u89e3\u6790\uff08PLSA\uff09\u306b\u3064\u3044\u3066\u89e3\u8aac\u3057\u307e\u3059\uff0e \u3053\u306e\u30e2\u30c7\u30eb\u3092\u4f7f\u3046\u3068\u6f5c\u5728\u7684\u306a\u610f\u5473\u3092\u30c8\u30d4\u30c3\u30af\u3068\u3057\u3066\u62bd\u51fa\u3067\u304d\uff0c\u305d\u306e\u30c8\u30d4\u30c3\u30af\u5185\u3067\u5358\u8a9e\u3068\u6587\u66f8\u304c\u51fa\u73fe\u3059\u308b\u78ba\u7387\u304c\u308f\u304b\u308a\u307e\u3059\uff0e\u4e3b\u306b\u65e2\u5b58\u306e\u30c7\u30fc\u30bf\u306e\u5206\u6790\u306b\u7528\u3044\u3089\u308c\u3066\u3044\u307e\u3059\uff0e<\/p>\n","protected":false},"author":1,"featured_media":881,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[4,5],"tags":[],"_links":{"self":[{"href":"https:\/\/mieruca-ai.com\/ai\/wp-json\/wp\/v2\/posts\/735"}],"collection":[{"href":"https:\/\/mieruca-ai.com\/ai\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/mieruca-ai.com\/ai\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/mieruca-ai.com\/ai\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/mieruca-ai.com\/ai\/wp-json\/wp\/v2\/comments?post=735"}],"version-history":[{"count":57,"href":"https:\/\/mieruca-ai.com\/ai\/wp-json\/wp\/v2\/posts\/735\/revisions"}],"predecessor-version":[{"id":1077,"href":"https:\/\/mieruca-ai.com\/ai\/wp-
json\/wp\/v2\/posts\/735\/revisions\/1077"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/mieruca-ai.com\/ai\/wp-json\/wp\/v2\/media\/881"}],"wp:attachment":[{"href":"https:\/\/mieruca-ai.com\/ai\/wp-json\/wp\/v2\/media?parent=735"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/mieruca-ai.com\/ai\/wp-json\/wp\/v2\/categories?post=735"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/mieruca-ai.com\/ai\/wp-json\/wp\/v2\/tags?post=735"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}