{"id":573,"date":"2019-03-28T19:53:44","date_gmt":"2019-03-28T11:53:44","guid":{"rendered":"https:\/\/luke6887.me\/?p=573"},"modified":"2019-04-01T10:41:00","modified_gmt":"2019-04-01T02:41:00","slug":"sit742-mis772%e7%9a%84%e4%b8%80%e4%ba%9b%e5%85%b3%e4%ba%8e%e6%95%b0%e6%8d%ae%e7%9a%84%e6%83%b3%e6%b3%95%e3%80%82%e3%80%82%e3%80%82","status":"publish","type":"post","link":"https:\/\/blog.luke6887.me\/?p=573","title":{"rendered":"SIT742->MIS772\u7684\u4e00\u4e9b\u5173\u4e8e\u6570\u636e\u7684\u60f3\u6cd5\u3002\u3002\u3002"},"content":{"rendered":"<p>\u4eca\u5b66\u671f\u7684772\u8ddf742\u7684\u6570\u636e\u5c45\u7136\u662f\u4e00\u6837\u7684\u3002\u3002\u3002\u90fd\u662f\u7ea2\u9152\u3002\u3002<\/p>\n<p><a href=\"https:\/\/www.kaggle.com\/zynicide\/wine-reviews\">https:\/\/www.kaggle.com\/zynicide\/wine-reviews<\/a><\/p>\n<p>\u4f46\u6709\u4e00\u4e2a\u95ee\u9898\u5c31\u662f\u6709\u4e0d\u5c11\u53d8\u91cf\uff08\u50cfdesignation\uff09\u6709\u5f88\u591a\u5f88\u591a\u7684\u9879\u3002\u3002\u3002\u505aclassification\u7684\u65f6\u5019\u5c31coding\u53c8\u4e0d\u662f\u4e0d\u641e\u53c8\u4e0d\u662f\u3002\u3002\u3002<\/p>\n<p><!--more--><\/p>\n<p>\u60f3\u6cd51\uff1aunique integer coding\uff0c\u8fd9\u4e2a\u7528RM\u7684\u8f6c\u6362\u5668\u5c31\u80fd\u641e\uff0c\u4f46\u3002\u3002\u611f\u89c9\u4e0d\u592a\u4fe1\u5f97\u8fc7_(:\u0437\u300d\u2220)_<\/p>\n<p>\u60f3\u6cd52\uff1afeature hashing\uff0c\u5c06\u53d8\u91cf\u8f6c\u6210\u5806hash\u503c\u3002\u3002\u3002\u3002\u7528\u7684\u662fsklearn\u7684feature hasher\uff0c\u6548\u679c\u6bd4unique integer\u597d\u70b9\u3002\u3002\u4f46\u611f\u89c9\u8fd8\u6709\u522b\u7684\u3002\u3002<\/p>\n<pre class=\"lang:python decode:true \" title=\"FeatureHasher\">from sklearn.compose import ColumnTransformer\r\nfh = FeatureHasher(n_features=20, input_type='string')\r\ndata_hash_target = data.drop(columns=['price-cat', 'points'])\r\ndata_label = data.drop(columns=['country', 'designation','province','region_1','variety','winery'])\r\n\r\nn_orig_features = 
data_hash_target.shape[1]\r\nhash_vector_size = 12\r\nct = ColumnTransformer([(f't_{i}', FeatureHasher(n_features=hash_vector_size, \r\n                        input_type='string',non_negative=True), i) for i in range(n_orig_features)])\r\n\r\nres_0 = ct.fit_transform(data_hash_target)\r\n\r\ndata_hash_target=pd.DataFrame(res_0, columns=['fh1', 'fh2', 'fh3', 'fh4', 'fh5', 'fh6', 'fh7', 'fh8','fh9','fh10','fh11','fh12',\r\n                                             'fh13', 'fh14', 'fh15', 'fh16', 'fh17', 'fh18', 'fh19', 'fh20','fh21','fh22','fh23','fh24',\r\n                                             'fh25', 'fh26', 'fh27', 'fh28', 'fh29', 'fh30', 'fh31', 'fh32','fh33','fh34','fh35','fh36',\r\n                                             'fh37', 'fh38', 'fh39', 'fh40', 'fh41', 'fh42', 'fh43', 'fh44','fh45','fh46','fh47','fh48',\r\n                                             'fh49', 'fh50', 'fh51', 'fh52', 'fh53', 'fh54', 'fh55', 'fh56','fh57','fh58','fh59','fh60',\r\n                                             'fh61', 'fh62', 'fh63', 'fh64', 'fh65', 'fh66', 'fh67', 'fh68','fh69','fh70','fh71','fh72'\r\n                                             ])\r\ndata_hash_target\r\ndata_hashed = pd.concat([data_hash_target, 
data_label],axis=1)<\/pre>\n<p>\u60f3\u6cd53\uff1a\u4ece772\u7684\u628aprice\u5206\u7c7b\u62105\u7c7b\u90a3\u91cc\u51fa\u6765\u7684\u60f3\u6cd5\uff0c\u628adesignation\u5206\u7c7b\uff08\u6216\u8005\u662f\u5f52\u7c7b\uff09\u5230\u6570\u91cf\u8f83\u5c0f\u7684\u7c7b\u522b\u91cc\u9762\u53bb\uff0c\u4f8b\u5982\u628a\u539f\u6765\u51e0\u4e07\u79cd\u7684designation\u5206\u7c7b\u621020\u7c7b\u4e4b\u7c7b\u7684\u3002\u3002\u7136\u540e\u518d\u505acoding\u3002\u3002\uff08\u4f46\u662f\u6ca1\u6709\u5934\u7eea\u554a\u3002\u3002\u3002\u5509_(:\u0437\u300d\u2220)_\uff09<\/p>\n<p>\u8fd8\u6709\u8fd9\u6837\u6211\u662f\u8be5\u5355\u72ec\u628adesignation\u62c6\u51fa\u6765clustering\u505a\u5462\uff0c\u8fd8\u662f\u7528model\u8fde\u7740\u522b\u7684\u53d8\u91cf\u7ed9designation\u5206\u7c7b\u5462\u3002\u3002\u5c31\u5f88\u5934\u75db\u3002\u3002<\/p>\n<p>\uff08\u5148\u8bb0\u4e0b\u6765\u5427\u3002\u3002\u6015\u5fd8\u4e86\u3002\u3002\uff09<\/p>\n<p>4.1\u66f4\u65b0\uff1a\u770bcluster\u597d\u50cf\u771f\u7684designation\u8ddfwinery\u8fd9\u4e24\u4e2a\u7684\u4fe1\u606f\u6700\u591a\u3002\u3002\u3002\u611f\u89c9\u53ef\u4ee5\u4e00\u8bd5\u3002\u3002\uff1f<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4eca\u5b66\u671f\u7684772\u8ddf742\u7684\u6570\u636e\u5c45\u7136\u662f\u4e00\u6837\u7684\u3002\u3002\u3002\u90fd\u662f\u7ea2\u9152\u3002\u3002 https:\/\/www.kaggle.com\/zy &hellip; <\/p>\n<p class=\"link-more\"><a href=\"https:\/\/blog.luke6887.me\/?p=573\" class=\"more-link\">\u7ee7\u7eed\u9605\u8bfb<span 
class=\"screen-reader-text\">\u201cSIT742->MIS772\u7684\u4e00\u4e9b\u5173\u4e8e\u6570\u636e\u7684\u60f3\u6cd5\u3002\u3002\u3002\u201d<\/span><\/a><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[2],"tags":[],"class_list":["post-573","post","type-post","status-publish","format-standard","hentry","category-school-things"],"_links":{"self":[{"href":"https:\/\/blog.luke6887.me\/index.php?rest_route=\/wp\/v2\/posts\/573","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/blog.luke6887.me\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/blog.luke6887.me\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/blog.luke6887.me\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/blog.luke6887.me\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=573"}],"version-history":[{"count":6,"href":"https:\/\/blog.luke6887.me\/index.php?rest_route=\/wp\/v2\/posts\/573\/revisions"}],"predecessor-version":[{"id":582,"href":"https:\/\/blog.luke6887.me\/index.php?rest_route=\/wp\/v2\/posts\/573\/revisions\/582"}],"wp:attachment":[{"href":"https:\/\/blog.luke6887.me\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=573"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/blog.luke6887.me\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=573"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/blog.luke6887.me\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=573"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}