Subject: Running LDA in Spark throws exception


The data is in the attachment.

On Wed, Dec 30, 2015 at 8:59 PM, Li Li <[EMAIL PROTECTED]> wrote:
{"time":"2015-02-19","cmtUrl":"4040875","star":"很好","userName":"oliver甜甜","rvId":"rev_100000447","webpageUrl":"http://www.dianping.com/shop/4040875","seg_word":["团购","点评","汤山","比较","老牌","的","温泉","面积","大","人气","旺"],"word_vec":[50,51,52,53,54,5,55,56,57,58,59]}
{"time":"2015-02-19","cmtUrl":"4040875","star":"很好","userName":"妮妮688004","rvId":"rev_100000512","webpageUrl":"http://www.dianping.com/shop/4040875","seg_word":["团购","点评","温泉","很","不错","贴心","的","备有","各式","饮料","即使","在","寒冷","的","冬天","里","也","不","觉得","冷","遗憾","的","是","住宿","的","地方","设施","比较","旧","了","总体","说来","还是","值得","一","去","的"],"word_vec":[50,51,55,26,60,61,5,62,63,64,65,32,66,5,67,68,69,48,70,71,72,5,2,73,5,9,74,53,75,17,76,77,78,79,80,4,5]}
{"time":"2015-02-19","cmtUrl":"17768315","star":"好","userName":"kodokunogurume","rvId":"rev_100000921","webpageUrl":"http://www.dianping.com/shop/17768315","seg_word":["喜欢","从","美术馆","的","角度","来","看看","这","城市","的","水平","大阪","还是","比较","直接","了","当","的"],"word_vec":[81,82,83,5,84,85,86,87,88,5,89,90,78,53,91,17,92,5]}
{"time":"2015-02-19","cmtUrl":"6105063","star":"非常好","userName":"乐呵的玩","rvId":"rev_100001048","webpageUrl":"http://www.dianping.com/shop/6105063","seg_word":["好玩","、","刺激","!","人","有点","多","孩子","爱玩","!"],"word_vec":[93,94,95,96,97,98,99,100,101,96]}
{"time":"2015-02-19","cmtUrl":"17767749","star":"好","userName":"kodokunogurume","rvId":"rev_100001188","webpageUrl":"http://www.dianping.com/shop/17767749","seg_word":["来","大阪","看看","神社","什么","的","也","是","蛮","有","趣味","的","!"],"word_vec":[85,90,86,102,103,5,69,2,104,7,105,5,96]}
{"time":"2015-02-19","cmtUrl":"17768191","star":"好","userName":"kodokunogurume","rvId":"rev_100001544","webpageUrl":"http://www.dianping.com/shop/17768191","seg_word":["日本","朋友","带来","这边","逛逛","minami","/","namba","蛮","好","的"],"word_vec":[106,107,108,109,110,111,112,113,104,16,5]}
{"time":"2015-02-19","cmtUrl":"18032649","star":"好","userName":"dpuser_86804300513","rvId":"rev_100001575","webpageUrl":"http://www.dianping.com/shop/18032649","seg_word":["地方","有点","偏僻","道路","也","不是","很","好走","不过","草莓","很","好吃"],"word_vec":[9,98,114,115,69,116,26,117,118,119,26,120]}
{"time":"2015-02-19","cmtUrl":"15879791","star":"非常好","userName":"qzuser_28128496895817670","rvId":"rev_100001592","webpageUrl":"http://www.dianping.com/shop/15879791","seg_word":["非常","好玩","小","鹦鹉","不怕","人"],"word_vec":[121,93,122,123,124,97]}
{"time":"2015-02-19","price":168,"cmtUrl":"3696937","star":"很好","userName":"Selina_Style","rvId":"rev_100001786","webpageUrl":"http://www.dianping.com/shop/3696937","seg_word":["价格","上涨","不少","吃","的","感觉","没","以前","好","但是","休息","大厅","的","电影","很","赞","更新","的","挺快","温泉水","不错","游泳","两个","池子","一个","标准","泳道","一个","温泉","是","温水","名","华","四季","最","赞","的","还是","服务","态度","来","这儿","玩","一天","也","还","可以","价格","虽","上涨","但","还算","可以"],"word_vec":[125,126,127,128,5,129,11,130,16,131,132,133,5,134,26,135,136,5,137,138,60,139,140,141,142,143,144,142,55,2,145,146,147,148,149,135,5,78,150,151,85,152,153,154,69,10,155,125,156,126,19,157,155]}
{"time":"2015-02-19","cmtUrl":"17673293","star":"好","userName":"xujianyong","rvId":"rev_100001813","webpageUrl":"http://www.dianping.com/shop/17673293","seg_word":["听说","是","红","衬","军","到","曼谷","*","*","*","前","必","来","誓师","的","地方","现在","旁边","有","一个班","的","部队","驻守"],"word_vec":[158,2,159,160,161,162,163,164,164,164,165,166,85,167,5,9,14,168,7,169,5,170,171]}
{"time":"2015-02-19","cmtUrl":"17771279","star":"好","userName":"kodokunogurume","rvId":"rev_100001871","webpageUrl":"http://www.dianping.com/shop/17771279","seg_word":["大阪","儿童","乐园","能","听到","孩儿","们","欢乐","的","笑声","真是","不错","的","时光"],"word_vec":[90,172,173,174,175,176,177,178,5,179,180,60,5,181]}
{"time":"2015-02-19","price":100,"cmtUrl":"1769488","star":"很好","userName":"dpuser_20337075625","rvId":"rev_100002227","webpageUrl":"http://www.dianping.com/shop/1769488","seg_word":["名","不虚","传","的","地方","很","好","很大","至少","要","一天","的","时间","要","准备","好","少","带","东西"],"word_vec":[146,182,183,5,9,26,16,184,185,186,154,5,30,186,187,16,188,189,190]}
{"time":"2015-02-19","cmtUrl":"1888960","star":"很好","userName":"mjj05","rvId":"rev_100002760","webpageUrl":"http://www.dianping.com/shop/1888960","seg_word":["难得","去","趟","莘庄","公园","梅花","展","人","还是","满","多","滴","[","调皮","]"],"word_vec":[191,4,192,193,194,195,196,97,78,197,99,198,199,200,201]}
{"time":"2015-02-19","cmtUrl":"20639070","star":"很好","userName":"dpuser_18910654090","rvId":"rev_100002889","webpageUrl":"http://www.dianping.com/shop/20639070","seg_word":["团购","点评","其实","人多","的","时候","还是","应该","先","看","一楼","的","飞跃","湖北","和","星际","其他","的","容量","比","这","两个","要","多"],"word_vec":[50,51,202,203,5,204,78,15,205,206,207,5,208,209,210,211,212,5,213,214,87,140,186,99]}
{"time":"2015-02-19","cmtUrl":"1797025","star":"非常好","userName":"ladamp","rvId":"rev_100003423","webpageUrl":"http://www.dianping.com/shop/1797025","seg_word":["团购","点评","mini","过山车","小朋友","还是","能","玩","得","总体","来说","一米","二","以下","的","儿童","能","玩","的","项目","有限"],"word_vec":[50,51,215,216,217,78,174,153,218,76,219,220,221,222,5,172,174,153,5,223,224]}
{"time":"2015-02-19","cmtUrl":"20639070","star":"非常好","userName":"爱情砒霜","rvId":"rev_100003470","webpageUrl":"http://www.dianping.com/shop/20639070","seg_word":["团购","点评","出票","很快","明天","去","了","才","知道","好不好","玩"],"word_vec":[50,51,225,226,227,4,17,228,229,230,153]}
{"time":"2015-02-19","cmtUrl":"4064239","star":"非常好","userName":"荣和1915","rvId":"rev_100003587","webpageUrl":"http://www.dianping.com/shop/4064239","seg_word":["团购","点评","服务","还","可以","带","爸妈","妈妈","去","的","温泉","一般"],"word_vec":[50,51,150,10,155,189,231,232,4,5,55,233]}
{"time":"2015-02-19","cmtUrl":"3533701","star":"很好","userName":"张轶宸","rvId":"rev_100003595","webpageUrl":"http://www.dianping.com/shop/3533701","seg_word":["团购","点评","不错","!","人","不多","水","感觉","有点","不","干净","吃","的","还行","!"],"word_vec":[50,51,60,96,97,234,235,129,98,48,236,128,5,237,96]}
{"time":"2015-02-19","cmtUrl":"3644434","s