
First steps with Cassandra and Hadoop

Bounty: 50 beans [unresolved question]

I am using a Hadoop map task to import data from a txt file into Cassandra, but the job fails with the following error:

13/09/08 18:00:30 INFO input.FileInputFormat: Total input paths to process : 1
13/09/08 18:00:39 INFO mapred.JobClient: Running job: job_201308291744_0006
13/09/08 18:00:40 INFO mapred.JobClient: map 0% reduce 0%
13/09/08 18:01:05 INFO mapred.JobClient: Task Id : attempt_201308291744_0006_m_000000_0, Status : FAILED
Error: java.lang.ClassNotFoundException: org.apache.thrift.transport.TTransport
at java.net.URLClassLoader$1.run(URLClassLoader.java:202)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:190)
at java.lang.ClassLoader.loadClass(ClassLoader.java:306)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:301)
at java.lang.ClassLoader.loadClass(ClassLoader.java:247)
at java.lang.Class.forName0(Native Method)
at java.lang.Class.forName(Class.java:247)
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:762)
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:807)
at org.apache.hadoop.mapreduce.JobContext.getMapperClass(JobContext.java:157)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:569)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:305)
at org.apache.hadoop.mapred.Child.main(Child.java:170)

But the code does contain import org.apache.thrift.transport.TTransport;, and it compiles without errors.

The job jar was exported from Eclipse.

Here is the code:

package cassandra;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.cassandra.thrift.Cassandra;
import org.apache.cassandra.thrift.Cassandra.Client;
import org.apache.cassandra.thrift.Column;
import org.apache.cassandra.thrift.ColumnParent;
import org.apache.cassandra.thrift.ConsistencyLevel;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.thrift.protocol.TBinaryProtocol;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.transport.TFramedTransport;
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;
import org.safehaus.uuid.UUIDGenerator;

public class WordCount {

    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
        Client cassandraClient = null;
        TTransport tr = null;

        // The new-API Mapper calls setup()/cleanup(), not the old-API
        // configure()/close(), so the Thrift connection must be opened here
        // or the client would stay null when map() runs.
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            tr = new TFramedTransport(new TSocket("localhost", 9160));
            TProtocol proto = new TBinaryProtocol(tr);
            cassandraClient = new Cassandra.Client(proto);
            try {
                tr.open();
                cassandraClient.set_keyspace("Keyspace1");
            } catch (Exception e) {
                e.printStackTrace();
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            if (tr != null) {
                tr.close();
            }
        }

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();

            // Each input line is expected to look like "userName|searchKeyword".
            int splitIndex = line.indexOf("|");

            if (splitIndex > 1 && splitIndex < line.length()) {
                String userName = line.substring(0, splitIndex);
                String searchKeyword = line.substring(splitIndex + 1);

                // One column per keyword: a time-based UUID as the column name,
                // the keyword as the column value, keyed by the user name.
                ColumnParent cp = new ColumnParent();
                cp.column_family = "WordCount";
                Column c = new Column();
                c.name = ByteBuffer.wrap(UUIDGenerator.getInstance().generateTimeBasedUUID().toByteArray());
                c.timestamp = System.currentTimeMillis();
                c.value = ByteBuffer.wrap(searchKeyword.getBytes("utf-8"));

                try {
                    cassandraClient.insert(
                            ByteBuffer.wrap(userName.getBytes("utf-8")), cp, c, ConsistencyLevel.ONE);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // GenericOptionsParser also handles generic options such as -libjars.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(Map.class);
        job.setNumReduceTasks(0); // map-only job: all writes go straight to Cassandra
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
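For context, the mapper parses each input line as userName|searchKeyword, so the input file looks something like this (made-up data):

alice|cassandra tutorial
bob|hadoop jobtracker

A matching schema can be created in cassandra-cli roughly as follows (a sketch: the keyspace and column family names come from the code above, while the TimeUUIDType comparator, chosen to match the time-based UUID column names, is an assumption, and exact cli options vary by Cassandra version):

create keyspace Keyspace1;
use Keyspace1;
create column family WordCount with comparator = TimeUUIDType;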

Can anyone help me figure this out?

wusuo | Beginner level 1 | Beans: 111
Asked: 2013-09-09 09:07
All answers (1)

The other Hadoop nodes don't have the supporting jars. The Thrift and Cassandra classes are on the classpath in Eclipse at compile time, but the exported job jar does not ship them to the task nodes, so the map task's classloader cannot find org.apache.thrift.transport.TTransport at runtime.
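One common fix is to ship the dependency jars with the generic -libjars option, which the GenericOptionsParser call in main() already supports; a sketch of the launch command (jar names and paths are placeholders, and any other jars the Cassandra Thrift client needs must be listed too):

hadoop jar wordcount.jar cassandra.WordCount \
    -libjars /path/to/libthrift.jar,/path/to/apache-cassandra.jar \
    /input/searches.txt /output/wordcount

Alternatively, put the dependency jars in a lib/ directory inside the job jar itself (Hadoop adds those to the task classpath), or push jars already stored in HDFS onto the classpath from the driver. A minimal sketch of the latter, assuming the jars were first copied into HDFS under /lib (the paths and helper class are hypothetical; DistributedCache.addFileToClassPath is the pre-YARN API matching the Hadoop version in the stack trace):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;

public class ClasspathSetup {
    // Call on the job's Configuration in main() before constructing the Job.
    // The jars must already exist in HDFS, e.g.: hadoop fs -put libthrift.jar /lib/
    public static void addDependencyJars(Configuration conf) throws Exception {
        DistributedCache.addFileToClassPath(new Path("/lib/libthrift.jar"), conf);        // hypothetical HDFS path
        DistributedCache.addFileToClassPath(new Path("/lib/apache-cassandra.jar"), conf); // hypothetical HDFS path
    }
}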

wusuo | Beans: 111 (Beginner level 1) | 2013-09-16 20:14