twitter4j.json.DataObjectFactory Java Examples

The following examples show how to use twitter4j.json.DataObjectFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SerializedStreamReader.java    From twitter-dataset-collector with Apache License 2.0 6 votes vote down vote up
/**
 * Reads line-delimited tweet JSON files and writes one transformed line per accepted
 * tweet to {@code outputFile}.
 *
 * <p>Every line of every input file is parsed with {@code DataObjectFactory.createStatus}.
 * Statuses accepted by {@code filterer} are converted with {@code transformer} and appended
 * to the output, each followed by a line separator. The output is flushed after each
 * input file has been processed.
 *
 * <p>The reader and the writer are closed even when parsing or I/O fails part-way, so
 * no file handles are leaked on error.
 *
 * @param jsonFiles   paths of the input files, one JSON document per line
 * @param outputFile  path of the file to (over)write
 * @param transformer turns an accepted status into the text line to write
 * @param filterer    decides which statuses are written
 * @throws IOException      if reading an input file or writing the output fails
 * @throws TwitterException if a line cannot be parsed into a status
 */
public static void extractSubsetOfTweetFields(List<String> jsonFiles, String outputFile,
		 StatusTransformer transformer, StatusFilterer filterer) throws IOException, TwitterException {
	
	BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(
			new FileOutputStream(outputFile), FileUtil.UTF8));
	try {
		for (String jsonFile : jsonFiles) {
			System.out.println(jsonFile);
			BufferedReader reader = new BufferedReader(new InputStreamReader(
					new FileInputStream(jsonFile), FileUtil.UTF8));
			try {
				String line;
				while ((line = reader.readLine()) != null) {
					Status status = DataObjectFactory.createStatus(line);
					if (filterer.acceptStatus(status)) {
						writer.append(transformer.extractLine(status));
						writer.newLine();
					}
				}
			} finally {
				// Release the input file even if parsing or writing threw.
				reader.close();
			}
			writer.flush();
		}
	} finally {
		// Always release the output file, including on failure.
		writer.close();
	}
}
 
Example #2
Source File: TextProcessor.java    From first-stories-twitter with MIT License 6 votes vote down vote up
/**
 * Parses the raw tweet JSON held in the tuple's first field and emits exactly one
 * {@code Tweet} wrapped in a {@code Values}.
 *
 * <p>Any exception raised while parsing or cleaning the text is logged and otherwise
 * ignored. When no status could be parsed, a placeholder tweet with id {@code -1} and
 * text {@code " "} is emitted instead.
 *
 * @param tuple     input tuple; field 0 is read and cast to {@code String}
 * @param collector receives the emitted tweet
 */
@Override
public void execute(TridentTuple tuple, TridentCollector collector) {
	Status status = null;
	String cleanedText = null;
	try {
		status = DataObjectFactory.createStatus((String) tuple.getValue(0));
		cleanedText = tools.removeLinksAndReplies(tb.removeSpacesInBetween(status.getText()));
	} catch (Exception e) {
		LOG.error(e.toString());
	}

	// Rarely Twitter4J can't parse the JSON into a Status; the status is then null
	// and a placeholder tweet is emitted so downstream still gets a value.
	final Tweet tweet = (status == null)
			? new Tweet(-1, " ")
			: new Tweet(status.getId(), cleanedText);

	collector.emit(new Values(tweet));
}
 
Example #3
Source File: ParseTweet.java    From trident-tutorial with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a raw JSON string into a {@code Status}.
 *
 * @param rawJson the JSON text to parse
 * @return the parsed status, or {@code null} when parsing raised a
 *         {@code TwitterException} (the failure is logged at warn level)
 */
private Status parse(String rawJson){
    Status result;
    try {
        result = DataObjectFactory.createStatus(rawJson);
    } catch (TwitterException e) {
        log.warn("Invalid tweet json -> " + rawJson, e);
        result = null;
    }
    return result;
}
 
Example #4
Source File: StreamCollector.java    From twitter-dataset-collector with Apache License 2.0 5 votes vote down vote up
/**
 * Opens the first dump file ({@code tweetDump + ".0"}), installs the stream listener
 * that appends the raw JSON of each received status to the current dump file, and
 * registers a JVM shutdown hook for this collector.
 *
 * <p>When {@code maxJsonFileSize} is positive, the listener rolls over to a new file
 * ({@code tweetDump + "." + fileCounter}) once the accumulated size reaches
 * {@code maxJsonFileSize * 1024}. Size is counted with {@code String.length()}, i.e.
 * in characters, and does not include line separators.
 *
 * @param tweetDump base path of the dump files; a numeric suffix is appended
 */
protected void open(final String tweetDump) {
	openWriter(tweetDump + ".0");
	
	listener = new StatusListener(){
		@Override
		public void onStatus(Status status) {
			try {
				String line = DataObjectFactory.getRawJSON(status);
				if (line == null) {
					// No raw JSON is available for this status. Appending null would
					// write the literal text "null" into the dump and line.length()
					// would then throw, so the status is skipped instead.
					return;
				}
				writer.append(line);
				writer.newLine();
				currentFileSize += line.length();
				if (maxJsonFileSize > 0){
					if (currentFileSize >= maxJsonFileSize*1024){
						// Roll over to the next numbered dump file.
						closeWriter();
						fileCounter++;
						currentFileSize = 0;
						openWriter(tweetDump + "." + fileCounter);
					}
				}
			} catch (IOException e){
				e.printStackTrace();
				closeWriter();
			}
		}
		@Override
		public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {}
		@Override
		public void onTrackLimitationNotice(int numberOfLimitedStatuses) {}
		@Override
		public void onException(Exception ex) {
			ex.printStackTrace();
		}
		@Override
		public void onScrubGeo(long arg0, long arg1) {
		}
		@Override
		public void onStallWarning(StallWarning arg0) {
			//System.out.println(arg0.toString());
		}
	};
	Runtime.getRuntime().addShutdownHook(new Shutdown(this));
}
 
Example #5
Source File: TwitterProducer.java    From lsiem with Apache License 2.0 4 votes vote down vote up
/**
 * Configures a Kafka producer and a Twitter stream from the given context, then starts
 * the sample stream; the raw JSON of every received status is sent to Kafka.
 *
 * <p>The Kafka topic is read from the context once, before the listener is created,
 * rather than on every incoming tweet.
 *
 * @param context configuration source for the Kafka and Twitter OAuth settings
 */
private void start(Context context) {

	// Producer properties
	Properties props = new Properties();
	props.put("metadata.broker.list", context.getString(TwitterSourceConstant.BROKER_LIST));
	props.put("serializer.class", context.getString(TwitterSourceConstant.SERIALIZER));
	props.put("request.required.acks", context.getString(TwitterSourceConstant.REQUIRED_ACKS));

	ProducerConfig config = new ProducerConfig(props);

	final Producer<String, String> producer = new Producer<String, String>(config);

	// Resolved once so the listener captures a final local instead of the
	// (non-final) context parameter and does no per-tweet configuration lookup.
	final String kafkaTopic = context.getString(TwitterSourceConstant.KAFKA_TOPIC);

	// Twitter properties
	consumerKey = context.getString(TwitterSourceConstant.CONSUMER_KEY_KEY);
	consumerSecret = context.getString(TwitterSourceConstant.CONSUMER_SECRET_KEY);
	accessToken = context.getString(TwitterSourceConstant.ACCESS_TOKEN_KEY);
	accessTokenSecret = context.getString(TwitterSourceConstant.ACCESS_TOKEN_SECRET_KEY);

	ConfigurationBuilder cb = new ConfigurationBuilder();
	cb.setOAuthConsumerKey(consumerKey);
	cb.setOAuthConsumerSecret(consumerSecret);
	cb.setOAuthAccessToken(accessToken);
	cb.setOAuthAccessTokenSecret(accessTokenSecret);
	// The JSON store is enabled so that getRawJSON has something to return in onStatus.
	cb.setJSONStoreEnabled(true);
	cb.setIncludeEntitiesEnabled(true);

	twitterStream = new TwitterStreamFactory(cb.build()).getInstance();

	// Twitter listener
	StatusListener listener = new StatusListener() {
		// Executed every time a new tweet comes in: logs it and forwards
		// its raw JSON to Kafka.
		public void onStatus(Status status) {
			logger.info(status.getUser().getScreenName() + ": " + status.getText());

			KeyedMessage<String, String> data = new KeyedMessage<String, String>(
					kafkaTopic, DataObjectFactory.getRawJSON(status));
			producer.send(data);
		}

		public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {}

		public void onTrackLimitationNotice(int numberOfLimitedStatuses) {}

		public void onScrubGeo(long userId, long upToStatusId) {}

		public void onException(Exception ex) {
			// NOTE(review): the message announces a shutdown, but the shutdown call
			// below is commented out and the exception itself is not logged.
			logger.info("Shutting down Twitter sample stream...");
			//twitterStream.shutdown();
		}

		public void onStallWarning(StallWarning warning) {}
	};

	// Bind the listener
	twitterStream.addListener(listener);
	// Start consuming the sample stream
	twitterStream.sample();
}
 
Example #6
Source File: TweetIngestor.java    From trident-tutorial with Apache License 2.0 4 votes vote down vote up
/**
 * Starts tweet ingestion on a new thread: registers a listener on the stream, applies
 * a track filter for {@code track}, and reports start-up success or failure through
 * {@code notifyStarted()} / {@code notifyFailed(Throwable)}.
 *
 * <p>For each status that passes the rate limiter and has non-empty raw JSON, the JSON
 * is posted to {@code kafkaTopic}. The "tweet-ingestion" timer covers the post call and
 * is stopped in a {@code finally} block, so a timing context is never left open, whether
 * the tweet is skipped or the post throws.
 */
@Override
protected void doStart() {
    new Thread() {
        @Override
        public void run() {
            try {
                StatusAdapter listener = new StatusAdapter() {
                    @Override
                    public void onStatus(Status status) {
                        if (!rateLimiter.tryAcquire()) {
                            // Throwing away tweets!
                            return;
                        }
                        String rawJson = DataObjectFactory.getRawJSON(status);
                        if (StringUtils.isEmpty(rawJson)) {
                            // Nothing to post; no timing context has been opened yet.
                            return;
                        }
                        Timer.Context t = METRIC_REGISTRY.timer("tweet-ingestion").time();
                        try {
                            poster.post(kafkaTopic, rawJson);
                        } finally {
                            // Record the timing even when the post fails.
                            t.stop();
                        }
                    }

                    @Override
                    public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
                        return;
                    }

                    @Override
                    public void onStallWarning(StallWarning warning) {
                        log.warn("Received:" + warning);
                    }
                };

                stream.addListener(listener);
                FilterQuery fq = new FilterQuery();
                fq.track(new String[]{ track });
                stream.filter(fq);
                notifyStarted();
            } catch (Throwable e) {
                notifyFailed(e);
                throw Throwables.propagate(e);
            }
        }
    }.start();
}
 
Example #7
Source File: SerializedStreamReader.java    From twitter-dataset-collector with Apache License 2.0 4 votes vote down vote up
/**
 * Prints summary statistics for a set of line-delimited tweet JSON files to standard
 * output: the covered time period and the number of tweets, geo-tagged tweets,
 * retweets, replies, and distinct users.
 *
 * <p>Each input file is closed even when parsing fails part-way. When no creation
 * dates were collected (for example, when the inputs contain no tweets), a single
 * "n/a" period line is printed instead of dereferencing the unset dates.
 *
 * @param jsonFiles paths of the input files, one JSON document per line
 * @throws IOException      if an input file cannot be read
 * @throws TwitterException if a line cannot be parsed into a status
 */
public static void printJsonFileSummary(List<String> jsonFiles) throws IOException, TwitterException{
	
	// aggregators
	int count =  0;
	int countGeo = 0;
	int countRetweet = 0;
	int countResponses = 0;
	long minTime = Long.MAX_VALUE, maxTime = 0;
	Date minDate = null, maxDate = null;
	Set<Long> users = new HashSet<Long>();
	
	for (String jsonFile : jsonFiles) {
		System.out.println(jsonFile);
		BufferedReader reader = new BufferedReader(new InputStreamReader(
				new FileInputStream(jsonFile), FileUtil.UTF8));
		try {
			String line;
			while ((line = reader.readLine()) != null) {
				Status status = DataObjectFactory.createStatus(line);
				count++;
				users.add(status.getUser().getId());
				if (status.isRetweet()){
					countRetweet++;
				}
				if (status.getInReplyToStatusId() > 0){
					countResponses++;
				}
				if (status.getGeoLocation() != null){
					countGeo++;
				}
				Date createdAt = status.getCreatedAt();
				long tstamp = createdAt.getTime();
				
				if (tstamp < minTime){
					minTime = tstamp;
					minDate = createdAt;
				}
				if (tstamp > maxTime){
					maxTime = tstamp;
					maxDate = createdAt;
				}
			}
		} finally {
			// Release the file handle even if a line failed to parse.
			reader.close();
		}
	}
	
	if (minDate == null || maxDate == null) {
		// No usable timestamps were collected; the dates are still unset.
		System.out.println("Period: n/a (no tweets read)");
	} else {
		System.out.println("Period: [" + new Timestamp(minTime) + "," + new Timestamp(maxTime) + "]");
		System.out.println("Period: [" + minDate.toString() + "," + maxDate.toString() + "]");
	}
	
	System.out.println("#tweets: " + count);
	System.out.println("#geo: " + countGeo);
	System.out.println("#retweets: " + countRetweet);
	System.out.println("#replies: " + countResponses);
	System.out.println("#users: " + users.size());
	
}