r - Adding variables to parseTweet in streamR package -



r - Adding variables to parseTweet in streamR package -

I've been using the function parseTweets from the streamR package for R, and it works well. However, when "reading" tweets through the readTweets function, I came across variables that are not being parsed (for example, the user's background color).

Upon looking at the code, it seemed pretty straightforward to add the variables being overlooked, but when writing to the data frame it seems to skip these "new" variables. I've debugged the variables and they work. Can anyone see what I'm missing?

#' Parse tweets into a data frame, including extra user-profile columns
#'
#' Reads the tweets with `readtweets()` and builds one data-frame row per
#' element of the returned list. Every column is extracted with
#' `unlistwithna()`, so a tweet that lacks a field gets `NA` in that column.
#'
#' @param tweets Passed unchanged to `readtweets()`.
#' @param simplify If `FALSE` (default), place, geo and URL-entity columns
#'   are appended to the result.
#' @param verbose If `TRUE` (default), prints how many tweets were parsed.
#' @return A data frame (strings are kept as character, not factor).
parsetweet_more <- function(tweets, simplify = FALSE, verbose = TRUE) {
  ## from json to list
  results.list <- readtweets(tweets, verbose = FALSE)

  # nothing to parse: fail early with the name of the offending argument
  if (length(results.list) == 0) {
    stop(deparse(substitute(tweets)), " did not contain tweets. ",
         "See ?parseTweets for more details.")
  }

  # constructing the data frame with tweet and user variables
  df <- data.frame(
    text = unlistwithna(results.list, "text"),
    screen_name = unlistwithna(results.list, c("user", "screen_name")),
    retweet_count = unlistwithna(results.list,
                                 c("retweeted_status", "retweet_count")),
    favorited = unlistwithna(results.list, "favorited"),
    truncated = unlistwithna(results.list, "truncated"),
    # NOTE(review): constant placeholder column, looks like leftover
    # debugging output -- confirm before removing.
    hola = "de huebos",
    id_str = unlistwithna(results.list, "id_str"),
    in_reply_to_screen_name = unlistwithna(results.list,
                                           "in_reply_to_screen_name"),
    source = unlistwithna(results.list, "source"),
    retweeted = unlistwithna(results.list, "retweeted"),
    created_at = unlistwithna(results.list, "created_at"),
    in_reply_to_status_id_str = unlistwithna(results.list,
                                             "in_reply_to_status_id_str"),
    in_reply_to_user_id_str = unlistwithna(results.list,
                                           "in_reply_to_user_id_str"),
    lang = unlistwithna(results.list, "lang"),
    listed_count = unlistwithna(results.list, c("user", "listed_count")),
    verified = unlistwithna(results.list, c("user", "verified")),
    location = unlistwithna(results.list, c("user", "location")),
    user_id_str = unlistwithna(results.list, c("user", "id_str")),
    description = unlistwithna(results.list, c("user", "description")),
    geo_enabled = unlistwithna(results.list, c("user", "geo_enabled")),
    user_created_at = unlistwithna(results.list, c("user", "created_at")),
    statuses_count = unlistwithna(results.list, c("user", "statuses_count")),
    followers_count = unlistwithna(results.list, c("user", "followers_count")),
    favourites_count = unlistwithna(results.list,
                                    c("user", "favourites_count")),
    protected = unlistwithna(results.list, c("user", "protected")),
    user_url = unlistwithna(results.list, c("user", "url")),
    name = unlistwithna(results.list, c("user", "name")),
    time_zone = unlistwithna(results.list, c("user", "time_zone")),
    user_lang = unlistwithna(results.list, c("user", "lang")),
    utc_offset = unlistwithna(results.list, c("user", "utc_offset")),
    following_count = unlistwithna(results.list, c("user", "friends_count")),
    is_translation_enabled = unlistwithna(results.list,
                                          c("user", "is_translation_enabled")),
    # the columns below are the ones reported as not being written to the
    # data frame
    profile_background_color = unlistwithna(
      results.list, c("user", "profile_background_color")
    ),
    # this call was missing its closing parenthesis, which turned every
    # following column into an argument of unlistwithna()
    profile_image_url = unlistwithna(results.list,
                                     c("user", "profile_image_url")),
    profile_link_color = unlistwithna(results.list,
                                      c("user", "profile_link_color")),
    profile_sidebar_border_color = unlistwithna(
      results.list, c("user", "profile_sidebar_border_color")
    ),
    profile_sidebar_fill_color = unlistwithna(
      results.list, c("user", "profile_sidebar_fill_color")
    ),
    profile_text_color = unlistwithna(results.list,
                                      c("user", "profile_text_color")),
    # `next` is a reserved word in R and cannot be used as an argument
    # name; the column is named after the JSON key it is read from
    following = unlistwithna(results.list, c("user", "following")),
    stringsAsFactors = FALSE
  )

  # retweet_count is extracted from retweeted_status; if the tweet is not
  # a retweet, set it to 0
  df$retweet_count[is.na(df$retweet_count)] <- 0

  # adding geographic variables and url entities
  if (!simplify) {
    df$country_code <- unlistwithna(results.list, c("place", "country_code"))
    df$country <- unlistwithna(results.list, c("place", "country"))
    df$place_type <- unlistwithna(results.list, c("place", "place_type"))
    df$full_name <- unlistwithna(results.list, c("place", "full_name"))
    df$place_name <- unlistwithna(results.list, c("place", "place_name"))
    df$place_id <- unlistwithna(results.list, c("place", "place_id"))

    # place latitude/longitude: mean of two bounding-box entries
    place_lat_1 <- unlistwithna(
      results.list, c("place", "bounding_box", "coordinates", 1, 1, 2)
    )
    place_lat_2 <- unlistwithna(
      results.list, c("place", "bounding_box", "coordinates", 1, 2, 2)
    )
    df$place_lat <- vapply(
      seq_along(results.list),
      function(i) mean(c(place_lat_1[i], place_lat_2[i]), na.rm = TRUE),
      numeric(1)
    )
    place_lon_1 <- unlistwithna(
      results.list, c("place", "bounding_box", "coordinates", 1, 1, 1)
    )
    place_lon_2 <- unlistwithna(
      results.list, c("place", "bounding_box", "coordinates", 1, 3, 1)
    )
    df$place_lon <- vapply(
      seq_along(results.list),
      function(i) mean(c(place_lon_1[i], place_lon_2[i]), na.rm = TRUE),
      numeric(1)
    )

    df$lat <- unlistwithna(results.list, c("geo", "coordinates", 1))
    df$lon <- unlistwithna(results.list, c("geo", "coordinates", 2))
    df$expanded_url <- unlistwithna(
      results.list, c("entities", "urls", 1, "expanded_url")
    )
    df$url <- unlistwithna(results.list, c("entities", "urls", 1, "url"))
  }

  # information message
  if (verbose) {
    cat(length(df$text), "tweets have been parsed.", "\n")
  }
  df
}

#' Extract one (possibly nested) field from every element of a list
#'
#' Returns a vector with one entry per element of `lst`; elements where the
#' field is absent yield `NA`.
#'
#' @param lst A list of nested lists.
#' @param field Path to the field. Because numeric positions are usually
#'   combined with names via `c()`, they arrive as character and are
#'   converted back with `as.numeric()`. Supported shapes:
#'   * length 1 or 2: plain name path;
#'   * length 3: name path, or `c("geo", <name>, <position>)`;
#'   * length 4: name path, or `c(<name>, "urls", <position>, <name>)`;
#'   * length 6: `c(<name>, "bounding_box", <name>, <pos>, <pos>, <pos>)`.
#' @return A vector of `length(lst)`.
unlistwithna <- function(lst, field) {
  n_field <- length(field)
  vect <- rep(NA, length(lst))

  if (n_field == 1) {
    notnulls <- unlist(lapply(lst, function(x) !is.null(x[[field]])))
    vect[notnulls] <- unlist(lapply(lst[notnulls], "[[", field))
  } else if (n_field == 2) {
    notnulls <- unlist(lapply(lst, function(x) {
      !is.null(x[[field[1]]][[field[2]]])
    }))
    vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) {
      x[[field[1]]][[field[2]]]
    }))
  } else if (n_field == 3 && field[1] != "geo") {
    notnulls <- unlist(lapply(lst, function(x) {
      !is.null(x[[field[1]]][[field[2]]][[field[3]]])
    }))
    vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) {
      x[[field[1]]][[field[2]]][[field[3]]]
    }))
  } else if (n_field == 3 && field[1] == "geo") {
    # third component is a position inside the coordinates entry
    notnulls <- unlist(lapply(lst, function(x) {
      !is.null(x[[field[1]]][[field[2]]])
    }))
    vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) {
      x[[field[1]]][[field[2]]][[as.numeric(field[3])]]
    }))
  } else if (n_field == 4 && field[2] != "urls") {
    notnulls <- unlist(lapply(lst, function(x) {
      length(x[[field[1]]][[field[2]]][[field[3]]][[field[4]]]) > 0
    }))
    vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) {
      x[[field[1]]][[field[2]]][[field[3]]][[field[4]]]
    }))
  } else if (n_field == 4 && field[2] == "urls") {
    # third component is the position of the url entry
    notnulls <- unlist(lapply(lst, function(x) {
      length(x[[field[1]]][[field[2]]]) > 0
    }))
    vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) {
      x[[field[1]]][[field[2]]][[as.numeric(field[3])]][[field[4]]]
    }))
  } else if (n_field == 6 && field[2] == "bounding_box") {
    # last three components are positions inside the coordinates entry
    notnulls <- unlist(lapply(lst, function(x) {
      length(x[[field[1]]][[field[2]]]) > 0
    }))
    vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) {
      x[[field[1]]][[field[2]]][[field[3]]][[as.numeric(field[4])]][[
        as.numeric(field[5])]][[as.numeric(field[6])]]
    }))
  } else {
    stop("unsupported 'field' specification: ",
         paste(field, collapse = "/"), call. = FALSE)
  }

  vect
}

I cannot reproduce the problem. Using the following code from the streamR git repository and data(example_tweets) from the streamR package, the variables you mentioned have been parsed.

code:

# Load the package and the sample tweets.
# library() is used instead of require() so a missing package is an error
# rather than a silent FALSE; the package name is case-sensitive.
library(streamR)
data(example_tweets)

# The code below is the parser from the streamR sources with the extra
# 'profile_...' user variables added.

#' Parse tweets into a data frame, including extra user-profile columns
#'
#' Reads the tweets with `readtweets()` and builds one data-frame row per
#' element of the returned list. Every column is extracted with
#' `unlistwithna()`, so a tweet that lacks a field gets `NA` in that column.
#'
#' @param tweets Passed unchanged to `readtweets()`.
#' @param simplify If `FALSE` (default), place, geo and URL-entity columns
#'   are appended to the result.
#' @param verbose If `TRUE` (default), prints how many tweets were parsed.
#' @return A data frame (strings are kept as character, not factor).
parsetweets_new <- function(tweets, simplify = FALSE, verbose = TRUE) {
  ## from json to list
  results.list <- readtweets(tweets, verbose = FALSE)

  # nothing to parse: fail early with the name of the offending argument
  if (length(results.list) == 0) {
    stop(deparse(substitute(tweets)), " did not contain tweets. ",
         "See ?parseTweets for more details.")
  }

  # constructing the data frame with tweet and user variables
  df <- data.frame(
    text = unlistwithna(results.list, "text"),
    retweet_count = unlistwithna(results.list,
                                 c("retweeted_status", "retweet_count")),
    favorited = unlistwithna(results.list, "favorited"),
    truncated = unlistwithna(results.list, "truncated"),
    id_str = unlistwithna(results.list, "id_str"),
    in_reply_to_screen_name = unlistwithna(results.list,
                                           "in_reply_to_screen_name"),
    source = unlistwithna(results.list, "source"),
    retweeted = unlistwithna(results.list, "retweeted"),
    created_at = unlistwithna(results.list, "created_at"),
    in_reply_to_status_id_str = unlistwithna(results.list,
                                             "in_reply_to_status_id_str"),
    in_reply_to_user_id_str = unlistwithna(results.list,
                                           "in_reply_to_user_id_str"),
    lang = unlistwithna(results.list, "lang"),
    listed_count = unlistwithna(results.list, c("user", "listed_count")),
    verified = unlistwithna(results.list, c("user", "verified")),
    location = unlistwithna(results.list, c("user", "location")),
    user_id_str = unlistwithna(results.list, c("user", "id_str")),
    description = unlistwithna(results.list, c("user", "description")),
    geo_enabled = unlistwithna(results.list, c("user", "geo_enabled")),
    user_created_at = unlistwithna(results.list, c("user", "created_at")),
    statuses_count = unlistwithna(results.list, c("user", "statuses_count")),
    followers_count = unlistwithna(results.list, c("user", "followers_count")),
    favourites_count = unlistwithna(results.list,
                                    c("user", "favourites_count")),
    protected = unlistwithna(results.list, c("user", "protected")),
    user_url = unlistwithna(results.list, c("user", "url")),
    name = unlistwithna(results.list, c("user", "name")),
    time_zone = unlistwithna(results.list, c("user", "time_zone")),
    user_lang = unlistwithna(results.list, c("user", "lang")),
    utc_offset = unlistwithna(results.list, c("user", "utc_offset")),
    friends_count = unlistwithna(results.list, c("user", "friends_count")),
    screen_name = unlistwithna(results.list, c("user", "screen_name")),
    # added new variables here
    profile_background_color = unlistwithna(
      results.list, c("user", "profile_background_color")
    ),
    profile_background_image_url = unlistwithna(
      results.list, c("user", "profile_background_image_url")
    ),
    profile_background_image_url_https = unlistwithna(
      results.list, c("user", "profile_background_image_url_https")
    ),
    profile_image_url = unlistwithna(results.list,
                                     c("user", "profile_image_url")),
    profile_image_url_https = unlistwithna(
      results.list, c("user", "profile_image_url_https")
    ),
    profile_banner_url = unlistwithna(results.list,
                                      c("user", "profile_banner_url")),
    # `next` is a reserved word in R and cannot be used as an argument
    # name; the column is named after the JSON key it is read from
    following = unlistwithna(results.list, c("user", "following")),
    follow_request_sent = unlistwithna(results.list,
                                       c("user", "follow_request_sent")),
    notifications = unlistwithna(results.list, c("user", "notifications")),
    stringsAsFactors = FALSE
  )

  # retweet_count is extracted from retweeted_status; if the tweet is not
  # a retweet, set it to 0
  df$retweet_count[is.na(df$retweet_count)] <- 0

  # adding geographic variables and url entities
  if (!simplify) {
    df$country_code <- unlistwithna(results.list, c("place", "country_code"))
    df$country <- unlistwithna(results.list, c("place", "country"))
    df$place_type <- unlistwithna(results.list, c("place", "place_type"))
    df$full_name <- unlistwithna(results.list, c("place", "full_name"))
    df$place_name <- unlistwithna(results.list, c("place", "place_name"))
    df$place_id <- unlistwithna(results.list, c("place", "place_id"))

    # place latitude/longitude: mean of two bounding-box entries
    place_lat_1 <- unlistwithna(
      results.list, c("place", "bounding_box", "coordinates", 1, 1, 2)
    )
    place_lat_2 <- unlistwithna(
      results.list, c("place", "bounding_box", "coordinates", 1, 2, 2)
    )
    df$place_lat <- vapply(
      seq_along(results.list),
      function(i) mean(c(place_lat_1[i], place_lat_2[i]), na.rm = TRUE),
      numeric(1)
    )
    place_lon_1 <- unlistwithna(
      results.list, c("place", "bounding_box", "coordinates", 1, 1, 1)
    )
    place_lon_2 <- unlistwithna(
      results.list, c("place", "bounding_box", "coordinates", 1, 3, 1)
    )
    df$place_lon <- vapply(
      seq_along(results.list),
      function(i) mean(c(place_lon_1[i], place_lon_2[i]), na.rm = TRUE),
      numeric(1)
    )

    df$lat <- unlistwithna(results.list, c("geo", "coordinates", 1))
    df$lon <- unlistwithna(results.list, c("geo", "coordinates", 2))
    df$expanded_url <- unlistwithna(
      results.list, c("entities", "urls", 1, "expanded_url")
    )
    df$url <- unlistwithna(results.list, c("entities", "urls", 1, "url"))
  }

  # information message
  if (verbose) {
    cat(length(df$text), "tweets have been parsed.", "\n")
  }
  df
}

#' Extract one (possibly nested) field from every element of a list
#'
#' Returns a vector with one entry per element of `lst`; elements where the
#' field is absent yield `NA`.
#'
#' @param lst A list of nested lists.
#' @param field Path to the field. Because numeric positions are usually
#'   combined with names via `c()`, they arrive as character and are
#'   converted back with `as.numeric()`. Supported shapes:
#'   * length 1 or 2: plain name path;
#'   * length 3: name path, or `c("geo", <name>, <position>)`;
#'   * length 4: name path, or `c(<name>, "urls", <position>, <name>)`;
#'   * length 6: `c(<name>, "bounding_box", <name>, <pos>, <pos>, <pos>)`.
#' @return A vector of `length(lst)`.
unlistwithna <- function(lst, field) {
  n_field <- length(field)
  vect <- rep(NA, length(lst))

  if (n_field == 1) {
    notnulls <- unlist(lapply(lst, function(x) !is.null(x[[field]])))
    vect[notnulls] <- unlist(lapply(lst[notnulls], "[[", field))
  } else if (n_field == 2) {
    notnulls <- unlist(lapply(lst, function(x) {
      !is.null(x[[field[1]]][[field[2]]])
    }))
    vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) {
      x[[field[1]]][[field[2]]]
    }))
  } else if (n_field == 3 && field[1] != "geo") {
    notnulls <- unlist(lapply(lst, function(x) {
      !is.null(x[[field[1]]][[field[2]]][[field[3]]])
    }))
    vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) {
      x[[field[1]]][[field[2]]][[field[3]]]
    }))
  } else if (n_field == 3 && field[1] == "geo") {
    # third component is a position inside the coordinates entry
    notnulls <- unlist(lapply(lst, function(x) {
      !is.null(x[[field[1]]][[field[2]]])
    }))
    vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) {
      x[[field[1]]][[field[2]]][[as.numeric(field[3])]]
    }))
  } else if (n_field == 4 && field[2] != "urls") {
    notnulls <- unlist(lapply(lst, function(x) {
      length(x[[field[1]]][[field[2]]][[field[3]]][[field[4]]]) > 0
    }))
    vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) {
      x[[field[1]]][[field[2]]][[field[3]]][[field[4]]]
    }))
  } else if (n_field == 4 && field[2] == "urls") {
    # third component is the position of the url entry
    notnulls <- unlist(lapply(lst, function(x) {
      length(x[[field[1]]][[field[2]]]) > 0
    }))
    vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) {
      x[[field[1]]][[field[2]]][[as.numeric(field[3])]][[field[4]]]
    }))
  } else if (n_field == 6 && field[2] == "bounding_box") {
    # last three components are positions inside the coordinates entry
    notnulls <- unlist(lapply(lst, function(x) {
      length(x[[field[1]]][[field[2]]]) > 0
    }))
    vect[notnulls] <- unlist(lapply(lst[notnulls], function(x) {
      x[[field[1]]][[field[2]]][[field[3]]][[as.numeric(field[4])]][[
        as.numeric(field[5])]][[as.numeric(field[6])]]
    }))
  } else {
    stop("unsupported 'field' specification: ",
         paste(field, collapse = "/"), call. = FALSE)
  }

  vect
}

#' Read tweets in JSON format into a list
#'
#' @param tweets Either a single path to an existing file, which is read
#'   line by line, or a character vector whose elements are the lines
#'   themselves.
#' @param verbose If `TRUE` (default), prints how many tweets were parsed.
#' @return A list with one element per line that parsed into an object of
#'   at least 18 elements; empty lines, lines that fail to parse and
#'   shorter objects are dropped.
readtweets <- function(tweets, verbose = TRUE) {
  ## checking input is correct
  if (is.null(tweets)) {
    stop("error: need specify file or object tweets text stored.")
  }

  ## read the text file and save it in memory as a list
  if (length(tweets) == 1 && file.exists(tweets)) {
    lines <- readLines(tweets, encoding = "UTF-8")
  } else {
    lines <- tweets
  }

  # fromJSON is not defined in this file; presumably it is rjson's parser
  # made available by loading streamR -- TODO confirm.
  # A parse failure yields the condition object instead of aborting; it is
  # filtered out below by the length check.
  results.list <- lapply(lines[nchar(lines) > 0], function(x) {
    tryCatch(fromJSON(x), error = function(e) e)
  })

  ## removing lines that do not contain tweets or were not parsed
  errors <- which(unlist(lapply(results.list, length)) < 18)
  if (length(errors) > 0) {
    results.list <- results.list[-errors]
  }

  # information message
  if (verbose) {
    cat(length(results.list), "tweets have been parsed.", "\n")
  }
  results.list
}

output:

# Parse the tweets, displaying only 2 columns because of space constraints
tweets.df <- parsetweets_new(example_tweets)

# Console output of the column selection:
# > tweets.df[, grep('profile', colnames(tweets.df))[1:2]]
#    profile_background_color profile_background_image_url
# 1  c0deed http://a0.twimg.com/profile_background_images/656927849/miyt9dpjz77sc0w3d4vj.png
# 2  c0deed http://a0.twimg.com/profile_background_images/656927849/miyt9dpjz77sc0w3d4vj.png
# 3  c0deed http://a0.twimg.com/profile_background_images/656927849/miyt9dpjz77sc0w3d4vj.png
# 4  c0deed http://a0.twimg.com/profile_background_images/656927849/miyt9dpjz77sc0w3d4vj.png
# 5  c0deed http://a0.twimg.com/profile_background_images/656927849/miyt9dpjz77sc0w3d4vj.png
# 6  c0deed http://a0.twimg.com/profile_background_images/656927849/miyt9dpjz77sc0w3d4vj.png
# 7  c0deed http://a0.twimg.com/profile_background_images/656927849/miyt9dpjz77sc0w3d4vj.png
# 8  c0deed http://a0.twimg.com/profile_background_images/656927849/miyt9dpjz77sc0w3d4vj.png
# 9  c0deed http://a0.twimg.com/profile_background_images/656927849/miyt9dpjz77sc0w3d4vj.png
# 10 c0deed http://a0.twimg.com/profile_background_images/656927849/miyt9dpjz77sc0w3d4vj.png

As you can see, the code works after adding the new variables; check the output of unlistwithna(results.list, c('user', "profile_background_color")). I suspect there are issues with your tweets data; let me know...

r parsing twitter data.frame

Comments

Popular posts from this blog

model view controller - MVC Rails Planning -

ruby on rails - Devise Logout Error in RoR -

html - Submenu setup with jquery and effect 'fold' -