首页
学习
活动
专区
工具
TVP
发布
社区首页 >问答首页 >无法将字符串转换为浮动python错误?

无法将字符串转换为浮动python错误?
EN

Stack Overflow用户
提问于 2018-06-11 08:17:42
回答 1查看 0关注 0票数 0

这是实时Twitter趋势发现py代码

代码语言:txt
复制
import sys
import json
import time
import datetime
import nltk
from twitter import Api
from functools import partial
from threading import Thread
from bokeh.models import ColumnDataSource
from bokeh.plotting import curdoc, figure
from tornado import gen

from utils import preprocess, get_tfidf


# cli param
tw = float(sys.argv[1]) # time window in min
top_n = int(sys.argv[2])
try:
store_history = sys.argv[3]
except IndexError:
store_history = False # default: don't store history


# twitter dev api
with open('credentials.json') as j:
cred = json.load(j)

api = Api(cred['CONSUMER_KEY'], cred['CONSUMER_SECRET'],
cred['ACCESS_TOKEN'], cred['ACCESS_TOKEN_SECRET'])

# start time for the 1st batch
batch_start_time = time.time()

# bokeh setup
source = ColumnDataSource(data=dict(x=[], y=[], text=[]))
doc = curdoc()

# bokeh update
@gen.coroutine
def update(x, y, text):
source.stream(dict(x=[x], y=[y], text=[text]), 10)  # last param controls 
right shift

# get live tweets
def get_tweets():
global batch_start_time
processed_tweet = []
try:
for line in api.GetStreamSample():
if 'text' in line and line['lang'] == u'en':
text = line['text'].encode('utf-8').replace('\n', ' ')
p_t = preprocess(text) # process tweets
if p_t:
processed_tweet += p_t,
if time.time() - batch_start_time >= tw * 60: # time is over for this batch
return processed_tweet
return processed_tweet # server-side interruption
except:
pass

# main logic for batch update
def blocking_task():
global batch_start_time
temp_batch_tweet = []
history = {}
start_t = None

while True:
try:
tweets = get_tweets()
if temp_batch_tweet: # some leftover due to interruption
temp_batch_tweet.extend(tweets)
else: # no interruption in this batch
temp_batch_tweet = tweets

utc_t = datetime.datetime.utcfromtimestamp(batch_start_time)
time_x = int('{}{}{}{}{}'.format(
utc_t.year, utc_t.month, utc_t.day, utc_t.hour, utc_t.minute))

if start_t is None: # history file start time
start_t = time_x

if temp_batch_tweet:
# get top features and idf scores
top_feature_name, top_feature_idf = get_tfidf(
temp_batch_tweet, top_n=top_n, max_features=int(5000./60*tw))

# maybe store history
if store_history:
history[time_x] = list(top_feature_name), list(top_feature_idf)

# reset start time and contain to hold next batch
batch_start_time = time.time()
temp_batch_tweet = []

# feature name ans scores (words with tie score on same line)
batch_dict = {}
for feat, score in zip(top_feature_name, top_feature_idf):
if score not in batch_dict:
batch_dict[score] = feat
else:
batch_dict[score] += ', {}'.format(feat)
for score, feat in batch_dict.iteritems(): # update
doc.add_next_tick_callback(
partial(update, x=time_x, y=score, text=feat))

except KeyboardInterrupt: # manual stop
print 'KeyboardInterrupt; aborted'
sys.exit(1)

# maybe dump history
if store_history and history:
with open('{}_{}_{}.json'.format(start_t, len(history), tw), 'w') as f:
json.dump(history, f)

# bokeh figure
p = figure(plot_height=650, plot_width=1300, title='Twitter Trending Words',
x_axis_label='UTC Time', y_axis_label='IDF Score')

# title font size
p.title.text_font_size='20pt'

# no grids
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

# no scientific representation
p.xaxis[0].formatter.use_scientific = False

# set x-tick min 1
p.xaxis[0].ticker.desired_num_ticks=1

l = p.text(x='x', y='y', text='text', text_font_size="10pt",
text_baseline="middle", text_align='center', source=source)

doc.add_root(p)
thread = Thread(target=blocking_task)
thread.start()


after I run this file it create boken application on browser but it displays 
nothing
please help... thanks
EN

回答 1

Stack Overflow用户

发布于 2018-06-11 18:10:21

代码语言:txt
复制
python file.py 2.5
票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/-100005300

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档