Google Colaboratory是Jupyter的一个专用服务器,允许用户免费使用12个小时(重启后可以继续使用)。用户可以利用Google Colab测试Python代码,对于进行机器学习和数据科学研究的小伙伴是个非常实用的工具。
今天,我们介绍一些使用和配置Google Colab的方法及小技巧。
配置与连接Google Drive
# Mount Google Drive into the local folder `drive` via google-drive-ocamlfuse.
# Lines starting with "!" are notebook shell escapes, not plain Python.
# Create the folder that is later used as the mount point.
!mkdir -p drive
# Install the apt helper packages, add the alessandro-strada PPA, refresh the
# package index, then install google-drive-ocamlfuse and fuse.
# NOTE(review): with `2>&1 > /dev/null` only stdout is discarded; stderr is
# still shown in the notebook -- confirm that is intended.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Authorize instance to use Google Drive: authenticate the Colab user, then
# read the application-default credentials for their client id and secret.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First headless run with stdin closed; only output lines containing "URL"
# are kept (presumably the authorization link to open in a browser).
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Read the verification code without echoing it into the notebook output.
vcode = getpass.getpass()
# Second headless run: feed the verification code on stdin.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}
# Connect drive to folder
!google-drive-ocamlfuse drive
从实例上传和下载文件
from google.colab import files
def upload(path):
    """Ask the user to upload a file and save its content to *path*.

    Opens the Colab upload widget through ``files.upload()`` and writes the
    content of the first uploaded file to *path* in binary mode. Any further
    files selected in the same dialog are ignored.

    Args:
        path: Destination path on the Colab instance.

    Raises:
        ValueError: If the upload dialog returned no file.
    """
    uploaded = files.upload()
    if not uploaded:
        raise ValueError("no file was uploaded")
    # dict views are not subscriptable on Python 3, so take the first value
    # through an iterator instead of uploaded.keys()[0].
    content = next(iter(uploaded.values()))
    with open(path, 'wb') as fp:
        fp.write(content)
def download(path):
    """Send the file at *path* from the Colab instance to the browser.

    Thin wrapper around ``files.download``.

    Args:
        path: Path of the file on the Colab instance.
    """
    files.download(path)
使用Facets
Facets源代码:https://github.com/PAIR-code/facets
import shutil
if os.path.exists('./facets'):
shutil.rmtree("./facets")
!git clone https://github.com/PAIR-code/facets
!jupyter nbextension install facets/facets-dist/
import sys
import os
sys.path.append(os.path.abspath('./facets/facets_overview/python/'))
from generic_feature_statistics_generator import GenericFeatureStatisticsGenerator
import base64
class FacetsOverview(object):
    """Notebook-renderable Facets Overview of a train (and optional test) table.

    The feature statistics are computed once in ``__init__``; the notebook
    calls ``_repr_html_`` to obtain the HTML that embeds them.
    """

    # Markup that loads the installed facets-dist extension and hands the
    # base64-encoded statistics proto to the <facets-overview> element.
    _HTML_TEMPLATE = """<link rel="import" href="/nbextensions/facets-dist/facets-jupyter.html" >
<facets-overview id="elem"></facets-overview>
<script>
document.querySelector("#elem").protoInput = "{protostr}";
</script>"""

    def __init__(self, df_train, df_test=None):
        """Compute the feature statistics proto.

        Args:
            df_train: Table registered under the name ``'train'``.
            df_test: Optional table registered under the name ``'test'``;
                when omitted, only the train table is summarized.
        """
        tables = [{'name': 'train', 'table': df_train}]
        if df_test is not None:
            tables.append({'name': 'test', 'table': df_test})
        gfsg = GenericFeatureStatisticsGenerator()
        self._proto = gfsg.ProtoFromDataFrames(tables)

    def _repr_html_(self):
        """Return the HTML snippet embedding the serialized statistics."""
        protostr = base64.b64encode(self._proto.SerializeToString()).decode("utf-8")
        return self._HTML_TEMPLATE.format(protostr=protostr)
class FacetsDive(object):
    """Notebook-renderable Facets Dive view of a table.

    The notebook calls ``_repr_html_`` to obtain the HTML that embeds the
    records.
    """

    # Markup that loads the installed facets-dist extension and assigns the
    # JSON records to the <facets-dive> element.
    _HTML_TEMPLATE = """<link rel="import" href="/nbextensions/facets-dist/facets-jupyter.html" >
<facets-dive id="elem" height="{height}"></facets-dive>
<script>
document.querySelector("#elem").data = {data};
</script>"""

    def __init__(self, data, height=1000):
        """Store the table to display.

        Args:
            data: Object exposing ``to_json(orient='records')``
                (presumably a pandas DataFrame).
            height: Value written into the element's ``height`` attribute;
                defaults to the previously hard-coded 1000.
        """
        self._data = data
        self.height = height

    def _repr_html_(self):
        """Return the HTML snippet embedding the records as JSON."""
        records = self._data.to_json(orient='records')
        return self._HTML_TEMPLATE.format(data=records, height=self.height)
上述定义的函数使用方法如下:
# Example usage; df_train and df_test must already be defined.
# NOTE(review): a notebook only displays the last expression of a cell, so
# these two lines are presumably meant to run in separate cells -- confirm.
FacetsOverview(df_train, df_test)
# Facets Dive on the first 500 rows of df_train.
FacetsDive(df_train.head(500))
在代码中运行TensorBoard
# Start TensorBoard in the background and publish it through an ngrok tunnel.
# Directory passed to TensorBoard as --logdir.
LOG_DIR = '/tmp'
# Launch TensorBoard on port 6006; the trailing "&" backgrounds the process.
get_ipython().system_raw(
'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
.format(LOG_DIR)
)
# Download the ngrok client (-c resumes, -nc skips an existing file) and unpack it.
! wget -c -nc https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
! unzip -o ngrok-stable-linux-amd64.zip
# Open an HTTP tunnel to port 6006 in the background.
get_ipython().system_raw('./ngrok http 6006 &')
# Query ngrok's local API on port 4040 and print the first tunnel's public URL.
! curl -s http://localhost:4040/api/tunnels | python3 -c \
"import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"
通过SSH连接到实例
#Generate root password
import random, string
password = ''.join(random.choice(string.ascii_letters + string.digits) for i in range(20))
#Download ngrok
! wget -q -c -nc https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
! unzip -qq -n ngrok-stable-linux-amd64.zip
#Setup sshd
! apt-get install -qq -o=Dpkg::Use-Pty=0 openssh-server pwgen > /dev/null
#Set root password
! echo root:$password | chpasswd
! mkdir -p /var/run/sshd
! echo "PermitRootLogin yes" >> /etc/ssh/sshd_config
! echo "PasswordAuthentication yes" >> /etc/ssh/sshd_config
! echo "LD_LIBRARY_PATH=/usr/lib64-nvidia" >> /root/.bashrc
! echo "export LD_LIBRARY_PATH" >> /root/.bashrc
#Run sshd
get_ipython().system_raw('/usr/sbin/sshd -D &')
#Ask token
print("Copy authtoken from https://dashboard.ngrok.com/auth")
import getpass
authtoken = getpass.getpass()
#Create tunnel
get_ipython().system_raw('./ngrok authtoken $authtoken && ./ngrok tcp 22 &')
#Print root password
print("Root password: {}".format(password))
#Get public address
! curl -s http://localhost:4040/api/tunnels | python3 -c \
"import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"
你的数据位于/content/目录中。
现阶段,免费的Ngrok账户不支持同时开启两个隧道。如果你正在用它运行TensorBoard,可以通过以下方法终止该隧道。
# Kill every process whose `ps aux` line matches './ngrok' (the PID is column 2).
# NOTE(review): the grep process itself also matches the pattern, so kill may
# complain about one PID that no longer exists.
!kill $(ps aux | grep './ngrok' | awk '{print $2}')
Google Colab与Kaggle的数据交互
为了实现Colab与Kaggle的数据上传和下载,你需要安装Kaggle-API库,地址如下:https://github.com/Kaggle/kaggle-api