首页
学习
活动
专区
圈层
工具
发布
首页
学习
活动
专区
圈层
工具
MCP广场
社区首页 >问答首页 >通过Python从Google下载文件时出错

通过Python从Google下载文件时出错
EN

Stack Overflow用户
提问于 2022-09-10 15:24:28
回答 2查看 148关注 0票数 0

我有一段从 Google Drive 下载文件的代码。几个月前它还运行得很好,但从前一天开始就不能用了。我找不到原因。

我的代码如下:

代码语言:python
运行
复制
import pickle
import os
import re
import time

from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import requests
from tqdm import tqdm

# OAuth scopes requested when authorizing. After modifying this list, delete
# the file token.pickle.
SCOPES = [
    'https://www.googleapis.com/auth/drive.metadata',
    'https://www.googleapis.com/auth/drive',
    'https://www.googleapis.com/auth/drive.file',
]


def get_gdrive_service():
    """Build an authorized Google Drive v3 API client.

    Credentials are loaded from ``token.pickle`` when that file exists. If
    they are missing or not valid, they are either refreshed (when expired
    and a refresh token is present) or obtained through the installed-app
    flow configured by ``client_secrets.json``; the resulting credentials
    are then written back to ``token.pickle``.

    Returns:
        The object returned by ``build('drive', 'v3', credentials=...)``.
    """
    token_path = 'token.pickle'

    # token.pickle holds the user's access and refresh tokens; it is written
    # below once an authorization or refresh has completed.
    credentials = None
    if os.path.exists(token_path):
        with open(token_path, 'rb') as cached:
            credentials = pickle.load(cached)

    usable = credentials and credentials.valid
    if not usable:
        refreshable = (
            credentials and credentials.expired and credentials.refresh_token
        )
        if refreshable:
            credentials.refresh(Request())
        else:
            # No usable credentials: let the user log in.
            flow = InstalledAppFlow.from_client_secrets_file('client_secrets.json', SCOPES)
            credentials = flow.run_local_server(port=0)
        # Persist the credentials for the next run.
        with open(token_path, 'wb') as cached:
            pickle.dump(credentials, cached)

    return build('drive', 'v3', credentials=credentials)


def download_file_from_google_drive(id, destination):
    """Download a Google Drive file by ID and write it to ``destination``.

    A first request is sent to the ``uc?export=download`` endpoint. If the
    response sets a cookie whose name starts with ``download_warning``, the
    request is repeated with that cookie's value as the ``confirm``
    parameter. The cookie may be absent, in which case the first response is
    used as-is.

    Args:
        id: Drive file ID. The name shadows the builtin ``id`` but is kept so
            existing callers (including keyword callers) keep working.
        destination: Local path the downloaded bytes are written to.

    Raises:
        requests.HTTPError: If the final response has an error status code.
        RuntimeError: If the final response has no Content-Disposition
            header, i.e. it does not look like a file download.
    """
    def get_confirm_token(response):
        """Return the value of the first ``download_warning*`` cookie, or None."""
        print(response.cookies.items())
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value
        return None

    def save_response_content(response, destination):
        """Stream the response body to ``destination`` with a progress bar."""
        CHUNK_SIZE = 32768
        # get the file size from Content-length response header
        file_size = int(response.headers.get("Content-Length", 0))
        # extract Content disposition from response headers
        content_disposition = response.headers.get("content-disposition")
        print("content_disposition:", content_disposition)
        if content_disposition is None:
            # Without this header the body is presumably not the file itself
            # (e.g. an HTML page); fail before anything is written to disk
            # instead of crashing inside the regex with a TypeError.
            raise RuntimeError(
                "response for file id {!r} has no Content-Disposition header "
                "(Content-Type: {}); not a file download".format(
                    id, response.headers.get("Content-Type")
                )
            )
        # The file name is only used as the progress-bar label; fall back to
        # the destination name when the header carries no quoted filename.
        match = re.search(r'filename="([^"]+)"', content_disposition)
        filename = match.group(1) if match else os.path.basename(destination)
        with open(destination, "wb") as f:
            # The bar is advanced manually in bytes. Wrapping the chunk
            # iterator in tqdm as well would add an extra +1 per chunk.
            with tqdm(total=file_size or None, desc=f"Downloading {filename}",
                      unit="Byte", unit_scale=True, unit_divisor=1024) as progress:
                for chunk in response.iter_content(CHUNK_SIZE):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
                        progress.update(len(chunk))

    # base URL for download
    URL = "https://docs.google.com/uc?export=download"

    # The session is closed on exit, even when the download fails.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        print("response:", response)
        print("[+] Downloading", response.url)

        # get confirmation token
        token = get_confirm_token(response)
        print("token:", token)
        if token:
            # Release the unread first response before repeating the request.
            response.close()
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)

        try:
            response.raise_for_status()
            # download to disk
            save_response_content(response, destination)
        finally:
            response.close()


def search(service, query):
    """Collect every Drive file matching ``query``, following pagination.

    Args:
        service: Object exposing ``files().list(...).execute()``, such as the
            client returned by ``get_gdrive_service``.
        query: Query string passed as the ``q`` parameter of ``files().list``.

    Returns:
        A list of ``(id, name, mimeType)`` tuples in the order the pages
        returned them. Each match is also printed.
    """
    matches = []
    next_token = None
    while True:
        listing = service.files().list(
            q=query,
            spaces="drive",
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=next_token,
        ).execute()
        for entry in listing.get("files", []):
            name, file_id, mime_type = entry["name"], entry["id"], entry["mimeType"]
            print(f"Found file: {name} with the id {file_id} and type {mime_type}")
            matches.append((file_id, name, mime_type))
        next_token = listing.get('nextPageToken', None)
        if not next_token:
            # No further pages to fetch.
            return matches


def download_file(f_name):
    """Find a Drive file by exact name and download it under the same name.

    Args:
        f_name: Name of the file to look up in Drive. It is also used as the
            local destination path.

    Raises:
        IndexError: If the search returns no file named ``f_name``.

    If several files share the name, the first search result is downloaded.
    """
    service = get_gdrive_service()
    # Query values are delimited by single quotes, so a quote or backslash in
    # the name must be backslash-escaped or the query is malformed.
    escaped_name = f_name.replace("\\", "\\\\").replace("'", "\\'")
    search_result = search(service, query=f"name='{escaped_name}'")
    if not search_result:
        # Same exception type an empty result raised before, with context.
        raise IndexError(f"no Google Drive file named {f_name!r} was found")
    file_id = search_result[0][0]
    download_file_from_google_drive(file_id, f_name)


if __name__ == '__main__':
    # Script entry point: download the Drive file with this hard-coded name.
    download_file("Data_09_09_2022.zip")

基本上,错误出现在 get_confirm_token 函数中:其中 key.startswith('download_warning') 的检查现在匹配不到任何 cookie,函数返回的值为 None。我找不出原因。

能帮帮我吗?

EN

Stack Overflow用户

发布于 2022-09-11 09:47:46

我对下面这整段代码有疑问。你为什么要用这种方式下载?

代码语言:python
运行
复制
# Excerpt quoted from the question's download_file_from_google_drive();
# `id` and get_confirm_token are defined in that function, not here.
# base URL for download
URL = "https://docs.google.com/uc?export=download"

# init a HTTP session
session = requests.Session()
# make a request
response = session.get(URL, params={'id': id}, stream=True)
print("response:", response)
print("[+] Downloading", response.url)

# get confirmation token
token = get_confirm_token(response)
print("token:", token)
# repeat the request with the token only when one was found
if token:
    params = {'id': id, 'confirm': token}
    response = session.get(URL, params=params, stream=True)

参见 Google Drive API 文档:manage-downloads#python

因为您只是下载文件,而不是导出文件,所以只需使用 files.get 方法(即下面代码中的 files().get_media)并保存响应即可。

代码语言:python
运行
复制
from __future__ import print_function

import io

import google.auth
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload


def download_file(real_file_id):
    """Download a Drive file's content into memory.

    Args:
        real_file_id: ID of the file to download.

    Returns:
        The file content as ``bytes``, or ``None`` if the Drive API raised
        an ``HttpError`` (the error is printed).

    Load pre-authorized user credentials from the environment.
    TODO(developer) - See https://developers.google.com/identity
    for guides on implementing OAuth2 for the application.
    """
    creds, _ = google.auth.default()

    try:
        # create drive api client
        service = build('drive', 'v3', credentials=creds)

        # pylint: disable=maybe-no-member
        request = service.files().get_media(fileId=real_file_id)
        buffer = io.BytesIO()
        downloader = MediaIoBaseDownload(buffer, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            print(F'Download {int(status.progress() * 100)}.')

    except HttpError as error:
        print(F'An error occurred: {error}')
        # Return here: falling through to getvalue() with no buffer would
        # raise AttributeError on None.
        return None

    return buffer.getvalue()


if __name__ == '__main__':
    # Example invocation with a hard-coded file ID; the return value is not used.
    download_file(real_file_id='1KuPmvGq8yoYgbfW74OENMCB5H0n_2Jm9')
票数 1
EN
查看全部 2 条回答
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/73673013

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档