#!/usr/bin/env python
# coding=utf8
# Copyright 2010 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google Storage command line tool."""

import ConfigParser
import datetime
import errno
import getopt
import os
import re
import socket
import stat
import sys
import traceback
import xml.sax.xmlreader


def OutputAndExit(message):
  """Writes message to stderr and terminates the process with status 1.

  Args:
    message: text to show the user; a newline is appended.
  """
  sys.stderr.write('%s\n' % message)
  raise SystemExit(1)


# Before importing boto, find where gsutil is installed and include its
# boto sub-directory at the start of the module search path (sys.path), to
# ensure the versions of gsutil and boto stay in sync after software updates.
# This also allows gsutil to be used without explicitly adding it to the
# PYTHONPATH.
# We use realpath() below to unwind symlinks if any were used in the gsutil
# installation.
gsutil_bin_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
if not gsutil_bin_dir:
  OutputAndExit('Unable to determine where gsutil is installed. Sorry, '
                'cannot run correctly without this.\n')
# The bundled boto library is expected in a "boto" directory next to this
# script.
boto_lib_dir = gsutil_bin_dir + os.sep + 'boto'
if not os.path.isdir(boto_lib_dir):
  OutputAndExit('There is no boto library under the gsutil install directory '
                '(%s).\nThe gsutil command cannot work properly when installed '
                'this way.\nPlease re-install gsutil per the installation '
                'instructions.' % gsutil_bin_dir)
# Insert at position 0 so this directory is searched before any other entry
# on sys.path when boto is imported on the next line.
sys.path.insert(0, boto_lib_dir)
import boto
# Append a gsutil marker to boto's UserAgent string.
boto.UserAgent += '/gsutil'
from boto.exception import BotoClientError
from boto.exception import InvalidAclError
from boto.exception import InvalidUriError
from boto.exception import ResumableUploadException
from boto.exception import StorageResponseError
from boto.provider import Provider

from gslib.command import Command
from gslib.exception import CommandException
from gslib.wildcard_iterator import WildcardException

# Usage/help text for the gsutil command line. It is handed to the Command
# constructor when command_inst is created later in this file.
usage_string = """
SYNOPSIS
  gsutil [-d][-D] [-h header]... [command args...]

  -d option shows HTTP protocol detail.

  -h option allows you to specify additional HTTP headers, for example:
     gsutil -h "Cache-Control:public,max-age=3600" -h "Content-Type:gzip" cp ...

  Commands:
    Concatenate object content to stdout:
      cat [-h] uri...
        -h  Prints short header for each object.
    Copy objects:
      cp [-a canned_acl] [-t] [-z ext1,ext2,...] src_uri dst_uri
        - or -
      cp [-a canned_acl] [-R] [-t] [-z extensions] uri... dst_uri
        -a Sets named canned_acl when uploaded objects created (list below).
        -R Causes directories and buckets to be copied recursively.
        -t Sets MIME type based on file extension.
        -z 'txt,html' Compresses file uploads with the given extensions.
    Get ACL XML for a bucket or object (save and edit for "setacl" command):
      getacl uri
    List buckets or objects:
      ls [-b] [-l] [-L] uri...
         -l Prints long listing (owner, length); -L provides more detail.
         -b Prints info about the bucket when used with a bucket URI.
    Make buckets:
      mb uri...
    Move/rename objects:
      mv src_uri dst_uri
        - or -
      mv uri... dst_uri
    Remove buckets:
      rb uri...
    Remove objects:
      rm [-f] uri...
         -f Continues despite errors when removing by wildcard.
    Set ACL on buckets and/or objects:
      setacl file-or-canned_acl_name uri...
    Print version info:
      ver

  Omitting URI scheme defaults to "file". For example, "dir/file.txt" is
  equivalent to "file://dir/file.txt"

  URIs support object name wildcards, for example:
    gsutil cp gs://mybucket/[a-f]*.doc localdir

  Source directory or bucket names are implicitly wildcarded, so
    gsutil cp localdir gs://mybucket
  will recursively copy localdir.

  canned_acl_name can be one of: "public-read", "public-read-write",
  "authenticated-read", "bucket-owner-read", "bucket-owner-full-control"
"""


def HaveFileUris(args):
  """Reports whether at least one argument is a file URI.

  An argument is treated as a file URI when it begins with "file://"
  (compared case-insensitively) or contains no ":" character at all.

  Args:
    args: command-line arguments

  Returns:
    True if any argument is a file URI, otherwise False.
  """
  return any(arg.lower().startswith('file://') or ':' not in arg
             for arg in args)


def HaveProviderUris(args):
  """Checks whether args contains any provider URIs (like 'gs://').

  Args:
    args: command-line arguments

  Returns:
    True if args contains any provider URIs.
  """
  for uri_str in args:
    if re.match('^[a-z]+://$', uri_str):
      return True
  return False


def GetBotoConfigFileList():
  """Returns list of boto config files that exist.

  Candidates are the paths in boto.pyami.config.BotoConfigLocations plus,
  when it is set, the file named by the AWS_CREDENTIAL_FILE environment
  variable.

  Returns:
    List of the candidate paths that exist on disk, without duplicates, in
    candidate order.
  """
  # Work on a copy: appending to boto's own module-level list would change it
  # for every other user of that list in this process.
  config_paths = list(boto.pyami.config.BotoConfigLocations)
  if 'AWS_CREDENTIAL_FILE' in os.environ:
    config_paths.append(os.environ['AWS_CREDENTIAL_FILE'])
  cf_list = []
  for config_path in config_paths:
    if config_path not in cf_list and os.path.exists(config_path):
      cf_list.append(config_path)
  return cf_list

# The boto config files found on this machine, and the Command object whose
# methods implement the individual gsutil commands.
config_file_list = GetBotoConfigFileList()
command_inst = Command(gsutil_bin_dir, boto_lib_dir, usage_string,
                       config_file_list)

# Comment header that CreateBotoConfigFile() writes at the top of a newly
# created boto config file.
prelude_config_content = (
"""# This file contains credentials and other configuration information needed
# by the boto library, used by gsutil. You can edit this file (e.g., to add
# credentials) but be careful not to mis-edit any of the variable names (like
# "gs_access_key_id") or remove important markers (like the "[Credentials]" and
# "[Boto]" section delimeters).
#
""")

# CreateBotoConfigFile() builds the config file path from $HOME, so stop
# early with a clear message when HOME is not set.
if 'HOME' not in os.environ:
  OutputAndExit('No HOME environment variable set. gsutil requires this '
                'variable to be set.')

# Template [Boto] and [GSUtil] sections, with every setting commented out,
# that CreateBotoConfigFile() appends to a newly created boto config file.
additional_config_content = """
[Boto]

# To use a proxy, edit and uncomment the proxy and proxy_port lines. If you
# need a user/password with this proxy, edit and uncomment those lines as well.
#proxy = <proxy host>
#proxy_port = <proxy port>
#proxy_user = <your proxy user name>
#proxy_pass = <your proxy password>

# Set 'is_secure' to False to cause boto to connect using HTTP instead of the
# default HTTPS. This is useful if you want to capture/analyze traffic
# (e.g., with tcpdump).
#is_secure = False

# 'debug' controls the level of debug messages printed: 0 for none, 1
# for basic boto debug, 2 for all boto debug plus HTTP requests/responses.
# Note: 'gsutil -d' sets debug to 2 for that one command run.
#debug = <0, 1, or 2>

# 'num_retries' controls the number of retry attempts made when errors occur.
# The default is 5. Note: don't set this value to 0, as it will cause boto to
# fail when reusing HTTP connections.
#num_retries = <integer value>


[GSUtil]

# 'resumable_threshold' specifies the smallest file size [bytes] for which
# resumable Google Storage transfers are attempted. The default is 1048576
# (1MB).
#resumable_threshold = 1048576

# 'resumable_tracker_dir' specifies the base location where resumable
# transfer tracker files are saved. By default they're in $HOME/.gsutil
#resumable_tracker_dir = <file path>
"""


def CreateBotoConfigFile():
  """Creates a boto config file interactively, containing needed credentials.

  Prompts for the Google Storage access key ID and secret access key, writes
  them to $HOME/.boto (readable and writable by the owner only) together with
  commented-out templates for the remaining settings, and then ends the
  process through OutputAndExit so the user can run gsutil again.

  Raises:
    CommandException: if incomplete credentials are entered, or the config
        file cannot be opened for writing.
  """

  config_path = ('%s%s.boto' % (os.environ['HOME'], os.sep))
  sys.stderr.write('You have no boto config file. This script will create '
                   'one at\n%s\ncontaining your credentials, based on your '
                   'responses to the following questions.\n\n' % config_path)

  provider_map = {'aws': 'aws', 'google': 'gs'}
  uri_map = {'aws': 's3', 'google': 'gs'}
  key_ids = {}
  sec_keys = {}
  got_creds = False
  for provider in provider_map:
    # Only Google credentials are prompted for; every other provider gets a
    # commented-out template in the generated file.
    if provider == 'google':
      key_ids[provider] = raw_input('What is your %s access key ID? ' %
                                    provider)
      sec_keys[provider] = raw_input('What is your %s secret access key? ' %
                                     provider)
      got_creds = True
      if not key_ids[provider] or not sec_keys[provider]:
        raise CommandException('Incomplete credentials provided. Please try '
                               'again.')
  if not got_creds:
    raise CommandException('No credentials provided. Please try again.')

  owner_rw = stat.S_IRUSR | stat.S_IWUSR
  try:
    # Create the file with owner-only permissions from the start, so the
    # secret key is never left readable by other users, even briefly.
    fd = os.open(config_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, owner_rw)
    cfp = os.fdopen(fd, 'w')
  except (IOError, OSError):
    raise CommandException('Unable to write "%s".' % config_path)
  try:
    # The mode passed to os.open() only applies to newly created files, so
    # also tighten the permissions of a file that already existed.
    os.chmod(config_path, owner_rw)
    cfp.write(prelude_config_content)
    cfp.write('# This file was created by gsutil version "%s"\n# at %s.\n\n\n'
              % (command_inst.LoadVersionString(),
                 datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
    cfp.write('[Credentials]\n\n')
    for provider in provider_map:
      key_prefix = provider_map[provider]
      uri_scheme = uri_map[provider]
      if provider in key_ids and provider in sec_keys:
        cfp.write('# %s credentials ("%s://" URIs):\n' %
                  (provider, uri_scheme))
        cfp.write('%s_access_key_id = %s\n' % (key_prefix, key_ids[provider]))
        cfp.write('%s_secret_access_key = %s\n' %
                  (key_prefix, sec_keys[provider]))
      else:
        cfp.write('# To add %s credentials ("%s://" URIs), edit and '
                  'uncomment the\n# following two lines:\n'
                  '#%s_access_key_id = <your %s access key ID>\n'
                  '#%s_secret_access_key = <your %s secret access key>\n' %
                  (provider, uri_scheme, key_prefix, provider, key_prefix,
                   provider))
      host_key = Provider.HostKeyMap[provider]
      cfp.write('# The ability to specify an alternate storage host is '
                'primarily for cloud\n# storage service developers.\n'
                '#%s_host = <alternate storage host address>\n\n' % host_key)
    cfp.write(additional_config_content)
  finally:
    # Always release the file handle, even if one of the writes fails.
    cfp.close()
  OutputAndExit('\nConfiguration file "%s" created. If you need to use\na '
                'proxy to access the Internet please see the instructions in '
                'that file.\nPlease try running gsutil again now.' %
                config_path)


def SetupConfigIfNeeded():
  """Interactively creates boto credential/config file if needed.

  Does nothing when the loaded boto config already has Google Storage
  credentials, Amazon credentials, or an auth plugin directory. Otherwise a
  new config file is created interactively, unless boto config files already
  exist, in which case the user is asked to fix or delete them.

  Raises:
    CommandException: if boto config files exist but none of them provides
        credentials or an auth plugin directory.
  """

  config = boto.config
  has_goog_creds = (config.has_option('Credentials', 'gs_access_key_id') and
                    config.has_option('Credentials', 'gs_secret_access_key'))
  has_amzn_creds = (config.has_option('Credentials', 'aws_access_key_id') and
                    config.has_option('Credentials', 'aws_secret_access_key'))
  has_auth_plugins = config.has_option('Plugin', 'plugin_directory')
  if has_goog_creds or has_amzn_creds or has_auth_plugins:
    return
  if config_file_list:
    # Note the trailing space after "and": the adjacent literals are joined
    # as-is, so without it the message would read "andre-run".
    raise CommandException('You have no storage service credentials in any '
                           'of the following boto config\nfiles. Please add '
                           'your credentials as described in the gsutil '
                           'README file, or else\ndelete these files and '
                           're-run this script to re-create them:\n%s' %
                           config_file_list)
  CreateBotoConfigFile()


# Maps command name to
# [function, min_args, max_args, supported_sub_args, file_uri_ok,
#  provider_uri_ok, uris_start_arg], which main() interprets as follows:
#   function: the handler, called as function(args, sub_opts, headers, debug).
#   min_args, max_args: allowed number of arguments remaining after the
#     command's own options have been parsed (NO_MAX is sys.maxint).
#   supported_sub_args: getopt short-option string for the command's options.
#   file_uri_ok: if False, arguments that look like file URIs are rejected.
#   provider_uri_ok: if False, provider-only URIs (like "gs://") are rejected.
#   uris_start_arg: index of the first argument the two URI checks apply to.
NO_MAX = sys.maxint
commands = {
    'cat': [command_inst.CatCommand, 0, NO_MAX, 'h', False, True, 0],
    'cp': [command_inst.CopyObjsCommand, 2, NO_MAX, 'a:rRtz:', True, False, 0],
    'getacl': [command_inst.GetAclCommand, 1, 1, '', False, False, 0],
    'help': [command_inst.HelpCommand, 0, 0, '', False, False, 0],
    'ls': [command_inst.ListCommand, 0, NO_MAX, 'blL', False, True, 0],
    'mb': [command_inst.MakeBucketsCommand, 1, NO_MAX, '', False, False, 0],
    'mv': [command_inst.MoveObjsCommand, 2, NO_MAX, '', True, False, 0],
    'rb': [command_inst.RemoveBucketsCommand, 1, NO_MAX, '', False, False, 0],
    'rm': [command_inst.RemoveObjsCommand, 1, NO_MAX, 'f', False, False, 0],
    'setacl': [command_inst.SetAclCommand, 2, NO_MAX, '', False, False, 1],
    'update': [command_inst.UpdateCommand, 0, 0, 'f', False, False, 0],
    'ver': [command_inst.VerCommand, 0, 0, '', False, False, 0],
    # Alias for ver command.
    'version': [command_inst.VerCommand, 0, 0, '', False, False, 0],
}

# Commands for which SanityCheckXmlParser() checks the installed XML parser.
xml_parsing_commands = ['getacl', 'ls', 'setacl']


def SanityCheckXmlParser(cmd):
  """Refuses to run XML-parsing commands with the xmlplus parser.

  The xmlplus parser works incorrectly (see
  http://code.google.com/p/gsutil/issues/detail?id=18 for details). Commands
  not listed in xml_parsing_commands are not checked.

  Args:
    cmd: name of the gsutil command about to be run.

  Raises:
    CommandException: if cmd requires XML parsing and the xml.sax.xmlreader
        module was loaded from a path containing "xmlplus".
  """
  if cmd in xml_parsing_commands:
    parser_path = xml.sax.xmlreader.__file__
    if 'xmlplus' in parser_path:
      raise CommandException('The "%s" command requires XML parsing, and your '
                             'Python installation includes an\nXML parser '
                             '(%s)\nthat does not work correctly.' %
                             (cmd, parser_path))

def HandleUnknownFailure(debug, e):
  """Reports an exception no specific handler covered, then exits with 1.

  Called if we fall through all known/handled exceptions. Allows us to print
  a stack trace if the -D option was used; the trace comes from
  traceback.format_exc(), so call this from inside the except block.

  Args:
    debug: debug level; 3 (set by -D) prints the full stack trace, any other
        value prints a one-line failure message.
    e: the exception being reported.
  """
  if debug == 3:
    stack_trace = traceback.format_exc()
    sys.stderr.write('DEBUG: Exception stack trace:\n    %s\n' %
                     re.sub('\\n', '\n    ', stack_trace))
    # Exit non-zero here as well; returning would let the caller finish
    # normally and report success for a failed command.
    sys.exit(1)
  else:
    OutputAndExit('Failure: %s.' % e)


def HandleCommandException(e):
  """Prints the reason carried by a command exception and exits.

  Args:
    e: exception with "informational" and "reason" attributes. When
        informational is true the reason is printed as-is; otherwise it is
        prefixed with "CommandException: ".
  """
  if e.informational:
    message = e.reason
  else:
    message = 'CommandException: %s' % e.reason
  OutputAndExit(message)


def main():
  if sys.version_info[:3] < (2, 5, 1):
    raise CommandException('This tool requires Python 2.5.1 or higher.')

  # If user enters no commands just print the usage info.
  if len(sys.argv) == 1:
    command_inst.OutputUsageAndExit()

  debug = 0
  try:
    opts, args = getopt.getopt(sys.argv[1:], 'dDvh:',
                               ['debug', 'detailedDebug', 'version', 'help',
                                'header'])
    if not args:
      cmd = 'help'
    else:
      cmd = args[0]
    if cmd not in commands:
      raise CommandException('Invalid command "%s".' % cmd)
    SanityCheckXmlParser(cmd)
    command_info = commands[cmd]
    command_func = command_info[0]
    min_arg_count = command_info[1]
    max_arg_count = command_info[2]
    supported_sub_args = command_info[3]
    file_uri_ok = command_info[4]
    provider_uri_ok = command_info[5]
    uris_start_arg = command_info[6]
    sub_opts, args = getopt.getopt(args[1:], supported_sub_args)
    if len(args) < min_arg_count or len(args) > max_arg_count:
      raise CommandException('Wrong number of arguments for "%s" command.' %
                             cmd)
    if not file_uri_ok and HaveFileUris(args[uris_start_arg:]):
      raise CommandException('"%s" command does not support "file://" URIs.' %
                             cmd)
    if not provider_uri_ok and HaveProviderUris(args[uris_start_arg:]):
      raise CommandException('"%s" command does not support provider-only '
                             'URIs.' % cmd)

    SetupConfigIfNeeded()

    headers = {}
    for o, a in opts:
      if o in ('-d', '--debug'):
        # Passing debug=2 causes boto to include httplib header output.
        debug = 2
      if o in ('-D', '--detailedDebug'):
        # We use debug level 3 to ask gsutil code to output more detailed
        # debug output. This is a bit of a hack since it overloads the same
        # flag that was originally implemented for boto use.
        debug = 3
      if o in ('-?', '--help'):
        command_inst.OutputUsageAndExit()
      if o in ('-h', '--header'):
        (hdr_name, unused_ptn, hdr_val) = a.partition(':')
        if not hdr_name or not hdr_val:
          command_inst.OutputUsageAndExit()
        headers[hdr_name] = hdr_val
      if o in ('-v', '--version'):
        command_inst.VerCommand([], None)
        sys.exit(0)
    if debug == 3:
      command_inst.VerCommand([], None)
      config_items = []
      try:
        config_items.extend(boto.config.items('Boto'))
        config_items.extend(boto.config.items('GSUtil'))
      except ConfigParser.NoSectionError:
        pass
      print 'config: %s' % str(config_items)
    command_func(args, sub_opts, headers, debug)
  except AttributeError, e:
    if e.message.find('secret_access_key') != -1:
      OutputAndExit('Missing credentials for the given URI(s). Does your '
                    'boto config file contain all needed credentials?')
    else:
      OutputAndExit(e.message)
  except BotoClientError, e:
    OutputAndExit('BotoClientError: %s.' % e.reason)
  except CommandException, e:
    HandleCommandException(e)
  except getopt.GetoptError, e:
    HandleCommandException(CommandException(e.msg))
  except InvalidAclError, e:
    OutputAndExit('InvalidAclError: %s.' % e.message)
  except InvalidUriError, e:
    OutputAndExit('InvalidUriError: %s.' % e.message)
  except boto.auth_handler.NotReadyToAuthenticate:
    OutputAndExit('NotReadyToAuthenticate')
  except OSError, e:
    OutputAndExit('OSError: %s.' % e.strerror)
  except WildcardException, e:
    OutputAndExit(e.reason)
  except StorageResponseError, e:
    detail_start = e.body.find('<Details>')
    detail_end = e.body.find('</Details>')
    exc_name = re.split("[\.']", str(type(e)))[-2]
    if detail_start != -1 and detail_end != -1:
      detail = e.body[detail_start+9:detail_end]
      OutputAndExit('%s: status=%d, code=%s, reason=%s, detail=%s.' %
                    (exc_name, e.status, e.code, e.reason, detail))
    else:
      OutputAndExit('%s:: status=%d, code=%s, reason=%s.' %
                    (exc_name, e.status, e.code, e.reason))
  except ResumableUploadException, e:
    OutputAndExit('ResumableUploadException: %s.' % e.message)
  except boto.exception.TooManyAuthHandlerReadyToAuthenticate:
    OutputAndExit('TooManyAuthHandlerReadyToAuthenticate: %s' % str(e))
  except socket.error, e:
    if e.args[0] == errno.EPIPE:
      # Retrying with a smaller file (per suggestion below) works because
      # the library code send loop (in boto/s3/key.py) can get through the
      # entire file and then request the HTTP response before the socket
      # gets closed and the response lost.
      message = (
"""
Got a "Broken pipe" error. This can happen to clients using Python 2.x,
when the server sends an error response and then closes the socket (see
http://bugs.python.org/issue5542). If you are trying to upload a large
object you might retry with a small (say 200k) object, and see if you get
a more specific error code.
""")
      OutputAndExit(message)
    else:
      HandleUnknownFailure(debug, e)
  except Exception, e:
    HandleUnknownFailure(debug, e)

# Run the tool only when this file is executed as a script. The exit status
# is 0 whenever main() returns.
if __name__ == '__main__':
  main()
  sys.exit(0)
