go-common/vendor/github.com/aliyun/aliyun-oss-go-sdk/oss/download.go
2019-04-22 02:59:20 +00:00

551 lines
14 KiB
Go

package oss
import (
"crypto/md5"
"encoding/base64"
"encoding/json"
"errors"
"hash"
"hash/crc64"
"io"
"io/ioutil"
"os"
"strconv"
)
// DownloadFile downloads files with multipart download.
//
// objectKey the object key.
// filePath the local file to download from objectKey in OSS.
// partSize the part size in bytes.
// options object's constraints, check out GetObject for the reference.
//
// error it's nil when the call succeeds, otherwise it's an error object.
//
func (bucket Bucket) DownloadFile(objectKey, filePath string, partSize int64, options ...Option) error {
if partSize < 1 {
return errors.New("oss: part size smaller than 1")
}
cpConf, err := getCpConfig(options, filePath)
if err != nil {
return err
}
uRange, err := getRangeConfig(options)
if err != nil {
return err
}
routines := getRoutines(options)
if cpConf.IsEnable {
return bucket.downloadFileWithCp(objectKey, filePath, partSize, options, cpConf.FilePath, routines, uRange)
}
return bucket.downloadFile(objectKey, filePath, partSize, options, routines, uRange)
}
// getRangeConfig gets the download range from the options.
func getRangeConfig(options []Option) (*unpackedRange, error) {
rangeOpt, err := findOption(options, HTTPHeaderRange, nil)
if err != nil || rangeOpt == nil {
return nil, err
}
return parseRange(rangeOpt.(string))
}
// ----- concurrent download without checkpoint -----
// downloadWorkerArg is download worker's parameters
type downloadWorkerArg struct {
bucket *Bucket
key string
filePath string
options []Option
hook downloadPartHook
enableCRC bool
}
// downloadPartHook is hook for test
type downloadPartHook func(part downloadPart) error
var downloadPartHooker downloadPartHook = defaultDownloadPartHook
func defaultDownloadPartHook(part downloadPart) error {
return nil
}
// defaultDownloadProgressListener defines default ProgressListener, shields the ProgressListener in options of GetObject.
type defaultDownloadProgressListener struct {
}
// ProgressChanged no-ops
func (listener *defaultDownloadProgressListener) ProgressChanged(event *ProgressEvent) {
}
// downloadWorker
func downloadWorker(id int, arg downloadWorkerArg, jobs <-chan downloadPart, results chan<- downloadPart, failed chan<- error, die <-chan bool) {
for part := range jobs {
if err := arg.hook(part); err != nil {
failed <- err
break
}
// Resolve options
r := Range(part.Start, part.End)
p := Progress(&defaultDownloadProgressListener{})
opts := make([]Option, len(arg.options)+2)
// Append orderly, can not be reversed!
opts = append(opts, arg.options...)
opts = append(opts, r, p)
rd, err := arg.bucket.GetObject(arg.key, opts...)
if err != nil {
failed <- err
break
}
// defer rd.Close()
var crcCalc hash.Hash64
if arg.enableCRC {
crcCalc = crc64.New(crcTable())
contentLen := part.End - part.Start + 1
rd = ioutil.NopCloser(TeeReader(rd, crcCalc, contentLen, nil, nil))
}
// defer rd.Close()
select {
case <-die:
return
default:
}
fd, err := os.OpenFile(arg.filePath, os.O_WRONLY, FilePermMode)
if err != nil {
failed <- err
break
}
_, err = fd.Seek(part.Start-part.Offset, io.SeekStart)
if err != nil {
fd.Close()
failed <- err
break
}
_, err = io.Copy(fd, rd)
if err != nil {
fd.Close()
failed <- err
break
}
rd.Close()
if arg.enableCRC {
part.CRC64 = crcCalc.Sum64()
}
fd.Close()
results <- part
}
}
// downloadScheduler
func downloadScheduler(jobs chan downloadPart, parts []downloadPart) {
for _, part := range parts {
jobs <- part
}
close(jobs)
}
// downloadPart defines download part
type downloadPart struct {
Index int // Part number, starting from 0
Start int64 // Start index
End int64 // End index
Offset int64 // Offset
CRC64 uint64 // CRC check value of part
}
// getDownloadParts gets download parts
func getDownloadParts(bucket *Bucket, objectKey string, partSize int64, uRange *unpackedRange) ([]downloadPart, bool, uint64, error) {
meta, err := bucket.GetObjectDetailedMeta(objectKey)
if err != nil {
return nil, false, 0, err
}
parts := []downloadPart{}
objectSize, err := strconv.ParseInt(meta.Get(HTTPHeaderContentLength), 10, 0)
if err != nil {
return nil, false, 0, err
}
enableCRC := false
crcVal := (uint64)(0)
if bucket.getConfig().IsEnableCRC && meta.Get(HTTPHeaderOssCRC64) != "" {
if uRange == nil || (!uRange.hasStart && !uRange.hasEnd) {
enableCRC = true
crcVal, _ = strconv.ParseUint(meta.Get(HTTPHeaderOssCRC64), 10, 0)
}
}
part := downloadPart{}
i := 0
start, end := adjustRange(uRange, objectSize)
for offset := start; offset < end; offset += partSize {
part.Index = i
part.Start = offset
part.End = GetPartEnd(offset, end, partSize)
part.Offset = start
part.CRC64 = 0
parts = append(parts, part)
i++
}
return parts, enableCRC, crcVal, nil
}
// getObjectBytes gets object bytes length
func getObjectBytes(parts []downloadPart) int64 {
var ob int64
for _, part := range parts {
ob += (part.End - part.Start + 1)
}
return ob
}
// combineCRCInParts caculates the total CRC of continuous parts
func combineCRCInParts(dps []downloadPart) uint64 {
if len(dps) == 0 {
return 0
}
crc := dps[0].CRC64
for i := 1; i < len(dps); i++ {
crc = CRC64Combine(crc, dps[i].CRC64, (uint64)(dps[i].End-dps[i].Start+1))
}
return crc
}
// downloadFile downloads file concurrently without checkpoint.
func (bucket Bucket) downloadFile(objectKey, filePath string, partSize int64, options []Option, routines int, uRange *unpackedRange) error {
tempFilePath := filePath + TempFileSuffix
listener := getProgressListener(options)
// If the file does not exist, create one. If exists, the download will overwrite it.
fd, err := os.OpenFile(tempFilePath, os.O_WRONLY|os.O_CREATE, FilePermMode)
if err != nil {
return err
}
fd.Close()
// Get the parts of the file
parts, enableCRC, expectedCRC, err := getDownloadParts(&bucket, objectKey, partSize, uRange)
if err != nil {
return err
}
jobs := make(chan downloadPart, len(parts))
results := make(chan downloadPart, len(parts))
failed := make(chan error)
die := make(chan bool)
var completedBytes int64
totalBytes := getObjectBytes(parts)
event := newProgressEvent(TransferStartedEvent, 0, totalBytes)
publishProgress(listener, event)
// Start the download workers
arg := downloadWorkerArg{&bucket, objectKey, tempFilePath, options, downloadPartHooker, enableCRC}
for w := 1; w <= routines; w++ {
go downloadWorker(w, arg, jobs, results, failed, die)
}
// Download parts concurrently
go downloadScheduler(jobs, parts)
// Waiting for parts download finished
completed := 0
for completed < len(parts) {
select {
case part := <-results:
completed++
completedBytes += (part.End - part.Start + 1)
parts[part.Index].CRC64 = part.CRC64
event = newProgressEvent(TransferDataEvent, completedBytes, totalBytes)
publishProgress(listener, event)
case err = <-failed:
close(die)
event = newProgressEvent(TransferFailedEvent, completedBytes, totalBytes)
publishProgress(listener, event)
return err
}
if completed >= len(parts) {
break
}
}
event = newProgressEvent(TransferCompletedEvent, completedBytes, totalBytes)
publishProgress(listener, event)
if enableCRC {
actualCRC := combineCRCInParts(parts)
err = checkDownloadCRC(actualCRC, expectedCRC)
if err != nil {
return err
}
}
return os.Rename(tempFilePath, filePath)
}
// ----- Concurrent download with chcekpoint -----
const downloadCpMagic = "92611BED-89E2-46B6-89E5-72F273D4B0A3"
type downloadCheckpoint struct {
Magic string // Magic
MD5 string // Checkpoint content MD5
FilePath string // Local file
Object string // Key
ObjStat objectStat // Object status
Parts []downloadPart // All download parts
PartStat []bool // Parts' download status
Start int64 // Start point of the file
End int64 // End point of the file
enableCRC bool // Whether has CRC check
CRC uint64 // CRC check value
}
type objectStat struct {
Size int64 // Object size
LastModified string // Last modified time
Etag string // Etag
}
// isValid flags of checkpoint data is valid. It returns true when the data is valid and the checkpoint is valid and the object is not updated.
func (cp downloadCheckpoint) isValid(bucket *Bucket, objectKey string, uRange *unpackedRange) (bool, error) {
// Compare the CP's Magic and the MD5
cpb := cp
cpb.MD5 = ""
js, _ := json.Marshal(cpb)
sum := md5.Sum(js)
b64 := base64.StdEncoding.EncodeToString(sum[:])
if cp.Magic != downloadCpMagic || b64 != cp.MD5 {
return false, nil
}
// Ensure the object is not updated.
meta, err := bucket.GetObjectDetailedMeta(objectKey)
if err != nil {
return false, err
}
objectSize, err := strconv.ParseInt(meta.Get(HTTPHeaderContentLength), 10, 0)
if err != nil {
return false, err
}
// Compare the object size, last modified time and etag
if cp.ObjStat.Size != objectSize ||
cp.ObjStat.LastModified != meta.Get(HTTPHeaderLastModified) ||
cp.ObjStat.Etag != meta.Get(HTTPHeaderEtag) {
return false, nil
}
// Check the download range
if uRange != nil {
start, end := adjustRange(uRange, objectSize)
if start != cp.Start || end != cp.End {
return false, nil
}
}
return true, nil
}
// load checkpoint from local file
func (cp *downloadCheckpoint) load(filePath string) error {
contents, err := ioutil.ReadFile(filePath)
if err != nil {
return err
}
err = json.Unmarshal(contents, cp)
return err
}
// dump funciton dumps to file
func (cp *downloadCheckpoint) dump(filePath string) error {
bcp := *cp
// Calculate MD5
bcp.MD5 = ""
js, err := json.Marshal(bcp)
if err != nil {
return err
}
sum := md5.Sum(js)
b64 := base64.StdEncoding.EncodeToString(sum[:])
bcp.MD5 = b64
// Serialize
js, err = json.Marshal(bcp)
if err != nil {
return err
}
// Dump
return ioutil.WriteFile(filePath, js, FilePermMode)
}
// todoParts gets unfinished parts
func (cp downloadCheckpoint) todoParts() []downloadPart {
dps := []downloadPart{}
for i, ps := range cp.PartStat {
if !ps {
dps = append(dps, cp.Parts[i])
}
}
return dps
}
// getCompletedBytes gets completed size
func (cp downloadCheckpoint) getCompletedBytes() int64 {
var completedBytes int64
for i, part := range cp.Parts {
if cp.PartStat[i] {
completedBytes += (part.End - part.Start + 1)
}
}
return completedBytes
}
// prepare initiates download tasks
func (cp *downloadCheckpoint) prepare(bucket *Bucket, objectKey, filePath string, partSize int64, uRange *unpackedRange) error {
// CP
cp.Magic = downloadCpMagic
cp.FilePath = filePath
cp.Object = objectKey
// Object
meta, err := bucket.GetObjectDetailedMeta(objectKey)
if err != nil {
return err
}
objectSize, err := strconv.ParseInt(meta.Get(HTTPHeaderContentLength), 10, 0)
if err != nil {
return err
}
cp.ObjStat.Size = objectSize
cp.ObjStat.LastModified = meta.Get(HTTPHeaderLastModified)
cp.ObjStat.Etag = meta.Get(HTTPHeaderEtag)
// Parts
cp.Parts, cp.enableCRC, cp.CRC, err = getDownloadParts(bucket, objectKey, partSize, uRange)
if err != nil {
return err
}
cp.PartStat = make([]bool, len(cp.Parts))
for i := range cp.PartStat {
cp.PartStat[i] = false
}
return nil
}
func (cp *downloadCheckpoint) complete(cpFilePath, downFilepath string) error {
os.Remove(cpFilePath)
return os.Rename(downFilepath, cp.FilePath)
}
// downloadFileWithCp downloads files with checkpoint.
func (bucket Bucket) downloadFileWithCp(objectKey, filePath string, partSize int64, options []Option, cpFilePath string, routines int, uRange *unpackedRange) error {
tempFilePath := filePath + TempFileSuffix
listener := getProgressListener(options)
// Load checkpoint data.
dcp := downloadCheckpoint{}
err := dcp.load(cpFilePath)
if err != nil {
os.Remove(cpFilePath)
}
// Load error or data invalid. Re-initialize the download.
valid, err := dcp.isValid(&bucket, objectKey, uRange)
if err != nil || !valid {
if err = dcp.prepare(&bucket, objectKey, filePath, partSize, uRange); err != nil {
return err
}
os.Remove(cpFilePath)
}
// Create the file if not exists. Otherwise the parts download will overwrite it.
fd, err := os.OpenFile(tempFilePath, os.O_WRONLY|os.O_CREATE, FilePermMode)
if err != nil {
return err
}
fd.Close()
// Unfinished parts
parts := dcp.todoParts()
jobs := make(chan downloadPart, len(parts))
results := make(chan downloadPart, len(parts))
failed := make(chan error)
die := make(chan bool)
completedBytes := dcp.getCompletedBytes()
event := newProgressEvent(TransferStartedEvent, completedBytes, dcp.ObjStat.Size)
publishProgress(listener, event)
// Start the download workers routine
arg := downloadWorkerArg{&bucket, objectKey, tempFilePath, options, downloadPartHooker, dcp.enableCRC}
for w := 1; w <= routines; w++ {
go downloadWorker(w, arg, jobs, results, failed, die)
}
// Concurrently downloads parts
go downloadScheduler(jobs, parts)
// Wait for the parts download finished
completed := 0
for completed < len(parts) {
select {
case part := <-results:
completed++
dcp.PartStat[part.Index] = true
dcp.Parts[part.Index].CRC64 = part.CRC64
dcp.dump(cpFilePath)
completedBytes += (part.End - part.Start + 1)
event = newProgressEvent(TransferDataEvent, completedBytes, dcp.ObjStat.Size)
publishProgress(listener, event)
case err = <-failed:
close(die)
event = newProgressEvent(TransferFailedEvent, completedBytes, dcp.ObjStat.Size)
publishProgress(listener, event)
return err
}
if completed >= len(parts) {
break
}
}
event = newProgressEvent(TransferCompletedEvent, completedBytes, dcp.ObjStat.Size)
publishProgress(listener, event)
if dcp.enableCRC {
actualCRC := combineCRCInParts(dcp.Parts)
err = checkDownloadCRC(actualCRC, dcp.CRC)
if err != nil {
return err
}
}
return dcp.complete(cpFilePath, tempFilePath)
}