2019-04-22 02:59:20 +00:00

334 lines
10 KiB
Go

// Copyright (C) 2015 The GoHBase Authors. All rights reserved.
// This file is part of GoHBase.
// Use of this source code is governed by the Apache License 2.0
// that can be found in the COPYING file.
package hrpc
import (
"context"
"errors"
"fmt"
"math"
"github.com/golang/protobuf/proto"
"github.com/tsuna/gohbase/pb"
)
const (
// DefaultMaxVersions defualt value for maximum versions to return for scan queries
DefaultMaxVersions uint32 = 1
// MinTimestamp default value for minimum timestamp for scan queries
MinTimestamp uint64 = 0
// MaxTimestamp default value for maximum timestamp for scan queries
MaxTimestamp = math.MaxUint64
// DefaultMaxResultSize Maximum number of bytes fetched when calling a scanner's
// next method. The default value is 2MB, which is good for 1ge networks.
// With faster and/or high latency networks this value should be increased.
DefaultMaxResultSize = 2097152
// DefaultNumberOfRows is default maximum number of rows fetched by scanner
DefaultNumberOfRows = math.MaxInt32
// DefaultMaxResultsPerColumnFamily is the default max number of cells fetched
// per column family for each row
DefaultMaxResultsPerColumnFamily = math.MaxInt32
)
// Scanner is used to read data sequentially from HBase.
// Scanner will be automatically closed if there's no more data to read,
// otherwise Close method should be called.
type Scanner interface {
// Next returns a row at a time.
// Once all rows are returned, subsequent calls will return io.EOF error.
//
// In case of an error, only the first call to Next() will return partial
// result (could be not a complete row) and the actual error,
// the subsequent calls will return io.EOF error.
Next() (*Result, error)
// Close should be called if it is desired to stop scanning before getting all of results.
// If you call Next() after calling Close() you might still get buffered results.
// Othwerwise, in case all results have been delivered or in case of an error, the Scanner
// will be closed automatically.
Close() error
}
// Scan represents a scanner on an HBase table.
type Scan struct {
base
baseQuery
startRow []byte
stopRow []byte
scannerID uint64
maxResultSize uint64
numberOfRows uint32
reversed bool
closeScanner bool
allowPartialResults bool
}
// baseScan returns a Scan struct with default values set.
func baseScan(ctx context.Context, table []byte,
options ...func(Call) error) (*Scan, error) {
s := &Scan{
base: base{
table: table,
ctx: ctx,
resultch: make(chan RPCResult, 1),
},
baseQuery: newBaseQuery(),
scannerID: math.MaxUint64,
maxResultSize: DefaultMaxResultSize,
numberOfRows: DefaultNumberOfRows,
reversed: false,
}
err := applyOptions(s, options...)
if err != nil {
return nil, err
}
return s, nil
}
func (s *Scan) String() string {
return fmt.Sprintf("Scan{Table=%q StartRow=%q StopRow=%q TimeRange=(%d, %d) "+
"MaxVersions=%d NumberOfRows=%d MaxResultSize=%d Familes=%v Filter=%v "+
"StoreLimit=%d StoreOffset=%d ScannerID=%d Close=%v}",
s.table, s.startRow, s.stopRow, s.fromTimestamp, s.toTimestamp,
s.maxVersions, s.numberOfRows, s.maxResultSize, s.families, s.filter,
s.storeLimit, s.storeOffset, s.scannerID, s.closeScanner)
}
// NewScan creates a scanner for the given table.
func NewScan(ctx context.Context, table []byte, options ...func(Call) error) (*Scan, error) {
return baseScan(ctx, table, options...)
}
// NewScanRange creates a scanner for the given table and key range.
// The range is half-open, i.e. [startRow; stopRow[ -- stopRow is not
// included in the range.
func NewScanRange(ctx context.Context, table, startRow, stopRow []byte,
options ...func(Call) error) (*Scan, error) {
scan, err := baseScan(ctx, table, options...)
if err != nil {
return nil, err
}
scan.startRow = startRow
scan.stopRow = stopRow
scan.key = startRow
return scan, nil
}
// NewScanStr creates a scanner for the given table.
func NewScanStr(ctx context.Context, table string, options ...func(Call) error) (*Scan, error) {
return NewScan(ctx, []byte(table), options...)
}
// NewScanRangeStr creates a scanner for the given table and key range.
// The range is half-open, i.e. [startRow; stopRow[ -- stopRow is not
// included in the range.
func NewScanRangeStr(ctx context.Context, table, startRow, stopRow string,
options ...func(Call) error) (*Scan, error) {
return NewScanRange(ctx, []byte(table), []byte(startRow), []byte(stopRow), options...)
}
// Name returns the name of this RPC call.
func (s *Scan) Name() string {
return "Scan"
}
// StopRow returns the end key (exclusive) of this scanner.
func (s *Scan) StopRow() []byte {
return s.stopRow
}
// StartRow returns the start key (inclusive) of this scanner.
func (s *Scan) StartRow() []byte {
return s.startRow
}
// IsClosing returns wether this scan closes scanner prematurely
func (s *Scan) IsClosing() bool {
return s.closeScanner
}
// AllowPartialResults returns true if client handles partials.
func (s *Scan) AllowPartialResults() bool {
return s.allowPartialResults
}
// Reversed returns true if scanner scans in reverse.
func (s *Scan) Reversed() bool {
return s.reversed
}
// NumberOfRows returns how many rows this scan
// fetches from regionserver in a single response.
func (s *Scan) NumberOfRows() uint32 {
return s.numberOfRows
}
// ToProto converts this Scan into a protobuf message
func (s *Scan) ToProto() proto.Message {
scan := &pb.ScanRequest{
Region: s.regionSpecifier(),
CloseScanner: &s.closeScanner,
NumberOfRows: &s.numberOfRows,
// tell server that we can process results that are only part of a row
ClientHandlesPartials: proto.Bool(true),
// tell server that we "handle" heartbeats by ignoring them
// since we don't really time out our scans (unless context was cancelled)
ClientHandlesHeartbeats: proto.Bool(true),
}
if s.scannerID != math.MaxUint64 {
scan.ScannerId = &s.scannerID
return scan
}
scan.Scan = &pb.Scan{
Column: familiesToColumn(s.families),
StartRow: s.startRow,
StopRow: s.stopRow,
TimeRange: &pb.TimeRange{},
MaxResultSize: &s.maxResultSize,
}
if s.maxVersions != DefaultMaxVersions {
scan.Scan.MaxVersions = &s.maxVersions
}
/* added support for limit number of cells per row */
if s.storeLimit != DefaultMaxResultsPerColumnFamily {
scan.Scan.StoreLimit = &s.storeLimit
}
if s.storeOffset != 0 {
scan.Scan.StoreOffset = &s.storeOffset
}
if s.fromTimestamp != MinTimestamp {
scan.Scan.TimeRange.From = &s.fromTimestamp
}
if s.toTimestamp != MaxTimestamp {
scan.Scan.TimeRange.To = &s.toTimestamp
}
if s.reversed {
scan.Scan.Reversed = &s.reversed
}
scan.Scan.Filter = s.filter
return scan
}
// NewResponse creates an empty protobuf message to read the response
// of this RPC.
func (s *Scan) NewResponse() proto.Message {
return &pb.ScanResponse{}
}
// DeserializeCellBlocks deserializes scan results from cell blocks
func (s *Scan) DeserializeCellBlocks(m proto.Message, b []byte) (uint32, error) {
scanResp := m.(*pb.ScanResponse)
partials := scanResp.GetPartialFlagPerResult()
scanResp.Results = make([]*pb.Result, len(partials))
var readLen uint32
for i, numCells := range scanResp.GetCellsPerResult() {
cells, l, err := deserializeCellBlocks(b[readLen:], numCells)
if err != nil {
return 0, err
}
scanResp.Results[i] = &pb.Result{
Cell: cells,
Partial: proto.Bool(partials[i]),
}
readLen += l
}
return readLen, nil
}
// ScannerID is an option for scan requests.
// This is an internal option to fetch the next set of results for an ongoing scan.
func ScannerID(id uint64) func(Call) error {
return func(s Call) error {
scan, ok := s.(*Scan)
if !ok {
return errors.New("'ScannerID' option can only be used with Scan queries")
}
scan.scannerID = id
return nil
}
}
// CloseScanner is an option for scan requests.
// Closes scanner after the first result is returned. This is an internal option
// but could be useful if you know that your scan result fits into one response
// in order to save an extra request.
func CloseScanner() func(Call) error {
return func(s Call) error {
scan, ok := s.(*Scan)
if !ok {
return errors.New("'Close' option can only be used with Scan queries")
}
scan.closeScanner = true
return nil
}
}
// MaxResultSize is an option for scan requests.
// Maximum number of bytes fetched when calling a scanner's next method.
// MaxResultSize takes priority over NumberOfRows.
func MaxResultSize(n uint64) func(Call) error {
return func(g Call) error {
scan, ok := g.(*Scan)
if !ok {
return errors.New("'MaxResultSize' option can only be used with Scan queries")
}
if n == 0 {
return errors.New("'MaxResultSize' option must be greater than 0")
}
scan.maxResultSize = n
return nil
}
}
// NumberOfRows is an option for scan requests.
// Specifies how many rows are fetched with each request to regionserver.
// Should be > 0, avoid extremely low values such as 1 because a request
// to regionserver will be made for every row.
func NumberOfRows(n uint32) func(Call) error {
return func(g Call) error {
scan, ok := g.(*Scan)
if !ok {
return errors.New("'NumberOfRows' option can only be used with Scan queries")
}
scan.numberOfRows = n
return nil
}
}
// AllowPartialResults is an option for scan requests.
// This option should be provided if the client has really big rows and
// wants to avoid OOM errors on her side. With this option provided, Next()
// will return partial rows.
func AllowPartialResults() func(Call) error {
return func(g Call) error {
scan, ok := g.(*Scan)
if !ok {
return errors.New("'AllowPartialResults' option can only be used with Scan queries")
}
scan.allowPartialResults = true
return nil
}
}
// Reversed is a Scan-only option which allows you to scan in reverse key order
// To use it the startKey would be greater than the end key
func Reversed() func(Call) error {
return func(g Call) error {
scan, ok := g.(*Scan)
if !ok {
return errors.New("'Reversed' option can only be used with Scan queries")
}
scan.reversed = true
return nil
}
}