2019-04-22 02:59:20 +00:00

415 lines
9.8 KiB
Go

package grok
import (
"bufio"
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
)
var (
// canonical matches %{SYNTAX}, %{SYNTAX:SEMANTIC} and
// %{SYNTAX:SEMANTIC:TYPE} references whose parts are made of word
// characters only. addPatternsFromMap uses it to find the patterns a
// definition depends on.
canonical = regexp.MustCompile(`%{(\w+(?::\w+(?::\w+)?)?)}`)
// normal matches the same reference forms as canonical but additionally
// allows '-' and '.' inside each part. denormalizePattern uses it to find
// the references it expands.
normal = regexp.MustCompile(`%{([\w-.]+(?::[\w-.]+(?::[\w-.]+)?)?)}`)
// symbolic matches a single non-word character. aliasizePatternName
// replaces every match with "_" to build a capture-group name.
symbolic = regexp.MustCompile(`\W`)
)
// A Config structure is used to configure a Grok parser.
type Config struct {
// NamedCapturesOnly, when true, makes references without a semantic name
// (plain %{SYNTAX}) expand to unnamed groups, so only %{SYNTAX:SEMANTIC}
// references produce named captures.
NamedCapturesOnly bool
// SkipDefaultPatterns, when true, stops NewWithConfig from loading the
// package-level default pattern map.
SkipDefaultPatterns bool
// RemoveEmptyValues, when true, omits captures whose matched text is the
// empty string from parse results.
RemoveEmptyValues bool
// PatternsDir lists paths that NewWithConfig passes, in order, to
// AddPatternsFromPath.
PatternsDir []string
// Patterns holds extra named patterns that NewWithConfig adds through
// AddPatternsFromMap after the paths in PatternsDir have been loaded.
Patterns map[string]string
}
// Grok object is used to load patterns and deconstruct strings using those
// patterns.
type Grok struct {
// rawPattern holds every pattern as it was supplied, keyed by name, before
// any %{...} reference is expanded.
rawPattern map[string]string
// config is the configuration given to NewWithConfig.
config *Config
// aliases maps a capture-group name produced by aliasizePatternName back
// to the semantic name it was derived from.
aliases map[string]string
// compiledPatterns caches compiled expressions, keyed by the pattern
// string passed to compile.
compiledPatterns map[string]*gRegexp
// patterns holds the denormalized form of each loaded pattern, keyed by
// name; buildPatterns recreates it from rawPattern.
patterns map[string]*gPattern
// patternsGuard is write-locked by AddPattern and AddPatternsFromMap and
// read-locked by compile while it reads patterns.
patternsGuard *sync.RWMutex
// compiledGuard is locked by compile around accesses to compiledPatterns.
compiledGuard *sync.RWMutex
}
// gPattern is a loaded pattern after denormalization.
type gPattern struct {
// expression is the pattern text as returned by denormalizePattern.
expression string
// typeInfo is the type information denormalizePattern collected for the
// pattern.
typeInfo semanticTypes
}
// gRegexp pairs a compiled expression with the type information gathered
// while the pattern it was compiled from was denormalized.
type gRegexp struct {
// regexp is the compiled form of the denormalized pattern.
regexp *regexp.Regexp
// typeInfo maps semantic names to the type ParseTyped converts them to.
typeInfo semanticTypes
}
// semanticTypes maps a semantic name to the type requested for it in a
// %{SYNTAX:SEMANTIC:TYPE} reference. ParseTyped handles the values "int" and
// "float"; "string" is never stored.
type semanticTypes map[string]string
// New builds a Grok object from a zero-valued Config, i.e. with every option
// switched off and no extra pattern sources.
func New() (*Grok, error) {
	defaults := new(Config)
	return NewWithConfig(defaults)
}
// NewWithConfig returns a Grok object that is configured to behave according
// to the supplied Config structure.
//
// Pattern sources are loaded in this order: the package default patterns
// (unless config.SkipDefaultPatterns is set), every path in
// config.PatternsDir, then config.Patterns. The first source that fails to
// load aborts construction and its error is returned with a nil *Grok.
//
// A nil config is treated like an empty Config.
func NewWithConfig(config *Config) (*Grok, error) {
	if config == nil {
		config = &Config{}
	}

	g := &Grok{
		config:           config,
		aliases:          map[string]string{},
		compiledPatterns: map[string]*gRegexp{},
		patterns:         map[string]*gPattern{},
		rawPattern:       map[string]string{},
		patternsGuard:    new(sync.RWMutex),
		compiledGuard:    new(sync.RWMutex),
	}

	if !config.SkipDefaultPatterns {
		// Report a failure to load the defaults instead of handing back a
		// parser that is silently missing them.
		if err := g.AddPatternsFromMap(patterns); err != nil {
			return nil, err
		}
	}

	for _, path := range config.PatternsDir {
		if err := g.AddPatternsFromPath(path); err != nil {
			return nil, err
		}
	}

	if err := g.AddPatternsFromMap(config.Patterns); err != nil {
		return nil, err
	}

	return g, nil
}
// addPattern denormalizes pattern against the patterns loaded so far and
// stores the result under name. When denormalization fails nothing is stored
// and the error is returned. It does no locking of its own.
func (g *Grok) addPattern(name, pattern string) error {
	expanded, types, err := g.denormalizePattern(pattern, g.patterns)
	if err == nil {
		g.patterns[name] = &gPattern{expression: expanded, typeInfo: types}
	}
	return err
}
// AddPattern registers pattern under name and rebuilds the set of loaded
// patterns from all raw patterns known so far.
//
// It returns the error produced by the rebuild, for example when pattern
// references a name that has not been defined. The raw pattern stays
// registered even when the rebuild fails.
func (g *Grok) AddPattern(name, pattern string) error {
	g.patternsGuard.Lock()
	defer g.patternsGuard.Unlock()

	g.rawPattern[name] = pattern
	// Propagate rebuild failures, matching AddPatternsFromMap, rather than
	// always reporting success.
	return g.buildPatterns()
}
// AddPatternsFromMap registers every name/pattern pair of m as a raw pattern,
// replacing earlier definitions with the same name, and then rebuilds the
// loaded patterns. The rebuild error, if any, is returned. The whole
// operation runs under the patterns write lock.
func (g *Grok) AddPatternsFromMap(m map[string]string) error {
	g.patternsGuard.Lock()
	defer g.patternsGuard.Unlock()

	for name := range m {
		g.rawPattern[name] = m[name]
	}

	return g.buildPatterns()
}
// addPatternsFromMap denormalizes the patterns in m and stores them in
// g.patterns.
//
// It first records, for every pattern, the names it references through
// %{...}. A reference that is neither already loaded nor defined in m yields
// a "no pattern found" error before anything is stored. The patterns are then
// added one by one in the reversed order returned by sortGraph. The first
// pattern that cannot be denormalized stops the process and its error is
// returned, so a broken definition is no longer dropped silently.
//
// It does no locking of its own.
func (g *Grok) addPatternsFromMap(m map[string]string) error {
	patternDeps := graph{}
	for k, v := range m {
		keys := []string{}
		for _, key := range canonical.FindAllStringSubmatch(v, -1) {
			// Only the SYNTAX part of SYNTAX[:SEMANTIC[:TYPE]] names a pattern.
			syntax := strings.Split(key[1], ":")[0]
			if g.patterns[syntax] == nil {
				if _, ok := m[syntax]; !ok {
					return fmt.Errorf("no pattern found for %%{%s}", syntax)
				}
			}
			keys = append(keys, syntax)
		}
		patternDeps[k] = keys
	}

	// NOTE(review): the second result of sortGraph is still discarded; its
	// type is not visible here, so confirm whether it should be reported.
	order, _ := sortGraph(patternDeps)
	for _, key := range reverseList(order) {
		if err := g.addPattern(key, m[key]); err != nil {
			return err
		}
	}
	return nil
}
// AddPatternsFromPath adds new patterns from the files in the specified
// directory to the list of loaded patterns.
//
// path must exist. If it is a directory every entry directly inside it is
// read; otherwise path itself is used as a glob pattern. Each non-empty line
// that does not start with '#' must have the form "NAME definition", split at
// the first space.
//
// An error is returned when path cannot be stat'ed, when the glob pattern is
// malformed, when a file cannot be opened or read, when a line has no
// definition after the name, or when the collected patterns fail to load.
func (g *Grok) AddPatternsFromPath(path string) error {
	fi, err := os.Stat(path)
	if err != nil {
		return fmt.Errorf("invalid path : %s", path)
	}
	if fi.IsDir() {
		path = filepath.Join(path, "*")
	}

	// path comes from the caller, so the glob pattern can be malformed.
	files, err := filepath.Glob(path)
	if err != nil {
		return err
	}

	filePatterns := map[string]string{}
	for _, fileName := range files {
		if err := loadPatternFile(fileName, filePatterns); err != nil {
			return err
		}
	}

	return g.AddPatternsFromMap(filePatterns)
}

// loadPatternFile reads the "NAME definition" lines of fileName into dst.
// Directories are skipped without error.
func loadPatternFile(fileName string, dst map[string]string) error {
	file, err := os.Open(fileName)
	if err != nil {
		return err
	}
	defer file.Close()

	info, err := file.Stat()
	if err != nil {
		return err
	}
	if info.IsDir() {
		// The "*" glob also returns sub-directories; they hold no lines.
		return nil
	}

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		l := scanner.Text()
		if len(l) == 0 || l[0] == '#' {
			continue
		}
		names := strings.SplitN(l, " ", 2)
		if len(names) < 2 {
			// Without this check names[1] panics on a line with no space.
			return fmt.Errorf("malformed pattern definition in %s: %q", fileName, l)
		}
		dst[names[0]] = names[1]
	}
	// Scan stops silently on read errors and over-long lines; surface them.
	return scanner.Err()
}
// Match reports whether text matches pattern. The error is non-nil only when
// pattern cannot be compiled, in which case the result is false.
func (g *Grok) Match(pattern, text string) (bool, error) {
	compiled, err := g.compile(pattern)
	if err != nil {
		return false, err
	}
	return compiled.regexp.MatchString(text), nil
}
// compiledParse applies the already compiled expression gr to text and
// returns the named captures keyed by their original semantic names.
//
// The map is empty when text does not match. Unnamed groups are ignored, and
// empty captures are left out when RemoveEmptyValues is enabled. The returned
// error is always nil.
func (g *Grok) compiledParse(gr *gRegexp, text string) (map[string]string, error) {
	result := map[string]string{}

	groups := gr.regexp.FindStringSubmatch(text)
	if len(groups) == 0 {
		return result, nil
	}

	for idx, group := range gr.regexp.SubexpNames() {
		if group == "" {
			continue
		}
		value := groups[idx]
		if g.config.RemoveEmptyValues && value == "" {
			continue
		}
		result[g.nameToAlias(group)] = value
	}

	return result, nil
}
// Parse compiles pattern, applies it to text and returns the named captures
// as a map. A compilation failure yields a nil map and the error.
func (g *Grok) Parse(pattern, text string) (map[string]string, error) {
	compiled, err := g.compile(pattern)
	if err == nil {
		return g.compiledParse(compiled, text)
	}
	return nil, err
}
// ParseTyped returns an interface{} map with the captured fields of text,
// converted according to the types declared in pattern.
//
// Captures declared as "int" are stored as int and those declared as "float"
// as float64; captures without type information stay strings. Any other
// declared type aborts parsing with an error. Conversion errors from strconv
// are discarded and the value strconv returned is stored as is.
func (g *Grok) ParseTyped(pattern string, text string) (map[string]interface{}, error) {
	compiled, err := g.compile(pattern)
	if err != nil {
		return nil, err
	}

	typed := map[string]interface{}{}
	groups := compiled.regexp.FindStringSubmatch(text)
	if len(groups) == 0 {
		return typed, nil
	}

	for idx, group := range compiled.regexp.SubexpNames() {
		if group == "" {
			continue
		}
		raw := groups[idx]
		if g.config.RemoveEmptyValues && raw == "" {
			continue
		}

		key := g.nameToAlias(group)
		kind, hasType := compiled.typeInfo[group]
		if !hasType {
			typed[key] = raw
			continue
		}

		switch kind {
		case "int":
			number, _ := strconv.Atoi(raw)
			typed[key] = number
		case "float":
			number, _ := strconv.ParseFloat(raw, 64)
			typed[key] = number
		default:
			return nil, fmt.Errorf("ERROR the value %s cannot be converted to %s", raw, kind)
		}
	}

	return typed, nil
}
// ParseToMultiMap works like Parse but collects the values of each capture
// name in a string slice, in match order, so that several captures sharing a
// name are all kept instead of overwriting one another.
func (g *Grok) ParseToMultiMap(pattern, text string) (map[string][]string, error) {
	compiled, err := g.compile(pattern)
	if err != nil {
		return nil, err
	}

	multi := map[string][]string{}
	groups := compiled.regexp.FindStringSubmatch(text)
	if len(groups) == 0 {
		return multi, nil
	}

	for idx, group := range compiled.regexp.SubexpNames() {
		if group == "" {
			continue
		}
		value := groups[idx]
		if g.config.RemoveEmptyValues && value == "" {
			continue
		}
		key := g.nameToAlias(group)
		multi[key] = append(multi[key], value)
	}

	return multi, nil
}
// buildPatterns throws away all denormalized patterns and recreates them from
// the raw patterns registered so far. It does no locking of its own.
func (g *Grok) buildPatterns() error {
	g.patterns = make(map[string]*gPattern)
	return g.addPatternsFromMap(g.rawPattern)
}
// compile returns the compiled form of pattern, using the cache in
// compiledPatterns when it already holds an entry for the exact pattern
// string. On a cache miss the pattern is denormalized under the patterns read
// lock, compiled, stored in the cache under the compiled write lock and
// returned.
func (g *Grok) compile(pattern string) (*gRegexp, error) {
	g.compiledGuard.RLock()
	cached, hit := g.compiledPatterns[pattern]
	g.compiledGuard.RUnlock()
	if hit {
		return cached, nil
	}

	g.patternsGuard.RLock()
	expanded, types, err := g.denormalizePattern(pattern, g.patterns)
	g.patternsGuard.RUnlock()
	if err != nil {
		return nil, err
	}

	re, err := regexp.Compile(expanded)
	if err != nil {
		return nil, err
	}

	fresh := &gRegexp{regexp: re, typeInfo: types}

	g.compiledGuard.Lock()
	g.compiledPatterns[pattern] = fresh
	g.compiledGuard.Unlock()

	return fresh, nil
}
// denormalizePattern expands every %{SYNTAX[:SEMANTIC[:TYPE]]} reference in
// pattern into a regular-expression group built from storedPatterns.
//
// A reference becomes a named group, unless NamedCapturesOnly is set and the
// reference has no semantic part, in which case it becomes an unnamed group.
// The group name is the syntax name, or the aliasized semantic name when one
// is given. A TYPE other than "string" is recorded for the semantic name, and
// type information of the referenced pattern is merged in without overriding
// entries already recorded.
//
// It returns the expanded pattern and the collected type information, or an
// empty string and an error when a referenced pattern is unknown.
func (g *Grok) denormalizePattern(pattern string, storedPatterns map[string]*gPattern) (string, semanticTypes, error) {
	types := semanticTypes{}

	for _, ref := range normal.FindAllStringSubmatch(pattern, -1) {
		parts := strings.Split(ref[1], ":")
		syntax := parts[0]
		semantic, alias := syntax, syntax
		if len(parts) > 1 {
			semantic = parts[1]
			alias = g.aliasizePatternName(semantic)
		}

		// Record the requested type, except for the default "string".
		if len(parts) == 3 && parts[2] != "string" {
			types[semantic] = parts[2]
		}

		stored, found := storedPatterns[syntax]
		if !found {
			return "", types, fmt.Errorf("no pattern found for %%{%s}", syntax)
		}

		var group string
		if g.config.NamedCapturesOnly && len(parts) <= 1 {
			group = "(" + stored.expression + ")"
		} else {
			group = "(?P<" + alias + ">" + stored.expression + ")"
		}

		// Types set by this pattern take precedence over inherited ones.
		for name, kind := range stored.typeInfo {
			if _, seen := types[name]; !seen {
				types[name] = kind
			}
		}

		pattern = strings.Replace(pattern, ref[0], group, -1)
	}

	return pattern, types, nil
}
// aliasizePatternName turns name into a capture-group name by replacing every
// non-word character with "_". It records the mapping from that group name
// back to name in g.aliases and returns the group name.
func (g *Grok) aliasizePatternName(name string) string {
	sanitized := symbolic.ReplaceAllString(name, "_")
	g.aliases[sanitized] = name
	return sanitized
}
// nameToAlias looks name up in g.aliases and returns the value recorded for
// it, i.e. the semantic name a capture-group name was derived from. Names
// without an entry are returned unchanged.
func (g *Grok) nameToAlias(name string) string {
	if original, found := g.aliases[name]; found {
		return original
	}
	return name
}
// ParseStream will match the given pattern on a line by line basis from the
// reader and apply the results to the process function.
//
// Each line is passed to the parser including its trailing '\n'. A final line
// that is not terminated by '\n' is processed as well. Reading stops with a
// nil error at end of input; it stops early with the corresponding error when
// pattern does not compile, when reading fails, or when process returns an
// error.
func (g *Grok) ParseStream(reader *bufio.Reader, pattern string, process func(map[string]string) error) error {
	gr, err := g.compile(pattern)
	if err != nil {
		return err
	}

	for {
		line, readErr := reader.ReadString('\n')
		if readErr != nil && readErr != io.EOF {
			return readErr
		}

		// ReadString returns the data read so far together with io.EOF when
		// the input does not end in the delimiter. That data is the last
		// line and must not be dropped.
		if len(line) > 0 {
			values, err := g.compiledParse(gr, line)
			if err != nil {
				return err
			}
			if err = process(values); err != nil {
				return err
			}
		}

		if readErr == io.EOF {
			return nil
		}
	}
}