package docstract

import (
	"errors"
	"io/ioutil"
	"strings"
)

// DocType identifies the kind of document held in a DocStract. The values
// stored in it are the Doc* constants declared in this package.
type DocType int

// Document type values. They are numbered from zero by iota in declaration
// order.
// NOTE(review): the block gives no explicit type, so these are untyped integer
// constants rather than DocType constants; they convert implicitly when
// assigned to a DocType.
const (
	// DocUnkown represents an unknown document type. It is zero, so it is
	// also the value of a DocType that was never set.
	DocUnkown = iota

	// DocPDF represents a PDF document.
	DocPDF

	// DocX represents a Microsoft Word (.docx) document.
	DocX

	// DocXLSX represents a Microsoft Excel (.xlsx) document.
	DocXLSX

	// DocHTML represents an HTML document.
	DocHTML
)

// DocStract stores the binary data of an extracted file together with its
// type and file name metadata.
type DocStract struct {
	// Type is the document type associated with Bytes.
	Type     DocType
	// FileName is the name the file is saved under. It is a pointer so that
	// nil can mean "no name"; SaveFile returns an error in that case.
	FileName *string
	// Bytes is the raw content that SaveFile writes to disk.
	Bytes    []byte
}

// SaveFile writes d.Bytes to a file named *d.FileName inside the directory
// path, using permission bits 0644.
//
// A "/" is appended to a non-empty path that does not already end in one; an
// empty path is left alone, so the file name is then used on its own. The
// document type is not inspected: an unknown type is saved like any other.
//
// It returns an error when d.FileName is nil, and otherwise whatever
// ioutil.WriteFile returns.
func (d *DocStract) SaveFile(path string) error {
	// Without a name there is nothing to write to.
	if d.FileName == nil {
		return errors.New("document does not have a filename cannot save")
	}

	// Make sure directory and file name end up separated by exactly the
	// slash the caller may have omitted.
	dir := path
	if n := len(dir); n != 0 && dir[n-1] != '/' {
		dir += "/"
	}

	target := dir + *d.FileName
	return ioutil.WriteFile(target, d.Bytes, 0644)
}

// getName derives d.Type and d.FileName from the text that follows the last
// newline in d.Bytes. That text is split on "." into chunks and classified:
//
//   - an empty first chunk is treated as a PDF; the name is read from chunk 2
//   - a first chunk containing "word" is treated as a docx; the name is read
//     from chunk 8
//   - a third chunk containing "worksheets" is treated as an xlsx; the name is
//     read from the chunk following the first chunk at index 3 or later that
//     contains "xlsx" (chunk 0 when no such chunk exists)
//   - anything else is treated as HTML; the name is read from chunk 0
//
// The chosen chunk is trimmed, passed through StripSeperators and given the
// matching extension. For every type except HTML the first three bytes of the
// result are then dropped. For xlsx, d.Bytes is additionally re-sliced to
// start at the first "PK" byte pair when one is present.
//
// If the chunk that should hold the name does not exist, d.FileName is set to
// nil and d.Type to DocUnkown, and d.Bytes is left untouched. Earlier versions
// panicked with an index-out-of-range error in that situation.
func (d *DocStract) getName() {
	// One conversion serves both the trailer lookup and the "PK" search below.
	raw := string(d.Bytes)

	// Everything after the final newline; the whole input when there is none
	// (LastIndex returns -1, so the slice starts at 0).
	nameBlock := raw[strings.LastIndex(raw, "\n")+1:]
	chunks := strings.Split(nameBlock, ".") // always has at least one element

	nameChunk := 0
	t := DocUnkown

	switch {
	case len(chunks[0]) == 0: // pdf
		t = DocPDF
		nameChunk = 2
	case strings.Contains(chunks[0], "word"): // docx
		t = DocX
		nameChunk = 8
	case len(chunks) > 2 && strings.Contains(chunks[2], "worksheets"): // xlsx
		// The length guard keeps short trailers from panicking here; they
		// fall through to the HTML default instead.
		t = DocXLSX
		for i := 3; i < len(chunks); i++ {
			if strings.Contains(StripSeperators(chunks[i]), "xlsx") {
				nameChunk = i + 1
				break
			}
		}
	default: // html
		t = DocHTML
	}

	// The trailer is shorter than its type requires, so no name can be
	// determined.
	if nameChunk >= len(chunks) {
		d.Type = DocUnkown
		d.FileName = nil
		return
	}

	name := StripSeperators(strings.TrimSpace(chunks[nameChunk]))

	// The extension is appended before the three-byte cut, so the slice can
	// never run past the end of the string.
	// NOTE(review): the three dropped bytes are presumably a fixed prefix in
	// front of the name; confirm against the producer of this data.
	switch t {
	case DocPDF:
		name = (name + ".pdf")[3:]
		d.Type = DocPDF

	case DocX:
		name = (name + ".docx")[3:]
		d.Type = DocX

	case DocXLSX:
		name = (name + ".xlsx")[3:]
		d.Type = DocXLSX
		// Discard whatever precedes the first "PK" pair, presumably the start
		// of the archive payload (TODO confirm). Not found (-1) or already at
		// the front (0) leaves the bytes as they are.
		if head := strings.Index(raw, "PK"); head > 0 {
			d.Bytes = d.Bytes[head:]
		}

	case DocHTML:
		name += ".html"
		d.Type = DocHTML
	}

	d.FileName = &name
}