mirror of https://github.com/misterzym/jdocset.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
204 lines
5.5 KiB
204 lines
5.5 KiB
8 years ago
|
package main
|
||
|
|
||
|
import (
|
||
|
"os"
|
||
|
"github.com/yhat/scrape"
|
||
|
"strings"
|
||
|
"golang.org/x/net/html"
|
||
|
"golang.org/x/net/html/atom"
|
||
|
)
|
||
|
|
||
|
type Verifier func(string) bool
|
||
|
type TypeEvaluator func(Verifier, Verifier) bool
|
||
|
type IndexEntry struct {
|
||
|
name string
|
||
|
elementType ElementType
|
||
|
path string
|
||
|
}
|
||
|
|
||
|
var ALL_ELEMENT_TYPES = []ElementType{
|
||
|
Class,
|
||
|
Method,
|
||
|
Field,
|
||
|
Constructor,
|
||
|
Interface,
|
||
|
Exception,
|
||
|
Error,
|
||
|
Enum,
|
||
|
Trait,
|
||
|
Notation,
|
||
|
Package,
|
||
|
}
|
||
|
|
||
|
var ELEMENT_TYPE_TO_TYPE_EVALUATORS = map[ElementType][]TypeEvaluator{
|
||
|
Class: NewTypeEvaluators(isClass),
|
||
|
Method: NewTypeEvaluators(isStaticMethod, isMethod),
|
||
|
Field: NewTypeEvaluators(isStaticField, isField),
|
||
|
Constructor: NewTypeEvaluators(isConstructor),
|
||
|
Interface: NewTypeEvaluators(isInterface),
|
||
|
Exception: NewTypeEvaluators(isException),
|
||
|
Error: NewTypeEvaluators(isError),
|
||
|
Enum: NewTypeEvaluators(isEnum),
|
||
|
Trait: NewTypeEvaluators(isTrait),
|
||
|
Notation: NewTypeEvaluators(isNotation),
|
||
|
Package: NewTypeEvaluators(isPackage),
|
||
|
}
|
||
|
|
||
|
func parseIndex(indexFilePath string, entryHandler func(IndexEntry)) {
|
||
|
|
||
|
log.Info("Indexing from file", "file", indexFilePath)
|
||
|
|
||
|
indexed := 0
|
||
|
file, err := os.OpenFile(indexFilePath, os.O_RDONLY, 0666)
|
||
|
|
||
|
if err != nil {
|
||
|
log.Error("Unable to open file", "file", indexFilePath, "error", err)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
root, err := html.Parse(file)
|
||
|
|
||
|
if err != nil {
|
||
|
log.Error("Unable to parse index", "file", file, "error", err)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
anchorTags := scrape.FindAll(root, scrape.ByTag(atom.A))
|
||
|
|
||
|
for _, tag := range anchorTags {
|
||
|
var parentTag = tag.Parent
|
||
|
|
||
|
if parentTag.FirstChild != tag {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
isParentSpan := parentTag.DataAtom == atom.Span
|
||
|
isParentCode := parentTag.DataAtom == atom.Code
|
||
|
isParentItalic := parentTag.DataAtom == atom.I
|
||
|
isParentBold := parentTag.DataAtom == atom.B
|
||
|
|
||
|
if isParentSpan || isParentCode || isParentItalic || isParentBold {
|
||
|
parentTag = parentTag.Parent
|
||
|
if parentTag.FirstChild != tag.Parent {
|
||
|
continue
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if parentTag.DataAtom != atom.Dt {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
text := scrape.Text(parentTag)
|
||
|
var tagType ElementType = NotFound
|
||
|
var dtClassName = scrape.Attr(parentTag, "class")
|
||
|
|
||
|
lowercaseText := strings.ToLower(text)
|
||
|
|
||
|
textContainsInsensitive := func(s string) bool {
|
||
|
return strings.Contains(lowercaseText, s)
|
||
|
}
|
||
|
|
||
|
dtClassNameHasSuffix := func(s string) bool {
|
||
|
return strings.HasSuffix(dtClassName, s)
|
||
|
}
|
||
|
|
||
|
tagTypeDetermined := false
|
||
|
|
||
|
for _, elementType := range ALL_ELEMENT_TYPES {
|
||
|
|
||
|
typeEvaluators := ELEMENT_TYPE_TO_TYPE_EVALUATORS[elementType]
|
||
|
|
||
|
for _, evaluator := range typeEvaluators {
|
||
|
|
||
|
if evaluator(textContainsInsensitive, dtClassNameHasSuffix) {
|
||
|
tagType = elementType
|
||
|
tagTypeDetermined = true
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if tagTypeDetermined {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if tagType == NotFound {
|
||
|
log.Warn("Warning: could not determine type", "text", text, "dtClassName", dtClassName)
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
name := scrape.Text(tag)
|
||
|
path := scrape.Attr(tag, "href")
|
||
|
|
||
|
entryHandler(IndexEntry{name: name, elementType: tagType, path: path})
|
||
|
|
||
|
indexed++
|
||
|
}
|
||
|
|
||
|
log.Info("Indexed", "count", indexed)
|
||
|
}
|
||
|
|
||
|
func NewTypeEvaluators(a TypeEvaluator, others ...TypeEvaluator) []TypeEvaluator {
|
||
|
|
||
|
typeEvaluators := make([]TypeEvaluator, 1 + len(others))
|
||
|
|
||
|
typeEvaluators[0] = a
|
||
|
for i, typeEvaluator := range others {
|
||
|
typeEvaluators[i + 1] = typeEvaluator
|
||
|
}
|
||
|
|
||
|
return typeEvaluators
|
||
|
}
|
||
|
|
||
|
func isClass(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("class in") || textContainsInsensitive("- class") || dtClassNameHasSuffix("class")
|
||
|
}
|
||
|
|
||
|
func isStaticMethod(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("static method in") || dtClassNameHasSuffix("method")
|
||
|
}
|
||
|
|
||
|
func isStaticField(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("static variable in") || textContainsInsensitive("field in") || dtClassNameHasSuffix("field")
|
||
|
}
|
||
|
|
||
|
func isConstructor(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("constructor") || dtClassNameHasSuffix("constructor")
|
||
|
}
|
||
|
|
||
|
func isMethod(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("method in")
|
||
|
}
|
||
|
|
||
|
func isField(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("variable in")
|
||
|
}
|
||
|
|
||
|
func isInterface(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("interface in") || textContainsInsensitive("- interface") || dtClassNameHasSuffix("interface")
|
||
|
}
|
||
|
|
||
|
func isException(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("exception in") || textContainsInsensitive("- exception") || dtClassNameHasSuffix("exception")
|
||
|
}
|
||
|
|
||
|
func isError(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("error in") || textContainsInsensitive("- error") || dtClassNameHasSuffix("error")
|
||
|
}
|
||
|
|
||
|
func isEnum(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("enum in") || textContainsInsensitive("- enum") || dtClassNameHasSuffix("enum")
|
||
|
}
|
||
|
|
||
|
func isTrait(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("trait in")
|
||
|
}
|
||
|
|
||
|
func isNotation(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("annotation type") || dtClassNameHasSuffix("annotation")
|
||
|
}
|
||
|
|
||
|
func isPackage(textContainsInsensitive, dtClassNameHasSuffix Verifier) bool {
|
||
|
return textContainsInsensitive("package") || dtClassNameHasSuffix("package")
|
||
|
}
|