add rutor api

This commit is contained in:
YouROK
2023-01-29 20:04:24 +03:00
parent d71c42936d
commit fbd1765331
7 changed files with 428 additions and 0 deletions

View File

@@ -0,0 +1,99 @@
package torrsearch
import (
"strings"
snowballeng "github.com/kljensen/snowball/english"
snowballru "github.com/kljensen/snowball/russian"
)
// lowercaseFilter returns a new slice holding every token converted to lower
// case and then passed through replaceChars. The result has the same length
// and order as tokens; the input slice is not modified.
func lowercaseFilter(tokens []string) []string {
	normalized := make([]string, 0, len(tokens))
	for _, tok := range tokens {
		lowered := strings.ToLower(tok)
		normalized = append(normalized, replaceChars(lowered))
	}
	return normalized
}
// stopwordFilter returns a new slice containing, in their original order,
// only those tokens for which isStopWord reports false.
func stopwordFilter(tokens []string) []string {
	kept := make([]string, 0, len(tokens))
	for i := range tokens {
		if isStopWord(tokens[i]) {
			continue
		}
		kept = append(kept, tokens[i])
	}
	return kept
}
// stemmerFilter returns a slice of stemmed tokens.
//
// Every token is run through both the English and the Russian Snowball
// stemmers. The Russian stem is used when it differs from the token;
// otherwise the English stem is used. A token for which either stemmer
// returns an empty string is dropped, so the result never contains empty
// placeholder entries and may be shorter than the input.
func stemmerFilter(tokens []string) []string {
	r := make([]string, 0, len(tokens))
	for _, token := range tokens {
		worden := snowballeng.Stem(token, false)
		wordru := snowballru.Stem(token, false)
		if wordru == "" || worden == "" {
			// Nothing usable to index or search for: skip the token instead
			// of leaving an empty string in the result.
			continue
		}
		if wordru != token {
			r = append(r, wordru)
		} else {
			r = append(r, worden)
		}
	}
	return r
}
// replaceChars returns word with every 'ё' rune replaced by 'е'. All other
// runes are copied through unchanged.
func replaceChars(word string) string {
	runes := make([]rune, 0, len(word))
	for _, r := range word {
		if r == 'ё' {
			r = 'е'
		}
		runes = append(runes, r)
	}
	return string(runes)
}
// isStopWord reports whether word is one of the built-in English or Russian
// stop words. The comparison is exact, so word must match an entry
// byte-for-byte (the entries are all lower case and written without 'ё').
func isStopWord(word string) bool {
	switch word {
	// English stop words.
	case "a", "about", "above", "after", "again", "against", "all", "am", "an",
		"and", "any", "are", "as", "at", "be", "because", "been", "before",
		"being", "below", "between", "both", "but", "by", "can", "did", "do",
		"does", "doing", "don", "down", "during", "each", "few", "for", "from",
		"further", "had", "has", "have", "having", "he", "her", "here", "hers",
		"herself", "him", "himself", "his", "how", "i", "if", "in", "into", "is",
		"it", "its", "itself", "just", "me", "more", "most", "my", "myself",
		"no", "nor", "not", "now", "of", "off", "on", "once", "only", "or",
		"other", "our", "ours", "ourselves", "out", "over", "own", "s", "same",
		"she", "should", "so", "some", "such", "t", "than", "that", "the", "their",
		"theirs", "them", "themselves", "then", "there", "these", "they",
		"this", "those", "through", "to", "too", "under", "until", "up",
		"very", "was", "we", "were", "what", "when", "where", "which", "while",
		"who", "whom", "why", "will", "with", "you", "your", "yours", "yourself",
		"yourselves":
		return true
	// Russian stop words.
	case "и", "в", "во", "не", "что", "он", "на", "я", "с",
		"со", "как", "а", "то", "все", "она", "так", "его",
		"но", "да", "ты", "к", "у", "же", "вы", "за", "бы",
		"по", "только", "ее", "мне", "было", "вот", "от",
		"меня", "еще", "нет", "о", "из", "ему", "теперь",
		"когда", "даже", "ну", "вдруг", "ли", "если", "уже",
		"или", "ни", "быть", "был", "него", "до", "вас",
		"нибудь", "опять", "уж", "вам", "ведь", "там", "потом",
		"себя", "ничего", "ей", "может", "они", "тут", "где",
		"есть", "надо", "ней", "для", "мы", "тебя", "их",
		"чем", "была", "сам", "чтоб", "без", "будто", "чего",
		"раз", "тоже", "себе", "под", "будет", "ж", "тогда",
		"кто", "этот", "того", "потому", "этого", "какой",
		"совсем", "ним", "здесь", "этом", "один", "почти",
		"мой", "тем", "чтобы", "нее", "сейчас", "были", "куда",
		"зачем", "всех", "никогда", "можно", "при", "наконец",
		"два", "об", "другой", "хоть", "после", "над", "больше",
		"тот", "через", "эти", "нас", "про", "всего", "них",
		"какая", "много", "разве", "три", "эту", "моя",
		"впрочем", "хорошо", "свою", "этой", "перед", "иногда",
		"лучше", "чуть", "том", "нельзя", "такой", "им", "более",
		"всегда", "конечно", "всю", "между":
		return true
	default:
		return false
	}
}

View File

@@ -0,0 +1,76 @@
package torrsearch
import (
"log"
"server/rutor/models"
"strconv"
)
// Index is an inverted index. It maps each token to the list of IDs of the
// documents containing that token. IDs are appended in increasing order as
// documents are added, which is what intersection relies on.
type Index map[string][]int
// idx is the package-level index. NewIndex replaces it and Search queries it.
var idx Index
// NewIndex builds a fresh inverted index over torrs and installs it as the
// package-level index queried by Search, replacing any previous one.
// Documents are identified by their position in torrs.
//
// The index is fully populated in a local map before it is published, so the
// package-level index never refers to a half-built map.
// NOTE(review): the final assignment itself is not synchronized; if NewIndex
// can run concurrently with Search the callers still need their own locking —
// confirm against callers.
func NewIndex(torrs []*models.TorrentDetails) {
	log.Println("Index torrs")
	index := make(Index)
	index.add(torrs)
	idx = index
}
// Search queries the package-level index for text and returns the IDs of the
// documents that contain every token of the analyzed query. It returns nil
// when the query yields no tokens or when any token is absent from the index.
func Search(text string) []int {
	ids := idx.search(text)
	return ids
}
// add indexes every entry of torrs, using the entry's position in the slice
// as its document ID. The indexed text of an entry is its Name, the result of
// GetNames and its Year, joined with spaces and passed through analyze.
func (idx Index) add(torrs []*models.TorrentDetails) {
	for docID, torr := range torrs {
		text := torr.Name + " " + torr.GetNames() + " " + strconv.Itoa(torr.Year)
		for _, token := range analyze(text) {
			postings := idx[token]
			// IDs arrive in increasing order, so a repeated token within the
			// same document can only collide with the last recorded ID.
			if postings == nil || postings[len(postings)-1] != docID {
				idx[token] = append(postings, docID)
			}
		}
	}
}
// intersection returns the set intersection between a and b.
// a and b have to be sorted in ascending order and contain no duplicates.
// The result is a newly allocated, non-nil slice in ascending order; neither
// input is modified.
func intersection(a []int, b []int) []int {
	// The intersection can never hold more elements than the shorter input,
	// so size the result by the minimum length rather than the maximum.
	capHint := len(a)
	if len(b) < capHint {
		capHint = len(b)
	}
	r := make([]int, 0, capHint)
	i, j := 0, 0
	for i < len(a) && j < len(b) {
		switch {
		case a[i] < b[j]:
			i++
		case a[i] > b[j]:
			j++
		default:
			// Present in both inputs: keep it and advance both cursors.
			r = append(r, a[i])
			i++
			j++
		}
	}
	return r
}
// search queries the index for text and returns the IDs of the documents
// that contain every token produced by analyze(text).
//
// It returns nil when the query has no tokens or when any token is missing
// from the index. For a single-token query the returned slice is the index's
// own posting list, not a copy, so callers must treat it as read-only.
func (idx Index) search(text string) []int {
	var matched []int
	for _, token := range analyze(text) {
		postings, found := idx[token]
		if !found {
			// Token doesn't exist, so no document can match the whole query.
			return nil
		}
		if matched == nil {
			// First token: start from its posting list.
			matched = postings
			continue
		}
		matched = intersection(matched, postings)
	}
	return matched
}

View File

@@ -0,0 +1,23 @@
package torrsearch
import (
"strings"
"unicode"
)
// tokenize splits text into tokens made of consecutive letters and numbers.
// Every other character acts as a separator and is discarded; runs of
// separators never produce empty tokens.
func tokenize(text string) []string {
	isSeparator := func(r rune) bool {
		return !(unicode.IsLetter(r) || unicode.IsNumber(r))
	}
	return strings.FieldsFunc(text, isSeparator)
}
// analyze analyzes the text and returns a slice of tokens.
func analyze(text string) []string {
tokens := tokenize(text)
tokens = lowercaseFilter(tokens)
tokens = stopwordFilter(tokens)
tokens = stemmerFilter(tokens)
return tokens
}