From fbd1765331d9bc21eda5f5255fff6a9445ee8e9b Mon Sep 17 00:00:00 2001
From: YouROK <8YouROK8@mail.ru>
Date: Sun, 29 Jan 2023 20:04:24 +0300
Subject: [PATCH] add rutor api

---
 server/rutor/models/torrentDetails.go |  84 ++++++++++
 server/rutor/rutor.go                 | 217 ++++++++++++++++++++++++++
 server/rutor/torrsearch/filter.go     | 106 +++++++++++++
 server/rutor/torrsearch/index.go      |  93 +++++++++++
 server/rutor/torrsearch/tokenizer.go  |  23 +++
 server/rutor/utils/utils.go           |  16 ++
 server/web/api/rutor.go               |  17 ++
 7 files changed, 556 insertions(+)
 create mode 100644 server/rutor/models/torrentDetails.go
 create mode 100644 server/rutor/rutor.go
 create mode 100644 server/rutor/torrsearch/filter.go
 create mode 100644 server/rutor/torrsearch/index.go
 create mode 100644 server/rutor/torrsearch/tokenizer.go
 create mode 100644 server/rutor/utils/utils.go
 create mode 100644 server/web/api/rutor.go

diff --git a/server/rutor/models/torrentDetails.go b/server/rutor/models/torrentDetails.go
new file mode 100644
index 0000000..0c38a3c
--- /dev/null
+++ b/server/rutor/models/torrentDetails.go
@@ -0,0 +1,84 @@
+// Package models declares the data types shared by the rutor search code.
+package models
+
+import (
+	"strings"
+	"time"
+)
+
+const (
+	// Torrent category names.
+	CatMovie         = "Movie"
+	CatSeries        = "Series"
+	CatDocMovie      = "DocMovie"
+	CatDocSeries     = "DocSeries"
+	CatCartoonMovie  = "CartoonMovie"
+	CatCartoonSeries = "CartoonSeries"
+	CatTVShow        = "TVShow"
+	CatAnime         = "Anime"
+
+	// Video quality codes. Going by the names, the hundreds digit groups them
+	// by resolution: 1xx is 720p, 2xx is 1080p and 3xx is 2160p/UHD.
+	Q_LOWER           = 0
+	Q_WEBDL_720       = 100
+	Q_BDRIP_720       = 101
+	Q_BDRIP_HEVC_720  = 102
+	Q_WEBDL_1080      = 200
+	Q_BDRIP_1080      = 201
+	Q_BDRIP_HEVC_1080 = 202
+	Q_BDREMUX_1080    = 203
+	Q_WEBDL_SDR_2160  = 300
+	Q_WEBDL_HDR_2160  = 301
+	Q_WEBDL_DV_2160   = 302
+	Q_BDRIP_SDR_2160  = 303
+	Q_BDRIP_HDR_2160  = 304
+	Q_BDRIP_DV_2160   = 305
+	Q_UHD_BDREMUX_SDR = 306
+	Q_UHD_BDREMUX_HDR = 307
+	Q_UHD_BDREMUX_DV  = 308
+
+	// Audio (translation / voice-over) quality codes.
+	Q_UNKNOWN = 0
+	Q_A       = 1   // Author's translation, e.g. Goblin or the old-school translators
+	Q_L1      = 100 // Amateur single-voice voice-over
+	Q_L2      = 101 // Amateur two-voice voice-over
+	Q_L       = 102 // Amateur voice-over, 3-5 voices
+	Q_LS      = 103 // Amateur studio
+	Q_P1      = 200 // Professional single-voice voice-over
+	Q_P2      = 201 // Professional two-voice voice-over
+	Q_P       = 202 // Professional voice-over, 3-5 voices
+	Q_PS      = 203 // Professional studio
+	Q_D       = 300 // Official professional multi-voice dubbing
+	Q_LICENSE = 301 // Licensed release
+)
+
+// TorrentDetails describes one torrent record of the rutor database.
+type TorrentDetails struct {
+	Title        string
+	Name         string
+	Names        []string
+	Categories   string
+	Size         string
+	CreateDate   time.Time
+	Tracker      string
+	Link         string
+	Year         int
+	Peer         int
+	Seed         int
+	Magnet       string
+	Hash         string
+	IMDBID       string
+	VideoQuality int
+	AudioQuality int
+}
+
+// TorrentFile holds the name and size of a single file.
+type TorrentFile struct {
+	Name string
+	Size int64
+}
+
+// GetNames returns all entries of Names joined by single spaces.
+func (d TorrentDetails) GetNames() string {
+	return strings.Join(d.Names, " ")
+}
diff --git a/server/rutor/rutor.go b/server/rutor/rutor.go
new file mode 100644
index 0000000..9b06af3
--- /dev/null
+++ b/server/rutor/rutor.go
@@ -0,0 +1,217 @@
+// Package rutor keeps a local copy of the rutor torrent database up to date
+// and serves searches over it.
+package rutor
+
+import (
+	"bytes"
+	"compress/flate"
+	"encoding/json"
+	"io"
+	"net/http"
+	"os"
+	"path/filepath"
+	"sort"
+	"strconv"
+	"sync"
+	"time"
+
+	"github.com/agnivade/levenshtein"
+
+	"server/log"
+	"server/rutor/models"
+	"server/rutor/torrsearch"
+	"server/rutor/utils"
+	"server/settings"
+)
+
+const (
+	// dbURL is the location of the deflate-compressed JSON database.
+	dbURL = "https://github.com/yourok-0001/releases/raw/master/torr/rutor.ls"
+	// dbFileName and tmpFileName are file names inside settings.Path.
+	dbFileName  = "rutor.ls"
+	tmpFileName = "rutor.tmp"
+	// updateInterval is the time between two database refreshes.
+	updateInterval = 3 * time.Hour
+)
+
+var (
+	// mu guards torrs together with the torrsearch index built from it, so
+	// that a search never resolves IDs of one database against another.
+	mu    sync.RWMutex
+	torrs []*models.TorrentDetails
+
+	// stopMu guards stopCh, which is closed to stop the running updater.
+	stopMu sync.Mutex
+	stopCh chan struct{}
+)
+
+// Start launches the background updater. When rutor search is enabled in the
+// settings it downloads the database at once and then again every
+// updateInterval until Stop is called. An updater left over from an earlier
+// Start is stopped first.
+func Start() {
+	done := make(chan struct{})
+	stopMu.Lock()
+	if stopCh != nil {
+		close(stopCh)
+	}
+	stopCh = done
+	stopMu.Unlock()
+
+	go func() {
+		if !settings.BTsets.EnableRutorSearch {
+			return
+		}
+		updateDB()
+		ticker := time.NewTicker(updateInterval)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-done:
+				return
+			case <-ticker.C:
+				updateDB()
+			}
+		}
+	}()
+}
+
+// Stop tells the background updater to exit. An update that is already in
+// progress is not interrupted; the updater exits as soon as it finishes.
+func Stop() {
+	stopMu.Lock()
+	if stopCh != nil {
+		close(stopCh)
+		stopCh = nil
+	}
+	stopMu.Unlock()
+}
+
+// updateDB downloads a fresh database into a temporary file, moves it over
+// the current one and reloads it. If the download fails, the database file
+// and the loaded data are left as they were.
+func updateDB() {
+	log.TLogln("Update rutor db")
+	tmpName := filepath.Join(settings.Path, tmpFileName)
+	if !downloadDB(tmpName) {
+		return
+	}
+
+	dbName := filepath.Join(settings.Path, dbFileName)
+	err := os.Remove(dbName)
+	if err != nil && !os.IsNotExist(err) {
+		log.TLogln("Error remove old rutor db:", err)
+		return
+	}
+	err = os.Rename(tmpName, dbName)
+	if err != nil {
+		log.TLogln("Error rename rutor db:", err)
+		return
+	}
+	loadDB()
+}
+
+// downloadDB stores the body of dbURL in filename and reports whether the
+// file was written completely. The file is closed before returning, so the
+// caller may rename it right away; a partial file is removed.
+func downloadDB(filename string) bool {
+	resp, err := http.Get(dbURL)
+	if err != nil {
+		log.TLogln("Error connect to rutor db:", err)
+		return false
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		// Never overwrite the database with the body of an error page.
+		log.TLogln("Error download rutor db:", resp.Status)
+		return false
+	}
+
+	out, err := os.Create(filename)
+	if err != nil {
+		log.TLogln("Error create file rutor.tmp:", err)
+		return false
+	}
+	_, err = io.Copy(out, resp.Body)
+	// A failed Close can mean lost writes, so it counts as a download error.
+	if cerr := out.Close(); err == nil {
+		err = cerr
+	}
+	if err != nil {
+		log.TLogln("Error download rutor db:", err)
+		os.Remove(filename) // best effort, the next update recreates it
+		return false
+	}
+	return true
+}
+
+// loadDB reads, inflates and parses the database file from settings.Path and
+// rebuilds the search index. Errors are logged and leave the currently
+// loaded database untouched.
+func loadDB() {
+	log.TLogln("Load rutor db")
+	buf, err := os.ReadFile(filepath.Join(settings.Path, dbFileName))
+	if err != nil {
+		log.TLogln("Error read rutor db:", err)
+		return
+	}
+	r := flate.NewReader(bytes.NewReader(buf))
+	buf, err = io.ReadAll(r)
+	r.Close()
+	if err != nil {
+		log.TLogln("Error unpack rutor db:", err)
+		return
+	}
+	var ftors []*models.TorrentDetails
+	if err = json.Unmarshal(buf, &ftors); err != nil {
+		log.TLogln("Error parse rutor db:", err)
+		return
+	}
+
+	log.TLogln("Index rutor db")
+	mu.Lock()
+	torrs = ftors
+	torrsearch.NewIndex(torrs)
+	mu.Unlock()
+}
+
+// Search returns the torrents that contain every token of query. Results are
+// ordered by the Levenshtein distance between the normalized query and the
+// normalized "name + alternative names + year" of each torrent; ties are
+// broken by CreateDate, newest first. It returns nil when nothing matches.
+func Search(query string) []*models.TorrentDetails {
+	mu.RLock()
+	matchedIDs := torrsearch.Search(query)
+	list := make([]*models.TorrentDetails, 0, len(matchedIDs))
+	for _, id := range matchedIDs {
+		list = append(list, torrs[id])
+	}
+	mu.RUnlock()
+	if len(list) == 0 {
+		return nil
+	}
+
+	// Compute each distance once up front; inside the comparator it would be
+	// recomputed on every one of the O(n log n) comparisons.
+	type ranked struct {
+		torr *models.TorrentDetails
+		dist int
+	}
+	hash := utils.ClearStr(query)
+	items := make([]ranked, len(list))
+	for i, t := range list {
+		// ClearStr lower-cases its input, so no separate ToLower is needed.
+		key := utils.ClearStr(t.Name+t.GetNames()) + strconv.Itoa(t.Year)
+		items[i] = ranked{torr: t, dist: levenshtein.ComputeDistance(hash, key)}
+	}
+	sort.Slice(items, func(i, j int) bool {
+		if items[i].dist == items[j].dist {
+			return items[j].torr.CreateDate.Before(items[i].torr.CreateDate)
+		}
+		return items[i].dist < items[j].dist
+	})
+	for i := range items {
+		list[i] = items[i].torr
+	}
+	return list
+}
diff --git a/server/rutor/torrsearch/filter.go b/server/rutor/torrsearch/filter.go
new file mode 100644
index 0000000..c9f30fb
--- /dev/null
+++ b/server/rutor/torrsearch/filter.go
@@ -0,0 +1,106 @@
+package torrsearch
+
+import (
+	"strings"
+
+	snowballeng "github.com/kljensen/snowball/english"
+	snowballru "github.com/kljensen/snowball/russian"
+)
+
+// lowercaseFilter returns a slice of tokens normalized to lower case, with
+// every 'ё' replaced by 'е'.
+func lowercaseFilter(tokens []string) []string {
+	r := make([]string, len(tokens))
+	for i, token := range tokens {
+		r[i] = replaceChars(strings.ToLower(token))
+	}
+	return r
+}
+
+// stopwordFilter returns a slice of tokens with stop words removed.
+func stopwordFilter(tokens []string) []string {
+	r := make([]string, 0, len(tokens))
+	for _, token := range tokens {
+		if !isStopWord(token) {
+			r = append(r, token)
+		}
+	}
+	return r
+}
+
+// stemmerFilter returns a slice of stemmed tokens. The Russian stem is used
+// when the Russian stemmer changed the token, the English stem otherwise.
+// Tokens for which either stemmer yields an empty string are dropped.
+func stemmerFilter(tokens []string) []string {
+	r := make([]string, 0, len(tokens))
+	for _, token := range tokens {
+		worden := snowballeng.Stem(token, false)
+		wordru := snowballru.Stem(token, false)
+		if wordru == "" || worden == "" {
+			// Skip instead of keeping a "" placeholder, which would end up
+			// as a token in the index and in the analyzed query.
+			continue
+		}
+		if wordru != token {
+			r = append(r, wordru)
+		} else {
+			r = append(r, worden)
+		}
+	}
+	return r
+}
+
+// replaceChars returns word with every 'ё' replaced by 'е'.
+func replaceChars(word string) string {
+	out := []rune(word)
+	for i, r := range out {
+		if r == 'ё' {
+			out[i] = 'е'
+		}
+	}
+	return string(out)
+}
+
+// isStopWord reports whether word is one of the listed English or Russian stop words.
+func isStopWord(word string) bool {
+	switch word {
+	case "a", "about", "above", "after", "again", "against", "all", "am", "an",
+		"and", "any", "are", "as", "at", "be", "because", "been", "before",
+		"being", "below", "between", "both", "but", "by", "can", "did", "do",
+		"does", "doing", "don", "down", "during", "each", "few", "for", "from",
+		"further", "had", "has", "have", "having", "he", "her", "here", "hers",
+		"herself", "him", "himself", "his", "how", "i", "if", "in", "into", "is",
+		"it", "its", "itself", "just", "me", "more", "most", "my", "myself",
+		"no", "nor", "not", "now", "of", "off", "on", "once", "only", "or",
+		"other", "our", "ours", "ourselves", "out", "over", "own", "s", "same",
+		"she", "should", "so", "some", "such", "t", "than", "that", "the", "their",
+		"theirs", "them", "themselves", "then", "there", "these", "they",
+		"this", "those", "through", "to", "too", "under", "until", "up",
+		"very", "was", "we", "were", "what", "when", "where", "which", "while",
+		"who", "whom", "why", "will", "with", "you", "your", "yours", "yourself",
+		"yourselves", "и", "в", "во", "не", "что", "он", "на", "я", "с",
+		"со", "как", "а", "то", "все", "она", "так", "его",
+		"но", "да", "ты", "к", "у", "же", "вы", "за", "бы",
+		"по", "только", "ее", "мне", "было", "вот", "от",
+		"меня", "еще", "нет", "о", "из", "ему", "теперь",
+		"когда", "даже", "ну", "вдруг", "ли", "если", "уже",
+		"или", "ни", "быть", "был", "него", "до", "вас",
+		"нибудь", "опять", "уж", "вам", "ведь", "там", "потом",
+		"себя", "ничего", "ей", "может", "они", "тут", "где",
+		"есть", "надо", "ней", "для", "мы", "тебя", "их",
+		"чем", "была", "сам", "чтоб", "без", "будто", "чего",
+		"раз", "тоже", "себе", "под", "будет", "ж", "тогда",
+		"кто", "этот", "того", "потому", "этого", "какой",
+		"совсем", "ним", "здесь", "этом", "один", "почти",
+		"мой", "тем", "чтобы", "нее", "сейчас", "были", "куда",
+		"зачем", "всех", "никогда", "можно", "при", "наконец",
+		"два", "об", "другой", "хоть", "после", "над", "больше",
+		"тот", "через", "эти", "нас", "про", "всего", "них",
+		"какая", "много", "разве", "три", "эту", "моя",
+		"впрочем", "хорошо", "свою", "этой", "перед", "иногда",
+		"лучше", "чуть", "том", "нельзя", "такой", "им", "более",
+		"всегда", "конечно", "всю", "между":
+		return true
+	}
+	return false
+}
diff --git a/server/rutor/torrsearch/index.go b/server/rutor/torrsearch/index.go
new file mode 100644
index 0000000..4273d8f
--- /dev/null
+++ b/server/rutor/torrsearch/index.go
@@ -0,0 +1,93 @@
+package torrsearch
+
+import (
+	"log"
+	"strconv"
+
+	"server/rutor/models"
+)
+
+// Index is an inverted Index. It maps tokens to document IDs.
+type Index map[string][]int
+
+// idx is the index consulted by Search; NewIndex replaces it as a whole.
+var idx Index
+
+// NewIndex builds an index over torrs and makes it the one used by Search.
+// A document ID is the position of the torrent in torrs.
+func NewIndex(torrs []*models.TorrentDetails) {
+	log.Println("Index torrs")
+	// Fill a private map first and publish it only when complete, so that
+	// Search never reads a map that is still being written to.
+	fresh := make(Index)
+	fresh.add(torrs)
+	idx = fresh
+}
+
+// Search returns the IDs of the documents that contain every token of text,
+// in ascending order.
+func Search(text string) []int {
+	return idx.search(text)
+}
+
+// add indexes torrs by the tokens of their name, alternative names and year.
+// IDs are appended in ascending order, which intersection relies on.
+func (idx Index) add(torrs []*models.TorrentDetails) {
+	for ID, torr := range torrs {
+		if torr == nil {
+			// Nil entries keep their ID but are never indexed.
+			continue
+		}
+		for _, token := range analyze(torr.Name + " " + torr.GetNames() + " " + strconv.Itoa(torr.Year)) {
+			ids := idx[token]
+			if len(ids) > 0 && ids[len(ids)-1] == ID {
+				// Don't add same ID twice.
+				continue
+			}
+			idx[token] = append(ids, ID)
+		}
+	}
+}
+
+// intersection returns the set intersection between a and b.
+// a and b have to be sorted in ascending order and contain no duplicates.
+func intersection(a []int, b []int) []int {
+	// The result can never be longer than the shorter input.
+	minLen := len(a)
+	if len(b) < minLen {
+		minLen = len(b)
+	}
+	r := make([]int, 0, minLen)
+	var i, j int
+	for i < len(a) && j < len(b) {
+		if a[i] < b[j] {
+			i++
+		} else if a[i] > b[j] {
+			j++
+		} else {
+			r = append(r, a[i])
+			i++
+			j++
+		}
+	}
+	return r
+}
+
+// search returns the IDs listed under every token of text. It returns nil
+// when text has no tokens or when any token is missing from the index.
+func (idx Index) search(text string) []int {
+	var r []int
+	for _, token := range analyze(text) {
+		if ids, ok := idx[token]; ok {
+			if r == nil {
+				r = ids
+			} else {
+				r = intersection(r, ids)
+			}
+		} else {
+			// Token doesn't exist.
+			return nil
+		}
+	}
+	return r
+}
diff --git a/server/rutor/torrsearch/tokenizer.go b/server/rutor/torrsearch/tokenizer.go
new file mode 100644
index 0000000..3eaee9c
--- /dev/null
+++ b/server/rutor/torrsearch/tokenizer.go
@@ -0,0 +1,23 @@
+package torrsearch
+
+import (
+	"strings"
+	"unicode"
+)
+
+// tokenize returns a slice of tokens for the given text.
+func tokenize(text string) []string {
+	return strings.FieldsFunc(text, func(r rune) bool {
+		// Split on any character that is not a letter or a number.
+		return !unicode.IsLetter(r) && !unicode.IsNumber(r)
+	})
+}
+
+// analyze tokenizes text, lower-cases the tokens, drops stop words and stems the rest.
+func analyze(text string) []string {
+	tokens := tokenize(text)
+	tokens = lowercaseFilter(tokens)
+	tokens = stopwordFilter(tokens)
+	tokens = stemmerFilter(tokens)
+	return tokens
+}
diff --git a/server/rutor/utils/utils.go b/server/rutor/utils/utils.go
new file mode 100644
index 0000000..3569e56
--- /dev/null
+++ b/server/rutor/utils/utils.go
@@ -0,0 +1,16 @@
+package utils
+
+import "strings"
+
+// ClearStr returns str lower-cased and stripped of every character that is
+// not an ASCII digit, a Latin letter a-z or a Cyrillic letter а-я/ё.
+func ClearStr(str string) string {
+	var b strings.Builder
+	b.Grow(len(str))
+	for _, r := range strings.ToLower(str) {
+		if (r >= '0' && r <= '9') || (r >= 'a' && r <= 'z') || (r >= 'а' && r <= 'я') || r == 'ё' {
+			b.WriteRune(r)
+		}
+	}
+	return b.String()
+}
diff --git a/server/web/api/rutor.go b/server/web/api/rutor.go
new file mode 100644
index 0000000..3afbbe7
--- /dev/null
+++ b/server/web/api/rutor.go
@@ -0,0 +1,17 @@
+package api
+
+import (
+	"net/http"
+
+	"github.com/gin-gonic/gin"
+
+	"server/rutor"
+)
+
+// rutorSearch handles a search request: it looks up the "query" URL parameter
+// in the rutor database and responds with the matches as JSON.
+func rutorSearch(c *gin.Context) {
+	query := c.Query("query")
+	list := rutor.Search(query)
+	c.JSON(http.StatusOK, list)
+}