// Package metadata extracts structured information (title, year, season,
// episode range, resolution) from release names.
package metadata

import (
	"fmt"
	"regexp"
	"strconv"
	"strings"
	"time"

	"polaris/log"
	"polaris/pkg/utils"
)

// Regular expressions used by the TV name parser. They are compiled once at
// package initialisation instead of on every call.
var (
	// tvPunctuationRe matches anything that is not a letter, word character
	// or whitespace.
	tvPunctuationRe = regexp.MustCompile(`[^\p{L}\w\s]`)

	tvSeasonShortRe = regexp.MustCompile(`s\d{1,2}`)
	tvSeasonLongRe  = regexp.MustCompile(`season (\d{1,2})`)
	tvSeasonCnRe    = regexp.MustCompile(`第.{1,2}季`)

	tvEpisodeBracketRe = regexp.MustCompile(`\[(\d{1,4})\]`)
	tvEpisodeCnRe      = regexp.MustCompile(`第(\d{1,4})[话話集]`)
	tvEpisodeLongRe    = regexp.MustCompile(`episode (\d{1,4})`)
	// NOTE: `[^(season)]` is a character class (any rune except '(', ')',
	// 's', 'e', 'a', 'o', 'n'), not a negative look-ahead; RE2 has none.
	tvEpisodeLooseRe = regexp.MustCompile(`[^(season)][^\d\w](\d{1,2})[^\d\w]`)

	tvResolutionRe = regexp.MustCompile(`\d{3,4}p`)
	tvSeasonPackRe = regexp.MustCompile(`((\d{1,2}-\d{1,2}))|(complete)|(全集)|(合集)|(\W[sS]\d{1,2}\W)`)
)

// Info is the result of parsing a release name.
type Info struct {
	NameEn       string // title; see parseName for how it is derived
	NameCn       string // title candidate containing Chinese characters (or NameEn for ASCII names)
	Year         int    // year as reported by findYear
	Season       int    // season number; -1 when an ASCII name carries no season marker
	StartEpisode int    // first episode covered by the release
	EndEpisode   int    // last episode covered by the release
	Resolution   string // e.g. "1080p"; empty when unknown
	IsSeasonPack bool   // true when no episode number could be found
}

// ParseExtraDescription tries to recover episode numbers from desc when the
// name alone looked like a season pack. If desc yields a positive start
// episode, the episode range is copied over and IsSeasonPack is cleared.
func (m *Info) ParseExtraDescription(desc string) {
	if !m.IsSeasonPack {
		return
	}
	// Sometimes the episode info only appears in the description text.
	extra := ParseTv(desc)
	if extra.StartEpisode <= 0 {
		return
	}
	m.IsSeasonPack = false
	m.StartEpisode = extra.StartEpisode
	m.EndEpisode = extra.EndEpisode
}

// normalizeTitleForMatch lower-cases s, replaces punctuation with spaces and
// collapses every whitespace run into a single space.
func normalizeTitleForMatch(s string) string {
	s = tvPunctuationRe.ReplaceAllString(strings.ToLower(s), " ")
	return strings.Join(strings.Fields(s), " ")
}

// IsAcceptable reports whether any of names matches the parsed title.
//
// Both the parsed titles and each candidate are normalized first (lower-case,
// punctuation replaced by spaces, whitespace collapsed). A candidate that
// utils.IsASCII accepts must match on word boundaries; any other candidate
// matches as a plain substring. Candidates that normalize to the empty string
// are skipped, because an empty pattern would match nearly every title.
func (m *Info) IsAcceptable(names ...string) bool {
	nameCN := normalizeTitleForMatch(m.NameCn)
	nameEN := normalizeTitleForMatch(m.NameEn)

	for _, name := range names {
		name = normalizeTitleForMatch(name)
		if name == "" {
			continue
		}

		if utils.IsASCII(name) {
			// ASCII names must match whole words. The name is already free of
			// punctuation; QuoteMeta only guards against future changes.
			wordRe := regexp.MustCompile(`\b` + regexp.QuoteMeta(name) + `\b`)
			if wordRe.MatchString(nameCN) || wordRe.MatchString(nameEN) {
				return true
			}
			continue
		}

		if strings.Contains(nameCN, name) || strings.Contains(nameEN, name) {
			return true
		}
	}
	return false
}

// ParseTv parses a TV release name. The name is lower-cased and zero-width
// spaces (U+200B) are removed before parsing.
func ParseTv(name string) *Info {
	name = strings.ToLower(name)
	name = strings.ReplaceAll(name, "\u200b", "") // remove hidden unicode character
	return parseName(name)
}

// mustAtoiDigits converts a string that a regular expression has already
// restricted to a few ASCII digits. A failure means the pattern and this call
// disagree, which is a programming error, hence the panic.
func mustAtoiDigits(digits string) int {
	n, err := strconv.Atoi(digits)
	if err != nil {
		panic(fmt.Sprintf("convert %s error: %v", digits, err))
	}
	return n
}

// isNumeralRune reports whether r is an ASCII digit or one of the Chinese
// numerals listed in chineseNum.
func isNumeralRune(r rune) bool {
	return (r >= '0' && r <= '9') || chineseNum[r]
}

// adjacentNumber reads the run of numerals that begins at rune index start of
// s. It returns the parsed value and the length of the run in runes, or
// (-1, -1) when start is out of range or no numeral is found there.
func adjacentNumber(s string, start int) (n1 int, l int) {
	return adjacentNumberInRunes([]rune(s), start)
}

// adjacentNumberInRunes is adjacentNumber for an already decoded rune slice,
// so callers scanning one string repeatedly do not re-decode it.
//
// A run that strconv.Atoi rejects (Chinese numerals, mixed runs, overflow) is
// looked up in chinese2Num; unknown runs therefore yield 0.
func adjacentNumberInRunes(runes []rune, start int) (int, int) {
	if start < 0 || start >= len(runes) {
		return -1, -1
	}
	end := start
	for end < len(runes) && isNumeralRune(runes[end]) {
		end++
	}
	if end == start {
		return -1, -1
	}
	text := string(runes[start:end])
	if v, err := strconv.Atoi(text); err == nil {
		return v, end - start
	}
	return chinese2Num[text], end - start
}

// findSeason looks for a season marker in s and returns the season number
// together with a byte position used by parseName to cut the title:
//
//   - "s01":      position of the first digit (one past the 's')
//   - "season 1": position of the word "season"
//   - "第1季":     position of "第"
//
// It returns (-1, -1) when no marker is present.
func findSeason(s string) (n int, p int) {
	if loc := tvSeasonShortRe.FindStringIndex(s); loc != nil {
		// Use the match offset itself; searching for the digits alone could
		// hit an earlier, unrelated occurrence such as part of a year.
		return mustAtoiDigits(s[loc[0]+1 : loc[1]]), loc[0] + 1
	}

	if m := tvSeasonLongRe.FindStringSubmatchIndex(s); m != nil {
		return mustAtoiDigits(s[m[2]:m[3]]), m[0]
	}

	if loc := tvSeasonCnRe.FindStringIndex(s); loc != nil {
		matched := []rune(s[loc[0]:loc[1]])
		num := string(matched[1 : len(matched)-1]) // text between 第 and 季
		season, err := strconv.Atoi(num)
		if err != nil {
			log.Warnf("parse season number %v error: %v, try to parse using chinese", num, err)
			season = chinese2Num[num]
		}
		return season, loc[0]
	}

	return -1, -1
}

// isEpisodeSuffix reports whether r is one of the CJK episode counters.
func isEpisodeSuffix(r rune) bool {
	return r == '话' || r == '話' || r == '集'
}

// episodeRangeEnd scans rr from index from, just after an "eNN" marker, for
// the end of an episode range such as "e01-e21", "e01 - 21" or
// "s01e01-s01e21". It returns first when no range end is found.
//
// NOTE(review): once a dash has been seen the scan continues to the end of
// the string, so a later number (e.g. a resolution) can be taken as the
// range end. Behaviour kept as in the original.
func episodeRangeEnd(rr []rune, from int, first int) int {
	foundDash := false
	for j := from; j < len(rr); j++ {
		c := rr[j]
		if c == '-' {
			foundDash = true
			continue
		}
		if c == ' ' || c == 'e' {
			continue
		}
		if !foundDash {
			break
		}
		if c == 's' {
			// S01E01-S01E21: skip "sNN" plus the following rune.
			if s1, l1 := adjacentNumberInRunes(rr, j+1); s1 > 0 {
				if last, _ := adjacentNumberInRunes(rr, j+l1+2); last > 0 {
					return last
				}
			}
		}
		if last, _ := adjacentNumberInRunes(rr, j); last > 0 {
			return last
		}
	}
	return first
}

// findEpisodes extracts the episode range from s. It returns (-1, -1) when no
// episode number can be found. Rules are tried in this order:
//
//  1. "eNN" optionally followed by a range end, and "第N集" / "第N-M集"
//  2. "[NN]" (numbers that look like a year are skipped)
//  3. "第NN話"
//  4. "episode NN"
//  5. a bare one or two digit number between separators, unless the name
//     looks like a season pack
func findEpisodes(s string) (start int, end int) {
	rr := []rune(s)
	for i, r := range rr {
		switch r {
		case 'e':
			first, l := adjacentNumberInRunes(rr, i+1)
			if first <= 0 {
				continue
			}
			return first, episodeRangeEnd(rr, i+l+1, first)
		case '第':
			// When no number follows, l is -1 and next points back at '第',
			// which matches none of the checks below.
			first, l := adjacentNumberInRunes(rr, i+1)
			next := i + l + 1
			if next >= len(rr) {
				continue
			}
			if isEpisodeSuffix(rr[next]) {
				return first, first
			}
			if rr[next] == '-' {
				last, l1 := adjacentNumberInRunes(rr, next+1)
				// Bounds check: a name ending right after the number
				// (e.g. "第1-2") must not index past the slice.
				if tail := next + 1 + l1; tail < len(rr) && isEpisodeSuffix(rr[tail]) {
					return first, last
				}
			}
		}
	}

	// [11] [1080p], [2022][113][HEVC][GB][4K]
	if brackets := tvEpisodeBracketRe.FindAllStringSubmatch(s, -1); len(brackets) > 0 {
		yearFloor := time.Now().Year() - 50
		for _, m := range brackets {
			n, err := strconv.Atoi(m[1])
			if err != nil {
				log.Debugf("convert %s error: %v", m[1], err)
				continue
			}
			if n > yearFloor { // most likely a year, not an episode
				continue
			}
			return n, n
		}
		// NOTE(review): when every bracketed number looks like a year the
		// remaining rules are not tried. Behaviour kept as in the original.
		return -1, -1
	}

	// 【第09話】
	if m := tvEpisodeCnRe.FindStringSubmatch(s); m != nil {
		n := mustAtoiDigits(m[1])
		return n, n
	}

	// The Road Season 2 Episode 12 XviD-AFG
	if m := tvEpisodeLongRe.FindStringSubmatch(s); m != nil {
		n := mustAtoiDigits(m[1])
		return n, n
	}

	// SHY 靦腆英雄 / Shy -05 ( CR 1920x1080 AVC AAC MKV)
	if maybeSeasonPack(s) {
		// Season packs skip this loose rule to avoid false matches.
		return -1, -1
	}
	if m := tvEpisodeLooseRe.FindStringSubmatch(s); m != nil {
		// Use the captured number; the first rune of the match is only
		// context and may itself be a digit.
		n := mustAtoiDigits(m[1])
		return n, n
	}

	return -1, -1
}

// matchResolution returns the first "NNNp"/"NNNNp" token in s. Failing that,
// it returns "720p" or "1080p" when s contains "720" or "1080" respectively,
// and "" otherwise.
func matchResolution(s string) string {
	if res := tvResolutionRe.FindString(s); res != "" {
		return res
	}
	switch {
	case strings.Contains(s, "720"):
		return "720p"
	case strings.Contains(s, "1080"):
		return "1080p"
	}
	return ""
}

// maybeSeasonPack reports whether s looks like a season pack: it contains a
// "N-M" range, the word "complete", "全集", "合集", or a standalone "sNN".
func maybeSeasonPack(s string) bool {
	return tvSeasonPackRe.MatchString(s)
}

// Disabled legacy implementation, kept for reference only.
//
//func parseEnglishName(name string) *Info {
//	meta := &Info{
//		//Season:  -1,
//		Episode: -1,
//	}
//
//	start, end := findEpisodes(name)
//	if start > 0 && end > 0 {
//		meta.Episode = start
//	}
//
//	re := regexp.MustCompile(`[^\p{L}\w\s]`)
//	name = re.ReplaceAllString(strings.ToLower(name), " ")
//	newSplits := strings.Split(strings.TrimSpace(name), " ")
//
//	seasonRe := regexp.MustCompile(`^s\d{1,2}`)
//	resRe := regexp.MustCompile(`^\d{3,4}p`)
//	episodeRe := regexp.MustCompile(`e\d{1,3}`)
//
//	var seasonIndex = -1
//	var episodeIndex = -1
//	var resIndex = -1
//	for i, p := range newSplits {
//		p = strings.TrimSpace(p)
//		if p == "" {
//			continue
//		}
//		if seasonRe.MatchString(p) {
//			//season part
//			seasonIndex = i
//		} else if resRe.MatchString(p) {
//			resIndex = i
//		}
//		if i >= seasonIndex && episodeRe.MatchString(p) {
//			episodeIndex = i
//		}
//	}
//
//	if seasonIndex != -1 {
//		//season exists
//		ss := seasonRe.FindAllString(newSplits[seasonIndex], -1)
//		if len(ss) != 0 {
//			//season info
//
//			ssNum := strings.TrimLeft(ss[0], "s")
//			n, err := strconv.Atoi(ssNum)
//			if err != nil {
//				panic(fmt.Sprintf("convert %s error: %v", ssNum, err))
//			}
//			meta.Season = n
//		}
//	} else { //maybe like Season 1?
//		seasonRe := regexp.MustCompile(`season \d{1,2}`)
//		matches := seasonRe.FindAllString(name, -1)
//		if len(matches) > 0 {
//			for i, s := range newSplits {
//				if s == "season" {
//					seasonIndex = i
//				}
//			}
//			numRe := regexp.MustCompile(`\d{1,2}`)
//			seNum := numRe.FindAllString(matches[0], -1)[0]
//			n, err := strconv.Atoi(seNum)
//			if err != nil {
//				panic(fmt.Sprintf("convert %s error: %v", seNum, err))
//			}
//			meta.Season = n
//
//		}
//	}
//
//	if episodeIndex != -1 {
//		// ep := episodeRe.FindAllString(newSplits[episodeIndex], -1)
//		//if len(ep) > 0 {
//		//	//episode info exists
//		//	epNum := strings.TrimLeft(ep[0], "e")
//		//	n, err := strconv.Atoi(epNum)
//		//	if err != nil {
//		//		panic(fmt.Sprintf("convert %s error: %v", epNum, err))
//		//	}
//		//	meta.Episode = n
//		//}
//	} else { //no episode, maybe like One Punch Man S2 - 08 [1080p].mkv
//
//		// numRe := regexp.MustCompile(`^\d{1,2}$`)
//		// for i, p := range newSplits {
//		// 	if numRe.MatchString(p) {
//		// 		if i > 0 && strings.Contains(newSplits[i-1], "season") { //last word cannot be season
//		// 			continue
//		// 		}
//		// 		if i < seasonIndex {
//		// 			//episode number most likely should come after season number
//		// 			continue
//		// 		}
//		// 		//episodeIndex = i
//		// 		n, err := strconv.Atoi(p)
//		// 		if err != nil {
//		// 			panic(fmt.Sprintf("convert %s error: %v", p, err))
//		// 		}
//		// 		meta.Episode = n
//
//		// 	}
//		// }
//
//	}
//	if resIndex != -1 {
//		//resolution exists
//		meta.Resolution = newSplits[resIndex]
//	}
//	if meta.Episode == -1 {
//		meta.Episode = -1
//		meta.IsSeasonPack = true
//	}
//
//	if seasonIndex > 0 {
//		//name exists
//		names := newSplits[0:seasonIndex]
//		meta.NameEn = strings.TrimSpace(strings.Join(names, " "))
//	} else {
//		meta.NameEn = name
//	}
//
//	return meta
//}

// parseName fills an Info from an already lower-cased release name.
//
// A blank name yields an Info with Season 1 and nothing else set. Otherwise
// year, season, episode range and resolution are extracted with the helpers
// in this package, and the title is derived in one of two ways:
//
//   - Names that utils.IsASCII accepts: the title is everything before the
//     season marker (or the whole name when there is none), cut further at
//     the year when findYear reports a positive position.
//   - All other names: the name is split on square/lenticular brackets; the
//     longest field becomes NameEn and the longest field containing Chinese
//     characters becomes NameCn, both with punctuation removed.
func parseName(name string) *Info {
	meta := &Info{Season: 1}
	if strings.TrimSpace(name) == "" {
		return meta
	}

	// findYear is defined elsewhere in this package; presumably it returns
	// the year and its position in name, with yearP > 0 meaning "found".
	year, yearP := findYear(name)
	meta.Year = year

	season, seasonP := findSeason(name)
	// One character before the season position is dropped as well (the
	// separator, or the 's' of "s01").
	titleEnd := seasonP - 1
	if season == -1 {
		log.Debugf("not find season info: %s", name)
		if !utils.IsASCII(name) {
			season = 1
		}
		// Without a season marker the title may run to the end of the name.
		titleEnd = len(name)
	}
	meta.Season = season

	start, end := findEpisodes(name)
	if start > 0 && end > 0 {
		meta.StartEpisode = start
		meta.EndEpisode = end
	} else {
		meta.IsSeasonPack = true
	}
	meta.Resolution = matchResolution(name)

	//if meta.IsSeasonPack && meta.Episode != 0 {
	//	meta.Season = meta.Episode
	//	meta.Episode = -1
	//}

	// tv name
	if utils.IsASCII(name) && titleEnd > 0 {
		if yearP > 0 {
			titleEnd = min(titleEnd, yearP-1)
		}
		meta.NameEn = strings.TrimSpace(name[:titleEnd])
		meta.NameCn = meta.NameEn
		return meta
	}

	fields := strings.FieldsFunc(name, func(r rune) bool {
		return r == '[' || r == ']' || r == '【' || r == '】'
	})
	// The longest field is the most likely title.
	var title, titleCn string
	for _, f := range fields {
		width := len([]rune(f))
		if utils.ContainsChineseChar(f) && width > len([]rune(titleCn)) {
			titleCn = f // longest field containing Chinese characters
		}
		if width > len([]rune(title)) {
			title = f // longest field overall
		}
	}
	// Strip punctuation.
	title = tvPunctuationRe.ReplaceAllString(strings.TrimSpace(strings.ToLower(title)), "")
	titleCn = tvPunctuationRe.ReplaceAllString(strings.TrimSpace(strings.ToLower(titleCn)), "")

	// NOTE(review): an earlier loop tried to narrow NameCn to the Han run
	// inside titleCn but could never replace it (a substring is never longer
	// than its source), so NameCn is simply titleCn.
	meta.NameCn = titleCn
	meta.NameEn = title

	//// match the longest Latin string in title
	//enRe := regexp.MustCompile(`[[:ascii:]]*`)
	//enM := enRe.FindAllString(title, -1)
	//if len(enM) > 0 {
	//	for _, t := range enM {
	//		if len(t) > len(meta.NameEn) {
	//			meta.NameEn = strings.TrimSpace(strings.ToLower(t))
	//		}
	//	}
	//}

	return meta
}

// chinese2Num maps a single Chinese numeral (一 to 九) to its value.
var chinese2Num = map[string]int{
	"一": 1,
	"二": 2,
	"三": 3,
	"四": 4,
	"五": 5,
	"六": 6,
	"七": 7,
	"八": 8,
	"九": 9,
}

// chineseNum is the set of runes adjacentNumber treats as Chinese numerals.
var chineseNum = map[rune]bool{
	'一': true,
	'二': true,
	'三': true,
	'四': true,
	'五': true,
	'六': true,
	'七': true,
	'八': true,
	'九': true,
}