lowercase all DB tags and fix infinite loop

This commit is contained in:
bakape 2020-05-17 14:17:04 +03:00
parent 812675f337
commit 63498af703
No known key found for this signature in database
GPG Key ID: F91DA82BD0B67F74
8 changed files with 61 additions and 30 deletions

View File

@ -20,7 +20,7 @@ const (
)
var (
ErrNoMatch = Error{errors.New("no images match tag")}
ErrNoMatch = Error{errors.New("not enough images match tag")}
)
func (d DataSource) String() string {
@ -35,7 +35,6 @@ func (d DataSource) String() string {
}
// FetchRequest describes a request to fetch an image for a single tag.
type FetchRequest struct {
// Initial tag population. When set, the Danbooru fetcher first counts the
// images already pending for Tag and skips fetching a new page if enough
// are available.
IsInitial bool
// Tag to fetch an image for
Tag string
}

View File

@ -1,10 +1,9 @@
package danbooru
import (
"bytes"
"context"
"database/sql"
"fmt"
"errors"
"io"
"io/ioutil"
"net/http"
@ -31,6 +30,8 @@ var (
"cosplay": {},
"objectification": {},
}
errAllFetched = errors.New("all pages fetched")
)
type cacheEntry struct {
@ -47,6 +48,7 @@ func Fetch(req common.FetchRequest) (f *os.File, image db.Image, err error) {
// Faster tag init
skipPageFetch := false
allFetched := false
if req.IsInitial {
var n int
n, err = db.CountPending(req.Tag)
@ -56,14 +58,13 @@ func Fetch(req common.FetchRequest) (f *os.File, image db.Image, err error) {
skipPageFetch = n >= 3
}
if !skipPageFetch {
var w bytes.Buffer
w.WriteString("solo")
fmt.Fprintf(&w, " %s", req.Tag)
for t := range blacklisted {
fmt.Fprintf(&w, " -%s", t)
}
err = tryFetchPage(req.Tag, strings.ToLower(req.Tag), req.Tag+" solo")
if err != nil {
err = tryFetchPage(req.Tag, req.Tag+" solo")
switch err {
case nil:
case errAllFetched:
err = nil
allFetched = true
default:
return
}
}
@ -71,6 +72,10 @@ func Fetch(req common.FetchRequest) (f *os.File, image db.Image, err error) {
img, err := db.PopRandomPendingImage(req.Tag)
if err != nil {
if err == sql.ErrNoRows {
if allFetched {
err = common.ErrNoMatch
return
}
err = nil
}
return
@ -104,10 +109,10 @@ func Fetch(req common.FetchRequest) (f *os.File, image db.Image, err error) {
}
// Attempt to fetch a random page from Danbooru
func tryFetchPage(requested, requestedLower, tags string) (err error) {
func tryFetchPage(requested, tags string) (err error) {
store := cache[tags]
if store == nil {
maxPages := 200
maxPages := 300
if common.IsTest { // Reduce test duration
maxPages = 10
}
@ -122,8 +127,7 @@ func tryFetchPage(requested, requestedLower, tags string) (err error) {
return
}
if len(store.pages) == store.maxPages {
// Already fetched all pages
return
return errAllFetched
}
// Always download first page on fresh fetch
@ -152,7 +156,7 @@ func tryFetchPage(requested, requestedLower, tags string) (err error) {
// Empty page. Don't check pages past this one. They will also be empty.
store.maxPages = page
// Retry with a new random page
return tryFetchPage(requested, requestedLower, tags)
return tryFetchPage(requested, tags)
}
// Push applicable posts to pending image set
@ -176,7 +180,7 @@ func tryFetchPage(requested, requestedLower, tags string) (err error) {
select {
case <-ctx.Done():
return
case dst <- processPost(requested, requestedLower, p):
case dst <- processPost(requested, p):
}
}
@ -196,7 +200,7 @@ func tryFetchPage(requested, requestedLower, tags string) (err error) {
return
}
func processPost(requested, requestedLower string, p boorufetch.Post,
func processPost(requested string, p boorufetch.Post,
) (err error) {
img := db.PendingImage{TargetTag: requested}
img.MD5, err = p.MD5()
@ -262,13 +266,13 @@ func processPost(requested, requestedLower string, p boorufetch.Post,
}
// Ensure tags contain solo
if t.Tag == "solo" {
hasSolo = true
if !hasSolo {
hasSolo = t.Tag == "solo"
}
// Ensure array contains initial tag
if strings.ToLower(t.Tag) == requestedLower {
containsRequested = true
if !containsRequested {
containsRequested = strings.ToLower(t.Tag) == requested
}
}
if !containsRequested || !hasSolo {

View File

@ -4,6 +4,7 @@ import (
crypto "crypto/rand"
"database/sql"
"math/rand"
"strings"
"github.com/Masterminds/squirrel"
"github.com/bakape/boorufetch"
@ -18,6 +19,8 @@ type Filters struct {
// Generate a new captcha and return its ID and image list in order
func GenerateCaptcha(f Filters) (id [64]byte, images [9][16]byte, err error) {
f.Tag = strings.ToLower(f.Tag)
buf := make([]byte, 16)
matchedCount, err := getMatchingImages(f, &images, &buf)
if err != nil {

View File

@ -2,6 +2,7 @@ package db
import (
"database/sql"
"strings"
"github.com/Masterminds/squirrel"
"github.com/bakape/boorufetch"
@ -27,6 +28,8 @@ func InsertImage(img Image) (err error) {
return BlacklistImage(img.MD5)
}
lowercaseTags(img.Tags)
dbMu.Lock()
defer dbMu.Unlock()
@ -76,6 +79,8 @@ func BlacklistImage(hash [16]byte) (err error) {
// Return count of images matching selectors
func ImageCount(f Filters) (n int, err error) {
f.Tag = strings.ToLower(f.Tag)
dbMu.RLock()
defer dbMu.RUnlock()
@ -84,7 +89,6 @@ func ImageCount(f Filters) (n int, err error) {
Join("images on image_id = images.id").
Where(squirrel.Eq{
"tag": f.Tag,
"source": common.Danbooru,
"blacklist": false,
"rating": f.Explicitness,
}).

View File

@ -3,6 +3,7 @@ package db
import (
"database/sql"
"encoding/json"
"strings"
"github.com/bakape/boorufetch"
"github.com/bakape/captchouli/v2/common"
@ -23,6 +24,7 @@ func IsPendingImage(md5 [16]byte) (bool, error) {
// Insert a new image pending processing
func InsertPendingImage(img PendingImage) (err error) {
lowercaseTags(img.Tags)
tags, err := json.Marshal(img.Tags)
if err != nil {
return
@ -40,6 +42,8 @@ func InsertPendingImage(img PendingImage) (err error) {
// Deletes a random pending image for the tag and returns it, if any
func PopRandomPendingImage(tag string) (img PendingImage, err error) {
tag = strings.ToLower(tag)
dbMu.Lock()
defer dbMu.Unlock()
@ -89,6 +93,11 @@ func PopRandomPendingImage(tag string) (img PendingImage, err error) {
// Count pending images for tag
func CountPending(tag string) (n int, err error) {
tag = strings.ToLower(tag)
dbMu.RLock()
defer dbMu.RUnlock()
err = sq.Select("count(*)").
From("pending_images").
Where("target_tag = ?", tag).

View File

@ -2,6 +2,7 @@ package db
import (
"database/sql"
"strings"
)
// Execute all SQL statement strings and return on first error, if any
@ -44,3 +45,9 @@ func imageExists(table string, md5 [16]byte) (exists bool, err error) {
}
return
}
// lowercaseTags converts every element of tags to lower case in place.
// A nil or empty slice is left untouched.
func lowercaseTags(tags []string) {
	for idx, tag := range tags {
		tags[idx] = strings.ToLower(tag)
	}
}

View File

@ -3,6 +3,7 @@ package captchouli
import (
"log"
"os"
"strings"
"time"
"github.com/bakape/captchouli/v2/common"
@ -48,6 +49,8 @@ func init() {
}
func fetch(req common.FetchRequest) (err error) {
req.Tag = strings.ToLower(req.Tag)
f, img, err := danbooru.Fetch(req)
if f == nil || err != nil {
return

View File

@ -142,11 +142,13 @@ func NewService(opts Options) (s *Service, err error) {
// Initialize pool with enough images, if lacking
func (s *Service) initPool(tags []string) (err error) {
formatErr := func(tag string, err error) error {
return fmt.Errorf(
"captchouli: error initializing image pool for tag `%s`: %w",
tag,
err,
)
return Error{
Err: fmt.Errorf(
"error initializing image pool for tag `%s`: %w",
tag,
err,
),
}
}
// Init first 3 tags needed for operation first and init the rest
@ -221,7 +223,7 @@ func (s *Service) initTag(tag string) (err error) {
if !s.quiet {
fetchCount++
fmt.Printf("image fetch: %d\n", fetchCount)
fmt.Printf("captchouli: image fetch: %d\n", fetchCount)
}
err = fetch(req)
if err != nil {