diff --git a/README.MD b/README.MD index d192655..9bb1ac9 100644 --- a/README.MD +++ b/README.MD @@ -43,6 +43,12 @@ DB_TYPE=neo4j DB_URL= DB_PASSWORD= DB_USERNAME= +# Allowed values are FATAL, ERROR, WARN, INFO, DEBUG, TRACE (default: INFO) +LOG_LEVEL= +# Allowed values are PLAIN, JSON (default: PLAIN) +LOG_FORMAT= +# Allowed values are TRUE, FALSE (default: FALSE) +NEO4J_DEBUG= ``` ## Running the Program @@ -67,14 +73,20 @@ The program can also be run using Docker. To use Docker, follow these steps: 1. Create a .env file in the project's root directory with the following content: - ````plaintext - E621_API_KEY= - E621_USERNAME= - DB_TYPE=neo4j - DB_URL= - DB_PASSWORD= - DB_USERNAME= - ```` + ```plaintext + E621_API_KEY= + E621_USERNAME= + DB_TYPE=neo4j + DB_URL= + DB_PASSWORD= + DB_USERNAME= + # Allowed values are FATAL, ERROR, WARN, INFO, DEBUG, TRACE (default: INFO) + LOG_LEVEL= + # Allowed values are PLAIN, JSON (default: PLAIN) + LOG_FORMAT= + # Allowed values are TRUE, FALSE (default: FALSE) + NEO4J_DEBUG= + ``` 2. Build the Docker image: diff --git a/cmd/scraper/main.go b/cmd/scraper/main.go index fdd7686..ebfedb2 100644 --- a/cmd/scraper/main.go +++ b/cmd/scraper/main.go @@ -9,61 +9,89 @@ import ( "git.dragse.it/anthrove/e621-to-graph/pkg/logic" log "github.com/sirupsen/logrus" "net/http" - "os" "strings" + "time" ) -func init() { - // Log as JSON instead of the default ASCII formatter. - //log.SetFormatter(&log.JSONFormatter{}) - - // Output to stdout instead of the default stderr - // Can be any io.Writer, see below for File example - log.SetOutput(os.Stdout) - - // Only log the warning severity or above. - log.SetLevel(log.DebugLevel) - - // Logging Method Name - //log.SetReportCaller(true) -} - func main() { + // Loads Config + appConfig, err := config.LoadConfig() + if err != nil { + log.Fatal(err) + } + log.Debug("main: config loaded") + + loggingSetup(appConfig) + var graphConnection logic.GraphConnection ctx := context.Background() - // Loads Config - config, err := config.LoadConfig() - if err != nil { - log.Println(err) - } - - switch strings.ToLower(config.DBType) { + switch strings.ToLower(appConfig.DBType) { case "neo4j": - log.Println("Setup Neo4J Connection") - graphConnection = neo4j.NewNeo4JConnection() - err = graphConnection.Connect(ctx, config.DBEndpoint, config.DBUsername, config.DBPassword) + graphConnection = neo4j.NewNeo4JConnection(appConfig.Neo4jDebug) + err = graphConnection.Connect(ctx, appConfig.DBEndpoint, appConfig.DBUsername, appConfig.DBPassword) if err != nil { - panic(err) + log.Panicf("main: %s", err) } - log.Println("Connection successful") default: - panic("No Database was selected!") + log.Panic("main: no database was selected!") } + log.WithFields(log.Fields{ + "database_type": strings.ToLower(appConfig.DBType), + }).Info("main: database connection successful") // Initialize the e621API - e621Client := e621.NewClient(config.E621APIKey, config.E621Username) - - log.Printf("Im ready!") - + e621Client := e621.NewClient(appConfig.E621APIKey, appConfig.E621Username) // Register the ScapeUserFavourites with the "/user" route http.HandleFunc("/user", api.ScapeUserFavourites(ctx, graphConnection, e621Client)) // Start the HTTP server err = http.ListenAndServe(":8080", nil) if err != nil { - return + log.Panicf("main: %s", err) } } + +func loggingSetup(appConfig *config.Config) { + switch strings.ToUpper(appConfig.LogLevel) { + case "FATAL": + log.SetLevel(log.FatalLevel) + case "ERROR": + log.SetLevel(log.ErrorLevel) + case "WARN": + log.SetLevel(log.WarnLevel) + case "INFO": + log.SetLevel(log.InfoLevel) + case "DEBUG": + log.SetLevel(log.DebugLevel) + case "TRACE": + log.SetLevel(log.TraceLevel) + default: + log.Panic("main: no log level was set") + } + + switch strings.ToUpper(appConfig.LogFormat) { + case "PLAIN": + log.SetFormatter(&log.TextFormatter{ + ForceColors: true, + ForceQuote: true, + DisableLevelTruncation: true, + PadLevelText: true, + FullTimestamp: true, + TimestampFormat: time.DateTime, // 2006-01-02 15:04:05 + }) + case "JSON": + log.SetFormatter(&log.JSONFormatter{ + TimestampFormat: time.DateTime, // 2006-01-02 15:04:05, + }) + default: + log.Panic("main: no formatter was set") + } + + log.WithFields(log.Fields{ + "log_level": log.GetLevel(), + "formatter": appConfig.LogFormat, + }).Info("main: setting logging info") +} diff --git a/internal/api/user.go b/internal/api/user.go index def53c9..11603ea 100644 --- a/internal/api/user.go +++ b/internal/api/user.go @@ -13,17 +13,16 @@ import ( // ScapeUserFavourites is the handler for the user API func ScapeUserFavourites(ctx context.Context, graphConnection logic.GraphConnection, e621Client *e621.Client) func(response http.ResponseWriter, request *http.Request) { return func(w http.ResponseWriter, r *http.Request) { - log.Println("Request") if r.Method != http.MethodPost { w.WriteHeader(http.StatusMethodNotAllowed) - fmt.Fprintf(w, "Only POST requests are allowed") + fmt.Fprintf(w, "only POST requests are allowed") return } username := r.FormValue("username") if username == "" { w.WriteHeader(http.StatusBadRequest) - fmt.Fprintf(w, "Username is required") + fmt.Fprintf(w, "username is required") return } @@ -32,7 +31,9 @@ func ScapeUserFavourites(ctx context.Context, graphConnection logic.GraphConnect // Send a response w.WriteHeader(http.StatusOK) - fmt.Fprintf(w, "Username %s processed successfully", username) + log.WithFields(log.Fields{ + "requested_user": username, + }).Info("api: processing user") } } diff --git a/internal/config/config.go b/internal/config/config.go index 787216b..24f1b01 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -3,6 +3,7 @@ package config import ( "fmt" "github.com/caarlos0/env" + "strings" ) type Config struct { @@ -12,13 +13,27 @@ type Config struct { DBEndpoint string `env:"DB_URL,required"` DBUsername string `env:"DB_USERNAME,required"` DBPassword string `env:"DB_PASSWORD,required"` + LogLevel string `env:"LOG_LEVEL" envDefault:"INFO"` + LogFormat string `env:"LOG_FORMAT" envDefault:"PLAIN"` + Neo4jDebug bool `env:"NEO4J_DEBUG" envDefault:"FALSE"` } // LoadConfig loads the configuration from environment variables func LoadConfig() (*Config, error) { config := &Config{} if err := env.Parse(config); err != nil { - return nil, fmt.Errorf("error parsing configuration: %w", err) + return nil, fmt.Errorf("config: error parsing configuration: %w", err) + } + + logLevel := strings.ToUpper(config.LogLevel) + logFormat := strings.ToUpper(config.LogFormat) + + if logLevel != "FATAL" && logLevel != "ERROR" && logLevel != "WARN" && logLevel != "INFO" && logLevel != "DEBUG" && logLevel != "TRACE" { + return nil, fmt.Errorf("config: valid levels for erros are: FATAL, ERROR, WARN, INFO, DEBUG, TRACE") + } + + if logFormat != "PLAIN" && logFormat != "JSON" { + return nil, fmt.Errorf("config: valit formatters are: PLAIN, JSON") } return config, nil diff --git a/internal/database/neo4j/impl.go b/internal/database/neo4j/impl.go index 6be21e3..26c695c 100644 --- a/internal/database/neo4j/impl.go +++ b/internal/database/neo4j/impl.go @@ -9,11 +9,15 @@ import ( ) type neo4jConnection struct { - driver neo4j.DriverWithContext + driver neo4j.DriverWithContext + neo4jDebug bool } -func NewNeo4JConnection() logic.GraphConnection { - return &neo4jConnection{} +func NewNeo4JConnection(neo4jDebug bool) logic.GraphConnection { + return &neo4jConnection{ + driver: nil, + neo4jDebug: neo4jDebug, + } } func (c *neo4jConnection) CheckUserToPostLink(ctx context.Context, e621PostID int64, e621UserID int64) (bool, error) { @@ -48,12 +52,14 @@ func (c *neo4jConnection) UploadUser(ctx context.Context, user model.E621User) e return CreateUserNode(ctx, c.driver, user) } func (c *neo4jConnection) Connect(ctx context.Context, endpoint string, username string, password string) error { + driver, err := neo4j.NewDriverWithContext(endpoint, neo4j.BasicAuth(username, password, ""), - useConsoleLogger(neo4j.INFO)) + logger(c.neo4jDebug)) + if err != nil { return err } - err = driver.VerifyAuthentication(context.Background(), nil) + err = driver.VerifyAuthentication(ctx, nil) if err != nil { return err } @@ -61,8 +67,8 @@ func (c *neo4jConnection) Connect(ctx context.Context, endpoint string, username return nil } -func useConsoleLogger(level neo4j.LogLevel) func(config *config.Config) { +func logger(neo4jDebug bool) func(config *config.Config) { return func(config *config.Config) { - config.Log = neo4j.ConsoleLogger(level) + config.Log = NewNeo4jLogger(neo4jDebug) } } diff --git a/internal/database/neo4j/logger.go b/internal/database/neo4j/logger.go new file mode 100644 index 0000000..1152689 --- /dev/null +++ b/internal/database/neo4j/logger.go @@ -0,0 +1,45 @@ +package neo4j + +import ( + "fmt" + neo4jLog "github.com/neo4j/neo4j-go-driver/v5/neo4j/log" + log "github.com/sirupsen/logrus" +) + +type neo4jLogger struct { + neo4jDebug bool +} + +func NewNeo4jLogger(neo4jDebug bool) neo4jLog.Logger { + return &neo4jLogger{neo4jDebug: neo4jDebug} +} + +func (n neo4jLogger) Error(name string, id string, err error) { + log.WithFields(log.Fields{ + "name": name, + "id": id, + }).Errorf("neo4j: %s", err) +} + +func (n neo4jLogger) Warnf(name string, id string, msg string, args ...any) { + log.WithFields(log.Fields{ + "name": name, + "id": id, + }).Warnf("neo4j: %v", fmt.Sprintf(msg, args...)) +} + +func (n neo4jLogger) Infof(name string, id string, msg string, args ...any) { + log.WithFields(log.Fields{ + "name": name, + "id": id, + }).Infof("neo4j: %v", fmt.Sprintf(msg, args...)) +} + +func (n neo4jLogger) Debugf(name string, id string, msg string, args ...any) { + if n.neo4jDebug { + log.WithFields(log.Fields{ + "name": name, + "id": id, + }).Debugf("neo4j: %v", fmt.Sprintf(msg, args...)) + } +} diff --git a/internal/database/neo4j/relationship.go b/internal/database/neo4j/relationship.go index 0094fc3..3411ecf 100644 --- a/internal/database/neo4j/relationship.go +++ b/internal/database/neo4j/relationship.go @@ -3,6 +3,7 @@ package neo4j import ( "context" "github.com/neo4j/neo4j-go-driver/v5/neo4j" + log "github.com/sirupsen/logrus" ) func EstablishPostTagLink(ctx context.Context, driver neo4j.DriverWithContext, e621PostID int64, e621Tag string) error { @@ -16,6 +17,10 @@ func EstablishPostTagLink(ctx context.Context, driver neo4j.DriverWithContext, e "e621Tag": e621Tag, } + log.WithFields(log.Fields{ + "e621_post_id": e621PostID, + "e621_tag": e621Tag, + }).Trace("neo4j: creating post to e621Tag link") _, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer) if err != nil { return err @@ -35,6 +40,10 @@ func EstablishPostToSourceLink(ctx context.Context, driver neo4j.DriverWithConte "sourceURL": sourceURL, } + log.WithFields(log.Fields{ + "e621_post_id": e621PostID, + "source_url": sourceURL, + }).Trace("neo4j: creating post to source link") _, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer) if err != nil { return err @@ -53,7 +62,10 @@ func EstablishUserToPostLink(ctx context.Context, driver neo4j.DriverWithContext "e621PostID": e621PostID, "e621ID": e621UserID, } - + log.WithFields(log.Fields{ + "e621_post_id": e621PostID, + "e621_user_id": e621UserID, + }).Trace("neo4j: creating user to post link") _, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer) if err != nil { return err @@ -73,6 +85,10 @@ func RelationshipCheckUserToPost(ctx context.Context, driver neo4j.DriverWithCon "e621ID": e621UserID, } + log.WithFields(log.Fields{ + "e621_post_id": e621PostID, + "e621_user_id": e621UserID, + }).Trace("neo4j: check user post relationship") result, err := neo4j.ExecuteQuery(ctx, driver, query, params, neo4j.EagerResultTransformer) if err != nil { return false, err diff --git a/internal/e621/favorites.go b/internal/e621/favorites.go index 379d77a..396a31c 100644 --- a/internal/e621/favorites.go +++ b/internal/e621/favorites.go @@ -5,11 +5,18 @@ import ( "fmt" "git.dragse.it/anthrove/e621-to-graph/internal/e621/scheduler" "git.dragse.it/anthrove/e621-to-graph/pkg/e621/model" + log "github.com/sirupsen/logrus" ) // GetFavorites retrieves all favorites from the e621 API. func (c *Client) GetFavorites(_ context.Context, user model.E621User, page int64) func() (model.PostResponseWrapper, error) { URIPath := fmt.Sprintf("favorites.json?user_id=%d&limit=%d&page=%d", user.ID, 320, page) + log.WithFields(log.Fields{ + "user": user.Name, + "id": user.ID, + "page": page, + "uri": URIPath, + }).Trace("e621: adding task to get favorites") e621Task := NewE621ApiTask[model.PostResponseWrapper](URIPath) return scheduler.Schedule[model.PostResponseWrapper](c.scheduler, e621Task, c.username, c.apiKey) } diff --git a/internal/e621/scheduler/scheduler.go b/internal/e621/scheduler/scheduler.go index ce96c91..b12babb 100644 --- a/internal/e621/scheduler/scheduler.go +++ b/internal/e621/scheduler/scheduler.go @@ -6,6 +6,7 @@ import ( "git.dragse.it/anthrove/e621-to-graph/pkg/util/queue" log "github.com/sirupsen/logrus" "golang.org/x/time/rate" + "reflect" ) type Scheduler struct { @@ -25,23 +26,35 @@ func NewScheduler() *Scheduler { func Schedule[T e621.DataType](s *Scheduler, t e621.Task[T], username string, apiKey string) func() (T, error) { channel := make(chan e621.DataResponse[T]) schedulerTask := NewSchedulerTaskImpl[T](t, channel, username, apiKey) - log.Debug("Psuh task") - err := s.queue.Push(schedulerTask) - log.Debug("Element pushed") + log.WithFields(log.Fields{ + "type": reflect.TypeOf(*new(T)), + }).Debug("scheduler: pushing task to queue") + err := s.queue.Push(schedulerTask) if err != nil { return func() (T, error) { var nil T return nil, err } } + + log.WithFields(log.Fields{ + "type": reflect.TypeOf(*new(T)), + }).Trace("scheduler: pushed task to queue") + return func() (T, error) { + log.Trace("scheduler: getting data from channel") data := <-channel return data.Data, data.Error } } func (s *Scheduler) SetLimiter(limiter *rate.Limiter) { + log.WithFields(log.Fields{ + "burst": limiter.Burst(), + "limit": limiter.Limit(), + "tokens": limiter.Tokens(), + }).Debug("scheduler: setting limiter") s.limiter = limiter } @@ -51,7 +64,7 @@ func (s *Scheduler) StartExecutionHandler() { s.limiter.Wait(context.Background()) } s.queue.WaitForElement() - log.Debug("element found") + log.Trace("scheduler: element found") task, err := s.queue.Pop() if err != nil { continue diff --git a/internal/e621/task.go b/internal/e621/task.go index 25b7719..16a9526 100644 --- a/internal/e621/task.go +++ b/internal/e621/task.go @@ -4,7 +4,9 @@ import ( "encoding/json" "git.dragse.it/anthrove/e621-to-graph/pkg/e621" pgkError "git.dragse.it/anthrove/e621-to-graph/pkg/error" + log "github.com/sirupsen/logrus" "net/http" + "reflect" ) type e621APITask[T e621.DataType] struct { @@ -12,6 +14,10 @@ type e621APITask[T e621.DataType] struct { } func NewE621ApiTask[T e621.DataType](uri string) e621.Task[T] { + log.WithFields(log.Fields{ + "uri": uri, + "type": reflect.TypeOf(*new(T)), + }).Debug("e621: task created") return &e621APITask[T]{ uri: uri, } @@ -39,11 +45,18 @@ func (e e621APITask[T]) HandleStatusCode(statusCode int) e621.DataResponse[T] { default: err = pgkError.StatusCodesToError(statusCode) } + + log.WithFields(log.Fields{ + "status_code": statusCode, + }).Debug("e621: handel status code") return e621.DataResponse[T]{Error: err} } func (e e621APITask[T]) HandleResponse(responseData *http.Response) e621.DataResponse[T] { var data T + log.WithFields(log.Fields{ + "type": reflect.TypeOf(*new(T)), + }).Debug("e621: decoding json response") err := json.NewDecoder(responseData.Body).Decode(&data) defer responseData.Body.Close() if err != nil { diff --git a/internal/e621/user.go b/internal/e621/user.go index 8f71f35..41ba62a 100644 --- a/internal/e621/user.go +++ b/internal/e621/user.go @@ -4,11 +4,16 @@ import ( "fmt" "git.dragse.it/anthrove/e621-to-graph/internal/e621/scheduler" "git.dragse.it/anthrove/e621-to-graph/pkg/e621/model" + log "github.com/sirupsen/logrus" ) // GetUserInfo retrieves the users information from e621 API. func (c *Client) GetUserInfo(username string) func() (model.E621User, error) { URIPath := fmt.Sprintf("users/%s.json", username) + log.WithFields(log.Fields{ + "username": username, + "uri": URIPath, + }).Debug("e621: getting user info") e621Task := NewE621ApiTask[model.E621User](URIPath) return scheduler.Schedule[model.E621User](c.scheduler, e621Task, c.username, c.apiKey) } diff --git a/internal/service/manager.go b/internal/service/manager.go index 2d04cb6..b7d093a 100644 --- a/internal/service/manager.go +++ b/internal/service/manager.go @@ -24,17 +24,17 @@ func ScrapeUser(ctx context.Context, graphConnection logic.GraphConnection, e621 if e621User.IsBanned { log.WithFields(log.Fields{ - "user": e621User.Name, - "id": e621User.ID, - "bann": e621User.IsBanned, - }).Info("User is Banned") + "e621_username": e621User.Name, + "e621_user_id": e621User.ID, + "e621_user_bann": e621User.IsBanned, + }).Info("service: user is Banned") return nil } log.WithFields(log.Fields{ - "user": e621User.Name, - "id": e621User.ID, - }).Info("Processing user") + "e621_username": e621User.Name, + "e621_user_id": e621User.ID, + }).Info("service: processing user") err = graphConnection.UploadUser(ctx, e621User) if err != nil { @@ -42,22 +42,17 @@ func ScrapeUser(ctx context.Context, graphConnection logic.GraphConnection, e621 } log.WithFields(log.Fields{ - "user": e621User.Name, - "id": e621User.ID, - }).Info("Getting favorites for user") + "e621_username": e621User.Name, + "e621_user_id": e621User.ID, + }).Info("service: start processing favorites") start := time.Now() for { AwaitE621Favorites := e621Client.GetFavorites(ctx, e621User, page) - log.WithFields(log.Fields{ - "id": e621User.ID, - "fav_page": page, - }).Debug("Requesting API for favorites") - favoritesPage, err := AwaitE621Favorites() if err != nil { - log.Printf(err.Error()) + log.Error(err.Error()) } // Append the fetched posts to the result slice @@ -73,26 +68,14 @@ func ScrapeUser(ctx context.Context, graphConnection logic.GraphConnection, e621 } - if err != nil { - log.Fatal(err) - } - log.WithFields(log.Fields{ - "user": e621User.Name, - "id": e621User.ID, - "post_amount": len(allFavorites), - "scrape_time": time.Since(start), - }).Info("Getting favorites for user") - - startUploadPosts := time.Now() - // Uploads all Tags, Posts as Nodes to Neo4j for i, post := range allFavorites { if exists, err := graphConnection.CheckUserToPostLink(ctx, post.ID, e621User.ID); err == nil && exists { log.WithFields(log.Fields{ - "user": e621User.Name, - "id": e621User.ID, - "last_post_id": post.ID, - }).Info("No new favorites found") + "e621_username": e621User.Name, + "e621_user_id": e621User.ID, + "last_post_id": post.ID, + }).Info("service: no new favorites found") break } else if err != nil { return err @@ -104,13 +87,13 @@ func ScrapeUser(ctx context.Context, graphConnection logic.GraphConnection, e621 return err } log.WithFields(log.Fields{ - "user": e621User.Name, - "id": e621User.ID, - "post_number": i, - "post_amount": len(allFavorites), - "post_id": post.ID, - "upload_time": time.Since(start), - }).Debug("Uploading post") + "e621_username": e621User.Name, + "e621_user_id": e621User.ID, + "post_number": i, + "post_amount": len(allFavorites), + "post_id": post.ID, + "upload_time": time.Since(start), + }).Debug("service: uploading post") start := time.Now() err = uploadPostToUserRelationship(ctx, graphConnection, post, e621User) @@ -149,19 +132,20 @@ func ScrapeUser(ctx context.Context, graphConnection logic.GraphConnection, e621 return err } log.WithFields(log.Fields{ - "user": e621User.Name, - "id": e621User.ID, - "post_number": i, - "post_amount": len(allFavorites), - "post_id": post.ID, - "upload_time": time.Since(start), - }).Debug("Making relationship") + "e621_username": e621User.Name, + "e621_user_id": e621User.ID, + "post_number": i, + "post_amount": len(allFavorites), + "post_id": post.ID, + "upload_time": time.Since(start), + }).Debug("service: making relationship") } log.WithFields(log.Fields{ - "user": e621User.Name, - "id": e621User.ID, - "upload_time": time.Since(startUploadPosts), - }).Info("Upload to Database finished") + "e621_username": e621User.Name, + "e621_user_id": e621User.ID, + "post_amount": len(allFavorites), + "scrape_time": time.Since(start), + }).Info("service: finished processing favorites") return nil } diff --git a/pkg/util/queue/queue.go b/pkg/util/queue/queue.go index 37e7c1d..ba957b4 100644 --- a/pkg/util/queue/queue.go +++ b/pkg/util/queue/queue.go @@ -21,7 +21,7 @@ func NewQueue() Queue { // routine. So the thread waits, till WaitForElement pulls the Item from the channel // FIXME this should be fixed in future. require more discussion what is the best way. func (queue *Queue) WaitForElement() { - log.Debug("waiting for element") + log.Debug("queue: waiting for element") _ = <-queue.notifyChannel }