2023-05-24 14:05:27 +00:00
package services
import (
"context"
2023-06-20 08:38:36 +00:00
"e621_to_neo4j/database"
2023-05-24 14:05:27 +00:00
"e621_to_neo4j/e621"
2023-05-24 21:11:49 +00:00
"e621_to_neo4j/e621/models"
2023-05-24 14:05:27 +00:00
"e621_to_neo4j/utils"
"log"
"time"
)
2023-06-20 08:38:36 +00:00
func ScrapeUser ( ctx context . Context , graphConnection database . GraphConnection , e621Client e621 . Client , username string ) error {
2023-05-24 14:05:27 +00:00
var err error
e621User , err := e621Client . GetUserInfo ( username )
if err != nil {
return err
}
2023-05-24 21:11:49 +00:00
if e621User . IsBanned {
log . Printf ( "User %s is banned from e621!" , e621User . Name )
return nil
}
log . Printf ( "Processing user: %s with id %d" , e621User . Name , e621User . ID )
2023-06-20 08:38:36 +00:00
err = graphConnection . UploadUser ( ctx , e621User )
2023-05-24 14:05:27 +00:00
if err != nil {
log . Fatal ( err )
}
2023-05-24 21:11:49 +00:00
log . Printf ( "Getting favorites for user %s with id %d" , e621User . Name , e621User . ID )
start := time . Now ( )
2023-06-17 17:13:25 +00:00
userFavorites , err := e621Client . GetFavorites ( e621User )
2023-05-24 14:05:27 +00:00
if err != nil {
log . Fatal ( err )
}
2023-05-24 21:11:49 +00:00
log . Printf ( "User %s with id %d has %d favorites. Time took to scrape: %v" , e621User . Name , e621User . ID , len ( userFavorites ) , time . Since ( start ) )
2023-05-24 14:05:27 +00:00
startUploadPosts := time . Now ( )
2023-05-24 21:11:49 +00:00
// Uploads all Tags, Posts as Nodes to Neo4j
for i , post := range userFavorites {
2023-06-20 08:38:36 +00:00
if exists , err := graphConnection . CheckUserToPostLink ( ctx , post . ID , e621User . ID ) ; err == nil && exists {
2023-06-17 17:13:25 +00:00
log . Printf ( "No new posts found for user %s with id %d" , e621User . Name , e621User . ID )
log . Printf ( "Last Post ID Found: %d" , post . ID )
break
} else if err != nil {
return err
}
2023-05-24 21:11:49 +00:00
start = time . Now ( )
2023-06-20 08:38:36 +00:00
err = uploadNodes ( ctx , graphConnection , post )
2023-05-24 21:11:49 +00:00
if err != nil {
return err
}
log . Printf ( "Uploading post for user %s with id %d, %d of %d with ID: %d took: %v" , e621User . Name , e621User . ID , i , len ( userFavorites ) , post . ID , time . Since ( start ) )
2023-05-24 14:05:27 +00:00
start := time . Now ( )
2023-06-20 08:38:36 +00:00
err = uploadPostToUserRelationship ( ctx , graphConnection , post , e621User )
2023-05-24 21:11:49 +00:00
if err != nil {
log . Fatal ( err )
return err
}
2023-05-24 14:05:27 +00:00
2023-06-20 08:38:36 +00:00
err = uploadSourceTagRelationship ( ctx , graphConnection , post )
2023-05-24 21:11:49 +00:00
if err != nil {
log . Fatal ( err )
return err
}
2023-05-24 14:05:27 +00:00
2023-06-20 08:38:36 +00:00
err = uploadGeneralTagRelationship ( ctx , graphConnection , post )
2023-05-24 21:11:49 +00:00
if err != nil {
log . Fatal ( err )
return err
}
2023-05-24 14:05:27 +00:00
2023-06-20 08:38:36 +00:00
err = uploadCharacterTagtRelationship ( ctx , graphConnection , post )
2023-05-24 21:11:49 +00:00
if err != nil {
log . Fatal ( err )
return err
}
2023-05-24 14:05:27 +00:00
2023-06-20 08:38:36 +00:00
err = uploadCopyrightTagRelationship ( ctx , graphConnection , post )
2023-05-24 21:11:49 +00:00
if err != nil {
log . Fatal ( err )
return err
}
2023-05-24 14:05:27 +00:00
2023-06-20 08:38:36 +00:00
err = uploadArtistTagRelationship ( ctx , graphConnection , post )
2023-05-24 14:05:27 +00:00
if err != nil {
2023-05-24 21:11:49 +00:00
log . Fatal ( err )
2023-05-24 14:05:27 +00:00
return err
}
2023-05-24 21:11:49 +00:00
log . Printf ( "Making relationship for user %s with id %d, %d for Post: %d with ID: %d took: %v" , e621User . Name , e621User . ID , i , len ( userFavorites ) , post . ID , time . Since ( start ) )
}
log . Printf ( "Uploading all posts for user %s took: %v" , username , time . Since ( startUploadPosts ) )
2023-06-17 17:13:25 +00:00
2023-05-24 21:11:49 +00:00
return nil
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
// uploadNodes uploads the post to the database and creates the nodes
2023-06-20 08:38:36 +00:00
func uploadNodes ( ctx context . Context , graphConnection database . GraphConnection , post models . Post ) error {
2023-06-17 17:13:25 +00:00
2023-05-24 21:11:49 +00:00
uniqueGeneralTags := make ( [ ] string , 0 )
uniqueCharacterTags := make ( [ ] string , 0 )
uniqueCopyrightTags := make ( [ ] string , 0 )
uniqueArtistTags := make ( [ ] string , 0 )
allGeneralTags := make ( [ ] string , 0 )
allCharacterTags := make ( [ ] string , 0 )
allCopyrightTags := make ( [ ] string , 0 )
allArtistTags := make ( [ ] string , 0 )
allGeneralTags = append ( allGeneralTags , post . Tags . General ... )
allCharacterTags = append ( allCharacterTags , post . Tags . Character ... )
allCopyrightTags = append ( allCopyrightTags , post . Tags . Character ... )
allArtistTags = append ( allArtistTags , post . Tags . Artist ... )
uniqueGeneralTags = utils . UniqueNonEmptyElementsOf ( allGeneralTags )
uniqueCharacterTags = utils . UniqueNonEmptyElementsOf ( allCharacterTags )
uniqueCopyrightTags = utils . UniqueNonEmptyElementsOf ( allCopyrightTags )
uniqueArtistTags = utils . UniqueNonEmptyElementsOf ( allArtistTags )
2023-06-20 08:38:36 +00:00
err := graphConnection . UploadPost ( ctx , post . ID )
2023-05-24 21:11:49 +00:00
if err != nil {
return err
}
// Uploads the source to the database
for _ , source := range post . Sources {
2023-06-20 08:38:36 +00:00
err := graphConnection . UploadSource ( ctx , source )
2023-05-24 21:11:49 +00:00
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
for _ , uniqueGeneralTag := range uniqueGeneralTags {
2023-06-20 08:38:36 +00:00
err := graphConnection . UploadTag ( ctx , uniqueGeneralTag , "general" )
2023-05-24 21:11:49 +00:00
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
}
for _ , uniqueCharacterTag := range uniqueCharacterTags {
2023-06-20 08:38:36 +00:00
err := graphConnection . UploadTag ( ctx , uniqueCharacterTag , "character" )
2023-05-24 21:11:49 +00:00
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
for _ , uniqueCopyrightTag := range uniqueCopyrightTags {
2023-06-20 08:38:36 +00:00
err := graphConnection . UploadTag ( ctx , uniqueCopyrightTag , "copyright" )
2023-05-24 21:11:49 +00:00
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
for _ , uniqueArtistTag := range uniqueArtistTags {
2023-06-20 08:38:36 +00:00
err := graphConnection . UploadTag ( ctx , uniqueArtistTag , "artist" )
2023-05-24 21:11:49 +00:00
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
}
2023-06-17 17:13:25 +00:00
2023-05-24 21:11:49 +00:00
return nil
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
// uploadPostToUserRelationship creates a relationship between the user and the post
2023-06-20 08:38:36 +00:00
func uploadPostToUserRelationship ( ctx context . Context , graphConnection database . GraphConnection , post models . Post , e621User models . E621User ) error {
err := graphConnection . EstablishUserToPostLink ( ctx , post . ID , e621User . ID )
2023-05-24 21:11:49 +00:00
if err != nil {
return err
}
// log.Printf("Created UserToPostRelationship for user: %s to post: %d", e621User.Name, post.ID)
return nil
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
// uploadSourceTagRelationship creates a relationship between the post and the source
2023-06-20 08:38:36 +00:00
func uploadSourceTagRelationship ( ctx context . Context , graphConnection database . GraphConnection , post models . Post ) error {
2023-05-24 21:11:49 +00:00
for _ , source := range post . Sources {
2023-06-20 08:38:36 +00:00
err := graphConnection . EstablishPostToSourceLink ( ctx , post . ID , source )
2023-05-24 14:05:27 +00:00
if err != nil {
return err
}
2023-05-24 21:11:49 +00:00
// log.Printf("Created PostToSourceRelationship for Post: %d to source: %s", post.ID, source)
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
}
return nil
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
// uploadGeneralTagRelationship creates a relationship between the post and the general tag
2023-06-20 08:38:36 +00:00
func uploadGeneralTagRelationship ( ctx context . Context , graphConnection database . GraphConnection , post models . Post ) error {
2023-05-24 21:11:49 +00:00
for _ , generalTag := range post . Tags . General {
2023-06-20 08:38:36 +00:00
err := graphConnection . EstablishPostToTagLink ( ctx , post . ID , generalTag )
2023-05-24 21:11:49 +00:00
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
// log.Printf("Created PostToTagRelationship for post: %d to general tag: %s", post.ID, generalTag)
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
}
return nil
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
// uploadCharacterTagtRelationship creates a relationship between the post and the character tag
2023-06-20 08:38:36 +00:00
func uploadCharacterTagtRelationship ( ctx context . Context , graphConnection database . GraphConnection , post models . Post ) error {
2023-05-24 21:11:49 +00:00
for _ , characterTag := range post . Tags . Character {
2023-06-20 08:38:36 +00:00
err := graphConnection . EstablishPostToTagLink ( ctx , post . ID , characterTag )
2023-05-24 21:11:49 +00:00
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
// log.Printf("Created PostToTagRelationship for post: %d to character tag: %s", post.ID, characterTag)
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
}
return nil
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
// uploadCopyrightTagRelationship creates a relationship between the post and the copyright tag
2023-06-20 08:38:36 +00:00
func uploadCopyrightTagRelationship ( ctx context . Context , graphConnection database . GraphConnection , post models . Post ) error {
2023-05-24 21:11:49 +00:00
for _ , copyrightTag := range post . Tags . Copyright {
2023-06-20 08:38:36 +00:00
err := graphConnection . EstablishPostToTagLink ( ctx , post . ID , copyrightTag )
2023-05-24 21:11:49 +00:00
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
// log.Printf("Created PostToTagRelationship for post: %d to copyrigh tag: %s", post.ID, copyrightTag)
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
return nil
}
// uploadArtistTagRelationship creates a relationship between the post and the artist tag
2023-06-20 08:38:36 +00:00
func uploadArtistTagRelationship ( ctx context . Context , graphConnection database . GraphConnection , post models . Post ) error {
2023-05-24 21:11:49 +00:00
for _ , artistTag := range post . Tags . Artist {
2023-06-20 08:38:36 +00:00
err := graphConnection . EstablishPostToTagLink ( ctx , post . ID , artistTag )
2023-05-24 21:11:49 +00:00
if err != nil {
return err
}
// log.Printf("Created PostToTagRelationship for post: %d to artist tag: %s", post.ID, artistTag)
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
}
2023-05-24 14:05:27 +00:00
return nil
}
2023-05-24 21:11:49 +00:00
// Timing note: 11min for Selloo, run simultaneously with mutt_jake
2023-06-17 17:13:25 +00:00
// Timing note: 1h58m53 for mutt_jake, run together with Selloo