2023-05-24 14:05:27 +00:00
package services
import (
"context"
"e621_to_neo4j/e621"
2023-05-24 21:11:49 +00:00
"e621_to_neo4j/e621/models"
2023-05-24 14:05:27 +00:00
"e621_to_neo4j/neo4jAPI"
"e621_to_neo4j/utils"
"github.com/neo4j/neo4j-go-driver/v5/neo4j"
"log"
"time"
)
func ScrapeUser ( ctx context . Context , driver neo4j . DriverWithContext , e621Client e621 . Client , username string ) error {
var err error
e621User , err := e621Client . GetUserInfo ( username )
if err != nil {
return err
}
2023-05-24 21:11:49 +00:00
if e621User . IsBanned {
log . Printf ( "User %s is banned from e621!" , e621User . Name )
return nil
}
log . Printf ( "Processing user: %s with id %d" , e621User . Name , e621User . ID )
2023-05-24 14:05:27 +00:00
err = neo4jAPI . CreateUserNode ( ctx , driver , e621User )
if err != nil {
log . Fatal ( err )
}
2023-05-24 21:11:49 +00:00
log . Printf ( "Getting favorites for user %s with id %d" , e621User . Name , e621User . ID )
start := time . Now ( )
2023-05-24 14:05:27 +00:00
userFavorites , err := e621Client . GetFavorites ( e621User . Name )
if err != nil {
log . Fatal ( err )
}
2023-05-24 21:11:49 +00:00
log . Printf ( "User %s with id %d has %d favorites. Time took to scrape: %v" , e621User . Name , e621User . ID , len ( userFavorites ) , time . Since ( start ) )
2023-05-24 14:05:27 +00:00
startUploadPosts := time . Now ( )
2023-05-24 21:11:49 +00:00
// Uploads all Tags, Posts as Nodes to Neo4j
for i , post := range userFavorites {
start = time . Now ( )
err = uploadNodes ( ctx , driver , post )
if err != nil {
return err
}
log . Printf ( "Uploading post for user %s with id %d, %d of %d with ID: %d took: %v" , e621User . Name , e621User . ID , i , len ( userFavorites ) , post . ID , time . Since ( start ) )
}
// Makes relationships between different nodes
2023-05-24 14:05:27 +00:00
for i , post := range userFavorites {
start := time . Now ( )
2023-05-24 21:11:49 +00:00
err = uploadPostToUserRelationship ( ctx , driver , post , e621User )
if err != nil {
log . Fatal ( err )
return err
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
err = uploadSourceTagRelationship ( ctx , driver , post )
if err != nil {
log . Fatal ( err )
return err
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
err = uploadGeneralTagRelationship ( ctx , driver , post )
if err != nil {
log . Fatal ( err )
return err
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
err = uploadCharacterTagtRelationship ( ctx , driver , post )
if err != nil {
log . Fatal ( err )
return err
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
err = uploadCopyrightTagRelationship ( ctx , driver , post )
if err != nil {
log . Fatal ( err )
return err
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
err = uploadArtistTagRelationship ( ctx , driver , post )
2023-05-24 14:05:27 +00:00
if err != nil {
2023-05-24 21:11:49 +00:00
log . Fatal ( err )
2023-05-24 14:05:27 +00:00
return err
}
2023-05-24 21:11:49 +00:00
log . Printf ( "Making relationship for user %s with id %d, %d for Post: %d with ID: %d took: %v" , e621User . Name , e621User . ID , i , len ( userFavorites ) , post . ID , time . Since ( start ) )
}
log . Printf ( "Uploading all posts for user %s took: %v" , username , time . Since ( startUploadPosts ) )
return nil
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
// uploadNodes uploads the post to the database and creates the nodes
func uploadNodes ( ctx context . Context , driver neo4j . DriverWithContext , post models . Post ) error {
uniqueGeneralTags := make ( [ ] string , 0 )
uniqueCharacterTags := make ( [ ] string , 0 )
uniqueCopyrightTags := make ( [ ] string , 0 )
uniqueArtistTags := make ( [ ] string , 0 )
allGeneralTags := make ( [ ] string , 0 )
allCharacterTags := make ( [ ] string , 0 )
allCopyrightTags := make ( [ ] string , 0 )
allArtistTags := make ( [ ] string , 0 )
allGeneralTags = append ( allGeneralTags , post . Tags . General ... )
allCharacterTags = append ( allCharacterTags , post . Tags . Character ... )
allCopyrightTags = append ( allCopyrightTags , post . Tags . Character ... )
allArtistTags = append ( allArtistTags , post . Tags . Artist ... )
uniqueGeneralTags = utils . UniqueNonEmptyElementsOf ( allGeneralTags )
uniqueCharacterTags = utils . UniqueNonEmptyElementsOf ( allCharacterTags )
uniqueCopyrightTags = utils . UniqueNonEmptyElementsOf ( allCopyrightTags )
uniqueArtistTags = utils . UniqueNonEmptyElementsOf ( allArtistTags )
// Uploads post to database
err := neo4jAPI . CreatePostNode ( ctx , driver , post . ID )
if err != nil {
return err
}
// Uploads the source to the database
for _ , source := range post . Sources {
err := neo4jAPI . CreateSourceNode ( ctx , driver , source )
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
for _ , uniqueGeneralTag := range uniqueGeneralTags {
err := neo4jAPI . CreateTagNode ( ctx , driver , uniqueGeneralTag , "general" )
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
}
for _ , uniqueCharacterTag := range uniqueCharacterTags {
err := neo4jAPI . CreateTagNode ( ctx , driver , uniqueCharacterTag , "character" )
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
for _ , uniqueCopyrightTag := range uniqueCopyrightTags {
err := neo4jAPI . CreateTagNode ( ctx , driver , uniqueCopyrightTag , "copyright" )
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
for _ , uniqueArtistTag := range uniqueArtistTags {
err := neo4jAPI . CreateTagNode ( ctx , driver , uniqueArtistTag , "artist" )
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
}
2023-05-24 21:11:49 +00:00
return nil
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
// uploadPostToUserRelationship links the given post to the given user by
// calling neo4jAPI.UserToPostRelationship with the post ID and the user ID.
// It returns the error from that call, or nil on success.
func uploadPostToUserRelationship(ctx context.Context, driver neo4j.DriverWithContext, post models.Post, e621User models.E621User) error {
	if linkErr := neo4jAPI.UserToPostRelationship(ctx, driver, post.ID, e621User.ID); linkErr != nil {
		return linkErr
	}
	return nil
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
// uploadSourceTagRelationship creates a relationship between the post and the source
func uploadSourceTagRelationship ( ctx context . Context , driver neo4j . DriverWithContext , post models . Post ) error {
for _ , source := range post . Sources {
err := neo4jAPI . PostToSourceRelationship ( ctx , driver , post . ID , source )
2023-05-24 14:05:27 +00:00
if err != nil {
return err
}
2023-05-24 21:11:49 +00:00
// log.Printf("Created PostToSourceRelationship for Post: %d to source: %s", post.ID, source)
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
}
return nil
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
// uploadGeneralTagRelationship creates a relationship between the post and the general tag
func uploadGeneralTagRelationship ( ctx context . Context , driver neo4j . DriverWithContext , post models . Post ) error {
for _ , generalTag := range post . Tags . General {
err := neo4jAPI . PostToTagRelationship ( ctx , driver , post . ID , generalTag )
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
// log.Printf("Created PostToTagRelationship for post: %d to general tag: %s", post.ID, generalTag)
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
}
return nil
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
// uploadCharacterTagtRelationship creates a relationship between the post and the character tag
func uploadCharacterTagtRelationship ( ctx context . Context , driver neo4j . DriverWithContext , post models . Post ) error {
for _ , characterTag := range post . Tags . Character {
err := neo4jAPI . PostToTagRelationship ( ctx , driver , post . ID , characterTag )
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
// log.Printf("Created PostToTagRelationship for post: %d to character tag: %s", post.ID, characterTag)
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
}
return nil
}
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
// uploadCopyrightTagRelationship creates a relationship between the post and the copyright tag
func uploadCopyrightTagRelationship ( ctx context . Context , driver neo4j . DriverWithContext , post models . Post ) error {
for _ , copyrightTag := range post . Tags . Copyright {
err := neo4jAPI . PostToTagRelationship ( ctx , driver , post . ID , copyrightTag )
if err != nil {
return err
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
// log.Printf("Created PostToTagRelationship for post: %d to copyrigh tag: %s", post.ID, copyrightTag)
2023-05-24 14:05:27 +00:00
}
2023-05-24 21:11:49 +00:00
return nil
}
// uploadArtistTagRelationship creates a relationship between the post and the artist tag
func uploadArtistTagRelationship ( ctx context . Context , driver neo4j . DriverWithContext , post models . Post ) error {
for _ , artistTag := range post . Tags . Artist {
err := neo4jAPI . PostToTagRelationship ( ctx , driver , post . ID , artistTag )
if err != nil {
return err
}
// log.Printf("Created PostToTagRelationship for post: %d to artist tag: %s", post.ID, artistTag)
2023-05-24 14:05:27 +00:00
2023-05-24 21:11:49 +00:00
}
2023-05-24 14:05:27 +00:00
return nil
}
2023-05-24 21:11:49 +00:00
// Timing note: 11 min for Selloo, run simultaneously with mutt_jake.