Commit

remove some comments
Jan Haslik authored and Jan Haslik committed Sep 5, 2024
1 parent 3e0060b commit 496b5b0
Showing 2 changed files with 6 additions and 8 deletions.
8 changes: 3 additions & 5 deletions crawler/crawler/crawler.go
@@ -18,7 +18,6 @@ type document struct {
     Content string `json:"content"`
 }
 
-// Create a global map to track visited URLs and a mutex to ensure thread-safe access
 var visitedUrls = make(map[string]bool)
 var mu sync.Mutex
 
@@ -37,7 +36,7 @@ func Crawl(urls []string, numWorkers int) error {
     urlChan := make(chan string, len(urls)) // Channel for URLs to process
     errChan := make(chan error, numWorkers) // Channel for errors
 
-    // Fill the channel with initial URLs
+    // Initial URLs
     for _, u := range urls {
         urlChan <- u
     }
@@ -61,7 +60,6 @@ func Crawl(urls []string, numWorkers int) error {
         }(i)
     }
 
-    // Close the error channel after all workers are done
     go func() {
         wg.Wait()
         close(errChan)
@@ -93,7 +91,7 @@ func fetch(url string, urlChan chan<- string) error {
         fmt.Printf("Skipping already visited URL: %s\n", url)
         return nil
     }
-    visitedUrls[url] = true // Mark URL as visited
+    visitedUrls[url] = true // Url is visited
     mu.Unlock()
 
     res, err := http.Get(url)
@@ -115,7 +113,7 @@ func fetch(url string, urlChan chan<- string) error {
     content = ExtractStrings(content)
     content = CleanContent(content)
 
-    // Create a document object to send to the indexer server
+    // Document object to send to the indexer server
     doc := document{
         Url:     url,
         Content: content,
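
For context, the hunks above come from a worker-pool crawl loop: the initial URLs go into a buffered channel, numWorkers goroutines drain it, and errors are collected on a second channel that is closed once the WaitGroup finishes. Below is a minimal, self-contained sketch of that pattern, not the repository's exact code: fetchURL is a hypothetical stand-in for the real fetch, and the sketch closes urlChan up front instead of feeding discovered links back into it as the real fetch appears to do.

package main

import (
	"fmt"
	"sync"
)

// fetchURL is a hypothetical stand-in for the crawler's fetch step.
func fetchURL(url string) error {
	fmt.Println("fetched:", url)
	return nil
}

// crawl distributes URLs across numWorkers goroutines, mirroring the
// urlChan/errChan/WaitGroup layout visible in the hunks above.
func crawl(urls []string, numWorkers int) error {
	urlChan := make(chan string, len(urls)) // URLs to process
	errChan := make(chan error, numWorkers) // worker errors

	// Seed the channel with the initial URLs, then close it so workers can drain it
	// (simplification: the real crawler keeps the channel open for discovered links).
	for _, u := range urls {
		urlChan <- u
	}
	close(urlChan)

	var wg sync.WaitGroup
	for i := 0; i < numWorkers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for u := range urlChan {
				if err := fetchURL(u); err != nil {
					errChan <- err
				}
			}
		}()
	}

	// Close the error channel once all workers have finished.
	go func() {
		wg.Wait()
		close(errChan)
	}()

	// Drain the error channel and report the first error, if any.
	var firstErr error
	for err := range errChan {
		if firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}

func main() {
	if err := crawl([]string{"https://example.com", "https://example.org"}, 2); err != nil {
		fmt.Println("crawl error:", err)
	}
}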
6 changes: 3 additions & 3 deletions crawler/main.go
@@ -12,12 +12,12 @@ import (
  * @brief Main function of the Asura Crow crawler to connect to the database and start the web crawling process every 5 minutes.
  */
 func main() {
-    // Create a ticker to trigger the crawling process every 5 minutes
+    // Ticker to trigger the crawling process every 5 minutes
     ticker := time.NewTicker(time.Second * 5)
     defer ticker.Stop()
 
     var wg sync.WaitGroup
-    wg.Add(1) // Add a dummy entry to the wait group to prevent main from exiting
+    wg.Add(1) // Dummy entry to the wait group to prevent main from exiting
 
     // Goroutine to perform crawling at regular intervals
     go func() {
@@ -42,7 +42,7 @@ func performCrawling() {
         return
     }
 
-    // Retrieve website URLs from the database
+    // Get website URLs from the database
     websiteUrls, err := conn.GetWebsiteUrls()
     if err != nil {
         fmt.Println("Error retrieving website URLs:", err)
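
The main.go hunks show a ticker driving performCrawling on a fixed interval, with a WaitGroup entry keeping main alive. A minimal sketch of that loop follows, with performCrawling as a placeholder; note the doc comment says "every 5 minutes" while the code uses time.Second * 5, and the sketch keeps the 5-second interval as written.

package main

import (
	"fmt"
	"sync"
	"time"
)

// performCrawling stands in for the real function shown in the hunk above.
func performCrawling() {
	fmt.Println("crawling at", time.Now().Format(time.RFC3339))
}

func main() {
	// Fire the crawl on a fixed interval (5 seconds, as in the diff).
	ticker := time.NewTicker(time.Second * 5)
	defer ticker.Stop()

	var wg sync.WaitGroup
	wg.Add(1) // keep main alive while the ticker goroutine runs

	go func() {
		defer wg.Done()
		for range ticker.C {
			performCrawling()
		}
	}()

	wg.Wait()
}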
