The following shell script automatically checks and summarizes the Apache web server logs of all domains in Plesk. This gives you an overview of traffic and potential problems, even with many domains and large log files.
#!/bin/bash

# Define output files
output_file="/var/www/vhosts/_analyze_logs/traffic-pages.txt"
output_wordpress="/var/www/vhosts/_analyze_logs/traffic-wordpress.txt"
output_5xx="/var/www/vhosts/_analyze_logs/traffic-5xx.txt"
output_404="/var/www/vhosts/_analyze_logs/traffic-404.txt"
output_bots="/var/www/vhosts/_analyze_logs/traffic-bots.txt"
output_ips="/var/www/vhosts/_analyze_logs/traffic-ips.txt"

# Define temporary files
temp_file="/tmp/combined_traffic.txt"
temp_wordpress="/tmp/combined_wordpress.txt"
temp_5xx="/tmp/combined_5xx.txt"
temp_404="/tmp/combined_404.txt"
temp_bots="/tmp/combined_bots.txt"

# Default values
hours_back=24
num_entries=30
domain=""
mail_results=""
bots_min_count=500
ips_min_count=1000
log_file="access_ssl_log"

# Check if arguments are provided for hours back, number of entries, specific domain,
# mail results, bots_min_count, ips_min_count, and log_file
if [ $# -gt 0 ]; then
    hours_back=$1
fi
if [ $# -gt 1 ]; then
    num_entries=$2
fi
if [ $# -gt 2 ]; then
    domain=$3
fi
if [ $# -gt 3 ]; then
    mail_results=$4
fi
if [ $# -gt 4 ]; then
    bots_min_count=$5
fi
if [ $# -gt 5 ]; then
    ips_min_count=$6
fi
if [ $# -gt 6 ]; then
    log_file=$7
fi

# Start timing the script
start_time=$(date +%s%3N)

# Calculate the date and time threshold
date_threshold=$(date -d "-$hours_back hours" +"%d/%b/%Y:%H:%M:%S")

# Initialize the output files
echo "Top $num_entries Most Viewed Pages Across All Logs (Last $hours_back hours)" > "$output_file"
echo "Generated on: $(date +'%d/%b/%Y %H:%M:%S')" >> "$output_file"
echo "" >> "$output_file"

echo "Top $num_entries WordPress Related Traffic Across All Logs (Last $hours_back hours)" > "$output_wordpress"
echo "Generated on: $(date +'%d/%b/%Y %H:%M:%S')" >> "$output_wordpress"
echo "" >> "$output_wordpress"

echo "Top $num_entries 5xx Errors Across All Logs (Last $hours_back hours)" > "$output_5xx"
echo "Generated on: $(date +'%d/%b/%Y %H:%M:%S')" >> "$output_5xx"
echo "" >> "$output_5xx"

echo "Top $num_entries 404 Errors Across All Logs (Last $hours_back hours)" > "$output_404"
echo "Generated on: $(date +'%d/%b/%Y %H:%M:%S')" >> "$output_404"
echo "" >> "$output_404"

echo "Bot Traffic >= $bots_min_count Across All Logs (Last $hours_back hours)" > "$output_bots"
echo "Generated on: $(date +'%d/%b/%Y %H:%M:%S')" >> "$output_bots"
echo "" >> "$output_bots"

echo "Top $num_entries IP Addresses Across All Logs (Last $hours_back hours)" > "$output_ips"
echo "Generated on: $(date +'%d/%b/%Y %H:%M:%S')" >> "$output_ips"
echo "" >> "$output_ips"

# Process each log and combine results into temporary files
> "$temp_file"
> "$temp_wordpress"
> "$temp_5xx"
> "$temp_404"

# Determine the log files to process
if [ -n "$domain" ]; then
    logs_to_process=(/var/www/vhosts/$domain/logs/$log_file)
else
    logs_to_process=(/var/www/vhosts/*/logs/$log_file)
fi

# Define pattern for static files
static_files_pattern='\.(css|js|jpg|jpeg|png|gif|webp|ico|svg|woff|woff2|ttf|eot|pdf|zip|map|mp4|webm|mp3|wav|doc|docx|xls|xlsx|rar|tar|gz)(\?.*)?$'

# Define pattern for normal traffic
normal_traffic_pattern="$static_files_pattern|\/wp-content\/|\/wp-admin\/|\/wp-json\/|koko-analytics|wp-cron\.php|\/wp-includes\/|wc-ajax=|\/favicons\/|\/xmlrpc\.php|\/feed\/|robots\.txt|sitemap|wp-login\.php"

# Define pattern for WordPress traffic
wordpress_traffic_pattern="wp-cron\.php|\/wp-includes\/|wc-ajax=|\/xmlrpc\.php|wp-login\.php|\/wp-json\/|\/wp-admin\/|\/login\/|\/wp-content\/themes\/|\/wp-content\/plugins\/|\/feed\/|\/wp-comments-post\.php|\/trackback\/"

# Define bot search patterns
bot_pattern_include="bot|spider|crawler|slurp|bing|yandex|baidu|AdsBot-Google|Googlebot|Applebot|BingSapphire|Plesk screenshot bot|bingbot|Bytespider|DuckDuckBot|Xing Bot|YandexBot|Sogou Spider|Yahoo! Slurp|Facebot"
bot_pattern_exclude="pingdom|UptimeRobot|StatusCake|Site24x7|Uptime\.com|Monitis|Uptrends|Dotcom-Monitor|Updown\.io|Hetrix|NodePing"

for log in "${logs_to_process[@]}"; do
    # Extract domain name
    domain=$(basename "$(dirname "$(dirname "$log")")")

    # Process normal traffic
    awk -v date_threshold="$date_threshold" -F\" '$4 > date_threshold {print $2}' "$log" | \
        awk -v domain="$domain" '{print domain $2}' | \
        grep -v -E "$normal_traffic_pattern" >> "$temp_file"

    # Process WordPress traffic
    awk -v date_threshold="$date_threshold" -F\" '$4 > date_threshold {print $2}' "$log" | \
        awk -v domain="$domain" '{print domain $2}' | \
        grep -E "$wordpress_traffic_pattern" | grep -v -E "$static_files_pattern" >> "$temp_wordpress"

    # Process 5xx errors
    awk -v date_threshold="$date_threshold" -v domain="$domain" '$4 > date_threshold && $9 >= 500 && $9 < 600 {print domain $7}' "$log" >> "$temp_5xx"

    # Process 404 errors
    awk -v date_threshold="$date_threshold" -v domain="$domain" '$4 > date_threshold && $9 == 404 {print domain $7}' "$log" >> "$temp_404"

    # Process bot traffic
    # Extract user agents, count occurrences, and store the results in a variable to check the count
    results=$(awk -v date_threshold="$date_threshold" -F\" '$4 > date_threshold {print $6}' "$log" | \
        grep -i -E "$bot_pattern_include" | grep -i -v -E "$bot_pattern_exclude" | \
        sort | uniq -c | sort -rn | awk -v min="$bots_min_count" '$1 >= min')

    # Only output if results exist
    if [ ! -z "$results" ]; then
        echo "$domain" >> "$output_bots"
        echo "$results" >> "$output_bots"
        echo " " >> "$output_bots"
    fi

    # Process IP addresses
    # Extract IP addresses and count occurrences
    results=$(awk -v date_threshold="$date_threshold" '$4 > date_threshold {print $1}' "$log" | \
        sort | uniq -c | sort -rn | awk -v min="$ips_min_count" '$1 >= min')

    # Only output if results exist
    if [ ! -z "$results" ]; then
        echo "$domain" >> "$output_ips"
        echo "$results" >> "$output_ips"
        echo " " >> "$output_ips"
    fi
done

# Aggregate and sort results for normal traffic
sort "$temp_file" | uniq -c | sort -rn | head -n "$num_entries" >> "$output_file"

# Aggregate and sort results for WordPress traffic
sort "$temp_wordpress" | uniq -c | sort -rn | head -n "$num_entries" >> "$output_wordpress"

# Aggregate and sort results for 5xx errors
sort "$temp_5xx" | uniq -c | sort -rn | head -n "$num_entries" >> "$output_5xx"

# Aggregate and sort results for 404 errors
sort "$temp_404" | uniq -c | sort -rn | head -n "$num_entries" >> "$output_404"

# Clean up temporary files
rm "$temp_file"
rm "$temp_wordpress"
rm "$temp_5xx"
rm "$temp_404"

# End timing the script
end_time=$(date +%s%3N)
execution_time=$((end_time - start_time))

echo "Results saved to:"
echo "- $output_file"
echo "- $output_wordpress"
echo "- $output_5xx"
echo "- $output_404"
echo "- $output_bots"
echo "- $output_ips"
echo "Script execution time: ${execution_time}ms"
Shell script last updated on 05.12.2024
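The script takes up to seven positional parameters: hours back, number of entries, a specific domain, mail results, the bot threshold, the IP threshold, and the log file name. A minimal usage sketch follows, assuming the script has been saved as /var/www/vhosts/_analyze_logs/analyze-logs.sh and made executable (this path and file name are placeholders, not part of the script itself):

# Last 24 hours for all domains with the default settings
/var/www/vhosts/_analyze_logs/analyze-logs.sh

# Last 48 hours, top 50 entries, limited to a single domain (example.com is a placeholder)
/var/www/vhosts/_analyze_logs/analyze-logs.sh 48 50 example.com

# All domains, last 24 hours, top 30 entries, lower thresholds for bots (200) and IPs (500),
# reading the non-SSL log instead of access_ssl_log
/var/www/vhosts/_analyze_logs/analyze-logs.sh 24 30 "" "" 200 500 access_log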
This creates the files traffic-pages.txt, traffic-wordpress.txt, traffic-5xx.txt, traffic-404.txt, traffic-bots.txt, and traffic-ips.txt, which contain the corresponding filtered data.
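For a recurring overview, the analysis can also be scheduled via cron. The entry below is only an example; the schedule, user, and script path are assumptions and should be adapted to the actual setup:

# /etc/cron.d/analyze-logs (example entry)
# Run the analysis for the last 24 hours every morning at 06:00
0 6 * * * root /var/www/vhosts/_analyze_logs/analyze-logs.sh 24 30 >/dev/null 2>&1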