Tuesday, January 27, 2015

Server hang monitor

I recently had an issue where a Windows 2012 R2 VM would hang in the middle of the night.  No clue why.  Event logs were of no help.  Our best guess is that our internal vulnerability scan made it upset.  Either way, this server was a major departmental file store, and if it isn't up in the morning we get hammered with tickets.

I decided to create a script to manage this issue.  It uses 2 text files for input, located in the same folder as the script.  It requires the VMware PowerShell extensions.  It only works on virtual servers as written.  If the server is running but not responding, vSphere will reset it.  I have this running as a recurring 15-minute scheduled task.


Param(
  # Pass "-Debug $true" to print progress messages to the console.
  # Defaults to quiet operation, which is what the scheduled task uses.
  $Debug = $False
)
#======================================================================================
#         File Name : Server-Alive.ps1
#   Original Author : Kenneth C. Mazie (kcmjr AT kcmjr.com)
#                   :
#    PowerShell Ver : 3
#                   :
#       Description : Used to reboot hung virtual servers automatically.
#                   :
#             Notes : Normal operation is with no command line options.  Use -debug $true
#                   : to enable debugging messages on console.  Requires VMware
#                   : PowerShell extensions.  Needs 2 config files located with the script
#                   : Servers.txt contains list of server(s) one per line to target,
#                   : Config.txt contains all other needed settings-
#                   :   user = vsphere username
#                   :   pass = vsphere password
#                   :   vCenter = vsphere server IP
#                   :   smtpServer = SMTP server
#                   :   emailDomain = obvious
#                   :   emailfrom = what this process is called (minus email domain)
#                   :   emailTo = who should get the emails (minus email domain)
#                   :
#          Warnings : Yes Virginia, this WILL reboot your server...
#                   : 
#             Legal : Public Domain. Modify and redistribute freely. No rights reserved.
#                   : SCRIPT PROVIDED "AS IS" WITHOUT WARRANTIES OR GUARANTEES OF
#                   : ANY KIND. USE AT YOUR OWN RISK. NO TECHNICAL SUPPORT PROVIDED.
#                   :
#           Credits : Code snippets and/or ideas came from many sources including but
#                   :   not limited to the following:
#                   :
#    Last Update by : Kenneth C. Mazie
#   Version History : v1.0 - 01-27-15 - Original
#    Change History : v1.1 - 00-00-00 -    
#                   :
#=======================================================================================

Clear-Host
# Treat every error as terminating so a failed setup step aborts the run.
$ErrorActionPreference = "Stop"

# Load the VMware PowerCLI snap-in only when it is not already present in this session.
If (-not (Get-PSSnapin -Name "VMware.VimAutomation.Core" -ErrorAction SilentlyContinue)){
  Add-PSSnapin VMware.VimAutomation.Core
}

# The functions and the main loop read $Global:Debug.  Always assign it so a stale
# $True left over from an earlier run in the same session cannot linger.
$Global:Debug = [bool]$Debug

#--[ Settings: read from Config.txt beside the script, otherwise prompt ]--
$ConfigFile = Join-Path $PSScriptRoot "Config.txt"
If (Test-Path $ConfigFile){
  $Config = @{}
  ForEach ($Line in (Get-Content $ConfigFile)){
    # Split on the FIRST "=" only so values that contain "=" (e.g. passwords) survive intact.
    $Parts = @($Line -split '=', 2)
    $Key = $Parts[0].Trim()
    # Ignore blank lines and "[section]" style headers.
    If (($Key -eq "") -or $Key.StartsWith("[")){Continue}
    $Value = $null
    If ($Parts.Count -gt 1){$Value = $Parts[1].Trim()}
    # Indexer assignment: a duplicated key overwrites instead of throwing.
    $Config[$Key] = $Value
  }
  $user = $Config.user
  $pass = $Config.pass
  $vCenter = $Config.vCenter
  $smtpServer = $Config.smtpServer
  $EmailDomain = $Config.EmailDomain
  $emailFrom = $Config.emailFrom
  $emailTo = $Config.emailTo
}Else{
  $user = Read-Host -Prompt "Enter the vSphere username."
  $pass = Read-Host -Prompt "Enter the vSphere password."
  $vCenter = Read-Host -Prompt "Enter the vSphere server."
  $smtpServer = Read-Host -Prompt "Enter the SMTP host."
  $EmailDomain = Read-Host -Prompt "Enter the Email domain."
  # Double quotes are required here so $EmailDomain is expanded.
  $emailFrom = "ServerHealth@$EmailDomain"
  $emailTo = Read-Host -Prompt "Enter the email recipient without domain."
}

# Sender and recipient may be supplied without a domain; complete them with
# $EmailDomain.  Values that already contain "@" are left untouched.
If ($EmailDomain){
  If ($emailFrom -and ($emailFrom -notmatch '@')){$emailFrom = "${emailFrom}@${EmailDomain}"}
  If ($emailTo -and ($emailTo -notmatch '@')){$emailTo = "${emailTo}@${EmailDomain}"}
}
$subject = 'Server Health Status Check'
$EmailBody = ""

#--[ Target list: one server per line in Servers.txt, otherwise prompt for a single name ]--
$ServerFile = Join-Path $PSScriptRoot "Servers.txt"
If (Test-Path $ServerFile){
  $Servers = Get-Content $ServerFile
}Else{
  # Must populate $Servers (plural): that is the collection the main loop iterates.
  $Servers = @(Read-Host -Prompt "Enter the Server to test.")
}

# Assigning the result keeps the connection object from being written to the console.
$Connect = Connect-VIServer -Server $vCenter -User $user -Password $pass

#----------------------------[ Functions ]--------------------------------------
# Sends $EmailBody as an HTML email.
#   $EmailBody : HTML text used as the message body.
# Reads the script-level $emailFrom, $emailTo, $subject and $smtpServer settings.
# Errors from building or sending the message propagate to the caller.
Function SendEmail ($EmailBody){
  If ($Global:Debug){Write-Host "`n   Sending email..."}
  $email = $null
  $smtp = $null
  Try{
    $email = New-Object System.Net.Mail.MailMessage
    $email.From = $emailFrom
    $email.IsBodyHtml = $True
    $email.To.Add($emailTo)
    $email.Subject = $subject
    $email.Body = $EmailBody
    $smtp = New-Object Net.Mail.SmtpClient($smtpServer)
    $smtp.Send($email)
  }Finally{
    # Both objects are IDisposable; release them even when Send() throws.
    If ($smtp){$smtp.Dispose()}
    If ($email){$email.Dispose()}
  }
}

# Pings $Server once and reports whether it answered.
#   $Server : host name or address to ping.
# Returns $True when the ping succeeds, $False on any failure.
Function Connection ($Server) {
  Try{
    # -ErrorAction Stop turns a failed ping into an exception we can catch;
    # the reply object itself is not needed, only success or failure.
    Test-Connection -ComputerName $Server -Count 1 -ErrorAction Stop | Out-Null
    Return $True
  }Catch{
    # Any failure (ping exception, name resolution, bad argument) means "not reachable".
    # Catching everything keeps one bad entry from aborting the whole monitoring run.
    Return $False
  }
}
#--------------------------[ End of Functions ]---------------------------------

# Main loop: run three health checks per server, then decide whether to restart
# the VM, send a warning email only, or do nothing.
ForEach($Server in $Servers){
  # Skip blank lines so an empty entry in the list is never pinged or looked up in vSphere.
  If ([string]::IsNullOrWhiteSpace($Server)){Continue}
  $Server = $Server.Trim().ToUpper()
  If ($Global:Debug){Write-Host "Beginning connectivity test to ""$Server""..." -ForegroundColor Cyan}
  # Start a fresh report for each server so an email about one server does not
  # repeat the results already gathered for the servers processed before it.
  $EmailBody = "--[ Heath check for server $Server ]--<br>"

  If($Global:Debug){Write-Host "`n--[ Check #1 - Generic Ping Test ]---------------------------------------------" -ForegroundColor Cyan}
  # Up to three ping attempts.  The check passes as soon as ONE attempt succeeds;
  # it fails only when all three attempts fail.
  $Check1 = $False
  ForEach ($Attempt in "1st","2nd","3rd"){
    If (Connection $Server){
      If ($Global:Debug){Write-Host "   ""$Server"" has responded to ping..." -ForegroundColor Green }
      $EmailBody = $EmailBody + "<font color=green><br>- $Server has successfully responded to the $Attempt ping check..."
      $Check1 = $True
      Break
    }
    If ($Global:Debug){Write-Host "   ""$Server"" has failed $Attempt ping check..." -ForegroundColor Red }
    $EmailBody = $EmailBody + "<font color=red><br>- $Server has failed to respond to the $Attempt ping check..."
    # Pause after each failure (shortened in debug mode) before the next step.
    If ($Global:Debug){Start-Sleep -Seconds 3}Else{Start-Sleep -Seconds 30}
  }

  If($Global:Debug){Write-Host "`n--[ Check #2 - Service Inspection ]--------------------------------------------" -ForegroundColor Cyan}
  $Check2 = $True
  $Result = $null
  Try{
    # Errors are expected when the server is hung or unreachable; suppress them
    # and treat "no result" as "service not running".
    $Result = Get-Service -ComputerName $Server -ErrorAction SilentlyContinue | Where-Object {$_.DisplayName -eq "Server"}
  }Catch{
    $Result = $null
  }
  If ($Result.Status -eq "Running"){
    If ($Global:Debug){write-host "   Server service on $Server is running..." -ForegroundColor Green }
    $EmailBody = $EmailBody + "<font color=green><br>- Powershell detected that the SERVER service is running on $Server..."
  }Else{
    If ($Global:Debug){write-host "   Server service on $Server is unable to be detected..." -ForegroundColor Red }
    $EmailBody = $EmailBody + "<font color=red><br>- Powershell detected that the SERVER service is NOT running on $Server..."
    $Check2 = $False
  }

  If($Global:Debug){Write-Host "`n--[ Check #3 - VMWare Status ]-------------------------------------------------" -ForegroundColor Cyan}
  $Check3 = $True
  $Result = $null
  Try{
    $Result = Get-VMGuest -VM $Server
  }Catch{
    # A failed lookup (e.g. no VM with this name) counts as "not running" rather
    # than terminating the script before the remaining servers are checked.
    $Result = $null
  }

  If ($Result.State -eq "Running"){
    If ($Global:Debug){write-host "   $Server is running..." -ForegroundColor Green }
    $EmailBody = $EmailBody + "<font color=green><br>- VMware detected that $Server is running..."
  }Else{
    If ($Global:Debug){write-host "   $Server is shut down..." -ForegroundColor Red }
    $EmailBody = $EmailBody + "<font color=red><br>- VMware detected that $Server is not running..."
    $Check3 = $False
  }

  If($Global:Debug){Write-Host "`n--[ Final Determination ]-----------------------------------------------------" -ForegroundColor Cyan}

  If (($Check1 -eq $False) -and ($Check2 -eq $False) -and ($Check3 -eq $True)){
    #--[ Server is hung... restart if within the window ]--
    # Restart window: hour 18 through hour 6 (inclusive of the whole 06:xx hour).
    $Hour = (Get-Date).Hour
    If (($Hour -le 6) -or ($Hour -ge 18)){
      $Restart = Restart-VM -VM $Server -Confirm:$false
      If ($Global:Debug){Write-Host "   Server $Server has failed some health checks and is being forceably restarted. " -ForegroundColor Red }
      $EmailBody = $EmailBody + "<font color=red><br>- Server $Server has failed some health checks and is being forceably restarted. "
      SendEmail $EmailBody
    }Else{
      If ($Global:Debug){Write-Host "   Server $Server has failed some health checks and should be forceably restarted but cannot due to being outside the time window. " -ForegroundColor Red }
      $EmailBody = $EmailBody + "<font color=darkcyan><br>- Server $Server has failed some health checks and should be forceably restarted but cannot due to being outside the safe reboot time window. "
      SendEmail $EmailBody
    }
  }ElseIf (($Check1 -eq $False) -and ($Check2 -eq $False) -and ($Check3 -eq $False)){
    #--[ Server may be hung but could be rebooting or intentionally shut down.  No auto restart, email only ]--
    If ($Global:Debug){Write-Host "   Server $Server has failed some health checks and should be checked.  It is NOT being forceably restarted. " -ForegroundColor Yellow  }
    $EmailBody = $EmailBody + "<font color=red><br>- Server $Server has failed some health checks and should be checked.  It is NOT being forceably restarted. "
    SendEmail $EmailBody
  }Else{
    #--[ At least one of the ping/service checks passed: treat the server as online, no email ]--
    If ($Global:Debug){Write-Host "   Server $Server has passed enough connection validation to assume it is onlne.  No action taken." -ForegroundColor Green}
    #$EmailBody = $EmailBody + "<font color=green><br>- Server $Server has passed enough connection validation to assume it is onlne.  No action taken. "
    #SendEmail $EmailBody
  }
}


# Close the vCenter session opened at startup; -Force and -Confirm:$false keep it non-interactive.
Disconnect-VIServer -Server $vCenter -Force -Confirm:$false