Page 1 of 1

8071(48,12,101) and 8070(96,5,61)

Posted: Thu Apr 04, 2013 10:00 pm
by davidcoton
Two successive GPU failures.
Config:

Code: Select all

*********************** Log Started 2013-03-30T22:01:45Z ***********************
22:01:45:************************* Folding@home Client *************************
22:01:45:      Website: http://folding.stanford.edu/
22:01:45:    Copyright: (c) 2009-2013 Stanford University
22:01:45:       Author: Joseph Coffland <joseph@cauldrondevelopment.com>
22:01:45:         Args: 
22:01:45:       Config: C:/Users/David/AppData/Roaming/FAHClient/config.xml
22:01:45:******************************** Build ********************************
22:01:45:      Version: 7.3.6
22:01:45:         Date: Feb 18 2013
22:01:45:         Time: 15:25:17
22:01:45:      SVN Rev: 3923
22:01:45:       Branch: fah/trunk/client
22:01:45:     Compiler: Intel(R) C++ MSVC 1500 mode 1200
22:01:45:      Options: /TP /nologo /EHa /Qdiag-disable:4297,4103,1786,279 /Ox -arch:SSE
22:01:45:               /QaxSSE2,SSE3,SSSE3,SSE4.1,SSE4.2 /Qopenmp /Qrestrict /MT /Qmkl
22:01:45:     Platform: win32 XP
22:01:45:         Bits: 32
22:01:45:         Mode: Release
22:01:45:******************************* System ********************************
22:01:45:          CPU: AMD Athlon(tm) II X4 640 Processor
22:01:45:       CPU ID: AuthenticAMD Family 16 Model 5 Stepping 3
22:01:45:         CPUs: 4
22:01:45:       Memory: 3.50GiB
22:01:45:  Free Memory: 2.31GiB
22:01:45:      Threads: WINDOWS_THREADS
22:01:45:  Has Battery: false
22:01:45:   On Battery: false
22:01:45:   UTC offset: 0
22:01:45:          PID: 5576
22:01:45:          CWD: C:/Users/David/AppData/Roaming/FAHClient
22:01:45:           OS: Windows Vista (TM) Home Premium Service Pack 2
22:01:45:      OS Arch: X86
22:01:45:         GPUs: 1
22:01:45:        GPU 0: NVIDIA:3 GK104 [GeForce GTX 660 Ti]
22:01:45:         CUDA: 3.0
22:01:45:  CUDA Driver: 5000
22:01:45:Win32 Service: false
22:01:45:***********************************************************************
22:01:45:<config>
22:01:45:  <service-description v='Folding@home Client'/>
22:01:45:  <service-restart v='true'/>
22:01:45:  <service-restart-delay v='5000'/>
22:01:45:
22:01:45:  <!-- Client Control -->
22:01:45:  <client-threads v='4'/>
22:01:45:  <cycle-rate v='4'/>
22:01:45:  <cycles v='-1'/>
22:01:45:  <data-directory v='.'/>
22:01:45:  <disable-sleep-when-active v='true'/>
22:01:45:  <exec-directory v='C:\Program Files\FAHClient'/>
22:01:45:  <exit-when-done v='false'/>
22:01:45:  <fold-anon v='false'/>
22:01:45:  <open-web-control v='false'/>
22:01:45:
22:01:45:  <!-- Configuration -->
22:01:45:  <config-rotate v='true'/>
22:01:45:  <config-rotate-dir v='configs'/>
22:01:45:  <config-rotate-max v='16'/>
22:01:45:
22:01:45:  <!-- Debugging -->
22:01:45:  <assignment-servers>
22:01:45:    assign3.stanford.edu:8080 assign4.stanford.edu:80
22:01:45:  </assignment-servers>
22:01:45:  <capture-directory v='capture'/>
22:01:45:  <capture-on-error v='false'/>
22:01:45:  <capture-packets v='false'/>
22:01:45:  <capture-requests v='false'/>
22:01:45:  <capture-responses v='false'/>
22:01:45:  <capture-sockets v='false'/>
22:01:45:  <debug-sockets v='false'/>
22:01:45:  <exception-locations v='true'/>
22:01:45:  <gpu-assignment-servers>
22:01:45:    assign-GPU.stanford.edu:80 assign-GPU.stanford.edu:8080
22:01:45:  </gpu-assignment-servers>
22:01:45:  <stack-traces v='false'/>
22:01:45:
22:01:45:  <!-- Error Handling -->
22:01:45:  <max-slot-errors v='5'/>
22:01:45:  <max-unit-errors v='5'/>
22:01:45:
22:01:45:  <!-- Folding Core -->
22:01:45:  <checkpoint v='5'/>
22:01:45:  <core-dir v='cores'/>
22:01:45:  <core-priority v='idle'/>
22:01:45:  <cpu-affinity v='false'/>
22:01:45:  <cpu-usage v='100'/>
22:01:45:  <gpu-usage v='100'/>
22:01:45:  <no-assembly v='false'/>
22:01:45:
22:01:45:  <!-- Folding Slot Configuration -->
22:01:45:  <cause v='ANY'/>
22:01:45:  <client-subtype v='STDCLI'/>
22:01:45:  <client-type v='advanced'/>
22:01:45:  <cpu-species v='X86_AMD'/>
22:01:45:  <cpu-type v='X86'/>
22:01:45:  <cpus v='-1'/>
22:01:45:  <cuda-index v='0'/>
22:01:45:  <extra-core-args v='-forceasm'/>
22:01:45:  <gpu v='true'/>
22:01:45:  <max-packet-size v='normal'/>
22:01:45:  <opencl-index v='0'/>
22:01:45:  <os-species v='UNKNOWN'/>
22:01:45:  <os-type v='WIN32'/>
22:01:45:  <power v='full'/>
22:01:45:  <project-key v='0'/>
22:01:45:  <smp v='true'/>
22:01:45:
22:01:45:  <!-- HTTP Server -->
22:01:45:  <allow v='127.0.0.1 192.168.1.0/24'/>
22:01:45:  <connection-timeout v='60'/>
22:01:45:  <deny v='0/0'/>
22:01:45:  <http-addresses v='0:7396'/>
22:01:45:  <https-addresses v=''/>
22:01:45:  <max-connect-time v='900'/>
22:01:45:  <max-connections v='800'/>
22:01:45:  <max-request-length v='52428800'/>
22:01:45:  <min-connect-time v='300'/>
22:01:45:  <threads v='4'/>
22:01:45:
22:01:45:  <!-- Logging -->
22:01:45:  <log v='log.txt'/>
22:01:45:  <log-color v='false'/>
22:01:45:  <log-crlf v='true'/>
22:01:45:  <log-date v='false'/>
22:01:45:  <log-date-periodically v='21600'/>
22:01:45:  <log-debug v='true'/>
22:01:45:  <log-domain v='false'/>
22:01:45:  <log-header v='true'/>
22:01:45:  <log-level v='true'/>
22:01:45:  <log-no-info-header v='true'/>
22:01:45:  <log-redirect v='false'/>
22:01:45:  <log-rotate v='true'/>
22:01:45:  <log-rotate-dir v='logs'/>
22:01:45:  <log-rotate-max v='16'/>
22:01:45:  <log-short-level v='false'/>
22:01:45:  <log-simple-domains v='true'/>
22:01:45:  <log-thread-id v='false'/>
22:01:45:  <log-thread-prefix v='true'/>
22:01:45:  <log-time v='true'/>
22:01:45:  <log-to-screen v='true'/>
22:01:45:  <log-truncate v='false'/>
22:01:45:  <verbosity v='5'/>
22:01:45:
22:01:45:  <!-- Network -->
22:01:45:  <proxy v=':8080'/>
22:01:45:  <proxy-enable v='false'/>
22:01:45:  <proxy-pass v=''/>
22:01:45:  <proxy-user v=''/>
22:01:45:
22:01:45:  <!-- Process Control -->
22:01:45:  <child v='false'/>
22:01:45:  <daemon v='false'/>
22:01:45:  <pid v='false'/>
22:01:45:  <pid-file v='Folding@home Client.pid'/>
22:01:45:  <respawn v='false'/>
22:01:45:  <service v='false'/>
22:01:45:
22:01:45:  <!-- Remote Command Server -->
22:01:45:  <command-address v='0.0.0.0'/>
22:01:45:  <command-allow-no-pass v='127.0.0.1'/>
22:01:45:  <command-deny-no-pass v='0/0'/>
22:01:45:  <command-port v='36330'/>
22:01:45:  <password v='*******'/>
22:01:45:
22:01:45:  <!-- Slot Control -->
22:01:45:  <idle v='false'/>
22:01:45:  <max-shutdown-wait v='60'/>
22:01:45:  <pause-on-battery v='true'/>
22:01:45:  <pause-on-start v='false'/>
22:01:45:
22:01:45:  <!-- User Information -->
22:01:45:  <machine-id v='0'/>
22:01:45:  <passkey v='********************************'/>
22:01:45:  <team v='0'/>
22:01:45:  <user v='davidcoton'/>
22:01:45:
22:01:45:  <!-- Web Server -->
22:01:45:  <session-timeout v='3600'/>
22:01:45:  <web-allow v='127.0.0.1'/>
22:01:45:  <web-deny v='0/0'/>
22:01:45:
22:01:45:  <!-- Work Unit Control -->
22:01:45:  <dump-after-deadline v='true'/>
22:01:45:  <max-queue v='16'/>
22:01:45:  <max-units v='0'/>
22:01:45:  <next-unit-percentage v='99'/>
22:01:45:
22:01:45:  <!-- Folding Slots -->
22:01:45:  <slot id='0' type='CPU'>
22:01:45:    <cpus v='4'/>
22:01:45:  </slot>
22:01:45:  <slot id='1' type='GPU'/>
22:01:45:</config>
Log:

Code: Select all

[******************************* Date: 2013-04-04 *******************************]
00:39:11:WU02:FS01:Starting
00:39:11:WU02:FS01:Running FahCore: "C:\Program Files\FAHClient/FAHCoreWrapper.exe" C:/Users/David/AppData/Roaming/FAHClient/cores/www.stanford.edu/~pande/Win32/x86/NVIDIA/Fermi/Core_15.fah/FahCore_15.exe -dir 02 -suffix 01 -version 703 -lifeline 5576 -checkpoint 5 -gpu 0 -gpu-vendor nvidia -forceasm
00:39:11:WU02:FS01:Started FahCore on PID 7372
00:39:11:WU02:FS01:Core PID:8080
00:39:11:WU02:FS01:FahCore 0x15 started
00:39:12:WU02:FS01:0x15:
00:39:12:WU02:FS01:0x15:*------------------------------*
00:39:12:WU02:FS01:0x15:Folding@Home GPU Core
00:39:12:WU02:FS01:0x15:Version                2.25 (Wed May 9 17:03:01 EDT 2012)
00:39:12:WU02:FS01:0x15:Build host             AmoebaRemote
00:39:12:WU02:FS01:0x15:Board Type             NVIDIA/CUDA
00:39:12:WU02:FS01:0x15:Core                   15
00:39:12:WU02:FS01:0x15:
00:39:12:WU02:FS01:0x15:Window's signal control handler registered.
00:39:12:WU02:FS01:0x15:Preparing to commence simulation
00:39:12:WU02:FS01:0x15:- Assembly optimizations manually forced on.
00:39:12:WU02:FS01:0x15:- Not checking prior termination.
00:39:12:WU02:FS01:0x15:sizeof(CORE_PACKET_HDR) = 512 file=<>
00:39:12:WU02:FS01:0x15:- Expanded 58888 -> 259314 (decompressed 440.3 percent)
00:39:12:WU02:FS01:0x15:Called DecompressByteArray: compressed_data_size=58888 data_size=259314, decompressed_data_size=259314 diff=0
00:39:12:WU02:FS01:0x15:- Digital signature verified
00:39:12:WU02:FS01:0x15:
00:39:12:WU02:FS01:0x15:Project: 8071 (Run 48, Clone 12, Gen 101)
00:39:12:WU02:FS01:0x15:
00:39:12:WU02:FS01:0x15:Assembly optimizations on if available.
00:39:12:WU02:FS01:0x15:Entering M.D.
00:39:14:WU02:FS01:0x15:Tpr hash 02/wudata_01.tpr:  63313012 2932637097 925364161 3031880553 1217044142
00:39:14:WU02:FS01:0x15:GPU device id=0
00:39:14:WU02:FS01:0x15:Working on Glycine aRginine prOline Methionine Alanine Cystine Serine t=  48.00000
00:39:14:WU02:FS01:0x15:Client config unavailable.
00:39:14:WU02:FS01:0x15:Starting GUI Server
00:39:16:WU00:FS01:Upload complete
00:39:16:WU00:FS01:Server responded WORK_ACK (400)
00:39:16:WU00:FS01:Final credit estimate, 3874.00 points
00:39:16:WU00:FS01:Cleaning up
00:40:15:WU02:FS01:0x15:Setting checkpoint frequency: 500000
00:40:15:WU02:FS01:0x15:Completed         3 out of 50000000 steps (0%).
00:42:34:WU02:FS01:0x15:Completed    500000 out of 50000000 steps (1%).
...
03:46:18:WU02:FS01:0x15:Completed  40000000 out of 50000000 steps (80%).
03:48:07:WU02:FS01:0x15:Run: exception thrown in GuardedRun -- cannot continue further.
03:48:07:WU02:FS01:0x15:Going to send back what have done -- stepsTotalG=50000000
03:48:07:WU02:FS01:0x15:Work fraction=0.8078 steps=50000000.
03:48:11:WU02:FS01:0x15:logfile size=14418 infoLength=14418 edr=0 trr=23
03:48:11:WU02:FS01:0x15:+ Opened results file
03:48:11:WU02:FS01:0x15:- Writing 14954 bytes of core data to disk...
03:48:11:WU02:FS01:0x15:Done: 14442 -> 4722 (compressed to 32.6 percent)
03:48:11:WU02:FS01:0x15:  ... Done.
03:48:11:WU02:FS01:0x15:DeleteFrameFiles: successfully deleted file=02/wudata_01.ckp
03:48:11:WU02:FS01:0x15:
03:48:11:WU02:FS01:0x15:Folding@home Core Shutdown: UNSTABLE_MACHINE
03:48:11:WARNING:WU02:FS01:FahCore returned: UNSTABLE_MACHINE (122 = 0x7a)
03:48:12:WU02:FS01:Sending unit results: id:02 state:SEND error:FAULTY project:8071 run:48 clone:12 gen:101 core:0x15 unit:0x000000706652edb4512f2d41f485909a
03:48:12:WU02:FS01:Uploading 5.11KiB to 171.67.108.36
03:48:12:WU02:FS01:Connecting to 171.67.108.36:8080
03:48:12:WU00:FS01:Connecting to assign-GPU.stanford.edu:80
03:48:12:WU02:FS01:Upload complete
03:48:12:WU02:FS01:Server responded WORK_ACK (400)
03:48:13:WU02:FS01:Cleaning up
03:48:13:WU00:FS01:News: Welcome to Folding@Home
03:48:13:WU00:FS01:Assigned to work server 171.67.108.36
03:48:13:WU00:FS01:Requesting new work unit for slot 01: READY gpu:0:GK104 [GeForce GTX 660 Ti] from 171.67.108.36
03:48:13:WU00:FS01:Connecting to 171.67.108.36:8080
03:48:14:WU00:FS01:Downloading 57.38KiB
03:48:15:WU00:FS01:Download complete
03:48:15:WU00:FS01:Received Unit: id:00 state:DOWNLOAD error:NO_ERROR project:8070 run:96 clone:5 gen:61 core:0x15 unit:0x000000446652edb45122df81b882118a
03:48:15:WU00:FS01:Starting
03:48:15:WU00:FS01:Running FahCore: "C:\Program Files\FAHClient/FAHCoreWrapper.exe" C:/Users/David/AppData/Roaming/FAHClient/cores/www.stanford.edu/~pande/Win32/x86/NVIDIA/Fermi/Core_15.fah/FahCore_15.exe -dir 00 -suffix 01 -version 703 -lifeline 5576 -checkpoint 5 -gpu 0 -gpu-vendor nvidia -forceasm
03:48:15:WU00:FS01:Started FahCore on PID 7712
03:48:15:WU00:FS01:Core PID:3584
03:48:15:WU00:FS01:FahCore 0x15 started
03:48:15:WU00:FS01:0x15:
03:48:15:WU00:FS01:0x15:*------------------------------*
03:48:15:WU00:FS01:0x15:Folding@Home GPU Core
03:48:15:WU00:FS01:0x15:Version                2.25 (Wed May 9 17:03:01 EDT 2012)
03:48:15:WU00:FS01:0x15:Build host             AmoebaRemote
03:48:15:WU00:FS01:0x15:Board Type             NVIDIA/CUDA
03:48:15:WU00:FS01:0x15:Core                   15
03:48:15:WU00:FS01:0x15:
03:48:15:WU00:FS01:0x15:Window's signal control handler registered.
03:48:15:WU00:FS01:0x15:Preparing to commence simulation
03:48:15:WU00:FS01:0x15:- Assembly optimizations manually forced on.
03:48:15:WU00:FS01:0x15:- Not checking prior termination.
03:48:15:WU00:FS01:0x15:sizeof(CORE_PACKET_HDR) = 512 file=<>
03:48:15:WU00:FS01:0x15:- Expanded 58242 -> 257358 (decompressed 441.8 percent)
03:48:15:WU00:FS01:0x15:Called DecompressByteArray: compressed_data_size=58242 data_size=257358, decompressed_data_size=257358 diff=0
03:48:15:WU00:FS01:0x15:- Digital signature verified
03:48:15:WU00:FS01:0x15:
03:48:15:WU00:FS01:0x15:Project: 8070 (Run 96, Clone 5, Gen 61)
03:48:15:WU00:FS01:0x15:
03:48:15:WU00:FS01:0x15:Assembly optimizations on if available.
03:48:15:WU00:FS01:0x15:Entering M.D.
03:48:18:WU00:FS01:0x15:Tpr hash 00/wudata_01.tpr:  592891363 2913890229 1956379719 984361722 2042602009
03:48:18:WU00:FS01:0x15:GPU device id=0
03:48:19:WU00:FS01:0x15:Working on Gallium Rubidium Oxygen Manganese Argon Carbon Silicon t=  96.00000
03:48:19:WU00:FS01:0x15:Client config unavailable.
03:48:19:WU00:FS01:0x15:Starting GUI Server
03:49:21:WU00:FS01:0x15:Setting checkpoint frequency: 500000
03:49:21:WU00:FS01:0x15:Completed         3 out of 50000000 steps (0%).
03:51:45:WU00:FS01:0x15:Completed    500000 out of 50000000 steps (1%).
...
******************************* Date: 2013-04-04 *******************************
...
07:10:33:WU00:FS01:0x15:Completed  41500000 out of 50000000 steps (83%).
07:12:58:WU00:FS01:0x15:Completed  42000000 out of 50000000 steps (84%).
07:13:05:WU00:FS01:0x15:Run: exception thrown in GuardedRun -- cannot continue further.
07:13:05:WU00:FS01:0x15:Going to send back what have done -- stepsTotalG=50000000
07:13:05:WU00:FS01:0x15:Work fraction=0.8405 steps=50000000.
07:13:09:WU00:FS01:0x15:logfile size=14606 infoLength=14606 edr=0 trr=23
07:13:09:WU00:FS01:0x15:+ Opened results file
07:13:09:WU00:FS01:0x15:- Writing 15142 bytes of core data to disk...
07:13:09:WU00:FS01:0x15:Done: 14630 -> 4776 (compressed to 32.6 percent)
07:13:09:WU00:FS01:0x15:  ... Done.
07:13:09:WU00:FS01:0x15:DeleteFrameFiles: successfully deleted file=00/wudata_01.ckp
07:13:09:WU00:FS01:0x15:
07:13:09:WU00:FS01:0x15:Folding@home Core Shutdown: EARLY_UNIT_END
07:13:09:WARNING:WU00:FS01:FahCore returned: BAD_WORK_UNIT (114 = 0x72)
07:13:09:WU00:FS01:Sending unit results: id:00 state:SEND error:FAULTY project:8070 run:96 clone:5 gen:61 core:0x15 unit:0x000000446652edb45122df81b882118a
07:13:09:WU00:FS01:Uploading 5.16KiB to 171.67.108.36
07:13:09:WU00:FS01:Connecting to 171.67.108.36:8080
07:13:09:WU02:FS01:Connecting to assign-GPU.stanford.edu:80
07:13:10:WU00:FS01:Upload complete
07:13:10:WU00:FS01:Server responded WORK_ACK (400)
07:13:10:WU00:FS01:Cleaning up
Time for memtest?

David

Re: 8071(48,12,101) and 8070(96,5,61)

Posted: Thu Apr 04, 2013 10:58 pm
by PantherX
Project: 8071 (Run 48, Clone 12, Gen 101) & Project: 8070 (Run 96, Clone 5, Gen 61) were completed successfully by another donor. I guess it might be time to clean out your GPUs and, if it is overclocked (OC), maybe lower the clocks a little?