Configuration Files

Below are the configuration files for this subproject. These files are installed alongside the source code when the package is installed. They are only the defaults, however; you can always override these values in your own environment. See the Configuration object documentation for more detailed information.

Agent

Below is the current configuration file for the agent. This file lives at pyfarm/agent/etc/agent.yml in the source tree.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# The name of the render farm this agent belongs to.  If specified, the agent
# will not join a farm with a different name.  If not specified, the agent will
# join any farm.
farm_name: null

# The platform-specific locations where the agent uuid file is stored
# by default.  This can be overridden with the --agent-id-file flag.
agent_id_file_platform_defaults:
  linux: /etc/pyfarm/agent/uuid.dat
  mac: /Library/pyfarm/agent/uuid.dat
  bsd: /etc/pyfarm/agent/uuid.dat
  windows: $LOCALAPPDATA/pyfarm/agent/uuid.dat

# The platform-specific locations where the run control file is looked for.
run_control_file_by_platform:
  linux: /tmp/pyfarm/agent/should_be_running
  mac: /tmp/pyfarm/agent/should_be_running
  bsd: /tmp/pyfarm/agent/should_be_running
  windows: $TEMP/pyfarm/agent/should_be_running

# The default location to store data.  $temp will expand to
# whatever pyfarm's data root is plus the application
# name (agent).  For example, on Linux this would expand to
# /tmp/pyfarm/agent
agent_data_root: $temp

# Number of seconds pyfarm-supervisor waits between successive
# iterations of its agent status check.
supervisor_interval: 5

# Directory the agent should change into upon starting.  When this
# value is left unset (null) no directory change is made.
agent_chdir: null

# Directory static web files are served from.  This will default
# to using PyFarm's installation root.
agent_static_root: auto

# The default location where lock files should be stored.  By
# default these will be stored alongside other data
# inside the `agent_data_root` value above.
lock_file_root: $agent_data_root/lock

# Locations of specific lock files
agent_lock_file: $lock_file_root/agent.pid
supervisor_lock_file: $lock_file_root/supervisor.pid

# Where user data for the agent is stored.  ~ will be expanded
# to the current user's home directory.
agent_user_data: ~/.pyfarm/agent

# The default location where the agent should save logs to.  This
# includes both logs from processes and the agent log itself.
agent_logs_root: $agent_data_root/logs

# The location where agent updates should be stored.
agent_updates_dir: $agent_data_root/updates

# The default port which the agent should use to serve the
# REST api.
agent_api_port: 50000

# The locations where the agent and the supervisor should
# save their own logging output to.
agent_log: $agent_logs_root/agent.log
supervisor_log: $agent_logs_root/supervisor.log

# Controls the log level for all loggers.  Valid levels
# are 'debug', 'info', 'warning', 'error' and 'critical'.
agent_global_logger_level: info

# The user agent the master uses when connecting to the agent's
# REST api.  This value should only be changed if the master's code
# is updated with a new user agent; changing it here has no effect
# on the master itself.  Do not modify it unless there's a specific
# reason to do so.
# NOTE: this key used to be defined twice in this file with the same
# value; it is now defined only once so parsers that reject duplicate
# keys can load the file.
master_user_agent: PyFarm/1.0 (master)

# Configuration values which control how the url
# for the master is constructed.  If 'master' is not set
# the --master flag will be required to start the agent.
master: null
master_api_version: 1
master_api: http://$master/api/v$master_api_version

# Controls how often the agent should reannounce itself
# to the master.  The agent may be in contact with the master
# more often than this; however, during long periods of
# inactivity this is how often the agent will 'inform' the
# master the agent is still online.
agent_master_reannounce: 120

# How many seconds the agent should spend attempting to inform
# the master that it's shutting down.
agent_shutdown_timeout: 15

# Number of seconds to offset from zero when calculating the
# http retry delay.
agent_http_retry_delay_offset: 1

# If an http request fails, use this as the base value
# to help determine how long we should wait before retrying.  This
# value is then used to calculate the final delay:
#   agent_http_retry_delay_offset + (random() * agent_http_retry_delay_factor)
agent_http_retry_delay_factor: 5

# Whether the http client connection should be persistent.  Generally
# this should always be true because the connection self-terminates
# after a short period of time anyway.  For higher latency situations
# or with larger deployments this value should be false.
agent_http_persistent_connections: true

# When using persistent connections, twisted will sometimes run requests
# over connections that have already been closed by the server without
# realizing it.  When this happens, we catch the resulting failure and
# retry the request.  We do this up to $broken_connection_max_retry
# times, assuming that if it still fails at that point, there is
# probably something else wrong.
broken_connection_max_retry: 20

# When true, html templates are reloaded with every request
# instead of being cached.
agent_html_template_reload: false

# When true, json output is reformatted to be more human readable.
agent_pretty_json: true

# How often the agent should check for changes in ram.  This value
# is used to ensure ram usage is checked at least this often though
# it may be checked more often due to other events (such as jobs
# running)
agent_ram_check_interval: 30

# If the ram has changed by this many megabytes since the last
# check then report the change to the master.
agent_ram_report_delta: 100

# How long the agent should wait, in seconds, between
# each report about a change in ram.
agent_ram_max_report_frequency: 10

# The default network time server and version the agent
# should use to calculate its clock skew.
agent_ntp_server: pool.ntp.org
agent_ntp_server_version: 2

# The amount of time this agent is offset from what
# would be considered correct based on an atomic
# clock.  If this value is set to auto the time will
# be calculated using NTP.
agent_time_offset: auto

# Physical and network information about the host the agent
# is running on.  Setting these values to 'auto' will cause
# them to be initialized to the system's current
# configuration values.
agent_ram: auto
agent_cpus: auto
agent_hostname: auto

# When true, a telnet connection to the agent is enabled which
# presents a Python interpreter upon connection.  This is mainly
# used for debugging and direct manipulation of the agent.  Once
# connected, the show() function can be used to see what objects
# are available.
# NOTE(review): the username and password below are well-known
# defaults; change them before enabling the manhole.
agent_manhole: false
agent_manhole_port: 50001
agent_manhole_username: admin
agent_manhole_password: admin

# NOTE: The following values are used by the unit tests and should
# generally be ignored for anything other than development.
agent_unittest:
  client_redirect_target: http://example.com
  client_api_test_url_https: https://httpbin.org
  client_api_test_url_http: http://httpbin.org

# A list of paths or names where the `lspci` command can
# be called from on Linux.  This is used to retrieve information
# about graphics cards installed on the system in
# `pyfarm.agent.sysinfo.graphics.graphics_cards`.
# If you need to run the command with sudo you may also specify an
# entry like this:
#  - sudo lspci
sysinfo_command_lspci:
  - lspci
  - /bin/lspci
  - /sbin/lspci
  - /usr/sbin/lspci
  - /usr/bin/lspci

# When false, the agent will not allow two or more assignments to
# run on this node at the same time: as long as at least one
# assignment still has at least one task that isn't failed or done,
# new assignments will be rejected.
agent_allow_sharing: false

Job Types

Below is the current configuration file for job types. This file lives at pyfarm/jobtypes/etc/jobtypes.yml in the source tree.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Enables caching of job types when true.  When false, caching is
# disabled and every job type will be retrieved from the master
# directly.
jobtype_enable_cache: true

# When true, output from all processes is sent directly to the
# agent's logger(s) instead of to the log file associated with
# each process.
jobtype_capture_process_output: false

# The location where tasks should be logged
jobtype_task_logs: $agent_logs_root/tasks

# The filename of an individual log file.  This filename supports several
# internal variables:
#
#   $YEAR - The current year
#   $MONTH - The current month
#   $DAY - The current day
#   $HOUR - The current hour
#   $MINUTE - The current minute
#   $SECOND - The current second (used by the default value below;
#             presumably expanded like the others - TODO confirm)
#   $JOB - The id of the job this log is for
#   $PROCESS - The uuid of the process object responsible for creating the log
#
# In addition to the above you can, as with any configuration variable,
# also use environment variables in the filename.
# Path separators ("/" and "\") are not allowed.
jobtype_task_log_filename:
    $YEAR-$MONTH-$DAY_$HOUR-$MINUTE-$SECOND_$JOB_$PROCESS.csv

# The directory used to store cached source code from the master.
# Note that $temp will be expanded to the local system's
# temp directory.  If this directory does not exist
# it will be created.  Leaving this value blank will
# disable job type caching.
jobtype_cache_directory: $temp/jobtype_cache

# The root directory under which the default implementation of
# JobType.tempdir() will create a path using tempfile.mkdtemp.
jobtype_tempdir_root: $temp/tempdir/$JOBTYPE_UUID

# When true, environment variables in file paths are expanded.
jobtype_expandvars: true

# When true, any errors produced when trying to map users and
# groups to IDs are ignored.  This will cause the underlying
# methods in the job type to run as the job type's owner instead,
# ignoring what the incoming job requests.
# NOTE: This value is not used on Windows.
jobtype_ignore_id_mapping_errors: false

# Extra key/value pairs to include in the environment of
# a process launched by a job type.
jobtype_default_environment: {}

# When true, a job type's get_environment() method will also
# include the operating system's environment.  The environment
# is constructed at the time the agent is launched; modifications
# to the agent's environment during execution are not included.
jobtype_include_os_environ: false

# Configures the thread pool used by job types
# for logging.
jobtype_logging_threadpool:
  # Setting this value to something smaller than `1` will result
  # in an exception being raised.  This value also cannot be larger
  # than `max_threads` below.
  min_threads: 3

  # This value must be greater than or equal to `min_threads`
  # above.  You may also set this value to 'auto', meaning the
  # number of processors times 1.5 or 20 (whichever is lower).
  max_threads: auto

  # As log messages are sent from processes they are stored
  # in an in-memory queue.  When the number of messages is higher
  # than this number a thread will be spawned to consume the
  # data and flush it into a file object.
  max_queue_size: 10

  # Most often the operating system will control how often data
  # is written to disk from a file object.  This value overrides
  # that behavior and forces the file object to flush to disk
  # after this many messages have been processed.
  flush_lines: 100