Configuration Files

Below are the configuration files for this subproject. These files are installed alongside the source code when the package is installed. These are only the defaults, however; you can always override these values in your own environment. See the Configuration object documentation for more detailed information.

Master

The below is the current configuration file for the master. This file lives at pyfarm/master/etc/master.yml in the source tree.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# Configures if the underlying Flask application and other libraries
# should run in debug mode.
#
# **Production Note**: This value should always be false.
debug: true


# Enables verbose output of loggers associated with the
# job queue.
debug_queue: false


# The URL to access the database.  For debugging and development
# a local SQLite database is used but for production other databases,
# such as MySQL or PostgreSQL which PyFarm runs tests against, should
# be used.
#
# For more information on the expected format of this variable
# see SQLAlchemy's documentation:
#   https://sqlalchemy.readthedocs.org/en/latest/core/engines.html#database-urls
database: "sqlite:///pyfarm.sqlite"


# Where to store runtime statistics. Same format as "database"
statistics_database: "sqlite:///pyfarm-statistics.sqlite"


# The broker that PyFarm's scheduler should use.  For debugging and
# development running Redis is the simplest.  For large deployments, or
# to understand the format of this variable, see:
#   http://celery.readthedocs.org/en/latest/configuration.html#broker-url
scheduler_broker: "redis://"


# The URL the master is running on.  This is used to form internal
# urls and other information.
base_url: http://127.0.0.1:5000/


# The name of this render farm, normally this can remain the default value.  If
# you're running multiple farms this allows you to only accept agents to your
# master that match your current farm name.
farm_name: ""


# The secret key which is used by several components of Flask
# for form validation, salting of secrets, etc.
#
# **Production Note**: This value should be random, consistent between
# frontends, and kept secret.  Do not use the value below for
# production.
secret_key: pyfarm


# The key used for signing the csrf token.
#
# **Production Note**: This value should be random, consistent between
# frontends, and kept secret.  Do not use the value below for
# production.
csrf_session_key: pyfarm


# The prefix of the URL from which the API will operate on.  This should
# not generally be changed unless you are operating different versions
# of the API at the same time from one web server.
api_prefix: /api/v1


# The URL template we use to communicate with the agent.
agent_api_url_template: http://{host}:{port}/api/v1


# Enables or disable the login functionality.  This can be used when
# debugging or doing development but should not be changed for
# production.
login_disabled: false


# The amount of time the 'remember me' cookie should persist.  The keys
# and values here are passed into a `timedelta` object as keywords.
cookie_duration:
  hours: 16


# When true json output from the APIs will be reformatted to
# be more human readable.
pretty_json: false


# When true all SQLAlchemy queries will be echoed.  This is useful
# for debugging the SQL statements being run and to get an idea of
# what the underlying ORM may be doing.
echo_sql: false


# When true the application will automatically create users in
# PyFarm's database if they do not exist already.  Setting this
# to false will cause an exception to be raised if the user in
# question does not exist.
autocreate_users: true


# When autocreating users, use this email address as a template.  For example:
#   "{username}@example.com"
# Not setting this value and setting `autocreate_users` to true will result
# in a user's email address not being set on a newly created user.
autocreate_user_email: null


# When provided an integer this many seconds will elapse after a job
# has completed before it is deleted.
default_job_delete_time: null


# The format for timestamps in the user interface.
timestamp_format: "YYYY-MM-DD HH:mm:ss"


# The directory to store updates for agents.  This will use `temp` above
# as the base directory.
agent_updates_dir: ${temp}/pyfarm-updates


# Optional directory to serve GET requests for agent updates
# from.  This is different from `agent_updates_dir` in that it's
# only used when an agent requests a file to update from.  This
# can be useful when you're caching requests or doing something with
# the update files prior to them being requested by the agent.
agent_updates_webdir: null


# The directory to store downloaded logs in.
#
# **Production Note**: For production it's probably best if these are kept
# in a persistent location rather than $temp.
tasklogs_dir: ${temp}/task_logs


# The address the Flask application should listen on.  This is only important
# when running the application in a standalone mode of operation. By default
# this will only listen locally but could be changed to listen on
# a specific adapter or `0.0.0.0` for all addresses.
flask_listen_address: 127.0.0.1


# When true all database tables will be dropped prior to setting
# up the application.  This is useful for development purposes only
# and should not be used in production.  There's also the `pyfarm-tables`
# command line tool which can be used to create or drop tables.
dev_db_drop_all: false


# When true we'll attempt to create any missing database tables
# prior to the application starting.  This is useful for development
# purposes only and should not be used in production.  There's also
# the `pyfarm-tables` command line tool which can be used to create
# or drop tables.
dev_db_create_all: false


# When true the application will be instanced as 'app' in the
# pyfarm.master.entrypoints module.  When running behind something
# like uwsgi this should be true.
instance_application: false

##
## BEGIN Queue defaults
##


# The default priority for a newly created job queue.
queue_default_priority: 0


# The default weight of a newly created job queue.
queue_default_weight: 10


# The minimum and maximum priority any queue can have.  This is
# used by the models for validation purposes.
queue_min_priority: -1000
queue_max_priority: 1000

##
## END Queue defaults
##

##
## BEGIN Job Type defaults
##

# The maximum number of tasks for the given job type
# to send to an agent at once.
job_type_max_batch: 1


# When batching and this value is true frames will be batched
# in contiguous groups.
job_type_batch_contiguous: true

##
## END Job Type defaults
##

Models

The below is the current configuration file for the models. This file lives at pyfarm/models/etc/models.yml in the source tree.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
##
## BEGIN Database Table Names
##

# Prefix used in the construction of all table names.  See the variables
# below for usage.
table_prefix: ""


# The name of the table for software items
table_software: ${table_prefix}software

# The name of the table for software versions
table_software_version: ${table_software}_versions

# The name of the table used for tagging
table_tag: ${table_prefix}tags

# The name of the table storing agent entries
table_agent: ${table_prefix}agents

# The name of the table which associates agents and software versions
table_agent_software_version_assoc: ${table_prefix}agent_software_version_associations

# The name of the table which associates agents and tags
table_agent_tag_assoc: ${table_prefix}agent_tag_associations

# The name of the table which associates agents and MAC addresses
table_agent_mac_address: ${table_prefix}agent_mac_addresses

# The name of the table containing jobs
table_job: ${table_prefix}jobs

# The name of the table containing job types
table_job_type: ${table_prefix}jobtypes

# The name of the table containing job type versions
table_job_type_version: ${table_prefix}jobtype_versions

# The name of the table which associates jobs and tags.
table_job_tag_assoc: ${table_prefix}job_tag_associations

# The name of the table which associates job and tag requirements
table_job_tag_req: ${table_prefix}job_tag_requirements

# The name of the table which associates inter-job dependencies
table_job_dependency: ${table_prefix}job_dependencies

# The name of the table which associates job and software requirements
table_job_software_req: ${table_prefix}job_software_requirements

# The name of the table containing information about users to be notified
# of status changes from jobs
table_job_notified_users: ${table_prefix}notified_users

# The name of the table which associates software requirements and jobs
table_job_type_software_req: ${table_prefix}jobtype_software_requirements

# The name of the table containing tasks
table_task: ${table_prefix}tasks

# The name of the table containing user information
table_user: ${table_prefix}users

# The name of the table containing role information
table_role: ${table_prefix}roles

# The name of the table which associates users and roles
table_user_role: ${table_prefix}user_roles

# The name of the table containing the job queues
table_job_queue: ${table_prefix}job_queues

# The name of the table containing job groups
table_job_group: ${table_prefix}job_groups

# The name of the table containing path mappings
table_path_map: ${table_prefix}path_maps

# The name of the table containing task logs
table_task_log: ${table_prefix}task_logs

# The name of the table containing associations between task
# logs and jobs
table_task_log_assoc: ${table_prefix}task_log_associations

# The name of the table containing GPU information for agents
table_gpu: ${table_prefix}gpus

# The name of the table containing associations between agents and GPUs
table_gpu_in_agent: ${table_prefix}gpu_agent_associations

# The name of the table storing which tasks have failed on an agent
table_failed_task_in_agent: ${table_prefix}failed_tasks_in_agents

# The name of the table containing the disks of the agents
table_agent_disk: ${table_prefix}agent_disks

table_statistics_agent_count: ${table_prefix}agent_counts

table_statistics_task_event_count: ${table_prefix}task_event_counts

table_statistics_task_count: ${table_prefix}task_counts

##
## END Database Table Names
##

##
## BEGIN Database Model Constraints
##

# There's some validation that happens when an agent is added to the
# database.  One of the checks we have is to ensure the agent's address
# is a remote address which a loopback address normally is not considered
# 'remote'.  Changing this value to true disables this check and will allow
# agents from a local address to connect.
allow_agents_from_loopback: false


# The maximum length of a tag
max_tag_length: 64


# The maximum length of a hostname
max_hostname_length: 255


# The maximum length of a job group's name.
max_jobgroup_name_length: 255


# The maximum length of the operating system's name for an agent.
max_osname_length: 128


# The maximum length of an agent's CPU name
max_cpuname_length: 128


# **Not Implemented** The default amount of ram the agent is allowed to
# allocate towards work.  A value of 1.0 would allow the agent to be
# assigned as much work as the system's ram would allow.
agent_ram_allocation: .8


# **Not Implemented** Based on load, this is the default amount of CPU space
# an agent is allowed to occupy with work.
agent_cpu_allocation: 1.0


# The minimum and maximum ports an agent can connect from
agent_min_port: 1024
agent_max_port: 65535


# The minimum and maximum CPUs an agent can declare
# These values also drive the min/max number of CPUs a job is allowed to request.
agent_min_cpus: 1
agent_max_cpus: 256


# The minimum and maximum amount of RAM, in megabytes, an agent can declare.
# These values also drive the min/max amount of ram a job is allowed to request.
agent_min_ram: 16
agent_max_ram: 262144


# The default weight given to a job for use in the queue.
queue_default_weight: 10


# The maximum length a job's title is allowed to be
jobtitle_max_length: 255


# The global default batch size for all new jobs.
job_default_batch: 1


# The global default number of times a job will requeue
# for failed tasks.  0 will never requeue, -1 will
# requeue indefinitely.
job_requeue_default: 3


# The global default minimum number of CPUs a job may execute
# on.  0 will disable the minimum, -1 will force an entire agent
# to be exclusive to a job's task.
job_default_cpus: 1


# The global default amount of ram that's required to be free on
# the host in order for a task of a job to run on a given agent.  A
# value of 0 will not require a minimum, -1 will force the agent's
# entire ram to be allocated to the given task.
job_default_ram: 32


# The maximum length a path mapping is allowed to be.
max_path_length: 512


# The maximum length a GPU name is allowed to be.
max_gpu_name_length: 128


# The maximum length a queue name is allowed to be.
max_queue_name_length: 255


# The maximum length of a queue's path
max_queue_path_length: 1024


# The maximum length of a job type's name
job_type_max_name_length: 64


# The maximum length of a job type's class name
job_type_max_class_name_length: 64


# The maximum length of a username
max_username_length: 255


# The maximum length of an email address
max_email_length: 255


# The maximum length of a role name
max_role_length: 128

# The maximum length of a mountpoint for agent disks
max_mountpoint_length: 255

# The maximum length of the function name to discover the presence of a
# software version on an agent
max_discovery_function_name_length: 255

##
## END Database Model Constraints
##

Scheduler

The below is the current configuration file for the scheduler. This file lives at pyfarm/scheduler/etc/scheduler.yml in the source tree.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
##
## BEGIN Scheduler Settings
##

# The user agent the scheduler will use when connecting to
# an agent.  Do not change this value unless the agent is
# updated to reflect the change made here.
master_user_agent: "PyFarm/1.0 (master)"


# How often the scheduler should run and poll agents.  The keys and
# values here are passed into a `timedelta` object as keywords.
agent_poll_interval:
  seconds: 30


# How often the scheduler should run and assign tasks.  The keys and
# values here are passed into a `timedelta` object as keywords.
assign_tasks_interval:
  minutes: 4


# How often orphaned task logs should be cleaned up on disk.  The keys and
# values here are passed into a `timedelta` object as keywords.
orphaned_log_cleanup_interval:
  hours: 1


# How often we should attempt to compress old task logs.  The keys and
# values here are passed into a `timedelta` object as keywords.
compress_log_interval:
  minutes: 10


# How often old jobs should be deleted. Please note this only marks
# jobs as to be deleted and does not actually perform the deletion
# itself.  See the ``delete_job_interval`` setting which will actually
# trigger the deletion of jobs.  The keys and values here are passed
# into a `timedelta` object as keywords.
autodelete_old_job_interval:
  hours: 1


# How often the scheduler which deletes jobs should run.  The keys and values
# here are passed into a `timedelta` object as keywords.
delete_job_interval:
  minutes: 5


# Used when polling agents to determine if we should or should not
# reach out to an agent.  This is used in combination with the agent's
# `last_heard_from` column, it's state and number of running tasks.  The keys
# and values here are passed into a `timedelta` object as keywords.
poll_busy_agents_interval:
  minutes: 5


# Used when polling agents to determine if we should or should not
# reach out to an agent.  This is used in combination with an agent's
# `last_heard_from` column, state and running task count.  The keys
# and values here are passed into a `timedelta` object as keywords.
poll_idle_agents_interval:
  hours: 1


# Used when polling agents to determine if an agent is considered
# offline or not after a given period of time without communication.  The keys
# and values here are passed into a `timedelta` object as keywords.
poll_offline_agents_interval:
  hours: 2


# A directory where lock files for the scheduler can be found.
scheduler_lockfile_base: ${temp}/scheduler_lock

# The number of times an SQL transaction error should be retried.
transaction_retries: 10

# The number of seconds we wait for a request to an agent to respond.  An
# exception is raised if we exceed this amount.
agent_request_timeout: 10

# When true the queue will prefer to assign work
# for jobs which are already running.
queue_prefer_running_jobs: true

# Whether to use an agents total RAM instead of reported free RAM to determine
# whether or not it can run a task.
use_total_ram_for_scheduling: false

##
## END Scheduler Settings
##

##
## BEGIN Email Server Settings
##

# The smtp server used to send email notifications.  Note that setting
# this value to null or leaving it blank will disable email notifications.
smtp_server: localhost


# Port to connect to the smtp server on.  The default port, 0, will
# cause the underlying library to use the default smtp port.
smtp_port: 0


# Optional login credentials for the smtp server.  The default value
# [null, null] means no username and password is required.
smtp_login: [null, null]


# The default address from which all emails from the scheduler will
# originate.
from_email: pyfarm@localhost

##
## END Email Server Settings
##

##
## BEGIN Email Template Settings
##

# General note about the settings below.  The brackets, {{ }}
# are used by the templating system for string substitution.  For example,
# {{ job.title }} would substitute in the string found on the `title` column
# of a job model.  For more information on template formatting, see Jinja's
# documentation: http://jinja.pocoo.org/docs/
# Finally, for multi-line strings follow this syntax:
#   foobar:
#   |
#     This is a multi-line
#     string.  It's indentation
#
#     and whitespace will be preserved.


# The template email subject line used for a successful job.
success_subject: Job {{ job.title }} completed successfully


# The template body of an email for a successful job
success_body:
|
  {{ job.jobtype_version.jobtype.name }} job {{ job.title }} (id {{ job.id }})
  has completed successfully on {{ job.time_finished.isoformat() }}.

  Job: {{ job.url }}

  {% if job.output_link %}
  Output: {{ job.output_link }}
  {% endif %}

  Sincerely,
      The PyFarm render manager


# The template email subject line used for a failed job.
failed_subject: Job {{ job.title }} failed


# The template email body for a failed job.
failed_body:
|
  {{ job.jobtype_version.jobtype.name }} job {{ job.title }}
  (id {{ job.id }}) has failed on
  {{ job.time_finished.isoformat() }}.

  Job: {{ job.url }}

  {% if job.output_link %}
  Output:

  {{ job.output_link }}
  {% endif %}

  {% if failed_log_urls %}
  Log(s) for failed tasks:
  {% for url in failed_log_urls %}
  {{url}}
  {% endfor%}
  {% endif %}

  Sincerely,
      The PyFarm render manager


# The template email subject line used for a deleted job.  Supported
# template values are:
#   {job_title} - The title of the job being deleted
deleted_subject: Job {job_title} deleted

# The template email body for a deleted job.  Supported template
# values are:
#   {job_title} - The title of the job deleted
#   {job_id} - The id of the job deleted
#   {jobtype_name} - The name of the job type used
deleted_body:
|
  {jobtype_name} job {job_title} has been deleted.

  Sincerely,
      The PyFarm render manager


##
## END Email Template Settings
##


##
## BEGIN Statistics Gathering Settings
##

# Whether or not to gather data for runtime statistics
enable_statistics: true


agent_count_interval:
    hours: 1

task_event_count_consolidate_interval:
    minutes: 15

task_count_interval:
    minutes: 15

##
## END Statistics Gathering Settings
##