// This example script finds a simple, low-dimensional
//   descriptor for formation enthalpy using a small
//   set of data from the OQMD.
//
// Workflow: load compositions -> resolve duplicate entries -> generate
//   attributes -> select a small attribute subset -> fit and
//   cross-validate a regression model -> print statistics.

// Load in a dataset of compounds
data = new data.materials.CompositionDataset
data import ./datasets/small_set.txt

// Get only the ground-state instances of each compound
//   (duplicates are resolved by ranking entries on the energy_pa property
//   and keeping the minimum)
data duplicates RankingDuplicateResolver minimize PropertyRanker energy_pa SimpleEntryRanker

// Define where to find elemental property data
data attributes properties directory ./lookup-data/

// Select which set of elemental properties to use for attributes
data attributes properties add set general

// Define a set of functions used to generate new attributes
//   based on a set of existing descriptors
data attributes expanders add FunctionExpander #{x}*#{y} #{x}^2 log(#{x})

// Generate new attributes
data attributes generate

// Set formation energy as property to be modeled
data target delta_e

// Remove attributes that are NaN
sel = new attributes.selectors.RemoveNaNAttributeSelector
sel train $data
sel run $data

// Select a subset of attributes using LASSO
//   (all options are kept on one line, so no continuation marker is needed)
//   NOTE(review): the meaning of "-cv_method 0.1 100" is not visible from this
//   script; confirm against the LassoAttributeSelector option documentation.
sel = new attributes.selectors.LassoAttributeSelector -n_lasso 10 -max_dim 4 -cv_method 0.1 100 -pick_best -debug
sel train $data
sel run $data
print sel description

// Fit a linear model using only those attributes
//   (presumably the argument "1" is the polynomial order; verify)
model = new models.regression.PolynomialRegression 1
model train $data
model crossvalidate $data

// Print out model statistics
print model model
print model training stats
print model validation stats

// Print out suggested citations for methods used in this script
citations

exit