{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 0.5, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 1, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_1", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 22.7909677028656, "val_loss": 6.58987962387796, "val_perplexity": 727.6932677327253, "test_loss": 6.519769226930871, "test_perplexity": 678.4218057906794}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 0.5, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 2, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_2", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 31.49470615386963, "val_loss": 7.280081490319547, "val_perplexity": 1451.106271410181, "test_loss": 6.837989152210916, "test_perplexity": 932.6119073605179}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 0.5, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 3, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_3", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 37.88629627227783, "val_loss": 6.731550580229722, "val_perplexity": 838.4463368506426, "test_loss": 6.6616793831988925, "test_perplexity": 781.8628825777257}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 0.5, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 5, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_5", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 55.12407445907593, "val_loss": 6.961125601588031, "val_perplexity": 1054.8201965638193, "test_loss": 6.897221097342595, "test_perplexity": 989.5211085359712}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 2, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 1, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_1", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 22.738898754119873, "val_loss": 6.186038631275837, "val_perplexity": 485.9173906061095, "test_loss": 6.109469596887339, "test_perplexity": 450.09991744647}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 2, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 2, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_2", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 31.720718145370483, "val_loss": 6.118845752030324, "val_perplexity": 454.3399707383464, "test_loss": 6.031953304707446, "test_perplexity": 416.5278410818601}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 2, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 3, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_3", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 38.168434143066406, "val_loss": 6.149878361527287, "val_perplexity": 468.6603761827662, "test_loss": 6.066403492019242, "test_perplexity": 431.1273370324535}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 2, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 5, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_5", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 55.80784487724304, "val_loss": 6.220879362847982, "val_perplexity": 503.1454849879539, "test_loss": 6.138157553516266, "test_perplexity": 463.1993641461015}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0.2, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 0.5, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 1, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_1", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 22.80651617050171, "val_loss": 6.929359436035156, "val_perplexity": 1021.839216556221, "test_loss": 6.454903537038387, "test_perplexity": 635.8123909482596}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0.2, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 0.5, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 2, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_2", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 35.30740809440613, "val_loss": 7.497376152549023, "val_perplexity": 1803.3046053379737, "test_loss": 6.713111717149237, "test_perplexity": 823.1280000093909}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0.2, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 0.5, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 3, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_3", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 46.154869079589844, "val_loss": 6.447000387540778, "val_perplexity": 630.8072747126029, "test_loss": 6.373772015874157, "test_perplexity": 586.2650645401869}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0.2, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 0.5, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 5, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_5", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 67.33725190162659, "val_loss": 6.519500959293116, "val_perplexity": 678.2398315854251, "test_loss": 6.452274230032021, "test_perplexity": 634.1428408157709}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0.2, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 2, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 1, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_1", "seed": 1111, "tied": false, "best_val_loss": null, "epoch_num": 1, "epoch_time": 22.807337522506714, "val_loss": 6.2109733280830115, "val_perplexity": 498.18591372115077, "test_loss": 6.132174686586623, "test_perplexity": 460.4363775208578}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0.2, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 2, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 2, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_2", "seed": 1111, "tied": false, "test_loss": 6.71311174161836, "test_perplexity": 823.1280201506112}
{"annealing_loss_improvement_pct": 1.0, "batch_size": 20, "bptt": 35, "clip": 0.25, "cuda": true, "datapath": "./data/wikitext-2", "device": "", "dropout": 0.2, "dry_run": false, "emsize": 200, "epochs": 1, "log_interval": 200, "lr": 2, "rnn_type": "RNN_TANH", "nhead": 2, "hidden_size": 200, "num_layers": 3, "onnx_export": "", "save": "model.pt", "filename": "model_epochs_1_rnn_type_RNN_TANH_hidden_size_200_batch_size_20_bptt_35_num_layers_3", "seed": 1111, "tied": false, "test_loss": 6.373772006669106, "test_perplexity": 586.2650591435872}
