transformers: Unable to train xlnet with tensorflow
Environment info
- transformers version: '2.0.0'
- Platform: jupyter notebook
- Python version: 3.7.6
- PyTorch version (GPU?):
- Tensorflow version (GPU?): 2.1.0 GPU
- Using GPU in script?: Yes
- Using distributed or parallel set-up in script?: No
Who can help
Information
Model I am using (Bert, XLNet …): XLNet
The problem arises when using: my own modified scripts: (give details below)
The task I am working on is: my own task or dataset: (give details below)
To reproduce
Steps to reproduce the behavior:
# Inputs and targets both come from the same dataframe: `description` holds
# the raw text, `target` holds the class of each row.
x = df["description"]

# Encode the targets as a categorical: `y_cat` keeps the distinct categories,
# `y` the integer code of each row, and `n_label` the number of categories.
y_label = pd.Categorical(df["target"])
y_cat, y = y_label.categories, y_label.codes
n_label = len(y_cat)
# Encode every description into XLNet token ids (one list of ids per text).
xlnet_tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")
train_tokenized_inputs = list(map(xlnet_tokenizer.encode, x.values.tolist()))

# Pad/truncate to the longest encoded sequence, clamped to the range [1, 2000].
train_max_length = max(
    1,
    min(np.array([len(ids) for ids in train_tokenized_inputs]).max(), 2000),
)
# Shorter sequences are padded at the end with 0 and longer ones are cut at
# the end; the padded result is what gets fed to the model as a numpy array.
train_padded_inputs = tf.keras.preprocessing.sequence.pad_sequences(
    train_tokenized_inputs,
    maxlen=train_max_length,
    value=0,
    padding='post',
    truncating='post',
    dtype='int32',
)
# Load the pretrained "xlnet-base-cased" checkpoint as a sequence-classification
# model, with the number of labels set to the number of classes (n_label).
clf = TFAutoModelForSequenceClassification.from_pretrained("xlnet-base-cased", num_labels=n_label)
# from_logits=True tells the loss to treat the model outputs as raw logits;
# the targets passed to fit() below are the integer class codes in `y`.
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
clf.compile(optimizer='adam',loss=loss)
# Train for one epoch on the padded token ids with batch size 32 and a 20%
# validation split. This is the call that raises the TypeError shown in the
# traceback below, so its argument layout is left exactly as reported.
clf.fit(x=train_padded_inputs, y=y,
batch_size=32,
epochs=1,
verbose=1,
callbacks=None,
validation_split=0.2,
validation_data=None,
shuffle=True,
class_weight=None,
sample_weight=None,
initial_epoch=0,
steps_per_epoch=None,
validation_steps=None,
validation_freq=1,
max_queue_size=10,
workers=1,
use_multiprocessing=False,)
The error message is:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-68-c147be84f56e> in <module>
15 max_queue_size=10,
16 workers=1,
---> 17 use_multiprocessing=False,)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
817 max_queue_size=max_queue_size,
818 workers=workers,
--> 819 use_multiprocessing=use_multiprocessing)
820
821 def evaluate(self,
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
233 max_queue_size=max_queue_size,
234 workers=workers,
--> 235 use_multiprocessing=use_multiprocessing)
236
237 total_samples = _get_total_number_of_samples(training_data_adapter)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
550 batch_size=batch_size,
551 check_steps=False,
--> 552 steps=steps_per_epoch)
553 (x, y, sample_weights,
554 val_x, val_y,
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in _standardize_user_data(self, x, y, sample_weight, class_weight, batch_size, check_steps, steps_name, steps, validation_split, shuffle, extract_tensors_from_dataset)
2344 # First, we build the model on the fly if necessary.
2345 if not self.inputs:
-> 2346 all_inputs, y_input, dict_inputs = self._build_model_with_inputs(x, y)
2347 is_build_called = True
2348 else:
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in _build_model_with_inputs(self, inputs, targets)
2570 else:
2571 cast_inputs = inputs
-> 2572 self._set_inputs(cast_inputs)
2573 return processed_inputs, targets, is_dict_inputs
2574
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py in _set_inputs(self, inputs, outputs, training)
2657 kwargs['training'] = training
2658 try:
-> 2659 outputs = self(inputs, **kwargs)
2660 except NotImplementedError:
2661 # This Model or a submodel is dynamic and hasn't overridden
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
771 not base_layer_utils.is_in_eager_or_tf_function()):
772 with auto_control_deps.AutomaticControlDependencies() as acd:
--> 773 outputs = call_fn(cast_inputs, *args, **kwargs)
774 # Wrap Tensors in `outputs` in `tf.identity` to avoid
775 # circular dependencies.
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
235 except Exception as e: # pylint:disable=broad-except
236 if hasattr(e, 'ag_error_metadata'):
--> 237 raise e.ag_error_metadata.to_exception(e)
238 else:
239 raise
TypeError: in converted code:
/opt/conda/lib/python3.7/site-packages/transformers/modeling_tf_xlnet.py:916 call *
output = self.sequence_summary(output)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/base_layer.py:773 __call__
outputs = call_fn(cast_inputs, *args, **kwargs)
/opt/conda/lib/python3.7/site-packages/transformers/modeling_tf_utils.py:459 call *
output = self.first_dropout(output)
/opt/conda/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py:416 converted_call
return py_builtins.overload_of(f)(*args)
TypeError: 'NoneType' object is not callable
In addition, I tried to use TFTrainer in case I could solve my problem with it.
from transformers import TFTrainer
I get this error message:
---------------------------------------------------------------------------
ImportError Traceback (most recent call last)
<ipython-input-51-aece35bcf827> in <module>
----> 1 from transformers import TFTrainer
ImportError: cannot import name 'TFTrainer' from 'transformers' (/opt/conda/lib/python3.7/site-packages/transformers/__init__.py)
Expected behavior
I expect the code to run and the model to be fine-tuned on my dataset.
I expect that I shouldn’t need the TFTrainer as the explanation on huggingface.co says the model is a standard tensorflow 2 layer. But I expect that I should be able to import it.
About this issue
- Original URL
- State: closed
- Created 3 years ago
- Comments: 41 (18 by maintainers)
This error means that your GPU doesn’t have enough RAM to run an einsum operation. But yes, your dataset looks better. Still, you are not using the tokenizer properly; the proper way to use it is:
To get tokenized data that looks like: