-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathsetup.py
More file actions
51 lines (48 loc) · 1.47 KB
/
setup.py
File metadata and controls
51 lines (48 loc) · 1.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from setuptools import setup, find_packages
# Runtime requirements that every installation of the package needs.
CORE_DEPS = [
    "tqdm>=4.66.1",
    "numpy>=1.26.4",
    "simple_parsing>=0.1.5",
    "mosaicml-streaming>=0.7.5",
    "sentencepiece>=0.1.99",
    "zstandard>=0.23.0",
    "universal-pathlib>=0.2.2",
    "fsspec[s3]>=2023.1.0",
    "pyarrow>=10.0.0",
    # NOTE(review): a 'transformers==4.39.3' pin is deliberately left disabled
    # here -- confirm whether it should be restored or dropped for good.
]

# Extra requirements, needed only when dataset support is wanted.
DATASET_DEPS = ["datasets>=2.18.0"]
# Load the long description before calling setup() so the README handle is
# closed deterministically. The encoding is explicit so the text does not
# depend on the platform's default locale encoding.
with open('README.md', encoding='utf-8') as readme_file:
    LONG_DESCRIPTION = readme_file.read()

# Package metadata and installation configuration for datatools-py.
setup(
    name='datatools-py',
    version='0.5',
    packages=find_packages(),
    install_requires=CORE_DEPS,
    extras_require={
        'datasets': DATASET_DEPS,
        'full': DATASET_DEPS,  # Alias for datasets
    },
    author='Alexander Wettig',
    description='Library and scripts for common LM data utilities (tokenizing, splitting, packing, ...)',
    long_description=LONG_DESCRIPTION,
    long_description_content_type='text/markdown',
    url='https://github.com/CodeCreator/datatools',
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
    # Each entry maps a shell command name to the `main` callable of a module
    # under datatools.scripts.
    entry_points={
        'console_scripts': [
            'peek=datatools.scripts.peek:main',
            'merge_index=datatools.scripts.merge_index:main',
            'pack=datatools.scripts.pack:main',
            'wrangle=datatools.scripts.wrangle:main',
            'tokenize=datatools.scripts.tokenize:main',
        ]
    },
    # The core requirement numpy>=1.26.4 only supports Python 3.9 and newer,
    # so declare that floor here; pip then rejects older interpreters up front
    # instead of failing later during dependency resolution.
    python_requires='>=3.9',
)