diff options
| author | thomas grothe <grothe.tr@gmail.com> | 2023-11-29 23:43:15 -0600 |
|---|---|---|
| committer | thomas grothe <grothe.tr@gmail.com> | 2023-11-29 23:43:15 -0600 |
| commit | 6cdf7ae3598e791a537b10885bd31c56a26b362e (patch) | |
| tree | 00ef3b62bd64f6bba311bc36fa20bcc054fa2fbd | |
| parent | 6354f6ae105f36c68124558eb45323d90b3ad992 (diff) | |
wow as python script
| -rwxr-xr-x | wow | 2 | ||||
| -rwxr-xr-x | wow.py | 55 |
2 files changed, 56 insertions, 1 deletions
#!/usr/bin/python
# Words Of Wisdom
# output some text from my journal
#
# Python version of the bash script `wow`, whose default sample list is
#   files=(~/doc/j/_journal-toaug2022 ~/_poetry ~/doc/_journal_2019 )
#
# NOTE(review): everything that is collected gets printed; picking a random
# excerpt out of it is not implemented here.

import os
import subprocess
import sys

import chardet   # for getting encoding
import filetype  # for filetype (extension and mime type)

v = True  # verbose

# All text gathered so far; addSampleFileIfTxt() appends to it.
total_text = ''

# The paths to scan recursively for files from which to grab text.
paths = ['/home/thomas/doc/j/', '/home/thomas/_poetry', '/home/thomas/doc/_journal_2019']
# The individual files we want to grab text from (not populated yet).
samplefiles = []


def _verbose(message):
    """Print *message* only when the module-level verbose flag `v` is set."""
    if v:
        print(message)


def addSampleFileIfTxt(filepath):
    """Append the text of *filepath* to the global `total_text`.

    Files whose detected encoding is ascii or utf-8 are decoded and appended
    directly. Files that `filetype` recognises as ``odt`` are converted with
    the external ``odt2txt`` program. Anything else (including paths that are
    not regular files) is skipped.

    Args:
        filepath: path of the candidate file.

    Returns:
        None. The result is accumulated in the module-level `total_text`.
    """
    global total_text

    _verbose('checking {}'.format(filepath))
    if not os.path.isfile(filepath):
        _verbose('not file')
        return

    # Context manager so the handle is closed even if reading fails.
    with open(filepath, 'rb') as handle:
        fb = handle.read()  # file data (bytes)

    # chardet reports None when it cannot decide; str() turns that into
    # 'None', which simply fails the membership test below.
    enc = str(chardet.detect(fb)['encoding'])
    ftype = filetype.guess(filepath)  # None when the type is not recognised
    _verbose('filetype: {}'.format(str(ftype)))
    _verbose('encoding: {}'.format(enc))

    if enc in ('ascii', 'utf-8'):
        _verbose(fb)
        # Decode instead of str(bytes), which would append the "b'...'" repr.
        # Detection is heuristic, so undecodable bytes are replaced rather
        # than aborting the whole run.
        total_text += fb.decode(enc, errors='replace')
    elif ftype is not None and ftype.extension == 'odt':
        try:
            # stdout must be captured explicitly, otherwise .stdout is None.
            result = subprocess.run(
                ['odt2txt', filepath],
                stdout=subprocess.PIPE,
                universal_newlines=True,
            )
        except OSError as err:
            # e.g. odt2txt is not installed; skip this file, keep going.
            print('could not run odt2txt on {}: {}'.format(filepath, err),
                  file=sys.stderr)
            return
        if result.returncode == 0:
            total_text += result.stdout
        else:
            print('odt2txt failed on {} (exit status {})'.format(
                filepath, result.returncode), file=sys.stderr)
    else:
        _verbose('{} is not txt'.format(filepath))


for p in paths:
    _verbose('path {}'.format(p))
    if os.path.isdir(p):
        for root, dirs, files in os.walk(p):
            _verbose('walk {}: {} files, {} dirs'.format(root, len(files), len(dirs)))
            for name in files:
                # os.path.join avoids the doubled '/' when root ends in one.
                addSampleFileIfTxt(os.path.join(root, name))
    else:
        addSampleFileIfTxt(p)

print(total_text)
