### rule: always use `LF` as the line ending on Linux, or unexpected things will drive you crazy
### converts a csv file to a markdown table or a plain-text table
### only ASCII and Chinese characters are allowed in the file, and UTF-8 is assumed as the charset; otherwise the table is not displayed properly
# Remember the field separator in effect at start-up so ExitFunc can put it
# back; the script later replaces IFS with the CSV delimiter.
oldIFS=$IFS

#######################################
# Restore IFS to the value saved in oldIFS.
# Globals:   oldIFS (read), IFS (written)
# Arguments: none
#######################################
function ExitFunc() {
  IFS=$oldIFS
}
#######################################
# Print one horizontal rule of the table, e.g. "+-----+---+".
# Every column contributes (width + 2) copies of the fill string, which
# matches the " %-Ns |" cell format (one space on each side of a value).
# Arguments:
#   $1   - separator printed first and after every column
#   $2   - fill string repeated inside a column
#   $3.. - display width of each column (an empty value counts as 0)
# Outputs:   the rule followed by a newline on stdout
#######################################
function printSymbolLie() {
  local sep=$1
  local mid=$2
  shift 2

  local count m
  local rule=$sep
  for count in "$@"; do
    # Starting at -2 adds the two padding positions around the cell text.
    for ((m = -2; m < ${count:-0}; m++)); do
      rule+=$mid
    done
    rule+=$sep
  done
  # printf instead of echo -n: a separator such as "-n" is printed literally.
  printf '%s\n' "$rule"
}
# Run ExitFunc (defined elsewhere in this file) on exit and on signals 2
# (INT), 15 (TERM) and 20. The signal numbers are kept as written; 20 is TSTP
# on Linux x86 but may differ elsewhere. Signal 9 (KILL) was removed from the
# list because it can never be caught.
trap 'ExitFunc' 2 15 20 EXIT

# Defaults: plain (non-markdown) table, no colour, comma as the delimiter.
isMD=0
isColor=0
IFS=,

#######################################
# Parse the command-line switches.
#   -c            colour the header row
#   -m            emit a markdown table
#   -d delimiter  field delimiter; backslash escapes such as '\t' are expanded
# Globals:   isColor, isMD, IFS (written); OPTIND (advanced by getopts)
# Arguments: the script's command-line arguments
# Returns:   exits the shell with status 1 on an unknown option
#######################################
function parse_options() {
  local OPT delimiter
  while getopts 'cmd:' OPT; do
    case "$OPT" in
      c) isColor=1 ;;
      m) isMD=1 ;;
      d)
        # printf -v expands the escapes without a subshell and, unlike
        # echo -ne, does not swallow a delimiter that looks like an option.
        printf -v delimiter '%b' "$OPTARG"
        IFS=$delimiter
        ;;
      *)
        echo "avaliable options: [-c] [-m] [-d delimiter]" >&2
        exit 1
        ;;
    esac
  done
}

parse_options "$@"
# Drop the parsed switches; the remaining arguments are the CSV files.
shift "$((OPTIND - 1))"

printf 'delimiter has been set to [%s]\n' "$IFS"
printf '%s' "$IFS" | hexdump -C
echo
for item in$@; do if [ ! -f "$item" ]; then echo"file ${item} not exists" continue fi
echo"file name: $item"
# array contasing all line per file line_arr=() col_count=0 row_count=0 whileread line; do line_arr[$row_count]=$line arr=($line) temp_count=${#arr[@]} if ((temp_count > col_count)); then col_count=$temp_count fi ((row_count++)) done <$item echo"table column size: ${col_count}, row size: ${row_count}"
# array contains max length of every col max_count_arr=() for ((i = 0; i < ${row_count}; i++)); do line=${line_arr[i]} arr=($line)
# echo "line: $line ${arr[@]}" for ((j = 0; j < $col_count; j++)); do max_length=${max_count_arr[j]} col_str=${arr[j]}
# wc output: always in the following order: newline, word, character, byte, maximum line length. bytes_chars=($(echo -n $col_str | wc -c -m | xargs -n 1 echo -n ",")) bytes=${bytes_chars[2]} chars=${bytes_chars[1]}
if ((bytes > chars)); then current_length=$(((bytes + chars) / 2)) else current_length=$chars fi
if ((current_length > max_length)); then max_count_arr[j]=$current_length fi done done
echo -e "max width of every col: ${max_count_arr[@]}\n"
for ((i = 0; i < ${row_count}; i++)); do line=${line_arr[i]} arr=($line)
if [[ $isMD -eq 1 && $i -eq 1 ]]; then printSymbolLie "|""-""${max_count_arr[@]}" fi
# print the table head line +-------+-------+ if [ $isMD -eq 0 ]; then if [[ $i -eq 0 || $i -eq 1 ]]; then printSymbolLie "+""-""${max_count_arr[@]}" fi fi
# Let x be the number of Chinese characters and y the number of ASCII characters in a cell; the charset is assumed to be UTF-8.
# Display width: one Chinese character is as wide as two ASCII characters.
#
# (1) 3*x + y = bytes (from wc)
# (2) x + y = chars (from wc)
# (3) 2*x + y + z = max_width (computed earlier; z is the number of padding spaces)
#
# printf counts the minimum field width in bytes: in printf "%-10s" "hello" the 10 means bytes.
# So the printf minimum field width for the current column is:
# 3*x + y + z = max_col_width + (bytes - chars) / 2 (follows from (1), (2) and (3))
if ((j == 0)); thenecho -n "|"; fi max_col_width=${max_count_arr[j]} min_print=$((max_col_width + (bytes - chars) / 2))
if ((i == 0 && isColor == 1)); then printf" \033[1;33m%-${min_print}s\033[00m |"${arr[j]} else printf" %-${min_print}s |"${arr[j]} fi done echo done
# print the end line +-------+-------+ if [ $isMD -eq 0 ]; then printSymbolLie "+""-""${max_count_arr[@]}" fi done
### rule: always use `LF` as the line ending on Linux, or unexpected things will drive you crazy
### converts a csv file to a markdown table or a plain-text table
### only ASCII and Chinese characters are allowed in the file, and UTF-8 is assumed as the charset; otherwise the table is not displayed properly
#######################################
# Measure the display width of cells [start, end) of one CSV line and save
# the widths as a comma-terminated list (e.g. "3,10,") in col<start>.txt.
#
# Width model (UTF-8 text made of ASCII and Chinese characters only): with
# x Chinese and y ASCII characters, bytes = 3x + y and chars = x + y, so the
# width on screen is 2x + y = (bytes + chars) / 2.
#
# Globals:   IFS (read) - the CSV delimiter used to split the line
# Arguments:
#   $1 - one raw CSV line
#   $2 - index of the first cell to measure
#   $3 - index one past the last cell to measure
# Outputs:   writes col<$2>.txt in the current directory
# Returns:   0
#######################################
function calLength() {
  local line=$1
  local start=$2
  local end=$3
  local -a arr=()
  local result=""

  # Split with read rather than an unquoted expansion so that a cell such as
  # "*" is not expanded as a file-name pattern.
  read -r -a arr <<<"$line"

  # First pass: character counts under the caller's locale.
  local idx cell chars bytes
  local -a char_counts=()
  for ((idx = start; idx < end; idx++)); do
    cell=${arr[idx]}
    char_counts[idx]=${#cell}
  done

  # Second pass: in the C locale ${#var} counts bytes. The assignment is
  # local, so it ends when the function returns.
  local LC_ALL=C
  for ((idx = start; idx < end; idx++)); do
    cell=${arr[idx]}
    bytes=${#cell}
    chars=${char_counts[idx]}
    if ((bytes > chars)); then
      result+="$(((bytes + chars) / 2)),"
    else
      result+="${chars},"
    fi
  done

  printf '%s\n' "$result" >"col$2.txt"

  return 0
}
#######################################
# Print one horizontal rule of the table, e.g. "+-----+---+".
# Every column contributes (width + 2) copies of the fill string, leaving
# room for one space of padding on each side of a cell value.
# Arguments:
#   $1   - separator printed first and after every column
#   $2   - fill string repeated inside a column
#   $3.. - display width of each column (an empty value counts as 0)
# Outputs:   the rule followed by a newline on stdout
#######################################
function printSymbolLie() {
  local sep=$1
  local mid=$2
  shift 2

  local count m
  local rule=$sep
  for count in "$@"; do
    # Starting at -2 adds the two padding positions around the cell text.
    for ((m = -2; m < ${count:-0}; m++)); do
      rule+=$mid
    done
    rule+=$sep
  done
  # printf instead of echo -n: a separator such as "-n" is printed literally.
  printf '%s\n' "$rule"
}
# Run ExitFunc (defined elsewhere in this file) on exit and on signals 2
# (INT), 15 (TERM) and 20. The signal numbers are kept as written; 20 is TSTP
# on Linux x86 but may differ elsewhere. Signal 9 (KILL) was removed from the
# list because it can never be caught.
trap 'ExitFunc' 2 15 20 EXIT

# Defaults: plain (non-markdown) table, no colour, comma as the delimiter.
isMD=0
isColor=0
IFS=,

#######################################
# Parse the command-line switches.
#   -c            colour the header row
#   -m            emit a markdown table
#   -d delimiter  field delimiter; backslash escapes such as '\t' are expanded
# Globals:   isColor, isMD, IFS (written); OPTIND (advanced by getopts)
# Arguments: the script's command-line arguments
# Returns:   exits the shell with status 1 on an unknown option
#######################################
function parse_options() {
  local OPT delimiter
  while getopts 'cmd:' OPT; do
    case "$OPT" in
      c) isColor=1 ;;
      m) isMD=1 ;;
      d)
        # printf -v expands the escapes without a subshell and, unlike
        # echo -ne, does not swallow a delimiter that looks like an option.
        printf -v delimiter '%b' "$OPTARG"
        IFS=$delimiter
        ;;
      *)
        echo "avaliable options: [-c] [-m] [-d delimiter]" >&2
        exit 1
        ;;
    esac
  done
}

parse_options "$@"
# Drop the parsed switches; the remaining arguments are the CSV files.
shift "$((OPTIND - 1))"

# log is provided elsewhere in this file.
log "delimiter has been set to [$IFS]"
printf '%s' "$IFS" | hexdump -C
echo
for item in$@; do if [ ! -f "$item" ]; then echo"file ${item} not exists" continue fi
log"file name: $item"
# array contasing all line per file line_arr=() col_count=0 row_count=0 whileread line; do line_arr[$row_count]=$line arr=($line) temp_count=${#arr[@]} if ((temp_count > col_count)); then col_count=$temp_count fi ((row_count++)) done <$item log"table column size: ${col_count}, row size: ${row_count}"
# array contains max length of every col max_count_arr=() for ((i = 0; i < ${row_count}; i++)); do line=${line_arr[i]} arr=($line)
# Let x be the number of Chinese characters and y the number of ASCII characters in a cell; the charset is assumed to be UTF-8.
# Display width: one Chinese character is as wide as two ASCII characters.
#
# (1) 3*x + y = bytes (from wc)
# (2) x + y = chars (from wc)
# (3) 2*x + y + z = max_width (computed earlier; z is the number of padding spaces)
#
# printf counts the minimum field width in bytes: in printf "%-10s" "hello" the 10 means bytes.
# So the printf minimum field width for the current column is:
# 3*x + y + z = max_col_width + (bytes - chars) / 2 (follows from (1), (2) and (3))
### rule: always use `LF` as the line ending on Linux, or unexpected things will drive you crazy
### converts a csv file to a markdown table or a plain-text table
### only ASCII and Chinese characters are allowed in the file, and UTF-8 is assumed as the charset; otherwise the table is not displayed properly
#######################################
# Measure the display width of cells [start, end) of one CSV line and save
# the widths as a comma-terminated list (e.g. "3,10,") in col<start>.txt.
#
# Width model (UTF-8 text made of ASCII and Chinese characters only): with
# x Chinese and y ASCII characters, bytes = 3x + y and chars = x + y, so the
# width on screen is 2x + y = (bytes + chars) / 2.
#
# Globals:   IFS (read) - the CSV delimiter used to split the line
# Arguments:
#   $1 - one raw CSV line
#   $2 - index of the first cell to measure
#   $3 - index one past the last cell to measure
# Outputs:   writes col<$2>.txt in the current directory
# Returns:   0
#######################################
function calLength() {
  local line=$1
  local start=$2
  local end=$3
  local -a arr=()
  local result=""

  # Split with read rather than an unquoted expansion so that a cell such as
  # "*" is not expanded as a file-name pattern.
  read -r -a arr <<<"$line"

  # First pass: character counts under the caller's locale.
  local idx cell chars bytes
  local -a char_counts=()
  for ((idx = start; idx < end; idx++)); do
    cell=${arr[idx]}
    char_counts[idx]=${#cell}
  done

  # Second pass: in the C locale ${#var} counts bytes. Measuring with the
  # builtin also works when the delimiter is whitespace, where parsing the
  # output of wc through IFS did not.
  local LC_ALL=C
  for ((idx = start; idx < end; idx++)); do
    cell=${arr[idx]}
    bytes=${#cell}
    chars=${char_counts[idx]}
    if ((bytes > chars)); then
      result+="$(((bytes + chars) / 2)),"
    else
      result+="${chars},"
    fi
  done

  printf '%s\n' "$result" >"col$2.txt"

  return 0
}
#######################################
# Measure the display width of cells [start, end) of one CSV line and save
# the widths as a comma-terminated list (e.g. "3,10,") in
# row<row>_col<start>.txt.
#
# Width model (UTF-8 text made of ASCII and Chinese characters only): with
# x Chinese and y ASCII characters, bytes = 3x + y and chars = x + y, so the
# width on screen is 2x + y = (bytes + chars) / 2.
#
# Globals:   IFS (read) - the CSV delimiter used to split the line
# Arguments:
#   $1 - one raw CSV line
#   $2 - index of the first cell to measure
#   $3 - index one past the last cell to measure
#   $4 - row tag used in the output file name
# Outputs:   writes row<$4>_col<$2>.txt in the current directory
# Returns:   0
#######################################
function calLength2() {
  local line=$1
  local start=$2
  local end=$3
  local row=$4
  local -a arr=()
  local result=""

  # Split with read rather than an unquoted expansion so that a cell such as
  # "*" is not expanded as a file-name pattern.
  read -r -a arr <<<"$line"

  # First pass: character counts under the caller's locale.
  local idx cell chars bytes
  local -a char_counts=()
  for ((idx = start; idx < end; idx++)); do
    cell=${arr[idx]}
    char_counts[idx]=${#cell}
  done

  # Second pass: in the C locale ${#var} counts bytes. Measuring with the
  # builtin also works when the delimiter is whitespace, where parsing the
  # output of wc through IFS did not.
  local LC_ALL=C
  for ((idx = start; idx < end; idx++)); do
    cell=${arr[idx]}
    bytes=${#cell}
    chars=${char_counts[idx]}
    if ((bytes > chars)); then
      result+="$(((bytes + chars) / 2)),"
    else
      result+="${chars},"
    fi
  done

  printf '%s\n' "$result" >"row${row}_col$2.txt"

  return 0
}
#######################################
# Compute, for a group of CSV lines, the largest display width of each
# column and save the result as a comma-terminated list (e.g. "4,0,3,") in
# the file row<line_start_num>.
#
# Each cell is measured by calLength2 in a background subshell, which leaves
# its width in row<line_start_num>_col<col>.txt; those scratch files are
# removed before returning.
#
# Arguments:
#   $1   - tag of this group (index of its first line); used in file names
#   $2   - number of columns
#   $3.. - the raw CSV lines of the group
# Outputs:   writes row<$1> in the current directory
#######################################
function rowCal() {
  local line_start_num=$1
  local col_count=$2
  shift 2

  local -a max_count_arr=()
  local -a files=()
  local line col width

  # Seed every column with 0. A column whose cells are all empty then still
  # appears in the output instead of being dropped, which would shift the
  # widths of every later column.
  for ((col = 0; col < col_count; col++)); do
    files[col]="row${line_start_num}_col${col}.txt"
    max_count_arr[col]=0
  done

  for line in "$@"; do
    # One background job per cell; wait for all of them before reading.
    for ((col = 0; col < col_count; col++)); do
      (calLength2 "$line" "$col" "$((col + 1))" "$line_start_num") &
    done
    wait

    for ((col = 0; col < col_count; col++)); do
      width=0
      if [[ -r "${files[col]}" ]]; then
        # Each file holds "<width>,"; keep what precedes the first comma.
        IFS=, read -r width _ <"${files[col]}"
      fi
      width=$((width))
      if ((width > max_count_arr[col])); then
        max_count_arr[col]=$width
      fi
    done
  done

  # Skip the removal when there are no columns, so rm is never run without
  # operands.
  if ((${#files[@]} > 0)); then
    rm -f -- "${files[@]}"
  fi

  local final=""
  for ((col = 0; col < col_count; col++)); do
    final+="${max_count_arr[col]},"
  done

  printf '%s' "$final" >"row${line_start_num}"
}
#######################################
# Print one horizontal rule of the table, e.g. "+-----+---+".
# Every column contributes (width + 2) copies of the fill string, leaving
# room for one space of padding on each side of a cell value.
# Arguments:
#   $1   - separator printed first and after every column
#   $2   - fill string repeated inside a column
#   $3.. - display width of each column (an empty value counts as 0)
# Outputs:   the rule followed by a newline on stdout
#######################################
function printSymbolLie() {
  local sep=$1
  local mid=$2
  shift 2

  local count m
  local rule=$sep
  for count in "$@"; do
    # Starting at -2 adds the two padding positions around the cell text.
    for ((m = -2; m < ${count:-0}; m++)); do
      rule+=$mid
    done
    rule+=$sep
  done
  # printf instead of echo -n: a separator such as "-n" is printed literally.
  printf '%s\n' "$rule"
}
# Report the active delimiter, followed by a hex dump so that invisible
# delimiters (e.g. a tab) can be identified. log is provided elsewhere in
# this file.
log "delimiter has been set to [$IFS]"
printf '%s' "$IFS" | hexdump -C
echo
for item in$@; do if [ ! -f "$item" ]; then echo"file ${item} not exists" continue fi
log"file name: $item"
########################################################## # array contasing all line per file line_arr=() col_count=0 row_count=0 whileread line; do line_arr[$row_count]=$line arr=($line) temp_count=${#arr[@]} if ((temp_count > col_count)); then col_count=$temp_count fi ((row_count++)) done <$item log"table column size: ${col_count}, row size: ${row_count}"
########################################################## row_files="" if ((row_count > parrel)); then per_size=$((row_count / parrel)) for ((r = 0; r < $parrel; r++)); do start_index=$((r * per_size)) if ((r == parrel - 1)); then length=$((row_count - (r*per_size))) else length=$per_size fi (rowCal "${start_index}"$col_count"${line_arr[@]:${start_index}:${length}}") & row_files="${row_files}row${start_index}$IFS" done else (rowCal 0 $col_count"${line_arr[@]:0:${col_count}}") & row_files="row0" fi wait
# array contains max length of every col max_count_arr=() for row_cal_file in$row_files; do result=$(cat $row_cal_file) col_length_arr=($result)
for ((j = 0; j < $col_count; j++)); do max_length=${max_count_arr[j]} current_length=${col_length_arr[j]}
if ((current_length > max_length)); then max_count_arr[j]=$current_length fi done rm $row_cal_file & done log"max width of every col cal \n"
########################################################## # for ((i = 0; i < ${row_count}; i++)); do i=0 whileread line; do # line=${line_arr[i]} arr=($line)
# echo "line:######### $line" if [[ $isMD -eq 1 && $i -eq 1 ]]; then printSymbolLie "|""-""${max_count_arr[@]}" fi
# print the table head line +-------+-------+ if [[ $isMD -eq 0 && ($i -eq 0 || $i -eq 1) ]]; then printSymbolLie "+""-""${max_count_arr[@]}" fi
# Let x be the number of Chinese characters and y the number of ASCII characters in a cell; the charset is assumed to be UTF-8.
# Display width: one Chinese character is as wide as two ASCII characters.
#
# (1) 3*x + y = bytes (from wc)
# (2) x + y = chars (from wc)
# (3) 2*x + y + z = max_width (computed earlier; z is the number of padding spaces)
#
# printf counts the minimum field width in bytes: in printf "%-10s" "hello" the 10 means bytes.
# So the printf minimum field width for the current column is:
# 3*x + y + z = max_col_width + (bytes - chars) / 2 (follows from (1), (2) and (3))
### rule: always use `LF` as the line ending on Linux, or unexpected things will drive you crazy
### converts a csv file to a markdown table or a plain-text table
### only ASCII and Chinese characters are allowed in the file, and UTF-8 is assumed as the charset; otherwise the table is not displayed properly
oldIFS=$IFS isVerbose=0
#######################################
# Print a timestamped diagnostic line when verbose mode is on.
# Backslash escapes in the message (e.g. "\n") are expanded.
# Globals:   isVerbose (read) - output is produced only when it equals 1
# Arguments: the message words; they are joined with single spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS]: message" on stdout
#######################################
function debug() {
  if ((isVerbose == 1)); then
    # Join with a space whatever the global IFS is (the script sets IFS to
    # the CSV delimiter); "$*" uses the first character of IFS.
    local IFS=' '
    printf '%b\n' "[$(date +'%F %T')]: $*"
  fi
}
#######################################
# Print a message, expanding backslash escapes such as "\n" and "\t".
# Arguments: the message words; they are joined with single spaces
# Outputs:   the message followed by a newline on stdout
#######################################
function info() {
  # printf rather than echo -e: a message that starts with "-n" or "-e" is
  # printed as text and not taken as an option.
  local IFS=' '
  printf '%b\n' "$*"
}
#######################################
# Restore IFS to the value saved in oldIFS.
# Globals:   oldIFS (read), IFS (written)
# Arguments: none
#######################################
function ExitFunc() {
  IFS=$oldIFS
}
#######################################
# Measure the display width of cells [start, end) of one CSV line and save
# the widths as a comma-terminated list (e.g. "3,10,") in
# row<row>_col<start>.txt.
#
# Width model (UTF-8 text made of ASCII and Chinese characters only): with
# x Chinese and y ASCII characters, bytes = 3x + y and chars = x + y, so the
# width on screen is 2x + y = (bytes + chars) / 2.
#
# Globals:   IFS (read) - the CSV delimiter used to split the line
# Arguments:
#   $1 - one raw CSV line
#   $2 - index of the first cell to measure
#   $3 - index one past the last cell to measure
#   $4 - row tag used in the output file name
# Outputs:   writes row<$4>_col<$2>.txt in the current directory
# Returns:   0
#######################################
function calLength2() {
  local line=$1
  local start=$2
  local end=$3
  local row=$4
  local -a arr=()
  local result=""

  # Split with read rather than an unquoted expansion so that a cell such as
  # "*" is not expanded as a file-name pattern.
  read -r -a arr <<<"$line"

  # First pass: character counts under the caller's locale.
  local idx cell chars bytes
  local -a char_counts=()
  for ((idx = start; idx < end; idx++)); do
    cell=${arr[idx]}
    char_counts[idx]=${#cell}
  done

  # Second pass: in the C locale ${#var} counts bytes. Measuring with the
  # builtin also works when the delimiter is whitespace, where parsing the
  # output of wc through IFS did not.
  local LC_ALL=C
  for ((idx = start; idx < end; idx++)); do
    cell=${arr[idx]}
    bytes=${#cell}
    chars=${char_counts[idx]}
    if ((bytes > chars)); then
      result+="$(((bytes + chars) / 2)),"
    else
      result+="${chars},"
    fi
  done

  printf '%s\n' "$result" >"row${row}_col$2.txt"

  return 0
}
#######################################
# Compute, for a group of CSV lines, the largest display width of each
# column and save the result as a comma-terminated list (e.g. "4,0,3,") in
# the file row<line_start_num>.
#
# Each cell is measured by calLength2 in a background subshell, which leaves
# its width in row<line_start_num>_col<col>.txt; those scratch files are
# removed before returning.
#
# Arguments:
#   $1   - tag of this group (index of its first line); used in file names
#   $2   - number of columns
#   $3.. - the raw CSV lines of the group
# Outputs:   writes row<$1> in the current directory; every line is also
#            passed to debug
#######################################
function rowCal() {
  local line_start_num=$1
  local col_count=$2
  shift 2

  local -a max_count_arr=()
  local -a files=()
  local line col width

  # Seed every column with 0. A column whose cells are all empty then still
  # appears in the output instead of being dropped, which would shift the
  # widths of every later column.
  for ((col = 0; col < col_count; col++)); do
    files[col]="row${line_start_num}_col${col}.txt"
    max_count_arr[col]=0
  done

  for line in "$@"; do
    debug "line: $line"

    # One background job per cell; wait for all of them before reading.
    for ((col = 0; col < col_count; col++)); do
      (calLength2 "$line" "$col" "$((col + 1))" "$line_start_num") &
    done
    wait

    for ((col = 0; col < col_count; col++)); do
      width=0
      if [[ -r "${files[col]}" ]]; then
        # Each file holds "<width>,"; keep what precedes the first comma.
        IFS=, read -r width _ <"${files[col]}"
      fi
      width=$((width))
      if ((width > max_count_arr[col])); then
        max_count_arr[col]=$width
      fi
    done
  done

  # Skip the removal when there are no columns, so rm is never run without
  # operands.
  if ((${#files[@]} > 0)); then
    rm -f -- "${files[@]}"
  fi

  local final=""
  for ((col = 0; col < col_count; col++)); do
    final+="${max_count_arr[col]},"
  done

  printf '%s' "$final" >"row${line_start_num}"
}
#######################################
# Print one horizontal rule of the table, e.g. "+-----+---+", WITHOUT a
# trailing newline (callers capture the output and append "\n" themselves).
# Every column contributes (width + 2) copies of the fill string, leaving
# room for one space of padding on each side of a cell value.
# Arguments:
#   $1   - separator printed first and after every column
#   $2   - fill string repeated inside a column
#   $3.. - display width of each column (an empty value counts as 0)
# Outputs:   the rule on stdout, no newline
#######################################
function printSymbolLie() {
  local sep=$1
  local mid=$2
  shift 2

  local count m
  local rule=$sep
  for count in "$@"; do
    # Starting at -2 adds the two padding positions around the cell text.
    for ((m = -2; m < ${count:-0}; m++)); do
      rule+=$mid
    done
    rule+=$sep
  done
  # printf instead of echo -n: a separator such as "-n" is printed literally.
  printf '%s' "$rule"
}
# In verbose mode, report the active delimiter followed by a hex dump so
# that invisible delimiters (e.g. a tab) can be identified. A blank line is
# printed in every mode.
debug "delimiter has been set to [$IFS]"
if ((isVerbose == 1)); then
  printf '%s' "$IFS" | hexdump -C
fi
echo
for item in$@; do if [ ! -f "$item" ]; then echo"file ${item} not exists" continue fi
debug "file name: $item"
########################################################## # array contasing all line per file line_arr=() col_count=0 row_count=0 whileread line; do line_arr[$row_count]=$line arr=($line) temp_count=${#arr[@]} if ((temp_count > col_count)); then col_count=$temp_count fi ((row_count++)) done <$item debug "table column size: ${col_count}, row size: ${row_count}"
########################################################## row_files="" if ((isParrel == 1 && row_count > parrel)); then per_size=$((row_count / parrel)) for ((r = 0; r < $parrel; r++)); do start_index=$((r * per_size)) if ((r == parrel - 1)); then length=$((row_count - (r * per_size))) else length=$per_size fi (rowCal "${start_index}"$col_count"${line_arr[@]:${start_index}:${length}}") & row_files="${row_files}row${start_index}$IFS" done else debug "no parrel function call:" 0 $col_count"${line_arr[@]:0:${row_count}}" (rowCal 0 $col_count"${line_arr[@]:0:${row_count}}") & row_files="row0" fi wait
# array contains max length of every col max_count_arr=() for row_cal_file in$row_files; do result=$(cat $row_cal_file) col_length_arr=($result)
for ((j = 0; j < $col_count; j++)); do max_length=${max_count_arr[j]} current_length=${col_length_arr[j]}
if ((current_length > max_length)); then max_count_arr[j]=$current_length fi done rm $row_cal_file & done debug "max width of every col cal \n"
########################################################## # for ((i = 0; i < ${row_count}; i++)); do i=0
format_str="" parameter_str="" print_count=0 whileread line; do ((print_count++)) # line=${line_arr[i]} arr=($line)
# echo "line:######### $line" if [[ $isMD -eq 1 && $i -eq 1 ]]; then format_str=${format_str}$(printSymbolLie "|""-""${max_count_arr[@]}")"\n" fi
# print the table head line +-------+-------+ if [[ $isMD -eq 0 && ($i -eq 0 || $i -eq 1) ]]; then format_str=${format_str}$(printSymbolLie "+""-""${max_count_arr[@]}")"\n" fi
# Let x be the number of Chinese characters and y the number of ASCII characters in a cell; the charset is assumed to be UTF-8.
# Display width: one Chinese character is as wide as two ASCII characters.
#
# (1) 3*x + y = bytes (from wc)
# (2) x + y = chars (from wc)
# (3) 2*x + y + z = max_width (computed earlier; z is the number of padding spaces)
#
# printf counts the minimum field width in bytes: in printf "%-10s" "hello" the 10 means bytes.
# So the printf minimum field width for the current column is:
# 3*x + y + z = max_col_width + (bytes - chars) / 2 (follows from (1), (2) and (3))
#
# TODO: optimize this to reduce the calculation time
if ((j == 0)); then format_str="${format_str}|"; fi max_col_width=${max_count_arr[j]} min_print=$((max_col_width + (bytes - chars) / 2))
if ((i == 0 && isColor == 1)); then format_str="${format_str} \033[1;33m%-${min_print}s\033[00m |" else format_str="${format_str} %-${min_print}s |" fi