CSV 转换为表格(支持MD格式的表格)

写了一个 CSV 文件转为 stdout 中的表格的脚本,也支持输出 Markdown 表格,考虑了汉字与英文字符的显示宽度。

  • Shell 使用 utf-8 编码
  • 仅支持 ASCII 字符和在 UTF-8 中占 3 个字节的字符(在 UTF-8 中占 2 个或 4 个字节的字符不适用;占 3 个字节但显示宽度不等于 ASCII 字符 2 倍的字符也不适用)
  • 支持表头字段带有颜色(黄色),使用 -c 选项
  • 支持 MD 表格格式,使用 -m 选项
  • 分隔符默认为 `,`,可使用 -d "delimiter" 指定,delimiter 中的第一个字符会作为分隔符

满足上述情况下,才能打印出格式正确的表格

待优化事项

  • 行数增多时,处理的时间明显过长,是由于每个单元格中的值都要计算字节数和字符数,计算打印时要打印的空格字符数
  • 只针对ASCII字符和占三个字节的字符,并且占三个字节的字符的显示宽度是ASCII字符的两倍,其他情况现在还不能正确处理
1
2
# SYNOPSIS 
./toTable.sh -m -v -c -p -n 50 -d "," file

示例

示例文本:

1
2
3
4
5
6
$ cat sep
A,B,C
1,郭,2,C
我是人间一朵花,你是人间一头牛
1,郭,2,C
我是人间一朵花,你是人间一头牛,lovedthe ywat uodjsadsandsasdsnadmsmada,dsad

示例输出:

image-20210911205933449

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/bin/bash

### rule: always use `LF` as the end of line on Linux, or unexpected things will drive you crazy
### csv file to md table or normal table
### only ASCII and Chinese characters are allowed in the file, and UTF-8 is assumed as the charset; otherwise the table is not displayed properly

# Remember the field separator that was active at start-up; ExitFunc writes it back.
oldIFS=$IFS

# Trap handler: put IFS back to the value that was saved in oldIFS at start-up.
ExitFunc() {
  IFS="${oldIFS}"
}

#######################################
# Print one horizontal rule of the table, e.g. "+-----+---+".
# Arguments:
#   $1   - separator printed before, between and after the columns
#   $2   - fill string repeated inside each column
#   $3.. - content width of every column; a column of width N receives N + 2
#          fill strings so the rule also covers the one-space padding that the
#          cells carry on both sides
# Outputs: the rule followed by a newline on stdout
#######################################
function printSymbolLie() {
  # All working variables are local so that same-named globals of the caller
  # (sep, mid, count, m) are left untouched.
  local sep=$1
  local mid=$2
  shift 2
  local count m

  # printf '%s' prints the text verbatim even if it looks like an echo option.
  printf '%s' "$sep"
  for count in "$@"; do
    # Starting at -2 yields count + 2 repetitions.
    for ((m = -2; m < count; m++)); do
      printf '%s' "$mid"
    done
    printf '%s' "$sep"
  done
  printf '\n'
}

# Restore IFS when the script exits or receives one of the listed signals.
# NOTE(review): signal 9 (SIGKILL) cannot be trapped, so that entry presumably has no effect - confirm.
trap 'ExitFunc' 2 9 15 20 EXIT

# Option flags: isMD=1 prints a Markdown table, isColor=1 prints the header row in yellow.
isMD=0
isColor=0
# IFS doubles as the CSV delimiter: every unquoted expansion below is split on it.
IFS=,
while getopts 'cmd:' OPT; do
case "$OPT" in
c)
isColor=1
;;
m)
isMD=1
;;
d)
# echo -e expands backslash escapes in the option argument before it becomes the delimiter
IFS=$(echo -ne "$OPTARG")
;;
?)
echo "avaliable options: [-c] [-m] [-d delimiter]" >&2 ## standard error
exit 1
;;
esac
done
# drop the parsed options; the remaining arguments are the input files
shift "$(($OPTIND - 1))"

# show the active delimiter literally and as a hex dump
echo "delimiter has been set to [$IFS]"
echo -n "$IFS" | hexdump -C
echo

# NOTE(review): $@ is unquoted, so the file names themselves are split on the delimiter.
for item in $@; do
if [ ! -f "$item" ]; then
echo "file ${item} not exists"
continue
fi

echo "file name: $item"

# Pass 1: keep every line of the file in line_arr and find the largest field count of any row
line_arr=()
col_count=0
row_count=0
while read line; do
line_arr[$row_count]=$line
# unquoted on purpose: the line is split into fields on IFS
arr=($line)
temp_count=${#arr[@]}
if ((temp_count > col_count)); then
col_count=$temp_count
fi
((row_count++))
done <$item
echo "table column size: ${col_count}, row size: ${row_count}"

# Pass 2: max_count_arr[j] holds the widest display width seen in column j
max_count_arr=()
for ((i = 0; i < ${row_count}; i++)); do
line=${line_arr[i]}
arr=($line)

# echo "line: $line ${arr[@]}"
for ((j = 0; j < $col_count; j++)); do
max_length=${max_count_arr[j]}
col_str=${arr[j]}

# wc output: always in the following order: newline, word, character, byte, maximum line length.
# xargs prefixes each number with ","; with the default "," delimiter the split result is
# element 0 = empty, element 1 = character count, element 2 = byte count.
# NOTE(review): the "," is hard-coded here, so this parse looks wrong when -d selects another delimiter - confirm.
bytes_chars=($(echo -n $col_str | wc -c -m | xargs -n 1 echo -n ","))
bytes=${bytes_chars[2]}
chars=${bytes_chars[1]}

# more bytes than characters means multi-byte characters are present; each one is assumed
# to be 3 bytes long and two columns wide, which gives (bytes + chars) / 2 display columns
if ((bytes > chars)); then
current_length=$(((bytes + chars) / 2))
else
current_length=$chars
fi

if ((current_length > max_length)); then
max_count_arr[j]=$current_length
fi
done
done

echo -e "max width of every col: ${max_count_arr[@]}\n"

# Pass 3: print the table row by row
for ((i = 0; i < ${row_count}; i++)); do
line=${line_arr[i]}
arr=($line)

# Markdown mode: the |---|---| rule goes between the header row and the first data row
if [[ $isMD -eq 1 && $i -eq 1 ]]; then
printSymbolLie "|" "-" "${max_count_arr[@]}"
fi

# print the table head line +-------+-------+ (above and below the header row)
if [ $isMD -eq 0 ]; then
if [[ $i -eq 0 || $i -eq 1 ]]; then
printSymbolLie "+" "-" "${max_count_arr[@]}"
fi
fi

for ((j = 0; j < $col_count; j++)); do
# same measurement as in pass 2: element 1 = characters, element 2 = bytes
bytes_chars=($(echo -n ${arr[j]} | wc -c -m | xargs -n 1 echo -n ","))
bytes=${bytes_chars[2]}
chars=${bytes_chars[1]}

# let's assume x: Chinese char count; y:ascii char count; default charset is utf-8
# as for display width: one Chinese character = ascii * 2
#
# (1) x*3 + y = bytes (use wc)
# (2) x + y = chars (use wc)
# (3) 2*x + y + z = max_width (we have calculated it before, z is the empty char count)
#
# printf command will count bytes as min size: printf "%-10s" "hello" 10 means bytes
# so finally, the printf min size for current col is:
# 3*x + y + z = max_col_width + (bytes - chars) / 2 (calculated by (1) (2) (3) above)

# the opening border is printed once per row, before the first cell
if ((j == 0)); then echo -n "|"; fi
max_col_width=${max_count_arr[j]}
min_print=$((max_col_width + (bytes - chars) / 2))

# header row in bold yellow when -c was given
if ((i == 0 && isColor == 1)); then
printf " \033[1;33m%-${min_print}s\033[00m |" ${arr[j]}
else
printf " %-${min_print}s |" ${arr[j]}
fi
done
echo
done

# print the end line +-------+-------+
if [ $isMD -eq 0 ]; then
printSymbolLie "+" "-" "${max_count_arr[@]}"
fi
done

# put the original field separator back before the script ends
IFS=$oldIFS

并行计算每列的最大宽度(列粒度)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
#!/bin/bash

### rule: always use `LF` as the end of line on Linux, or unexpected things will drive you crazy
### csv file to md table or normal table
### only ASCII and Chinese characters are allowed in the file, and UTF-8 is assumed as the charset; otherwise the table is not displayed properly

# Remember the field separator that was active at start-up; ExitFunc writes it back.
oldIFS=$IFS

# Write a timestamped message ("[YYYY-MM-DD HH:MM:SS]: text") to stdout.
# All arguments form the message; backslash escapes in it are expanded by echo -e.
log() {
  local stamp
  stamp=$(date +"%F %T")
  echo -e "[${stamp}]: $@"
}

# Trap handler: reinstate the field separator saved in oldIFS.
ExitFunc() {
  IFS="${oldIFS}"
}

#######################################
# Measure the display width of fields [start, end) of one line and store the
# widths in the file "col<start>.txt" in the current directory.
# Width rule: a pure single-byte field is as wide as its character count; a
# field with multi-byte characters is assumed to contain only 3-byte,
# double-width characters besides ASCII, giving (bytes + chars) / 2.
# Globals:
#   IFS (read) - splits the line into fields; its first character also
#                terminates every width in the output, because the caller
#                re-splits the file content on IFS ("," by default)
# Arguments:
#   $1 - the raw line
#   $2 - index of the first field to measure
#   $3 - index one past the last field to measure
# Outputs:
#   col<$2>.txt containing "<width><delim>" per field plus a final newline
# Returns:
#   0
#######################################
function calLength() {
  local line=$1
  local start=$2
  local end=$3
  # Deliberately unquoted: split the line into fields on IFS.
  local arr=($line)
  local delim=${IFS:0:1}
  local result=""
  local -a char_counts=()
  local idx col_str bytes chars

  # Pass 1: ${#var} counts characters while the caller's locale is active.
  # Using the builtin avoids forking wc and xargs for every single cell.
  for ((idx = start; idx < end; idx++)); do
    col_str=${arr[idx]}
    char_counts[idx]=${#col_str}
  done

  # Pass 2: under the C locale ${#var} counts bytes. The assignment is local,
  # so the previous locale is back in effect once the function returns.
  local LC_ALL=C
  for ((idx = start; idx < end; idx++)); do
    col_str=${arr[idx]}
    bytes=${#col_str}
    chars=${char_counts[idx]}
    if ((bytes > chars)); then
      result+="$(((bytes + chars) / 2))${delim}"
    else
      result+="${chars}${delim}"
    fi
  done

  echo "$result" >"col$2.txt"

  return 0
}

#######################################
# Print one horizontal rule of the table, e.g. "+-----+---+".
# Arguments:
#   $1   - separator printed before, between and after the columns
#   $2   - fill string repeated inside each column
#   $3.. - content width of every column; width N produces N + 2 fill strings
#          so the rule spans the padding space on both sides of a cell
# Outputs: the rule followed by a newline on stdout
#######################################
function printSymbolLie() {
  # Locals prevent sep/mid/count/m from overwriting globals of the caller.
  local sep=$1
  local mid=$2
  shift 2
  local count m

  printf '%s' "$sep"
  for count in "$@"; do
    # Starting at -2 yields count + 2 repetitions.
    for ((m = -2; m < count; m++)); do
      printf '%s' "$mid"
    done
    printf '%s' "$sep"
  done
  printf '\n'
}

# Restore IFS when the script exits or receives one of the listed signals.
# NOTE(review): signal 9 (SIGKILL) cannot be trapped, so that entry presumably has no effect - confirm.
trap 'ExitFunc' 2 9 15 20 EXIT

# Option flags: isMD=1 prints a Markdown table, isColor=1 prints the header row in yellow.
isMD=0
isColor=0
# IFS doubles as the CSV delimiter: every unquoted expansion below is split on it.
IFS=,
while getopts 'cmd:' OPT; do
case "$OPT" in
c)
isColor=1
;;
m)
isMD=1
;;
d)
# echo -e expands backslash escapes in the option argument before it becomes the delimiter
IFS=$(echo -ne "$OPTARG")
;;
?)
echo "avaliable options: [-c] [-m] [-d delimiter]" >&2 ## standard error
exit 1
;;
esac
done
# drop the parsed options; the remaining arguments are the input files
shift "$(($OPTIND - 1))"

# show the active delimiter literally and as a hex dump
log "delimiter has been set to [$IFS]"
echo -n "$IFS" | hexdump -C
echo

# NOTE(review): $@ is unquoted, so the file names themselves are split on the delimiter.
for item in $@; do
if [ ! -f "$item" ]; then
echo "file ${item} not exists"
continue
fi

log "file name: $item"

# Pass 1: keep every line of the file in line_arr and find the largest field count of any row
line_arr=()
col_count=0
row_count=0
while read line; do
line_arr[$row_count]=$line
# unquoted on purpose: the line is split into fields on IFS
arr=($line)
temp_count=${#arr[@]}
if ((temp_count > col_count)); then
col_count=$temp_count
fi
((row_count++))
done <$item
log "table column size: ${col_count}, row size: ${row_count}"

# Pass 2: max_count_arr[j] holds the widest display width seen in column j
max_count_arr=()
for ((i = 0; i < ${row_count}; i++)); do
line=${line_arr[i]}
arr=($line)

# A background job cannot hand its result back through a variable, so every job
# writes to its own file and the files are concatenated afterwards.
# An alternative would be to parallelise by row; this version parallelises by column.
files=""
for((l=0;l<$col_count;l++)); do
# one background subshell per column; it writes col<l>.txt
(calLength "$line" "$l" "$((l+1))") &
files="${files}col${l}.txt "
done
# barrier: all column files of this row exist after this point
wait

# concatenate the column files in column order and split the content into one width per column
result=$(echo $files | xargs cat)
# echo "command result: $result"
col_length_arr=($result)
for ((j = 0; j < $col_count; j++)); do
max_length=${max_count_arr[j]}
current_length=${col_length_arr[j]}

if ((current_length > max_length)); then
max_count_arr[j]=$current_length
fi
done
done

# remove the temporary column files (the names are the same for every row)
echo $files | xargs rm


# log "max width of every col: ${max_count_arr[@]}\n"

# Pass 3: print the table; the file is read a second time instead of walking line_arr
# for ((i = 0; i < ${row_count}; i++)); do
i=0
while read line; do
# line=${line_arr[i]}
arr=($line)

# echo "line:######### $line"
# Markdown mode: the |---|---| rule goes between the header row and the first data row
if [[ $isMD -eq 1 && $i -eq 1 ]]; then
printSymbolLie "|" "-" "${max_count_arr[@]}"
fi

# print the table head line +-------+-------+ (above and below the header row)
if [[ $isMD -eq 0 && ($i -eq 0 || $i -eq 1) ]]; then
printSymbolLie "+" "-" "${max_count_arr[@]}"
fi

for ((j = 0; j < $col_count; j++)); do
# wc prints the character count before the byte count; with the default "," delimiter
# element 1 is the character count and element 2 the byte count.
# NOTE(review): the "," is hard-coded here, so this parse looks wrong when -d selects another delimiter - confirm.
bytes_chars=($(echo -n ${arr[j]} | wc -c -m | xargs -n 1 echo -n ","))
bytes=${bytes_chars[2]}
chars=${bytes_chars[1]}

# let's assume x: Chinese char count; y:ascii char count; default charset is utf-8
# as for display width: one Chinese character = ascii * 2
#
# (1) x*3 + y = bytes (use wc)
# (2) x + y = chars (use wc)
# (3) x*2 + y + z = max_width (we have calculated it before, z is the empty char count)
#
# printf command will count bytes as min size: printf "%-10s" "hello" 10 means bytes
# so finally, the printf min size for current col is:
# 3*x + y + z = max_col_width + (bytes - chars) / 2 (calculated by (1) (2) (3) above)

# the opening border is printed once per row, before the first cell
if ((j == 0)); then echo -n "|"; fi
max_col_width=${max_count_arr[j]}
min_print=$((max_col_width + (bytes - chars) / 2))
# z=$((max_col_width - (bytes + chars) / 2))

# header row in bold yellow when -c was given
if ((i == 0 && isColor == 1)); then
printf " \033[1;33m%-${min_print}s\033[00m |" ${arr[j]}
else
printf " %-${min_print}s |" ${arr[j]}
fi
done
echo
((i++))
done <$item

# print the end line +-------+-------+
if [ $isMD -eq 0 ]; then
printSymbolLie "+" "-" "${max_count_arr[@]}"
fi
echo
done

# put the original field separator back before the script ends
IFS=$oldIFS

列、行并行

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
#!/bin/bash

### rule: always use `LF` as the end of line on Linux, or unexpected things will drive you crazy
### csv file to md table or normal table
### only ASCII and Chinese characters are allowed in the file, and UTF-8 is assumed as the charset; otherwise the table is not displayed properly

# Remember the field separator that was active at start-up; ExitFunc writes it back.
oldIFS=$IFS

# Emit "[YYYY-MM-DD HH:MM:SS]: <message>" on stdout.
# The message consists of all arguments; echo -e expands backslash escapes in it.
log() {
  local now
  now=$(date +"%F %T")
  echo -e "[${now}]: $@"
}

# Trap handler: reinstate the field separator saved in oldIFS.
ExitFunc() {
  IFS="${oldIFS}"
}

#######################################
# Measure the display width of fields [start, end) of one line and store the
# widths in the file "col<start>.txt" in the current directory.
# Width rule: a pure single-byte field is as wide as its character count; a
# field with multi-byte characters is assumed to contain only 3-byte,
# double-width characters besides ASCII, giving (bytes + chars) / 2.
# Globals:
#   IFS (read) - splits the line into fields; its first character also
#                terminates every width in the output, because the reader of
#                the file re-splits its content on IFS ("," by default)
# Arguments:
#   $1 - the raw line
#   $2 - index of the first field to measure
#   $3 - index one past the last field to measure
# Outputs:
#   col<$2>.txt containing "<width><delim>" per field plus a final newline
# Returns:
#   0
#######################################
function calLength() {
  local line=$1
  local start=$2
  local end=$3
  # Deliberately unquoted: split the line into fields on IFS.
  local arr=($line)
  local delim=${IFS:0:1}
  local result=""
  local -a char_counts=()
  local idx col_str bytes chars

  # Pass 1: ${#var} counts characters while the caller's locale is active.
  # Using the builtin avoids forking wc and xargs for every single cell.
  for ((idx = start; idx < end; idx++)); do
    col_str=${arr[idx]}
    char_counts[idx]=${#col_str}
  done

  # Pass 2: under the C locale ${#var} counts bytes. The assignment is local,
  # so the previous locale is back in effect once the function returns.
  local LC_ALL=C
  for ((idx = start; idx < end; idx++)); do
    col_str=${arr[idx]}
    bytes=${#col_str}
    chars=${char_counts[idx]}
    if ((bytes > chars)); then
      result+="$(((bytes + chars) / 2))${delim}"
    else
      result+="${chars}${delim}"
    fi
  done

  echo "$result" >"col$2.txt"

  return 0
}

#######################################
# Measure the display width of fields [start, end) of one line and store the
# widths in the file "row<row>_col<start>.txt" in the current directory.
# Width rule: a pure single-byte field is as wide as its character count; a
# field with multi-byte characters is assumed to contain only 3-byte,
# double-width characters besides ASCII, giving (bytes + chars) / 2.
# Globals:
#   IFS (read) - splits the line into fields; its first character also
#                terminates every width in the output, because the reader of
#                the file re-splits its content on IFS ("," by default)
# Arguments:
#   $1 - the raw line
#   $2 - index of the first field to measure
#   $3 - index one past the last field to measure
#   $4 - row tag used in the output file name
# Outputs:
#   row<$4>_col<$2>.txt containing "<width><delim>" per field plus a newline
# Returns:
#   0
#######################################
function calLength2() {
  local line=$1
  local start=$2
  local end=$3
  local row=$4
  # Deliberately unquoted: split the line into fields on IFS.
  local arr=($line)
  local delim=${IFS:0:1}
  local result=""
  local -a char_counts=()
  local idx col_str bytes chars

  # Pass 1: ${#var} counts characters while the caller's locale is active.
  # Using the builtin avoids forking wc and xargs for every single cell.
  for ((idx = start; idx < end; idx++)); do
    col_str=${arr[idx]}
    char_counts[idx]=${#col_str}
  done

  # Pass 2: under the C locale ${#var} counts bytes. The assignment is local,
  # so the previous locale is back in effect once the function returns.
  local LC_ALL=C
  for ((idx = start; idx < end; idx++)); do
    col_str=${arr[idx]}
    bytes=${#col_str}
    chars=${char_counts[idx]}
    if ((bytes > chars)); then
      result+="$(((bytes + chars) / 2))${delim}"
    else
      result+="${chars}${delim}"
    fi
  done

  echo "$result" >"row${row}_col$2.txt"

  return 0
}

#######################################
# Compute the maximum display width of every column over a chunk of lines and
# store the result in the file "row<line_start_num>" in the current directory.
# For each line one background calLength2 job per column is started; the jobs
# write row<line_start_num>_col<l>.txt, which are read back after a barrier.
# Globals:
#   IFS (read) - splits the collected widths; its first character terminates
#                every width written to the result file ("," by default)
# Arguments:
#   $1   - tag of this chunk (index of its first line); used in file names
#   $2   - number of columns
#   $3.. - the lines of the chunk
# Outputs:
#   row<$1> containing one "<width><delim>" entry per column, no newline
#######################################
function rowCal() {
  local line_start_num=$1
  local col_count=$2

  shift 2
  local delim=${IFS:0:1}
  local -a max_count_arr=() col_length_arr=() files=()
  local line result current_length l j count

  # Give every column an entry up front. Without it a column that is empty in
  # all lines would be missing from the result and shift the following columns.
  for ((j = 0; j < col_count; j++)); do
    max_count_arr[j]=0
  done

  for line in "$@"; do
    files=()
    for ((l = 0; l < col_count; l++)); do
      (calLength2 "$line" "$l" "$((l + 1))" "${line_start_num}") &
      files+=("row${line_start_num}_col${l}.txt")
    done
    # Barrier: every column file of this line is complete after this point.
    wait

    result=""
    if ((${#files[@]} > 0)); then
      result=$(cat -- "${files[@]}")
    fi
    # Deliberately unquoted: one width per column after splitting on IFS.
    col_length_arr=($result)
    for ((j = 0; j < col_count; j++)); do
      # Arithmetic expansion strips the newline that precedes all but the first
      # number and turns a missing entry into 0.
      current_length=$((col_length_arr[j]))
      if ((current_length > max_count_arr[j])); then
        max_count_arr[j]=$current_length
      fi
    done
  done

  # The column files are reused for every line, so one removal at the end is
  # enough; doing it synchronously leaves no job behind when the function returns.
  if ((${#files[@]} > 0)); then
    rm -f -- "${files[@]}"
  fi

  local final=""
  for count in "${max_count_arr[@]}"; do
    final="${final}${count}${delim}"
  done

  echo -n "$final" >"row${line_start_num}"
}

#######################################
# Print one horizontal rule of the table, e.g. "+-----+---+".
# Arguments:
#   $1   - separator printed before, between and after the columns
#   $2   - fill string repeated inside each column
#   $3.. - content width of every column; width N produces N + 2 fill strings
#          so the rule spans the padding space on both sides of a cell
# Outputs: the rule followed by a newline on stdout
#######################################
function printSymbolLie() {
  # Locals prevent sep/mid/count/m from overwriting globals of the caller.
  local sep=$1
  local mid=$2
  shift 2
  local count m

  printf '%s' "$sep"
  for count in "$@"; do
    # Starting at -2 yields count + 2 repetitions.
    for ((m = -2; m < count; m++)); do
      printf '%s' "$mid"
    done
    printf '%s' "$sep"
  done
  printf '\n'
}

# Restore IFS when the script exits or receives one of the listed signals.
# NOTE(review): signal 9 (SIGKILL) cannot be trapped, so that entry presumably has no effect - confirm.
trap 'ExitFunc' 2 9 15 20 EXIT

# Option flags: isMD=1 prints a Markdown table, isColor=1 prints the header row in yellow.
isMD=0
isColor=0
# IFS doubles as the CSV delimiter: every unquoted expansion below is split on it.
IFS=,
# Number of row chunks that are measured in parallel (-p).
parrel=4
while getopts 'cmp:d:' OPT; do
  case "$OPT" in
  c)
    isColor=1
    ;;
  m)
    isMD=1
    ;;
  p)
    parrel=$OPTARG
    ;;
  d)
    # echo -e expands backslash escapes in the option argument
    IFS=$(echo -ne "$OPTARG")
    ;;
  ?)
    # the usage text now lists -p, which the option string above accepts
    echo "avaliable options: [-c] [-m] [-p parrel_size] [-d delimiter]" >&2 ## standard error
    exit 1
    ;;
  esac
done
# drop the parsed options; the remaining arguments are the input files
shift "$(($OPTIND - 1))"

# show the active delimiter literally and as a hex dump
log "delimiter has been set to [$IFS]"
echo -n "$IFS" | hexdump -C
echo

# NOTE(review): $@ is unquoted, so the file names themselves are split on the delimiter.
for item in $@; do
  if [ ! -f "$item" ]; then
    echo "file ${item} not exists"
    continue
  fi

  log "file name: $item"

  ##########################################################
  # Pass 1: keep every line in line_arr and find the largest field count of any row
  line_arr=()
  col_count=0
  row_count=0
  while read line; do
    line_arr[$row_count]=$line
    # unquoted on purpose: the line is split into fields on IFS
    arr=($line)
    temp_count=${#arr[@]}
    if ((temp_count > col_count)); then
      col_count=$temp_count
    fi
    ((row_count++))
  done <$item
  log "table column size: ${col_count}, row size: ${row_count}"

  ##########################################################
  # Pass 2: measure the rows in up to $parrel background chunks. Each chunk
  # writes its per-column maxima to the file row<start_index>; row_files
  # collects those names, separated by the delimiter so that the unquoted
  # expansion further down splits them again.
  row_files=""
  if ((row_count > parrel)); then
    per_size=$((row_count / parrel))
    for ((r = 0; r < $parrel; r++)); do
      start_index=$((r * per_size))
      # the last chunk also takes the remainder of the integer division
      if ((r == parrel - 1)); then
        length=$((row_count - (r * per_size)))
      else
        length=$per_size
      fi
      (rowCal "${start_index}" $col_count "${line_arr[@]:${start_index}:${length}}") &
      row_files="${row_files}row${start_index}$IFS"
    done
  else
    # Few rows: one chunk with ALL rows. The slice length must be the row
    # count; slicing by col_count skipped rows whenever a file had more rows
    # than columns.
    (rowCal 0 $col_count "${line_arr[@]:0:${row_count}}") &
    row_files="row0"
  fi
  # barrier: every chunk file exists after this point
  wait

  # Merge the chunk results: max_count_arr[j] = widest display width of column j.
  # Every column starts at 0 so that a column that is empty in all rows still
  # has an entry and therefore still shows up in the rule lines.
  max_count_arr=()
  for ((j = 0; j < $col_count; j++)); do
    max_count_arr[j]=0
  done
  for row_cal_file in $row_files; do
    result=$(cat $row_cal_file)
    col_length_arr=($result)

    for ((j = 0; j < $col_count; j++)); do
      max_length=${max_count_arr[j]}
      current_length=${col_length_arr[j]}

      if ((current_length > max_length)); then
        max_count_arr[j]=$current_length
      fi
    done
    rm $row_cal_file &
  done
  log "max width of every col cal \n"

  ##########################################################
  # Pass 3: print the table; the file is read a second time instead of walking line_arr
  # for ((i = 0; i < ${row_count}; i++)); do
  i=0
  while read line; do
    # line=${line_arr[i]}
    arr=($line)

    # echo "line:######### $line"
    # Markdown mode: the |---|---| rule goes between the header row and the first data row
    if [[ $isMD -eq 1 && $i -eq 1 ]]; then
      printSymbolLie "|" "-" "${max_count_arr[@]}"
    fi

    # print the table head line +-------+-------+ (above and below the header row)
    if [[ $isMD -eq 0 && ($i -eq 0 || $i -eq 1) ]]; then
      printSymbolLie "+" "-" "${max_count_arr[@]}"
    fi

    for ((j = 0; j < $col_count; j++)); do
      # wc prints the character count before the byte count; xargs prefixes each
      # number with the delimiter, so after splitting element 1 is the character
      # count and element 2 the byte count.
      bytes_chars=($(echo -n ${arr[j]} | wc -c -m | xargs -n 1 echo -n "$IFS"))
      bytes=${bytes_chars[2]}
      chars=${bytes_chars[1]}

      # let's assume x: Chinese char count; y:ascii char count; default charset is utf-8
      # as for display width: one Chinese character = ascii * 2
      #
      # (1) x*3 + y = bytes (use wc)
      # (2) x + y = chars (use wc)
      # (3) x*2 + y + z = max_width (we have calculated it before, z is the empty char count)
      #
      # printf command will count bytes as min size: printf "%-10s" "hello" 10 means bytes
      # so finally, the printf min size for current col is:
      # 3*x + y + z = max_col_width + (bytes - chars) / 2 (calculated by (1) (2) (3) above)

      # the opening border is printed once per row, before the first cell
      if ((j == 0)); then echo -n "|"; fi
      max_col_width=${max_count_arr[j]}
      min_print=$((max_col_width + (bytes - chars) / 2))
      # z=$((max_col_width - (bytes + chars) / 2))

      # header row in bold yellow when -c was given
      if ((i == 0 && isColor == 1)); then
        printf " \033[1;33m%-${min_print}s\033[00m |" ${arr[j]}
      else
        printf " %-${min_print}s |" ${arr[j]}
      fi

    done

    echo
    ((i++))
  done <$item

  # print the end line +-------+-------+
  if [ $isMD -eq 0 ]; then
    printSymbolLie "+" "-" "${max_count_arr[@]}"
  fi
  echo
done
# put the original field separator back before the script ends
IFS=$oldIFS

一次性输出

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
#!/bin/bash

### rule: always use `LF` as the end of line on Linux, or unexpected things will drive you crazy
### csv file to md table or normal table
### only ASCII and Chinese characters are allowed in the file, and UTF-8 is assumed as the charset; otherwise the table is not displayed properly

# Remember the field separator that was active at start-up; ExitFunc writes it back.
oldIFS=$IFS
# 1 enables the debug() messages; set by the -v option.
isVerbose=0

# Emit a timestamped diagnostic on stdout, but only when isVerbose is 1.
# The message consists of all arguments; echo -e expands backslash escapes in it.
debug() {
  ((isVerbose == 1)) || return 0
  local stamp
  stamp=$(date +"%F %T")
  echo -e "[${stamp}]: $@"
}

# Print a user-facing message on stdout.
# The arguments are handed to echo -e, so backslash escapes such as "\n" are expanded.
info() {
  echo -e "$@"
}

# Trap handler: reinstate the field separator saved in oldIFS.
ExitFunc() {
  IFS="${oldIFS}"
}

#######################################
# Measure the display width of fields [start, end) of one line and store the
# widths in the file "row<row>_col<start>.txt" in the current directory.
# Width rule: a pure single-byte field is as wide as its character count; a
# field with multi-byte characters is assumed to contain only 3-byte,
# double-width characters besides ASCII, giving (bytes + chars) / 2.
# Globals:
#   IFS (read) - splits the line into fields; its first character also
#                terminates every width in the output, because the reader of
#                the file re-splits its content on IFS ("," by default)
# Arguments:
#   $1 - the raw line
#   $2 - index of the first field to measure
#   $3 - index one past the last field to measure
#   $4 - row tag used in the output file name
# Outputs:
#   row<$4>_col<$2>.txt containing "<width><delim>" per field plus a newline
# Returns:
#   0
#######################################
function calLength2() {
  local line=$1
  local start=$2
  local end=$3
  local row=$4
  # Deliberately unquoted: split the line into fields on IFS.
  local arr=($line)
  local delim=${IFS:0:1}
  local result=""
  local -a char_counts=()
  local idx col_str bytes chars

  # Pass 1: ${#var} counts characters while the caller's locale is active.
  # Using the builtin avoids forking wc and xargs for every single cell.
  for ((idx = start; idx < end; idx++)); do
    col_str=${arr[idx]}
    char_counts[idx]=${#col_str}
  done

  # Pass 2: under the C locale ${#var} counts bytes. The assignment is local,
  # so the previous locale is back in effect once the function returns.
  local LC_ALL=C
  for ((idx = start; idx < end; idx++)); do
    col_str=${arr[idx]}
    bytes=${#col_str}
    chars=${char_counts[idx]}
    if ((bytes > chars)); then
      result+="$(((bytes + chars) / 2))${delim}"
    else
      result+="${chars}${delim}"
    fi
  done

  echo "$result" >"row${row}_col$2.txt"

  return 0
}

#######################################
# Compute the maximum display width of every column over a chunk of lines and
# store the result in the file "row<line_start_num>" in the current directory.
# For each line one background calLength2 job per column is started; the jobs
# write row<line_start_num>_col<l>.txt, which are read back after a barrier.
# Globals:
#   IFS (read) - splits the collected widths; its first character terminates
#                every width written to the result file ("," by default)
# Arguments:
#   $1   - tag of this chunk (index of its first line); used in file names
#   $2   - number of columns
#   $3.. - the lines of the chunk
# Outputs:
#   row<$1> containing one "<width><delim>" entry per column, no newline;
#   each processed line is also reported through debug()
#######################################
function rowCal() {
  local line_start_num=$1
  local col_count=$2

  shift 2
  local delim=${IFS:0:1}
  local -a max_count_arr=() col_length_arr=() files=()
  local line result current_length l j count

  # Give every column an entry up front. Without it a column that is empty in
  # all lines would be missing from the result and shift the following columns.
  for ((j = 0; j < col_count; j++)); do
    max_count_arr[j]=0
  done

  for line in "$@"; do

    debug "line: $line"

    files=()
    for ((l = 0; l < col_count; l++)); do
      (calLength2 "$line" "$l" "$((l + 1))" "${line_start_num}") &
      files+=("row${line_start_num}_col${l}.txt")
    done
    # Barrier: every column file of this line is complete after this point.
    wait

    result=""
    if ((${#files[@]} > 0)); then
      result=$(cat -- "${files[@]}")
    fi
    # Deliberately unquoted: one width per column after splitting on IFS.
    col_length_arr=($result)
    for ((j = 0; j < col_count; j++)); do
      # Arithmetic expansion strips the newline that precedes all but the first
      # number and turns a missing entry into 0.
      current_length=$((col_length_arr[j]))
      if ((current_length > max_count_arr[j])); then
        max_count_arr[j]=$current_length
      fi
    done
  done

  # The column files are reused for every line, so one removal at the end is
  # enough; doing it synchronously leaves no job behind when the function returns.
  if ((${#files[@]} > 0)); then
    rm -f -- "${files[@]}"
  fi

  local final=""
  for count in "${max_count_arr[@]}"; do
    final="${final}${count}${delim}"
  done

  echo -n "$final" >"row${line_start_num}"
}

#######################################
# Print one horizontal rule of the table, e.g. "+-----+---+", WITHOUT a
# trailing newline (this version of the script captures the rule with $(...)
# or adds the newline itself).
# Arguments:
#   $1   - separator printed before, between and after the columns
#   $2   - fill string repeated inside each column
#   $3.. - content width of every column; width N produces N + 2 fill strings
#          so the rule spans the padding space on both sides of a cell
# Outputs: the rule on stdout, no newline
#######################################
function printSymbolLie() {
  # Locals prevent sep/mid/count/m from overwriting globals of the caller.
  local sep=$1
  local mid=$2
  shift 2
  local count m

  printf '%s' "$sep"
  for count in "$@"; do
    # Starting at -2 yields count + 2 repetitions.
    for ((m = -2; m < count; m++)); do
      printf '%s' "$mid"
    done
    printf '%s' "$sep"
  done
}

# Restore IFS when the script exits or receives one of the listed signals.
# NOTE(review): signal 9 (SIGKILL) cannot be trapped, so that entry presumably has no effect - confirm.
trap 'ExitFunc' 2 9 15 20 EXIT

# Option flags: isMD=1 prints a Markdown table, isColor=1 prints the header row in yellow,
# isParrel=1 measures the rows in several background chunks.
isMD=0
isColor=0
isParrel=0
# IFS doubles as the CSV delimiter: every unquoted expansion below is split on it.
IFS=,
# number of row chunks used when -p is given (changed with -n)
parrel=4

# wall-clock start, used for the timing line at the very end
start_seconds=$(date +"%s")

while getopts 'cmvpn:d:' OPT; do
case "$OPT" in
c)
isColor=1
;;
m)
isMD=1
;;
p)
isParrel=1
;;
v)
isVerbose=1
;;
n)
parrel=$OPTARG
;;
d)
# echo -e expands backslash escapes in the option argument before it becomes the delimiter
IFS=$(echo -ne "$OPTARG")
;;
?)
echo "avaliable options: [-c] [-v] [-m] [-p] [-n parrel_size] [-d delimiter]" >&2 ## standard error
exit 1
;;
esac
done
# drop the parsed options; the remaining arguments are the input files
shift "$(($OPTIND - 1))"

# in verbose mode show the active delimiter literally and as a hex dump
debug "delimiter has been set to [$IFS]"
if ((isVerbose == 1)); then echo -n "$IFS" | hexdump -C; fi
echo

# NOTE(review): $@ is unquoted, so the file names themselves are split on the delimiter.
for item in $@; do
if [ ! -f "$item" ]; then
echo "file ${item} not exists"
continue
fi

debug "file name: $item"

##########################################################
# Pass 1: keep every line of the file in line_arr and find the largest field count of any row
line_arr=()
col_count=0
row_count=0
while read line; do
line_arr[$row_count]=$line
# unquoted on purpose: the line is split into fields on IFS
arr=($line)
temp_count=${#arr[@]}
if ((temp_count > col_count)); then
col_count=$temp_count
fi
((row_count++))
done <$item
debug "table column size: ${col_count}, row size: ${row_count}"

##########################################################
# Pass 2: measure the rows. Each rowCal call writes its per-column maxima to the file
# row<start_index>; row_files collects those names, separated by the delimiter so that
# the unquoted expansion further down splits them again.
row_files=""
if ((isParrel == 1 && row_count > parrel)); then
per_size=$((row_count / parrel))
for ((r = 0; r < $parrel; r++)); do
start_index=$((r * per_size))
# the last chunk also takes the remainder of the integer division
if ((r == parrel - 1)); then
length=$((row_count - (r * per_size)))
else
length=$per_size
fi
(rowCal "${start_index}" $col_count "${line_arr[@]:${start_index}:${length}}") &
row_files="${row_files}row${start_index}$IFS"
done
else
# no -p (or too few rows): a single chunk holding all rows
debug "no parrel function call:" 0 $col_count "${line_arr[@]:0:${row_count}}"
(rowCal 0 $col_count "${line_arr[@]:0:${row_count}}") &
row_files="row0"
fi
# barrier: every chunk file exists after this point
wait

# Merge the chunk results: max_count_arr[j] = widest display width of column j
max_count_arr=()
for row_cal_file in $row_files; do
result=$(cat $row_cal_file)
col_length_arr=($result)

for ((j = 0; j < $col_count; j++)); do
max_length=${max_count_arr[j]}
current_length=${col_length_arr[j]}

if ((current_length > max_length)); then
max_count_arr[j]=$current_length
fi
done
# the chunk file has been read; remove it in the background
rm $row_cal_file &
done
debug "max width of every col cal \n"

##########################################################
# Pass 3: build the output. Instead of one printf per cell, the rows are accumulated
# into a printf format (format_str) plus a list of double-quoted cell values
# (parameter_str) and flushed through a single "xargs printf" call.
# for ((i = 0; i < ${row_count}; i++)); do
i=0

format_str=""
parameter_str=""
print_count=0
while read line; do
((print_count++))
# line=${line_arr[i]}
arr=($line)

# echo "line:######### $line"
# Markdown mode: the |---|---| rule goes between the header row and the first data row
if [[ $isMD -eq 1 && $i -eq 1 ]]; then
format_str=${format_str}$(printSymbolLie "|" "-" "${max_count_arr[@]}")"\n"
fi

# print the table head line +-------+-------+ (above and below the header row)
if [[ $isMD -eq 0 && ($i -eq 0 || $i -eq 1) ]]; then
format_str=${format_str}$(printSymbolLie "+" "-" "${max_count_arr[@]}")"\n"
fi

for ((j = 0; j < $col_count; j++)); do
# wc prints the character count before the byte count; xargs prefixes each number with
# the delimiter, so after splitting element 1 is the character count and element 2 the byte count
bytes_chars=($(echo -n ${arr[j]} | wc -c -m | xargs -n 1 echo -n "$IFS"))
bytes=${bytes_chars[2]}
chars=${bytes_chars[1]}

# let's assume x: Chinese char count; y:ascii char count; default charset is utf-8
# as for display width: one Chinese character = ascii * 2
#
# (1) x*3 + y = bytes (use wc)
# (2) x + y = chars (use wc)
# (3) x*2 + y + z = max_width (we have calculated it before, z is the empty char count)
#
# printf command will count bytes as min size: printf "%-10s" "hello" 10 means bytes
# so finally, the printf min size for current col is:
# 3*x + y + z = max_col_width + (bytes - chars) / 2 (calculated by (1) (2) (3) above)
#
# todo:// optimization has to be done to reduce cal time

# the opening border is added once per row, before the first cell
if ((j == 0)); then format_str="${format_str}|"; fi
max_col_width=${max_count_arr[j]}
min_print=$((max_col_width + (bytes - chars) / 2))

# header row in bold yellow when -c was given
if ((i == 0 && isColor == 1)); then
format_str="${format_str} \033[1;33m%-${min_print}s\033[00m |"
else
format_str="${format_str} %-${min_print}s |"
fi

# the value is wrapped in double quotes so that xargs passes it as one argument
# NOTE(review): a cell that itself contains a quote character presumably breaks the xargs parsing - confirm.
parameter_str="${parameter_str}\"${arr[j]}\" "
done

format_str="${format_str}\n"

debug "format_str: $format_str"
debug "parameter_str: $parameter_str"

# Flush the accumulated rows at row 50 and at the last row.
# NOTE(review): print_count is never reset, so the "== 50" flush fires only once and all
# later rows go out in one final batch - confirm this is intended.
if ((print_count == 50 || print_count == row_count)); then
echo -n "$parameter_str" | xargs printf "$format_str"
parameter_str=""
format_str=""
fi

((i++))
done <$item

# echo -n "$parameter_str" | xargs printf "$format_str"

# print the end line +-------+-------+ (printSymbolLie emits no newline here; the echo below adds it)
if [ $isMD -eq 0 ]; then
printSymbolLie "+" "-" "${max_count_arr[@]}"
fi
echo
done
# put the original field separator back before the summary is printed
IFS=$oldIFS

end_seconds=$(date +"%s")

# summary; row_count and col_count hold the values of the last file that was processed
info "\nTable row size: ${row_count}, column size: ${col_count}"
info "It takes $((end_seconds - start_seconds)) seconds to calculate and display.\n"