高级文本命令

1 sed 命令

synopsis - 基本用法

sed [OPTION]… {script-only-if-no-other-script} [input-file]..

参考链接

man sed

option

1
2
3

# suppress automatic printing of pattern space
# 关闭 pattern space 的自动打印，如果 command 中已经指定了 p, 可能就需要指定该选项
-n, --quiet, --silent

script command

d      # Delete pattern space.  Start next cycle.

# Copy 即覆盖， append 即追加
h H    # Copy/append pattern space to hold space.
g G    # Copy/append hold space to pattern space.
x      # Exchange the contents of the hold and pattern spaces.

n N    # Read/append the next line of input into the pattern space.
p      # Print the current pattern space.
P      # Print up to the first embedded newline of the current pattern space.

w filename # Write the current pattern space to filename.
W filename # Write the first line of the current pattern space to filename.  This is a GNU extension.

examples

逆序打印文件行，等同于 tac

# 第一行：只将 pattern space 中的内容 copy 到 hold space（h），再删除 pattern space（d），进入下一轮循环
# 之后的每一行：将 hold space 的内容 append 到 pattern space（G），再将 pattern space copy 回 hold space（h），
# 除最后一行外都删除 pattern space（$!d）并进入下一轮循环；最后一行不删除，由 sed 自动打印出逆序后的全部内容
sed '1{h;d};G;h;$!d' seq.txt


sed '1!G;h;$!d' t.txt
# 1!G —— 除第一行外，每一行都执行 G 命令，将 hold space 中的内容 append 到 pattern space
# h —— 每一行都执行 h 命令，将 pattern space 中的内容拷贝到 hold space 中
# $!d —— 除最后一行外，每一行都执行 d 命令，删除 pattern space；最后一行不删除，由 sed 自动打印

指定要处理的行

# 打印第一行，1 指定第一行
sed  -n '1p' seq.txt

# 打印除了第一行的所有行
sed -n '1!p' seq.txt

# 打印奇数行，如第 1 行，第 3 行，第 5 行...， first~step 的语义为：从第 first 行开始，步长为 step（此处从第 1 行开始，步长为 2）
sed -n '1~2p' seq.txt

# 打印偶数行, 0~2 的语义为：行号是 2 的倍数的行（步长为 2）
sed -n '0~2p' seq.txt

# 打印第一行，以及后续连续的 5 行
sed -n '1,+5p' seq.txt

# 打印第一行，直到第一次遇到行号是 2 的倍数的行，打印
sed -n '1,~2p' seq.txt

# 打印最后一行
sed -n '$p' seq.txt 

# 打印匹配正则表达式 regex 的行
sed -n '/regex/p' seq.txt

# & 代表匹配的内容
echo {a..z} | xargs -n 1 | sed "s/[a-z]/[&]/g"

sed表达式通常用单引号来引用。不过也可以使用双引号。 shell会在调用sed前先扩展双引
号中的内容。如果想在sed表达式中使用变量，双引号就能派上用场了。

2 cut

synopsis - 基本用法

cut OPTION [FILE]

-d, --delimiter		# 指定列分隔符
-f, --fields		# 指定要打印的列（不包含列分隔符的行同样会被打印，除非指定 -s）
--complement		# 打印指定之外的列、字符或字节（相当于取反）
--output-delimiter	# 指定打印时的列分隔符
-z 					# 行分隔符为 NUL， cut 是对一行进行切割的

cut -d "," -f 2,5 file		# 打印第2列，第5列
cut -d "," -f 2-5 file		# 打印第2列到第5列
cut -d "," -f 2-5 --complement file	# 打印除了2-5列的其他列

# 指定打印第几个字节、字符、列（区间）
# N-M 从第N个字节、字符或字段开始到第M个（包括第M个在内）字节、字符或字段
# N-  从第N个字节、字符或字段开始到行尾
# -M  从第1个字节、字符或字段开始到第M个（包括第M个在内）字节、字符或字段

echo {a..z} | cut -d " " -f-4
# result: a b c d


echo -e "4 3\x001 2" | cut -z -d " " --output-delimiter "#" -f1,2

3 awk

awk BEGIN{} PATTERN {} END{}

# awk 的 pattern
$ awk 'NR < 5' # 行号小于5的行
$ awk 'NR==1,NR==4' # 行号在1到4之间的行
$ awk '/linux/' # 包含模式为linux的行（可以用正则表达式来指定模式）
$ awk '!/linux/' # 不包含模式为linux的行

awk -F":" 'BEGIN{OFS="#"} {print $1,$2}'

# awk可以调用命令并读取输出。把命令放入引号中，然后利用管道将命令输出传入getline：
awk 'BEGIN {FS=":"} { "grep root /etc/passwd" | getline; \
print $1,$6 }'

root /root


awk '/import.+/ { print "echo "$0"|grep -o com"|"bash" }' import

4 tr

 alnum：字母和数字。
 alpha：字母。
 cntrl：控制（非打印）字符。
 digit：数字。
 graph：图形字符。
 lower：小写字母。
 print：可打印字符。
 punct：标点符号。
 space：空白字符。
 upper：大写字母。
 xdigit：十六进制字符。