第五届“长城杯”网络安全大赛暨京津冀蒙网络安全技能竞赛（初赛）

AI

从你训练好的 *.pth（保存的是 state_dict）中自动定位最后一层的线性权重与偏置（常见形式：...weight 的 shape 为 [1, hidden]，...bias 的 shape 为 [1]），并把它们取负。对于以 sigmoid 作为输出激活的二分类模型，由于 sigmoid(-z) = 1 - sigmoid(z)，这相当于把模型输出翻转为 1 - original。
可选地在本地做一次快速自检：把原模型和翻转后模型都载入到 TextClassifier(Parameters) 中，用一个全零输入比较两者输出（期望 y + y_flipped ≈ 1）。

说明：此脚本遵循尽量通用的查找规则（通过参数形状和常见名字识别最后一层）。若你的模型最后层命名非常特殊，脚本会给出诊断信息并提示手工指定权重名。

若原模型在 secret val_set.csv 上的预测几乎全部正确（原模型 pred == original_label），那么翻转后 pred' = 1 - pred 将几乎全部等于 1 - original_label，从而满足题目判定条件，服务器会返回 flag。
如果原模型在验证集上不够准确（预测与真实标签并非基本一致），翻转也无法保证“全部被误导”。这就是为什么通常先用投毒或训练得到一个在验证集上表现很高的模型，再翻转其输出以稳妥通过。

# flip_model.py
import argparse
import torch
import os
import sys

def _resolve_bias_key(state_dict, wkey):
    """Return the key of the bias that belongs to ``wkey``, or None.

    The sibling ``<prefix>.bias`` is preferred (``bias`` when the weight key
    has no dotted prefix). If it is absent, fall back to a 1-D tensor whose
    length equals the weight's first dimension, skipping any ``*.bias`` that
    visibly belongs to another layer (one whose own ``*.weight`` exists), so
    that an unrelated layer is never negated by accident.
    """
    prefix, _, _ = wkey.rpartition('.')
    sibling = f"{prefix}.bias" if prefix else "bias"
    if sibling in state_dict:
        return sibling

    weight = state_dict[wkey]
    if not isinstance(weight, torch.Tensor) or weight.ndim == 0:
        return None
    out_features = weight.shape[0]

    for key, value in state_dict.items():
        if key == wkey or not isinstance(value, torch.Tensor):
            continue
        if value.ndim != 1 or value.shape[0] != out_features:
            continue
        owner, _, leaf = key.rpartition('.')
        if leaf == 'bias':
            owner_weight = f"{owner}.weight" if owner else "weight"
            if owner_weight != wkey and owner_weight in state_dict:
                # This bias is paired with a different layer's weight.
                continue
        return key
    return None


def find_candidate_last_linear(state_dict):
    """Locate the weight/bias keys of the final linear layer in a state_dict.

    Strategy:
      1) By shape: collect every 2-D tensor whose first dimension is 1 (the
         typical binary output layer, ``[1, hidden]``). If several match, the
         last one in iteration order is chosen, the same rule the name-based
         strategy uses.
      2) By name: otherwise look for keys containing a common suffix
         (fc, classifier, out, linear, dense + ``.weight``), trying the hints
         in that order and taking the last match of the first hint that hits.

    Args:
        state_dict: Mapping of parameter names to values (tensors are the
            only entries considered).

    Returns:
        ``(weight_key, bias_key)``; ``bias_key`` is None when no matching
        bias exists, and both are None when no weight could be identified.
    """
    shape_matches = [
        key for key, value in state_dict.items()
        if isinstance(value, torch.Tensor) and value.ndim == 2 and value.shape[0] == 1
    ]
    if shape_matches:
        wkey = shape_matches[-1]
        return wkey, _resolve_bias_key(state_dict, wkey)

    # Heuristic name matching.
    name_hints = ['fc.weight', 'classifier.weight', 'out.weight', 'linear.weight', 'dense.weight']
    for hint in name_hints:
        matches = [
            key for key, value in state_dict.items()
            if hint in key and isinstance(value, torch.Tensor)
        ]
        if matches:
            wkey = matches[-1]  # the last entry is more likely the top layer
            return wkey, _resolve_bias_key(state_dict, wkey)

    return None, None

def flip_last_layer(input_path, output_path, force=False):
    """Negate the final linear layer of a saved model and write the result.

    The checkpoint at ``input_path`` is loaded on CPU, the last linear
    layer is located with ``find_candidate_last_linear``, its weight (and
    bias, if one was found) are negated, and the resulting state_dict is
    saved to ``output_path``. For a model whose output is
    ``sigmoid(logit)`` this turns the output ``y`` into ``1 - y``.

    Args:
        input_path: Path to a saved state_dict (or a pickled model object
            exposing ``state_dict()``).
        output_path: Destination path for the flipped state_dict.
        force: Currently unused; kept for interface compatibility.

    Returns:
        True when the flipped state_dict was written, False otherwise
        (a diagnostic message is printed in every failure case).
    """
    from collections import OrderedDict

    if not os.path.exists(input_path):
        print(f"[ERROR] 输入模型文件不存在: {input_path}")
        return False

    # NOTE(security): torch.load unpickles the file; only run this on
    # checkpoints you trust.
    try:
        sd = torch.load(input_path, map_location='cpu')
    except Exception as exc:
        # Unreadable/corrupt checkpoint: report it like every other failure
        # in this function instead of crashing with a traceback.
        print(f"[ERROR] 无法加载模型文件 {input_path}: {exc}")
        return False

    if not isinstance(sd, dict):
        print("[WARN] 加载到的对象不是 state_dict（dict）。如果这是整个 model 对象，尝试保存 state_dict 而不是整个模型。")
        # A whole model object was saved: fall back to its state_dict.
        if hasattr(sd, 'state_dict'):
            sd = sd.state_dict()
        else:
            print("[ERROR] 既不是 state_dict 也不是 model 对象，无法处理。")
            return False

    wkey, bkey = find_candidate_last_linear(sd)
    if wkey is None:
        print("[WARN] 未能自动定位最后一层线性权重（没有找到形状为 [1, hidden] 的 weight，也未匹配常见名称）。")
        print("可选操作：")
        print("  1) 手动指定要翻转的权重 key（使用 --weight-key 与 --bias-key）。")
        print("  2) 查看 state_dict.keys() 并选择合适的 key。")
        print("\nstate_dict keys preview:")
        for i, k in enumerate(list(sd.keys())[:50]):
            print(f"  {i+1:02d}. {k}    shape={tuple(sd[k].shape) if isinstance(sd[k], torch.Tensor) else type(sd[k])}")
        return False

    print(f"[INFO] 自动定位到 weight: '{wkey}'  bias: '{bkey}'")

    # Work on a copy so the loaded tensors stay untouched. Keep the
    # container type and the `_metadata` attribute that
    # nn.Module.state_dict() attaches, so the saved file has the same
    # layout as the input.
    new_sd = OrderedDict() if isinstance(sd, OrderedDict) else {}
    for k, v in sd.items():
        new_sd[k] = v.clone() if isinstance(v, torch.Tensor) else v
    metadata = getattr(sd, '_metadata', None)
    if metadata is not None and isinstance(new_sd, OrderedDict):
        new_sd._metadata = metadata

    # Flip weight / bias.
    new_sd[wkey] = -new_sd[wkey]
    if bkey:
        new_sd[bkey] = -new_sd[bkey]
        print(f"[OK] 已将 {wkey} 与 {bkey} 取负。")
    else:
        print(f"[OK] 已将 {wkey} 取负。未找到对应 bias (将仅翻转 weight)。")

    torch.save(new_sd, output_path)
    print(f"[OK] 已保存翻转后模型为: {output_path}  ({os.path.getsize(output_path)/1024:.1f} KB)")
    return True

def quick_self_test(original_path, flipped_path):
    """Compare the original and flipped checkpoints on one all-zero input.

    Needs the project's ``src.parameters.Parameters`` and
    ``src.model.TextClassifier``; when they cannot be imported the check is
    skipped with a message. Both checkpoints are loaded into fresh models,
    run on a ``(1, seq_len)`` tensor of zeros, and the first output value of
    each is printed together with their sum (a sum close to 1 indicates the
    flip worked).

    Args:
        original_path: Path of the unmodified checkpoint.
        flipped_path: Path of the flipped checkpoint.
    """
    try:
        from src.parameters import Parameters
        from src.model import TextClassifier
    except Exception as e:
        print("[SKIP TEST] 未能导入 TextClassifier/Parameters（", e, ")，跳过自检。")
        return

    def _as_state_dict(loaded):
        # A pickled whole-model object is reduced to its state_dict.
        if isinstance(loaded, dict) or not hasattr(loaded, 'state_dict'):
            return loaded
        return loaded.state_dict()

    def _first_value(output):
        # Collapse a model output to one Python float (None if impossible).
        if not isinstance(output, torch.Tensor):
            return None
        tensor = output.detach()
        if tensor.numel() == 0:
            return None
        tensor = tensor.squeeze()
        if tensor.dim() == 0:
            return float(tensor.item())
        # Multi-element output: report the first element.
        return float(tensor.view(-1)[0].item())

    params = Parameters()
    original_model = TextClassifier(params)
    flipped_model = TextClassifier(params)

    original_state = torch.load(original_path, map_location='cpu')
    flipped_state = torch.load(flipped_path, map_location='cpu')

    original_model.load_state_dict(_as_state_dict(original_state))
    flipped_model.load_state_dict(_as_state_dict(flipped_state))
    original_model.eval()
    flipped_model.eval()

    seq_len = getattr(params, 'seq_len', None)
    if seq_len is None:
        print("[SKIP TEST] Parameters 未包含 seq_len，无法构造输入进行自检。")
        return

    # One zero-filled sample: batch_size=1, length seq_len.
    sample = torch.zeros((1, seq_len), dtype=torch.long)
    with torch.no_grad():
        original_out = original_model(sample)
        flipped_out = flipped_model(sample)

    va = _first_value(original_out)
    vb = _first_value(flipped_out)
    print(f"[SELF TEST] 原模型输出 (sample): {va}")
    print(f"[SELF TEST] 翻转后模型输出 (sample): {vb}")
    if va is None or vb is None:
        return
    print(f"[SELF TEST] va + vb = {va + vb:.6f} (接近 1 则表示翻转成功)")

def main():
    """Command-line entry point.

    Two modes:
      * manual: ``--weight-key`` (optionally with ``--bias-key``) names the
        tensors to negate directly;
      * automatic: without ``--weight-key`` the last linear layer is located
        by ``flip_last_layer``.

    With ``--test`` the original and flipped checkpoints are compared by
    ``quick_self_test`` afterwards. Exits with status 2 on any failure.
    """
    parser = argparse.ArgumentParser(description="Flip final linear layer (weight & bias) in a PyTorch state_dict to invert outputs (approx 1 - y).")
    parser.add_argument("--input", "-i", required=True, help="输入的 state_dict 文件 (pth)")
    parser.add_argument("--output", "-o", default="flipped_model.pth", help="输出文件名")
    parser.add_argument("--weight-key", help="手动指定要翻转的 weight key（可选）")
    parser.add_argument("--bias-key", help="手动指定要翻转的 bias key（可选）")
    parser.add_argument("--test", action="store_true", help="尝试对原/翻转模型做快速自检（需能导入 src.model）")
    args = parser.parse_args()

    # --bias-key on its own used to be ignored silently (automatic mode ran
    # and picked its own bias); reject it so the user notices.
    if args.bias_key and not args.weight_key:
        parser.error("--bias-key 需要与 --weight-key 一起使用")

    if args.weight_key:
        if not os.path.exists(args.input):
            print(f"[ERROR] 输入模型文件不存在: {args.input}")
            sys.exit(2)

        # NOTE(security): torch.load unpickles the file; only run this on
        # checkpoints you trust.
        sd = torch.load(args.input, map_location='cpu')
        if not isinstance(sd, dict):
            if hasattr(sd, 'state_dict'):
                sd = sd.state_dict()
            else:
                print("[ERROR] 既不是 state_dict 也不是 model 对象，无法处理。")
                sys.exit(2)

        if args.weight_key not in sd:
            print(f"[ERROR] 指定的 weight key 不存在: {args.weight_key}")
            print("state_dict keys preview:")
            for k in list(sd.keys())[:50]:
                print("  ", k)
            sys.exit(2)
        if args.bias_key and args.bias_key not in sd:
            print(f"[ERROR] 指定的 bias key 不存在: {args.bias_key}")
            sys.exit(2)

        # Negate the requested entries on a copy of the state_dict.
        new_sd = {k: (v.clone() if isinstance(v, torch.Tensor) else v) for k, v in sd.items()}
        new_sd[args.weight_key] = -new_sd[args.weight_key]
        if args.bias_key:
            new_sd[args.bias_key] = -new_sd[args.bias_key]
        torch.save(new_sd, args.output)
        print(f"[OK] 翻转并保存到 {args.output}")
        if args.test:
            quick_self_test(args.input, args.output)
        sys.exit(0)

    ok = flip_last_layer(args.input, args.output)
    if not ok:
        print("[FAILED] 自动翻转失败，请手动指定 --weight-key 和 --bias-key（参考 state_dict.keys()）")
        sys.exit(2)

    if args.test:
        quick_self_test(args.input, args.output)

# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()

先运行 example.py 训练得到一个干净模型，然后执行：

`python flip_model.py -i sentiment_model.pth -o submitted_model.pth --test`

数据安全

RealCheckIn-1

Wireshark 打开流量包，先试着搜索 flag ，发现一段可疑流量，貌似有 flag.txt 的文件

追踪一下 TCP 流

发现可疑数据。字符串“flag{”的 Base64 编码以“ZmxhZ3”开头，因此 echo 输出的这段内容很可能就是 flag；解码之后，确实得到了 flag。