summaryrefslogtreecommitdiff
path: root/fs/f2fs/file.c
AgeCommit message (Collapse)AuthorFilesLines
2018-12-05f2fs: fix to do sanity check with block address in main areaChao Yu1-0/+12
commit c9b60788fc760d136211853f10ce73dc152d1f4a upstream. This patch add to do sanity check with below field: - cp_pack_total_block_count - blkaddr of data/node - extent info - Overview BUG() in verify_block_addr() when writing to a corrupted f2fs image - Reproduce (4.18 upstream kernel) - POC (poc.c) static void activity(char *mpoint) { char *foo_bar_baz; int err; static int buf[8192]; memset(buf, 0, sizeof(buf)); err = asprintf(&foo_bar_baz, "%s/foo/bar/baz", mpoint); int fd = open(foo_bar_baz, O_RDWR | O_TRUNC, 0777); if (fd >= 0) { write(fd, (char *)buf, sizeof(buf)); fdatasync(fd); close(fd); } } int main(int argc, char *argv[]) { activity(argv[1]); return 0; } - Kernel message [ 689.349473] F2FS-fs (loop0): Mounted with checkpoint version = 3 [ 699.728662] WARNING: CPU: 0 PID: 1309 at fs/f2fs/segment.c:2860 f2fs_inplace_write_data+0x232/0x240 [ 699.728670] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 699.729056] CPU: 0 PID: 1309 Comm: a.out Not tainted 4.18.0-rc1+ #4 [ 699.729064] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 699.729074] RIP: 0010:f2fs_inplace_write_data+0x232/0x240 [ 699.729076] Code: ff e9 cf fe ff ff 49 8d 7d 10 e8 39 45 ad ff 4d 8b 7d 10 be 04 00 00 00 49 8d 7f 48 e8 07 49 ad ff 45 8b 7f 48 e9 fb fe ff ff <0f> 0b f0 41 80 4d 48 04 e9 65 fe ff ff 90 66 66 66 66 90 55 48 8d [ 699.729130] RSP: 0018:ffff8801f43af568 EFLAGS: 00010202 [ 699.729139] RAX: 000000000000003f RBX: ffff8801f43af7b8 RCX: ffffffffb88c9113 [ 699.729142] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffff8802024e5540 [ 699.729144] RBP: ffff8801f43af590 R08: 0000000000000009 R09: ffffffffffffffe8 [ 699.729147] R10: 0000000000000001 R11: ffffed0039b0596a R12: ffff8802024e5540 [ 699.729149] R13: ffff8801f0335500 R14: ffff8801e3e7a700 R15: ffff8801e1ee4450 [ 699.729154] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 699.729156] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 699.729159] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 [ 699.729171] Call Trace: [ 699.729192] f2fs_do_write_data_page+0x2e2/0xe00 [ 699.729203] ? f2fs_should_update_outplace+0xd0/0xd0 [ 699.729238] ? memcg_drain_all_list_lrus+0x280/0x280 [ 699.729269] ? __radix_tree_replace+0xa3/0x120 [ 699.729276] __write_data_page+0x5c7/0xe30 [ 699.729291] ? kasan_check_read+0x11/0x20 [ 699.729310] ? page_mapped+0x8a/0x110 [ 699.729321] ? page_mkclean+0xe9/0x160 [ 699.729327] ? f2fs_do_write_data_page+0xe00/0xe00 [ 699.729331] ? invalid_page_referenced_vma+0x130/0x130 [ 699.729345] ? clear_page_dirty_for_io+0x332/0x450 [ 699.729351] f2fs_write_cache_pages+0x4ca/0x860 [ 699.729358] ? __write_data_page+0xe30/0xe30 [ 699.729374] ? percpu_counter_add_batch+0x22/0xa0 [ 699.729380] ? kasan_check_write+0x14/0x20 [ 699.729391] ? _raw_spin_lock+0x17/0x40 [ 699.729403] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30 [ 699.729413] ? iov_iter_advance+0x113/0x640 [ 699.729418] ? f2fs_write_end+0x133/0x2e0 [ 699.729423] ? balance_dirty_pages_ratelimited+0x239/0x640 [ 699.729428] f2fs_write_data_pages+0x329/0x520 [ 699.729433] ? generic_perform_write+0x250/0x320 [ 699.729438] ? f2fs_write_cache_pages+0x860/0x860 [ 699.729454] ? current_time+0x110/0x110 [ 699.729459] ? f2fs_preallocate_blocks+0x1ef/0x370 [ 699.729464] do_writepages+0x37/0xb0 [ 699.729468] ? f2fs_write_cache_pages+0x860/0x860 [ 699.729472] ? do_writepages+0x37/0xb0 [ 699.729478] __filemap_fdatawrite_range+0x19a/0x1f0 [ 699.729483] ? delete_from_page_cache_batch+0x4e0/0x4e0 [ 699.729496] ? __vfs_write+0x2b2/0x410 [ 699.729501] file_write_and_wait_range+0x66/0xb0 [ 699.729506] f2fs_do_sync_file+0x1f9/0xd90 [ 699.729511] ? truncate_partial_data_page+0x290/0x290 [ 699.729521] ? __sb_end_write+0x30/0x50 [ 699.729526] ? vfs_write+0x20f/0x260 [ 699.729530] f2fs_sync_file+0x9a/0xb0 [ 699.729534] ? f2fs_do_sync_file+0xd90/0xd90 [ 699.729548] vfs_fsync_range+0x68/0x100 [ 699.729554] ? __fget_light+0xc9/0xe0 [ 699.729558] do_fsync+0x3d/0x70 [ 699.729562] __x64_sys_fdatasync+0x24/0x30 [ 699.729585] do_syscall_64+0x78/0x170 [ 699.729595] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 699.729613] RIP: 0033:0x7f9bf930d800 [ 699.729615] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24 [ 699.729668] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 699.729673] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 [ 699.729675] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 [ 699.729678] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 [ 699.729680] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 [ 699.729683] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 [ 699.729687] ---[ end trace 4ce02f25ff7d3df5 ]--- [ 699.729782] ------------[ cut here ]------------ [ 699.729785] kernel BUG at fs/f2fs/segment.h:654! [ 699.731055] invalid opcode: 0000 [#1] SMP KASAN PTI [ 699.732104] CPU: 0 PID: 1309 Comm: a.out Tainted: G W 4.18.0-rc1+ #4 [ 699.733684] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 699.735611] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730 [ 699.736649] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0 [ 699.740524] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283 [ 699.741573] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef [ 699.743006] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c [ 699.744426] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55 [ 699.745833] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940 [ 699.747256] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001 [ 699.748683] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 699.750293] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 699.751462] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 [ 699.752874] Call Trace: [ 699.753386] ? f2fs_inplace_write_data+0x93/0x240 [ 699.754341] f2fs_inplace_write_data+0xd2/0x240 [ 699.755271] f2fs_do_write_data_page+0x2e2/0xe00 [ 699.756214] ? f2fs_should_update_outplace+0xd0/0xd0 [ 699.757215] ? memcg_drain_all_list_lrus+0x280/0x280 [ 699.758209] ? __radix_tree_replace+0xa3/0x120 [ 699.759164] __write_data_page+0x5c7/0xe30 [ 699.760002] ? kasan_check_read+0x11/0x20 [ 699.760823] ? page_mapped+0x8a/0x110 [ 699.761573] ? page_mkclean+0xe9/0x160 [ 699.762345] ? f2fs_do_write_data_page+0xe00/0xe00 [ 699.763332] ? invalid_page_referenced_vma+0x130/0x130 [ 699.764374] ? clear_page_dirty_for_io+0x332/0x450 [ 699.765347] f2fs_write_cache_pages+0x4ca/0x860 [ 699.766276] ? __write_data_page+0xe30/0xe30 [ 699.767161] ? percpu_counter_add_batch+0x22/0xa0 [ 699.768112] ? kasan_check_write+0x14/0x20 [ 699.768951] ? _raw_spin_lock+0x17/0x40 [ 699.769739] ? f2fs_mark_inode_dirty_sync.part.18+0x16/0x30 [ 699.770885] ? iov_iter_advance+0x113/0x640 [ 699.771743] ? f2fs_write_end+0x133/0x2e0 [ 699.772569] ? balance_dirty_pages_ratelimited+0x239/0x640 [ 699.773680] f2fs_write_data_pages+0x329/0x520 [ 699.774603] ? generic_perform_write+0x250/0x320 [ 699.775544] ? f2fs_write_cache_pages+0x860/0x860 [ 699.776510] ? current_time+0x110/0x110 [ 699.777299] ? f2fs_preallocate_blocks+0x1ef/0x370 [ 699.778279] do_writepages+0x37/0xb0 [ 699.779026] ? f2fs_write_cache_pages+0x860/0x860 [ 699.779978] ? do_writepages+0x37/0xb0 [ 699.780755] __filemap_fdatawrite_range+0x19a/0x1f0 [ 699.781746] ? delete_from_page_cache_batch+0x4e0/0x4e0 [ 699.782820] ? __vfs_write+0x2b2/0x410 [ 699.783597] file_write_and_wait_range+0x66/0xb0 [ 699.784540] f2fs_do_sync_file+0x1f9/0xd90 [ 699.785381] ? truncate_partial_data_page+0x290/0x290 [ 699.786415] ? __sb_end_write+0x30/0x50 [ 699.787204] ? vfs_write+0x20f/0x260 [ 699.787941] f2fs_sync_file+0x9a/0xb0 [ 699.788694] ? f2fs_do_sync_file+0xd90/0xd90 [ 699.789572] vfs_fsync_range+0x68/0x100 [ 699.790360] ? __fget_light+0xc9/0xe0 [ 699.791128] do_fsync+0x3d/0x70 [ 699.791779] __x64_sys_fdatasync+0x24/0x30 [ 699.792614] do_syscall_64+0x78/0x170 [ 699.793371] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 699.794406] RIP: 0033:0x7f9bf930d800 [ 699.795134] Code: 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 83 3d 49 bf 2c 00 00 75 10 b8 4b 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be 78 01 00 48 89 04 24 [ 699.798960] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 699.800483] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 [ 699.801923] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 [ 699.803373] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 [ 699.804798] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 [ 699.806233] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 [ 699.807667] Modules linked in: snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_pcm snd_timer snd mac_hid i2c_piix4 soundcore ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx raid1 raid0 multipath linear 8139too crct10dif_pclmul crc32_pclmul qxl drm_kms_helper syscopyarea aesni_intel sysfillrect sysimgblt fb_sys_fops ttm drm aes_x86_64 crypto_simd cryptd 8139cp glue_helper mii pata_acpi floppy [ 699.817079] ---[ end trace 4ce02f25ff7d3df6 ]--- [ 699.818068] RIP: 0010:f2fs_submit_page_bio+0x29b/0x730 [ 699.819114] Code: 54 49 8d bd 18 04 00 00 e8 b2 59 af ff 41 8b 8d 18 04 00 00 8b 45 b8 41 d3 e6 44 01 f0 4c 8d 73 14 41 39 c7 0f 82 37 fe ff ff <0f> 0b 65 8b 05 2c 04 77 47 89 c0 48 0f a3 05 52 c1 d5 01 0f 92 c0 [ 699.822919] RSP: 0018:ffff8801f43af508 EFLAGS: 00010283 [ 699.823977] RAX: 0000000000000000 RBX: ffff8801f43af7b8 RCX: ffffffffb88a7cef [ 699.825436] RDX: 0000000000000007 RSI: dffffc0000000000 RDI: ffff8801e3e7a64c [ 699.826881] RBP: ffff8801f43af558 R08: ffffed003e066b55 R09: ffffed003e066b55 [ 699.828292] R10: 0000000000000001 R11: ffffed003e066b54 R12: ffffea0007876940 [ 699.829750] R13: ffff8801f0335500 R14: ffff8801e3e7a600 R15: 0000000000000001 [ 699.831192] FS: 00007f9bf97f5700(0000) GS:ffff8801f6e00000(0000) knlGS:0000000000000000 [ 699.832793] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 699.833981] CR2: 00007f9bf925d170 CR3: 00000001f0c34000 CR4: 00000000000006f0 [ 699.835556] ================================================================== [ 699.837029] BUG: KASAN: stack-out-of-bounds in update_stack_state+0x38c/0x3e0 [ 699.838462] Read of size 8 at addr ffff8801f43af970 by task a.out/1309 [ 699.840086] CPU: 0 PID: 1309 Comm: a.out Tainted: G D W 4.18.0-rc1+ #4 [ 699.841603] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 [ 699.843475] Call Trace: [ 699.843982] dump_stack+0x7b/0xb5 [ 699.844661] print_address_description+0x70/0x290 [ 699.845607] kasan_report+0x291/0x390 [ 699.846351] ? update_stack_state+0x38c/0x3e0 [ 699.853831] __asan_load8+0x54/0x90 [ 699.854569] update_stack_state+0x38c/0x3e0 [ 699.855428] ? __read_once_size_nocheck.constprop.7+0x20/0x20 [ 699.856601] ? __save_stack_trace+0x5e/0x100 [ 699.857476] unwind_next_frame.part.5+0x18e/0x490 [ 699.858448] ? unwind_dump+0x290/0x290 [ 699.859217] ? clear_page_dirty_for_io+0x332/0x450 [ 699.860185] __unwind_start+0x106/0x190 [ 699.860974] __save_stack_trace+0x5e/0x100 [ 699.861808] ? __save_stack_trace+0x5e/0x100 [ 699.862691] ? unlink_anon_vmas+0xba/0x2c0 [ 699.863525] save_stack_trace+0x1f/0x30 [ 699.864312] save_stack+0x46/0xd0 [ 699.864993] ? __alloc_pages_slowpath+0x1420/0x1420 [ 699.865990] ? flush_tlb_mm_range+0x15e/0x220 [ 699.866889] ? kasan_check_write+0x14/0x20 [ 699.867724] ? __dec_node_state+0x92/0xb0 [ 699.868543] ? lock_page_memcg+0x85/0xf0 [ 699.869350] ? unlock_page_memcg+0x16/0x80 [ 699.870185] ? page_remove_rmap+0x198/0x520 [ 699.871048] ? mark_page_accessed+0x133/0x200 [ 699.871930] ? _cond_resched+0x1a/0x50 [ 699.872700] ? unmap_page_range+0xcd4/0xe50 [ 699.873551] ? rb_next+0x58/0x80 [ 699.874217] ? rb_next+0x58/0x80 [ 699.874895] __kasan_slab_free+0x13c/0x1a0 [ 699.875734] ? unlink_anon_vmas+0xba/0x2c0 [ 699.876563] kasan_slab_free+0xe/0x10 [ 699.877315] kmem_cache_free+0x89/0x1e0 [ 699.878095] unlink_anon_vmas+0xba/0x2c0 [ 699.878913] free_pgtables+0x101/0x1b0 [ 699.879677] exit_mmap+0x146/0x2a0 [ 699.880378] ? __ia32_sys_munmap+0x50/0x50 [ 699.881214] ? kasan_check_read+0x11/0x20 [ 699.882052] ? mm_update_next_owner+0x322/0x380 [ 699.882985] mmput+0x8b/0x1d0 [ 699.883602] do_exit+0x43a/0x1390 [ 699.884288] ? mm_update_next_owner+0x380/0x380 [ 699.885212] ? f2fs_sync_file+0x9a/0xb0 [ 699.885995] ? f2fs_do_sync_file+0xd90/0xd90 [ 699.886877] ? vfs_fsync_range+0x68/0x100 [ 699.887694] ? __fget_light+0xc9/0xe0 [ 699.888442] ? do_fsync+0x3d/0x70 [ 699.889118] ? __x64_sys_fdatasync+0x24/0x30 [ 699.889996] rewind_stack_do_exit+0x17/0x20 [ 699.890860] RIP: 0033:0x7f9bf930d800 [ 699.891585] Code: Bad RIP value. [ 699.892268] RSP: 002b:00007ffee3606c68 EFLAGS: 00000246 ORIG_RAX: 000000000000004b [ 699.893781] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f9bf930d800 [ 699.895220] RDX: 0000000000008000 RSI: 00000000006010a0 RDI: 0000000000000003 [ 699.896643] RBP: 00007ffee3606ca0 R08: 0000000001503010 R09: 0000000000000000 [ 699.898069] R10: 00000000000002e8 R11: 0000000000000246 R12: 0000000000400610 [ 699.899505] R13: 00007ffee3606da0 R14: 0000000000000000 R15: 0000000000000000 [ 699.901241] The buggy address belongs to the page: [ 699.902215] page:ffffea0007d0ebc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 [ 699.903811] flags: 0x2ffff0000000000() [ 699.904585] raw: 02ffff0000000000 0000000000000000 ffffffff07d00101 0000000000000000 [ 699.906125] raw: 0000000000000000 0000000000240000 00000000ffffffff 0000000000000000 [ 699.907673] page dumped because: kasan: bad access detected [ 699.909108] Memory state around the buggy address: [ 699.910077] ffff8801f43af800: 00 f1 f1 f1 f1 00 f4 f4 f4 f3 f3 f3 f3 00 00 00 [ 699.911528] ffff8801f43af880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 699.912953] >ffff8801f43af900: 00 00 00 00 00 00 00 00 f1 01 f4 f4 f4 f2 f2 f2 [ 699.914392] ^ [ 699.915758] ffff8801f43af980: f2 00 f4 f4 00 00 00 00 f2 00 00 00 00 00 00 00 [ 699.917193] ffff8801f43afa00: 00 00 00 00 00 00 00 00 00 f3 f3 f3 00 00 00 00 [ 699.918634] ================================================================== - Location https://elixir.bootlin.com/linux/v4.18-rc1/source/fs/f2fs/segment.h#L644 Reported-by Wen Xu <wen.xu@gatech.edu> Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> [bwh: Backported to 4.14: - Error label is different in validate_checkpoint() due to the earlier backport of "f2fs: fix invalid memory access" - Adjust context] Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk> Signed-off-by: Sasha Levin <sashal@kernel.org>
2018-12-05f2fs: introduce and spread verify_blkaddrChao Yu1-4/+5
commit e1da7872f6eda977bd812346bf588c35e4495a1e upstream. This patch introduces verify_blkaddr to check meta/data block address with valid range to detect bug earlier. In addition, once we encounter an invalid blkaddr, notice user to run fsck to fix, and let the kernel panic. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> [bwh: Backported to 4.14: I skipped an earlier renaming of is_valid_meta_blkaddr() to f2fs_is_valid_meta_blkaddr()] Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk> Signed-off-by: Sasha Levin <sashal@kernel.org>
2018-12-05f2fs: clean up with is_valid_blkaddr()Chao Yu1-1/+1
commit 7b525dd01365c6764018e374d391c92466be1b7a upstream. - rename is_valid_blkaddr() to is_valid_meta_blkaddr() for readability. - introduce is_valid_blkaddr() for cleanup. No logic change in this patch. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk> Signed-off-by: Sasha Levin <sashal@kernel.org>
2018-09-19f2fs: Fix uninitialized return in f2fs_ioc_shutdown()Dan Carpenter1-1/+1
[ Upstream commit 2a96d8ad94ce57cb0072f7a660b1039720c47716 ] "ret" can be uninitialized on the success path when "in == F2FS_GOING_DOWN_FULLSYNC". Fixes: 60b2b4ee2bc0 ("f2fs: Fix deadlock in shutdown ioctl") Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-08-03f2fs: fix race in between GC and atomic openChao Yu1-0/+3
[ Upstream commit 27319ba4044c0c67d62ae39e53c0118c89f0a029 ] Thread GC thread - f2fs_ioc_start_atomic_write - get_dirty_pages - filemap_write_and_wait_range - f2fs_gc - do_garbage_collect - gc_data_segment - move_data_page - f2fs_is_atomic_file - set_page_dirty - set_inode_flag(, FI_ATOMIC_FILE) Dirty data page can still be generated by GC in race condition as above call stack. This patch adds fi->dio_rwsem[WRITE] in f2fs_ioc_start_atomic_write to avoid such race. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-08-03f2fs: fix to detect failure of dquot_initializeChao Yu1-1/+3
[ Upstream commit c22aecd75919511abea872b201751e0be1add898 ] dquot_initialize() can fail due to any exception inside quota subsystem, f2fs needs to be aware of it, and return correct return value to caller. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-08-03f2fs: Fix deadlock in shutdown ioctlSahitya Tummala1-4/+7
[ Upstream commit 60b2b4ee2bc01dd052f99fa9d65da2232102ef8e ] f2fs_ioc_shutdown() ioctl gets stuck in the below path when issued with F2FS_GOING_DOWN_FULLSYNC option. __switch_to+0x90/0xc4 percpu_down_write+0x8c/0xc0 freeze_super+0xec/0x1e4 freeze_bdev+0xc4/0xcc f2fs_ioctl+0xc0c/0x1ce0 f2fs_compat_ioctl+0x98/0x1f0 Signed-off-by: Sahitya Tummala <stummala@codeaurora.org> Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-07-11f2fs: truncate preallocated blocks in error caseJaegeuk Kim1-0/+9
commit dc7a10ddee0c56c6d891dd18de5c4ee9869545e0 upstream. If write is failed, we must deallocate the blocks that we couldn't write. Cc: stable@vger.kernel.org Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> Signed-off-by: Sudip Mukherjee <sudipm.mukherjee@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-05-30f2fs: fix to set KEEP_SIZE bit in f2fs_zero_rangeChao Yu1-2/+6
[ Upstream commit 17cd07ae95073c298af92c1ba14ac58ce84de33b ] As Jayashree Mohan reported: A simple workload to reproduce this would be : 1. create foo 2. Write (8K - 16K) // foo size = 16K now 3. fsync() 4. falloc zero_range , keep_size (4202496 - 4210688) // foo size must be 16K 5. fdatasync() Crash now On recovery, we see that the file size is 4210688 and not 16K, which violates the semantics of keep_size flag. We have a test case to reproduce this using CrashMonkey on 4.15 kernel. Try this out by simply running : ./c_harness -f /dev/sda -d /dev/cow_ram0 -t f2fs -e 102400 -P -v tests/generic_468_zero.so The root cause is that we miss to set KEEP_SIZE bit correctly in zero_range when zeroing block cross EOF with FALLOC_FL_KEEP_SIZE, let's fix this missing case. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-14f2fs: fix to clear FI_NO_PREALLOCChao Yu1-0/+1
[ Upstream commit 28cfafb73853f0494b06649716687a3ea07681d5 ] We need to clear FI_NO_PREALLOC flag in error path of f2fs_file_write_iter, otherwise we will lose the chance to preallocate blocks in latter write() at one time. Fixes: dc91de78e5e1 ("f2fs: do not preallocate blocks which has wrong buffer") Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> Signed-off-by: Sasha Levin <alexander.levin@verizon.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-11-30f2fs: expose some sectors to user in inline data or dentry caseJaegeuk Kim1-0/+6
commit 5b4267d195dd887c4412e34b5a7365baa741b679 upstream. If there's some data written through inline data or dentry, we need to shouw st_blocks. This fixes reporting zero blocks even though there is small written data. Reviewed-by: Chao Yu <yuchao0@huawei.com> [Jaegeuk Kim: avoid link file for quotacheck] Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-09-13Merge tag 'f2fs-for-4.14' of ↵Linus Torvalds1-55/+305
git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs Pull f2fs updates from Jaegeuk Kim: "In this round, we've mostly tuned f2fs to provide better user experience for Android. Especially, we've worked on atomic write feature again with SQLite community in order to support it officially. And we added or modified several facilities to analyze and enhance IO behaviors. Major changes include: - add app/fs io stat - add inode checksum feature - support project/journalled quota - enhance atomic write with new ioctl() which exposes feature set - enhance background gc/discard/fstrim flows with new gc_urgent mode - add F2FS_IOC_FS{GET,SET}XATTR - fix some quota flows" * tag 'f2fs-for-4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (63 commits) f2fs: hurry up to issue discard after io interruption f2fs: fix to show correct discard_granularity in sysfs f2fs: detect dirty inode in evict_inode f2fs: clear radix tree dirty tag of pages whose dirty flag is cleared f2fs: speed up gc_urgent mode with SSR f2fs: better to wait for fstrim completion f2fs: avoid race in between read xattr & write xattr f2fs: make get_lock_data_page to handle encrypted inode f2fs: use generic terms used for encrypted block management f2fs: introduce f2fs_encrypted_file for clean-up Revert "f2fs: add a new function get_ssr_cost" f2fs: constify super_operations f2fs: fix to wake up all sleeping flusher f2fs: avoid race in between atomic_read & atomic_inc f2fs: remove unneeded parameter of change_curseg f2fs: update i_flags correctly f2fs: don't check inode's checksum if it was dirtied or writebacked f2fs: don't need to update inode checksum for recovery f2fs: trigger fdatasync for non-atomic_write file f2fs: fix to avoid race in between aio and gc ...
2017-09-07Merge tag 'wberr-v4.14-1' of ↵Linus Torvalds1-1/+1
git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux Pull writeback error handling updates from Jeff Layton: "This pile continues the work from last cycle on better tracking writeback errors. In v4.13 we added some basic errseq_t infrastructure and converted a few filesystems to use it. This set continues refining that infrastructure, adds documentation, and converts most of the other filesystems to use it. The main exception at this point is the NFS client" * tag 'wberr-v4.14-1' of git://git.kernel.org/pub/scm/linux/kernel/git/jlayton/linux: ecryptfs: convert to file_write_and_wait in ->fsync mm: remove optimizations based on i_size in mapping writeback waits fs: convert a pile of fsync routines to errseq_t based reporting gfs2: convert to errseq_t based writeback error reporting for fsync fs: convert sync_file_range to use errseq_t based error-tracking mm: add file_fdatawait_range and file_write_and_wait fuse: convert to errseq_t based error tracking for fsync mm: consolidate dax / non-dax checks for writeback Documentation: add some docs for errseq_t errseq: rename __errseq_set to errseq_set
2017-09-06f2fs: use generic terms used for encrypted block managementJaegeuk Kim1-1/+1
This patch renames functions regarding to buffer management via META_MAPPING used for encrypted blocks especially. We can actually use them in generic way. Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-09-06f2fs: introduce f2fs_encrypted_file for clean-upJaegeuk Kim1-1/+1
This patch replaces (f2fs_encrypted_inode() && S_ISREG()) with f2fs_encrypted_file(), which gives no functional change. Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-29f2fs: trigger fdatasync for non-atomic_write fileChao Yu1-1/+1
Sqlite only cares about synchronization of file data instead of other data unrelated attribute of inode, so in commit flow, call fdatasync is enough. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-29f2fs: trigger normal fsync for non-atomic_write fileChao Yu1-1/+1
If file was not opened with atomic write mode, but user uses atomic write ioctl to fsync datas, in the flow, we should not fsync that file with atomic write mode. Fixes: 608514deba38 ("f2fs: set fsync mark only for the last dnode") Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-29f2fs: clear FI_HOT_DATA correctlyChao Yu1-0/+2
This patch fixes to clear FI_HOT_DATA correctly in below path: - error handling in f2fs_ioc_start_atomic_write - after commit atomic write in f2fs_ioc_commit_atomic_write - after drop atomic write in drop_inmem_pages Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-22f2fs: merge equivalent flags F2FS_GET_BLOCK_[READ|DIO]Qiuyang Sun1-2/+2
Currently, the two flags F2FS_GET_BLOCK_[READ|DIO] are totally equivalent and can be used interchangably in all scenarios they are involved in. Neither of the flags is referenced in f2fs_map_blocks(), making them both the default case. To remove the ambiguity, this patch merges both flags into F2FS_GET_BLOCK_DEFAULT, and introduces an enum for all distinct flags. Signed-off-by: Qiuyang Sun <sunqiuyang@huawei.com> Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-10f2fs: add app/fs io statChao Yu1-1/+6
This patch enables inner app/fs io stats and introduces below virtual fs nodes for exposing stats info: /sys/fs/f2fs/<dev>/iostat_enable /proc/fs/f2fs/<dev>/iostat_info Signed-off-by: Chao Yu <yuchao0@huawei.com> [Jaegeuk Kim: fix wrong stat assignment] Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-01fs: convert a pile of fsync routines to errseq_t based reportingJeff Layton1-1/+1
This patch converts most of the in-kernel filesystems that do writeback out of the pagecache to report errors using the errseq_t-based infrastructure that was recently added. This allows them to report errors once for each open file description. Most filesystems have a fairly straightforward fsync operation. They call filemap_write_and_wait_range to write back all of the data and wait on it, and then (sometimes) sync out the metadata. For those filesystems this is a straightforward conversion from calling filemap_write_and_wait_range in their fsync operation to calling file_write_and_wait_range. Acked-by: Jan Kara <jack@suse.cz> Acked-by: Dave Kleikamp <dave.kleikamp@oracle.com> Signed-off-by: Jeff Layton <jlayton@redhat.com>
2017-08-01f2fs: support F2FS_IOC_FS{GET,SET}XATTRChao Yu1-28/+236
This patch adds FS_IOC_FSSETXATTR/FS_IOC_FSGETXATTR ioctl interface support for f2fs. The interface is kept consistent with the one of ext4/xfs. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-01f2fs: don't need to wait for node writes for atomic writeJaegeuk Kim1-3/+13
We have a node chain to serialize node block writes, so if any IOs for node block writes are reordered, we'll get broken node chain. IOWs, roll-forward recovery will see all or none node blocks given fsync mark. E.g., Node chain consists of: N1 -> N2 -> N3 -> NFSYNC -> N1' -> N2' -> N'FSYNC Reordered to: 1) N1 -> N2 -> N3 -> N2' -> NFSYNC -> N'FSYNC -> power-cut 2) N1 -> N2 -> N3 -> N1' -> NFSYNC -> power-cut 3) N1 -> N2 -> NFSYNC -> N1' -> N'FSYNC -> N3 -> power-cut 4) N1 -> NFSYNC -> N1' -> N2' -> N'FSYNC -> N3 -> power-cut Roll-forward recovery can proceed to: 1) N1 -> N2 -> N3 -> NFSYNC -> X 2) N1 -> N2 -> N3 -> NFSYNC -> N1' -> X 3) N1 -> N2 -> N3 -> FSYNC -> N1' -> X 4) N1 -> X Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-01f2fs: support project quotaChao Yu1-13/+0
This patch adds to support plain project quota. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-01f2fs: enhance on-disk inode structure scalabilityChao Yu1-8/+15
This patch add new flag F2FS_EXTRA_ATTR storing in inode.i_inline to indicate that on-disk structure of current inode is extended. In order to extend, we changed the inode structure a bit: Original one: struct f2fs_inode { ... struct f2fs_extent i_ext; __le32 i_addr[DEF_ADDRS_PER_INODE]; __le32 i_nid[DEF_NIDS_PER_INODE]; } Extended one: struct f2fs_inode { ... struct f2fs_extent i_ext; union { struct { __le16 i_extra_isize; __le16 i_padding; __le32 i_extra_end[0]; }; __le32 i_addr[DEF_ADDRS_PER_INODE]; }; __le32 i_nid[DEF_NIDS_PER_INODE]; } Once F2FS_EXTRA_ATTR is set, we will steal four bytes in the head of i_addr field for storing i_extra_isize and i_padding. with i_extra_isize, we can calculate actual size of reserved space in i_addr, available attribute fields included in total extra attribute fields for current inode can be described as below: +--------------------+ | .i_mode | | ... | | .i_ext | +--------------------+ | .i_extra_isize |-----+ | .i_padding | | | .i_prjid | | | .i_atime_extra | | | .i_ctime_extra | | | .i_mtime_extra |<----+ | .i_inode_cs |<----- store blkaddr/inline from here | .i_xattr_cs | | ... | +--------------------+ | | | block address | | | +--------------------+ | .i_nid | +--------------------+ | node_footer | | (nid, ino, offset) | +--------------------+ Hence, with this patch, we would enhance scalability of f2fs inode for storing more newly added attribute. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-08-01f2fs: add ioctl to expose current featuresJaegeuk Kim1-0/+13
This patch adds an ioctl to provide feature information to user. For exapmle, SQLite can use this ioctl to detect whether f2fs support atomic write or not. Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-07-29f2fs: don't give partially written atomic data from process crashJaegeuk Kim1-0/+18
This patch resolves the below scenario. == Process 1 == == Process 2 == open(w) open(rw) begin write(new_#1) process_crash f_op->flush locks_remove_posix f_op>release read (new_#1) In order to avoid corrupted database caused by new_#1, we must do roll-back at process_crash time. In order to check that, this patch keeps task which triggers transaction begin, and does roll-back in f_op->flush before removing file locks. Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-07-16f2fs: remove extra inode_unlock() in error pathLuis Henriques1-4/+1
This commit removes an extra inode_unlock() that is being done in function f2fs_ioc_setflags error path. While there, get rid of a useless 'out' label as well. Fixes: 0abd675e97e6 ("f2fs: support plain user/group quota") Signed-off-by: Luis Henriques <lhenriques@suse.com> Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-07-11Merge tag 'for-f2fs-4.13' of ↵Linus Torvalds1-36/+147
git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs Pull f2fs updates from Jaegeuk Kim: "In this round, we've added new features such as disk quota and statx, and modified internal bio management flow to merge more IOs depending on block types. We've also made internal threads freezeable for Android battery life. In addition to them, there are some patches to avoid lock contention as well as a couple of deadlock conditions. Enhancements: - support usrquota, grpquota, and statx - manage DATA/NODE typed bios separately to serialize more IOs - modify f2fs_lock_op/wio_mutex to avoid lock contention - prevent lock contention in migratepage Bug fixes: - fix missing load of written inode flag - fix worst case victim selection in GC - freezeable GC and discard threads for Android battery life - sanitize f2fs metadata to deal with security hole - clean up sysfs-related code and docs" * tag 'for-f2fs-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (59 commits) f2fs: support plain user/group quota f2fs: avoid deadlock caused by lock order of page and lock_op f2fs: use spin_{,un}lock_irq{save,restore} f2fs: relax migratepage for atomic written page f2fs: don't count inode block in in-memory inode.i_blocks Revert "f2fs: fix to clean previous mount option when remount_fs" f2fs: do not set LOST_PINO for renamed dir f2fs: do not set LOST_PINO for newly created dir f2fs: skip ->writepages for {mete,node}_inode during recovery f2fs: introduce __check_sit_bitmap f2fs: stop gc/discard thread in prior during umount f2fs: introduce reserved_blocks in sysfs f2fs: avoid redundant f2fs_flush after remount f2fs: report # of free inodes more precisely f2fs: add ioctl to do gc with target block address f2fs: don't need to check encrypted inode for partial truncation f2fs: measure inode.i_blocks as generic filesystem f2fs: set CP_TRIMMED_FLAG correctly f2fs: require key for truncate(2) of encrypted file f2fs: move sysfs code from super.c to fs/f2fs/sysfs.c ...
2017-07-09f2fs: support plain user/group quotaChao Yu1-7/+27
This patch adds to support plain user/group quota. Change Note by Jaegeuk Kim. - Use f2fs page cache for quota files in order to consider garbage collection. so, quota files are not tolerable for sudden power-cuts, so user needs to do quotacheck. - setattr() calls dquot_transfer which will transfer inode->i_blocks. We can't reclaim that during f2fs_evict_inode(). So, we need to count node blocks as well in order to match i_blocks with dquot's space. Note that, Chao wrote a patch to count inode->i_blocks without inode block. (f2fs: don't count inode block in in-memory inode.i_blocks) - in f2fs_remount, we need to make RW in prior to dquot_resume. - handle fault_injection case during f2fs_quota_off_umount - TODO: Project quota Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-07-04f2fs: add ioctl to do gc with target block addressJaegeuk Kim1-0/+47
This patch adds f2fs_ioc_gc_range() to move blocks located in the given range. Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-07-04f2fs: don't need to check encrypted inode for partial truncationJaegeuk Kim1-2/+4
The cache_only is always false, if inode is encrypted. Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-07-04f2fs: measure inode.i_blocks as generic filesystemChao Yu1-1/+0
Both in memory or on disk, generic filesystems record i_blocks with 512bytes sized sector count, also VFS sub module such as disk quota follows this rule, but f2fs records it with 4096bytes sized block count, this difference leads to that once we use dquota's function which inc/dec iblocks, it will make i_blocks of f2fs being inconsistent between in memory and on disk. In order to resolve this issue, this patch changes to make in-memory i_blocks of f2fs recording sector count instead of block count, meanwhile leaving on-disk i_blocks recording block count. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-07-04f2fs: require key for truncate(2) of encrypted fileEric Biggers1-3/+7
Currently, filesystems allow truncate(2) on an encrypted file without the encryption key. However, it's impossible to correctly handle the case where the size being truncated to is not a multiple of the filesystem block size, because that would require decrypting the final block, zeroing the part beyond i_size, then encrypting the block. As other modifications to encrypted file contents are prohibited without the key, just prohibit truncate(2) as well, making it fail with ENOKEY. Signed-off-by: Eric Biggers <ebiggers@google.com> Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-07-04f2fs: dax: fix races between page faults and truncating pagesQiuyang Sun1-9/+39
Currently in F2FS, page faults and operations that truncate the pagecahe or data blocks, are completely unsynchronized. This can result in page fault faulting in a page into a range that we are changing after truncating, and thus we can end up with a page mapped to disk blocks that will be shortly freed. Filesystem corruption will shortly follow. This patch fixes the problem by creating new rw semaphore i_mmap_sem in f2fs_inode_info and grab it for functions removing blocks from extent tree and for read over page faults. The mechanism is similar to that in ext4. Signed-off-by: Qiuyang Sun <sunqiuyang@huawei.com> Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-05-24f2fs: don't bother checking for encryption key in ->write_iter()Eric Biggers1-5/+0
Since only an open file can be written to, and we only allow open()ing an encrypted file when its key is available, there is no need to check for the key again before permitting each ->write_iter(). This code was also broken in that it wouldn't actually have failed if the key was in fact unavailable. Signed-off-by: Eric Biggers <ebiggers@google.com> Reviewed-by: David Gstir <david@sigma-star.at> Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-05-24f2fs: don't bother checking for encryption key in ->mmap()Eric Biggers1-8/+0
Since only an open file can be mmap'ed, and we only allow open()ing an encrypted file when its key is available, there is no need to check for the key again before permitting each mmap(). This f2fs copy of this code was also broken in that it wouldn't actually have failed if the key was in fact unavailable. Signed-off-by: Eric Biggers <ebiggers@google.com> Reviewed-by: David Gstir <david@sigma-star.at> Acked-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-05-24f2fs: support statxChao Yu1-1/+22
Last kernel has already support new syscall statx() in commit a528d35e8bfc ("statx: Add a system call to make enhanced file info available"), with this interface we can show more file info including file creation and some attribute flags to user. This patch tries to support this functionality. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-05-24f2fs: load inode's flag from diskJaegeuk Kim1-0/+1
This patch fixes missing inode flag loaded from disk, reported by Tom. [tom@localhost ~]$ sudo mount /dev/loop0 /mnt/ [tom@localhost ~]$ sudo chown tom:tom /mnt/ [tom@localhost ~]$ touch /mnt/testfile [tom@localhost ~]$ sudo chattr +i /mnt/testfile [tom@localhost ~]$ echo test > /mnt/testfile bash: /mnt/testfile: Operation not permitted [tom@localhost ~]$ rm /mnt/testfile rm: cannot remove '/mnt/testfile': Operation not permitted [tom@localhost ~]$ sudo umount /mnt/ [tom@localhost ~]$ sudo mount /dev/loop0 /mnt/ [tom@localhost ~]$ lsattr /mnt/testfile ----i-------------- /mnt/testfile [tom@localhost ~]$ echo test > /mnt/testfile [tom@localhost ~]$ rm /mnt/testfile [tom@localhost ~]$ sudo umount /mnt/ Cc: stable@vger.kernel.org Reported-by: Tom Yan <tom.ty89@outlook.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-05-09Merge branch 'akpm' (patches from Andrew)Linus Torvalds1-2/+2
Merge more updates from Andrew Morton: - the rest of MM - various misc things - procfs updates - lib/ updates - checkpatch updates - kdump/kexec updates - add kvmalloc helpers, use them - time helper updates for Y2038 issues. We're almost ready to remove current_fs_time() but that awaits a btrfs merge. - add tracepoints to DAX * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (114 commits) drivers/staging/ccree/ssi_hash.c: fix build with gcc-4.4.4 selftests/vm: add a test for virtual address range mapping dax: add tracepoint to dax_insert_mapping() dax: add tracepoint to dax_writeback_one() dax: add tracepoints to dax_writeback_mapping_range() dax: add tracepoints to dax_load_hole() dax: add tracepoints to dax_pfn_mkwrite() dax: add tracepoints to dax_iomap_pte_fault() mtd: nand: nandsim: convert to memalloc_noreclaim_*() treewide: convert PF_MEMALLOC manipulations to new helpers mm: introduce memalloc_noreclaim_{save,restore} mm: prevent potential recursive reclaim due to clearing PF_MEMALLOC mm/huge_memory.c: deposit a pgtable for DAX PMD faults when required mm/huge_memory.c: use zap_deposited_table() more time: delete CURRENT_TIME_SEC and CURRENT_TIME gfs2: replace CURRENT_TIME with current_time apparmorfs: replace CURRENT_TIME with current_time() lustre: replace CURRENT_TIME macro fs: ubifs: replace CURRENT_TIME_SEC with current_time fs: ufs: use ktime_get_real_ts64() for birthtime ...
2017-05-09mm: introduce kv[mz]alloc helpersMichal Hocko1-2/+2
Patch series "kvmalloc", v5. There are many open coded kmalloc with vmalloc fallback instances in the tree. Most of them are not careful enough or simply do not care about the underlying semantic of the kmalloc/page allocator which means that a) some vmalloc fallbacks are basically unreachable because the kmalloc part will keep retrying until it succeeds b) the page allocator can invoke a really disruptive steps like the OOM killer to move forward which doesn't sound appropriate when we consider that the vmalloc fallback is available. As it can be seen implementing kvmalloc requires quite an intimate knowledge if the page allocator and the memory reclaim internals which strongly suggests that a helper should be implemented in the memory subsystem proper. Most callers, I could find, have been converted to use the helper instead. This is patch 6. There are some more relying on __GFP_REPEAT in the networking stack which I have converted as well and Eric Dumazet was not opposed [2] to convert them as well. [1] http://lkml.kernel.org/r/20170130094940.13546-1-mhocko@kernel.org [2] http://lkml.kernel.org/r/1485273626.16328.301.camel@edumazet-glaptop3.roam.corp.google.com This patch (of 9): Using kmalloc with the vmalloc fallback for larger allocations is a common pattern in the kernel code. Yet we do not have any common helper for that and so users have invented their own helpers. Some of them are really creative when doing so. Let's just add kv[mz]alloc and make sure it is implemented properly. This implementation makes sure to not make a large memory pressure for > PAGE_SZE requests (__GFP_NORETRY) and also to not warn about allocation failures. This also rules out the OOM killer as the vmalloc is a more approapriate fallback than a disruptive user visible action. This patch also changes some existing users and removes helpers which are specific for them. In some cases this is not possible (e.g. ext4_kvmalloc, libcfs_kvzalloc) because those seems to be broken and require GFP_NO{FS,IO} context which is not vmalloc compatible in general (note that the page table allocation is GFP_KERNEL). Those need to be fixed separately. While we are at it, document that __vmalloc{_node} about unsupported gfp mask because there seems to be a lot of confusion out there. kvmalloc_node will warn about GFP_KERNEL incompatible (which are not superset) flags to catch new abusers. Existing ones would have to die slowly. [sfr@canb.auug.org.au: f2fs fixup] Link: http://lkml.kernel.org/r/20170320163735.332e64b7@canb.auug.org.au Link: http://lkml.kernel.org/r/20170306103032.2540-2-mhocko@kernel.org Signed-off-by: Michal Hocko <mhocko@suse.com> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Reviewed-by: Andreas Dilger <adilger@dilger.ca> [ext4 part] Acked-by: Vlastimil Babka <vbabka@suse.cz> Cc: John Hubbard <jhubbard@nvidia.com> Cc: David Miller <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-05-04f2fs: relocate inode_{,un}lock in F2FS_IOC_SETFLAGSChao Yu1-3/+4
This patch expands cover region of inode->i_rwsem to keep setting flag atomically. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-05-03f2fs: reconstruct code to write a data pageHou Pengyang1-2/+2
This patch introduces encrypt_one_page which encrypts one data page before submit_bio, and change the use of need_inplace_update. Signed-off-by: Hou Pengyang <houpengyang@huawei.com> Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-04-24f2fs: add ioctl to flush data from faster device to cold areaJaegeuk Kim1-2/+67
This patch adds an ioctl to flush data in faster device to cold area. User can give device number and number of segments to move. It doesn't move it if there is only one device. The parameter looks like: struct f2fs_flush_device { u32 dev_num; /* device number to flush */ u32 segments; /* # of segments to flush */ }; Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-04-19f2fs: introduce async IPU policyHou Pengyang1-1/+1
This patch introduces an ASYNC IPU policy. Under senario of large # of async updating(e.g. log writing in Android), disk would be seriously fragmented, and higher frequent gc would be triggered. This patch uses IPU to rewrite the async update writting, since async is NOT sensitive to io latency. Signed-off-by: Hou Pengyang <houpengyang@huawei.com>
2017-04-12f2fs: allocate hot_data for atomic writesJaegeuk Kim1-0/+1
We'd better allocate atomic writes to hot_data zone. Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-04-11f2fs: clean up some macros in terms of GET_SEGNOJaegeuk Kim1-2/+1
This patch cleans several macros by introducing: - BLKS_PER_SEC - GET_SEC_FROM_SEG - GET_SEG_FROM_SEC - GET_ZONE_FROM_SEC - GET_ZONE_FROM_SEG Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-03-24f2fs: show the max number of volatile operationsChao Yu1-0/+5
This patch adds to show the max number of volatile operations which are conducting concurrently. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-03-22f2fs: avoid stat_inc_atomic_write for non-atomic fileKinglong Mee1-3/+7
After filemap_write_and_wait_range fail, the FI_ATOMIC_FILE flags is removed, so that f2fs should not increase the stat of atomic_write. Signed-off-by: Kinglong Mee <kinglongmee@gmail.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
2017-03-22f2fs: cleanup the disk level filename updatingKinglong Mee1-8/+0
As discuss with Jaegeuk and Chao, "Once checkpoint is done, f2fs doesn't need to update there-in filename at all." The disk-level filename is used only one case, 1. create a file A under a dir 2. sync A 3. godown 4. umount 5. mount (roll_forward) Only the rename/cross_rename changes the filename, if it happens, a. between step 1 and 2, the sync A will caused checkpoint, so that, the roll_forward at step 5 never happens. b. after step 2, the roll_forward happens, file A will roll forward to the result as after step 1. So that, any updating the disk filename is useless, just cleanup it. Signed-off-by: Kinglong Mee <kinglongmee@gmail.com> Reviewed-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>