-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtrackmate_model_final.py
463 lines (356 loc) · 21.4 KB
/
trackmate_model_final.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
# -*- coding: utf-8 -*-
"""TrackMate_Model_Final.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/14Di2qGv_RcnpteM3iCMYm1eMmBctif2P
"""
# NOTE: `pip install ...` is an IPython/Colab shell command, not Python syntax;
# left as a bare statement it makes this module fail to parse. Install the
# dependency from a shell or a notebook cell before running this script:
#   pip install tensorflow_recommenders
from google.colab import drive
# df = drive.mount('/content/drive/MyDrive/amazon.csv')
# Commented out IPython magic to ensure Python compatibility.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from typing import Dict, Text
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_recommenders as tfrs
from tensorflow.keras.layers.experimental.preprocessing import StringLookup
from tensorflow.keras.layers import Embedding, Input, Dense
from tensorflow.keras.models import Model
from google.colab import files
import warnings; warnings.simplefilter('ignore')
# %matplotlib inline
"""# Load Data"""
# Load the product/review table (rows pandas cannot parse are skipped) and
# work on a copy so `df` keeps the data exactly as it was read.
df = pd.read_csv('TrackMate Dataset.csv', on_bad_lines='skip')
df_new = df.copy()
df_new.tail(7)  # notebook preview; has no effect when run as a script
jumlah_baris = df_new.shape[0]
print("Jumlah baris dalam dataset: {}".format(jumlah_baris))
# Data produk baru yang ingin ditambahkan
data_produk_baru_1 = [
{
'product_id': 'B112X5ADO8',
'product_name': 'Greenbean Coffee',
'category': 'Grocery and Gourmet Food',
'discounted_price': '₹80,000',
'actual_price': '₹80,000',
'discount_percentage': '0%',
'rating': 4.9,
'rating_count': '7',
'about_product': 'Cerry kopi yang sudah mengalami proses pasca panen dan siap panggang',
'user_id': "AGSGSRTEZBQY64WO2HKQTV7TWFSA,AEYD5HVYAJ23CR6PTWOOIKUOIDHA,AFRMNW6TDHDZBP2UHF2K3MEAEYUA,AHICHCW6EC3BNV2IDAEAJPBG4HZQ,AGWFKE7RNP6EVC4JFLFSL76EEVVQ,AGEOQQHGNELZNEUKJAJUA7NTPBLA,AFS3QBSOMCE2FAZFUYZ3NBFQDLMQ,AGJYG6ZWCWD74WNE6Y37XZ2VUSMA",
'user_name': "Birendra ku Dash,Aditya Gupta,Abdulla A N,Deepak,Gowtham,Rakesh,Pawan Kumar,Prabhat Raj Pathak",
'review_id': "A1B2C3D4E5F6G7H8, I9J1K2L3M4N5O6, P7Q8R9S1T2U3V4, W5X6Y7Z8A9B1C2, D3E4F5G6H7I8J9, K1L2M3N4O5P6Q7, R8S9T1U2V3W4X5, Y6Z7A8B9C1D2E3",
'review_title': "Rasa autentik dan khas, bahan-bahan segar terasa, penyajian cantik dan instagramable, menu ramah vegetarian, tempat yang bersih dan nyaman, suasana tenang, pilihan sehat dan bergizi, layanan ramah.",
'review_content': 'Revolutionary, high-quality, elegant design, affordable, user-friendly, functional, fast delivery, highly recommended for any product!',
'img_link': 'https://drive.google.com/open?id=1mCbKdwvUu9lZV-uC18Ak9_oAACyy18YH',
'product_link': 'https://www.instagram.com/agroniaga_idn/'
},
{
'product_id': 'B613M6MSA8',
'product_name': 'MORINGA TEA',
'category': 'Grocery and Gourmet Food',
'discounted_price': '₹40,000',
'actual_price': '₹40,000',
'discount_percentage': '0%',
'rating': 4.9,
'rating_count': '7',
'about_product': "Produk berbahan dasar daun kelor yang bentuk kemasan teabag tinggal seduh.",
'user_id': "AGSGSRTEZBQY64WO2HKQTV7TWFSA,AEYD5HVYAJ23CR6PTWOOIKUOIDHA,AFRMNW6TDHDZBP2UHF2K3MEAEYUA,AHICHCW6EC3BNV2IDAEAJPBG4HZQ,AGWFKE7RNP6EVC4JFLFSL76EEVVQ,AGEOQQHGNELZNEUKJAJUA7NTPBLA,AFS3QBSOMCE2FAZFUYZ3NBFQDLMQ,AGJYG6ZWCWD74WNE6Y37XZ2VUSMA",
'user_name': "Birendra ku Dash,Aditya Gupta,Abdulla A N,Deepak,Gowtham,Rakesh,Pawan Kumar,Prabhat Raj Pathak",
'review_id': "AGSGSRTEZBQY64WO2HKQTV7TWFSA,AEYD5HVYAJ23CR6PTWOOIKUOIDHA,AFRMNW6TDHDZBP2UHF2K3MEAEYUA,AHICHCW6EC3BNV2IDAEAJPBG4HZQ,AGWFKE7RNP6EVC4JFLFSL76EEVVQ,AGEOQQHGNELZNEUKJAJUA7NTPBLA,AFS3QBSOMCE2FAZFUYZ3NBFQDLMQ,AGJYG6ZWCWD74WNE6Y37XZ2VUSMA",
'review_title': "Makanan sehat dan organik, pilihan vegan yang baik, desain interior yang Instagramable, suasana yang tenang dan ramah lingkungan, menu salad yang kreatif, harga terjangkau untuk makanan sehat, pelayanan yang ramah dan informatif.",
'review_content': 'Revolutionary, high-quality, elegant design, affordable, user-friendly, functional, fast delivery, highly recommended for any product!',
'img_link': 'https://drive.google.com/open?id=18osvUd5nE_Uu21d6c3aqi7ccPbT6dxOQ',
'product_link': 'https://www.tokopedia.com/archive-newedenmoringa-1645404339/moringa-tea-new-eden-20gr?extParam=src%3Dshop%26whid%3D9669096'
},
{
'product_id': 'B404F7FMA1',
'product_name': 'TEMPEKITA TEMPE SEGAR',
'category': 'Grocery and Gourmet Food',
'discounted_price': '₹17,000',
'actual_price': '₹17,000',
'discount_percentage': '0%',
'rating': 4.9,
'rating_count': '7',
'about_product': "Tempe segar adalah produk yang berasal dari kacang kedelai yang difermentasi menggunakan ragi tempe.",
'user_id': "AGSGSRTEZBQY64WO2HKQTV7TWFSA,AEYD5HVYAJ23CR6PTWOOIKUOIDHA,AFRMNW6TDHDZBP2UHF2K3MEAEYUA,AHICHCW6EC3BNV2IDAEAJPBG4HZQ,AGWFKE7RNP6EVC4JFLFSL76EEVVQ,AGEOQQHGNELZNEUKJAJUA7NTPBLA,AFS3QBSOMCE2FAZFUYZ3NBFQDLMQ,AGJYG6ZWCWD74WNE6Y37XZ2VUSMA",
'user_name': "Birendra ku Dash,Aditya Gupta,Abdulla A N,Deepak,Gowtham,Rakesh,Pawan Kumar,Prabhat Raj Pathak",
'review_id': "M8N9O1P2Q3R4S5T6U7V8W9, X1Y2Z3A4B5C6D7E8F9G1H2I3J4K5, L6M7N8O9P1Q2R3S4T5U6V7W8X9, Y1Z2A3B4C5D6E7F8G9H1I2J3K4L5, M6N7O8P9Q1R2S3T4U5V6W7X8Y9, Z1A2B3C4D5E6F7G8H9I1J2K3L4, M5N6O7P8Q9R1S2T3U4V5W6, X7Y8Z9A1B2C3D4E5F6G7",
'review_title': "Makanan sehat dan organik, pilihan vegan yang baik, desain interior yang Instagramable, suasana yang tenang dan ramah lingkungan, menu salad yang kreatif, harga terjangkau untuk makanan sehat, pelayanan yang ramah dan informatif.",
'review_content': 'Revolutionary, high-quality, elegant design, affordable, user-friendly, functional, fast delivery, highly recommended for any product!',
'img_link': 'https://drive.google.com/open?id=1fv6EFhTNGeAuZgMapiOODILB3um7LgOX',
'product_link': 'https://shopee.co.id/-Tempekita.id-Tempe-Segar-i.419555041.8543489708?xptdk=8ec5c78a-ef7b-4600-b5ab-8652b22055eb'
},
{
'product_id': 'B772O6JWR8',
'product_name': 'ZAKET',
'category': 'Home&Kitchen',
'discounted_price': '₹28,000',
'actual_price': '₹28,000',
'discount_percentage': '0%',
'rating': 4.9,
'rating_count': '7',
'about_product': "Terbuat dari limbah sawit",
'user_id': "AGSGSRTEZBQY64WO2HKQTV7TWFSA,AEYD5HVYAJ23CR6PTWOOIKUOIDHA,AFRMNW6TDHDZBP2UHF2K3MEAEYUA,AHICHCW6EC3BNV2IDAEAJPBG4HZQ,AGWFKE7RNP6EVC4JFLFSL76EEVVQ,AGEOQQHGNELZNEUKJAJUA7NTPBLA,AFS3QBSOMCE2FAZFUYZ3NBFQDLMQ,AGJYG6ZWCWD74WNE6Y37XZ2VUSMA",
'user_name': "Birendra ku Dash,Aditya Gupta,Abdulla A N,Deepak,Gowtham,Rakesh,Pawan Kumar,Prabhat Raj Pathak",
'review_id': "O8P9Q1R2S3T4U5V6W7X8Y9, Z1A2B3C4D5E6F7G8H9I1J2K3, L4M5N6O7P8Q9R1S2T3U4V5W6, X7Y8Z9A1B2C3D4E5F6G7H8, I9J1K2L3M4N5O6P7Q8R9S1T2, U3V4W5X6Y7Z8A9B1C2D3E4F5, G6H7I8J9K1L2M3N4O5P6, Q7R8S9T1U2V3W4X5Y6Z7",
'review_title': "Arang dari limbah ini adalah solusi ramah lingkungan yang brilian, Mengubah limbah menjadi sumber energi yang berguna, ini membantu mengurangi jejak karbon, Proses daur ulang limbah menjadi arang sangat inovatif dan membantu menjaga lingkungan, Saya sangat mendukung produk-produk yang memprioritaskan keberlanjutan, arang dari limbah ini adalah langkah positif menuju masa depan yang lebih hijau dan berkelanjutan.",
'review_content': 'Revolutionary, high-quality, elegant design, affordable, user-friendly, functional, fast delivery, highly recommended for any product!',
'img_link': 'https://drive.google.com/open?id=1JTMePUNF4XMWe8IW5VW5et513nESlgWB',
'product_link': 'https://drive.google.com/open?id=1JTMePUNF4XMWe8IW5VW5et513nESlgWB'
},
{
'product_id': 'B103G6MHZ9',
'product_name': 'Mutiara Beras Glukomanan Porang',
'category': 'Grocery and Gourmet Food',
'discounted_price': '₹135,000',
'actual_price': '₹135,000',
'discount_percentage': '0%',
'rating': 4.9,
'rating_count': '7',
'about_product': "Nasi tinggi serat rendah karbohidrat rendah kalori dan rendah gula",
'user_id': "AGSGSRTEZBQY64WO2HKQTV7TWFSA,AEYD5HVYAJ23CR6PTWOOIKUOIDHA,AFRMNW6TDHDZBP2UHF2K3MEAEYUA,AHICHCW6EC3BNV2IDAEAJPBG4HZQ,AGWFKE7RNP6EVC4JFLFSL76EEVVQ,AGEOQQHGNELZNEUKJAJUA7NTPBLA,AFS3QBSOMCE2FAZFUYZ3NBFQDLMQ,AGJYG6ZWCWD74WNE6Y37XZ2VUSMA",
'user_name': "Birendra ku Dash,Aditya Gupta,Abdulla A N,Deepak,Gowtham,Rakesh,Pawan Kumar,Prabhat Raj Pathak",
'review_id': "A8B9C1D2E3F4G5H6I7J8, K9L1M2N3O4P5Q6R7S8T9U1V2W3, X4Y5Z6A7B8C9D1E2F3G4H5I6J7, K8L9M1N2O3P4Q5R6S7T8U9V1W2, X3Y4Z5A6B7C8D9E1F2G3H4I5, J6K7L8M9N1O2P3Q4R5S6T7, U8V9W1X2Y3Z4A5B6C7D8, E9F1G2H3I4J5K6L7M8N9O1P2",
'review_title': "Makanan sehat dan organik, pilihan vegan yang baik, desain interior yang Instagramable, suasana yang tenang dan ramah lingkungan, menu salad yang kreatif, harga terjangkau untuk makanan sehat, pelayanan yang ramah dan informatif.",
'review_content': 'Revolutionary, high-quality, elegant design, affordable, user-friendly, functional, fast delivery, highly recommended for any product!',
'img_link': 'https://drive.google.com/open?id=1xUJLdhbxytbRK9u1IdTXN9h3jETHAJVi',
'product_link': 'https://dapurporang.com/'
},
{ 'product_id': 'B307H9TVZ8',
'product_name': 'Nasi Jagung Instan Loyangku',
'category': 'Grocery and Gourmet Food',
'discounted_price': '₹15,000',
'actual_price': '₹15,000',
'discount_percentage': '0%',
'rating': 4.9,
'rating_count': '7',
'about_product': "Nasi Jagung Instan Loyangku terbuat dari jagung pilihan dari petani jagung Banjarnegara yang diolah secara tradisioanal oleh masyarakat Desa Pucungbedug. Yuk hidup sehat bersama Nasi Jagung Instan Loyangku.",
'user_id': "AGSGSRTEZBQY64WO2HKQTV7TWFSA,AEYD5HVYAJ23CR6PTWOOIKUOIDHA,AFRMNW6TDHDZBP2UHF2K3MEAEYUA,AHICHCW6EC3BNV2IDAEAJPBG4HZQ,AGWFKE7RNP6EVC4JFLFSL76EEVVQ,AGEOQQHGNELZNEUKJAJUA7NTPBLA,AFS3QBSOMCE2FAZFUYZ3NBFQDLMQ,AGJYG6ZWCWD74WNE6Y37XZ2VUSMA",
'user_name': "Birendra ku Dash,Aditya Gupta,Abdulla A N,Deepak,Gowtham,Rakesh,Pawan Kumar,Prabhat Raj Pathak",
'review_id': "Q3R4S5T6U7V8W9X1Y2Z3A4, B5C6D7E8F9G1H2I3J4K5L6M7N8O9, P1Q2R3S4T5U6V7W8X9Y1Z2A3B4C5D6E7, F8G9H1I2J3K4L5M6N7O8P9Q1R2S3T4, U5V6W7X8Y9Z1A2B3C4D5E6F7G8H9, I1J2K3L4M5N6O7P8Q9R1S2T3U4V5, W6X7Y8Z9A1B2C3D4E5F6G7H8, I9J1K2L3M4N5O6P7Q8R9S1T2",
'review_title': "Makanan sehat dan organik, pilihan vegan yang baik, desain interior yang Instagramable, suasana yang tenang dan ramah lingkungan, menu salad yang kreatif, harga terjangkau untuk makanan sehat, pelayanan yang ramah dan informatif.",
'review_content': 'Revolutionary, high-quality, elegant design, affordable, user-friendly, functional, fast delivery, highly recommended for any product!',
'img_link': 'https://drive.google.com/open?id=1auE4DYUhpOjOSn-P_Fk7akmAHzNoCj7e',
'product_link': 'https://gubug-eva.business.site/'
},
{ 'product_id': 'B397P6QHS8',
'product_name': 'Sambal Baby Cumi',
'category': 'Grocery and Gourmet Food',
'discounted_price': '₹40,000',
'actual_price': '₹40,000',
'discount_percentage': '0%',
'rating': 4.9,
'rating_count': '7',
'about_product': 'Sambal yang terbuat dari baby cumi asin yang berasal dari daerah desa Gerokgak kec. Gerokgak Kab. Buleleng beserta aneka cabai dan rempah-rempah bumbu segar tanpa pengawet yang diolah dengan cara dimasak sehingga memiliki rasa yang pedas, gurih dan nikmat. Cocok jika dimakan bersama nasi panas.',
'user_id': "AGSGSRTEZBQY64WO2HKQTV7TWFSA,AEYD5HVYAJ23CR6PTWOOIKUOIDHA,AFRMNW6TDHDZBP2UHF2K3MEAEYUA,AHICHCW6EC3BNV2IDAEAJPBG4HZQ,AGWFKE7RNP6EVC4JFLFSL76EEVVQ,AGEOQQHGNELZNEUKJAJUA7NTPBLA,AFS3QBSOMCE2FAZFUYZ3NBFQDLMQ,AGJYG6ZWCWD74WNE6Y37XZ2VUSMA",
'user_name': "Birendra ku Dash,Aditya Gupta,Abdulla A N,Deepak,Gowtham,Rakesh,Pawan Kumar,Prabhat Raj Pathak",
'review_id': "U3V4W5X6Y7Z8A9B1C2D3E4F5, G6H7I8J9K1L2M3N4O5P6Q7R8S9, T1U2V3W4X5Y6Z7A8B9C1D2E3F4G5, H6I7J8K9L1M2N3O4P5Q6R7S8T9U1, V2W3X4Y5Z6A7B8C9D1E2F3G4H5, I6J7K8L9M1N2O3P4,D3E4F5G6H7I8J9, K1L2M3N4O5P6Q7",
'review_title': "Makanan sehat dan organik, pilihan vegan yang baik, desain interior yang Instagramable, suasana yang tenang dan ramah lingkungan, menu salad yang kreatif, harga terjangkau untuk makanan sehat, pelayanan yang ramah dan informatif.",
'review_content': 'Revolutionary, high-quality, elegant design, affordable, user-friendly, functional, fast delivery, highly recommended for any product!',
'img_link': 'https://www.instagram.com/sambal_mamo/',
'product_link': 'https://shopee.co.id/sambal_mamo-i.997481767.21685181932?sp_atk=3e60a7ec-5e55-431a-9527-c1ee7d2dca32&xptdk=3e60a7ec-5e55-431a-9527-c1ee7d2dca32'
}
]
# Add the new products to the table. `DataFrame.append` was removed in
# pandas 2.0, so the rows are joined with `pd.concat` instead.
# Products whose `product_id` is already in the table are skipped: the result
# is written back to the same CSV this script reads at start-up, so without
# this check every re-run would append the same products again.
new_products = pd.DataFrame(data_produk_baru_1)
already_present = new_products['product_id'].isin(df_new['product_id'])
if not already_present.all():
    df_new = pd.concat([df_new, new_products[~already_present]], ignore_index=True)
df_new.to_csv('TrackMate Dataset.csv', index=False)
jumlah_baris = len(df_new)
print(f"Jumlah baris dalam dataset: {jumlah_baris}")
df_new.tail(7)
"""# Data Cleaning"""
# Inspect the table size (rows, columns) before cleaning
df_new.shape
"""## Checking Missing Value"""
# Count missing values per column, drop every row that contains at least one,
# then count again and re-check the size.
df_new.isnull().sum()
df_new.dropna(axis=0, how='any', inplace=True)
df_new.isnull().sum()
df_new.shape
"""## Checking Duplicates"""
# Check for duplicated rows
def check_duplicates(dataframe):
    """Return how many rows of `dataframe` are flagged by `DataFrame.duplicated()`.

    `duplicated()` is called with its default arguments, so a row is flagged
    when an identical row appears earlier in the frame; the first occurrence
    itself is not counted.
    """
    duplicate_mask = dataframe.duplicated()
    return duplicate_mask.sum()
# Print the number of duplicated rows found in df_new.
print(check_duplicates(df_new))
"""## Changing Data Types"""
# Check the data types
def check_data_types(dataframe):
    """Return the per-column dtypes of `dataframe` (its `dtypes` attribute)."""
    column_types = dataframe.dtypes
    return column_types
print(check_data_types(df_new))
# Split the '|'-separated category string into one column per level
category_path = df_new['category']
cat_split = category_path.str.split('|', expand=True)
cat_split.isnull().sum()
# Give the first two levels readable column names
level_names = {0: 'Main category', 1: 'Sub category'}
cat_split = cat_split.rename(columns=level_names)
# Replace the original column with the top-level category only
df_new['category'] = cat_split['Main category']
df_new.tail(7)
# Convert the discounted price, actual price and discount percentage from
# display strings to floats. regex=False makes every replacement a literal
# one regardless of the pandas version's default.
for _price_column in ('discounted_price', 'actual_price'):
    df_new[_price_column] = (
        df_new[_price_column]
        .astype(str)
        .str.replace('₹', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )
df_new['discount_percentage'] = (
    df_new['discount_percentage'].astype(str).str.replace('%', '', regex=False).astype(float) / 100
)
# Remove rows whose rating contains '|'.
# The column is cast to str first so the `.str` accessor also works when the
# column holds non-string values, and '|' is matched literally instead of via
# the non-raw '\|' pattern, which is an invalid escape sequence in a normal
# string literal.
_rating_has_pipe = df_new['rating'].astype(str).str.contains('|', regex=False)
count = _rating_has_pipe.sum()
print(f"Total rows with '|' in the 'rating' column': {count}")
# .copy() so the column assignments below write to an independent frame
# rather than to a slice of the previous one.
df_new = df_new[~_rating_has_pipe].copy()
count = df_new['rating'].astype(str).str.contains('|', regex=False).sum()
print(f"Total rows with '|' in the 'rating' column': {count}")
df_new['rating'] = df_new['rating'].astype(str).str.replace(',', '', regex=False).astype(float)
df_new['rating_count'] = df_new['rating_count'].astype(str).str.replace(',', '', regex=False).astype(float)
print(check_data_types(df_new))
"""# Preparing the Data"""
# Build the modelling table: a copy of the cleaned data without the columns
# listed below.
_unused_columns = [
    'product_name',
    'category',
    'discounted_price',
    'actual_price',
    'discount_percentage',
    'about_product',
    'user_name',
    'review_id',
    'review_title',
    'review_content',
    'img_link',
    'product_link',
    'rating_count',
]
dataset = df_new.copy()
dataset = dataset.drop(columns=_unused_columns)
dataset.tail(7)
dataset.isna().sum()
# Check the number of rows and columns
rows, columns = dataset.shape
print("No of rows: ", rows)
print("No of columns: ", columns)
dataset.dtypes
# Find the minimum and maximum rating values
def find_min_max_rating(data=None):
    """Print the lowest and highest value of the ``rating`` column.

    Args:
        data: DataFrame with a numeric ``rating`` column. When omitted, the
            module-level ``dataset`` is used, which keeps the original
            zero-argument call working.

    The values are printed with one decimal place. The previous ``%d``
    formatting truncated fractional ratings (4.9 was printed as 4).
    """
    ratings = (dataset if data is None else data)['rating']
    print(f'The minimum rating is: {ratings.min():.1f}')
    print(f'The maximum rating is: {ratings.max():.1f}')
find_min_max_rating()
# Number of distinct user ids and product ids in the data
unique_user_count = dataset['user_id'].nunique()
print('Number of unique USERS in Raw data = ', unique_user_count)
unique_item_count = dataset['product_id'].nunique()
print('Number of unique ITEMS in Raw data = ', unique_item_count)
def _build_id_lookup(raw_ids, layer_name):
    """Create a StringLookup layer whose vocabulary is the distinct values of `raw_ids`.

    The layer is configured with no mask token, no out-of-vocabulary indices
    and integer output, and is named `layer_name`.
    """
    return tf.keras.layers.StringLookup(
        vocabulary=list(raw_ids.unique()),
        mask_token=None,
        num_oov_indices=0,
        output_mode='int',
        name=layer_name,
    )


# Encode user_id as integer indices and store them next to the raw ids
user_lookup = _build_id_lookup(dataset['user_id'], 'user_lookup')
dataset['encoded_user_id'] = user_lookup(dataset['user_id'])
dataset.head()
# Encode product_id the same way (the vocabulary is the unique product ids)
product_lookup = _build_id_lookup(dataset['product_id'], 'product_lookup')
dataset['encoded_product_id'] = product_lookup(dataset['product_id'])
dataset.info()
dataset.tail(7)
"""# EDA"""
# The 10 user_id values with the most rows in the dataset
rows_per_user = dataset.groupby('user_id').size()
most_rated = rows_per_user.sort_values(ascending=False).iloc[:10]
most_rated
# Count plot for the categorical product_id column
plt.figure(figsize=(10, 6))
sns.countplot(data=dataset, x='product_id')
plt.title('Countplot of Product IDs')
plt.show()
# Histogram (20 bins, with a KDE curve) for the numeric rating column
rating_values = dataset['rating']
plt.figure(figsize=(10, 6))
sns.histplot(rating_values, bins=20, kde=True)
plt.title('Histogram of rating')
plt.show()
"""# MOdeling"""
# Modelling table: a copy of the dataset without the raw user_id column
new_data = dataset.copy().drop(columns='user_id')
new_data.tail(7)
new_data.info()
# Separate the feature and the target.
# NOTE(review): X_rating (2-D) and y (1-D) both hold the same `rating` values.
X_product = new_data[['encoded_product_id']]
X_rating = new_data[['rating']]
y = new_data['rating']
# 80/20 split with a fixed seed; all three inputs are split with the same rows
(
    X_prod_train,
    X_prod_test,
    X_rating_train,
    X_rating_test,
    y_train,
    y_test,
) = train_test_split(X_product, X_rating, y, test_size=0.2, random_state=42)
# Standardise the encoded product id with StandardScaler; the scaler is fitted
# on the training split and only applied to the test split.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_prod_train_scaled = scaler.fit_transform(X_prod_train)
X_prod_test_scaled = scaler.transform(X_prod_test)
# Build the rating regressor with TensorFlow: one input value, one hidden
# ReLU layer of 128 units and a single linear output.
from tensorflow.keras import regularizers
model_rating = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(1,), name='input_product'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear', name='output_rating'),
])
# Optimizer, loss function and evaluation metric
model_rating.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='mean_absolute_error',
    metrics=['mae'],
)
# Stop training once val_loss has not improved for 3 consecutive epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
# The callback has to be handed to fit() to take effect; previously it was
# created but never used, so early stopping never ran.
history = model_rating.fit(
    X_prod_train_scaled,
    X_rating_train,
    validation_data=(X_prod_test_scaled, X_rating_test),
    epochs=10,
    callbacks=[callback],
)
# Evaluate the rating model on the test split
from sklearn.metrics import mean_absolute_error
rating_predictions = model_rating.predict(X_prod_test_scaled)
mae_rating = mean_absolute_error(X_rating_test, rating_predictions)
print('Mean Absolute Error (Rating): {}'.format(mae_rating))
# Plot the training and validation loss recorded for each epoch
for _history_key, _curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
    plt.plot(history.history[_history_key], label=_curve_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
predictions = model_rating.predict(X_prod_test_scaled)
def recommend_products(encoded_user_id, dataset, top_k_rating=5):
    """Return a user's name and up to `top_k_rating` recommended products.

    Up to `top_k_rating` products are sampled at random from every category
    (categories are visited in random order), the samples are pooled, and the
    `top_k_rating` highest-rated rows of the pool are returned.

    The previous version sorted the pool but then returned only the random
    sample of the last category visited, and raised a NameError when the
    dataset had no categories.

    Args:
        encoded_user_id: Value matched against the `encoded_user_id` column.
        dataset: DataFrame with the columns `encoded_user_id`, `user_id`,
            `user_name`, `category`, `product_id`, `product_name`,
            `rating_x` and `product_link`.
        top_k_rating: Per-category sample size and maximum number of
            recommendations returned.

    Returns:
        A tuple `(user, products)`. `user` is the `user_name` column of the
        matching rows, de-duplicated on `user_id`. `products` holds the
        `product_id`, `product_name`, `rating_x` and `product_link` columns of
        the recommendations, sorted by `rating_x` in descending order; it is
        empty when the dataset has no categories.
    """
    result_columns = ['product_id', 'product_name', 'rating_x', 'product_link']

    # Rows of the requested user, one per distinct user_id
    user_rows = dataset[dataset['encoded_user_id'] == encoded_user_id]
    user_data = user_rows.drop_duplicates(subset='user_id')

    # Visit the categories in random order and sample from each of them
    shuffled_categories = np.random.permutation(dataset['category'].unique())
    samples = []
    for category in shuffled_categories:
        category_data = dataset[dataset['category'] == category]
        sample_size = min(top_k_rating, len(category_data))
        samples.append(category_data.sample(n=sample_size))

    # pd.concat raises on an empty list, so handle the no-category case here
    if not samples:
        return user_data[['user_name']], pd.DataFrame(columns=result_columns)

    # Pool all samples and keep the highest-rated rows
    recommended_products = (
        pd.concat(samples)
        .sort_values(by='rating_x', ascending=False)
        .reset_index(drop=True)
        .head(top_k_rating)
    )
    return user_data[['user_name']], recommended_products[result_columns]
# Example usage
# Replace the example encoded user id and the dataset with real data as needed
encoded_user_id_example = 3
# Inner-join the cleaned table with the encoded one on product and user id.
# Both frames carry a `rating` column, so the merged frame exposes them as
# rating_x / rating_y; recommend_products reads rating_x.
dataset_example = df_new.merge(dataset, how='inner', on=['product_id', 'user_id'])
recommendations_user, top_5_recommendations = recommend_products(encoded_user_id_example, dataset_example)
user_label = recommendations_user.to_string(index=False)
print(f"\nRekomendasi untuk {user_label} ! :\n\n{top_5_recommendations}")
# Save the model in H5 format, then hand the file to Colab's download helper
_h5_model_path = '/content/TrackMate_Model.h5'
model_rating.save(_h5_model_path)
files.download(_h5_model_path)
import os
import tensorflow as tf
# Load the saved H5 model (path is relative to the current working directory)
# and convert it to a TFLite flatbuffer.
model = tf.keras.models.load_model('TrackMate_Model.h5')
converter = tf.lite.TFLiteConverter.from_keras_model(model)
_allowed_op_sets = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
converter.target_spec.supported_ops = _allowed_op_sets
converter._experimental_lower_tensor_list_ops = False
tflite_model = converter.convert()
# Write the converted model to model.tflite inside the output directory,
# creating the directory first if it does not exist.
output_directory = r'C:\Users\BAPPENAS\OneDrive\Documents\Bangkit\coba'
os.makedirs(output_directory, exist_ok=True)
output_file_path = os.path.join(output_directory, 'model.tflite')
with open(output_file_path, mode='wb') as tflite_file:
    tflite_file.write(tflite_model)
print('TFLite model saved to: {}'.format(output_file_path))