PYTHON 23
Data Science Guest on 16th January 2023 10:53:55 PM
  1. import pandas as pd
  2. # df = pd.read_csv('salaries_by_college_major.csv')
  3. # print(df.head())
  4. # print(df.shape)
  5. # print(df.tail())
  6. # print(df.columns)
  7.  
  8. # print(df.isna())
  9. # print(df.tail())
  10. # clean_df = df.dropna()
  11. # print(clean_df.tail())
  12. # print(clean_df['Undergraduate Major'])
  13. # print(clean_df['Starting Median Salary'])
  14. # print(clean_df['Starting Median Salary'].max())
  15. # print(clean_df['Starting Median Salary'].min())
  16. # print(clean_df['Starting Median Salary'].idxmax())
  17. # print(clean_df['Starting Median Salary'].idxmin())
  18. # print(clean_df['Starting Median Salary'][43])
  19. # highest = clean_df.sort_values(['Starting Median Salary'], ascending=True)
  20. # print(highest['Starting Median Salary'])
  21. # difference = clean_df['Mid-Career 90th Percentile Salary'] - clean_df['Mid-Career 10th Percentile Salary']
  22. # clean_df.insert(1, 'Spread', difference)
  23. # print(clean_df.head())
  24. # print(clean_df.groupby('Group').count())
  25.  
  # Load the Stack Overflow query export. Passing header=0 together with
  # names= replaces the file's own header row with the column names given here.
  df = pd.read_csv('QueryResults.csv', names=['DATE', 'TAG', 'POSTS'], header=0)

  # head is a method: without the call parentheses print() shows the bound
  # method's repr rather than the first rows of the frame.
  print(df.head())

  # Total posts per tag. Only POSTS is aggregated: DATE has not been parsed yet
  # (presumably still text), and summing a text column is either dropped or
  # string-concatenated depending on the pandas version.
  print(df.groupby('TAG')[['POSTS']].sum())

  # Number of rows recorded per tag.
  print(df.groupby('TAG').count())

  # Inspect one DATE cell and its type before conversion.
  print(df['DATE'][1])
  print(type(df['DATE'][1]))

  # Convert the DATE column to pandas datetimes, then confirm the new type.
  df['DATE'] = pd.to_datetime(df['DATE'])
  print(df.head())
  print(type(df['DATE'][1]))
  # Long-format sample data: one row per (Age, Actor) pair with a Power score.
  test_df = pd.DataFrame({
      'Age': ['Young', 'Young', 'Young', 'Young', 'Old', 'Old', 'Old', 'Old'],
      # The same four actors must appear under both ages; a misspelt name would
      # become its own column in the pivot and leave NaN gaps.
      'Actor': [
          'Jack', 'Arnold', 'Keanu', 'Sylvester',
          'Jack', 'Arnold', 'Keanu', 'Sylvester',
      ],
      'Power': [100, 80, 25, 50, 99, 75, 5, 30],
  })

  # Wide format: one row per Age, one column per Actor, cells hold Power.
  pivoted_df = test_df.pivot(index='Age', columns='Actor', values='Power')

  print(test_df)
  # Show the reshaped frame as well, so the pivot result is actually visible.
  print(pivoted_df)
  45.  
  import matplotlib.pyplot as plt

  # Wide format: one row per DATE, one column per TAG, cells hold POSTS.
  reshaped_df = df.pivot(index='DATE', columns='TAG', values='POSTS')
  # (DATE, TAG) combinations missing from the data come out of pivot as NaN;
  # replace them with 0.
  reshaped_df = reshaped_df.fillna(0)

  # Create the figure first: plotting before plt.figure() draws onto an
  # implicit default-sized figure and leaves it behind as a stray duplicate.
  plt.figure(figsize=(16, 10))
  plt.xticks(fontsize=14)
  plt.yticks(fontsize=14)
  # Column is selected by key rather than attribute so a tag name can never
  # collide with a DataFrame attribute.
  plt.plot(reshaped_df.index, reshaped_df['java'])
  plt.xlabel('Date', fontsize=14)
  plt.ylabel('Number of Posts', fontsize=14)
  # NOTE(review): upper limit of 300 is kept from the original; confirm it
  # matches the scale of the POSTS values or the line will be clipped.
  plt.ylim(0, 300)

  # Without show() a plain script exits without ever displaying the figure.
  plt.show()
  60.  
  # Build a small demo DataFrame (note: this rebinds the name df).
  data = {
      'Name': ['John', 'Mike', 'Sara', 'Mike', 'John', 'Sara'],
      'Age': [25, 30, 22, 30, 27, 22],
      'Gender': ['M', 'M', 'F', 'M', 'M', 'F'],
  }
  df = pd.DataFrame(data)

  # value_counts() tallies how many rows carry each distinct Name.
  name_counts = df['Name'].value_counts()
  print(name_counts)

  # merged_df = pd.merge(set_theme_count, themes, on='id')

  # Summary statistics for the frame's numeric column(s).
  print(df.describe())
  76.  
  # Build a small demo DataFrame (note: this rebinds the name df).
  data = {
      'Name': ['John', 'Mike', 'Sara', 'Mike', 'John', 'Sara'],
      'Age': [25, 30, 22, 30, 27, 22],
      'Gender': ['M', 'M', 'F', 'M', 'M', 'F'],
  }
  df = pd.DataFrame(data)

  # True when at least one Age is strictly greater than 30. The comparison
  # yields a boolean Series, and any() reduces it to a single truth value, so
  # no detour through .values is needed.
  has_age_over_30 = (df['Age'] > 30).any()
  print(has_age_over_30)

Coding Base is for source code and general debugging text.

Login or Register to edit, delete and keep track of your pastes and more.

Raw Paste

Login or Register to edit or fork this paste. It's free.